/* vlop-thumb.s */
  /*
   * ARM multiply instructions emitted as literal instruction words.
   *
   * Rs, Rm, Rhi, Rlo and Rd are register numbers; S is the value placed in
   * bit 20 of the word (the set-condition-codes bit of the instruction).
   * Every word carries 14 in its top four bits (the "always" condition).
   *
   *	UMULL	Rhi:Rlo  = Rm * Rs	unsigned 32x32 -> 64
   *	UMLAL	Rhi:Rlo += Rm * Rs	unsigned 32x32 -> 64, accumulating
   *	MUL	Rd       = Rm * Rs	low 32 bits only
   *
   * Each parameter is parenthesized in the expansion so that an argument
   * written as an expression cannot be regrouped by operator precedence.
   */
  #define UMULL(Rs,Rm,Rhi,Rlo,S) WORD $((14<<28)|(4<<21)|((S)<<20)|((Rhi)<<16)|((Rlo)<<12)|((Rs)<<8)|(9<<4)|(Rm))
  #define UMLAL(Rs,Rm,Rhi,Rlo,S) WORD $((14<<28)|(5<<21)|((S)<<20)|((Rhi)<<16)|((Rlo)<<12)|((Rs)<<8)|(9<<4)|(Rm))
  #define MUL(Rs,Rm,Rd,S) WORD $((14<<28)|(0<<21)|((S)<<20)|((Rd)<<16)|((Rs)<<8)|(9<<4)|(Rm))

  /* register number used through R(arg) as the result address in _mulv */
  arg=0
  /* R11 is used below in place of R10, because R10 can be the data segment base register */
  /*
   * _mulv: 64-bit multiply keeping the low 64 bits of the product.
   *
   * Operands arrive on the stack as 32-bit halves:
   *	4(FP) l0, 8(FP) h0, 12(FP) l1, 16(FP) h1
   * R(arg) holds the address the result is stored to, low word first.
   *
   *	low word  = low32(l0*l1)
   *	high word = high32(l0*l1) + low32(h0*l1) + low32(l0*h1)
   *
   * Registers written: R4-R9 and R11.
   */
  TEXT _mulv(SB), $0
          MOVW    12(FP), R4              /* l1 */
          MOVW    16(FP), R5              /* h1 */
          MOVW    4(FP), R9               /* l0 */
          MOVW    8(FP), R11              /* h0 */
          UMULL(4, 9, 7, 6, 0)            /* R7:R6 = l0*l1, full 64-bit product */
          MUL(11, 4, 8, 0)                /* R8 = h0*l1, low word */
          ADD     R8, R7, R7              /* fold first cross term into the high word */
          MUL(9, 5, 8, 0)                 /* R8 = l0*h1, low word */
          ADD     R8, R7, R7              /* fold second cross term into the high word */
          MOVW    R6, 0(R(arg))           /* store low word */
          MOVW    R7, 4(R(arg))           /* store high word */
          RET
  /*
   * _mularsv: multiply, add, and right-shift, producing a 32-bit result
   * while the multiply-add itself is carried out to 64 bits -- intended
   * for fast fixed-point arithmetic.
   *
   * Inputs:	R0 (one multiplicand), 4(FP) m1, 8(FP) a, 12(FP) rs
   * Result:	R0 = (lo >> rs) | (hi << (32 - rs)), where hi:lo is the
   *		unsigned 64-bit value R0*m1 + a (a is zero-extended).
   *
   * Registers written: R0, R4, R8, R9 and R11.
   */
  TEXT _mularsv(SB), $0
          MOVW    $0, R9                  /* accumulator high word starts at zero */
          MOVW    8(FP), R8               /* a: accumulator low word */
          MOVW    4(FP), R11              /* m1 */
          MOVW    12(FP), R4              /* rs */
          UMLAL(0, 11, 9, 8, 0)           /* R9:R8 += R0*m1 */
          MOVW    R8>>R4, R8              /* low word shifted right by rs */
          RSB     $32, R4                 /* R4 = 32 - rs */
          ORR     R9<<R4, R8, R0          /* merge in the bits coming down from the high word */
          RET