/* mpvecdigmuladd.s */
  1. #define BDNZ BC 16,0,
  2. #define BDNE BC 0,2,
  3. /*
  4. * mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p)
  5. *
  6. * p += b*m
  7. *
  8. * each step looks like:
  9. * hi,lo = m*b[i]
  10. * lo += oldhi + carry
  11. * hi += carry
  12. * p[i] += lo
  13. * oldhi = hi
  14. *
  15. * the registers are:
  16. * b = R3
  17. * n = R4
  18. * m = R5
  19. * p = R6
  20. * i = R7
  21. * hi = R8 - constrained by hardware
  22. * lo = R9 - constrained by hardware
  23. * oldhi = R10
  24. * tmp = R11
  25. *
  26. */
  27. TEXT mpvecdigmuladd(SB),$0
  28. MOVW n+4(FP),R4
  29. MOVW m+8(FP),R5
  30. MOVW p+12(FP),R6
  31. SUB $4, R3 /* pre decrement for MOVWU's */
  32. SUB $4, R6 /* pre decrement for MOVWU's */
  33. MOVW R0, R10
  34. MOVW R0, XER
  35. MOVW R4, CTR
  36. _muladdloop:
  37. MOVWU 4(R3),R9 /* lo = b[i] */
  38. MOVW 4(R6),R11 /* tmp = p[i] */
  39. MULHWU R9,R5,R8 /* hi = (b[i] * m)>>32 */
  40. MULLW R9,R5,R9 /* lo = b[i] * m */
  41. ADDC R10,R9 /* lo += oldhi */
  42. ADDE R0,R8 /* hi += carry */
  43. ADDC R9,R11 /* tmp += lo */
  44. ADDE R0,R8 /* hi += carry */
  45. MOVWU R11,4(R6) /* p[i] = tmp */
  46. MOVW R8,R10 /* oldhi = hi */
  47. BDNZ _muladdloop
  48. MOVW 4(R6),R11 /* tmp = p[i] */
  49. ADDC R10,R11
  50. MOVWU R11,4(R6) /* p[i] = tmp */
  51. RETURN