mpvecdigmulsub.s
#define	BDNZ	BC	16,0,
#define	BDNE	BC	0,2,
#define	BLT	BC	0xC,0,

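/*
 * the #defines above exist because the assembler lacks mnemonics for
 * these conditional-branch forms, so they are spelled as raw
 * BC BO,BI,target encodings: BO=16 decrements CTR and branches while
 * it is nonzero (BDNZ); BO=0xC branches when condition bit BI is set,
 * and BI=0 is the LT bit of CR0 (BLT).
 */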
/*
 *	mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p)
 *
 *	p -= b*m
 *
 *	each step looks like:
 *		hi,lo = m*b[i]
 *		lo += oldhi
 *		hi += carry
 *		p[i] -= lo	(with running borrow)
 *		oldhi = hi
 *
 *	the registers are:
 *		b = R3
 *		n = R4
 *		m = R5
 *		p = R6
 *		i = R7
 *		hi = R8		- constrained by hardware
 *		lo = R9		- constrained by hardware
 *		oldhi = R10
 *		tmp = R11
 *		borrow = R12
 *
 */
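/*
 * for reference, a C sketch of the same computation.  this is an
 * illustrative reading of the assembly, not the portable libmp source:
 * it assumes mpdigit is a 32-bit unsigned word and uses a 64-bit
 * temporary where the assembly uses the MULHWU/MULLW pair.  p holds
 * one more digit than b; the extra word absorbs the final hi and the
 * leftover borrow, and the sign of that last subtraction is what the
 * routine returns.  (the assembly also assumes n >= 1, since BDNZ
 * decrements CTR before testing it.)
 *
 *	typedef unsigned int mpdigit;		// assumed 32 bits
 *
 *	int
 *	mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p)
 *	{
 *		int i;
 *		unsigned long long x;
 *		mpdigit hi, lo, oldhi, borrow, t;
 *
 *		oldhi = 0;
 *		borrow = 0;			// 0 = no borrow pending
 *		for(i = 0; i < n; i++){
 *			x = (unsigned long long)b[i]*m + oldhi;
 *			lo = (mpdigit)x;	// MULLW + ADDC
 *			hi = (mpdigit)(x>>32);	// MULHWU + ADDE
 *			t = p[i] - lo - borrow;
 *			borrow = p[i] < lo || (p[i] == lo && borrow);
 *			p[i] = t;		// p[i] -= lo, borrow propagates
 *			oldhi = hi;
 *		}
 *		t = p[n] - oldhi - borrow;	// last word of p absorbs hi
 *		borrow = p[n] < oldhi || (p[n] == oldhi && borrow);
 *		p[n] = t;
 *		return borrow ? -1 : 1;		// -1: result went negative
 *	}
 */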
TEXT	mpvecdigmulsub(SB),$0

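/*
 * there is no load for b: under the Plan 9 power calling convention
 * the first argument (and the return value) lives in R3, which is
 * where the routine wants b anyway.  n, m and p are fetched from the
 * caller's frame below.
 */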
	MOVW	n+4(FP),R10
	MOVW	R10,CTR		/* loop count = n; BDNZ decrements CTR */
	MOVW	m+8(FP),R5
	MOVW	p+12(FP),R6
	SUB	$4, R3		/* pre decrement for MOVWU's */
	SUBC	$4, R6		/* pre decrement for MOVWU's and set carry */
	MOVW	XER,R12		/* save the initial "no borrow" state */
	MOVW	R0, R10		/* oldhi = 0 */
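/*
 * two carry chains are in flight: the add carry of the multiply
 * accumulate (ADDC/ADDE) and the running borrow of the subtraction
 * (SUBE), but the PPC has only the single CA bit in XER.  so the
 * borrow is parked in R12 and swapped into and out of XER around each
 * SUBE; the SUBC above seeded CA with "no borrow" before the first
 * save.
 */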
_mulsubloop:
	MOVWU	4(R3),R9	/* lo = b[i] */
	MOVW	4(R6),R11	/* tmp = p[i] */
	MULHWU	R9,R5,R8	/* hi = (b[i] * m)>>32 */
	MULLW	R9,R5,R9	/* lo = b[i] * m */
	ADDC	R10,R9		/* lo += oldhi */
	ADDE	R0,R8		/* hi += carry */
	MOVW	R12,XER		/* restore the subtraction's borrow */
	SUBE	R9,R11		/* tmp -= lo */
	MOVW	XER,R12		/* save the borrow again */
	MOVWU	R11,4(R6)	/* p[i] = tmp */
	MOVW	R8,R10		/* oldhi = hi */
	BDNZ	_mulsubloop
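/*
 * all n digits of b are consumed; p is one digit longer, and this
 * last word absorbs the final hi digit (oldhi) together with the
 * leftover borrow.
 */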
	MOVW	4(R6),R11	/* tmp = p[i] */
	MOVW	R12,XER		/* restore the borrow one last time */
	SUBE	R10,R11		/* tmp -= oldhi */
	MOVWU	R11,4(R6)	/* p[i] = tmp */

	/* return -1 if the result was negative, +1 otherwise */
	SUBECC	R0,R0,R3
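/*
 * SUBECC is subfe.: with both operands R0 (kept zero by convention)
 * it computes R3 = 0 - 0 - !CA, i.e. 0 when the final SUBE left no
 * borrow and -1 when it did, setting the condition codes on the way.
 * BLT then keeps the -1; otherwise R3 is overwritten with +1.
 */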
	BLT	_mulsub2
	MOVW	$1,R3
_mulsub2:
	RETURN
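/*
 * typical use, a hedged sketch of how libmp's division loop appears
 * to call this routine (the names d, r, qd, nd and i are illustrative,
 * not quoted from the source): subtract a trial quotient digit times
 * the divisor from the remainder, and if the result went negative the
 * guess was one too big, so add one multiple back:
 *
 *	if(mpvecdigmulsub(d->p, nd, qd, &r->p[i]) < 0){
 *		qd--;
 *		mpvecdigmuladd(d->p, nd, qd, &r->p[i]);
 *	}
 */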