vlop.s 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  /*
   * _mulv: 64-bit multiply assembled from 32-bit pieces.
   *
   * Inputs (read from the frame):
   *	u1+4(FP), u1+8(FP)	high and low words of the first operand
   *	u2+12(FP), u2+16(FP)	high and low words of the second operand
   * Output:
   *	0(R7)	high word of the product
   *	4(R7)	low word of the product
   * R7 is never written here; presumably the caller supplies the address
   * of the result in it -- confirm against the calling convention.
   *
   * Method: form the full 64-bit product of the two low words with
   * MULSCC steps (32 steps, or 12 when the multiplier fits in 12 bits),
   * then add low(u1)*high(u2) and high(u1)*low(u2) into the high word.
   *
   * Registers written: R8 (multiplicand), R13 (multiplier, then low
   * result), R9 (partial product, then high result), R12 and R16 (saved
   * low words), R11 (scratch), plus Y and the condition codes.
   */
  1. TEXT _mulv(SB), $0
  2. MOVW u1+8(FP), R8 /* low word of u1 */
  3. MOVW u2+16(FP), R13 /* low word of u2 */
  4. MOVW R13, R16 /* save low parts for later */
  5. MOVW R8, R12 /* R12 = low u1, R16 = low u2; untouched by the exchange below */
  6. /*
  7. * unsigned 32x32 => 64 multiply
  8. */
  9. CMP R13, R8 /* decide which low word becomes the multiplier */
  10. BLE mul1 /* keep the operands as loaded */
  11. MOVW R12, R13 /* exchange: R13 = low u1 */
  12. MOVW R16, R8 /* exchange: R8 = low u2 */
  13. mul1:
  14. MOVW R13, Y /* multiplier goes in Y for the MULSCC steps */
  15. ANDNCC $0xFFF, R13, R0 /* only sets condition codes: any multiplier bits above the low 12? */
  16. BE mul_shortway /* none set: 12 multiply steps are enough */
  17. ANDCC R0, R0, R9 /* zero partial product and clear N and V cond's */
  18. /* long multiply */
  19. MULSCC R8, R9, R9 /* 0 */
  20. MULSCC R8, R9, R9 /* 1 */
  21. MULSCC R8, R9, R9 /* 2 */
  22. MULSCC R8, R9, R9 /* 3 */
  23. MULSCC R8, R9, R9 /* 4 */
  24. MULSCC R8, R9, R9 /* 5 */
  25. MULSCC R8, R9, R9 /* 6 */
  26. MULSCC R8, R9, R9 /* 7 */
  27. MULSCC R8, R9, R9 /* 8 */
  28. MULSCC R8, R9, R9 /* 9 */
  29. MULSCC R8, R9, R9 /* 10 */
  30. MULSCC R8, R9, R9 /* 11 */
  31. MULSCC R8, R9, R9 /* 12 */
  32. MULSCC R8, R9, R9 /* 13 */
  33. MULSCC R8, R9, R9 /* 14 */
  34. MULSCC R8, R9, R9 /* 15 */
  35. MULSCC R8, R9, R9 /* 16 */
  36. MULSCC R8, R9, R9 /* 17 */
  37. MULSCC R8, R9, R9 /* 18 */
  38. MULSCC R8, R9, R9 /* 19 */
  39. MULSCC R8, R9, R9 /* 20 */
  40. MULSCC R8, R9, R9 /* 21 */
  41. MULSCC R8, R9, R9 /* 22 */
  42. MULSCC R8, R9, R9 /* 23 */
  43. MULSCC R8, R9, R9 /* 24 */
  44. MULSCC R8, R9, R9 /* 25 */
  45. MULSCC R8, R9, R9 /* 26 */
  46. MULSCC R8, R9, R9 /* 27 */
  47. MULSCC R8, R9, R9 /* 28 */
  48. MULSCC R8, R9, R9 /* 29 */
  49. MULSCC R8, R9, R9 /* 30 */
  50. MULSCC R8, R9, R9 /* 31 */
  51. MULSCC R0, R9, R9 /* 32; shift only; r9 is high part */
  52. /*
  53. * need to correct top word if top bit set
  54. */
  55. CMP R8, R0 /* test the multiplicand against zero */
  56. BGE mul_tstlow /* top bit clear: high word needs no correction */
  57. ADD R13, R9 /* adjust the high parts */
  58. mul_tstlow:
  59. MOVW Y, R13 /* get low part */
  60. BA mul_done /* skip over the 12-step variant */
  61. mul_shortway:
  62. ANDCC R0, R0, R9 /* zero partial product and clear N and V cond's */
  63. MULSCC R8, R9, R9 /* 0 */
  64. MULSCC R8, R9, R9 /* 1 */
  65. MULSCC R8, R9, R9 /* 2 */
  66. MULSCC R8, R9, R9 /* 3 */
  67. MULSCC R8, R9, R9 /* 4 */
  68. MULSCC R8, R9, R9 /* 5 */
  69. MULSCC R8, R9, R9 /* 6 */
  70. MULSCC R8, R9, R9 /* 7 */
  71. MULSCC R8, R9, R9 /* 8 */
  72. MULSCC R8, R9, R9 /* 9 */
  73. MULSCC R8, R9, R9 /* 10 */
  74. MULSCC R8, R9, R9 /* 11 */
  75. MULSCC R0, R9, R9 /* 12; shift only; r9 is high part */
  76. MOVW Y, R8 /* make low part of partial low part & high part */
  77. SLL $12, R9, R13 /* R13 = R9 shifted left 12 */
  78. SRL $20, R8 /* bring the top 12 bits of Y down to the bottom */
  79. OR R8, R13 /* R13 = low word of the product */
  80. SRA $20, R9 /* high part */
  81. mul_done:
  82. /*
  83. * mul by high halves if needed
  84. */
  85. MOVW R13, 4(R7) /* store low word of the result */
  86. MOVW u2+12(FP), R11 /* high word of u2 */
  87. CMP R11, R0 /* is it zero? */
  88. BE nomul1 /* yes: this cross product contributes nothing */
  89. MUL R11, R12 /* R12 = low u1 * high u2 */
  90. ADD R12, R9 /* add into the high word */
  91. nomul1:
  92. MOVW u1+4(FP), R11 /* high word of u1 */
  93. CMP R11, R0 /* is it zero? */
  94. BE nomul2 /* yes: this cross product contributes nothing */
  95. MUL R11, R16 /* R16 = low u2 * high u1 */
  96. ADD R16, R9 /* add into the high word */
  97. nomul2:
  98. MOVW R9, 0(R7) /* store high word of the result */
  99. RETURN