vlop.s 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. #define BDNZ BC 16,0, /* branch-conditional with BO=16, BI=0: decrement CTR, branch to the label that follows if CTR != 0 */
  2. /*
  3. * 64/64 division adapted from powerpc compiler writer's handbook
  4. *
  5. * (R3:R4) = (R3:R4) / (R5:R6) (64b) = (64b / 64b)
  6. * i.e. quo = dvd / dvs, each a 64-bit value held in a register pair
  7. *
  8. * Remainder is left in R7:R8
  9. *
  10. * Code comment notation:
  11. * msw = most-significant (high-order) word, i.e. bits 0..31
  12. * lsw = least-significant (low-order) word, i.e. bits 32..63
  13. * LZ = Leading Zeroes
  14. * SD = Significant Digits
  15. *
  16. * R3:R4 = dvd (input dividend); quo (output quotient)
  17. * R5:R6 = dvs (input divisor)
  18. *
  19. * R7:R8 = tmp; rem (output remainder)
  20. */
  21. TEXT _divu64(SB), $0 /* 64-bit divide of a by b; quotient is stored via qp and remainder via rp when the pointer is non-zero */
  22. MOVW a+0(FP), R3 /* R3 = dvd.msw */
  23. MOVW a+4(FP), R4 /* R4 = dvd.lsw */
  24. MOVW b+8(FP), R5 /* R5 = dvs.msw */
  25. MOVW b+12(FP), R6 /* R6 = dvs.lsw */
  26. /* count the number of leading 0s in the dividend */
  27. CMP R3, $0 /* dvd.msw == 0? (tested by the BNE below) */
  28. CNTLZW R3, R11 /* R11 = dvd.msw.LZ */
  29. CNTLZW R4, R9 /* R9 = dvd.lsw.LZ */
  30. BNE lab1 /* if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ */
  31. ADD $32, R9, R11 /* dvd.LZ = dvd.lsw.LZ + 32 */
  32. lab1: /* here R11 = dvd.LZ */
  33. /* count the number of leading 0s in the divisor (NOTE(review): no explicit dvs == 0 check appears in this routine) */
  34. CMP R5, $0 /* dvs.msw == 0? (tested by the BNE below) */
  35. CNTLZW R5, R9 /* R9 = dvs.msw.LZ */
  36. CNTLZW R6, R10 /* R10 = dvs.lsw.LZ */
  37. BNE lab2 /* if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ */
  38. ADD $32, R10, R9 /* dvs.LZ = dvs.lsw.LZ + 32 */
  39. lab2: /* here R9 = dvs.LZ */
  40. /* determine shift amounts to minimize the number of iterations */
  41. CMP R11, R9 /* compare dvd.LZ to dvs.LZ */
  42. SUBC R11, $64, R10 /* R10 = dvd.SD = 64 - dvd.LZ */
  43. BGT lab9 /* tests the CMP above: if(dvd.LZ > dvs.LZ) then dvs > dvd, quotient = 0 */
  44. ADD $1, R9 /* ++dvs.LZ (or --dvs.SD) */
  45. SUBC R9, $64, R9 /* R9 = 64 - (dvs.LZ + 1), i.e. the decremented dvs.SD */
  46. ADD R9, R11 /* (dvd.LZ + decremented dvs.SD) = left shift of dvd for */
  47. /* initial dvd */
  48. SUB R9, R10, R9 /* (dvd.SD - decremented dvs.SD) = right shift of dvd for */
  49. /* initial tmp */
  50. MOVW R9, CTR /* number of iterations = R9 = dvd.SD - dvs.SD + 1 (in terms of the undecremented dvs.SD) */
  51. /* R7:R8 = R3:R4 >> R9 */
  52. CMP R9, $32 /* is the right shift 32 bits or more? */
  53. ADD $-32, R9, R7 /* R7 = R9 - 32, used only on the >= 32 path */
  54. BLT lab3 /* if(R9 < 32) jump to lab3 */
  55. SRW R7, R3, R8 /* tmp.lsw = dvd.msw >> (R9 - 32) */
  56. MOVW $0, R7 /* tmp.msw = 0 */
  57. BR lab4 /* skip the < 32 case */
  58. lab3: /* right shift by less than 32 */
  59. SRW R9, R4, R8 /* R8 = dvd.lsw >> R9 */
  60. SUBC R9, $32, R7 /* R7 = 32 - R9 */
  61. SLW R7, R3, R7 /* R7 = dvd.msw << (32 - R9) */
  62. OR R7, R8 /* tmp.lsw = R8 | R7 */
  63. SRW R9, R3, R7 /* tmp.msw = dvd.msw >> R9 */
  64. lab4: /* here R7:R8 = initial tmp */
  65. /* R3:R4 = R3:R4 << R11 */
  66. CMP R11,$32 /* is the left shift 32 bits or more? */
  67. ADDC $-32, R11, R9 /* R9 = R11 - 32, used only on the >= 32 path */
  68. BLT lab5 /* (R11 < 32)? */
  69. SLW R9, R4, R3 /* dvd.msw = dvd.lsw << (R11 - 32) */
  70. MOVW $0, R4 /* dvd.lsw = 0 */
  71. BR lab6 /* skip the < 32 case */
  72. lab5: /* left shift by less than 32 */
  73. SLW R11, R3 /* R3 = dvd.msw << R11 */
  74. SUBC R11, $32, R9 /* R9 = 32 - R11 */
  75. SRW R9, R4, R9 /* R9 = dvd.lsw >> (32 - R11) */
  76. OR R9, R3 /* dvd.msw = R3 | R9 */
  77. SLW R11, R4 /* dvd.lsw = dvd.lsw << R11 */
  78. lab6: /* here R3:R4 = initial dvd */
  79. /* restoring division shift and subtract loop */
  80. MOVW $-1, R10 /* R10 = -1, added to 1 below to set the carry bit */
  81. ADDC $0, R7 /* clear carry bit before loop starts */
  82. lab7: /* loop body, executed CTR times */
  83. /* tmp:dvd is considered one large register */
  84. /* each portion is shifted left 1 bit by adding it to itself */
  85. /* adde sums the carry from the previous and creates a new carry */
  86. ADDE R4,R4 /* shift dvd.lsw left 1 bit; the incoming carry becomes its low bit */
  87. ADDE R3,R3 /* shift dvd.msw to left 1 bit */
  88. ADDE R8,R8 /* shift tmp.lsw to left 1 bit */
  89. ADDE R7,R7 /* shift tmp.msw to left 1 bit */
  90. SUBC R6, R8, R11 /* R11 = tmp.lsw - dvs.lsw */
  91. SUBECC R5, R7, R9 /* R9 = tmp.msw - dvs.msw, continuing the carry from the lsw subtract; result is tested by the BLT */
  92. BLT lab8 /* if(result < 0) keep tmp unchanged; no instruction sets the carry on this path */
  93. MOVW R11, R8 /* move lsw */
  94. MOVW R9, R7 /* move msw */
  95. ADDC $1, R10, R11 /* set carry bit (-1 + 1 carries out; the sum in R11 is not used) */
  96. lab8: /* carry now holds the bit to shift into dvd next */
  97. BDNZ lab7 /* decrement CTR and repeat while it is non-zero */
  98. ADDE R4,R4 /* quo.lsw (lsb = CA) */
  99. ADDE R3,R3 /* quo.msw (lsb from lsw) */
  100. lab10: /* here R3:R4 = quotient, R7:R8 = remainder */
  101. MOVW qp+16(FP), R9 /* R9 = qp, address for the quotient */
  102. MOVW rp+20(FP), R10 /* R10 = rp, address for the remainder */
  103. CMP R9, $0 /* qp == 0? */
  104. BEQ lab11 /* if so, do not store the quotient */
  105. MOVW R3, 0(R9) /* word at qp+0 = quo.msw */
  106. MOVW R4, 4(R9) /* word at qp+4 = quo.lsw */
  107. lab11: /* remainder store */
  108. CMP R10, $0 /* rp == 0? */
  109. BEQ lab12 /* if so, do not store the remainder */
  110. MOVW R7, 0(R10) /* word at rp+0 = rem.msw */
  111. MOVW R8, 4(R10) /* word at rp+4 = rem.lsw */
  112. lab12: /* done */
  113. RETURN
  114. lab9: /* reached from the BGT above */
  115. /* Quotient is 0 (dvs > dvd) */
  116. MOVW R4, R8 /* rem.lsw = dvd.lsw */
  117. MOVW R3, R7 /* rem.msw = dvd.msw */
  118. MOVW $0, R4 /* quo.lsw = 0 */
  119. MOVW $0, R3 /* quo.msw = 0 */
  120. BR lab10 /* share the result-store code */