123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132 |
- #define BDNZ BC 16,0,
- /*
- * 64/64 division adapted from powerpc compiler writer's handbook
- *
- * (R3:R4) = (R3:R4) / (R5:R6) (64b) = (64b / 64b)
- * quo dvd dvs
- *
- * Remainder is left in R7:R8
- *
- * Code comment notation:
- * msw = most-significant (high-order) word, i.e. bits 0..31
- * lsw = least-significant (low-order) word, i.e. bits 32..63
- * LZ = Leading Zeroes
- * SD = Significant Digits
- *
- * R3:R4 = dvd (input dividend); quo (output quotient)
- * R5:R6 = dvs (input divisor)
- *
- * R7:R8 = tmp; rem (output remainder)
- */
- TEXT _divu64(SB), $0
- MOVW a+0(FP), R3
- MOVW a+4(FP), R4
- MOVW b+8(FP), R5
- MOVW b+12(FP), R6
- /* count the number of leading 0s in the dividend */
- CMP R3, $0 /* dvd.msw == 0? R3, */
- CNTLZW R3, R11 /* R11 = dvd.msw.LZ */
- CNTLZW R4, R9 /* R9 = dvd.lsw.LZ */
- BNE lab1 /* if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ */
- ADD $32, R9, R11 /* dvd.LZ = dvd.lsw.LZ + 32 */
- lab1:
- /* count the number of leading 0s in the divisor */
- CMP R5, $0 /* dvd.msw == 0? */
- CNTLZW R5, R9 /* R9 = dvs.msw.LZ */
- CNTLZW R6, R10 /* R10 = dvs.lsw.LZ */
- BNE lab2 /* if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ */
- ADD $32, R10, R9 /* dvs.LZ = dvs.lsw.LZ + 32 */
- lab2:
- /* determine shift amounts to minimize the number of iterations */
- CMP R11, R9 /* compare dvd.LZ to dvs.LZ */
- SUBC R11, $64, R10 /* R10 = dvd.SD */
- BGT lab9 /* if(dvs > dvd) quotient = 0 */
- ADD $1, R9 /* ++dvs.LZ (or --dvs.SD) */
- SUBC R9, $64, R9 /* R9 = dvs.SD */
- ADD R9, R11 /* (dvd.LZ + dvs.SD) = left shift of dvd for */
- /* initial dvd */
- SUB R9, R10, R9 /* (dvd.SD - dvs.SD) = right shift of dvd for */
- /* initial tmp */
- MOVW R9, CTR /* number of iterations = dvd.SD - dvs.SD */
- /* R7:R8 = R3:R4 >> R9 */
- CMP R9, $32
- ADD $-32, R9, R7
- BLT lab3 /* if(R9 < 32) jump to lab3 */
- SRW R7, R3, R8 /* tmp.lsw = dvd.msw >> (R9 - 32) */
- MOVW $0, R7 /* tmp.msw = 0 */
- BR lab4
- lab3:
- SRW R9, R4, R8 /* R8 = dvd.lsw >> R9 */
- SUBC R9, $32, R7
- SLW R7, R3, R7 /* R7 = dvd.msw << 32 - R9 */
- OR R7, R8 /* tmp.lsw = R8 | R7 */
- SRW R9, R3, R7 /* tmp.msw = dvd.msw >> R9 */
- lab4:
- /* R3:R4 = R3:R4 << R11 */
- CMP R11,$32
- ADDC $-32, R11, R9
- BLT lab5 /* (R11 < 32)? */
- SLW R9, R4, R3 /* dvd.msw = dvs.lsw << R9 */
- MOVW $0, R4 /* dvd.lsw = 0 */
- BR lab6
- lab5:
- SLW R11, R3 /* R3 = dvd.msw << R11 */
- SUBC R11, $32, R9
- SRW R9, R4, R9 /* R9 = dvd.lsw >> 32 - R11 */
- OR R9, R3 /* dvd.msw = R3 | R9 */
- SLW R11, R4 /* dvd.lsw = dvd.lsw << R11 */
- lab6:
- /* restoring division shift and subtract loop */
- MOVW $-1, R10
- ADDC $0, R7 /* clear carry bit before loop starts */
- lab7:
- /* tmp:dvd is considered one large register */
- /* each portion is shifted left 1 bit by adding it to itself */
- /* adde sums the carry from the previous and creates a new carry */
- ADDE R4,R4 /* shift dvd.lsw left 1 bit */
- ADDE R3,R3 /* shift dvd.msw to left 1 bit */
- ADDE R8,R8 /* shift tmp.lsw to left 1 bit */
- ADDE R7,R7 /* shift tmp.msw to left 1 bit */
- SUBC R6, R8, R11 /* tmp.lsw - dvs.lsw */
- SUBECC R5, R7, R9 /* tmp.msw - dvs.msw */
- BLT lab8 /* if(result < 0) clear carry bit */
- MOVW R11, R8 /* move lsw */
- MOVW R9, R7 /* move msw */
- ADDC $1, R10, R11 /* set carry bit */
- lab8:
- BDNZ lab7
- ADDE R4,R4 /* quo.lsw (lsb = CA) */
- ADDE R3,R3 /* quo.msw (lsb from lsw) */
- lab10:
- MOVW qp+16(FP), R9
- MOVW rp+20(FP), R10
- CMP R9, $0
- BEQ lab11
- MOVW R3, 0(R9)
- MOVW R4, 4(R9)
- lab11:
- CMP R10, $0
- BEQ lab12
- MOVW R7, 0(R10)
- MOVW R8, 4(R10)
- lab12:
- RETURN
- lab9:
- /* Quotient is 0 (dvs > dvd) */
- MOVW R4, R8 /* rmd.lsw = dvd.lsw */
- MOVW R3, R7 /* rmd.msw = dvd.msw */
- MOVW $0, R4 /* dvd.lsw = 0 */
- MOVW $0, R3 /* dvd.msw = 0 */
- BR lab10
|