#define	BDNZ	BC	16,0,

	TEXT	memcpy(SB), $0
	BR	move

	TEXT	memmove(SB), $0
move:

/*
 * performance:
 * (tba)
 */
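
/*
 * overview:
 * counts of 16 bytes or less are moved with a single lsw/stsw pair.
 * longer moves copy forward when the destination is below the source
 * and backward otherwise; the bulk loops move 16 bytes per iteration.
 */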
	MOVW	R3, s1+0(FP)
	MOVW	n+8(FP), R9		/* R9 is count */
	MOVW	R3, R10			/* R10 is to-pointer */
	CMP	R9, $0
	BEQ	ret
	BLT	trap
	MOVW	s2+4(FP), R11		/* R11 is from-pointer */

/*
 * if no more than 16 bytes, just use one lsw/stsw
 */
	CMP	R9, $16
	BLE	fout

	ADD	R9,R11, R13		/* R13 is end from-pointer */
	ADD	R9,R10, R12		/* R12 is end to-pointer */

/*
 * easiest test is copy backwards if
 * destination string has higher mem address
 */
	CMPU	R10, R11
	BGT	back

/*
 * test if both pointers
 * are similarly word aligned
 */
	XOR	R10,R11, R7
	ANDCC	$3,R7
	BNE	fbad

/*
 * move a few bytes to align pointers
 */
	ANDCC	$3,R10,R7
	BEQ	f2
	SUBC	R7, $4, R7
	SUB	R7, R9
	MOVW	R7, XER
	LSW	(R11), R16
	ADD	R7, R11
	STSW	R16, (R10)
	ADD	R7, R10
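
/* both pointers are now word aligned */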
/*
 * turn R14 into 16-byte chunk count;
 * copy 16 bytes at a time while there's room.
 */
f2:
	SRAWCC	$4, R9, R14
	BLE	fout
	MOVW	R14, CTR
	SUB	$4, R11
	SUB	$4, R10
f3:
	MOVWU	4(R11), R16
	MOVWU	R16, 4(R10)
	MOVWU	4(R11), R17
	MOVWU	R17, 4(R10)
	MOVWU	4(R11), R16
	MOVWU	R16, 4(R10)
	MOVWU	4(R11), R17
	MOVWU	R17, 4(R10)
	BDNZ	f3
	RLWNMCC	$0, R9, $15, R9	/* residue */
	BEQ	ret
	ADD	$4, R11
	ADD	$4, R10
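
/* fall through to fout to move the remaining 1 to 15 bytes */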
/*
 * move up to 16 bytes through R16 .. R19; aligned and unaligned
 */
fout:
	MOVW	R9, XER
	LSW	(R11), R16
	STSW	R16, (R10)
	BR	ret

/*
 * loop for unaligned copy, then copy up to 15 remaining bytes
 */
fbad:
	SRAWCC	$4, R9, R14
	BLE	f6
	MOVW	R14, CTR
f5:
	LSW	(R11), $16, R16
	ADD	$16, R11
	STSW	R16, $16, (R10)
	ADD	$16, R10
	BDNZ	f5
	RLWNMCC	$0, R9, $15, R9	/* residue */
	BEQ	ret
f6:
	MOVW	R9, XER
	LSW	(R11), R16
	STSW	R16, (R10)
	BR	ret

/*
 * whole thing repeated for backwards
 */
back:
	CMP	R9, $4
	BLT	bout

	XOR	R12,R13, R7
	ANDCC	$3,R7
	BNE	bout
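
/* move bytes backward until the end pointers are word aligned */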
b1:
	ANDCC	$3,R13, R7
	BEQ	b2
	MOVBZU	-1(R13), R16
	MOVBZU	R16, -1(R12)
	SUB	$1, R9
	BR	b1
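
/* copy 16 bytes at a time, backwards */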
b2:
	SRAWCC	$4, R9, R14
	BLE	b4
	MOVW	R14, CTR
b3:
	MOVWU	-4(R13), R16
	MOVWU	R16, -4(R12)
	MOVWU	-4(R13), R17
	MOVWU	R17, -4(R12)
	MOVWU	-4(R13), R16
	MOVWU	R16, -4(R12)
	MOVWU	-4(R13), R17
	MOVWU	R17, -4(R12)
	BDNZ	b3
	RLWNMCC	$0, R9, $15, R9	/* residue */
	BEQ	ret
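
/* copy any remaining whole words, backwards */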
b4:
	SRAWCC	$2, R9, R14
	BLE	bout
	MOVW	R14, CTR
b5:
	MOVWU	-4(R13), R16
	MOVWU	R16, -4(R12)
	BDNZ	b5
	RLWNMCC	$0, R9, $3, R9	/* residue */
	BEQ	ret
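
/* copy the trailing bytes one at a time */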
bout:
	CMPU	R13, R11
	BLE	ret
	MOVBZU	-1(R13), R16
	MOVBZU	R16, -1(R12)
	BR	bout
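
/* negative count: generate a fault by storing through address 0 */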
trap:
	/* MOVW	$0, R0 */
	MOVW	R0, 0(R0)
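
/* return the original destination pointer */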
ret:
	MOVW	s1+0(FP), R3
	RETURN