- #define QUAD 8
- #define ALIGN 64
- #define BLOCK 64
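- /*
-  * memmove(to, from, n): to arrives in R0 and is never touched, so it
-  * doubles as the return value; from and n are loaded from the frame.
-  * When to > from an overlapping copy must run tail first, backward
-  * from the ends of both buffers; otherwise it runs forward.
-  */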
- TEXT memmove(SB), $0
- MOVL from+4(FP), R7 /* source pointer */
- MOVL n+8(FP), R10 /* byte count */
- MOVQ R0, R6 /* to: first argument arrives in R0 */
- CMPUGE R7, R0, R5 /* from >= to: forward copy cannot clobber unread source bytes */
- BNE R5, _forward
- MOVQ R6, R8 /* R8 = to: backward copy stops when R6 gets back down to it */
- ADDL R10, R6, R6 /* to+n */
- ADDL R10, R7, R7 /* from+n */
- CMPUGE $ALIGN, R10, R1 /* n <= ALIGN: too short to align, byte loop does it all */
- BNE R1, _b1tail
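- /* copy single bytes down from the tail until to+n is ALIGN-aligned */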
- _balign:
- AND $(ALIGN-1), R6, R1
- BEQ R1, _baligned
- MOVBU -1(R7), R2
- ADDL $-1, R6, R6
- MOVB R2, (R6)
- ADDL $-1, R7, R7
- JMP _balign
-
- _baligned:
- AND $(QUAD-1), R7, R1 /* is the source quad-aligned? */
- BNE R1, _bunaligned
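- /* destination ALIGN-aligned, source quad-aligned: move BLOCK bytes per pass */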
- ADDL $(BLOCK-1), R8, R9
- _bblock:
- CMPUGE R9, R6, R1
- BNE R1, _b8tail
- MOVQ -64(R7), R22
- MOVQ -56(R7), R23
- MOVQ -48(R7), R24
- MOVQ -40(R7), R25
- MOVQ -32(R7), R2
- MOVQ -24(R7), R3
- MOVQ -16(R7), R4
- MOVQ -8(R7), R5
- SUBL $64, R6, R6
- SUBL $64, R7, R7
- MOVQ R22, (R6)
- MOVQ R23, 8(R6)
- MOVQ R24, 16(R6)
- MOVQ R25, 24(R6)
- MOVQ R2, 32(R6)
- MOVQ R3, 40(R6)
- MOVQ R4, 48(R6)
- MOVQ R5, 56(R6)
- JMP _bblock
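- /* fewer than BLOCK bytes remain: finish with quads, then single bytes */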
- _b8tail:
- ADDL $(QUAD-1), R8, R9
- _b8block:
- CMPUGE R9, R6, R1
- BNE R1, _b1tail
- MOVQ -8(R7), R2
- SUBL $8, R6
- MOVQ R2, (R6)
- SUBL $8, R7
- JMP _b8block
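- /* trailing bytes, one at a time, down to the original to */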
- _b1tail:
- CMPUGE R8, R6, R1
- BNE R1, _ret
- MOVBU -1(R7), R2
- SUBL $1, R6, R6
- MOVB R2, (R6)
- SUBL $1, R7, R7
- JMP _b1tail
- _ret:
- RET
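- /*
-  * Quad-misaligned source: MOVQU fetches the aligned quad containing
-  * its address, and EXTQL/EXTQH shift by the low bits of R7 so that
-  * two adjacent fetches can be OR-merged into each aligned store.
-  */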
- _bunaligned:
- ADDL $(16-1), R8, R9
- _bu8block:
- CMPUGE R9, R6, R1
- BNE R1, _b1tail
- MOVQU -16(R7), R4
- MOVQU -8(R7), R3
- MOVQU (R7), R2
- SUBL $16, R6
- EXTQH R7, R2, R2
- EXTQL R7, R3, R5
- OR R5, R2, R11
- EXTQH R7, R3, R3
- EXTQL R7, R4, R4
- OR R3, R4, R13
- MOVQ R11, 8(R6)
- MOVQ R13, (R6)
- SUBL $16, R7
- JMP _bu8block
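- /* from >= to: a forward copy is safe even if the buffers overlap */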
- _forward:
- ADDL R10, R6, R8 /* R8 = to+n: forward copy stops here */
- CMPUGE $ALIGN, R10, R1 /* n <= ALIGN: too short to align, byte loop does it all */
- BNE R1, _f1tail
- _falign:
- AND $(ALIGN-1), R6, R1
- BEQ R1, _faligned
- MOVBU (R7), R2
- ADDL $1, R6, R6
- ADDL $1, R7, R7
- MOVB R2, -1(R6)
- JMP _falign
- _faligned:
- AND $(QUAD-1), R7, R1 /* is the source quad-aligned? */
- BNE R1, _funaligned
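- /* aligned fast path: BLOCK bytes per pass while a full block fits below to+n */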
- SUBL $(BLOCK-1), R8, R9
- _fblock:
- CMPUGT R9, R6, R1
- BEQ R1, _f8tail
- MOVQ (R7), R2
- MOVQ 8(R7), R3
- MOVQ 16(R7), R4
- MOVQ 24(R7), R5
- MOVQ 32(R7), R22
- MOVQ 40(R7), R23
- MOVQ 48(R7), R24
- MOVQ 56(R7), R25
- ADDL $64, R6, R6
- ADDL $64, R7, R7
- MOVQ R2, -64(R6)
- MOVQ R3, -56(R6)
- MOVQ R4, -48(R6)
- MOVQ R5, -40(R6)
- MOVQ R22, -32(R6)
- MOVQ R23, -24(R6)
- MOVQ R24, -16(R6)
- MOVQ R25, -8(R6)
- JMP _fblock
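- /* tails mirror the backward path: quads first, then single bytes */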
- _f8tail:
- SUBL $(QUAD-1), R8, R9
- _f8block:
- CMPUGT R9, R6, R1
- BEQ R1, _f1tail
- MOVQ (R7), R2
- ADDL $8, R6
- ADDL $8, R7
- MOVQ R2, -8(R6)
- JMP _f8block
- _f1tail:
- CMPUGT R8, R6, R1
- BEQ R1, _fret
- MOVBU (R7), R2
- ADDL $1, R6, R6
- ADDL $1, R7, R7
- MOVB R2, -1(R6)
- JMP _f1tail
- _fret:
- RET
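- /* forward variant of the unaligned-source merge above */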
- _funaligned:
- SUBL $(16-1), R8, R9
- _fu8block:
- CMPUGT R9, R6, R1
- BEQ R1, _f1tail
- MOVQU (R7), R2
- MOVQU 8(R7), R3
- MOVQU 16(R7), R4
- EXTQL R7, R2, R2
- EXTQH R7, R3, R5
- OR R5, R2, R11
- EXTQL R7, R3, R3
- MOVQ R11, (R6)
- EXTQH R7, R4, R4
- OR R3, R4, R11
- MOVQ R11, 8(R6)
- ADDL $16, R6
- ADDL $16, R7
- JMP _fu8block
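
For reference, here is the copy strategy of the deleted routine in portable C: pick a direction by comparing the pointers, byte-copy until the destination is aligned, move whole words, then finish with a byte tail. This is a minimal illustrative sketch, not part of the removed file: the name sketch_memmove is invented, the 64-byte unrolled block is collapsed into a single word loop, and the EXTQL/EXTQH unaligned-source merge is replaced by a byte-copy fallback.

#include <stddef.h>
#include <stdint.h>

/* hypothetical illustration of the routine above, not the libc code */
void *
sketch_memmove(void *to, void *from, size_t n)
{
	unsigned char *t = to;
	unsigned char *f = from;

	if ((uintptr_t)f >= (uintptr_t)t) {
		/* from >= to: forward copy cannot clobber unread source bytes */
		unsigned char *end = t + n;

		/* byte-copy until the destination is 8-byte aligned */
		while (t < end && ((uintptr_t)t & 7) != 0)
			*t++ = *f++;
		/* whole words, but only while the source is aligned too */
		if (((uintptr_t)f & 7) == 0)
			while (end - t >= 8) {
				*(uint64_t *)t = *(uint64_t *)f;
				t += 8;
				f += 8;
			}
		/* byte tail; also the unaligned-source fallback */
		while (t < end)
			*t++ = *f++;
	} else {
		/* to > from: copy tail first so the overlap is harmless */
		unsigned char *p = t + n;
		unsigned char *q = f + n;

		while (p > t && ((uintptr_t)p & 7) != 0)
			*--p = *--q;
		if (((uintptr_t)q & 7) == 0)
			while (p - t >= 8) {
				p -= 8;
				q -= 8;
				*(uint64_t *)p = *(uint64_t *)q;
			}
		while (p > t)
			*--p = *--q;
	}
	return to;
}

Copying backward when to > from is the trick that makes overlapping moves safe: the bytes in the overlapping tail are read before they are overwritten, which is exactly what the CMPUGE R7, R0 dispatch at the top of the assembly decides.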