memmove-power.s

#define BDNZ BC 16,0,
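/*
 * BDNZ expands to BC 16,0,<label>: decrement CTR and branch
 * if it has not reached zero (the PowerPC bdnz form).
 */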
	TEXT memcpy(SB), $0
	BR move
	TEXT memmove(SB), $0
move:
/*
 * performance:
 * (tba)
 */
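/*
 * the first argument (s1) arrives in R3; the others are on the
 * stack at s2+4(FP) and n+8(FP).  R3 is saved at s1+0(FP) so it
 * can be reloaded and returned at ret.  a zero count returns at
 * once; a negative count is forced to fault at trap.
 */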
	MOVW R3, s1+0(FP)
	MOVW n+8(FP), R9	/* R9 is count */
	MOVW R3, R10		/* R10 is to-pointer */
	CMP R9, $0
	BEQ ret
	BLT trap
	MOVW s2+4(FP), R11	/* R11 is from-pointer */
/*
 * if no more than 16 bytes, just use one lsw/stsw
 */
	CMP R9, $16
	BLE fout
	ADD R9,R11, R13		/* R13 is end from-pointer */
	ADD R9,R10, R12		/* R12 is end to-pointer */
/*
 * easiest test: copy backwards if the destination
 * has the higher address, so an overlapping source
 * is never overwritten before it has been read
 */
	CMPU R10, R11
	BGT back
/*
 * test if both pointers
 * are similarly word aligned
 */
	XOR R10,R11, R7
	ANDCC $3,R7
	BNE fbad
/*
 * move a few bytes to align pointers
 */
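/*
 * R7 = 4 - (to & 3) bytes are moved with a string load/store:
 * MOVW R7, XER sets the transfer length, and LSW/STSW (lswx/stswx)
 * then move that many bytes through registers starting at R16.
 */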
	ANDCC $3,R10,R7
	BEQ f2
	SUBC R7, $4, R7
	SUB R7, R9
	MOVW R7, XER
	LSW (R11), R16
	ADD R7, R11
	STSW R16, (R10)
	ADD R7, R10
/*
 * turn R14 into a count of 16-byte chunks.
 * copy 16 bytes at a time while there's room.
 */
f2:
	SRAWCC $4, R9, R14
	BLE fout
	MOVW R14, CTR
	SUB $4, R11
	SUB $4, R10
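/*
 * the pointers are biased back by 4 so the update forms
 * (MOVWU 4(Rx)) pre-increment before each access.
 */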
f3:
	MOVWU 4(R11), R16
	MOVWU R16, 4(R10)
	MOVWU 4(R11), R17
	MOVWU R17, 4(R10)
	MOVWU 4(R11), R16
	MOVWU R16, 4(R10)
	MOVWU 4(R11), R17
	MOVWU R17, 4(R10)
	BDNZ f3
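/*
 * RLWNMCC $0, R9, $15, R9 is R9 &= 15: the residue left after the
 * 16-byte chunks.  it also sets the condition code, so BEQ ret
 * skips the tail; otherwise undo the bias and fall into fout.
 */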
	RLWNMCC $0, R9, $15, R9	/* residue */
	BEQ ret
	ADD $4, R11
	ADD $4, R10
/*
 * move up to 16 bytes through R16 .. R19; aligned and unaligned
 */
fout:
	MOVW R9, XER
	LSW (R11), R16
	STSW R16, (R10)
	BR ret
/*
 * loop for unaligned copy, then copy up to 15 remaining bytes
 */
fbad:
	SRAWCC $4, R9, R14
	BLE f6
	MOVW R14, CTR
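/*
 * LSW (R11), $16, R16 and STSW R16, $16, (R10) are the
 * immediate-count string forms (lswi/stswi): they move exactly
 * 16 bytes through R16..R19 with no alignment requirement.
 */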
f5:
	LSW (R11), $16, R16
	ADD $16, R11
	STSW R16, $16, (R10)
	ADD $16, R10
	BDNZ f5
	RLWNMCC $0, R9, $15, R9	/* residue */
	BEQ ret
f6:
	MOVW R9, XER
	LSW (R11), R16
	STSW R16, (R10)
	BR ret
/*
 * the whole thing repeated for the backwards copy;
 * the unaligned case falls straight through to the byte loop at bout
 */
back:
	CMP R9, $4
	BLT bout
	XOR R12,R13, R7
	ANDCC $3,R7
	BNE bout
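/*
 * move a few bytes down from the top
 * until the end pointers are word aligned
 */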
b1:
	ANDCC $3,R13, R7
	BEQ b2
	MOVBZU -1(R13), R16
	MOVBZU R16, -1(R12)
	SUB $1, R9
	BR b1
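/*
 * copy 16 bytes at a time, working down from the end
 */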
b2:
	SRAWCC $4, R9, R14
	BLE b4
	MOVW R14, CTR
b3:
	MOVWU -4(R13), R16
	MOVWU R16, -4(R12)
	MOVWU -4(R13), R17
	MOVWU R17, -4(R12)
	MOVWU -4(R13), R16
	MOVWU R16, -4(R12)
	MOVWU -4(R13), R17
	MOVWU R17, -4(R12)
	BDNZ b3
	RLWNMCC $0, R9, $15, R9	/* residue */
	BEQ ret
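/*
 * copy the remaining whole words, still working downwards
 */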
b4:
	SRAWCC $2, R9, R14
	BLE bout
	MOVW R14, CTR
b5:
	MOVWU -4(R13), R16
	MOVWU R16, -4(R12)
	BDNZ b5
	RLWNMCC $0, R9, $3, R9	/* residue */
	BEQ ret
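/*
 * move whatever is left a byte at a time, until the
 * end from-pointer meets the original from-pointer
 */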
bout:
	CMPU R13, R11
	BLE ret
	MOVBZU -1(R13), R16
	MOVBZU R16, -1(R12)
	BR bout
trap:
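/*
 * deliberately fault on a negative count: store through address 0.
 * with R0 as the base register the effective address is a literal
 * zero, so the commented-out clear of R0 is not needed.
 */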
	/* MOVW $0, R0 */
	MOVW R0, 0(R0)
ret:
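/*
 * memmove returns its first argument: reload s1 into R3
 */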
	MOVW s1+0(FP), R3
	RETURN