memcmp.s 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  /*
   * memcmp(s1, s2, n)
   *
   * Compare n bytes at s1 with n bytes at s2, as unsigned bytes.
   * Returns (in R1):
   *       0  if all n bytes are equal (including n == 0),
   *       1  if the first differing byte of s1 is the larger,
   *      -1  if the first differing byte of s1 is the smaller.
   *
   * The first argument arrives in R1 and is spilled to its frame
   * slot; the others are read from the frame.
   *
   * Register use:
   *      R1      scratch for tests, then the result
   *      R3      byte count, later reused as a loop limit
   *      R4      pointer1 (advances)
   *      R5      pointer2 (advances)
   *      R6      end pointer1 (pointer1 + n)
   *      R8-R11  data being compared
   *
   * Word compares are used only to detect *equality*.  As soon as a
   * word differs, control moves to the byte loop with R4/R5 still
   * pointing at the start of the mismatching span, so the ordering
   * is always decided on single bytes.  That makes the fast path
   * independent of byte order; the earlier version ordered whole
   * words with SGTU, which is only right on big-endian, and had
   * therefore been disabled with an unconditional jump to the
   * byte loop.
   *
   * Historical figures from the original comment (machine not
   * recorded): aligned about 1.0us/call and 17.4MB/sec,
   * unaligned about 3.1MB/sec.
   */
  TEXT memcmp(SB), $0
        MOVW    R1, 0(FP)
        MOVW    n+8(FP), R3             /* R3 is count */
        MOVW    s1+0(FP), R4            /* R4 is pointer1 */
        MOVW    s2+4(FP), R5            /* R5 is pointer2 */
        ADDU    R3,R4, R6               /* R6 is end pointer1 */

  /*
   * With fewer than 4 bytes there is no point in the word path:
   * up to 3 bytes may be consumed rounding up to a word boundary,
   * and at least 4 are needed for one full word compare.
   */
        SGT     $4,R3, R1
        BNE     R1, out

  /*
   * The word path needs both pointers to share the same
   * alignment within a word; otherwise use bytes throughout.
   */
        XOR     R4,R5, R1
        AND     $3, R1
        BNE     R1, out

  /*
   * Byte at a time until pointer1 (and therefore pointer2)
   * is word aligned.  At most 3 bytes; n >= 4 here, so this
   * cannot run past the end.
   */
  l1:
        AND     $3,R4, R1
        BEQ     R1, l2
        MOVBU   0(R4), R8
        MOVBU   0(R5), R9
        ADDU    $1, R4
        BNE     R8,R9, ne
        ADDU    $1, R5
        JMP     l1

  /*
   * Turn R3 into end pointer1 - 15 and compare 16 bytes per
   * iteration while at least 16 remain.  Pointers are advanced
   * only after all four words matched, so on any mismatch the
   * byte loop restarts at the beginning of this 16-byte span.
   */
  l2:
        ADDU    $-15,R6, R3
  l3:
        SGTU    R3,R4, R1
        BEQ     R1, l4
        MOVW    0(R4), R8
        MOVW    0(R5), R9
        MOVW    4(R4), R10
        BNE     R8,R9, out
        MOVW    4(R5), R11
        MOVW    8(R4), R8
        BNE     R10,R11, out
        MOVW    8(R5), R9
        MOVW    12(R4), R10
        BNE     R8,R9, out
        MOVW    12(R5), R11
        BNE     R10,R11, out
        ADDU    $16, R4
        ADDU    $16, R5
        JMP     l3

  /*
   * Turn R3 into end pointer1 - 3 and compare 4 bytes per
   * iteration while at least 4 remain.  As above, a mismatch
   * leaves R4/R5 at the start of the differing word.
   */
  l4:
        ADDU    $-3,R6, R3
  l5:
        SGTU    R3,R4, R1
        BEQ     R1, out
        MOVW    0(R4), R8
        MOVW    0(R5), R9
        BNE     R8,R9, out
        ADDU    $4, R4
        ADDU    $4, R5
        JMP     l5

  /*
   * Byte loop: finishes the tail, handles the short/unaligned
   * cases, and resolves the ordering after a word mismatch.
   * When pointer1 reaches the end, R1 is 0 from the SGTU and
   * is returned as the "equal" result.
   */
  out:
        SGTU    R6,R4, R1
        BEQ     R1, ret
        MOVBU   0(R4), R8
        MOVBU   0(R5), R9
        ADDU    $1, R4
        BNE     R8,R9, ne
        ADDU    $1, R5
        JMP     out

  /*
   * R8 and R9 hold the first differing bytes (zero-extended).
   * SGTU leaves 1 in R1 when the s1 byte is larger, which is
   * returned as is; otherwise the result is -1.
   */
  ne:
        SGTU    R8,R9, R1
        BNE     R1, ret
        MOVW    $-1,R1
  ret:
        RET
        END