/* memcmp.s */

  /*
   * memcmp(s1, s2, n): compare the n bytes at s1 with the n bytes at s2.
   *
   * Arguments are read from s1+0(FP), s2+4(FP) and n+8(FP).
   * The result is left in R1:
   *	 0	all n bytes are equal
   *	 1	at the first difference, s1 holds the larger unsigned value
   *	-1	at the first difference, s2 holds the larger unsigned value
   *
   * Registers written: R1, R3, R4, R5, R6, R8, R9, R10, R11.
   */
	TEXT	memcmp(SB), $0
	MOVW	R1, 0(FP)		/* store R1 into the s1 slot; s1 is reloaded from it below */

/*
 * performance:
 *	aligned is about 1.0us/call and 17.4mb/sec
 *	unaligned is about 3.1mb/sec
 */

	MOVW	n+8(FP), R3		/* R3 is count */
	MOVW	s1+0(FP), R4		/* R4 is pointer1 */
	MOVW	s2+4(FP), R5		/* R5 is pointer2 */
	ADDU	R3,R4, R6		/* R6 is end pointer1 */

/*
 * fewer than 4 bytes: go straight to the byte loop.
 * the alignment loop at l1 may consume up to 3 bytes
 * and has no end-of-buffer check, so short counts
 * must not reach it; 4 bytes is also the least that
 * could give one full word compare.
 */
	SGT	$4,R3, R1
	BNE	R1, out

/*
 * word compares are only used when both pointers
 * have the same offset within a word; otherwise
 * everything is done by the byte loop.
 */
	XOR	R4,R5, R1
	AND	$3, R1
	BNE	R1, out

/*
 * byte at a time until pointer1 (and therefore
 * pointer2) is word aligned
 */
l1:
	AND	$3,R4, R1
	BEQ	R1, l2
	MOVBU	0(R4), R8
	MOVBU	0(R5), R9
	ADDU	$1, R4
	BNE	R8,R9, ne
	ADDU	$1, R5
	JMP	l1

/*
 * turn R3 into end pointer1-15
 * cmp 16 at a time while there is room.
 * word pairs alternate between R8,R9 (branch to ne)
 * and R10,R11 (branch to ne1); the next loads are
 * issued before the branch on the previous pair.
 */
l2:
	ADDU	$-15,R6, R3
l3:
	SGTU	R3,R4, R1		/* R1 = pointer1 < end-15, i.e. 16 bytes remain */
	BEQ	R1, l4
	MOVW	0(R4), R8
	MOVW	0(R5), R9
	MOVW	4(R4), R10
	BNE	R8,R9, ne		/* word 0 differs */
	MOVW	4(R5), R11
	MOVW	8(R4), R8
	BNE	R10,R11, ne1		/* word 1 differs */
	MOVW	8(R5), R9
	MOVW	12(R4), R10
	BNE	R8,R9, ne		/* word 2 differs */
	MOVW	12(R5), R11
	ADDU	$16, R4
	BNE	R10,R11, ne1		/* word 3 differs */
	ADDU	$16, R5
	JMP	l3

/*
 * turn R3 into end pointer1-3
 * cmp 4 at a time while there is room
 */
l4:
	ADDU	$-3,R6, R3
l5:
	SGTU	R3,R4, R1		/* R1 = pointer1 < end-3, i.e. 4 bytes remain */
	BEQ	R1, out
	MOVW	0(R4), R8
	MOVW	0(R5), R9
	ADDU	$4, R4
	BNE	R8,R9, ne		/* only works because big endian */
	ADDU	$4, R5
	JMP	l5

/*
 * last loop, cmp byte at a time until pointer1
 * reaches the end pointer. when it does, R1 is 0
 * from the SGTU and is returned as the equal result.
 */
out:
	SGTU	R6,R4, R1
	BEQ	R1, ret
	MOVBU	0(R4), R8
	MOVBU	0(R5), R9
	ADDU	$1, R4
	BNE	R8,R9, ne
	ADDU	$1, R5
	JMP	out

/*
 * mismatch held in R10,R11:
 * return 1 if R10 > R11 (unsigned), else -1
 */
ne1:
	SGTU	R10,R11, R1
	BNE	R1, ret
	MOVW	$-1,R1
	RET

/*
 * mismatch held in R8,R9:
 * return 1 if R8 > R9 (unsigned), else -1
 */
ne:
	SGTU	R8,R9, R1
	BNE	R1, ret
	MOVW	$-1,R1
ret:
	RET
	END