ptclbsum.c 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. /* $NetBSD: in_cksum.c,v 1.7 1997/09/02 13:18:15 thorpej Exp $ */
  2. /*-
  3. * Copyright (c) 1988, 1992, 1993
  4. * The Regents of the University of California. All rights reserved.
  5. * Copyright (c) 1996
  6. * Matt Thomas <matt@3am-software.com>
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright
  14. * notice, this list of conditions and the following disclaimer in the
  15. * documentation and/or other materials provided with the distribution.
  16. * 3. All advertising materials mentioning features or use of this software
  17. * must display the following acknowledgement:
  18. * This product includes software developed by the University of
  19. * California, Berkeley and its contributors.
  20. * 4. Neither the name of the University nor the names of its contributors
  21. * may be used to endorse or promote products derived from this software
  22. * without specific prior written permission.
  23. *
  24. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  25. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  28. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  29. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  30. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  31. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  32. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  33. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  34. * SUCH DAMAGE.
  35. *
  36. * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
  37. */
  38. /*
  39. * Checksum routine for Internet Protocol family headers
  40. * (Portable Alpha version).
  41. *
  42. * This routine is very heavily used in the network
  43. * code and should be modified for each CPU to be as fast as possible.
  44. */
  45. #include <u.h>
  46. #include <libc.h>
  47. #include <ip.h>
  48. #define ADDCARRY(x) (x > 65535 ? x -= 65535 : x)
  49. #define REDUCE32 \
  50. { \
  51. q_util.q = sum; \
  52. sum = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
  53. }
  54. #define REDUCE16 \
  55. { \
  56. q_util.q = sum; \
  57. l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
  58. sum = l_util.s[0] + l_util.s[1]; \
  59. ADDCARRY(sum); \
  60. }
  61. static const uint32_t in_masks[] = {
  62. /*0 bytes*/ /*1 byte*/ /*2 bytes*/ /*3 bytes*/
  63. 0x00000000, 0x000000FF, 0x0000FFFF, 0x00FFFFFF, /* offset 0 */
  64. 0x00000000, 0x0000FF00, 0x00FFFF00, 0xFFFFFF00, /* offset 1 */
  65. 0x00000000, 0x00FF0000, 0xFFFF0000, 0xFFFF0000, /* offset 2 */
  66. 0x00000000, 0xFF000000, 0xFF000000, 0xFF000000, /* offset 3 */
  67. };
  68. union l_util {
  69. uint16_t s[2];
  70. uint32_t l;
  71. };
  72. union q_util {
  73. uint16_t s[4];
  74. uint32_t l[2];
  75. uint64_t q;
  76. };
  77. static uint64_t
  78. in_cksumdata(const void *buf, int len)
  79. {
  80. const uint32_t *lw = (const uint32_t *) buf;
  81. uint64_t sum = 0;
  82. uint64_t prefilled;
  83. int offset;
  84. union q_util q_util;
  85. if ((3 & (long) lw) == 0 && len == 20) {
  86. sum = (uint64_t) lw[0] + lw[1] + lw[2] + lw[3] + lw[4];
  87. REDUCE32;
  88. return sum;
  89. }
  90. if ((offset = 3 & (long) lw) != 0) {
  91. const uint32_t *masks = in_masks + (offset << 2);
  92. lw = (uint32_t *) (((long) lw) - offset);
  93. sum = *lw++ & masks[len >= 3 ? 3 : len];
  94. len -= 4 - offset;
  95. if (len <= 0) {
  96. REDUCE32;
  97. return sum;
  98. }
  99. }
  100. #if 0
  101. /*
  102. * Force to cache line boundary.
  103. */
  104. offset = 32 - (0x1f & (long) lw);
  105. if (offset < 32 && len > offset) {
  106. len -= offset;
  107. if (4 & offset) {
  108. sum += (uint64_t) lw[0];
  109. lw += 1;
  110. }
  111. if (8 & offset) {
  112. sum += (uint64_t) lw[0] + lw[1];
  113. lw += 2;
  114. }
  115. if (16 & offset) {
  116. sum += (uint64_t) lw[0] + lw[1] + lw[2] + lw[3];
  117. lw += 4;
  118. }
  119. }
  120. #endif
  121. /*
  122. * access prefilling to start load of next cache line.
  123. * then add current cache line
  124. * save result of prefilling for loop iteration.
  125. */
  126. prefilled = lw[0];
  127. while ((len -= 32) >= 4) {
  128. uint64_t prefilling = lw[8];
  129. sum += prefilled + lw[1] + lw[2] + lw[3]
  130. + lw[4] + lw[5] + lw[6] + lw[7];
  131. lw += 8;
  132. prefilled = prefilling;
  133. }
  134. if (len >= 0) {
  135. sum += prefilled + lw[1] + lw[2] + lw[3]
  136. + lw[4] + lw[5] + lw[6] + lw[7];
  137. lw += 8;
  138. } else {
  139. len += 32;
  140. }
  141. while ((len -= 16) >= 0) {
  142. sum += (uint64_t) lw[0] + lw[1] + lw[2] + lw[3];
  143. lw += 4;
  144. }
  145. len += 16;
  146. while ((len -= 4) >= 0) {
  147. sum += (uint64_t) *lw++;
  148. }
  149. len += 4;
  150. if (len > 0)
  151. sum += (uint64_t) (in_masks[len] & *lw);
  152. REDUCE32;
  153. return sum;
  154. }
  155. uint16_t ptclbsum(uint8_t * addr, int len)
  156. {
  157. uint64_t sum = in_cksumdata(addr, len);
  158. union q_util q_util;
  159. union l_util l_util;
  160. REDUCE16;
  161. return ((sum & (uint16_t)0x00ffU) << 8) |
  162. ((sum & (uint16_t)0xff00U) >> 8);
  163. }