whack.c 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. #include "stdinc.h"
  2. #include "whack.h"
  3. typedef struct Huff Huff;
  4. enum
  5. {
  6. MaxFastLen = 9,
  7. BigLenCode = 0x1f4, /* minimum code for large lenth encoding */
  8. BigLenBits = 9,
  9. BigLenBase = 4, /* starting items to encode for big lens */
  10. MinOffBits = 6,
  11. MaxOffBits = MinOffBits + 8,
  12. MaxLen = 2051 /* max. length encodable in 24 bits */
  13. };
  14. enum
  15. {
  16. StatBytes,
  17. StatOutBytes,
  18. StatLits,
  19. StatMatches,
  20. StatLitBits,
  21. StatOffBits,
  22. StatLenBits,
  23. MaxStat
  24. };
  25. struct Huff
  26. {
  27. short bits; /* length of the code */
  28. ulong encode; /* the code */
  29. };
  30. static Huff lentab[MaxFastLen] =
  31. {
  32. {2, 0x2}, /* 10 */
  33. {3, 0x6}, /* 110 */
  34. {5, 0x1c}, /* 11100 */
  35. {5, 0x1d}, /* 11101 */
  36. {6, 0x3c}, /* 111100 */
  37. {7, 0x7a}, /* 1111010 */
  38. {7, 0x7b}, /* 1111011 */
  39. {8, 0xf8}, /* 11111000 */
  40. {8, 0xf9}, /* 11111001 */
  41. };
  42. static int thwmaxcheck;
  43. void
  44. whackinit(Whack *tw, int level)
  45. {
  46. thwmaxcheck = (1 << level);
  47. thwmaxcheck -= thwmaxcheck >> 2;
  48. if(thwmaxcheck < 2)
  49. thwmaxcheck = 2;
  50. else if(thwmaxcheck > 1024)
  51. thwmaxcheck = 1024;
  52. memset(tw, 0, sizeof *tw);
  53. tw->begin = 2 * WhackMaxOff;
  54. }
  55. /*
  56. * find a string in the dictionary
  57. */
  58. static int
  59. whackmatch(Whack *b, uchar **ss, uchar *esrc, ulong h, ulong now)
  60. {
  61. ushort then, off, last;
  62. int bestoff, bestlen, check;
  63. uchar *s, *t;
  64. s = *ss;
  65. if(esrc < s + MinMatch)
  66. return 0;
  67. if(s + MaxLen < esrc)
  68. esrc = s + MaxLen;
  69. bestoff = 0;
  70. bestlen = 0;
  71. check = thwmaxcheck;
  72. last = 0;
  73. for(then = b->hash[h]; check-- > 0; then = b->next[then & (WhackMaxOff - 1)]){
  74. off = now - then;
  75. if(off <= last || off > WhackMaxOff)
  76. break;
  77. /*
  78. * don't need to check for the end because
  79. * 1) s too close check above
  80. */
  81. t = s - off;
  82. if(s[0] == t[0] && s[1] == t[1] && s[2] == t[2]){
  83. if(!bestlen || esrc - s > bestlen && s[bestlen] == t[bestlen]){
  84. t += 3;
  85. for(s += 3; s < esrc; s++){
  86. if(*s != *t)
  87. break;
  88. t++;
  89. }
  90. if(s - *ss > bestlen){
  91. bestlen = s - *ss;
  92. bestoff = off;
  93. if(bestlen > thwmaxcheck)
  94. break;
  95. }
  96. }
  97. }
  98. s = *ss;
  99. last = off;
  100. }
  101. *ss += bestlen;
  102. return bestoff;
  103. }
  104. /*
  105. * knuth vol. 3 multiplicative hashing
  106. * each byte x chosen according to rules
  107. * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
  108. * with reasonable spread between the bytes & their complements
  109. *
  110. * the 3 byte value appears to be as almost good as the 4 byte value,
  111. * and might be faster on some machines
  112. */
  113. /*
  114. #define hashit(c) ((((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) & HashMask)
  115. */
  116. #define hashit(c) (((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
  117. /*
  118. * lz77 compression with single lookup in a hash table for each block
  119. */
  120. int
  121. whack(Whack *w, uchar *dst, uchar *src, int n, ulong stats[WhackStats])
  122. {
  123. uchar *s, *ss, *sss, *esrc, *half, *wdst, *wdmax;
  124. ulong cont, code, wbits;
  125. ushort now;
  126. int toff, lithist, h, len, bits, use, wnbits, lits, matches, offbits, lenbits;
  127. if(n < MinMatch)
  128. return -1;
  129. wdst = dst;
  130. wdmax = dst + n;
  131. now = w->begin;
  132. s = src;
  133. w->data = s;
  134. cont = (s[0] << 16) | (s[1] << 8) | s[2];
  135. esrc = s + n;
  136. half = s + (n >> 1);
  137. wnbits = 0;
  138. wbits = 0;
  139. lits = 0;
  140. matches = 0;
  141. offbits = 0;
  142. lenbits = 0;
  143. lithist = ~0;
  144. while(s < esrc){
  145. h = hashit(cont);
  146. sss = s;
  147. toff = whackmatch(w, &sss, esrc, h, now);
  148. ss = sss;
  149. len = ss - s;
  150. for(; wnbits >= 8; wnbits -= 8){
  151. if(wdst >= wdmax){
  152. w->begin = now;
  153. return -1;
  154. }
  155. *wdst++ = wbits >> (wnbits - 8);
  156. }
  157. if(len < MinMatch){
  158. toff = *s;
  159. lithist = (lithist << 1) | toff < 32 | toff > 127;
  160. if(lithist & 0x1e){
  161. wbits = (wbits << 9) | toff;
  162. wnbits += 9;
  163. }else if(lithist & 1){
  164. toff = (toff + 64) & 0xff;
  165. if(toff < 96){
  166. wbits = (wbits << 10) | toff;
  167. wnbits += 10;
  168. }else{
  169. wbits = (wbits << 11) | toff;
  170. wnbits += 11;
  171. }
  172. }else{
  173. wbits = (wbits << 8) | toff;
  174. wnbits += 8;
  175. }
  176. lits++;
  177. /*
  178. * speed hack
  179. * check for compression progress, bail if none achieved
  180. */
  181. if(s > half){
  182. if(4 * (s - src) < 5 * lits){
  183. w->begin = now;
  184. return -1;
  185. }
  186. half = esrc;
  187. }
  188. if(s + MinMatch <= esrc){
  189. w->next[now & (WhackMaxOff - 1)] = w->hash[h];
  190. w->hash[h] = now;
  191. if(s + MinMatch < esrc)
  192. cont = (cont << 8) | s[MinMatch];
  193. }
  194. now++;
  195. s++;
  196. continue;
  197. }
  198. matches++;
  199. /*
  200. * length of match
  201. */
  202. if(len > MaxLen){
  203. len = MaxLen;
  204. ss = s + len;
  205. }
  206. len -= MinMatch;
  207. if(len < MaxFastLen){
  208. bits = lentab[len].bits;
  209. wbits = (wbits << bits) | lentab[len].encode;
  210. wnbits += bits;
  211. lenbits += bits;
  212. }else{
  213. code = BigLenCode;
  214. bits = BigLenBits;
  215. use = BigLenBase;
  216. len -= MaxFastLen;
  217. while(len >= use){
  218. len -= use;
  219. code = (code + use) << 1;
  220. use <<= (bits & 1) ^ 1;
  221. bits++;
  222. }
  223. wbits = (wbits << bits) | (code + len);
  224. wnbits += bits;
  225. lenbits += bits;
  226. for(; wnbits >= 8; wnbits -= 8){
  227. if(wdst >= wdmax){
  228. w->begin = now;
  229. return -1;
  230. }
  231. *wdst++ = wbits >> (wnbits - 8);
  232. }
  233. }
  234. /*
  235. * offset in history
  236. */
  237. toff--;
  238. for(bits = MinOffBits; toff >= (1 << bits); bits++)
  239. ;
  240. if(bits < MaxOffBits-1){
  241. wbits = (wbits << 3) | (bits - MinOffBits);
  242. if(bits != MinOffBits)
  243. bits--;
  244. wnbits += bits + 3;
  245. offbits += bits + 3;
  246. }else{
  247. wbits = (wbits << 4) | 0xe | (bits - (MaxOffBits-1));
  248. bits--;
  249. wnbits += bits + 4;
  250. offbits += bits + 4;
  251. }
  252. wbits = (wbits << bits) | toff & ((1 << bits) - 1);
  253. for(; s != ss; s++){
  254. if(s + MinMatch <= esrc){
  255. h = hashit(cont);
  256. w->next[now & (WhackMaxOff - 1)] = w->hash[h];
  257. w->hash[h] = now;
  258. if(s + MinMatch < esrc)
  259. cont = (cont << 8) | s[MinMatch];
  260. }
  261. now++;
  262. }
  263. }
  264. w->begin = now;
  265. stats[StatBytes] += esrc - src;
  266. stats[StatLits] += lits;
  267. stats[StatMatches] += matches;
  268. stats[StatLitBits] += (wdst - (dst + 2)) * 8 + wnbits - offbits - lenbits;
  269. stats[StatOffBits] += offbits;
  270. stats[StatLenBits] += lenbits;
  271. if(wnbits & 7){
  272. wbits <<= 8 - (wnbits & 7);
  273. wnbits += 8 - (wnbits & 7);
  274. }
  275. for(; wnbits >= 8; wnbits -= 8){
  276. if(wdst >= wdmax)
  277. return -1;
  278. *wdst++ = wbits >> (wnbits - 8);
  279. }
  280. stats[StatOutBytes] += wdst - dst;
  281. return wdst - dst;
  282. }
  283. int
  284. whackblock(uchar *dst, uchar *src, int ssize)
  285. {
  286. Whack w;
  287. ulong stats[MaxStat];
  288. int r;
  289. whackinit(&w, 6);
  290. r = whack(&w, dst, src, ssize, stats);
  291. return r;
  292. }