whack.c 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. #include "stdinc.h"
  2. #include "whack.h"
  3. typedef struct Huff Huff;
  4. int compressblocks = 1;
  5. enum
  6. {
  7. MaxFastLen = 9,
  8. BigLenCode = 0x1f4, /* minimum code for large lenth encoding */
  9. BigLenBits = 9,
  10. BigLenBase = 4, /* starting items to encode for big lens */
  11. MinOffBits = 6,
  12. MaxOffBits = MinOffBits + 8,
  13. MaxLen = 2051 /* max. length encodable in 24 bits */
  14. };
  15. enum
  16. {
  17. StatBytes,
  18. StatOutBytes,
  19. StatLits,
  20. StatMatches,
  21. StatLitBits,
  22. StatOffBits,
  23. StatLenBits,
  24. MaxStat
  25. };
  26. struct Huff
  27. {
  28. short bits; /* length of the code */
  29. ulong encode; /* the code */
  30. };
  31. static Huff lentab[MaxFastLen] =
  32. {
  33. {2, 0x2}, /* 10 */
  34. {3, 0x6}, /* 110 */
  35. {5, 0x1c}, /* 11100 */
  36. {5, 0x1d}, /* 11101 */
  37. {6, 0x3c}, /* 111100 */
  38. {7, 0x7a}, /* 1111010 */
  39. {7, 0x7b}, /* 1111011 */
  40. {8, 0xf8}, /* 11111000 */
  41. {8, 0xf9}, /* 11111001 */
  42. };
  43. static int thwmaxcheck;
  44. void
  45. whackinit(Whack *tw, int level)
  46. {
  47. thwmaxcheck = (1 << level);
  48. thwmaxcheck -= thwmaxcheck >> 2;
  49. if(thwmaxcheck < 2)
  50. thwmaxcheck = 2;
  51. else if(thwmaxcheck > 1024)
  52. thwmaxcheck = 1024;
  53. memset(tw, 0, sizeof *tw);
  54. tw->begin = 2 * WhackMaxOff;
  55. }
  56. /*
  57. * find a string in the dictionary
  58. */
  59. static int
  60. whackmatch(Whack *b, uchar **ss, uchar *esrc, ulong h, ulong now)
  61. {
  62. ushort then, off, last;
  63. int bestoff, bestlen, check;
  64. uchar *s, *t;
  65. s = *ss;
  66. if(esrc < s + MinMatch)
  67. return -1;
  68. if(s + MaxLen < esrc)
  69. esrc = s + MaxLen;
  70. bestoff = 0;
  71. bestlen = 0;
  72. check = thwmaxcheck;
  73. last = 0;
  74. for(then = b->hash[h]; check-- > 0; then = b->next[then & (WhackMaxOff - 1)]){
  75. off = now - then;
  76. if(off <= last || off > WhackMaxOff)
  77. break;
  78. /*
  79. * don't need to check for the end because
  80. * 1) s too close check above
  81. */
  82. t = s - off;
  83. if(s[0] == t[0] && s[1] == t[1] && s[2] == t[2]){
  84. if(!bestlen || esrc - s > bestlen && s[bestlen] == t[bestlen]){
  85. t += 3;
  86. for(s += 3; s < esrc; s++){
  87. if(*s != *t)
  88. break;
  89. t++;
  90. }
  91. if(s - *ss > bestlen){
  92. bestlen = s - *ss;
  93. bestoff = off;
  94. if(bestlen > thwmaxcheck)
  95. break;
  96. }
  97. }
  98. }
  99. s = *ss;
  100. last = off;
  101. }
  102. *ss += bestlen;
  103. return bestoff;
  104. }
  105. /*
  106. * knuth vol. 3 multiplicative hashing
  107. * each byte x chosen according to rules
  108. * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
  109. * with reasonable spread between the bytes & their complements
  110. *
  111. * the 3 byte value appears to be as almost good as the 4 byte value,
  112. * and might be faster on some machines
  113. */
  114. /*
  115. #define hashit(c) ((((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) & HashMask)
  116. */
  117. #define hashit(c) (((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
  118. /*
  119. * lz77 compression with single lookup in a hash table for each block
  120. */
  121. int
  122. whack(Whack *w, uchar *dst, uchar *src, int n, ulong stats[WhackStats])
  123. {
  124. uchar *s, *ss, *sss, *esrc, *half, *wdst, *wdmax;
  125. ulong cont, code, wbits;
  126. ushort now;
  127. int toff, lithist, h, len, bits, use, wnbits, lits, matches, offbits, lenbits;
  128. if(!compressblocks || n < MinMatch)
  129. return -1;
  130. wdst = dst;
  131. wdmax = dst + n;
  132. now = w->begin;
  133. s = src;
  134. w->data = s;
  135. cont = (s[0] << 16) | (s[1] << 8) | s[2];
  136. esrc = s + n;
  137. half = s + (n >> 1);
  138. wnbits = 0;
  139. wbits = 0;
  140. lits = 0;
  141. matches = 0;
  142. offbits = 0;
  143. lenbits = 0;
  144. lithist = ~0;
  145. while(s < esrc){
  146. h = hashit(cont);
  147. sss = s;
  148. toff = whackmatch(w, &sss, esrc, h, now);
  149. ss = sss;
  150. len = ss - s;
  151. for(; wnbits >= 8; wnbits -= 8){
  152. if(wdst >= wdmax){
  153. w->begin = now;
  154. return -1;
  155. }
  156. *wdst++ = wbits >> (wnbits - 8);
  157. }
  158. if(len < MinMatch){
  159. toff = *s;
  160. lithist = (lithist << 1) | toff < 32 | toff > 127;
  161. if(lithist & 0x1e){
  162. wbits = (wbits << 9) | toff;
  163. wnbits += 9;
  164. }else if(lithist & 1){
  165. toff = (toff + 64) & 0xff;
  166. if(toff < 96){
  167. wbits = (wbits << 10) | toff;
  168. wnbits += 10;
  169. }else{
  170. wbits = (wbits << 11) | toff;
  171. wnbits += 11;
  172. }
  173. }else{
  174. wbits = (wbits << 8) | toff;
  175. wnbits += 8;
  176. }
  177. lits++;
  178. /*
  179. * speed hack
  180. * check for compression progress, bail if none achieved
  181. */
  182. if(s > half){
  183. if(4 * (s - src) < 5 * lits){
  184. w->begin = now;
  185. return -1;
  186. }
  187. half = esrc;
  188. }
  189. if(s + MinMatch <= esrc){
  190. w->next[now & (WhackMaxOff - 1)] = w->hash[h];
  191. w->hash[h] = now;
  192. if(s + MinMatch < esrc)
  193. cont = (cont << 8) | s[MinMatch];
  194. }
  195. now++;
  196. s++;
  197. continue;
  198. }
  199. matches++;
  200. /*
  201. * length of match
  202. */
  203. if(len > MaxLen){
  204. len = MaxLen;
  205. ss = s + len;
  206. }
  207. len -= MinMatch;
  208. if(len < MaxFastLen){
  209. bits = lentab[len].bits;
  210. wbits = (wbits << bits) | lentab[len].encode;
  211. wnbits += bits;
  212. lenbits += bits;
  213. }else{
  214. code = BigLenCode;
  215. bits = BigLenBits;
  216. use = BigLenBase;
  217. len -= MaxFastLen;
  218. while(len >= use){
  219. len -= use;
  220. code = (code + use) << 1;
  221. use <<= (bits & 1) ^ 1;
  222. bits++;
  223. }
  224. wbits = (wbits << bits) | (code + len);
  225. wnbits += bits;
  226. lenbits += bits;
  227. for(; wnbits >= 8; wnbits -= 8){
  228. if(wdst >= wdmax){
  229. w->begin = now;
  230. return -1;
  231. }
  232. *wdst++ = wbits >> (wnbits - 8);
  233. }
  234. }
  235. /*
  236. * offset in history
  237. */
  238. toff--;
  239. for(bits = MinOffBits; toff >= (1 << bits); bits++)
  240. ;
  241. if(bits < MaxOffBits-1){
  242. wbits = (wbits << 3) | (bits - MinOffBits);
  243. if(bits != MinOffBits)
  244. bits--;
  245. wnbits += bits + 3;
  246. offbits += bits + 3;
  247. }else{
  248. wbits = (wbits << 4) | 0xe | (bits - (MaxOffBits-1));
  249. bits--;
  250. wnbits += bits + 4;
  251. offbits += bits + 4;
  252. }
  253. wbits = (wbits << bits) | toff & ((1 << bits) - 1);
  254. for(; s != ss; s++){
  255. if(s + MinMatch <= esrc){
  256. h = hashit(cont);
  257. w->next[now & (WhackMaxOff - 1)] = w->hash[h];
  258. w->hash[h] = now;
  259. if(s + MinMatch < esrc)
  260. cont = (cont << 8) | s[MinMatch];
  261. }
  262. now++;
  263. }
  264. }
  265. w->begin = now;
  266. stats[StatBytes] += esrc - src;
  267. stats[StatLits] += lits;
  268. stats[StatMatches] += matches;
  269. stats[StatLitBits] += (wdst - (dst + 2)) * 8 + wnbits - offbits - lenbits;
  270. stats[StatOffBits] += offbits;
  271. stats[StatLenBits] += lenbits;
  272. if(wnbits & 7){
  273. wbits <<= 8 - (wnbits & 7);
  274. wnbits += 8 - (wnbits & 7);
  275. }
  276. for(; wnbits >= 8; wnbits -= 8){
  277. if(wdst >= wdmax)
  278. return -1;
  279. *wdst++ = wbits >> (wnbits - 8);
  280. }
  281. stats[StatOutBytes] += wdst - dst;
  282. return wdst - dst;
  283. }
  284. int
  285. whackblock(uchar *dst, uchar *src, int ssize)
  286. {
  287. Whack w;
  288. ulong stats[MaxStat];
  289. int r;
  290. whackinit(&w, 6);
  291. r = whack(&w, dst, src, ssize, stats);
  292. return r;
  293. }