unsac.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620
  1. #include <u.h>
  2. #include <libc.h>
  3. #include "sac.h"
  4. typedef struct Huff Huff;
  5. typedef struct Mtf Mtf;
  6. typedef struct Decode Decode;
  7. enum
  8. {
  9. ZBase = 2, /* base of code to encode 0 runs */
  10. LitBase = ZBase-1, /* base of literal values */
  11. MaxLit = 256,
  12. MaxLeaf = MaxLit+LitBase,
  13. MaxHuffBits = 16, /* max bits in a huffman code */
  14. MaxFlatbits = 5, /* max bits decoded in flat table */
  15. CombLog = 4,
  16. CombSpace = 1 << CombLog, /* mtf speedup indices spacing */
  17. CombMask = CombSpace - 1,
  18. };
  19. struct Mtf
  20. {
  21. int maxcomb; /* index of last valid comb */
  22. uchar prev[MaxLit];
  23. uchar next[MaxLit];
  24. uchar comb[MaxLit / CombSpace + 1];
  25. };
  26. struct Huff
  27. {
  28. int maxbits;
  29. int flatbits;
  30. ulong flat[1<<MaxFlatbits];
  31. ulong maxcode[MaxHuffBits];
  32. ulong last[MaxHuffBits];
  33. ulong decode[MaxLeaf];
  34. };
  35. struct Decode{
  36. Huff tab;
  37. Mtf mtf;
  38. int nbits;
  39. ulong bits;
  40. int nzero;
  41. int base;
  42. ulong maxblocksym;
  43. jmp_buf errjmp;
  44. uchar *src; /* input buffer */
  45. uchar *smax; /* limit */
  46. };
  47. static void fatal(Decode *dec, char*);
  48. static int hdec(Decode*);
  49. static void recvtab(Decode*, Huff*, int, ushort*);
  50. static ulong bitget(Decode*, int);
  51. static int mtf(uchar*, int);
  52. #define FORWARD 0
  53. static void
  54. mtflistinit(Mtf *m, uchar *front, int n)
  55. {
  56. int last, me, f, i, comb;
  57. if(n == 0)
  58. return;
  59. /*
  60. * add all entries to free list
  61. */
  62. last = MaxLit - 1;
  63. for(i = 0; i < MaxLit; i++){
  64. m->prev[i] = last;
  65. m->next[i] = i + 1;
  66. last = i;
  67. }
  68. m->next[last] = 0;
  69. f = 0;
  70. /*
  71. * pull valid entries off free list and enter into mtf list
  72. */
  73. comb = 0;
  74. last = front[0];
  75. for(i = 0; i < n; i++){
  76. me = front[i];
  77. f = m->next[me];
  78. m->prev[f] = m->prev[me];
  79. m->next[m->prev[f]] = f;
  80. m->next[last] = me;
  81. m->prev[me] = last;
  82. last = me;
  83. if((i & CombMask) == 0)
  84. m->comb[comb++] = me;
  85. }
  86. /*
  87. * pad out the list with dummies to the next comb,
  88. * using free entries
  89. */
  90. for(; i & CombMask; i++){
  91. me = f;
  92. f = m->next[me];
  93. m->prev[f] = m->prev[me];
  94. m->next[m->prev[f]] = f;
  95. m->next[last] = me;
  96. m->prev[me] = last;
  97. last = me;
  98. }
  99. me = front[0];
  100. m->next[last] = me;
  101. m->prev[me] = last;
  102. m->comb[comb] = me;
  103. m->maxcomb = comb;
  104. }
  105. static int
  106. mtflist(Mtf *m, int pos)
  107. {
  108. uchar *next, *prev, *mycomb;
  109. int c, c0, pc, nc, off;
  110. if(pos == 0)
  111. return m->comb[0];
  112. next = m->next;
  113. prev = m->prev;
  114. mycomb = &m->comb[pos >> CombLog];
  115. off = pos & CombMask;
  116. if(off >= CombSpace / 2){
  117. c = mycomb[1];
  118. for(; off < CombSpace; off++)
  119. c = prev[c];
  120. }else{
  121. c = *mycomb;
  122. for(; off; off--)
  123. c = next[c];
  124. }
  125. nc = next[c];
  126. pc = prev[c];
  127. prev[nc] = pc;
  128. next[pc] = nc;
  129. for(; mycomb > m->comb; mycomb--)
  130. *mycomb = prev[*mycomb];
  131. c0 = *mycomb;
  132. *mycomb = c;
  133. mycomb[m->maxcomb] = c;
  134. next[c] = c0;
  135. pc = prev[c0];
  136. prev[c] = pc;
  137. prev[c0] = c;
  138. next[pc] = c;
  139. return c;
  140. }
  141. static void
  142. hdecblock(Decode *dec, ulong n, ulong I, uchar *buf, ulong *sums, ulong *prev)
  143. {
  144. ulong i, nn, sum;
  145. int m, z, zz, c;
  146. nn = I;
  147. n--;
  148. i = 0;
  149. again:
  150. for(; i < nn; i++){
  151. while((m = hdec(dec)) == 0 && i + dec->nzero < n)
  152. ;
  153. if(z = dec->nzero){
  154. dec->nzero = 0;
  155. c = dec->mtf.comb[0];
  156. sum = sums[c];
  157. sums[c] = sum + z;
  158. z += i;
  159. zz = z;
  160. if(i < I && z > I){
  161. zz = I;
  162. z++;
  163. }
  164. zagain:
  165. for(; i < zz; i++){
  166. buf[i] = c;
  167. prev[i] = sum++;
  168. }
  169. if(i != z){
  170. zz = z;
  171. nn = ++n;
  172. i++;
  173. goto zagain;
  174. }
  175. if(i == nn){
  176. if(i == n)
  177. return;
  178. nn = ++n;
  179. i++;
  180. }
  181. }
  182. c = mtflist(&dec->mtf, m);
  183. buf[i] = c;
  184. sum = sums[c];
  185. prev[i] = sum++;
  186. sums[c] = sum;
  187. }
  188. if(i == n)
  189. return;
  190. nn = ++n;
  191. i++;
  192. goto again;
  193. }
  194. int
  195. unsac(uchar *dst, uchar *src, int n, int nsrc)
  196. {
  197. Decode *dec;
  198. uchar *buf, *front;
  199. ulong *prev, *sums;
  200. ulong sum, i, I;
  201. int m, j, c;
  202. dec = malloc(sizeof *dec);
  203. buf = malloc(n+2);
  204. prev = malloc((n+2) * sizeof *prev);
  205. front = malloc(MaxLit * sizeof *front);
  206. sums = malloc(MaxLit * sizeof *sums);
  207. if(dec == nil || buf == nil || prev == nil || front == nil || sums == nil || setjmp(dec->errjmp)){
  208. free(dec);
  209. free(buf);
  210. free(prev);
  211. free(front);
  212. free(sums);
  213. return -1;
  214. }
  215. dec->src = src;
  216. dec->smax = src + nsrc;
  217. dec->nbits = 0;
  218. dec->bits = 0;
  219. dec->nzero = 0;
  220. for(i = 0; i < MaxLit; i++)
  221. front[i] = i;
  222. n++;
  223. I = bitget(dec, 16);
  224. if(I >= n)
  225. fatal(dec, "corrupted input");
  226. /*
  227. * decode the character usage map
  228. */
  229. for(i = 0; i < MaxLit; i++)
  230. sums[i] = 0;
  231. c = bitget(dec, 1);
  232. for(i = 0; i < MaxLit; ){
  233. m = bitget(dec, 8) + 1;
  234. while(m--){
  235. if(i >= MaxLit)
  236. fatal(dec, "corrupted char map");
  237. front[i++] = c;
  238. }
  239. c = c ^ 1;
  240. }
  241. /*
  242. * initialize mtf state
  243. */
  244. c = 0;
  245. for(i = 0; i < MaxLit; i++)
  246. if(front[i])
  247. front[c++] = i;
  248. mtflistinit(&dec->mtf, front, c);
  249. dec->maxblocksym = c + LitBase;
  250. /*
  251. * huffman decoding, move to front decoding,
  252. * along with character counting
  253. */
  254. dec->base = 1;
  255. recvtab(dec, &dec->tab, MaxLeaf, nil);
  256. hdecblock(dec, n, I, buf, sums, prev);
  257. sum = 1;
  258. for(i = 0; i < MaxLit; i++){
  259. c = sums[i];
  260. sums[i] = sum;
  261. sum += c;
  262. }
  263. i = 0;
  264. for(j = n - 2; j >= 0; j--){
  265. if(i > n || i < 0 || i == I)
  266. fatal(dec, "corrupted data");
  267. c = buf[i];
  268. dst[j] = c;
  269. i = prev[i] + sums[c];
  270. }
  271. free(dec);
  272. free(buf);
  273. free(prev);
  274. free(front);
  275. free(sums);
  276. return n;
  277. }
  278. static ulong
  279. bitget(Decode *dec, int nb)
  280. {
  281. int c;
  282. while(dec->nbits < nb){
  283. if(dec->src >= dec->smax)
  284. fatal(dec, "premature eof 1");
  285. c = *dec->src++;
  286. dec->bits <<= 8;
  287. dec->bits |= c;
  288. dec->nbits += 8;
  289. }
  290. dec->nbits -= nb;
  291. return (dec->bits >> dec->nbits) & ((1 << nb) - 1);
  292. }
  293. static void
  294. fillbits(Decode *dec)
  295. {
  296. int c;
  297. while(dec->nbits < 24){
  298. if(dec->src >= dec->smax)
  299. fatal(dec, "premature eof 2");
  300. c = *dec->src++;
  301. dec->bits <<= 8;
  302. dec->bits |= c;
  303. dec->nbits += 8;
  304. }
  305. }
  306. /*
  307. * decode one symbol
  308. */
  309. static int
  310. hdecsym(Decode *dec, Huff *h, int b)
  311. {
  312. long c;
  313. ulong bits;
  314. int nbits;
  315. bits = dec->bits;
  316. nbits = dec->nbits;
  317. for(; (c = bits >> (nbits - b)) > h->maxcode[b]; b++)
  318. ;
  319. if(b > h->maxbits)
  320. fatal(dec, "too many bits consumed");
  321. dec->nbits = nbits - b;
  322. return h->decode[h->last[b] - c];
  323. }
  324. static int
  325. hdec(Decode *dec)
  326. {
  327. ulong c;
  328. int nbits, nb;
  329. if(dec->nbits < dec->tab.maxbits)
  330. fillbits(dec);
  331. nbits = dec->nbits;
  332. dec->bits &= (1 << nbits) - 1;
  333. c = dec->tab.flat[dec->bits >> (nbits - dec->tab.flatbits)];
  334. nb = c & 0xff;
  335. c >>= 8;
  336. if(nb == 0xff)
  337. c = hdecsym(dec, &dec->tab, c);
  338. else
  339. dec->nbits = nbits - nb;
  340. /*
  341. * reverse funny run-length coding
  342. */
  343. if(c < ZBase){
  344. dec->nzero += dec->base << c;
  345. dec->base <<= 1;
  346. return 0;
  347. }
  348. dec->base = 1;
  349. c -= LitBase;
  350. return c;
  351. }
  352. static void
  353. hufftab(Decode *dec, Huff *h, char *hb, ulong *bitcount, int maxleaf, int maxbits, int flatbits)
  354. {
  355. ulong c, mincode, code, nc[MaxHuffBits];
  356. int i, b, ec;
  357. h->maxbits = maxbits;
  358. if(maxbits < 0)
  359. return;
  360. code = 0;
  361. c = 0;
  362. for(b = 0; b <= maxbits; b++){
  363. h->last[b] = c;
  364. c += bitcount[b];
  365. mincode = code << 1;
  366. nc[b] = mincode;
  367. code = mincode + bitcount[b];
  368. if(code > (1 << b))
  369. fatal(dec, "corrupted huffman table");
  370. h->maxcode[b] = code - 1;
  371. h->last[b] += code - 1;
  372. }
  373. if(code != (1 << maxbits))
  374. fatal(dec, "huffman table not full");
  375. if(flatbits > maxbits)
  376. flatbits = maxbits;
  377. h->flatbits = flatbits;
  378. b = 1 << flatbits;
  379. for(i = 0; i < b; i++)
  380. h->flat[i] = ~0;
  381. /*
  382. * initialize the flat table to include the minimum possible
  383. * bit length for each code prefix
  384. */
  385. for(b = maxbits; b > flatbits; b--){
  386. code = h->maxcode[b];
  387. if(code == -1)
  388. break;
  389. mincode = code + 1 - bitcount[b];
  390. mincode >>= b - flatbits;
  391. code >>= b - flatbits;
  392. for(; mincode <= code; mincode++)
  393. h->flat[mincode] = (b << 8) | 0xff;
  394. }
  395. for(i = 0; i < maxleaf; i++){
  396. b = hb[i];
  397. if(b == -1)
  398. continue;
  399. c = nc[b]++;
  400. if(b <= flatbits){
  401. code = (i << 8) | b;
  402. ec = (c + 1) << (flatbits - b);
  403. if(ec > (1<<flatbits))
  404. fatal(dec, "flat code too big");
  405. for(c <<= (flatbits - b); c < ec; c++)
  406. h->flat[c] = code;
  407. }else{
  408. c = h->last[b] - c;
  409. if(c >= maxleaf)
  410. fatal(dec, "corrupted huffman table");
  411. h->decode[c] = i;
  412. }
  413. }
  414. }
  415. static void
  416. elimBit(int b, char *tmtf, int maxbits)
  417. {
  418. int bb;
  419. for(bb = 0; bb < maxbits; bb++)
  420. if(tmtf[bb] == b)
  421. break;
  422. while(++bb <= maxbits)
  423. tmtf[bb - 1] = tmtf[bb];
  424. }
  425. static int
  426. elimBits(int b, ulong *bused, char *tmtf, int maxbits)
  427. {
  428. int bb, elim;
  429. if(b < 0)
  430. return 0;
  431. elim = 0;
  432. /*
  433. * increase bits counts for all descendants
  434. */
  435. for(bb = b + 1; bb < maxbits; bb++){
  436. bused[bb] += 1 << (bb - b);
  437. if(bused[bb] == (1 << bb)){
  438. elim++;
  439. elimBit(bb, tmtf, maxbits);
  440. }
  441. }
  442. /*
  443. * steal bits from parent & check for fullness
  444. */
  445. for(; b >= 0; b--){
  446. bused[b]++;
  447. if(bused[b] == (1 << b)){
  448. elim++;
  449. elimBit(b, tmtf, maxbits);
  450. }
  451. if((bused[b] & 1) == 0)
  452. break;
  453. }
  454. return elim;
  455. }
  456. static void
  457. recvtab(Decode *dec, Huff *tab, int maxleaf, ushort *map)
  458. {
  459. ulong bitcount[MaxHuffBits+1], bused[MaxHuffBits+1];
  460. char tmtf[MaxHuffBits+1], *hb;
  461. int i, b, ttb, m, maxbits, max, elim;
  462. hb = malloc(MaxLeaf * sizeof *hb);
  463. if(hb == nil)
  464. fatal(dec, "out of memory");
  465. /*
  466. * read the tables for the tables
  467. */
  468. max = 8;
  469. for(i = 0; i <= MaxHuffBits; i++){
  470. bitcount[i] = 0;
  471. tmtf[i] = i;
  472. bused[i] = 0;
  473. }
  474. tmtf[0] = -1;
  475. tmtf[max] = 0;
  476. elim = 0;
  477. maxbits = -1;
  478. for(i = 0; i <= MaxHuffBits && elim != max; i++){
  479. ttb = 4;
  480. while(max - elim < (1 << (ttb-1)))
  481. ttb--;
  482. b = bitget(dec, ttb);
  483. if(b > max - elim)
  484. fatal(dec, "corrupted huffman table table");
  485. b = tmtf[b];
  486. hb[i] = b;
  487. bitcount[b]++;
  488. if(b > maxbits)
  489. maxbits = b;
  490. elim += elimBits(b, bused, tmtf, max);
  491. }
  492. if(elim != max)
  493. fatal(dec, "incomplete huffman table table");
  494. hufftab(dec, tab, hb, bitcount, i, maxbits, MaxFlatbits);
  495. for(i = 0; i <= MaxHuffBits; i++){
  496. tmtf[i] = i;
  497. bitcount[i] = 0;
  498. bused[i] = 0;
  499. }
  500. tmtf[0] = -1;
  501. tmtf[MaxHuffBits] = 0;
  502. elim = 0;
  503. maxbits = -1;
  504. for(i = 0; i < maxleaf && elim != MaxHuffBits; i++){
  505. if(dec->nbits <= tab->maxbits)
  506. fillbits(dec);
  507. dec->bits &= (1 << dec->nbits) - 1;
  508. m = tab->flat[dec->bits >> (dec->nbits - tab->flatbits)];
  509. b = m & 0xff;
  510. m >>= 8;
  511. if(b == 0xff)
  512. m = hdecsym(dec, tab, m);
  513. else
  514. dec->nbits -= b;
  515. b = tmtf[m];
  516. for(; m > 0; m--)
  517. tmtf[m] = tmtf[m-1];
  518. tmtf[0] = b;
  519. if(b > MaxHuffBits)
  520. fatal(dec, "bit length too big");
  521. m = i;
  522. if(map != nil)
  523. m = map[m];
  524. hb[m] = b;
  525. bitcount[b]++;
  526. if(b > maxbits)
  527. maxbits = b;
  528. elim += elimBits(b, bused, tmtf, MaxHuffBits);
  529. }
  530. if(elim != MaxHuffBits && elim != 0)
  531. fatal(dec, "incomplete huffman table");
  532. if(map != nil)
  533. for(; i < maxleaf; i++)
  534. hb[map[i]] = -1;
  535. hufftab(dec, tab, hb, bitcount, i, maxbits, MaxFlatbits);
  536. free(hb);
  537. }
  538. static void
  539. fatal(Decode *dec, char *msg)
  540. {
  541. print("%s: %s\n", argv0, msg);
  542. longjmp(dec->errjmp, 1);
  543. }