inflate.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <flate.h>
  4. enum {
  5. HistorySize= 32*1024,
  6. BufSize= 4*1024,
  7. MaxHuffBits= 17, /* maximum bits in a encoded code */
  8. Nlitlen= 288, /* number of litlen codes */
  9. Noff= 32, /* number of offset codes */
  10. Nclen= 19, /* number of codelen codes */
  11. LenShift= 10, /* code = len<<LenShift|code */
  12. LitlenBits= 7, /* number of bits in litlen decode table */
  13. OffBits= 6, /* number of bits in offset decode table */
  14. ClenBits= 6, /* number of bits in code len decode table */
  15. MaxFlatBits= LitlenBits,
  16. MaxLeaf= Nlitlen
  17. };
  18. typedef struct Input Input;
  19. typedef struct History History;
  20. typedef struct Huff Huff;
  21. struct Input
  22. {
  23. int error; /* first error encountered, or FlateOk */
  24. void *wr;
  25. int (*w)(void*, void*, int);
  26. void *getr;
  27. int (*get)(void*);
  28. ulong sreg;
  29. int nbits;
  30. };
  31. struct History
  32. {
  33. uchar his[HistorySize];
  34. uchar *cp; /* current pointer in history */
  35. int full; /* his has been filled up at least once */
  36. };
  37. struct Huff
  38. {
  39. int maxbits; /* max bits for any code */
  40. int minbits; /* min bits to get before looking in flat */
  41. int flatmask; /* bits used in "flat" fast decoding table */
  42. ulong flat[1<<MaxFlatBits];
  43. ulong maxcode[MaxHuffBits];
  44. ulong last[MaxHuffBits];
  45. ulong decode[MaxLeaf];
  46. };
  47. /* litlen code words 257-285 extra bits */
  48. static int litlenextra[Nlitlen-257] =
  49. {
  50. /* 257 */ 0, 0, 0,
  51. /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
  52. /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
  53. /* 280 */ 4, 5, 5, 5, 5, 0, 0, 0
  54. };
  55. static int litlenbase[Nlitlen-257];
  56. /* offset code word extra bits */
  57. static int offextra[Noff] =
  58. {
  59. 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
  60. 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
  61. 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
  62. 0, 0,
  63. };
  64. static int offbase[Noff];
  65. /* order code lengths */
  66. static int clenorder[Nclen] =
  67. {
  68. 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
  69. };
  70. /* for static huffman tables */
  71. static Huff litlentab;
  72. static Huff offtab;
  73. static uchar revtab[256];
  74. static int uncblock(Input *in, History*);
  75. static int fixedblock(Input *in, History*);
  76. static int dynamicblock(Input *in, History*);
  77. static int sregfill(Input *in, int n);
  78. static int sregunget(Input *in);
  79. static int decode(Input*, History*, Huff*, Huff*);
  80. static int hufftab(Huff*, char*, int, int);
  81. static int hdecsym(Input *in, Huff *h, int b);
  82. int
  83. inflateinit(void)
  84. {
  85. char *len;
  86. int i, j, base;
  87. /* byte reverse table */
  88. for(i=0; i<256; i++)
  89. for(j=0; j<8; j++)
  90. if(i & (1<<j))
  91. revtab[i] |= 0x80 >> j;
  92. for(i=257,base=3; i<Nlitlen; i++) {
  93. litlenbase[i-257] = base;
  94. base += 1<<litlenextra[i-257];
  95. }
  96. /* strange table entry in spec... */
  97. litlenbase[285-257]--;
  98. for(i=0,base=1; i<Noff; i++) {
  99. offbase[i] = base;
  100. base += 1<<offextra[i];
  101. }
  102. len = malloc(MaxLeaf);
  103. if(len == nil)
  104. return FlateNoMem;
  105. /* static Litlen bit lengths */
  106. for(i=0; i<144; i++)
  107. len[i] = 8;
  108. for(i=144; i<256; i++)
  109. len[i] = 9;
  110. for(i=256; i<280; i++)
  111. len[i] = 7;
  112. for(i=280; i<Nlitlen; i++)
  113. len[i] = 8;
  114. if(!hufftab(&litlentab, len, Nlitlen, MaxFlatBits))
  115. return FlateInternal;
  116. /* static Offset bit lengths */
  117. for(i=0; i<Noff; i++)
  118. len[i] = 5;
  119. if(!hufftab(&offtab, len, Noff, MaxFlatBits))
  120. return FlateInternal;
  121. free(len);
  122. return FlateOk;
  123. }
  124. int
  125. inflate(void *wr, int (*w)(void*, void*, int), void *getr, int (*get)(void*))
  126. {
  127. History *his;
  128. Input in;
  129. int final, type;
  130. his = malloc(sizeof(History));
  131. if(his == nil)
  132. return FlateNoMem;
  133. his->cp = his->his;
  134. his->full = 0;
  135. in.getr = getr;
  136. in.get = get;
  137. in.wr = wr;
  138. in.w = w;
  139. in.nbits = 0;
  140. in.sreg = 0;
  141. in.error = FlateOk;
  142. do {
  143. if(!sregfill(&in, 3))
  144. goto bad;
  145. final = in.sreg & 0x1;
  146. type = (in.sreg>>1) & 0x3;
  147. in.sreg >>= 3;
  148. in.nbits -= 3;
  149. switch(type) {
  150. default:
  151. in.error = FlateCorrupted;
  152. goto bad;
  153. case 0:
  154. /* uncompressed */
  155. if(!uncblock(&in, his))
  156. goto bad;
  157. break;
  158. case 1:
  159. /* fixed huffman */
  160. if(!fixedblock(&in, his))
  161. goto bad;
  162. break;
  163. case 2:
  164. /* dynamic huffman */
  165. if(!dynamicblock(&in, his))
  166. goto bad;
  167. break;
  168. }
  169. } while(!final);
  170. if(his->cp != his->his && (*w)(wr, his->his, his->cp - his->his) != his->cp - his->his) {
  171. in.error = FlateOutputFail;
  172. goto bad;
  173. }
  174. if(!sregunget(&in))
  175. goto bad;
  176. free(his);
  177. if(in.error != FlateOk)
  178. return FlateInternal;
  179. return FlateOk;
  180. bad:
  181. free(his);
  182. if(in.error == FlateOk)
  183. return FlateInternal;
  184. return in.error;
  185. }
  186. static int
  187. uncblock(Input *in, History *his)
  188. {
  189. int len, nlen, c;
  190. uchar *hs, *hp, *he;
  191. if(!sregunget(in))
  192. return 0;
  193. len = (*in->get)(in->getr);
  194. len |= (*in->get)(in->getr)<<8;
  195. nlen = (*in->get)(in->getr);
  196. nlen |= (*in->get)(in->getr)<<8;
  197. if(len != (~nlen&0xffff)) {
  198. in->error = FlateCorrupted;
  199. return 0;
  200. }
  201. hp = his->cp;
  202. hs = his->his;
  203. he = hs + HistorySize;
  204. while(len > 0) {
  205. c = (*in->get)(in->getr);
  206. if(c < 0)
  207. return 0;
  208. *hp++ = c;
  209. if(hp == he) {
  210. his->full = 1;
  211. if((*in->w)(in->wr, hs, HistorySize) != HistorySize) {
  212. in->error = FlateOutputFail;
  213. return 0;
  214. }
  215. hp = hs;
  216. }
  217. len--;
  218. }
  219. his->cp = hp;
  220. return 1;
  221. }
  222. static int
  223. fixedblock(Input *in, History *his)
  224. {
  225. return decode(in, his, &litlentab, &offtab);
  226. }
  227. static int
  228. dynamicblock(Input *in, History *his)
  229. {
  230. Huff *lentab, *offtab;
  231. char *len;
  232. int i, j, n, c, nlit, ndist, nclen, res, nb;
  233. if(!sregfill(in, 14))
  234. return 0;
  235. nlit = (in->sreg&0x1f) + 257;
  236. ndist = ((in->sreg>>5) & 0x1f) + 1;
  237. nclen = ((in->sreg>>10) & 0xf) + 4;
  238. in->sreg >>= 14;
  239. in->nbits -= 14;
  240. if(nlit > Nlitlen || ndist > Noff || nlit < 257) {
  241. in->error = FlateCorrupted;
  242. return 0;
  243. }
  244. /* huff table header */
  245. len = malloc(Nlitlen+Noff);
  246. lentab = malloc(sizeof(Huff));
  247. offtab = malloc(sizeof(Huff));
  248. if(len == nil || lentab == nil || offtab == nil){
  249. in->error = FlateNoMem;
  250. goto bad;
  251. }
  252. for(i=0; i < Nclen; i++)
  253. len[i] = 0;
  254. for(i=0; i<nclen; i++) {
  255. if(!sregfill(in, 3))
  256. goto bad;
  257. len[clenorder[i]] = in->sreg & 0x7;
  258. in->sreg >>= 3;
  259. in->nbits -= 3;
  260. }
  261. if(!hufftab(lentab, len, Nclen, ClenBits)){
  262. in->error = FlateCorrupted;
  263. goto bad;
  264. }
  265. n = nlit+ndist;
  266. for(i=0; i<n;) {
  267. nb = lentab->minbits;
  268. for(;;){
  269. if(in->nbits<nb && !sregfill(in, nb))
  270. goto bad;
  271. c = lentab->flat[in->sreg & lentab->flatmask];
  272. nb = c & 0xff;
  273. if(nb > in->nbits){
  274. if(nb != 0xff)
  275. continue;
  276. c = hdecsym(in, lentab, c);
  277. if(c < 0)
  278. goto bad;
  279. }else{
  280. c >>= 8;
  281. in->sreg >>= nb;
  282. in->nbits -= nb;
  283. }
  284. break;
  285. }
  286. if(c < 16) {
  287. j = 1;
  288. } else if(c == 16) {
  289. if(in->nbits<2 && !sregfill(in, 2))
  290. goto bad;
  291. j = (in->sreg&0x3)+3;
  292. in->sreg >>= 2;
  293. in->nbits -= 2;
  294. if(i == 0) {
  295. in->error = FlateCorrupted;
  296. goto bad;
  297. }
  298. c = len[i-1];
  299. } else if(c == 17) {
  300. if(in->nbits<3 && !sregfill(in, 3))
  301. goto bad;
  302. j = (in->sreg&0x7)+3;
  303. in->sreg >>= 3;
  304. in->nbits -= 3;
  305. c = 0;
  306. } else if(c == 18) {
  307. if(in->nbits<7 && !sregfill(in, 7))
  308. goto bad;
  309. j = (in->sreg&0x7f)+11;
  310. in->sreg >>= 7;
  311. in->nbits -= 7;
  312. c = 0;
  313. } else {
  314. in->error = FlateCorrupted;
  315. goto bad;
  316. }
  317. if(i+j > n) {
  318. in->error = FlateCorrupted;
  319. goto bad;
  320. }
  321. while(j) {
  322. len[i] = c;
  323. i++;
  324. j--;
  325. }
  326. }
  327. if(!hufftab(lentab, len, nlit, LitlenBits)
  328. || !hufftab(offtab, &len[nlit], ndist, OffBits)){
  329. in->error = FlateCorrupted;
  330. goto bad;
  331. }
  332. res = decode(in, his, lentab, offtab);
  333. free(len);
  334. free(lentab);
  335. free(offtab);
  336. return res;
  337. bad:
  338. free(len);
  339. free(lentab);
  340. free(offtab);
  341. return 0;
  342. }
  343. static int
  344. decode(Input *in, History *his, Huff *litlentab, Huff *offtab)
  345. {
  346. int len, off;
  347. uchar *hs, *hp, *hq, *he;
  348. int c;
  349. int nb;
  350. hs = his->his;
  351. he = hs + HistorySize;
  352. hp = his->cp;
  353. for(;;) {
  354. nb = litlentab->minbits;
  355. for(;;){
  356. if(in->nbits<nb && !sregfill(in, nb))
  357. return 0;
  358. c = litlentab->flat[in->sreg & litlentab->flatmask];
  359. nb = c & 0xff;
  360. if(nb > in->nbits){
  361. if(nb != 0xff)
  362. continue;
  363. c = hdecsym(in, litlentab, c);
  364. if(c < 0)
  365. return 0;
  366. }else{
  367. c >>= 8;
  368. in->sreg >>= nb;
  369. in->nbits -= nb;
  370. }
  371. break;
  372. }
  373. if(c < 256) {
  374. /* literal */
  375. *hp++ = c;
  376. if(hp == he) {
  377. his->full = 1;
  378. if((*in->w)(in->wr, hs, HistorySize) != HistorySize) {
  379. in->error = FlateOutputFail;
  380. return 0;
  381. }
  382. hp = hs;
  383. }
  384. continue;
  385. }
  386. if(c == 256)
  387. break;
  388. if(c > 285) {
  389. in->error = FlateCorrupted;
  390. return 0;
  391. }
  392. c -= 257;
  393. nb = litlenextra[c];
  394. if(in->nbits < nb && !sregfill(in, nb))
  395. return 0;
  396. len = litlenbase[c] + (in->sreg & ((1<<nb)-1));
  397. in->sreg >>= nb;
  398. in->nbits -= nb;
  399. /* get offset */
  400. nb = offtab->minbits;
  401. for(;;){
  402. if(in->nbits<nb && !sregfill(in, nb))
  403. return 0;
  404. c = offtab->flat[in->sreg & offtab->flatmask];
  405. nb = c & 0xff;
  406. if(nb > in->nbits){
  407. if(nb != 0xff)
  408. continue;
  409. c = hdecsym(in, offtab, c);
  410. if(c < 0)
  411. return 0;
  412. }else{
  413. c >>= 8;
  414. in->sreg >>= nb;
  415. in->nbits -= nb;
  416. }
  417. break;
  418. }
  419. if(c > 29) {
  420. in->error = FlateCorrupted;
  421. return 0;
  422. }
  423. nb = offextra[c];
  424. if(in->nbits < nb && !sregfill(in, nb))
  425. return 0;
  426. off = offbase[c] + (in->sreg & ((1<<nb)-1));
  427. in->sreg >>= nb;
  428. in->nbits -= nb;
  429. hq = hp - off;
  430. if(hq < hs) {
  431. if(!his->full) {
  432. in->error = FlateCorrupted;
  433. return 0;
  434. }
  435. hq += HistorySize;
  436. }
  437. /* slow but correct */
  438. while(len) {
  439. *hp = *hq;
  440. hq++;
  441. hp++;
  442. if(hq >= he)
  443. hq = hs;
  444. if(hp == he) {
  445. his->full = 1;
  446. if((*in->w)(in->wr, hs, HistorySize) != HistorySize) {
  447. in->error = FlateOutputFail;
  448. return 0;
  449. }
  450. hp = hs;
  451. }
  452. len--;
  453. }
  454. }
  455. his->cp = hp;
  456. return 1;
  457. }
  458. static int
  459. revcode(int c, int b)
  460. {
  461. /* shift encode up so it starts on bit 15 then reverse */
  462. c <<= (16-b);
  463. c = revtab[c>>8] | (revtab[c&0xff]<<8);
  464. return c;
  465. }
  466. /*
  467. * construct the huffman decoding arrays and a fast lookup table.
  468. * the fast lookup is a table indexed by the next flatbits bits,
  469. * which returns the symbol matched and the number of bits consumed,
  470. * or the minimum number of bits needed and 0xff if more than flatbits
  471. * bits are needed.
  472. *
  473. * flatbits can be longer than the smallest huffman code,
  474. * because shorter codes are assigned smaller lexical prefixes.
  475. * this means assuming zeros for the next few bits will give a
  476. * conservative answer, in the sense that it will either give the
  477. * correct answer, or return the minimum number of bits which
  478. * are needed for an answer.
  479. */
  480. static int
  481. hufftab(Huff *h, char *hb, int maxleaf, int flatbits)
  482. {
  483. ulong bitcount[MaxHuffBits];
  484. ulong c, fc, ec, mincode, code, nc[MaxHuffBits];
  485. int i, b, minbits, maxbits;
  486. for(i = 0; i < MaxHuffBits; i++)
  487. bitcount[i] = 0;
  488. maxbits = -1;
  489. minbits = MaxHuffBits + 1;
  490. for(i=0; i < maxleaf; i++){
  491. b = hb[i];
  492. if(b){
  493. bitcount[b]++;
  494. if(b < minbits)
  495. minbits = b;
  496. if(b > maxbits)
  497. maxbits = b;
  498. }
  499. }
  500. h->maxbits = maxbits;
  501. if(maxbits <= 0){
  502. h->maxbits = 0;
  503. h->minbits = 0;
  504. h->flatmask = 0;
  505. return 1;
  506. }
  507. code = 0;
  508. c = 0;
  509. for(b = 0; b <= maxbits; b++){
  510. h->last[b] = c;
  511. c += bitcount[b];
  512. mincode = code << 1;
  513. nc[b] = mincode;
  514. code = mincode + bitcount[b];
  515. if(code > (1 << b))
  516. return 0;
  517. h->maxcode[b] = code - 1;
  518. h->last[b] += code - 1;
  519. }
  520. if(flatbits > maxbits)
  521. flatbits = maxbits;
  522. h->flatmask = (1 << flatbits) - 1;
  523. if(minbits > flatbits)
  524. minbits = flatbits;
  525. h->minbits = minbits;
  526. b = 1 << flatbits;
  527. for(i = 0; i < b; i++)
  528. h->flat[i] = ~0;
  529. /*
  530. * initialize the flat table to include the minimum possible
  531. * bit length for each code prefix
  532. */
  533. for(b = maxbits; b > flatbits; b--){
  534. code = h->maxcode[b];
  535. if(code == -1)
  536. break;
  537. mincode = code + 1 - bitcount[b];
  538. mincode >>= b - flatbits;
  539. code >>= b - flatbits;
  540. for(; mincode <= code; mincode++)
  541. h->flat[revcode(mincode, flatbits)] = (b << 8) | 0xff;
  542. }
  543. for(i = 0; i < maxleaf; i++){
  544. b = hb[i];
  545. if(b <= 0)
  546. continue;
  547. c = nc[b]++;
  548. if(b <= flatbits){
  549. code = (i << 8) | b;
  550. ec = (c + 1) << (flatbits - b);
  551. if(ec > (1<<flatbits))
  552. return 0; /* this is actually an internal error */
  553. for(fc = c << (flatbits - b); fc < ec; fc++)
  554. h->flat[revcode(fc, flatbits)] = code;
  555. }
  556. if(b > minbits){
  557. c = h->last[b] - c;
  558. if(c >= maxleaf)
  559. return 0;
  560. h->decode[c] = i;
  561. }
  562. }
  563. return 1;
  564. }
  565. static int
  566. hdecsym(Input *in, Huff *h, int nb)
  567. {
  568. long c;
  569. if((nb & 0xff) == 0xff)
  570. nb = nb >> 8;
  571. else
  572. nb = nb & 0xff;
  573. for(; nb <= h->maxbits; nb++){
  574. if(in->nbits<nb && !sregfill(in, nb))
  575. return -1;
  576. c = revtab[in->sreg&0xff]<<8;
  577. c |= revtab[(in->sreg>>8)&0xff];
  578. c >>= (16-nb);
  579. if(c <= h->maxcode[nb]){
  580. in->sreg >>= nb;
  581. in->nbits -= nb;
  582. return h->decode[h->last[nb] - c];
  583. }
  584. }
  585. in->error = FlateCorrupted;
  586. return -1;
  587. }
  588. static int
  589. sregfill(Input *in, int n)
  590. {
  591. int c;
  592. while(n > in->nbits) {
  593. c = (*in->get)(in->getr);
  594. if(c < 0){
  595. in->error = FlateInputFail;
  596. return 0;
  597. }
  598. in->sreg |= c<<in->nbits;
  599. in->nbits += 8;
  600. }
  601. return 1;
  602. }
  603. static int
  604. sregunget(Input *in)
  605. {
  606. if(in->nbits >= 8) {
  607. in->error = FlateInternal;
  608. return 0;
  609. }
  610. /* throw other bits on the floor */
  611. in->nbits = 0;
  612. in->sreg = 0;
  613. return 1;
  614. }