inflate.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <flate.h>
  4. enum {
  5. HistorySize= 32*1024,
  6. BufSize= 4*1024,
  7. MaxHuffBits= 17, /* maximum bits in a encoded code */
  8. Nlitlen= 288, /* number of litlen codes */
  9. Noff= 32, /* number of offset codes */
  10. Nclen= 19, /* number of codelen codes */
  11. LenShift= 10, /* code = len<<LenShift|code */
  12. LitlenBits= 7, /* number of bits in litlen decode table */
  13. OffBits= 6, /* number of bits in offset decode table */
  14. ClenBits= 6, /* number of bits in code len decode table */
  15. MaxFlatBits= LitlenBits,
  16. MaxLeaf= Nlitlen
  17. };
  18. typedef struct Input Input;
  19. typedef struct History History;
  20. typedef struct Huff Huff;
  21. struct Input
  22. {
  23. int error; /* first error encountered, or FlateOk */
  24. void *wr;
  25. int (*w)(void*, void*, int);
  26. void *getr;
  27. int (*get)(void*);
  28. ulong sreg;
  29. int nbits;
  30. };
  31. struct History
  32. {
  33. uchar his[HistorySize];
  34. uchar *cp; /* current pointer in history */
  35. int full; /* his has been filled up at least once */
  36. };
  37. struct Huff
  38. {
  39. int maxbits; /* max bits for any code */
  40. int minbits; /* min bits to get before looking in flat */
  41. int flatmask; /* bits used in "flat" fast decoding table */
  42. ulong flat[1<<MaxFlatBits];
  43. ulong maxcode[MaxHuffBits];
  44. ulong last[MaxHuffBits];
  45. ulong decode[MaxLeaf];
  46. };
  47. /* litlen code words 257-285 extra bits */
  48. static int litlenextra[Nlitlen-257] =
  49. {
  50. /* 257 */ 0, 0, 0,
  51. /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
  52. /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
  53. /* 280 */ 4, 5, 5, 5, 5, 0, 0, 0
  54. };
  55. static int litlenbase[Nlitlen-257];
  56. /* offset code word extra bits */
  57. static int offextra[Noff] =
  58. {
  59. 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
  60. 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
  61. 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
  62. 0, 0,
  63. };
  64. static int offbase[Noff];
  65. /* order code lengths */
  66. static int clenorder[Nclen] =
  67. {
  68. 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
  69. };
  70. /* for static huffman tables */
  71. static Huff litlentab;
  72. static Huff offtab;
  73. static uchar revtab[256];
  74. static int uncblock(Input *in, History*);
  75. static int fixedblock(Input *in, History*);
  76. static int dynamicblock(Input *in, History*);
  77. static int sregfill(Input *in, int n);
  78. static int sregunget(Input *in);
  79. static int decode(Input*, History*, Huff*, Huff*);
  80. static void mismatch(void);
  81. static int hufftab(Huff*, char*, int, int);
  82. static int hdecsym(Input *in, Huff *h, int b);
  83. int
  84. inflateinit(void)
  85. {
  86. char *len;
  87. int i, j, base;
  88. /* byte reverse table */
  89. for(i=0; i<256; i++)
  90. for(j=0; j<8; j++)
  91. if(i & (1<<j))
  92. revtab[i] |= 0x80 >> j;
  93. for(i=257,base=3; i<Nlitlen; i++) {
  94. litlenbase[i-257] = base;
  95. base += 1<<litlenextra[i-257];
  96. }
  97. /* strange table entry in spec... */
  98. litlenbase[285-257]--;
  99. for(i=0,base=1; i<Noff; i++) {
  100. offbase[i] = base;
  101. base += 1<<offextra[i];
  102. }
  103. len = malloc(MaxLeaf);
  104. if(len == nil)
  105. return FlateNoMem;
  106. /* static Litlen bit lengths */
  107. for(i=0; i<144; i++)
  108. len[i] = 8;
  109. for(i=144; i<256; i++)
  110. len[i] = 9;
  111. for(i=256; i<280; i++)
  112. len[i] = 7;
  113. for(i=280; i<Nlitlen; i++)
  114. len[i] = 8;
  115. if(!hufftab(&litlentab, len, Nlitlen, MaxFlatBits))
  116. return FlateInternal;
  117. /* static Offset bit lengths */
  118. for(i=0; i<Noff; i++)
  119. len[i] = 5;
  120. if(!hufftab(&offtab, len, Noff, MaxFlatBits))
  121. return FlateInternal;
  122. free(len);
  123. return FlateOk;
  124. }
  125. int
  126. inflate(void *wr, int (*w)(void*, void*, int), void *getr, int (*get)(void*))
  127. {
  128. History *his;
  129. Input in;
  130. int final, type;
  131. his = malloc(sizeof(History));
  132. if(his == nil)
  133. return FlateNoMem;
  134. his->cp = his->his;
  135. his->full = 0;
  136. in.getr = getr;
  137. in.get = get;
  138. in.wr = wr;
  139. in.w = w;
  140. in.nbits = 0;
  141. in.sreg = 0;
  142. in.error = FlateOk;
  143. do {
  144. if(!sregfill(&in, 3))
  145. goto bad;
  146. final = in.sreg & 0x1;
  147. type = (in.sreg>>1) & 0x3;
  148. in.sreg >>= 3;
  149. in.nbits -= 3;
  150. switch(type) {
  151. default:
  152. in.error = FlateCorrupted;
  153. goto bad;
  154. case 0:
  155. /* uncompressed */
  156. if(!uncblock(&in, his))
  157. goto bad;
  158. break;
  159. case 1:
  160. /* fixed huffman */
  161. if(!fixedblock(&in, his))
  162. goto bad;
  163. break;
  164. case 2:
  165. /* dynamic huffman */
  166. if(!dynamicblock(&in, his))
  167. goto bad;
  168. break;
  169. }
  170. } while(!final);
  171. if(his->cp != his->his && (*w)(wr, his->his, his->cp - his->his) != his->cp - his->his) {
  172. in.error = FlateOutputFail;
  173. goto bad;
  174. }
  175. if(!sregunget(&in))
  176. goto bad;
  177. free(his);
  178. if(in.error != FlateOk)
  179. return FlateInternal;
  180. return FlateOk;
  181. bad:
  182. free(his);
  183. if(in.error == FlateOk)
  184. return FlateInternal;
  185. return in.error;
  186. }
  187. static int
  188. uncblock(Input *in, History *his)
  189. {
  190. int len, nlen, c;
  191. uchar *hs, *hp, *he;
  192. if(!sregunget(in))
  193. return 0;
  194. len = (*in->get)(in->getr);
  195. len |= (*in->get)(in->getr)<<8;
  196. nlen = (*in->get)(in->getr);
  197. nlen |= (*in->get)(in->getr)<<8;
  198. if(len != (~nlen&0xffff)) {
  199. in->error = FlateCorrupted;
  200. return 0;
  201. }
  202. hp = his->cp;
  203. hs = his->his;
  204. he = hs + HistorySize;
  205. while(len > 0) {
  206. c = (*in->get)(in->getr);
  207. if(c < 0)
  208. return 0;
  209. *hp++ = c;
  210. if(hp == he) {
  211. his->full = 1;
  212. if((*in->w)(in->wr, hs, HistorySize) != HistorySize) {
  213. in->error = FlateOutputFail;
  214. return 0;
  215. }
  216. hp = hs;
  217. }
  218. len--;
  219. }
  220. his->cp = hp;
  221. return 1;
  222. }
  223. static int
  224. fixedblock(Input *in, History *his)
  225. {
  226. return decode(in, his, &litlentab, &offtab);
  227. }
  228. static int
  229. dynamicblock(Input *in, History *his)
  230. {
  231. Huff *lentab, *offtab;
  232. char *len;
  233. int i, j, n, c, nlit, ndist, nclen, res, nb;
  234. if(!sregfill(in, 14))
  235. return 0;
  236. nlit = (in->sreg&0x1f) + 257;
  237. ndist = ((in->sreg>>5) & 0x1f) + 1;
  238. nclen = ((in->sreg>>10) & 0xf) + 4;
  239. in->sreg >>= 14;
  240. in->nbits -= 14;
  241. if(nlit > Nlitlen || ndist > Noff || nlit < 257) {
  242. in->error = FlateCorrupted;
  243. return 0;
  244. }
  245. /* huff table header */
  246. len = malloc(Nlitlen+Noff);
  247. lentab = malloc(sizeof(Huff));
  248. offtab = malloc(sizeof(Huff));
  249. if(len == nil || lentab == nil || offtab == nil){
  250. in->error = FlateNoMem;
  251. goto bad;
  252. }
  253. for(i=0; i < Nclen; i++)
  254. len[i] = 0;
  255. for(i=0; i<nclen; i++) {
  256. if(!sregfill(in, 3))
  257. goto bad;
  258. len[clenorder[i]] = in->sreg & 0x7;
  259. in->sreg >>= 3;
  260. in->nbits -= 3;
  261. }
  262. if(!hufftab(lentab, len, Nclen, ClenBits)){
  263. in->error = FlateCorrupted;
  264. goto bad;
  265. }
  266. n = nlit+ndist;
  267. for(i=0; i<n;) {
  268. nb = lentab->minbits;
  269. for(;;){
  270. if(in->nbits<nb && !sregfill(in, nb))
  271. goto bad;
  272. c = lentab->flat[in->sreg & lentab->flatmask];
  273. nb = c & 0xff;
  274. if(nb > in->nbits){
  275. if(nb != 0xff)
  276. continue;
  277. c = hdecsym(in, lentab, c);
  278. if(c < 0)
  279. goto bad;
  280. }else{
  281. c >>= 8;
  282. in->sreg >>= nb;
  283. in->nbits -= nb;
  284. }
  285. break;
  286. }
  287. if(c < 16) {
  288. j = 1;
  289. } else if(c == 16) {
  290. if(in->nbits<2 && !sregfill(in, 2))
  291. goto bad;
  292. j = (in->sreg&0x3)+3;
  293. in->sreg >>= 2;
  294. in->nbits -= 2;
  295. if(i == 0) {
  296. in->error = FlateCorrupted;
  297. goto bad;
  298. }
  299. c = len[i-1];
  300. } else if(c == 17) {
  301. if(in->nbits<3 && !sregfill(in, 3))
  302. goto bad;
  303. j = (in->sreg&0x7)+3;
  304. in->sreg >>= 3;
  305. in->nbits -= 3;
  306. c = 0;
  307. } else if(c == 18) {
  308. if(in->nbits<7 && !sregfill(in, 7))
  309. goto bad;
  310. j = (in->sreg&0x7f)+11;
  311. in->sreg >>= 7;
  312. in->nbits -= 7;
  313. c = 0;
  314. } else {
  315. in->error = FlateCorrupted;
  316. goto bad;
  317. }
  318. if(i+j > n) {
  319. in->error = FlateCorrupted;
  320. goto bad;
  321. }
  322. while(j) {
  323. len[i] = c;
  324. i++;
  325. j--;
  326. }
  327. }
  328. if(!hufftab(lentab, len, nlit, LitlenBits)
  329. || !hufftab(offtab, &len[nlit], ndist, OffBits)){
  330. in->error = FlateCorrupted;
  331. goto bad;
  332. }
  333. res = decode(in, his, lentab, offtab);
  334. free(len);
  335. free(lentab);
  336. free(offtab);
  337. return res;
  338. bad:
  339. free(len);
  340. free(lentab);
  341. free(offtab);
  342. return 0;
  343. }
  344. static int
  345. decode(Input *in, History *his, Huff *litlentab, Huff *offtab)
  346. {
  347. int len, off;
  348. uchar *hs, *hp, *hq, *he;
  349. int c;
  350. int nb;
  351. hs = his->his;
  352. he = hs + HistorySize;
  353. hp = his->cp;
  354. for(;;) {
  355. nb = litlentab->minbits;
  356. for(;;){
  357. if(in->nbits<nb && !sregfill(in, nb))
  358. return 0;
  359. c = litlentab->flat[in->sreg & litlentab->flatmask];
  360. nb = c & 0xff;
  361. if(nb > in->nbits){
  362. if(nb != 0xff)
  363. continue;
  364. c = hdecsym(in, litlentab, c);
  365. if(c < 0)
  366. return 0;
  367. }else{
  368. c >>= 8;
  369. in->sreg >>= nb;
  370. in->nbits -= nb;
  371. }
  372. break;
  373. }
  374. if(c < 256) {
  375. /* literal */
  376. *hp++ = c;
  377. if(hp == he) {
  378. his->full = 1;
  379. if((*in->w)(in->wr, hs, HistorySize) != HistorySize) {
  380. in->error = FlateOutputFail;
  381. return 0;
  382. }
  383. hp = hs;
  384. }
  385. continue;
  386. }
  387. if(c == 256)
  388. break;
  389. if(c > 285) {
  390. in->error = FlateCorrupted;
  391. return 0;
  392. }
  393. c -= 257;
  394. nb = litlenextra[c];
  395. if(in->nbits < nb && !sregfill(in, nb))
  396. return 0;
  397. len = litlenbase[c] + (in->sreg & ((1<<nb)-1));
  398. in->sreg >>= nb;
  399. in->nbits -= nb;
  400. /* get offset */
  401. nb = offtab->minbits;
  402. for(;;){
  403. if(in->nbits<nb && !sregfill(in, nb))
  404. return 0;
  405. c = offtab->flat[in->sreg & offtab->flatmask];
  406. nb = c & 0xff;
  407. if(nb > in->nbits){
  408. if(nb != 0xff)
  409. continue;
  410. c = hdecsym(in, offtab, c);
  411. if(c < 0)
  412. return 0;
  413. }else{
  414. c >>= 8;
  415. in->sreg >>= nb;
  416. in->nbits -= nb;
  417. }
  418. break;
  419. }
  420. if(c > 29) {
  421. in->error = FlateCorrupted;
  422. return 0;
  423. }
  424. nb = offextra[c];
  425. if(in->nbits < nb && !sregfill(in, nb))
  426. return 0;
  427. off = offbase[c] + (in->sreg & ((1<<nb)-1));
  428. in->sreg >>= nb;
  429. in->nbits -= nb;
  430. hq = hp - off;
  431. if(hq < hs) {
  432. if(!his->full) {
  433. in->error = FlateCorrupted;
  434. return 0;
  435. }
  436. hq += HistorySize;
  437. }
  438. /* slow but correct */
  439. while(len) {
  440. *hp = *hq;
  441. hq++;
  442. hp++;
  443. if(hq >= he)
  444. hq = hs;
  445. if(hp == he) {
  446. his->full = 1;
  447. if((*in->w)(in->wr, hs, HistorySize) != HistorySize) {
  448. in->error = FlateOutputFail;
  449. return 0;
  450. }
  451. hp = hs;
  452. }
  453. len--;
  454. }
  455. }
  456. his->cp = hp;
  457. return 1;
  458. }
  459. static int
  460. revcode(int c, int b)
  461. {
  462. /* shift encode up so it starts on bit 15 then reverse */
  463. c <<= (16-b);
  464. c = revtab[c>>8] | (revtab[c&0xff]<<8);
  465. return c;
  466. }
  467. /*
  468. * construct the huffman decoding arrays and a fast lookup table.
  469. * the fast lookup is a table indexed by the next flatbits bits,
  470. * which returns the symbol matched and the number of bits consumed,
  471. * or the minimum number of bits needed and 0xff if more than flatbits
  472. * bits are needed.
  473. *
  474. * flatbits can be longer than the smallest huffman code,
  475. * because shorter codes are assigned smaller lexical prefixes.
  476. * this means assuming zeros for the next few bits will give a
  477. * conservative answer, in the sense that it will either give the
  478. * correct answer, or return the minimum number of bits which
  479. * are needed for an answer.
  480. */
  481. static int
  482. hufftab(Huff *h, char *hb, int maxleaf, int flatbits)
  483. {
  484. ulong bitcount[MaxHuffBits];
  485. ulong c, fc, ec, mincode, code, nc[MaxHuffBits];
  486. int i, b, minbits, maxbits;
  487. for(i = 0; i < MaxHuffBits; i++)
  488. bitcount[i] = 0;
  489. maxbits = -1;
  490. minbits = MaxHuffBits + 1;
  491. for(i=0; i < maxleaf; i++){
  492. b = hb[i];
  493. if(b){
  494. bitcount[b]++;
  495. if(b < minbits)
  496. minbits = b;
  497. if(b > maxbits)
  498. maxbits = b;
  499. }
  500. }
  501. h->maxbits = maxbits;
  502. if(maxbits <= 0){
  503. h->maxbits = 0;
  504. h->minbits = 0;
  505. h->flatmask = 0;
  506. return 1;
  507. }
  508. code = 0;
  509. c = 0;
  510. for(b = 0; b <= maxbits; b++){
  511. h->last[b] = c;
  512. c += bitcount[b];
  513. mincode = code << 1;
  514. nc[b] = mincode;
  515. code = mincode + bitcount[b];
  516. if(code > (1 << b))
  517. return 0;
  518. h->maxcode[b] = code - 1;
  519. h->last[b] += code - 1;
  520. }
  521. if(flatbits > maxbits)
  522. flatbits = maxbits;
  523. h->flatmask = (1 << flatbits) - 1;
  524. if(minbits > flatbits)
  525. minbits = flatbits;
  526. h->minbits = minbits;
  527. b = 1 << flatbits;
  528. for(i = 0; i < b; i++)
  529. h->flat[i] = ~0;
  530. /*
  531. * initialize the flat table to include the minimum possible
  532. * bit length for each code prefix
  533. */
  534. for(b = maxbits; b > flatbits; b--){
  535. code = h->maxcode[b];
  536. if(code == -1)
  537. break;
  538. mincode = code + 1 - bitcount[b];
  539. mincode >>= b - flatbits;
  540. code >>= b - flatbits;
  541. for(; mincode <= code; mincode++)
  542. h->flat[revcode(mincode, flatbits)] = (b << 8) | 0xff;
  543. }
  544. for(i = 0; i < maxleaf; i++){
  545. b = hb[i];
  546. if(b <= 0)
  547. continue;
  548. c = nc[b]++;
  549. if(b <= flatbits){
  550. code = (i << 8) | b;
  551. ec = (c + 1) << (flatbits - b);
  552. if(ec > (1<<flatbits))
  553. return 0; /* this is actually an internal error */
  554. for(fc = c << (flatbits - b); fc < ec; fc++)
  555. h->flat[revcode(fc, flatbits)] = code;
  556. }
  557. if(b > minbits){
  558. c = h->last[b] - c;
  559. if(c >= maxleaf)
  560. return 0;
  561. h->decode[c] = i;
  562. }
  563. }
  564. return 1;
  565. }
  566. static int
  567. hdecsym(Input *in, Huff *h, int nb)
  568. {
  569. long c;
  570. if((nb & 0xff) == 0xff)
  571. nb = nb >> 8;
  572. else
  573. nb = nb & 0xff;
  574. for(; nb <= h->maxbits; nb++){
  575. if(in->nbits<nb && !sregfill(in, nb))
  576. return -1;
  577. c = revtab[in->sreg&0xff]<<8;
  578. c |= revtab[(in->sreg>>8)&0xff];
  579. c >>= (16-nb);
  580. if(c <= h->maxcode[nb]){
  581. in->sreg >>= nb;
  582. in->nbits -= nb;
  583. return h->decode[h->last[nb] - c];
  584. }
  585. }
  586. in->error = FlateCorrupted;
  587. return -1;
  588. }
  589. static int
  590. sregfill(Input *in, int n)
  591. {
  592. int c;
  593. while(n > in->nbits) {
  594. c = (*in->get)(in->getr);
  595. if(c < 0){
  596. in->error = FlateInputFail;
  597. return 0;
  598. }
  599. in->sreg |= c<<in->nbits;
  600. in->nbits += 8;
  601. }
  602. return 1;
  603. }
  604. static int
  605. sregunget(Input *in)
  606. {
  607. if(in->nbits >= 8) {
  608. in->error = FlateInternal;
  609. return 0;
  610. }
  611. /* throw other bits on the floor */
  612. in->nbits = 0;
  613. in->sreg = 0;
  614. return 1;
  615. }