decompress_unlzma.c

/* vi: set sw=4 ts=4: */
/*
 * Small LZMA decompression implementation.
 * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
 *
 * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
 * Copyright (C) 1999-2005 Igor Pavlov
 *
 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
 */

#include "libbb.h"
#include "unarchive.h"

#if ENABLE_FEATURE_LZMA_FAST
# define speed_inline ALWAYS_INLINE
#else
# define speed_inline
#endif

typedef struct {
    int fd;
    uint8_t *ptr;

    /* We used to keep rc on the stack in unlzma and allocate the buffer
     * separately, but the code is smaller with the buffer "attached to"
     * the allocated rc: */
    /* uint8_t *buffer; */
#define RC_BUFFER ((uint8_t*)(rc+1))

    uint8_t *buffer_end;

    /* Had provisions for a variable-size buffer, but we don't need it here */
    /* int buffer_size; */
#define RC_BUFFER_SIZE 0x10000

    uint32_t code;
    uint32_t range;
    uint32_t bound;
} rc_t;

#define RC_TOP_BITS 24
#define RC_MOVE_BITS 5
#define RC_MODEL_TOTAL_BITS 11
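
/* How these constants interact: bit probabilities are 11-bit values in
 * [0, 1 << RC_MODEL_TOTAL_BITS), so 1024 means "0 and 1 equally likely";
 * RC_MOVE_BITS = 5 makes each adaptation step move a probability by 1/32
 * of its distance to the nearer bound; and the decoder renormalizes
 * whenever 'range' drops below 2^RC_TOP_BITS, which keeps the probability
 * multiply in rc_is_bit_0_helper() from losing precision. */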

/* Called twice: once at startup and once in rc_normalize() */
static void rc_read(rc_t *rc)
{
    int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE);

    if (buffer_size <= 0)
        bb_error_msg_and_die("unexpected EOF");
    rc->ptr = RC_BUFFER;
    rc->buffer_end = RC_BUFFER + buffer_size;
}

/* Called once */
static rc_t* rc_init(int fd) /*, int buffer_size) */
{
    int i;
    rc_t *rc;

    rc = xmalloc(sizeof(rc_t) + RC_BUFFER_SIZE);

    rc->fd = fd;
    /* rc->buffer_size = buffer_size; */
    rc->buffer_end = RC_BUFFER + RC_BUFFER_SIZE;
    rc->ptr = rc->buffer_end;

    rc->code = 0;
    rc->range = 0xFFFFFFFF;
    for (i = 0; i < 5; i++) {
        if (rc->ptr >= rc->buffer_end)
            rc_read(rc);
        rc->code = (rc->code << 8) | *rc->ptr++;
    }
    return rc;
}

/* Called once */
static ALWAYS_INLINE void rc_free(rc_t *rc)
{
    if (ENABLE_FEATURE_CLEAN_UP)
        free(rc);
}

/* Called twice, but one callsite is in speed_inline'd rc_is_bit_0_helper() */
static void rc_do_normalize(rc_t *rc)
{
    if (rc->ptr >= rc->buffer_end)
        rc_read(rc);
    rc->range <<= 8;
    rc->code = (rc->code << 8) | *rc->ptr++;
}

static ALWAYS_INLINE void rc_normalize(rc_t *rc)
{
    if (rc->range < (1 << RC_TOP_BITS)) {
        rc_do_normalize(rc);
    }
}
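
/* Each normalization step mirrors what the encoder did: it shifts one more
 * input byte into 'code' and scales 'range' up by 256. Afterwards 'range'
 * is back in [2^24, 2^32), so at most one step is ever needed per call. */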

/* rc_is_bit_0 is called 9 times */
/* Why does rc_is_bit_0_helper exist?
 * Because we want to always expose (rc->code < rc->bound) to the optimizer.
 * Thus rc_is_bit_0 is always inlined, and rc_is_bit_0_helper is inlined
 * only if we compile for speed.
 */
static speed_inline uint32_t rc_is_bit_0_helper(rc_t *rc, uint16_t *p)
{
    rc_normalize(rc);
    rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS);
    return rc->bound;
}

static ALWAYS_INLINE int rc_is_bit_0(rc_t *rc, uint16_t *p)
{
    uint32_t t = rc_is_bit_0_helper(rc, p);
    return rc->code < t;
}

/* Called ~10 times, but very small, thus inlined */
static speed_inline void rc_update_bit_0(rc_t *rc, uint16_t *p)
{
    rc->range = rc->bound;
    *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS;
}

static speed_inline void rc_update_bit_1(rc_t *rc, uint16_t *p)
{
    rc->range -= rc->bound;
    rc->code -= rc->bound;
    *p -= *p >> RC_MOVE_BITS;
}
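
/* The updates above make the model adaptive: decoding a 0 pulls *p toward
 * 2048 (a 0 becomes more likely next time), decoding a 1 pulls it toward 0.
 * For example, with *p == 1024, a decoded 0 gives 1024 + (2048-1024)/32
 * == 1056, while a decoded 1 gives 1024 - 1024/32 == 992. */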

/* Called 4 times in unlzma loop */
static int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol)
{
    if (rc_is_bit_0(rc, p)) {
        rc_update_bit_0(rc, p);
        *symbol *= 2;
        return 0;
    } else {
        rc_update_bit_1(rc, p);
        *symbol = *symbol * 2 + 1;
        return 1;
    }
}

/* Called once */
static ALWAYS_INLINE int rc_direct_bit(rc_t *rc)
{
    rc_normalize(rc);
    rc->range >>= 1;
    if (rc->code >= rc->range) {
        rc->code -= rc->range;
        return 1;
    }
    return 0;
}

/* Called twice */
static speed_inline void
rc_bit_tree_decode(rc_t *rc, uint16_t *p, int num_levels, int *symbol)
{
    int i = num_levels;

    *symbol = 1;
    while (i--)
        rc_get_bit(rc, p + *symbol, symbol);
    *symbol -= 1 << num_levels;
}
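
/* rc_bit_tree_decode() walks an implicit binary tree stored in p[]:
 * *symbol doubles as both the decoded prefix and the index of the next
 * tree node, so after num_levels steps it holds (1 << num_levels) + value,
 * and the final subtraction leaves just the value in [0, 1 << num_levels). */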

typedef struct {
    uint8_t pos;
    uint32_t dict_size;
    uint64_t dst_size;
} __attribute__ ((packed)) lzma_header_t;
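
/* This matches the 13-byte .lzma file header: one "properties" byte (pos)
 * packing lc/lp/pb, a little-endian 32-bit dictionary size, and a
 * little-endian 64-bit uncompressed size. */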

/* #defines would force the compiler to compute/optimize each expression at
 * each usage. Have a heart and use an enum instead. */
enum {
    LZMA_BASE_SIZE = 1846,
    LZMA_LIT_SIZE = 768,

    LZMA_NUM_POS_BITS_MAX = 4,

    LZMA_LEN_NUM_LOW_BITS = 3,
    LZMA_LEN_NUM_MID_BITS = 3,
    LZMA_LEN_NUM_HIGH_BITS = 8,

    LZMA_LEN_CHOICE = 0,
    LZMA_LEN_CHOICE_2 = (LZMA_LEN_CHOICE + 1),
    LZMA_LEN_LOW = (LZMA_LEN_CHOICE_2 + 1),
    LZMA_LEN_MID = (LZMA_LEN_LOW
            + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))),
    LZMA_LEN_HIGH = (LZMA_LEN_MID
            + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))),
    LZMA_NUM_LEN_PROBS = (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)),

    LZMA_NUM_STATES = 12,
    LZMA_NUM_LIT_STATES = 7,

    LZMA_START_POS_MODEL_INDEX = 4,
    LZMA_END_POS_MODEL_INDEX = 14,
    LZMA_NUM_FULL_DISTANCES = (1 << (LZMA_END_POS_MODEL_INDEX >> 1)),

    LZMA_NUM_POS_SLOT_BITS = 6,
    LZMA_NUM_LEN_TO_POS_STATES = 4,

    LZMA_NUM_ALIGN_BITS = 4,

    LZMA_MATCH_MIN_LEN = 2,

    LZMA_IS_MATCH = 0,
    LZMA_IS_REP = (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)),
    LZMA_IS_REP_G0 = (LZMA_IS_REP + LZMA_NUM_STATES),
    LZMA_IS_REP_G1 = (LZMA_IS_REP_G0 + LZMA_NUM_STATES),
    LZMA_IS_REP_G2 = (LZMA_IS_REP_G1 + LZMA_NUM_STATES),
    LZMA_IS_REP_0_LONG = (LZMA_IS_REP_G2 + LZMA_NUM_STATES),
    LZMA_POS_SLOT = (LZMA_IS_REP_0_LONG
            + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)),
    LZMA_SPEC_POS = (LZMA_POS_SLOT
            + (LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)),
    LZMA_ALIGN = (LZMA_SPEC_POS
            + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX),
    LZMA_LEN_CODER = (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)),
    LZMA_REP_LEN_CODER = (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS),
    LZMA_LITERAL = (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS),
};
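
/* All of these are offsets into one big uint16_t probability array p[].
 * The fixed-layout part occupies LZMA_BASE_SIZE (1846) slots, so
 * LZMA_LITERAL == LZMA_BASE_SIZE, and the literal coder adds another
 * LZMA_LIT_SIZE << (lc + lp) slots after it. */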

USE_DESKTOP(long long) int
unpack_lzma_stream(int src_fd, int dst_fd)
{
    USE_DESKTOP(long long total_written = 0;)
    lzma_header_t header;
    int lc, pb, lp;
    uint32_t pos_state_mask;
    uint32_t literal_pos_mask;
    uint32_t pos;
    uint16_t *p;
    uint16_t *prob;
    uint16_t *prob_lit;
    int num_bits;
    int num_probs;
    rc_t *rc;
    int i, mi;
    uint8_t *buffer;
    uint8_t previous_byte = 0;
    size_t buffer_pos = 0, global_pos = 0;
    int len = 0;
    int state = 0;
    uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;

    xread(src_fd, &header, sizeof(header));

    if (header.pos >= (9 * 5 * 5))
        bb_error_msg_and_die("bad header");
    mi = header.pos / 9;
    lc = header.pos % 9;
    pb = mi / 5;
    lp = mi % 5;

    pos_state_mask = (1 << pb) - 1;
    literal_pos_mask = (1 << lp) - 1;
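
    /* The properties byte just decoded packs pb*45 + lp*9 + lc. For the
     * commonly used default 0x5D (93): lc = 93 % 9 = 3, 93 / 9 = 10,
     * pb = 10 / 5 = 2, lp = 10 % 5 = 0, giving pos_state_mask = 3 and
     * literal_pos_mask = 0. */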

    header.dict_size = SWAP_LE32(header.dict_size);
    header.dst_size = SWAP_LE64(header.dst_size);

    if (header.dict_size == 0)
        header.dict_size = 1;

    buffer = xmalloc(MIN(header.dst_size, header.dict_size));

    num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp));
    p = xmalloc(num_probs * sizeof(*p));
    num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp));
    for (i = 0; i < num_probs; i++)
        p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1;

    rc = rc_init(src_fd); /*, RC_BUFFER_SIZE); */

    while (global_pos + buffer_pos < header.dst_size) {
        int pos_state = (buffer_pos + global_pos) & pos_state_mask;

        prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state;
        if (rc_is_bit_0(rc, prob)) {
            mi = 1;
            rc_update_bit_0(rc, prob);
            prob = (p + LZMA_LITERAL + (LZMA_LIT_SIZE
                    * ((((buffer_pos + global_pos) & literal_pos_mask) << lc)
                    + (previous_byte >> (8 - lc)))));
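            /* Literal. If the previous packet was a match (state >= 7),
             * the first literal bits are decoded in "matched" mode: each
             * tree step also consults the corresponding bit of the byte
             * at distance rep0, which tends to predict the literal well. */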
            if (state >= LZMA_NUM_LIT_STATES) {
                int match_byte;

                pos = buffer_pos - rep0;
                while (pos >= header.dict_size)
                    pos += header.dict_size;
                match_byte = buffer[pos];
                do {
                    int bit;

                    match_byte <<= 1;
                    bit = match_byte & 0x100;
                    prob_lit = prob + 0x100 + bit + mi;
                    if (rc_get_bit(rc, prob_lit, &mi)) {
                        if (!bit)
                            break;
                    } else {
                        if (bit)
                            break;
                    }
                } while (mi < 0x100);
            }
            while (mi < 0x100) {
                prob_lit = prob + mi;
                rc_get_bit(rc, prob_lit, &mi);
            }
            previous_byte = (uint8_t) mi;

            buffer[buffer_pos++] = previous_byte;
            if (buffer_pos == header.dict_size) {
                buffer_pos = 0;
                global_pos += header.dict_size;
                if (full_write(dst_fd, buffer, header.dict_size) != header.dict_size)
                    goto bad;
                USE_DESKTOP(total_written += header.dict_size;)
            }
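
            /* A literal was decoded: the state machine decays toward the
             * "recent literals" states (0..3 -> 0, 4..9 -> minus 3,
             * 10..11 -> minus 6), so states 0..6 mean literals came last
             * and states 7..11 mean a match or rep did. */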
            if (state < 4)
                state = 0;
            else if (state < 10)
                state -= 3;
            else
                state -= 6;
        } else {
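            /* Match packet: one more bit chooses between a match with a
             * brand-new distance (IS_REP bit 0) and a "rep" match that
             * reuses one of the last four distances rep0..rep3. A rep0
             * reuse with the IS_REP_0_LONG bit clear is the "short rep":
             * exactly one byte is copied from distance rep0. */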
            int offset;
            uint16_t *prob_len;

            rc_update_bit_1(rc, prob);
            prob = p + LZMA_IS_REP + state;
            if (rc_is_bit_0(rc, prob)) {
                rc_update_bit_0(rc, prob);
                rep3 = rep2;
                rep2 = rep1;
                rep1 = rep0;
                state = state < LZMA_NUM_LIT_STATES ? 0 : 3;
                prob = p + LZMA_LEN_CODER;
            } else {
                rc_update_bit_1(rc, prob);
                prob = p + LZMA_IS_REP_G0 + state;
                if (rc_is_bit_0(rc, prob)) {
                    rc_update_bit_0(rc, prob);
                    prob = (p + LZMA_IS_REP_0_LONG
                            + (state << LZMA_NUM_POS_BITS_MAX) + pos_state);
                    if (rc_is_bit_0(rc, prob)) {
                        rc_update_bit_0(rc, prob);

                        state = state < LZMA_NUM_LIT_STATES ? 9 : 11;
                        pos = buffer_pos - rep0;
                        while (pos >= header.dict_size)
                            pos += header.dict_size;
                        previous_byte = buffer[pos];
                        buffer[buffer_pos++] = previous_byte;
                        if (buffer_pos == header.dict_size) {
                            buffer_pos = 0;
                            global_pos += header.dict_size;
                            if (full_write(dst_fd, buffer, header.dict_size) != header.dict_size)
                                goto bad;
                            USE_DESKTOP(total_written += header.dict_size;)
                        }
                        continue;
                    } else {
                        rc_update_bit_1(rc, prob);
                    }
                } else {
                    uint32_t distance;

                    rc_update_bit_1(rc, prob);
                    prob = p + LZMA_IS_REP_G1 + state;
                    if (rc_is_bit_0(rc, prob)) {
                        rc_update_bit_0(rc, prob);
                        distance = rep1;
                    } else {
                        rc_update_bit_1(rc, prob);
                        prob = p + LZMA_IS_REP_G2 + state;
                        if (rc_is_bit_0(rc, prob)) {
                            rc_update_bit_0(rc, prob);
                            distance = rep2;
                        } else {
                            rc_update_bit_1(rc, prob);
                            distance = rep3;
                            rep3 = rep2;
                        }
                        rep2 = rep1;
                    }
                    rep1 = rep0;
                    rep0 = distance;
                }
                state = state < LZMA_NUM_LIT_STATES ? 8 : 11;
                prob = p + LZMA_REP_LEN_CODER;
            }
            prob_len = prob + LZMA_LEN_CHOICE;
            if (rc_is_bit_0(rc, prob_len)) {
                rc_update_bit_0(rc, prob_len);
                prob_len = (prob + LZMA_LEN_LOW
                            + (pos_state << LZMA_LEN_NUM_LOW_BITS));
                offset = 0;
                num_bits = LZMA_LEN_NUM_LOW_BITS;
            } else {
                rc_update_bit_1(rc, prob_len);
                prob_len = prob + LZMA_LEN_CHOICE_2;
                if (rc_is_bit_0(rc, prob_len)) {
                    rc_update_bit_0(rc, prob_len);
                    prob_len = (prob + LZMA_LEN_MID
                                + (pos_state << LZMA_LEN_NUM_MID_BITS));
                    offset = 1 << LZMA_LEN_NUM_LOW_BITS;
                    num_bits = LZMA_LEN_NUM_MID_BITS;
                } else {
                    rc_update_bit_1(rc, prob_len);
                    prob_len = prob + LZMA_LEN_HIGH;
                    offset = ((1 << LZMA_LEN_NUM_LOW_BITS)
                              + (1 << LZMA_LEN_NUM_MID_BITS));
                    num_bits = LZMA_LEN_NUM_HIGH_BITS;
                }
            }
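
            /* The choice bits above select one of three length ranges:
             * "low" codes biased lengths 0..7, "mid" 8..15, "high" 16..271.
             * LZMA_MATCH_MIN_LEN is added later, so real match lengths run
             * 2..273. */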
            rc_bit_tree_decode(rc, prob_len, num_bits, &len);
            len += offset;
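
            /* state < 4 here means we took the "new match" branch above
             * (state was just set to 0 or 3), so a fresh distance follows:
             * a 6-bit pos_slot, then either probability-coded low bits
             * (slots 4..13) or direct bits plus 4 probability-coded align
             * bits (slots >= 14). Slots 0..3 are the distance itself. */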
            if (state < 4) {
                int pos_slot;

                state += LZMA_NUM_LIT_STATES;
                prob = p + LZMA_POS_SLOT +
                       ((len < LZMA_NUM_LEN_TO_POS_STATES ? len :
                         LZMA_NUM_LEN_TO_POS_STATES - 1)
                        << LZMA_NUM_POS_SLOT_BITS);
                rc_bit_tree_decode(rc, prob, LZMA_NUM_POS_SLOT_BITS,
                                   &pos_slot);
                if (pos_slot >= LZMA_START_POS_MODEL_INDEX) {
                    num_bits = (pos_slot >> 1) - 1;
                    rep0 = 2 | (pos_slot & 1);
                    if (pos_slot < LZMA_END_POS_MODEL_INDEX) {
                        rep0 <<= num_bits;
                        prob = p + LZMA_SPEC_POS + rep0 - pos_slot - 1;
                    } else {
                        num_bits -= LZMA_NUM_ALIGN_BITS;
                        while (num_bits--)
                            rep0 = (rep0 << 1) | rc_direct_bit(rc);
                        prob = p + LZMA_ALIGN;
                        rep0 <<= LZMA_NUM_ALIGN_BITS;
                        num_bits = LZMA_NUM_ALIGN_BITS;
                    }
                    i = 1;
                    mi = 1;
                    while (num_bits--) {
                        if (rc_get_bit(rc, prob + mi, &mi))
                            rep0 |= i;
                        i <<= 1;
                    }
                } else
                    rep0 = pos_slot;
                if (++rep0 == 0)
                    break;
            }
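
            /* rep0 wrapping to 0 above means the encoder emitted the
             * 0xFFFFFFFF end-of-stream marker. For a real match, 'len' is
             * still biased, so add the minimum match length. In the copy
             * loop, pos underflows as a uint32_t when rep0 > buffer_pos,
             * and the "pos += dict_size" wraps it back into the circular
             * dictionary buffer. */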
            len += LZMA_MATCH_MIN_LEN;
            do {
                pos = buffer_pos - rep0;
                while (pos >= header.dict_size)
                    pos += header.dict_size;
                previous_byte = buffer[pos];
                buffer[buffer_pos++] = previous_byte;
                if (buffer_pos == header.dict_size) {
                    buffer_pos = 0;
                    global_pos += header.dict_size;
                    if (full_write(dst_fd, buffer, header.dict_size) != header.dict_size)
                        goto bad;
                    USE_DESKTOP(total_written += header.dict_size;)
                }
                len--;
            } while (len != 0 && buffer_pos < header.dst_size);
        }
    }

    if (full_write(dst_fd, buffer, buffer_pos) != buffer_pos) {
 bad:
        rc_free(rc);
        return -1;
    }
    rc_free(rc);
    USE_DESKTOP(total_written += buffer_pos;)
    return USE_DESKTOP(total_written) + 0;
}