decompress_unlzma.c

/* vi: set sw=4 ts=4: */
/*
 * Small lzma deflate implementation.
 * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
 *
 * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
 * Copyright (C) 1999-2005 Igor Pavlov
 *
 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
 */
#include "libbb.h"
#include "unarchive.h"

#if ENABLE_FEATURE_LZMA_FAST
# define speed_inline ALWAYS_INLINE
#else
# define speed_inline
#endif

typedef struct {
	int fd;
	uint8_t *ptr;

/* Was keeping rc on stack in unlzma and separately allocating buffer,
 * but with "buffer 'attached to' allocated rc" code is smaller: */
	/* uint8_t *buffer; */
#define RC_BUFFER ((uint8_t*)(rc+1))

	uint8_t *buffer_end;

/* Had provisions for variable buffer, but we don't need it here */
	/* int buffer_size; */
#define RC_BUFFER_SIZE 0x10000

	uint32_t code;
	uint32_t range;
	uint32_t bound;
} rc_t;

#define RC_TOP_BITS 24
#define RC_MOVE_BITS 5
#define RC_MODEL_TOTAL_BITS 11
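
/* Range coder parameters: probabilities are 11-bit values
 * (RC_MODEL_TOTAL_BITS, so 0..2047), the range is renormalized whenever it
 * drops below 2^24 (RC_TOP_BITS), and each adaptive update moves a
 * probability by 1/32 of the remaining distance (RC_MOVE_BITS = 5). */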

/* Called twice: once at startup and once in rc_normalize() */
static void rc_read(rc_t *rc)
{
	int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE);
	if (buffer_size <= 0)
		bb_error_msg_and_die("unexpected EOF");
	rc->ptr = RC_BUFFER;
	rc->buffer_end = RC_BUFFER + buffer_size;
}

/* Called once */
static rc_t* rc_init(int fd) /*, int buffer_size) */
{
	int i;
	rc_t *rc;

	rc = xmalloc(sizeof(*rc) + RC_BUFFER_SIZE);

	rc->fd = fd;
	/* rc->buffer_size = buffer_size; */
	rc->buffer_end = RC_BUFFER + RC_BUFFER_SIZE;
	rc->ptr = rc->buffer_end;

	rc->code = 0;
	rc->range = 0xFFFFFFFF;
	for (i = 0; i < 5; i++) {
		if (rc->ptr >= rc->buffer_end)
			rc_read(rc);
		rc->code = (rc->code << 8) | *rc->ptr++;
	}
	return rc;
}

/* Called once */
static ALWAYS_INLINE void rc_free(rc_t *rc)
{
	free(rc);
}

/* Called twice, but one callsite is in speed_inline'd rc_is_bit_0_helper() */
static void rc_do_normalize(rc_t *rc)
{
	if (rc->ptr >= rc->buffer_end)
		rc_read(rc);
	rc->range <<= 8;
	rc->code = (rc->code << 8) | *rc->ptr++;
}
static ALWAYS_INLINE void rc_normalize(rc_t *rc)
{
	if (rc->range < (1 << RC_TOP_BITS)) {
		rc_do_normalize(rc);
	}
}

/* rc_is_bit_0 is called 9 times */
/* Why does rc_is_bit_0_helper exist?
 * Because we want to always expose (rc->code < rc->bound) to the optimizer.
 * Thus rc_is_bit_0 is always inlined, and rc_is_bit_0_helper is inlined
 * only if we compile for speed.
 */
static speed_inline uint32_t rc_is_bit_0_helper(rc_t *rc, uint16_t *p)
{
	rc_normalize(rc);
	rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS);
	return rc->bound;
}
static ALWAYS_INLINE int rc_is_bit_0(rc_t *rc, uint16_t *p)
{
	uint32_t t = rc_is_bit_0_helper(rc, p);
	return rc->code < t;
}

/* Called ~10 times, but very small, thus inlined */
static speed_inline void rc_update_bit_0(rc_t *rc, uint16_t *p)
{
	rc->range = rc->bound;
	*p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS;
}
static speed_inline void rc_update_bit_1(rc_t *rc, uint16_t *p)
{
	rc->range -= rc->bound;
	rc->code -= rc->bound;
	*p -= *p >> RC_MOVE_BITS;
}
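
/* Adaptation rule: after a decoded 0 the probability is pulled towards
 * 2048 (1 << RC_MODEL_TOTAL_BITS), after a decoded 1 towards 0, each time
 * by 1/32 (>> RC_MOVE_BITS) of the remaining distance. */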

/* Called 4 times in unlzma loop */
static int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol)
{
	if (rc_is_bit_0(rc, p)) {
		rc_update_bit_0(rc, p);
		*symbol *= 2;
		return 0;
	} else {
		rc_update_bit_1(rc, p);
		*symbol = *symbol * 2 + 1;
		return 1;
	}
}

/* Called once */
static ALWAYS_INLINE int rc_direct_bit(rc_t *rc)
{
	rc_normalize(rc);
	rc->range >>= 1;
	if (rc->code >= rc->range) {
		rc->code -= rc->range;
		return 1;
	}
	return 0;
}

/* Called twice */
static speed_inline void
rc_bit_tree_decode(rc_t *rc, uint16_t *p, int num_levels, int *symbol)
{
	int i = num_levels;

	*symbol = 1;
	while (i--)
		rc_get_bit(rc, p + *symbol, symbol);
	*symbol -= 1 << num_levels;
}
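
/* rc_bit_tree_decode() walks a binary probability tree with 2^num_levels
 * leaves: *symbol starts at 1 and doubles as the node index, each decoded
 * bit is shifted in from the right, and the implicit leading 1 is removed
 * at the end, leaving a value in 0 .. 2^num_levels - 1. */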

typedef struct {
	uint8_t pos;
	uint32_t dict_size;
	uint64_t dst_size;
} __attribute__ ((packed)) lzma_header_t;
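
/* Raw .lzma file header, 13 bytes total: one properties byte
 * (pos = (pb * 5 + lp) * 9 + lc), the dictionary size as a little-endian
 * 32-bit value, and the uncompressed size as a little-endian 64-bit value. */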

/* #defines will force compiler to compute/optimize each one with each usage.
 * Have heart and use enum instead. */
enum {
	LZMA_BASE_SIZE = 1846,
	LZMA_LIT_SIZE = 768,

	LZMA_NUM_POS_BITS_MAX = 4,

	LZMA_LEN_NUM_LOW_BITS = 3,
	LZMA_LEN_NUM_MID_BITS = 3,
	LZMA_LEN_NUM_HIGH_BITS = 8,

	LZMA_LEN_CHOICE = 0,
	LZMA_LEN_CHOICE_2 = (LZMA_LEN_CHOICE + 1),
	LZMA_LEN_LOW = (LZMA_LEN_CHOICE_2 + 1),
	LZMA_LEN_MID = (LZMA_LEN_LOW \
	                + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))),
	LZMA_LEN_HIGH = (LZMA_LEN_MID \
	                 + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))),
	LZMA_NUM_LEN_PROBS = (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)),

	LZMA_NUM_STATES = 12,
	LZMA_NUM_LIT_STATES = 7,

	LZMA_START_POS_MODEL_INDEX = 4,
	LZMA_END_POS_MODEL_INDEX = 14,
	LZMA_NUM_FULL_DISTANCES = (1 << (LZMA_END_POS_MODEL_INDEX >> 1)),

	LZMA_NUM_POS_SLOT_BITS = 6,
	LZMA_NUM_LEN_TO_POS_STATES = 4,

	LZMA_NUM_ALIGN_BITS = 4,

	LZMA_MATCH_MIN_LEN = 2,

	LZMA_IS_MATCH = 0,
	LZMA_IS_REP = (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)),
	LZMA_IS_REP_G0 = (LZMA_IS_REP + LZMA_NUM_STATES),
	LZMA_IS_REP_G1 = (LZMA_IS_REP_G0 + LZMA_NUM_STATES),
	LZMA_IS_REP_G2 = (LZMA_IS_REP_G1 + LZMA_NUM_STATES),
	LZMA_IS_REP_0_LONG = (LZMA_IS_REP_G2 + LZMA_NUM_STATES),
	LZMA_POS_SLOT = (LZMA_IS_REP_0_LONG \
	                 + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)),
	LZMA_SPEC_POS = (LZMA_POS_SLOT \
	                 + (LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)),
	LZMA_ALIGN = (LZMA_SPEC_POS \
	              + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX),
	LZMA_LEN_CODER = (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)),
	LZMA_REP_LEN_CODER = (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS),
	LZMA_LITERAL = (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS),
};
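
/* The p[] probability array packs all bit-model groups back to back; the
 * values above are offsets into it. LZMA_LITERAL, the start of the
 * per-context literal coders, works out to 1846, which is exactly
 * LZMA_BASE_SIZE - that is why the allocation below can use LZMA_BASE_SIZE
 * while the initialization loop uses LZMA_LITERAL. */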

USE_DESKTOP(long long) int FAST_FUNC
unpack_lzma_stream(int src_fd, int dst_fd)
{
	USE_DESKTOP(long long total_written = 0;)
	lzma_header_t header;
	int lc, pb, lp;
	uint32_t pos_state_mask;
	uint32_t literal_pos_mask;
	uint32_t pos;
	uint16_t *p;
	uint16_t *prob;
	uint16_t *prob_lit;
	int num_bits;
	int num_probs;
	rc_t *rc;
	int i, mi;
	uint8_t *buffer;
	uint8_t previous_byte = 0;
	size_t buffer_pos = 0, global_pos = 0;
	int len = 0;
	int state = 0;
	uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;

	xread(src_fd, &header, sizeof(header));

	if (header.pos >= (9 * 5 * 5))
		bb_error_msg_and_die("bad header");
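
	/* The single properties byte encodes lc (literal context bits),
	 * lp (literal position bits) and pb (position bits) as
	 * (pb * 5 + lp) * 9 + lc, hence the upper bound of 9*5*5 = 225. */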
	mi = header.pos / 9;
	lc = header.pos % 9;
	pb = mi / 5;
	lp = mi % 5;
	pos_state_mask = (1 << pb) - 1;
	literal_pos_mask = (1 << lp) - 1;

	header.dict_size = SWAP_LE32(header.dict_size);
	header.dst_size = SWAP_LE64(header.dst_size);

	if (header.dict_size == 0)
		header.dict_size = 1;

	buffer = xmalloc(MIN(header.dst_size, header.dict_size));

	num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp));
	p = xmalloc(num_probs * sizeof(*p));
	num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp));
	for (i = 0; i < num_probs; i++)
		p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1;

	rc = rc_init(src_fd); /*, RC_BUFFER_SIZE); */
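
	/* All bit models start at 1024, i.e. probability 1/2. The main loop
	 * below decodes one packet per iteration: either a single literal byte
	 * or a (length, distance) match copied out of the dictionary buffer.
	 * With FEATURE_LZMA_FAST a decoded literal (and a length-1 rep-0 match)
	 * stores its byte directly at the one_byte1 label; in the non-FAST
	 * build both cases reuse the match copy loop further down via the
	 * string: and one_byte2: labels. */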
	while (global_pos + buffer_pos < header.dst_size) {
		int pos_state = (buffer_pos + global_pos) & pos_state_mask;

		prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state;
		if (rc_is_bit_0(rc, prob)) {
			mi = 1;
			rc_update_bit_0(rc, prob);
			prob = (p + LZMA_LITERAL
			        + (LZMA_LIT_SIZE * ((((buffer_pos + global_pos) & literal_pos_mask) << lc)
			                            + (previous_byte >> (8 - lc))
			                           )
			          )
			);

			if (state >= LZMA_NUM_LIT_STATES) {
				int match_byte;

				pos = buffer_pos - rep0;
				while (pos >= header.dict_size)
					pos += header.dict_size;
				match_byte = buffer[pos];
				do {
					int bit;

					match_byte <<= 1;
					bit = match_byte & 0x100;
					prob_lit = prob + 0x100 + bit + mi;
					bit ^= (rc_get_bit(rc, prob_lit, &mi) << 8); /* 0x100 or 0 */
					if (bit)
						break;
				} while (mi < 0x100);
			}
			while (mi < 0x100) {
				prob_lit = prob + mi;
				rc_get_bit(rc, prob_lit, &mi);
			}

			state -= 3;
			if (state < 4-3)
				state = 0;
			if (state >= 10-3)
				state -= 6-3;

			previous_byte = (uint8_t) mi;
#if ENABLE_FEATURE_LZMA_FAST
 one_byte1:
			buffer[buffer_pos++] = previous_byte;
			if (buffer_pos == header.dict_size) {
				buffer_pos = 0;
				global_pos += header.dict_size;
				if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size)
					goto bad;
				USE_DESKTOP(total_written += header.dict_size;)
			}
#else
			len = 1;
			goto one_byte2;
#endif
		} else {
			int offset;
			uint16_t *prob_len;

			rc_update_bit_1(rc, prob);
			prob = p + LZMA_IS_REP + state;
			if (rc_is_bit_0(rc, prob)) {
				rc_update_bit_0(rc, prob);
				rep3 = rep2;
				rep2 = rep1;
				rep1 = rep0;
				state = state < LZMA_NUM_LIT_STATES ? 0 : 3;
				prob = p + LZMA_LEN_CODER;
			} else {
				rc_update_bit_1(rc, prob);
				prob = p + LZMA_IS_REP_G0 + state;
				if (rc_is_bit_0(rc, prob)) {
					rc_update_bit_0(rc, prob);
					prob = (p + LZMA_IS_REP_0_LONG
					        + (state << LZMA_NUM_POS_BITS_MAX)
					        + pos_state
					);
					if (rc_is_bit_0(rc, prob)) {
						rc_update_bit_0(rc, prob);

						state = state < LZMA_NUM_LIT_STATES ? 9 : 11;
#if ENABLE_FEATURE_LZMA_FAST
						pos = buffer_pos - rep0;
						while (pos >= header.dict_size)
							pos += header.dict_size;
						previous_byte = buffer[pos];
						goto one_byte1;
#else
						len = 1;
						goto string;
#endif
					} else {
						rc_update_bit_1(rc, prob);
					}
				} else {
					uint32_t distance;

					rc_update_bit_1(rc, prob);
					prob = p + LZMA_IS_REP_G1 + state;
					if (rc_is_bit_0(rc, prob)) {
						rc_update_bit_0(rc, prob);
						distance = rep1;
					} else {
						rc_update_bit_1(rc, prob);
						prob = p + LZMA_IS_REP_G2 + state;
						if (rc_is_bit_0(rc, prob)) {
							rc_update_bit_0(rc, prob);
							distance = rep2;
						} else {
							rc_update_bit_1(rc, prob);
							distance = rep3;
							rep3 = rep2;
						}
						rep2 = rep1;
					}
					rep1 = rep0;
					rep0 = distance;
				}
				state = state < LZMA_NUM_LIT_STATES ? 8 : 11;
				prob = p + LZMA_REP_LEN_CODER;
			}
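
			/* Decode the match length. Lengths are split into three
			 * ranges: "low" (3 bits), "mid" (3 bits, offset 8) and
			 * "high" (8 bits, offset 16); LZMA_MATCH_MIN_LEN is added
			 * later, giving final lengths of 2..273. */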
			prob_len = prob + LZMA_LEN_CHOICE;
			if (rc_is_bit_0(rc, prob_len)) {
				rc_update_bit_0(rc, prob_len);
				prob_len = (prob + LZMA_LEN_LOW
				            + (pos_state << LZMA_LEN_NUM_LOW_BITS));
				offset = 0;
				num_bits = LZMA_LEN_NUM_LOW_BITS;
			} else {
				rc_update_bit_1(rc, prob_len);
				prob_len = prob + LZMA_LEN_CHOICE_2;
				if (rc_is_bit_0(rc, prob_len)) {
					rc_update_bit_0(rc, prob_len);
					prob_len = (prob + LZMA_LEN_MID
					            + (pos_state << LZMA_LEN_NUM_MID_BITS));
					offset = 1 << LZMA_LEN_NUM_LOW_BITS;
					num_bits = LZMA_LEN_NUM_MID_BITS;
				} else {
					rc_update_bit_1(rc, prob_len);
					prob_len = prob + LZMA_LEN_HIGH;
					offset = ((1 << LZMA_LEN_NUM_LOW_BITS)
					          + (1 << LZMA_LEN_NUM_MID_BITS));
					num_bits = LZMA_LEN_NUM_HIGH_BITS;
				}
			}
			rc_bit_tree_decode(rc, prob_len, num_bits, &len);
			len += offset;
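
			/* A fresh match (state < 4, i.e. not a rep-match) needs a new
			 * distance: a 6-bit pos_slot first, then for slots >= 4 the
			 * extra bits come either from the SPEC_POS models (small
			 * distances) or from direct bits plus 4 ALIGN bits (large
			 * distances). rep0 of 0xFFFFFFFF (0 after the increment) is
			 * the end-of-stream marker. */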
			if (state < 4) {
				int pos_slot;

				state += LZMA_NUM_LIT_STATES;
				prob = p + LZMA_POS_SLOT +
				       ((len < LZMA_NUM_LEN_TO_POS_STATES ? len :
				         LZMA_NUM_LEN_TO_POS_STATES - 1)
				        << LZMA_NUM_POS_SLOT_BITS);
				rc_bit_tree_decode(rc, prob, LZMA_NUM_POS_SLOT_BITS,
						&pos_slot);
				if (pos_slot >= LZMA_START_POS_MODEL_INDEX) {
					num_bits = (pos_slot >> 1) - 1;
					rep0 = 2 | (pos_slot & 1);
					if (pos_slot < LZMA_END_POS_MODEL_INDEX) {
						rep0 <<= num_bits;
						prob = p + LZMA_SPEC_POS + rep0 - pos_slot - 1;
					} else {
						num_bits -= LZMA_NUM_ALIGN_BITS;
						while (num_bits--)
							rep0 = (rep0 << 1) | rc_direct_bit(rc);
						prob = p + LZMA_ALIGN;
						rep0 <<= LZMA_NUM_ALIGN_BITS;
						num_bits = LZMA_NUM_ALIGN_BITS;
					}
					i = 1;
					mi = 1;
					while (num_bits--) {
						if (rc_get_bit(rc, prob + mi, &mi))
							rep0 |= i;
						i <<= 1;
					}
				} else
					rep0 = pos_slot;
				if (++rep0 == 0)
					break;
			}

			len += LZMA_MATCH_MIN_LEN;
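
			/* Copy len bytes from distance rep0 back in the dictionary.
			 * The buffer is circular: buffer_pos wraps at dict_size, and
			 * each time it wraps the full buffer is flushed to dst_fd. */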
 SKIP_FEATURE_LZMA_FAST(string:)
			do {
				pos = buffer_pos - rep0;
				while (pos >= header.dict_size)
					pos += header.dict_size;
				previous_byte = buffer[pos];
 SKIP_FEATURE_LZMA_FAST(one_byte2:)
				buffer[buffer_pos++] = previous_byte;
				if (buffer_pos == header.dict_size) {
					buffer_pos = 0;
					global_pos += header.dict_size;
					if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size)
						goto bad;
					USE_DESKTOP(total_written += header.dict_size;)
				}
				len--;
			} while (len != 0 && buffer_pos < header.dst_size);
		}
	}

	{
		SKIP_DESKTOP(int total_written = 0; /* success */)
		USE_DESKTOP(total_written += buffer_pos;)
		if (full_write(dst_fd, buffer, buffer_pos) != (ssize_t)buffer_pos) {
 bad:
			total_written = -1; /* failure */
		}
		rc_free(rc);
		free(p);
		free(buffer);
		return total_written;
	}
}
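
/*
 * Minimal usage sketch (an illustration, not part of this file): callers
 * elsewhere in busybox hand unpack_lzma_stream() two already-open file
 * descriptors. Assuming both open() calls succeed:
 *
 *   int src = open("foo.lzma", O_RDONLY);
 *   int dst = open("foo", O_WRONLY | O_CREAT | O_TRUNC, 0666);
 *   ... = unpack_lzma_stream(src, dst);
 *
 * On success it returns the number of bytes written (only when the DESKTOP
 * build keeps the long long counter; otherwise 0), and -1 if a write to
 * dst_fd fails; read errors abort via bb_error_msg_and_die().
 */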