/* shcgen.c */
  1. /* Copyright (C) 1994, 1996, 1998 Aladdin Enterprises. All rights reserved.
  2. This file is part of AFPL Ghostscript.
  3. AFPL Ghostscript is distributed with NO WARRANTY OF ANY KIND. No author or
  4. distributor accepts any responsibility for the consequences of using it, or
  5. for whether it serves any particular purpose or works at all, unless he or
  6. she says so in writing. Refer to the Aladdin Free Public License (the
  7. "License") for full details.
  8. Every copy of AFPL Ghostscript must include a copy of the License, normally
  9. in a plain ASCII text file named PUBLIC. The License grants you the right
  10. to copy, modify and redistribute AFPL Ghostscript, but only under certain
  11. conditions described in the License. Among other things, the License
  12. requires that the copyright notice and this notice be preserved on all
  13. copies.
  14. */
  15. /*$Id: shcgen.c,v 1.2 2000/09/19 19:00:50 lpd Exp $ */
  16. /* Generate (bounded) Huffman code definitions from frequencies, */
  17. /* and tables from definitions. */
  18. #include "memory_.h"
  19. #include "stdio_.h"
  20. #include <stdlib.h> /* for qsort */
  21. #include "gdebug.h"
  22. #include "gserror.h"
  23. #include "gserrors.h"
  24. #include "gsmemory.h"
  25. #include "scommon.h"
  26. #include "shc.h"
  27. #include "shcgen.h"
  28. /* ------ Frequency -> definition procedure ------ */
  29. /* Define a node for the Huffman code tree. */
  30. typedef struct count_node_s count_node;
  31. struct count_node_s {
  32. long freq; /* frequency of value */
  33. uint value; /* data value being encoded */
  34. uint code_length; /* length of Huffman code */
  35. count_node *next; /* next node in freq-sorted list */
  36. count_node *left; /* left child in tree (smaller code_length) */
  37. count_node *right; /* right child in tree (greater code_length) */
  38. };
  39. #ifdef DEBUG
  40. # define debug_print_nodes(nodes, n, tag, lengths)\
  41. if ( gs_debug_c('W') ) print_nodes_proc(nodes, n, tag, lengths);
  42. private void
  43. print_nodes_proc(const count_node * nodes, int n, const char *tag, int lengths)
  44. {
  45. int i;
  46. dlprintf1("[w]---------------- %s ----------------\n", tag);
  47. for (i = 0; i < n; ++i)
  48. dlprintf7("[w]node %d: f=%ld v=%d len=%d N=%d L=%d R=%d\n",
  49. i, nodes[i].freq, nodes[i].value, nodes[i].code_length,
  50. (nodes[i].next == 0 ? -1 : (int)(nodes[i].next - nodes)),
  51. (nodes[i].left == 0 ? -1 : (int)(nodes[i].left - nodes)),
  52. (nodes[i].right == 0 ? -1 : (int)(nodes[i].right - nodes)));
  53. for (i = lengths; i > 0;) {
  54. int j = i;
  55. int len = nodes[--j].code_length;
  56. while (j > 0 && nodes[j - 1].code_length == len)
  57. --j;
  58. dlprintf2("[w]%d codes of length %d\n", i - j, len);
  59. i = j;
  60. }
  61. }
  62. #else
  63. # define debug_print_nodes(nodes, n, tag, lengths) DO_NOTHING
  64. #endif
  65. /* Node comparison procedures for sorting. */
  66. #define pn1 ((const count_node *)p1)
  67. #define pn2 ((const count_node *)p2)
  68. /* Sort by decreasing frequency. */
  69. private int
  70. compare_freqs(const void *p1, const void *p2)
  71. {
  72. long diff = pn2->freq - pn1->freq;
  73. return (diff < 0 ? -1 : diff > 0 ? 1 : 0);
  74. }
  75. /* Sort by increasing code length, and secondarily by decreasing frequency. */
  76. private int
  77. compare_code_lengths(const void *p1, const void *p2)
  78. {
  79. int diff = pn1->code_length - pn2->code_length;
  80. return (diff < 0 ? -1 : diff > 0 ? 1 : compare_freqs(p1, p2));
  81. }
  82. /* Sort by increasing code value. */
  83. private int
  84. compare_values(const void *p1, const void *p2)
  85. {
  86. return (pn1->value < pn2->value ? -1 :
  87. pn1->value > pn2->value ? 1 : 0);
  88. }
  89. #undef pn1
  90. #undef pn2
  91. /* Adjust code lengths so that none of them exceeds max_length. */
  92. /* We break this out just to help organize the code; it's only called */
  93. /* from one place in hc_compute. */
  94. private void
  95. hc_limit_code_lengths(count_node * nodes, uint num_values, int max_length)
  96. {
  97. int needed; /* # of max_length codes we need to free up */
  98. count_node *longest = nodes + num_values;
  99. { /* Compute the number of additional max_length codes */
  100. /* we need to make available. */
  101. int length = longest[-1].code_length;
  102. int next_length;
  103. int avail = 0;
  104. while ((next_length = longest[-1].code_length) > max_length) {
  105. avail >>= length - next_length;
  106. length = next_length;
  107. (--longest)->code_length = max_length;
  108. ++avail;
  109. }
  110. needed = (nodes + num_values - longest) -
  111. (avail >>= (length - max_length));
  112. if_debug2('W', "[w]avail=%d, needed=%d\n",
  113. avail, needed);
  114. }
  115. /* Skip over all max_length codes. */
  116. while (longest[-1].code_length == max_length)
  117. --longest;
  118. /*
  119. * To make available a code of length N, suppose that the next
  120. * shortest used code is of length M.
  121. * We take the lowest-frequency code of length M and change it
  122. * to M+1; we then have to compensate by reducing the length of
  123. * some of the highest-frequency codes of length N, as follows:
  124. * M new lengths for codes of length N
  125. * --- -----------
  126. * N-1 (none)
  127. * N-2 N-1
  128. * <N-2 M+2, M+2, N-1
  129. * In the present situation, N = max_length.
  130. */
  131. for (; needed > 0; --needed) { /* longest points to the first code of length max_length. */
  132. /* Since codes are sorted by increasing code length, */
  133. /* longest-1 is the desired code of length M. */
  134. int M1 = ++(longest[-1].code_length);
  135. switch (max_length - M1) {
  136. case 0: /* M == N-1 */
  137. --longest;
  138. break;
  139. case 1: /* M == N-2 */
  140. longest++->code_length = M1;
  141. break;
  142. default:
  143. longest->code_length = M1 + 1;
  144. longest[1].code_length = M1 + 1;
  145. longest[2].code_length--;
  146. longest += 3;
  147. }
  148. }
  149. }
  150. /* Compute an optimal Huffman code from an input data set. */
  151. /* The client must have set all the elements of *def. */
  152. int
  153. hc_compute(hc_definition * def, const long *freqs, gs_memory_t * mem)
  154. {
  155. uint num_values = def->num_values;
  156. count_node *nodes =
  157. (count_node *) gs_alloc_byte_array(mem, num_values * 2 - 1,
  158. sizeof(count_node), "hc_compute");
  159. int i;
  160. count_node *lowest;
  161. count_node *comb;
  162. if (nodes == 0)
  163. return_error(gs_error_VMerror);
  164. /* Create leaf nodes for the input data. */
  165. for (i = 0; i < num_values; ++i)
  166. nodes[i].freq = freqs[i], nodes[i].value = i;
  167. /* Create a list sorted by increasing frequency. */
  168. /* Also initialize the tree structure. */
  169. qsort(nodes, num_values, sizeof(count_node), compare_freqs);
  170. for (i = 0; i < num_values; ++i)
  171. nodes[i].next = &nodes[i - 1],
  172. nodes[i].code_length = 0,
  173. nodes[i].left = nodes[i].right = 0;
  174. nodes[0].next = 0;
  175. debug_print_nodes(nodes, num_values, "after sort", 0);
  176. /* Construct the Huffman code tree. */
  177. for (lowest = &nodes[num_values - 1], comb = &nodes[num_values];;
  178. ++comb
  179. ) {
  180. count_node *pn1 = lowest;
  181. count_node *pn2 = pn1->next;
  182. long freq = pn1->freq + pn2->freq;
  183. /* Create a parent for the two lowest-frequency nodes. */
  184. lowest = pn2->next;
  185. comb->freq = freq;
  186. if (pn1->code_length <= pn2->code_length)
  187. comb->left = pn1, comb->right = pn2,
  188. comb->code_length = pn2->code_length + 1;
  189. else
  190. comb->left = pn2, comb->right = pn1,
  191. comb->code_length = pn1->code_length + 1;
  192. if (lowest == 0) /* no nodes left to combine */
  193. break;
  194. /* Insert comb in the sorted list. */
  195. if (freq < lowest->freq)
  196. comb->next = lowest, lowest = comb;
  197. else {
  198. count_node *here = lowest;
  199. while (here->next != 0 && freq >= here->next->freq)
  200. here = here->next;
  201. comb->next = here->next;
  202. here->next = comb;
  203. }
  204. }
  205. /* comb (i.e., &nodes[num_values * 2 - 2] is the root of the tree. */
  206. /* Note that the left and right children of an interior node */
  207. /* were constructed before, and therefore have lower indices */
  208. /* in the nodes array than, the parent node. Thus we can assign */
  209. /* the code lengths (node depths) in a single descending-order */
  210. /* sweep. */
  211. comb++->code_length = 0;
  212. while (comb > nodes + num_values) {
  213. --comb;
  214. comb->left->code_length = comb->right->code_length =
  215. comb->code_length + 1;
  216. }
  217. debug_print_nodes(nodes, num_values * 2 - 1, "after combine", 0);
  218. /* Sort the leaves again by code length. */
  219. qsort(nodes, num_values, sizeof(count_node), compare_code_lengths);
  220. debug_print_nodes(nodes, num_values, "after re-sort", num_values);
  221. /* Limit the code length to def->num_counts. */
  222. hc_limit_code_lengths(nodes, num_values, def->num_counts);
  223. debug_print_nodes(nodes, num_values, "after limit", num_values);
  224. /* Sort within each code length by increasing code value. */
  225. /* This doesn't affect data compression, but it makes */
  226. /* the code definition itself compress better using our */
  227. /* incremental encoding. */
  228. for (i = num_values; i > 0;) {
  229. int j = i;
  230. int len = nodes[--j].code_length;
  231. while (j > 0 && nodes[j - 1].code_length == len)
  232. --j;
  233. qsort(&nodes[j], i - j, sizeof(count_node), compare_values);
  234. i = j;
  235. }
  236. /* Extract the definition from the nodes. */
  237. memset(def->counts, 0, sizeof(*def->counts) * (def->num_counts + 1));
  238. for (i = 0; i < num_values; ++i) {
  239. def->values[i] = nodes[i].value;
  240. def->counts[nodes[i].code_length]++;
  241. }
  242. /* All done, release working storage. */
  243. gs_free_object(mem, nodes, "hc_compute");
  244. return 0;
  245. }
  246. /* ------ Byte string <-> definition procedures ------ */
  247. /*
  248. * We define a compressed representation for (well-behaved) definitions
  249. * as a byte string. A "well-behaved" definition is one where if
  250. * code values A and B have the same code length and A < B,
  251. * A precedes B in the values table of the definition, and hence
  252. * A's encoding lexicographically precedes B's.
  253. *
  254. * The successive bytes in the compressed string give the code lengths for
  255. * runs of decoded values, in the form nnnnllll where nnnn is the number of
  256. * consecutive values -1 and llll is the code length -1.
  257. */
  258. /* Convert a definition to a byte string. */
  259. /* The caller must provide the byte string, of length def->num_values. */
  260. /* Assume (do not check) that the definition is well-behaved. */
  261. /* Return the actual length of the string. */
  262. int
  263. hc_bytes_from_definition(byte * dbytes, const hc_definition * def)
  264. {
  265. int i, j;
  266. byte *bp = dbytes;
  267. const byte *lp = dbytes;
  268. const byte *end = dbytes + def->num_values;
  269. const ushort *values = def->values;
  270. /* Temporarily use the output string as a map from */
  271. /* values to code lengths. */
  272. for (i = 1; i <= def->num_counts; i++)
  273. for (j = 0; j < def->counts[i]; j++)
  274. bp[*values++] = i;
  275. /* Now construct the actual string. */
  276. while (lp < end) {
  277. const byte *vp;
  278. byte len = *lp;
  279. for (vp = lp + 1; vp < end && vp < lp + 16 && *vp == len;)
  280. vp++;
  281. *bp++ = ((vp - lp - 1) << 4) + (len - 1);
  282. lp = vp;
  283. }
  284. return bp - dbytes;
  285. }
  286. /* Extract num_counts and num_values from a byte string. */
  287. void
  288. hc_sizes_from_bytes(hc_definition * def, const byte * dbytes, int num_bytes)
  289. {
  290. uint num_counts = 0, num_values = 0;
  291. int i;
  292. for (i = 0; i < num_bytes; i++) {
  293. int n = (dbytes[i] >> 4) + 1;
  294. int l = (dbytes[i] & 15) + 1;
  295. if (l > num_counts)
  296. num_counts = l;
  297. num_values += n;
  298. }
  299. def->num_counts = num_counts;
  300. def->num_values = num_values;
  301. }
  302. /* Convert a byte string back to a definition. */
  303. /* The caller must initialize *def, including allocating counts and values. */
  304. void
  305. hc_definition_from_bytes(hc_definition * def, const byte * dbytes)
  306. {
  307. int v, i;
  308. ushort counts[max_hc_length + 1];
  309. /* Make a first pass to set the counts for each code length. */
  310. memset(counts, 0, sizeof(counts[0]) * (def->num_counts + 1));
  311. for (i = 0, v = 0; v < def->num_values; i++) {
  312. int n = (dbytes[i] >> 4) + 1;
  313. int l = (dbytes[i] & 15) + 1;
  314. counts[l] += n;
  315. v += n;
  316. }
  317. /* Now fill in the definition. */
  318. memcpy(def->counts, counts, sizeof(counts[0]) * (def->num_counts + 1));
  319. for (i = 1, v = 0; i <= def->num_counts; i++) {
  320. uint prev = counts[i];
  321. counts[i] = v;
  322. v += prev;
  323. }
  324. for (i = 0, v = 0; v < def->num_values; i++) {
  325. int n = (dbytes[i] >> 4) + 1;
  326. int l = (dbytes[i] & 15) + 1;
  327. int j;
  328. for (j = 0; j < n; n++)
  329. def->values[counts[l]++] = v++;
  330. }
  331. }
  332. /* ------ Definition -> table procedures ------ */
  333. /* Generate the encoding table from the definition. */
  334. /* The size of the encode array is def->num_values. */
  335. void
  336. hc_make_encoding(hce_code * encode, const hc_definition * def)
  337. {
  338. uint next = 0;
  339. const ushort *pvalue = def->values;
  340. uint i, k;
  341. for (i = 1; i <= def->num_counts; i++) {
  342. for (k = 0; k < def->counts[i]; k++, pvalue++, next++) {
  343. hce_code *pce = encode + *pvalue;
  344. pce->code = next;
  345. pce->code_length = i;
  346. }
  347. next <<= 1;
  348. }
  349. }
  350. /* We decode in two steps, first indexing into a table with */
  351. /* a fixed number of bits from the source, and then indexing into */
  352. /* an auxiliary table if necessary. (See shc.h for details.) */
  353. /* Calculate the size of the decoding table. */
  354. uint
  355. hc_sizeof_decoding(const hc_definition * def, int initial_bits)
  356. {
  357. uint size = 1 << initial_bits;
  358. uint carry = 0, mask = (uint) ~ 1;
  359. uint i;
  360. for (i = initial_bits + 1; i <= def->num_counts;
  361. i++, carry <<= 1, mask <<= 1
  362. ) {
  363. carry += def->counts[i];
  364. size += carry & mask;
  365. carry &= ~mask;
  366. }
  367. return size;
  368. }
  369. /* Generate the decoding tables. */
  370. void
  371. hc_make_decoding(hcd_code * decode, const hc_definition * def,
  372. int initial_bits)
  373. { /* Make entries for single-dispatch codes. */
  374. {
  375. hcd_code *pcd = decode;
  376. const ushort *pvalue = def->values;
  377. uint i, k, d;
  378. for (i = 0; i <= initial_bits; i++) {
  379. for (k = 0; k < def->counts[i]; k++, pvalue++) {
  380. for (d = 1 << (initial_bits - i); d > 0;
  381. d--, pcd++
  382. )
  383. pcd->value = *pvalue,
  384. pcd->code_length = i;
  385. }
  386. }
  387. }
  388. /* Make entries for two-dispatch codes. */
  389. /* By working backward, we can do this more easily */
  390. /* in a single pass. */
  391. {
  392. uint dsize = hc_sizeof_decoding(def, initial_bits);
  393. hcd_code *pcd = decode + (1 << initial_bits);
  394. hcd_code *pcd2 = decode + dsize;
  395. const ushort *pvalue = def->values + def->num_values;
  396. uint entries_left = 0, slots_left = 0, mult_shift = 0;
  397. uint i = def->num_counts + 1, j;
  398. for (;;) {
  399. if (slots_left == 0) {
  400. if (entries_left != 0) {
  401. slots_left = 1 << (i - initial_bits);
  402. mult_shift = 0;
  403. continue;
  404. }
  405. if (--i <= initial_bits)
  406. break;
  407. entries_left = def->counts[i];
  408. continue;
  409. }
  410. if (entries_left == 0) {
  411. entries_left = def->counts[--i];
  412. mult_shift++;
  413. continue;
  414. }
  415. --entries_left, --pvalue;
  416. for (j = 1 << mult_shift; j > 0; j--) {
  417. --pcd2;
  418. pcd2->value = *pvalue;
  419. pcd2->code_length = i - initial_bits;
  420. }
  421. if ((slots_left -= 1 << mult_shift) == 0) {
  422. --pcd;
  423. pcd->value = pcd2 - decode;
  424. pcd->code_length = i + mult_shift;
  425. }
  426. }
  427. }
  428. }