huffcode.c 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810
  1. /*
  2. * CDE - Common Desktop Environment
  3. *
  4. * Copyright (c) 1993-2012, The Open Group. All rights reserved.
  5. *
  6. * These libraries and programs are free software; you can
  7. * redistribute them and/or modify them under the terms of the GNU
  8. * Lesser General Public License as published by the Free Software
  9. * Foundation; either version 2 of the License, or (at your option)
  10. * any later version.
  11. *
  12. * These libraries and programs are distributed in the hope that
  13. * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14. * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15. * PURPOSE. See the GNU Lesser General Public License for more
  16. * details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with these libraries and programs; if not, write
  20. * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21. * Floor, Boston, MA 02110-1301 USA
  22. */
  23. /*
  24. * COMPONENT_NAME: austext
  25. *
  26. * FUNCTIONS: build_tree
  27. * char_label
  28. * huffman_code
  29. * init_treebase
  30. * main
  31. * next_sorted_node
  32. * print_usage
  33. * strrev
  34. * user_args_processor
  35. *
  36. * ORIGINS: 27
  37. *
  38. *
  39. * (C) COPYRIGHT International Business Machines Corp. 1990,1996
  40. * All Rights Reserved
  41. * Licensed Materials - Property of IBM
  42. * US Government Users Restricted Rights - Use, duplication or
  43. * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
  44. */
  45. /************************** HUFFCODE.C ******************************
  46. * $XConsortium: huffcode.c /main/9 1996/11/14 15:31:05 rcs $
  47. * 12/90.
  48. * Counts frequency of occurrence of every possible byte value of input text.
  49. * Creates Huffman Code Table based on byte frequencies and writes it
  50. * in 2 formats to 2 different output files.
  51. * The encode table (.huf) maintains the frequency counts and explicitly
  52. * includes the huffman code strings. Generally speaking, the .huf file
  53. * is intended for humans to read. The decode table (.c) is an array
  54. * of integers meant to be compiled into an object module, then linked
  55. * into the decode program. The .c format closely resembles the original
  56. * huffman code tree in this program.
  57. * By keeping the tree as an obscure array of integers,
  58. * the huffman code can double as an encryption technique,
  59. * and the decoding method kept somewhat proprietary.
  60. *
  61. * For a good discussion of Huffman codes and related algorithms,
  62. * see "Data Compression Techniques and Applications Hardware and
  63. * Software Considerations" by Gilbert Held and Thomas R. Marshall.
  64. * The tree itself is balanced to minimize longest bitstring length
  65. * per Eugene Schwartz, Information and Control 7, 37-44 (1964).
  66. *
  67. * At beginning of each new execution, the program tries to
  68. * open the .huf table file and continue byte frequency counting
  69. * from the last run.
  70. * If the .huf file doesn't exist, the table's counts are
  71. * initialized to zeroes. The .c decode table is recomputed fresh
  72. * each run, whether it existed before or not.
  73. *
  74. * If the input file is not specified then the frequencies in the table
  75. * are not changed, and the huffman codes are recomputed with the
  76. * existing frequencies.
  77. *
  78. * THIS PROGRAM DOES NOT CHECK .HUF FILE FORMAT!--it had better be correct.
  79. *
  80. * HUFFMAN ENCODE TABLE (.huf) FILE FORMAT:
  81. * Each line represents each possible byte value (0 - 255),
  82. * the huffman 'literal' character (#256), or comments.
  83. * There are exactly 257 lines sorted by decreasing count.
  84. * There are four fields, each separated by one or more tabs (\t).
  85. *
  86. * 1. CHARACTER. a number from 0 to 256.
  87. *
  88. * The 'character' represented by the number 256 is the literal.
  89. * it represents all characters whose frequency is so low that
  90. * there is no huffman code translation--this reduces the max
  91. * length of the coded bit string when there are lots of zero
  92. * or low frequency bytes in the input. For example,
  93. * pure ascii text files only occasionally have byte values
  94. * less than 32 (control chars) and rarely greater than 127
  95. * (high order bit turned on).
  96. *
  97. * 2. HUFFMAN CODE. a string of binary digits (0's and 1's).
  98. * Each string is unique to that character.
  99. * This field will consist of a single blank, when the character
  100. * will be coded by the huffman literal. If the code of
  101. * the literal itself is blank, then literal coding is
  102. * not used in this table--all characters are represented
  103. * by complete huffman code strings.
  104. *
  105. * 3. COUNT. The number of times this character appeared in the text.
  106. * The literal's count equals the sum of the counts of all the
  107. * real characters which are represented by the literal.
  108. * The literal's count may be 0 if all the characters it
  109. * represents have zero frequencies.
  110. *
  111. * 4. COMMENTS. A label depicting the printable char or its description.
  112. *
  113. * HUFFMAN DECODE TABLE (.c) FILE FORMAT:
  114. * A sequence of integers formatted as a C array of integer pairs
  115. * and intended to be compiled and linked into the decode program.
  116. * Each huffman tree node contains two integers.
  117. * The root of the tree is the LAST integer pair.
  118. * The first (left) integer contains the array index down the '0' branch,
  119. * the right integer points down the '1' branch.
  120. * However if an integer is negative, the decoding ceases and the
  121. * resulting plaintext character is the negative integer + 257,
  122. * and will always be in the range 0 - 255, or 256 for the literal code.
  123. *
  124. * $Log$
  125. * Revision 2.3 1996/03/25 18:55:04 miker
  126. * Changed FILENAME_MAX to _POSIX_PATH_MAX.
  127. *
  128. * Revision 2.2 1995/10/25 17:50:34 miker
  129. * Added prolog.
  130. *
  131. * Revision 2.1 1995/09/22 20:46:28 miker
  132. * Freeze DtSearch 0.1, AusText 2.1.8
  133. *
  134. * Revision 1.4 1995/09/19 22:04:11 miker
  135. * Print out nonascii chars in .huf file comments.
  136. *
  137. * Revision 1.3 1995/09/05 18:08:00 miker
  138. * Name changes for DtSearch.
  139. */
  140. #include "SearchP.h"
  141. #include <limits.h>
  142. #include <ctype.h>
  143. #include <errno.h>
  144. #include <locale.h>
  145. #include <sys/stat.h>
  146. #include <stdlib.h>
  147. #define MS_huff 30 /* message catalog set number */
  148. #define DELIMITERS "\t\n" /* betw fields in .huf file */
  149. #define LAST_BIT '-'
  150. #define MAX_BITLEN 24
  151. #define MAX_NODES 514
  152. /*
  153. * 256 chars + 'literal' char = max leaves, therefore max treesize
  154. * = 2n - 1 = 513
  155. */
  156. /*----------------------- HCTREE --------------------------*/
  157. /* tree is also a table so tree ptrs are table indexes:
  158. * 0 - 255 = characters themselves (leaves at base of tree).
  159. * 256 = literal code (special char/leaf).
  160. * > 256 = higher nodes.
  161. * Global 'last_node' = highest actual node alloc so far.
  162. * When tree completed, last_node = root of tree.
  163. * -1 = null links.
  164. */
  165. typedef struct {
  166. char bit; /* '0' or '1' (assoc with link to
  167. * father) */
  168. long count; /* freq of occurrence of char */
  169. int sort; /* specifies output sort order */
  170. int father; /* index points UP toward root of
  171. * tree */
  172. int son0; /* index of '0' (left) subnode */
  173. int son1; /* index of '1' (right) subnode */
  174. } HCTREE;
  175. /*------------------------ GLOBALS ---------------------------*/
  176. static int last_node = 256;
  177. long total_count;
  178. long literal_threshold = 0L;
  179. int debug_switch = FALSE;
  180. int literal_coding_on = TRUE;
  181. int input_file_specified; /* TRUE if user enters
  182. * filename */
  183. int no_huffcode_file; /* TRUE if table file not
  184. * found */
  185. HCTREE hctree1[MAX_NODES];
  186. char filename_input[_POSIX_PATH_MAX];
  187. char filename_huf[_POSIX_PATH_MAX];
  188. char filename_huc[_POSIX_PATH_MAX];
  189. #ifndef TURBO_COMPILER
  190. /****************************************/
  191. /* */
  192. /* strrev */
  193. /* */
  194. /****************************************/
  195. static char *strrev (char *string)
  196. {
  197. int i;
  198. int j;
  199. char temp;
  200. for (i = 0, j = strlen (string) - 1; i < j; i++, j--) {
  201. temp = string[i];
  202. string[i] = string[j];
  203. string[j] = temp;
  204. }
  205. return string;
  206. }
  207. #endif /* !TURBO_COMPILER */
  208. /****************************************/
  209. /* */
  210. /* Build Tree */
  211. /* */
  212. /****************************************/
  213. /* Each call joins the two nodes with smallest count
  214. * into a single higher level node. If there are more than 2 nodes with
  215. * similar 'smallest' counts, then within that group the 2 nodes with the
  216. * shortest current bitstring length are joined.
  217. * Returns TRUE for each successful lower level join.
  218. * Returns FALSE when final join is made at highest level (root).
  219. */
  220. static int build_tree (void)
  221. {
  222. int i, j;
  223. int low0 = -1;
  224. int low1 = -1;
  225. int len0 = 0;
  226. int len1 = 0;
  227. int curr;
  228. /* find 2 lowest counts */
  229. for (i = 0; i < 257; i++) {
  230. /* skip over real chars with counts <= 'literal' threshold */
  231. if (literal_coding_on
  232. && i != 256
  233. && hctree1[i].count <= literal_threshold) {
  234. hctree1[i].sort = MAX_BITLEN + 1;
  235. continue;
  236. }
  237. /* skip over literal if literal coding turned off */
  238. if (i == 256 && !literal_coding_on) {
  239. hctree1[256].sort = MAX_BITLEN + 1;
  240. continue;
  241. }
  242. /*
  243. * Ascend to highest tree level for current table entry,
  244. * putting length of bitstring into sort field. Save
  245. * highest tree level in curr.
  246. */
  247. hctree1[i].sort = 0;
  248. for (j = i; j != -1; j = hctree1[j].father) {
  249. hctree1[i].sort++;
  250. curr = j;
  251. }
  252. /*
  253. * sanity checks after ascending tree: 1. if bit strings
  254. * have grown too large, quit. 2. if curr points to top
  255. * tree level, quit.
  256. */
  257. if (hctree1[i].sort > MAX_BITLEN) {
  258. fprintf (stderr, "%s", CATGETS(dtsearch_catd, MS_huff, 30,
  259. "\n183 Bit strings have grown too large. You probably "
  260. "have literals\n turned off with grossly unbalanced "
  261. "character counts.\n\7"));
  262. exit (2);
  263. }
  264. if (hctree1[curr].count >= total_count) {
  265. fprintf (stderr, "%s", CATGETS(dtsearch_catd, MS_huff, 31,
  266. "\n191 Programming Error: Still trying to build\n"
  267. " Huffman Code Tree after root created.\n\7"));
  268. exit (2);
  269. }
  270. /*
  271. * if curr ptr already joins low0 or low1, try the next
  272. * table entry
  273. */
  274. if (curr == low0 || curr == low1)
  275. continue;
  276. /*
  277. * If curr count is less than low0, or if curr count = low0
  278. * but curr bitstring length is less, replace both low0 and
  279. * low1. (that way, we keep low0 always <= low1)
  280. */
  281. if (low0 == -1 || hctree1[curr].count < hctree1[low0].count ||
  282. (hctree1[curr].count == hctree1[low0].count && hctree1[i].sort < len0)) {
  283. low1 = low0;
  284. len1 = len0;
  285. low0 = curr;
  286. len0 = hctree1[i].sort;
  287. continue;
  288. }
  289. /*
  290. * At this point curr count is 'greater' than low0. If curr
  291. * count is less than low1, or if curr count = low1 but
  292. * curr bitstring length is less, replace only low1
  293. */
  294. if (low1 == -1 || hctree1[curr].count < hctree1[low1].count ||
  295. (hctree1[curr].count == hctree1[low1].count && hctree1[i].sort < len1)) {
  296. low1 = curr;
  297. len1 = hctree1[i].sort;
  298. continue;
  299. }
  300. /*
  301. * default: curr count is greater than BOTH low0 and low1,
  302. * try next table entry
  303. */
  304. } /* end loop to find two lowest counts */
  305. /* low0 and low1 now point to two lowest count nodes.
  306. * link in low0 and low1 to next available new node.
  307. */
  308. last_node++;
  309. hctree1[low0].bit = '0';
  310. hctree1[low0].father = last_node;
  311. hctree1[low1].bit = '1';
  312. hctree1[low1].father = last_node;
  313. hctree1[last_node].bit = LAST_BIT;
  314. hctree1[last_node].father = -1;
  315. hctree1[last_node].count = hctree1[low0].count + hctree1[low1].count;
  316. hctree1[last_node].son0 = low0;
  317. hctree1[last_node].son1 = low1;
  318. if (debug_switch)
  319. printf ("%3d: low0=%6ld\tlow1=%6ld\tsum=%6ld\t(%ld)\n",
  320. last_node, hctree1[low0].count, hctree1[low1].count,
  321. hctree1[last_node].count, total_count);
  322. if (hctree1[last_node].count < total_count)
  323. return TRUE;
  324. else
  325. return FALSE;
  326. } /* end of function build_tree */
  327. /****************************************/
  328. /* */
  329. /* Char Label */
  330. /* */
  331. /****************************************/
  332. static char *char_label (int x)
  333. {
  334. static char buf[64];
  335. switch (x) {
  336. case 0:
  337. return "NULL";
  338. case 8:
  339. return "\\b (backspace)";
  340. case 9:
  341. return "\\t (tab)";
  342. case 10:
  343. return "\\n (linefeed)";
  344. case 11:
  345. return "\\v (vert tab)";
  346. case 12:
  347. return "\\f (form feed)";
  348. case 13:
  349. return "\\r (carr retn)";
  350. case 26:
  351. return "CTRL-Z (EOF)";
  352. case 27:
  353. return "CTRL-[ (ESC)";
  354. case 31:
  355. return "CTRL-dash";
  356. case 32:
  357. return "SPACE (blank)";
  358. case 45:
  359. return "- (dash)";
  360. case 95:
  361. return "_ (underscore)";
  362. case 127:
  363. return "DEL";
  364. case 256:
  365. return "*** LITERAL CODE ***";
  366. default:
  367. if (x > 256)
  368. return "";
  369. else if (x < 32) {
  370. snprintf(buf, sizeof(buf), "'CTRL-%c'", 0x40 | x);
  371. return buf;
  372. }
  373. else if (x >= 128) {
  374. snprintf(buf, sizeof(buf), "%s", CATGETS(dtsearch_catd, MS_huff, 32,
  375. "(nonascii char, high bit set)"));
  376. return buf;
  377. }
  378. else {
  379. sprintf (buf, "'%c'", x);
  380. return buf;
  381. }
  382. }
  383. } /* end of function char_label */
  384. /****************************************/
  385. /* */
  386. /* Next Sorted Node */
  387. /* */
  388. /****************************************/
  389. /* Called repeatedly, returns the next treebase node in sorted order.
  390. * Sort order is by length of Huffman Code String.
  391. * Caller must pass index of last node returned (neg at first call).
  392. * Lasti should never be larger than treebase.
  393. */
  394. static int next_sorted_node (int lasti)
  395. {
  396. int i;
  397. int nexti = -1;
  398. long nextsortval = MAX_BITLEN + 2;
  399. /* permanently mark last returned node as unavailable */
  400. if (lasti >= 0)
  401. hctree1[lasti].sort = MAX_BITLEN + 2;
  402. /* find next shortest string length */
  403. for (i = 0; i < 257; i++)
  404. if (hctree1[i].sort < nextsortval) {
  405. nextsortval = hctree1[i].sort;
  406. nexti = i;
  407. }
  408. return nexti;
  409. } /* end of function next_sorted_node */
  410. /****************************************/
  411. /* */
  412. /* Initialize Treebase */
  413. /* */
  414. /****************************************/
  415. /* 'Treebase' is original 257 character nodes (including literal code).
  416. * If huffcode table file exists, initializes treebase with its values,
  417. * else initializes treebase with zero counts.
  418. */
  419. static void init_treebase (void)
  420. {
  421. int i;
  422. FILE *instream_huf;
  423. char filebuf[128];
  424. total_count = 0L;
  425. /* .huf table file does not exist--zero all counts */
  426. if ((instream_huf = fopen (filename_huf, "r")) == NULL) {
  427. no_huffcode_file = TRUE;
  428. for (i = 0; i < 257; i++) {
  429. hctree1[i].bit = LAST_BIT;
  430. hctree1[i].count = 0L;
  431. hctree1[i].father = -1;
  432. hctree1[i].son0 = -1;
  433. hctree1[i].son1 = -1;
  434. }
  435. }
  436. /* Table file exists--init treebase with values from file.
  437. * We are only interested in the character itself (i),
  438. * and its current count. All other fields will be recreated
  439. * at output time. FILE FORMAT IS NOT CHECKED--IT HAD BETTER BE CORRECT!
  440. */
  441. else {
  442. no_huffcode_file = FALSE;
  443. if(NULL == fgets (filebuf, sizeof (filebuf) - 1, instream_huf)) {
  444. fprintf (stderr, "No first line in file\n");
  445. exit(2);
  446. }
  447. /* discard this first line (don't need id stamp) */
  448. while (fgets (filebuf, sizeof (filebuf) - 1, instream_huf)
  449. != NULL) {
  450. i = atoi (strtok (filebuf, DELIMITERS)); /* char */
  451. if (i < 0 || i > 256) {
  452. fprintf (stderr, CATGETS(dtsearch_catd, MS_huff, 33,
  453. "366 Invalid file format for %s.\n"),
  454. filename_huf);
  455. exit (2);
  456. }
  457. strtok (NULL, DELIMITERS); /* skip over current huff
  458. * code */
  459. hctree1[i].count = (i == 256) ?
  460. 0L : atol (strtok (NULL, DELIMITERS));
  461. hctree1[i].bit = LAST_BIT;
  462. hctree1[i].father = -1;
  463. hctree1[i].son0 = -1;
  464. hctree1[i].son1 = -1;
  465. if (i != 256)
  466. total_count += hctree1[i].count;
  467. } /* endwhile loop that reads each table line */
  468. fclose (instream_huf);
  469. }
  470. return;
  471. } /* end of function init_treebase */
  472. /****************************************/
  473. /* */
  474. /* Huffman Code */
  475. /* */
  476. /****************************************/
  477. /* determines correct huffman code based on current counts in tree,
  478. * writes out all to both files overlaying previous values if they existed.
  479. */
  480. static void huffman_code (time_t idstamp)
  481. {
  482. int i; /* current char */
  483. int lasti;
  484. int j; /* ascends tree from i to build bit_string */
  485. char bit_string[MAX_BITLEN + 4];
  486. char sprintbuf[128];
  487. char *bitptr;
  488. FILE *outstream_huc;
  489. FILE *outstream_huf;
  490. /* establish the 'literal' node (char #256) count
  491. * equal to sum of all chars whose counts are less than threshold.
  492. */
  493. if (literal_coding_on) {
  494. hctree1[256].count = 0L;
  495. for (i = 0; i < 256; i++)
  496. if (hctree1[i].count <= literal_threshold)
  497. hctree1[256].count += hctree1[i].count;
  498. }
  499. /* build the Huffman Code tree, and determine root (last_node) */
  500. while (build_tree ());
  501. /* now that we know the total number of tree nodes (last_node),
  502. * we are ready to write.
  503. * Open both output files and verify they are not write protected.
  504. */
  505. if ((outstream_huc = fopen (filename_huc, "w")) == NULL) {
  506. fprintf (stderr, CATGETS(dtsearch_catd, MS_huff, 34,
  507. "424 File '%s' failed to open for write. Is it read-only?\n"),
  508. filename_huc);
  509. exit (2);
  510. }
  511. if ((outstream_huf = fopen (filename_huf, "w")) == NULL) {
  512. fprintf (stderr, CATGETS(dtsearch_catd, MS_huff, 34,
  513. "439 File '%s' failed to open for write. Is it read-only?\n"),
  514. filename_huf);
  515. exit (2);
  516. }
  517. /* create the .c decode file (tree as integer array) */
  518. fprintf (outstream_huc,
  519. "#include <time.h>\n"
  520. "char *hctree_name =\t\"%s\";\n"
  521. "time_t hctree_id =\t%ldL;\n"
  522. "int hctree_root =\t%d;\n"
  523. "static int hctree_array[] = {\n",
  524. filename_huc, idstamp, last_node - 257);
  525. for (i = 257; i <= last_node; i++) {
  526. fprintf (outstream_huc, "\t%4d,\t%4d%c\t/* %3d */\n",
  527. hctree1[i].son0 - 257, hctree1[i].son1 - 257,
  528. (i == last_node) ? ' ' : ',', /* no comma after last
  529. * one */
  530. i - 257); /* comment contains node number */
  531. }
  532. fprintf (outstream_huc, "\t};\nint *hctree =\thctree_array;\n");
  533. fclose (outstream_huc);
  534. /* write out the tree base (0-256) in sorted order to .huf file */
  535. fprintf (outstream_huf, "%ld\tHCTREE_ID\n", idstamp);
  536. for (lasti = -1; (i = next_sorted_node (lasti)) >= 0; lasti = i) {
  537. /*
  538. * Create huffman code digit string. j ascends tree from i
  539. * to build string in reverse order.
  540. */
  541. bitptr = bit_string;
  542. for (j = i; j != -1; j = hctree1[j].father)
  543. *bitptr++ = hctree1[j].bit;
  544. *bitptr = '\0'; /* terminate reversed string */
  545. strrev (bit_string); /* reverse the string order */
  546. if (bit_string[1] == 0)
  547. strcpy (bit_string, " ");
  548. if (strlen (bit_string) < 9)
  549. strcat (bit_string, "\t");
  550. /* write out the line for this char */
  551. sprintf (sprintbuf, "%d\t%s\t%ld\t%s\n",
  552. i,
  553. bit_string + 1, /* hop over LAST_BIT */
  554. hctree1[i].count,
  555. char_label (i));
  556. fprintf (outstream_huf, "%s", sprintbuf);
  557. } /* end forloop printing out each tree base entry */
  558. fclose (outstream_huf);
  559. return;
  560. } /* end of function huffman_code */
  561. /****************************************/
  562. /* */
  563. /* Print Usage */
  564. /* */
  565. /****************************************/
  566. static void print_usage (void)
  567. {
  568. fprintf (stderr, CATGETS(dtsearch_catd, MS_huff, 35,
  569. "USAGE: huffcode [-lN | -l-] [-o] <huffname> [<infile>]\n"
  570. " -l<N> specifies the 'literal' threshold count. Any character occurring\n"
  571. " <= <N> times will be coded with the Huffman literal. Default is -l0,\n"
  572. " literal coding only for bytes with counts of zero.\n"
  573. " -l- turns off literal coding. Turning off literal coding in unbalanced\n"
  574. " trees leads to EXTREMELY LONG bit string codes--don't do it unless\n"
  575. " the input is known to be a well balanced binary file.\n"
  576. " -o preauthorizes overwriting any currently existing decode file.\n"
  577. " <huffname> is the filename prefix for the Huffman Code files.\n"
  578. " If the encode file (%s) already exists, byte counts from infile will\n"
  579. " be added to it, otherwise it will be newly created.\n"
  580. " The decode file (%s) is always newly created each run.\n"
  581. " <infile> is an input file containing bytes to be counted.\n"
  582. " It may be omitted if the encode file already exists.\n"),
  583. EXT_HUFFCODE, EXT_HDECODE);
  584. return;
  585. } /* end of function print_usage */
  586. /********************************************************/
  587. /* */
  588. /* USER_ARGS_PROCESSOR */
  589. /* */
  590. /********************************************************/
  591. /* handles command line arguments for 'main' */
  592. static void user_args_processor (int argc, char **argv)
  593. {
  594. char *argptr;
  595. int OK_to_overwrite = FALSE;
  596. FILE *stream;
  597. if (argc <= 1) { /* user just wants to see usage msg */
  598. print_usage ();
  599. exit (1);
  600. }
  601. /* each pass grabs new parm of "-xxx" format */
  602. while (--argc > 0 && (*++argv)[0] == '-') {
  603. argptr = argv[0];
  604. argptr[1] = tolower (argptr[1]);
  605. switch (argptr[1]) {
  606. case 'l': /* literal threshold */
  607. if (argptr[2] == 0)
  608. goto BADARG;
  609. else if (argptr[2] == '-')
  610. literal_coding_on = FALSE;
  611. else
  612. literal_threshold = atoi (argptr + 2);
  613. break;
  614. case 'o': /* OK_to_overwrite .c file if it already
  615. * exists */
  616. OK_to_overwrite = TRUE;
  617. break;
  618. case 'v': /* verbose mode = debug switch */
  619. debug_switch = TRUE;
  620. break;
  621. BADARG:
  622. default:
  623. fprintf (stderr, CATGETS(dtsearch_catd, MS_huff, 36,
  624. "'%s' is invalid argument.\n"), argptr);
  625. print_usage ();
  626. exit (2); /* ABORT program */
  627. } /* endswitch */
  628. } /* endwhile for cmd line '-'processing */
  629. /* test for required tree file name */
  630. if (argc <= 0) {
  631. fprintf (stderr, "%s", CATGETS(dtsearch_catd, MS_huff, 37,
  632. "576 Missing Huffman Code file names prefix.\n"));
  633. print_usage ();
  634. exit (2);
  635. }
  636. /* create 2 output file names from passed argument */
  637. strncpy (filename_huf, argv[0], _POSIX_PATH_MAX);
  638. filename_huf[_POSIX_PATH_MAX - 6] = 0;
  639. strcat (filename_huf, EXT_HUFFCODE);
  640. strncpy (filename_huc, argv[0], _POSIX_PATH_MAX);
  641. filename_huc[_POSIX_PATH_MAX - 6] = 0;
  642. strcat (filename_huc, EXT_HDECODE);
  643. /* Since the decode file is a C source code file (.c extension),
  644. * we want to be sure not to erase somebody's source program.
  645. * So if the .c file already exists, and the user didn't specify
  646. * overwrite in a command line argument, ask him now if it's OK to
  647. * blow away the old file.
  648. */
  649. if (!OK_to_overwrite)
  650. if ((stream = fopen (filename_huc, "r")) != NULL) {
  651. fclose (stream);
  652. printf (CATGETS(dtsearch_catd, MS_huff, 38,
  653. "Decode file '%s' already exists. "
  654. "Is it OK to overwrite it? [y/n] "),
  655. filename_huc);
  656. if (toupper (getchar ()) != 'Y')
  657. exit (2);
  658. }
  659. /* test for optional input file name */
  660. if (--argc <= 0)
  661. input_file_specified = FALSE;
  662. else {
  663. input_file_specified = TRUE;
  664. strncpy (filename_input, argv[1], _POSIX_PATH_MAX);
  665. filename_input[_POSIX_PATH_MAX - 1] = 0;
  666. }
  667. return;
  668. } /* end of function user_args_processor */
  669. /****************************************/
  670. /* */
  671. /* Main */
  672. /* */
  673. /****************************************/
  674. int main (int argc, char *argv[])
  675. {
  676. FILE *instream;
  677. struct stat fstat_input;
  678. long bytes_in = 0L;
  679. int mychar;
  680. time_t now, start_stamp;
  681. setlocale (LC_ALL, "");
  682. dtsearch_catd = CATOPEN(FNAME_DTSRCAT, 0);
  683. printf (CATGETS(dtsearch_catd, MS_huff, 40,
  684. "HUFFCODE Version %s\n"), AUSAPI_VERSION);
  685. /* validate user's command line arguments */
  686. user_args_processor (argc, argv);
  687. /* initialize tree table, using the table file if it exists */
  688. init_treebase ();
  689. if (total_count == 0L)
  690. printf ("%s", CATGETS(dtsearch_catd, MS_huff, 41,
  691. "Huffman Code Tables will be newly created.\n"));
  692. else
  693. printf (CATGETS(dtsearch_catd, MS_huff, 42,
  694. "Table '%s' already contains %ld Kbytes from previous runs.\n"),
  695. filename_huf, total_count / 1000L);
  696. if (!input_file_specified && no_huffcode_file) {
  697. fprintf (stderr, CATGETS(dtsearch_catd, MS_huff, 43,
  698. "645 Input file not specified and '%s' table file\n"
  699. " doesn't exist--nothing to do!\n"),
  700. filename_huf);
  701. print_usage ();
  702. exit (2);
  703. }
  704. /* read the input file and count its bytes */
  705. if (input_file_specified) {
  706. if ((instream = fopen (filename_input, "rb")) == NULL) {
  707. BAD_INPUT_FILE:
  708. fprintf (stderr, CATGETS(dtsearch_catd, MS_huff, 44,
  709. "Could not open input file '%s' or access status: %s\n"),
  710. filename_input, strerror (errno));
  711. exit (2);
  712. }
  713. if (fstat (fileno (instream), &fstat_input) == -1)
  714. goto BAD_INPUT_FILE;
  715. printf (CATGETS(dtsearch_catd, MS_huff, 45,
  716. "Input file '%s' contains about %ld Kbytes.\n"),
  717. filename_input, fstat_input.st_size / 1000L);
  718. time (&start_stamp);
  719. while ((mychar = getc (instream)) != EOF) {
  720. hctree1[mychar].count++;
  721. total_count++;
  722. /* echo progress to user every so often */
  723. if (!(++bytes_in % 10000L))
  724. printf (CATGETS(dtsearch_catd, MS_huff, 46,
  725. "\r%ld%% done. %2ld Kbytes read. "
  726. "Estimate %3ld seconds to completion. "),
  727. (bytes_in * 100L) / fstat_input.st_size,
  728. bytes_in / 1000L,
  729. (fstat_input.st_size - bytes_in) *
  730. (time (NULL) - start_stamp) / bytes_in);
  731. } /* end read loop for each char in input file */
  732. putchar ('\n');
  733. fclose (instream);
  734. } /* endif that processes input file */
  735. /* build huffman code tree, write out files */
  736. time (&now); /* this will be the official tree id time
  737. * stamp */
  738. printf (CATGETS(dtsearch_catd, MS_huff, 47,
  739. "Identifying timestamp will be '%ld'.\n"
  740. "%s Huffman Code Tables in '%s' and '%s'..."),
  741. now,
  742. (no_huffcode_file) ?
  743. CATGETS(dtsearch_catd, MS_huff, 48, "Creating") :
  744. CATGETS(dtsearch_catd, MS_huff, 49, "Rebuilding"),
  745. filename_huf,
  746. filename_huc);
  747. huffman_code (now);
  748. putchar ('\n');
  749. return 0;
  750. } /* end of function main */
  751. /************************** HUFFCODE.C ******************************/