/* hencode.c */
  1. /*
  2. * CDE - Common Desktop Environment
  3. *
  4. * Copyright (c) 1993-2012, The Open Group. All rights reserved.
  5. *
  6. * These libraries and programs are free software; you can
  7. * redistribute them and/or modify them under the terms of the GNU
  8. * Lesser General Public License as published by the Free Software
  9. * Foundation; either version 2 of the License, or (at your option)
  10. * any later version.
  11. *
  12. * These libraries and programs are distributed in the hope that
  13. * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14. * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15. * PURPOSE. See the GNU Lesser General Public License for more
  16. * details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with these libraries and programs; if not, write
  20. * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21. * Floor, Boston, MA 02110-1301 USA
  22. */
  23. /*
  24. * COMPONENT_NAME: austext
  25. *
  26. * FUNCTIONS: convert_str_2_char
  27. * gen_vec
  28. * hc_encode
  29. * main
  30. * process_char
  31. *
  32. * ORIGINS: 27
  33. *
  34. *
  35. * (C) COPYRIGHT International Business Machines Corp. 1990,1995
  36. * All Rights Reserved
  37. * Licensed Materials - Property of IBM
  38. * US Government Users Restricted Rights - Use, duplication or
  39. * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
  40. */
  41. /********************** HENCODE.C ***********************
  42. * $XConsortium: hencode.c /main/9 1996/11/21 19:50:29 drk $
  43. * Huffman encoder program.
  44. *
  45. * $Log$
  46. * Revision 2.3 1996/03/13 22:56:39 miker
  47. * Changed char to UCHAR several places.
  48. *
  49. * Revision 2.2 1995/10/26 15:11:42 miker
  50. * Added prolog.
  51. *
  52. * Revision 2.1 1995/09/22 20:50:40 miker
  53. * Freeze DtSearch 0.1, AusText 2.1.8
  54. *
  55. * Revision 1.3 1995/09/05 18:07:00 miker
  56. * Name changes for DtSearch.
  57. */
  58. #include "SearchP.h"
  59. #include <errno.h>
  60. #include <stdlib.h>
  61. #define X_INCLUDE_STRING_H
  62. #define XOS_USE_NO_LOCKING
  63. #include <X11/Xos_r.h>
  64. #define PROGNAME "HENCODE"
  65. #define MS_huff 30 /* message catalog set number */
  66. #define DELIMITERS "\t\n"
  67. #define LAST_BIT '-'
  68. #define LITERAL_NUM 256
  69. #define NO_SPACE 0
  70. /*------------------------ GLOBALS ---------------------------*/
  71. long gen_vec_hufid = 0L;
  72. static struct or_blobrec blobrec;
  73. static char *huff_code[257];
  74. static int code_length[257];
  75. static char coded_bits_str[9];
  76. static char bit_pos = 0;
  77. static char bits_left;
  78. static int total_num_chars = 0;
  79. static int num_char_coded = 0;
  80. static char zero_str[] = "00000000";
  81. #define MAX_NUM_CHAR (sizeof(blobrec.or_blob) - 1)
  82. #define MAX_NUM_BITS (8 * MAX_NUM_CHAR)
  83. /****************************************/
  84. /* */
  85. /* GENERATE VECTORS */
  86. /* */
  87. /****************************************/
  88. void gen_vec (char *fname_huffcode_tab)
  89. {
  90. char temp[40];
  91. int i, j;
  92. char tab_filebuf[128];
  93. unsigned char ch;
  94. FILE *tab_stream;
  95. _Xstrtokparams strtok_buf;
  96. if ((tab_stream = fopen (fname_huffcode_tab, "r")) == NULL) {
  97. printf (CATGETS(dtsearch_catd, MS_huff, 1,
  98. "%s: Cannot open huffman encode file '%s':\n"
  99. " %s\n Exit Code = 2\n"),
  100. PROGNAME"222", fname_huffcode_tab, strerror (errno));
  101. DtSearchExit (2);
  102. }
  103. memset (huff_code, 0, sizeof(huff_code));
  104. memset (code_length, 0, sizeof(code_length));
  105. /*
  106. * First line in the file contains time stamp. We have to read
  107. * it separately. First token on first line is hufid. Save it
  108. * in a global for optional use by caller.
  109. */
  110. fgets (tab_filebuf, sizeof (tab_filebuf) - 1, tab_stream);
  111. gen_vec_hufid = atol (tab_filebuf);
  112. /*-------------- READ IN HUFFMAN FILE ------------*/
  113. /*
  114. * We are only interested in the character itself (index) and
  115. * its Huffman Code
  116. */
  117. while (fgets (tab_filebuf, sizeof (tab_filebuf) - 1, tab_stream)
  118. != NULL) {
  119. i = atoi (_XStrtok (tab_filebuf, DELIMITERS, strtok_buf)); /* char */
  120. /* read current huff code */
  121. strcpy (temp, _XStrtok (NULL, DELIMITERS, strtok_buf));
  122. if (temp[0] == ' ') {
  123. /* Empty huffcode associated with LITERAL CODE.
  124. * Either this is literal char itself and literal
  125. * encodeing has been turned off, or this char is
  126. * so rare that it is coded using the literal char.
  127. */
  128. if (i == 256)
  129. continue;
  130. /* current character has LITERAL CODE */
  131. snprintf(temp, sizeof(temp), "%s", huff_code[LITERAL_NUM]);
  132. *(code_length + i) = *(code_length + LITERAL_NUM) + 8;
  133. ch = (unsigned char) i;
  134. for (j = 0; j < 8; j++) {
  135. if (ch & 0x80) {
  136. temp[*(code_length + LITERAL_NUM) + j] =
  137. '1';
  138. }
  139. else {
  140. temp[*(code_length + LITERAL_NUM) + j] =
  141. '0';
  142. }
  143. ch = ch << 1;
  144. }
  145. temp[*(code_length + LITERAL_NUM) + 8] = '\0';
  146. huff_code[i] =
  147. (char *) malloc (*(code_length + i) + 1);
  148. strcpy (huff_code[i], temp);
  149. }
  150. else {
  151. /* regular HUFFMAN code */
  152. *(code_length + i) = strlen (temp);
  153. huff_code[i] =
  154. (char *) malloc (*(code_length + i) + 1);
  155. strcpy (huff_code[i], temp);
  156. }
  157. }
  158. fclose (tab_stream);
  159. } /* end of function gen_vec */
  160. /********************************************************/
  161. /* */
  162. /* Convert Coded String to Coded Character */
  163. /* */
  164. /********************************************************/
  165. void convert_str_2_char (char *code)
  166. {
  167. int i, j;
  168. *code = 0;
  169. j = 1;
  170. for (i = 0; i < 8; i++) {
  171. if (*(coded_bits_str + (7 - i)) == '1') {
  172. *code += j;
  173. }
  174. j = j * 2;
  175. }
  176. total_num_chars++;
  177. return;
  178. }
  179. /****************************************/
  180. /* */
  181. /* Process Current Character */
  182. /* */
  183. /****************************************/
  184. int process_char (UCHAR ch, char *bitstr)
  185. {
  186. char temp_code[40];
  187. char coded_char;
  188. int i, j;
  189. int num_of_bits_in_code;
  190. i = (int) ch;
  191. num_of_bits_in_code = *(code_length + i);
  192. if ((MAX_NUM_BITS - total_num_chars * 8 - bit_pos) <
  193. num_of_bits_in_code) {
  194. return NO_SPACE;
  195. }
  196. strcpy (temp_code, huff_code[i]);
  197. while (TRUE) {
  198. /* fill new character with Huffman Code */
  199. if (bit_pos == 0) {
  200. if (num_of_bits_in_code == 8) {
  201. strcpy (coded_bits_str, temp_code);
  202. convert_str_2_char (&coded_char);
  203. bitstr[total_num_chars - 1] = coded_char;
  204. return TRUE;
  205. }
  206. if (num_of_bits_in_code < 8) {
  207. strcpy (coded_bits_str, temp_code);
  208. bit_pos = num_of_bits_in_code;
  209. bits_left = 8 - bit_pos;
  210. return TRUE;
  211. }
  212. if (num_of_bits_in_code > 8) {
  213. strncpy (coded_bits_str, temp_code, 8);
  214. coded_bits_str[8] = '\0';
  215. convert_str_2_char (&coded_char);
  216. bitstr[total_num_chars - 1] = coded_char;
  217. num_of_bits_in_code -= 8;
  218. strcpy (temp_code, &temp_code[8]);
  219. }
  220. } /* end of bit_pos == 0 loop */
  221. else {
  222. j = bit_pos + num_of_bits_in_code;
  223. if (j == 8) {
  224. bit_pos = 0;
  225. strcat (coded_bits_str, temp_code);
  226. convert_str_2_char (&coded_char);
  227. bitstr[total_num_chars - 1] = coded_char;
  228. return TRUE;
  229. }
  230. if (j < 8) {
  231. strcat (coded_bits_str, temp_code);
  232. bit_pos = j;
  233. bits_left = 8 - bit_pos;
  234. return TRUE;
  235. }
  236. if (j > 8) {
  237. strncat (coded_bits_str, temp_code,
  238. (size_t) bits_left);
  239. convert_str_2_char (&coded_char);
  240. bitstr[total_num_chars - 1] = coded_char;
  241. num_of_bits_in_code -= bits_left;
  242. strcpy (temp_code, &huff_code[i][bits_left]);
  243. bit_pos = 0;
  244. }
  245. } /* end of else loop */
  246. } /* end of while(TRUE) loop */
  247. }
  248. /************************************************/
  249. /* */
  250. /* HC Encode */
  251. /* */
  252. /************************************************/
  253. int hc_encode (struct or_blobrec * targblobrec,
  254. UCHAR *charbuf,
  255. int charcount,
  256. int file_pos)
  257. {
  258. /********** replaced by blobrec above...
  259. union charint
  260. {
  261. char ch[2];
  262. INT orig_char_count;
  263. } un1;
  264. static char temp1 [MAX_NUM_CHAR+1]; ...repl by blobrec;
  265. ************/
  266. char *targ, *src;
  267. int i, j;
  268. char temp;
  269. char ret_code = TRUE;
  270. char write = FALSE;
  271. char last_call = FALSE;
  272. if (charcount == 0) {
  273. last_call = TRUE;
  274. charcount = 1;
  275. }
  276. for (i = 0; i < charcount; i++) {
  277. if (!last_call) {
  278. ret_code = process_char (charbuf[i], (char *) blobrec.or_blob);
  279. }
  280. if ((ret_code == NO_SPACE) ||
  281. (file_pos && (i == (charcount - 1)))) {
  282. if (!last_call) {
  283. if (file_pos && (i == (charcount - 1))) {
  284. num_char_coded++;
  285. }
  286. }
  287. if (bit_pos) {
  288. strncat (coded_bits_str, zero_str,
  289. (size_t) bits_left);
  290. convert_str_2_char (&temp);
  291. blobrec.or_blob[total_num_chars - 1][0] = temp;
  292. }
  293. write = TRUE;
  294. /**********
  295. un1.orig_char_count = num_char_coded;
  296. bitstring[0] = un1.ch[0];
  297. bitstring[1] = un1.ch[1];
  298. for (j = 0; j <= total_num_chars; j++) {
  299. *(bitstring + j + 2) = *(temp1 + j);
  300. };
  301. **************/
  302. targblobrec->or_bloblen = num_char_coded;
  303. targ = (char *) targblobrec->or_blob;
  304. src = (char *) blobrec.or_blob;
  305. for (j = 0; j < total_num_chars; j++)
  306. *targ++ = *src++;
  307. num_char_coded = 0;
  308. bit_pos = 0;
  309. total_num_chars = 0;
  310. if (file_pos && (i == (charcount - 1))) {
  311. return write;
  312. }
  313. i--;
  314. }
  315. else {
  316. num_char_coded++;
  317. }
  318. }
  319. return write;
  320. }
  321. #ifdef DEBUG_HENCODE
  322. /****************************************/
  323. /* */
  324. /* Main */
  325. /* */
  326. /****************************************/
  327. main (int argc, char *argv[])
  328. {
  329. FILE *stream;
  330. char bitstring[MAX_NUM_CHAR + 2];
  331. char charbuf[MAX_NUM_CHAR + 1];
  332. int charcount = 0;
  333. int mychar;
  334. if (argc < 2) {
  335. fprintf (stderr, "Usage: try filename\n");
  336. exit (1);
  337. }
  338. if ((stream = fopen (argv[1], "rb")) == NULL) {
  339. fprintf (stderr, "Could not open input file '%s'\n", argv[1]);
  340. exit (2);
  341. }
  342. fp = fopen ("codefile.dat", "wb");
  343. gen_vec ();
  344. while ((mychar = getc (stream)) != EOF) {
  345. charbuf[charcount] = mychar;
  346. charcount++;
  347. if (charcount == MAX_NUM_CHAR) {
  348. hc_encode (bitstring, charbuf, charcount, 0);
  349. /*
  350. * for (j = 0; j < charcount; j++) {
  351. * fputc(bitstring[j], fp); }
  352. */
  353. charcount = 0;
  354. }
  355. }
  356. hc_encode (bitstring, charbuf, charcount, 1);
  357. printf ("Total Number of Characters = %ld\n", total_num_chars);
  358. fclose (fp);
  359. fclose (stream);
  360. return;
  361. }
  362. #endif
  363. /********************** HENCODE.C ***********************/