tcs.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  1. #ifndef PLAN9
  2. #include <sys/types.h>
  3. #include <stdio.h>
  4. #include <unistd.h>
  5. #include <stdlib.h>
  6. #include <fcntl.h>
  7. #include <string.h>
  8. #include <errno.h>
  9. #include "plan9.h"
  10. #else /* PLAN9 */
  11. #include <u.h>
  12. #include <libc.h>
  13. #include <bio.h>
  14. #endif /* PLAN9 */
  15. #include "cyrillic.h"
  16. #include "misc.h"
  17. #include "ms.h"
  18. #include "8859.h"
  19. #include "big5.h"
  20. #include "gb.h"
  21. #include "hdr.h"
  22. #include "conv.h"
  23. void usage(void);
  24. void list(void);
  25. int squawk = 1;
  26. int clean = 0;
  27. int verbose = 0;
  28. long ninput, noutput, nrunes, nerrors;
  29. char *file = "stdin";
  30. char *argv0;
  31. Rune runes[N];
  32. char obuf[UTFmax*N]; /* maximum bloat from N runes */
  33. long tab[NRUNE];
  34. #ifndef PLAN9
  35. extern char version[];
  36. #endif
  37. void intable(int, long *, struct convert *);
  38. void unicode_in(int, long *, struct convert *);
  39. void unicode_out(Rune *, int, long *);
  40. int
  41. main(int argc, char **argv)
  42. {
  43. char *from = "utf";
  44. char *to = "utf";
  45. int fd;
  46. int listem = 0;
  47. struct convert *t, *f;
  48. ARGBEGIN {
  49. case 'c':
  50. clean = 1;
  51. break;
  52. case 'f':
  53. from = EARGF(usage());
  54. break;
  55. case 'l':
  56. listem = 1;
  57. break;
  58. case 's':
  59. squawk = 0;
  60. break;
  61. case 't':
  62. to = EARGF(usage());
  63. break;
  64. case 'v':
  65. verbose = 1;
  66. break;
  67. default:
  68. usage();
  69. break;
  70. } ARGEND
  71. USED(argc);
  72. if(verbose)
  73. squawk = 1;
  74. if(listem){
  75. list();
  76. EXIT(0, 0);
  77. }
  78. if(!from || !to)
  79. usage();
  80. f = conv(from, 1);
  81. t = conv(to, 0);
  82. #define PROC {if(f->flags&Table)\
  83. intable(fd, (long *)f->data, t);\
  84. else\
  85. ((Infn)(f->fn))(fd, (long *)0, t);}
  86. if(*argv){
  87. while(*argv){
  88. file = *argv;
  89. #ifndef PLAN9
  90. if((fd = open(*argv, 0)) < 0){
  91. EPR "%s: %s: %s\n", argv0, *argv, strerror(errno));
  92. #else /* PLAN9 */
  93. if((fd = open(*argv, OREAD)) < 0){
  94. EPR "%s: %s: %r\n", argv0, *argv);
  95. #endif /* PLAN9 */
  96. EXIT(1, "open failure");
  97. }
  98. PROC
  99. close(fd);
  100. argv++;
  101. }
  102. } else {
  103. fd = 0;
  104. PROC
  105. }
  106. if(verbose)
  107. EPR "%s: %ld input bytes, %ld runes, %ld output bytes (%ld errors)\n", argv0,
  108. ninput, nrunes, noutput, nerrors);
  109. EXIT(((nerrors && squawk)? 1:0), ((nerrors && squawk)? "conversion error":0));
  110. return(0); /* shut up compiler */
  111. }
  112. void
  113. usage(void)
  114. {
  115. EPR "Usage: %s [-slv] [-f cs] [-t cs] [file ...]\n", argv0);
  116. verbose = 1;
  117. list();
  118. EXIT(1, "usage");
  119. }
  120. void
  121. list(void)
  122. {
  123. struct convert *c;
  124. char ch = verbose?'\t':' ';
  125. #ifndef PLAN9
  126. EPR "%s version = '%s'\n", argv0, version);
  127. #endif
  128. if(verbose)
  129. EPR "character sets:\n");
  130. else
  131. EPR "cs:");
  132. for(c = convert; c->name; c++){
  133. if((c->flags&From) && c[1].name && (strcmp(c[1].name, c->name) == 0)){
  134. EPR "%c%s", ch, c->name);
  135. c++;
  136. } else if(c->flags&Table)
  137. EPR "%c%s", ch, c->name);
  138. else if(c->flags&From)
  139. EPR "%c%s(from)", ch, c->name);
  140. else
  141. EPR "%c%s(to)", ch, c->name);
  142. if(verbose)
  143. EPR "\t%s\n", c->chatter);
  144. }
  145. if(!verbose)
  146. EPR "\n");
  147. }
  148. struct convert *
  149. conv(char *name, int from)
  150. {
  151. struct convert *c;
  152. for(c = convert; c->name; c++){
  153. if(cistrcmp(c->name, name) != 0)
  154. continue;
  155. if(c->flags&Table)
  156. return(c);
  157. if(((c->flags&From) == 0) == (from == 0))
  158. return(c);
  159. }
  160. EPR "%s: charset `%s' unknown\n", argv0, name);
  161. EXIT(1, "unknown character set");
  162. return(0); /* just shut the compiler up */
  163. }
  /*
   * Exchange the two bytes of every complete byte pair in the first n
   * bytes of b, in place.
   *
   * b: buffer to modify.
   * n: number of bytes to consider.
   *
   * Only whole pairs are swapped.  When n is odd the final byte is left
   * untouched, so nothing at or beyond b+n is read or written; the
   * previous loop swapped that byte with b[n].  A count below 2 is a
   * no-op.
   */
  void
  swab2(char *b, int n)
  {
  	char *e, p;

  	if(n < 2)
  		return;
  	/* n & ~1 rounds the length down to a whole number of pairs */
  	for(e = b + (n & ~1); b < e; b += 2){
  		p = b[0];
  		b[0] = b[1];
  		b[1] = p;
  	}
  }
  174. void
  175. unicode_in(int fd, long *notused, struct convert *out)
  176. {
  177. Rune buf[N];
  178. int n;
  179. int swabme;
  180. USED(notused);
  181. if(read(fd, (char *)buf, 2) != 2)
  182. return;
  183. ninput += 2;
  184. switch(buf[0])
  185. {
  186. default:
  187. OUT(out, buf, 1);
  188. case 0xFEFF:
  189. swabme = 0;
  190. break;
  191. case 0xFFFE:
  192. swabme = 1;
  193. break;
  194. }
  195. while((n = read(fd, (char *)buf, 2*N)) > 0){
  196. ninput += n;
  197. if(swabme)
  198. swab2((char *)buf, n);
  199. if(n&1){
  200. if(squawk)
  201. EPR "%s: odd byte count in %s\n", argv0, file);
  202. nerrors++;
  203. if(clean)
  204. n--;
  205. else
  206. buf[n++/2] = Runeerror;
  207. }
  208. OUT(out, buf, n/2);
  209. }
  210. OUT(out, buf, 0);
  211. }
  212. void
  213. unicode_in_be(int fd, long *notused, struct convert *out)
  214. {
  215. int i, n;
  216. Rune buf[N], r;
  217. uchar *p;
  218. USED(notused);
  219. while((n = read(fd, (char *)buf, 2*N)) > 0){
  220. ninput += n;
  221. p = (uchar*)buf;
  222. for(i=0; i<n/2; i++){
  223. r = *p++<<8;
  224. r |= *p++;
  225. buf[i] = r;
  226. }
  227. if(n&1){
  228. if(squawk)
  229. EPR "%s: odd byte count in %s\n", argv0, file);
  230. nerrors++;
  231. if(clean)
  232. n--;
  233. else
  234. buf[n++/2] = Runeerror;
  235. }
  236. OUT(out, buf, n/2);
  237. }
  238. OUT(out, buf, 0);
  239. }
  240. void
  241. unicode_in_le(int fd, long *notused, struct convert *out)
  242. {
  243. int i, n;
  244. Rune buf[N], r;
  245. uchar *p;
  246. USED(notused);
  247. while((n = read(fd, (char *)buf, 2*N)) > 0){
  248. ninput += n;
  249. p = (uchar*)buf;
  250. for(i=0; i<n/2; i++){
  251. r = *p++;
  252. r |= *p++<<8;
  253. buf[i] = r;
  254. }
  255. if(n&1){
  256. if(squawk)
  257. EPR "%s: odd byte count in %s\n", argv0, file);
  258. nerrors++;
  259. if(clean)
  260. n--;
  261. else
  262. buf[n++/2] = Runeerror;
  263. }
  264. OUT(out, buf, n/2);
  265. }
  266. OUT(out, buf, 0);
  267. }
  268. void
  269. unicode_out(Rune *base, int n, long *notused)
  270. {
  271. static int first = 1;
  272. USED(notused);
  273. nrunes += n;
  274. if(first){
  275. unsigned short x = 0xFEFF;
  276. noutput += 2;
  277. write(1, (char *)&x, 2);
  278. first = 0;
  279. }
  280. noutput += 2*n;
  281. write(1, (char *)base, 2*n);
  282. }
  283. void
  284. unicode_out_be(Rune *base, int n, long *notused)
  285. {
  286. int i;
  287. uchar *p;
  288. Rune r;
  289. USED(notused);
  290. p = (uchar*)base;
  291. for(i=0; i<n; i++){
  292. r = base[i];
  293. *p++ = r>>8;
  294. *p++ = r;
  295. }
  296. nrunes += n;
  297. noutput += 2*n;
  298. write(1, (char *)base, 2*n);
  299. }
  300. void
  301. unicode_out_le(Rune *base, int n, long *notused)
  302. {
  303. int i;
  304. uchar *p;
  305. Rune r;
  306. USED(notused);
  307. p = (uchar*)base;
  308. for(i=0; i<n; i++){
  309. r = base[i];
  310. *p++ = r;
  311. *p++ = r>>8;
  312. }
  313. nrunes += n;
  314. noutput += 2*n;
  315. write(1, (char *)base, 2*n);
  316. }
  317. void
  318. intable(int fd, long *table, struct convert *out)
  319. {
  320. uchar buf[N];
  321. uchar *p, *e;
  322. Rune *r;
  323. int n;
  324. long c;
  325. while((n = read(fd, (char *)buf, N)) > 0){
  326. ninput += n;
  327. r = runes;
  328. for(p = buf, e = buf+n; p < e; p++){
  329. c = table[*p];
  330. if(c < 0){
  331. if(squawk)
  332. EPR "%s: bad char 0x%x near byte %ld in %s\n", argv0, *p, ninput+(p-e), file);
  333. nerrors++;
  334. if(clean)
  335. continue;
  336. c = BADMAP;
  337. }
  338. *r++ = c;
  339. }
  340. OUT(out, runes, r-runes);
  341. }
  342. OUT(out, runes, 0);
  343. if(n < 0){
  344. #ifdef PLAN9
  345. EPR "%s: input read: %r\n", argv0);
  346. #else
  347. EPR "%s: input read: %s\n", argv0, strerror(errno));
  348. #endif
  349. EXIT(1, "input read error");
  350. }
  351. }
  352. void
  353. outtable(Rune *base, int n, long *map)
  354. {
  355. long c;
  356. char *p;
  357. int i;
  358. nrunes += n;
  359. for(i = 0; i < NRUNE; i++)
  360. tab[i] = -1;
  361. for(i = 0; i < 256; i++)
  362. if(map[i] >= 0)
  363. tab[map[i]] = i;
  364. for(i = 0, p = obuf; i < n; i++){
  365. c = tab[base[i]];
  366. if(c < 0){
  367. if(squawk)
  368. EPR "%s: rune 0x%x not in output cs\n", argv0, base[i]);
  369. nerrors++;
  370. if(clean)
  371. continue;
  372. c = BADMAP;
  373. }
  374. *p++ = c;
  375. }
  376. noutput += p-obuf;
  377. write(1, obuf, p-obuf);
  378. }
  /*
   * Byte-to-rune table for 7-bit ASCII, used by the "ascii" and
   * "us-ascii" entries of convert[].  Bytes 0x00-0x7f map to
   * themselves; bytes 0x80-0xff are -1, which intable() reports as a
   * bad character and outtable() leaves out of its inverse table.
   */
  379. long tabascii[256] =
  380. {
  381. 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
  382. 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
  383. 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
  384. 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
  385. 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
  386. 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
  387. 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
  388. 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
  389. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  390. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  391. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  392. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  393. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  394. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  395. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  396. -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  397. };
  /*
   * Byte-to-rune table for an MS-DOS code page.  Bytes 0x00-0x7f map
   * to themselves; bytes 0x80-0xff map to the Latin, box-drawing
   * ("forms"), Greek and math runes noted beside each group.
   * No entry of convert[] in this file refers to this table (the
   * "msdos" name points at tabcp437).
   */
  398. long tabmsdos[256] = /* from jhelling@cs.ruu.nl (Jeroen Hellingman) */
  399. {
  400. 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
  401. 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
  402. 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
  403. 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
  404. 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
  405. 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
  406. 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
  407. 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
  408. 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, /* latin */
  409. 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5,
  410. 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
  411. 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192,
  412. 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba,
  413. 0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
  414. 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, /* forms */
  415. 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510,
  416. 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
  417. 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567,
  418. 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b,
  419. 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
  420. 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, /* greek */
  421. 0x03a6, 0x0398, 0x2126, 0x03b4, 0x221e, 0x2205, 0x2208, 0x2229,
  422. 0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, /* math */
  423. 0x00b0, 0x2022, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x220e, 0x00a0,
  424. };
  /*
   * Byte-to-rune table used by the "msdos2" entry of convert[].
   * Bytes 0x01-0x1f map to symbol runes instead of themselves (byte
   * 0x00 still maps to 0); bytes 0x20-0x7f map to themselves and
   * bytes 0x80-0xff carry the same values as in tabmsdos.
   */
  425. long tabmsdos2[256] = /* from jhelling@cs.ruu.nl (Jeroen Hellingman) */
  426. {
  427. 0x0000, 0x263a, 0x263b, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022,
  428. 0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b, 0x263c,
  429. 0x25b6, 0x25c0, 0x2195, 0x203c, 0x00b6, 0x00a7, 0x2043, 0x21a8,
  430. 0x2191, 0x2193, 0x2192, 0x2190, 0x2319, 0x2194, 0x25b2, 0x25bc,
  431. 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
  432. 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
  433. 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
  434. 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
  435. 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
  436. 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
  437. 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, /* latin */
  438. 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5,
  439. 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
  440. 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192,
  441. 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba,
  442. 0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
  443. 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, /* forms */
  444. 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510,
  445. 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
  446. 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567,
  447. 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b,
  448. 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
  449. 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, /* greek */
  450. 0x03a6, 0x0398, 0x2126, 0x03b4, 0x221e, 0x2205, 0x2208, 0x2229,
  451. 0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, /* math */
  452. 0x00b0, 0x2022, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x220e, 0x00a0,
  453. };
  /*
   * Registry of the character sets this program knows, searched
   * linearly by conv() and printed by list().  The array ends with an
   * entry whose name is 0.
   *
   * Each initializer gives, in order: the name matched against -f/-t,
   * a description shown in verbose listings, the flags, and then either
   * a 256-entry byte-to-rune table (Table entries, usable in both
   * directions) or 0 followed by a converter function (Func entries).
   * A Func entry with From set is the input converter; one without it
   * is the output converter.  The two halves of a from/to pair must be
   * adjacent with the From entry first, because list() merges a From
   * entry with an immediately following entry of the same name.
   */
  454. struct convert convert[] =
  455. { /* if two entries have the same name, put the from one first */
  456. { "8859-1", "Latin-1 (Western and Northern Europe including Italian)", Table, (void *)tab8859_1 },
  457. { "8859-2", "Latin-2 (Eastern Europe except Turkey and the Baltic countries)", Table, (void *)tab8859_2 },
  458. { "8859-3", "Latin-3 (Mediterranean, South Africa, Esperanto)", Table, (void *)tab8859_3 },
  459. { "8859-4", "Latin-4 (Scandinavia and the Baltic countries; obsolete)", Table, (void *)tab8859_4 },
  460. { "8859-5", "Part 5 (Cyrillic)", Table, (void *)tab8859_5 },
  461. { "8859-6", "Part 6 (Arabic)", Table, (void *)tab8859_6 },
  462. { "8859-7", "Part 7 (Greek)", Table, (void *)tab8859_7 },
  463. { "8859-8", "Part 8 (Hebrew)", Table, (void *)tab8859_8 },
  464. { "8859-9", "Latin-5 (Turkey, Western Europe except Icelandic and Faroese)", Table, (void *)tab8859_9 },
  465. { "8859-10", "Latin-6 (Northern Europe)", Table, (void *)tab8859_10 },
  466. { "8859-15", "Latin-9 (Western Europe)", Table, (void *)tab8859_15 },
  467. { "ascii", "7-bit ASCII", Table, (void *)tabascii },
  468. { "atari", "ATARI-ST character set", Table, (void *)tabatari },
  469. { "av", "Alternativnyj Variant", Table, (void *)tabav },
  470. { "big5", "Big 5 (HKU)", From|Func, 0, (Fnptr)big5_in },
  471. { "big5", "Big 5 (HKU)", Func, 0, (Fnptr)big5_out },
  472. { "ebcdic", "EBCDIC", Table, (void *)tabebcdic }, /* 6f is recommended bad map */
  473. { "euc-k", "Korean EUC: ASCII+KS C 5601 1987", From|Func, 0, (Fnptr)uksc_in },
  474. { "euc-k", "Korean EUC: ASCII+KS C 5601 1987", Func, 0, (Fnptr)uksc_out },
  475. { "gb2312", "GB2312-80 (Chinese)", From|Func, 0, (Fnptr)gb_in },
  476. { "gb2312", "GB2312-80 (Chinese)", Func, 0, (Fnptr)gb_out },
  477. { "html", "HTML", From|Func, 0, (Fnptr)html_in },
  478. { "html", "HTML", Func, 0, (Fnptr)html_out },
  479. { "ibm437", "IBM Code Page 437 (US)", Table, (void*)tabcp437 },
  480. { "ibm720", "IBM Code Page 720 (Arabic)", Table, (void*)tabcp720 },
  481. { "ibm737", "IBM Code Page 737 (Greek)", Table, (void*)tabcp737 },
  482. { "ibm775", "IBM Code Page 775 (Baltic)", Table, (void*)tabcp775 },
  483. { "ibm850", "IBM Code Page 850 (Multilingual Latin I)", Table, (void*)tabcp850 },
  484. { "ibm852", "IBM Code Page 852 (Latin II)", Table, (void*)tabcp852 },
  485. { "ibm855", "IBM Code Page 855 (Cyrillic)", Table, (void*)tabcp855 },
  486. { "ibm857", "IBM Code Page 857 (Turkish)", Table, (void*)tabcp857 },
  487. { "ibm858", "IBM Code Page 858 (Multilingual Latin I+Euro)", Table, (void*)tabcp858 },
  488. { "ibm862", "IBM Code Page 862 (Hebrew)", Table, (void*)tabcp862 },
  489. { "ibm866", "IBM Code Page 866 (Russian)", Table, (void*)tabcp866 },
  490. { "ibm874", "IBM Code Page 874 (Thai)", Table, (void*)tabcp874 },
  491. { "iso-2022-jp", "alias for jis-kanji (MIME)", From|Func, 0, (Fnptr)jisjis_in },
  492. { "iso-2022-jp", "alias for jis-kanji (MIME)", Func, 0, (Fnptr)jisjis_out },
  493. { "iso-8859-1", "alias for 8859-1 (MIME)", Table, (void *)tab8859_1 },
  494. { "iso-8859-2", "alias for 8859-2 (MIME)", Table, (void *)tab8859_2 },
  495. { "iso-8859-3", "alias for 8859-3 (MIME)", Table, (void *)tab8859_3 },
  496. { "iso-8859-4", "alias for 8859-4 (MIME)", Table, (void *)tab8859_4 },
  497. { "iso-8859-5", "alias for 8859-5 (MIME)", Table, (void *)tab8859_5 },
  498. { "iso-8859-6", "alias for 8859-6 (MIME)", Table, (void *)tab8859_6 },
  499. { "iso-8859-7", "alias for 8859-7 (MIME)", Table, (void *)tab8859_7 },
  500. { "iso-8859-8", "alias for 8859-8 (MIME)", Table, (void *)tab8859_8 },
  501. { "iso-8859-9", "alias for 8859-9 (MIME)", Table, (void *)tab8859_9 },
  502. { "iso-8859-10", "alias for 8859-10 (MIME)", Table, (void *)tab8859_10 },
  503. { "iso-8859-15", "alias for 8859-15 (MIME)", Table, (void *)tab8859_15 },
  504. { "jis", "guesses at the JIS encoding", From|Func, 0, (Fnptr)jis_in },
  505. { "jis-kanji", "ISO 2022-JP (Japanese)", From|Func, 0, (Fnptr)jisjis_in },
  506. { "jis-kanji", "ISO 2022-JP (Japanese)", Func, 0, (Fnptr)jisjis_out },
  507. { "koi8", "KOI-8 (GOST 19769-74)", Table, (void *)tabkoi8 },
  508. { "koi8-r", "alias for koi8 (MIME)", Table, (void *)tabkoi8 },
  509. { "latin1", "alias for 8859-1", Table, (void *)tab8859_1 },
  510. { "macrom", "Macintosh Standard Roman character set", Table, (void *)tabmacroman },
  511. { "microsoft", "alias for windows1252", Table, (void *)tabcp1252 },
  512. { "ms-kanji", "Microsoft, or Shift-JIS", From|Func, 0, (Fnptr)msjis_in },
  513. { "ms-kanji", "Microsoft, or Shift-JIS", Func, 0, (Fnptr)msjis_out },
  514. { "msdos", "IBM PC (alias for ibm437)", Table, (void *)tabcp437 },
  515. { "msdos2", "IBM PC (ibm437 with graphics in C0)", Table, (void *)tabmsdos2 },
  516. { "next", "NEXTSTEP character set", Table, (void *)tabnextstep },
  517. { "ov", "Osnovnoj Variant", Table, (void *)tabov },
  518. { "ps2", "IBM PS/2: (alias for ibm850)", Table, (void *)tabcp850 },
  519. { "sf1", "ISO-646: Finnish/Swedish SF-1 variant", Table, (void *)tabsf1 },
  520. { "sf2", "ISO-646: Finnish/Swedish SF-2 variant (recommended)", Table, (void *)tabsf2 },
  521. { "tis-620", "Thai+ASCII (TIS 620-1986)", Table, (void *)tabtis620 },
  522. { "tune", "TUNE (Tamil)", From|Func, 0, (Fnptr)tune_in },
  523. { "tune", "TUNE (Tamil)", Func, 0, (Fnptr)tune_out },
  524. { "ucode", "Russian U-code", Table, (void *)tabucode },
  525. { "ujis", "EUC-JX: JIS 0208", From|Func, 0, (Fnptr)ujis_in },
  526. { "ujis", "EUC-JX: JIS 0208", Func, 0, (Fnptr)ujis_out },
  527. { "unicode", "Unicode 1.1", From|Func, 0, (Fnptr)unicode_in },
  528. { "unicode", "Unicode 1.1", Func, 0, (Fnptr)unicode_out },
  529. { "unicode-be", "Unicode 1.1 big-endian", From|Func, 0, (Fnptr)unicode_in_be },
  530. { "unicode-be", "Unicode 1.1 big-endian", Func, 0, (Fnptr)unicode_out_be },
  531. { "unicode-le", "Unicode 1.1 little-endian", From|Func, 0, (Fnptr)unicode_in_le },
  532. { "unicode-le", "Unicode 1.1 little-endian", Func, 0, (Fnptr)unicode_out_le },
  533. { "us-ascii", "alias for ascii (MIME)", Table, (void *)tabascii },
  534. { "utf", "FSS-UTF a.k.a. UTF-8", From|Func, 0, (Fnptr)utf_in },
  535. { "utf", "FSS-UTF a.k.a. UTF-8", Func, 0, (Fnptr)utf_out },
  536. { "utf1", "UTF-1 (ISO 10646 Annex A)", From|Func, 0, (Fnptr)isoutf_in },
  537. { "utf1", "UTF-1 (ISO 10646 Annex A)", Func, 0, (Fnptr)isoutf_out },
  538. { "utf-8", "alias for utf (MIME)", From|Func, 0, (Fnptr)utf_in },
  539. { "utf-8", "alias for utf (MIME)", Func, 0, (Fnptr)utf_out },
  540. { "utf-16", "alias for unicode (MIME)", From|Func, 0, (Fnptr)unicode_in },
  541. { "utf-16", "alias for unicode (MIME)", Func, 0, (Fnptr)unicode_out },
  542. { "utf-16be", "alias for unicode-be (MIME)", From|Func, 0, (Fnptr)unicode_in_be },
  543. { "utf-16be", "alias for unicode-be (MIME)", Func, 0, (Fnptr)unicode_out_be },
  544. { "utf-16le", "alias for unicode-le (MIME)", From|Func, 0, (Fnptr)unicode_in_le },
  545. { "utf-16le", "alias for unicode-le (MIME)", Func, 0, (Fnptr)unicode_out_le },
  546. { "viet1", "Vietnamese VSCII-1 (1993)", Table, (void *)tabviet1 },
  547. { "viet2", "Vietnamese VSCII-2 (1993)", Table, (void *)tabviet2 },
  548. { "vscii", "Vietnamese VISCII 1.1 (1992)", Table, (void *)tabviscii },
  549. { "windows1250", "Windows Code Page 1250 (Central Europe)", Table, (void *)tabcp1250 },
  550. { "windows1251", "Windows Code Page 1251 (Cyrillic)", Table, (void *)tabcp1251 },
  551. { "windows1252", "Windows Code Page 1252 (Latin I)", Table, (void *)tabcp1252 },
  552. { "windows1253", "Windows Code Page 1253 (Greek)", Table, (void *)tabcp1253 },
  553. { "windows1254", "Windows Code Page 1254 (Turkish)", Table, (void *)tabcp1254 },
  554. { "windows1255", "Windows Code Page 1255 (Hebrew)", Table, (void *)tabcp1255 },
  555. { "windows1256", "Windows Code Page 1256 (Arabic)", Table, (void *)tabcp1256 },
  556. { "windows1257", "Windows Code Page 1257 (Baltic)", Table, (void *)tabcp1257 },
  557. { "windows1258", "Windows Code Page 1258 (Vietnam)", Table, (void *)tabcp1258 },
  558. { 0 },
  559. };