/*
 * html2ms.c
 * Reads HTML on standard input and writes troff input using
 * ms-style requests (.TL, .LP, .SH, .IP, .DS/.DE) on standard output.
 */
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <ctype.h>
  4. #include <bio.h>
  5. enum
  6. {
  7. SSIZE = 10,
  8. /* list types */
  9. Lordered = 0,
  10. Lunordered,
  11. Lmenu,
  12. Ldir,
  13. };
  14. Biobuf in, out;
  15. int lastc = '\n';
  16. int inpre = 0;
  17. /* stack for fonts */
  18. char *fontstack[SSIZE];
  19. char *font = "R";
  20. int fsp;
  21. /* stack for lists */
  22. struct
  23. {
  24. int type;
  25. int ord;
  26. } liststack[SSIZE];
  27. int lsp;
  28. int quoting;
  29. typedef struct Goobie Goobie;
  30. struct Goobie
  31. {
  32. char *name;
  33. void (*f)(Goobie*, char*);
  34. void (*ef)(Goobie*, char*);
  35. };
  36. void eatwhite(void);
  37. void escape(void);
  38. typedef void Action(Goobie*, char*);
  39. Action g_ignore;
  40. Action g_unexpected;
  41. Action g_title;
  42. Action g_p;
  43. Action g_h;
  44. Action g_li;
  45. Action g_list, g_listend;
  46. Action g_pre;
  47. Action g_fpush, g_fpop;
  48. Action g_indent, g_exdent;
  49. Action g_dt;
  50. Action g_display;
  51. Action g_displayend;
  52. Action g_table, g_tableend, g_caption, g_captionend;
  53. Action g_br, g_hr;
  54. Goobie gtab[] =
  55. {
  56. "!--", g_ignore, g_unexpected,
  57. "!doctype", g_ignore, g_unexpected,
  58. "a", g_ignore, g_ignore,
  59. "address", g_display, g_displayend,
  60. "b", g_fpush, g_fpop,
  61. "base", g_ignore, g_unexpected,
  62. "blink", g_ignore, g_ignore,
  63. "blockquote", g_ignore, g_ignore,
  64. "body", g_ignore, g_ignore,
  65. "br", g_br, g_unexpected,
  66. "caption", g_caption, g_captionend,
  67. "center", g_ignore, g_ignore,
  68. "cite", g_ignore, g_ignore,
  69. "code", g_ignore, g_ignore,
  70. "dd", g_ignore, g_unexpected,
  71. "dfn", g_ignore, g_ignore,
  72. "dir", g_list, g_listend,
  73. "dl", g_indent, g_exdent,
  74. "dt", g_dt, g_unexpected,
  75. "em", g_ignore, g_ignore,
  76. "font", g_ignore, g_ignore,
  77. "form", g_ignore, g_ignore,
  78. "h1", g_h, g_p,
  79. "h2", g_h, g_p,
  80. "h3", g_h, g_p,
  81. "h4", g_h, g_p,
  82. "h5", g_h, g_p,
  83. "h6", g_h, g_p,
  84. "head", g_ignore, g_ignore,
  85. "hr", g_hr, g_unexpected,
  86. "html", g_ignore, g_ignore,
  87. "i", g_fpush, g_fpop,
  88. "input", g_ignore, g_unexpected,
  89. "img", g_ignore, g_unexpected,
  90. "isindex", g_ignore, g_unexpected,
  91. "kbd", g_fpush, g_fpop,
  92. "key", g_ignore, g_ignore,
  93. "li", g_li, g_unexpected,
  94. "link", g_ignore, g_unexpected,
  95. "listing", g_ignore, g_ignore,
  96. "menu", g_list, g_listend,
  97. "meta", g_ignore, g_unexpected,
  98. "nextid", g_ignore, g_unexpected,
  99. "ol", g_list, g_listend,
  100. "option", g_ignore, g_unexpected,
  101. "p", g_p, g_ignore,
  102. "plaintext", g_ignore, g_unexpected,
  103. "pre", g_pre, g_displayend,
  104. "samp", g_ignore, g_ignore,
  105. "select", g_ignore, g_ignore,
  106. "strong", g_ignore, g_ignore,
  107. "table", g_table, g_tableend,
  108. "textarea", g_ignore, g_ignore,
  109. "title", g_title, g_ignore,
  110. "tt", g_fpush, g_fpop,
  111. "u", g_ignore, g_ignore,
  112. "ul", g_list, g_listend,
  113. "var", g_ignore, g_ignore,
  114. "xmp", g_ignore, g_ignore,
  115. 0, 0, 0,
  116. };
  117. typedef struct Entity Entity;
  118. struct Entity
  119. {
  120. char *name;
  121. Rune value;
  122. };
  123. Entity pl_entity[]=
  124. {
  125. "#SPACE", L' ', "#RS", L'\n', "#RE", L'\r', "quot", L'"',
  126. "AElig", L'Æ', "Aacute", L'Á', "Acirc", L'Â', "Agrave", L'À', "Aring", L'Å',
  127. "Atilde", L'Ã', "Auml", L'Ä', "Ccedil", L'Ç', "ETH", L'Ð', "Eacute", L'É',
  128. "Ecirc", L'Ê', "Egrave", L'È', "Euml", L'Ë', "Iacute", L'Í', "Icirc", L'Î',
  129. "Igrave", L'Ì', "Iuml", L'Ï', "Ntilde", L'Ñ', "Oacute", L'Ó', "Ocirc", L'Ô',
  130. "Ograve", L'Ò', "Oslash", L'Ø', "Otilde", L'Õ', "Ouml", L'Ö', "THORN", L'Þ',
  131. "Uacute", L'Ú', "Ucirc", L'Û', "Ugrave", L'Ù', "Uuml", L'Ü', "Yacute", L'Ý',
  132. "aacute", L'á', "acirc", L'â', "aelig", L'æ', "agrave", L'à', "amp", L'&',
  133. "aring", L'å', "atilde", L'ã', "auml", L'ä', "ccedil", L'ç', "eacute", L'é',
  134. "ecirc", L'ê', "egrave", L'è', "eth", L'ð', "euml", L'ë', "gt", L'>',
  135. "iacute", L'í', "icirc", L'î', "igrave", L'ì', "iuml", L'ï', "lt", L'<',
  136. "ntilde", L'ñ', "oacute", L'ó', "ocirc", L'ô', "ograve", L'ò', "oslash", L'ø',
  137. "otilde", L'õ', "ouml", L'ö', "szlig", L'ß', "thorn", L'þ', "uacute", L'ú',
  138. "ucirc", L'û', "ugrave", L'ù', "uuml", L'ü', "yacute", L'ý', "yuml", L'ÿ',
  139. 0
  140. };
  141. int
  142. cistrcmp(char *a, char *b)
  143. {
  144. int c, d;
  145. for(;; a++, b++){
  146. d = tolower(*a);
  147. c = d - tolower(*b);
  148. if(c)
  149. break;
  150. if(d == 0)
  151. break;
  152. }
  153. return c;
  154. }
  155. int
  156. readupto(char *buf, int n, char d, char notme)
  157. {
  158. char *p;
  159. int c;
  160. buf[0] = 0;
  161. for(p = buf;; p++){
  162. c = Bgetc(&in);
  163. if(c < 0){
  164. *p = 0;
  165. return -1;
  166. }
  167. if(c == notme){
  168. Bungetc(&in);
  169. return -1;
  170. }
  171. if(c == d){
  172. *p = 0;
  173. return 0;
  174. }
  175. *p = c;
  176. if(p == buf + n){
  177. *p = 0;
  178. Bprint(&out, "<%s", buf);
  179. return -1;
  180. }
  181. }
  182. }
  183. void
  184. dogoobie(void)
  185. {
  186. char *arg, *type;
  187. Goobie *g;
  188. char buf[1024];
  189. int closing;
  190. if(readupto(buf, sizeof(buf), '>', '<') < 0){
  191. Bprint(&out, "<%s", buf);
  192. return;
  193. }
  194. type = buf;
  195. if(*type == '/'){
  196. type++;
  197. closing = 1;
  198. } else
  199. closing = 0;
  200. arg = strchr(type, ' ');
  201. if(arg == 0)
  202. arg = strchr(type, '\r');
  203. if(arg == 0)
  204. arg = strchr(type, '\n');
  205. if(arg)
  206. *arg++ = 0;
  207. for(g = gtab; g->name; g++)
  208. if(cistrcmp(type, g->name) == 0){
  209. if(closing){
  210. if(g->ef){
  211. (*g->ef)(g, arg);
  212. return;
  213. }
  214. } else {
  215. if(g->f){
  216. (*g->f)(g, arg);
  217. return;
  218. }
  219. }
  220. }
  221. if(closing)
  222. type--;
  223. if(arg)
  224. Bprint(&out, "<%s %s>\n", type, arg);
  225. else
  226. Bprint(&out, "<%s>\n", type);
  227. }
  228. void
  229. main(void)
  230. {
  231. int c;
  232. Binit(&in, 0, OREAD);
  233. Binit(&out, 1, OWRITE);
  234. for(;;){
  235. c = Bgetc(&in);
  236. if(c < 0)
  237. return;
  238. switch(c){
  239. case '<':
  240. dogoobie();
  241. break;
  242. case '&':
  243. escape();
  244. break;
  245. case '\r':
  246. break;
  247. case '\n':
  248. if(quoting){
  249. Bputc(&out, '"');
  250. quoting = 0;
  251. }
  252. if(lastc != '\n')
  253. Bputc(&out, '\n');
  254. /* can't emit leading spaces in filled troff docs */
  255. if (!inpre)
  256. eatwhite();
  257. lastc = c;
  258. break;
  259. default:
  260. Bputc(&out, c);
  261. lastc = c;
  262. break;
  263. }
  264. }
  265. }
  266. void
  267. escape(void)
  268. {
  269. Entity *e;
  270. char buf[8];
  271. if(readupto(buf, sizeof(buf), ';', '\n') < 0){
  272. Bprint(&out, "&%s", buf);
  273. return;
  274. }
  275. for(e = pl_entity; e->name; e++)
  276. if(strcmp(buf, e->name) == 0){
  277. Bprint(&out, "%C", e->value);
  278. return;
  279. }
  280. Bprint(&out, "&%s;", buf);
  281. }
  282. /*
  283. * whitespace is not significant to HTML, but newlines
  284. * and leading spaces are significant to troff.
  285. */
  286. void
  287. eatwhite(void)
  288. {
  289. int c;
  290. for(;;){
  291. c = Bgetc(&in);
  292. if(c < 0)
  293. break;
  294. if(!isspace(c)){
  295. Bungetc(&in);
  296. break;
  297. }
  298. }
  299. }
  300. /*
  301. * print at start of line
  302. */
  303. void
  304. printsol(char *fmt, ...)
  305. {
  306. va_list arg;
  307. if(quoting){
  308. Bputc(&out, '"');
  309. quoting = 0;
  310. }
  311. if(lastc != '\n')
  312. Bputc(&out, '\n');
  313. va_start(arg, fmt);
  314. Bvprint(&out, fmt, arg);
  315. va_end(arg);
  316. lastc = '\n';
  317. }
  318. void
  319. g_ignore(Goobie *g, char *arg)
  320. {
  321. USED(g, arg);
  322. }
  323. void
  324. g_unexpected(Goobie *g, char *arg)
  325. {
  326. USED(arg);
  327. fprint(2, "unexpected %s ending\n", g->name);
  328. }
  329. void
  330. g_title(Goobie *g, char *arg)
  331. {
  332. USED(arg);
  333. printsol(".TL\n", g->name);
  334. }
  335. void
  336. g_p(Goobie *g, char *arg)
  337. {
  338. USED(arg);
  339. printsol(".LP\n", g->name);
  340. }
  341. void
  342. g_h(Goobie *g, char *arg)
  343. {
  344. USED(arg);
  345. printsol(".SH %c\n", g->name[1]);
  346. }
  347. void
  348. g_list(Goobie *g, char *arg)
  349. {
  350. USED(arg);
  351. if(lsp != SSIZE){
  352. switch(g->name[0]){
  353. case 'o':
  354. liststack[lsp].type = Lordered;
  355. liststack[lsp].ord = 0;
  356. break;
  357. default:
  358. liststack[lsp].type = Lunordered;
  359. break;
  360. }
  361. }
  362. lsp++;
  363. }
  364. void
  365. g_br(Goobie *g, char *arg)
  366. {
  367. USED(g, arg);
  368. printsol(".br\n");
  369. }
  370. void
  371. g_li(Goobie *g, char *arg)
  372. {
  373. USED(g, arg);
  374. if(lsp <= 0 || lsp > SSIZE){
  375. printsol(".IP \\(bu\n");
  376. return;
  377. }
  378. switch(liststack[lsp-1].type){
  379. case Lunordered:
  380. printsol(".IP \\(bu\n");
  381. break;
  382. case Lordered:
  383. printsol(".IP %d\n", ++liststack[lsp-1].ord);
  384. break;
  385. }
  386. }
  387. void
  388. g_listend(Goobie *g, char *arg)
  389. {
  390. USED(g, arg);
  391. if(--lsp < 0)
  392. lsp = 0;
  393. printsol(".LP\n");
  394. }
  395. void
  396. g_display(Goobie *g, char *arg)
  397. {
  398. USED(g, arg);
  399. printsol(".DS\n");
  400. }
  401. void
  402. g_pre(Goobie *g, char *arg)
  403. {
  404. USED(g, arg);
  405. printsol(".DS L\n");
  406. inpre = 1;
  407. }
  408. void
  409. g_displayend(Goobie *g, char *arg)
  410. {
  411. USED(g, arg);
  412. printsol(".DE\n");
  413. inpre = 0;
  414. }
  415. void
  416. g_fpush(Goobie *g, char *arg)
  417. {
  418. USED(arg);
  419. if(fsp < SSIZE)
  420. fontstack[fsp] = font;
  421. fsp++;
  422. switch(g->name[0]){
  423. case 'b':
  424. font = "B";
  425. break;
  426. case 'i':
  427. font = "I";
  428. break;
  429. case 'k': /* kbd */
  430. case 't': /* tt */
  431. font = "(CW";
  432. break;
  433. }
  434. Bprint(&out, "\\f%s", font);
  435. }
  436. void
  437. g_fpop(Goobie *g, char *arg)
  438. {
  439. USED(g, arg);
  440. fsp--;
  441. if(fsp < SSIZE)
  442. font = fontstack[fsp];
  443. else
  444. font = "R";
  445. Bprint(&out, "\\f%s", font);
  446. }
  447. void
  448. g_indent(Goobie *g, char *arg)
  449. {
  450. USED(g, arg);
  451. printsol(".RS\n");
  452. }
  453. void
  454. g_exdent(Goobie *g, char *arg)
  455. {
  456. USED(g, arg);
  457. printsol(".RE\n");
  458. }
  459. void
  460. g_dt(Goobie *g, char *arg)
  461. {
  462. USED(g, arg);
  463. printsol(".IP \"");
  464. quoting = 1;
  465. }
  466. void
  467. g_hr(Goobie *g, char *arg)
  468. {
  469. USED(g, arg);
  470. printsol(".br\n");
  471. printsol("\\l'5i'\n");
  472. }
  473. /*
  474. <table border>
  475. <caption><font size="+1"><b>Cumulative Class Data</b></font></caption>
  476. <tr><th rowspan=2>DOSE<br>mg/kg</th><th colspan=2>PARALYSIS</th><th colspan=2>DEATH</th>
  477. </tr>
  478. <tr><th width=80>Number</th><th width=80>Percent</th><th width=80>Number</th><th width=80>Percent</th>
  479. </tr>
  480. <tr align=center>
  481. <td>0.1</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  482. </tr>
  483. <tr align=center>
  484. <td>0.2</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  485. </tr>
  486. <tr align=center>
  487. <td>0.3</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  488. </tr>
  489. <tr align=center>
  490. <td>0.4</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  491. </tr>
  492. <tr align=center>
  493. <td>0.5</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  494. </tr>
  495. <tr align=center>
  496. <td>0.6</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  497. </tr>
  498. <tr align=center>
  499. <td>0.7</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  500. </tr>
  501. <tr align=center>
  502. <td>0.8</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  503. </tr>
  504. <tr align=center>
  505. <td>0.8 oral</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  506. </tr>
  507. </table>
  508. */
  509. void
  510. g_table(Goobie *g, char *arg)
  511. {
  512. USED(g, arg);
  513. printsol(".TS\ncenter ;\n");
  514. }
  515. void
  516. g_tableend(Goobie *g, char *arg)
  517. {
  518. USED(g, arg);
  519. printsol(".TE\n");
  520. }
  521. void
  522. g_caption(Goobie *g, char *arg)
  523. {
  524. USED(g, arg);
  525. }
  526. void
  527. g_captionend(Goobie *g, char *arg)
  528. {
  529. USED(g, arg);
  530. }