html2ms.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include <u.h>
  10. #include <libc.h>
  11. #include <ctype.h>
  12. #include <bio.h>
  13. enum
  14. {
  15. SSIZE = 10,
  16. /* list types */
  17. Lordered = 0,
  18. Lunordered,
  19. Lmenu,
  20. Ldir,
  21. };
  22. Biobuf in, out;
  23. int lastc = '\n';
  24. int inpre = 0;
  25. /* stack for fonts */
  26. char *fontstack[SSIZE];
  27. char *font = "R";
  28. int fsp;
  29. /* stack for lists */
  30. struct
  31. {
  32. int type;
  33. int ord;
  34. } liststack[SSIZE];
  35. int lsp;
  36. int quoting;
  37. typedef struct Goobie Goobie;
  38. struct Goobie
  39. {
  40. char *name;
  41. void (*f)(Goobie*, char*);
  42. void (*ef)(Goobie*, char*);
  43. };
  44. void eatwhite(void);
  45. void escape(void);
  46. typedef void Action(Goobie*, char*);
  47. Action g_ignore;
  48. Action g_unexpected;
  49. Action g_title;
  50. Action g_p;
  51. Action g_h;
  52. Action g_li;
  53. Action g_list, g_listend;
  54. Action g_pre;
  55. Action g_fpush, g_fpop;
  56. Action g_indent, g_exdent;
  57. Action g_dt;
  58. Action g_display;
  59. Action g_displayend;
  60. Action g_table, g_tableend, g_caption, g_captionend;
  61. Action g_br, g_hr;
  62. Goobie gtab[] =
  63. {
  64. "!--", g_ignore, g_unexpected,
  65. "!doctype", g_ignore, g_unexpected,
  66. "a", g_ignore, g_ignore,
  67. "address", g_display, g_displayend,
  68. "b", g_fpush, g_fpop,
  69. "base", g_ignore, g_unexpected,
  70. "blink", g_ignore, g_ignore,
  71. "blockquote", g_ignore, g_ignore,
  72. "body", g_ignore, g_ignore,
  73. "br", g_br, g_unexpected,
  74. "caption", g_caption, g_captionend,
  75. "center", g_ignore, g_ignore,
  76. "cite", g_ignore, g_ignore,
  77. "code", g_ignore, g_ignore,
  78. "dd", g_ignore, g_unexpected,
  79. "dfn", g_ignore, g_ignore,
  80. "dir", g_list, g_listend,
  81. "div", g_ignore, g_br,
  82. "dl", g_indent, g_exdent,
  83. "dt", g_dt, g_unexpected,
  84. "em", g_ignore, g_ignore,
  85. "font", g_ignore, g_ignore,
  86. "form", g_ignore, g_ignore,
  87. "h1", g_h, g_p,
  88. "h2", g_h, g_p,
  89. "h3", g_h, g_p,
  90. "h4", g_h, g_p,
  91. "h5", g_h, g_p,
  92. "h6", g_h, g_p,
  93. "head", g_ignore, g_ignore,
  94. "hr", g_hr, g_unexpected,
  95. "html", g_ignore, g_ignore,
  96. "i", g_fpush, g_fpop,
  97. "input", g_ignore, g_unexpected,
  98. "img", g_ignore, g_unexpected,
  99. "isindex", g_ignore, g_unexpected,
  100. "kbd", g_fpush, g_fpop,
  101. "key", g_ignore, g_ignore,
  102. "li", g_li, g_unexpected,
  103. "link", g_ignore, g_unexpected,
  104. "listing", g_ignore, g_ignore,
  105. "menu", g_list, g_listend,
  106. "meta", g_ignore, g_unexpected,
  107. "nextid", g_ignore, g_unexpected,
  108. "ol", g_list, g_listend,
  109. "option", g_ignore, g_unexpected,
  110. "p", g_p, g_ignore,
  111. "plaintext", g_ignore, g_unexpected,
  112. "pre", g_pre, g_displayend,
  113. "samp", g_ignore, g_ignore,
  114. "script", g_ignore, g_ignore,
  115. "select", g_ignore, g_ignore,
  116. "span", g_ignore, g_ignore,
  117. "strong", g_ignore, g_ignore,
  118. "table", g_table, g_tableend,
  119. "textarea", g_ignore, g_ignore,
  120. "title", g_title, g_ignore,
  121. "tt", g_fpush, g_fpop,
  122. "u", g_ignore, g_ignore,
  123. "ul", g_list, g_listend,
  124. "var", g_ignore, g_ignore,
  125. "xmp", g_ignore, g_ignore,
  126. 0, 0, 0,
  127. };
  128. typedef struct Entity Entity;
  129. struct Entity
  130. {
  131. char *name;
  132. Rune value;
  133. };
  134. Entity pl_entity[]=
  135. {
  136. "#SPACE", L' ', "#RS", L'\n', "#RE", L'\r', "quot", L'"',
  137. "AElig", L'Æ', "Aacute", L'Á', "Acirc", L'Â', "Agrave", L'À', "Aring", L'Å',
  138. "Atilde", L'Ã', "Auml", L'Ä', "Ccedil", L'Ç', "ETH", L'Ð', "Eacute", L'É',
  139. "Ecirc", L'Ê', "Egrave", L'È', "Euml", L'Ë', "Iacute", L'Í', "Icirc", L'Î',
  140. "Igrave", L'Ì', "Iuml", L'Ï', "Ntilde", L'Ñ', "Oacute", L'Ó', "Ocirc", L'Ô',
  141. "Ograve", L'Ò', "Oslash", L'Ø', "Otilde", L'Õ', "Ouml", L'Ö', "THORN", L'Þ',
  142. "Uacute", L'Ú', "Ucirc", L'Û', "Ugrave", L'Ù', "Uuml", L'Ü', "Yacute", L'Ý',
  143. "aacute", L'á', "acirc", L'â', "aelig", L'æ', "agrave", L'à', "amp", L'&',
  144. "aring", L'å', "atilde", L'ã', "auml", L'ä', "ccedil", L'ç', "eacute", L'é',
  145. "ecirc", L'ê', "egrave", L'è', "eth", L'ð', "euml", L'ë', "gt", L'>',
  146. "iacute", L'í', "icirc", L'î', "igrave", L'ì', "iuml", L'ï', "lt", L'<',
  147. "nbsp", L' ',
  148. "ntilde", L'ñ', "oacute", L'ó', "ocirc", L'ô', "ograve", L'ò', "oslash", L'ø',
  149. "otilde", L'õ', "ouml", L'ö', "szlig", L'ß', "thorn", L'þ', "uacute", L'ú',
  150. "ucirc", L'û', "ugrave", L'ù', "uuml", L'ü', "yacute", L'ý', "yuml", L'ÿ',
  151. 0
  152. };
  153. int
  154. cistrcmp(char *a, char *b)
  155. {
  156. int c, d;
  157. for(;; a++, b++){
  158. d = tolower(*a);
  159. c = d - tolower(*b);
  160. if(c)
  161. break;
  162. if(d == 0)
  163. break;
  164. }
  165. return c;
  166. }
  167. int
  168. readupto(char *buf, int n, char d, char notme)
  169. {
  170. char *p;
  171. int c;
  172. buf[0] = 0;
  173. for(p = buf;; p++){
  174. c = Bgetc(&in);
  175. if(c < 0){
  176. *p = 0;
  177. return -1;
  178. }
  179. if(c == notme){
  180. Bungetc(&in);
  181. return -1;
  182. }
  183. if(c == d){
  184. *p = 0;
  185. return 0;
  186. }
  187. *p = c;
  188. if(p == buf + n){
  189. *p = 0;
  190. Bprint(&out, "<%s", buf);
  191. return -1;
  192. }
  193. }
  194. }
  195. void
  196. dogoobie(void)
  197. {
  198. char *arg, *type;
  199. Goobie *g;
  200. char buf[1024];
  201. int closing;
  202. if(readupto(buf, sizeof(buf), '>', '<') < 0){
  203. Bprint(&out, "<%s", buf);
  204. return;
  205. }
  206. type = buf;
  207. if(*type == '/'){
  208. type++;
  209. closing = 1;
  210. } else
  211. closing = 0;
  212. arg = strchr(type, ' ');
  213. if(arg == 0)
  214. arg = strchr(type, '\r');
  215. if(arg == 0)
  216. arg = strchr(type, '\n');
  217. if(arg)
  218. *arg++ = 0;
  219. for(g = gtab; g->name; g++)
  220. if(cistrcmp(type, g->name) == 0){
  221. if(closing){
  222. if(g->ef){
  223. (*g->ef)(g, arg);
  224. return;
  225. }
  226. } else {
  227. if(g->f){
  228. (*g->f)(g, arg);
  229. return;
  230. }
  231. }
  232. }
  233. if(closing)
  234. type--;
  235. if(arg)
  236. Bprint(&out, "<%s %s>\n", type, arg);
  237. else
  238. Bprint(&out, "<%s>\n", type);
  239. }
  240. void
  241. main(void)
  242. {
  243. int c, pos;
  244. Binit(&in, 0, OREAD);
  245. Binit(&out, 1, OWRITE);
  246. pos = 0;
  247. for(;;){
  248. c = Bgetc(&in);
  249. if(c < 0)
  250. return;
  251. switch(c){
  252. case '<':
  253. dogoobie();
  254. break;
  255. case '&':
  256. escape();
  257. break;
  258. case '\r':
  259. pos = 0;
  260. break;
  261. case '\n':
  262. if(quoting){
  263. Bputc(&out, '"');
  264. quoting = 0;
  265. }
  266. if(lastc != '\n')
  267. Bputc(&out, '\n');
  268. /* can't emit leading spaces in filled troff docs */
  269. if (!inpre)
  270. eatwhite();
  271. lastc = c;
  272. break;
  273. default:
  274. ++pos;
  275. if(!inpre && isascii(c) && isspace(c) && pos > 80){
  276. Bputc(&out, '\n');
  277. eatwhite();
  278. pos = 0;
  279. }else
  280. Bputc(&out, c);
  281. lastc = c;
  282. break;
  283. }
  284. }
  285. }
  286. void
  287. escape(void)
  288. {
  289. int c;
  290. Entity *e;
  291. char buf[8];
  292. if(readupto(buf, sizeof(buf), ';', '\n') < 0){
  293. Bprint(&out, "&%s", buf);
  294. return;
  295. }
  296. for(e = pl_entity; e->name; e++)
  297. if(strcmp(buf, e->name) == 0){
  298. Bprint(&out, "%C", e->value);
  299. return;
  300. }
  301. if(*buf == '#'){
  302. c = atoi(buf+1);
  303. if(isascii(c) && isprint(c)){
  304. Bputc(&out, c);
  305. return;
  306. }
  307. }
  308. Bprint(&out, "&%s;", buf);
  309. }
  310. /*
  311. * whitespace is not significant to HTML, but newlines
  312. * and leading spaces are significant to troff.
  313. */
  314. void
  315. eatwhite(void)
  316. {
  317. int c;
  318. for(;;){
  319. c = Bgetc(&in);
  320. if(c < 0)
  321. break;
  322. if(!isspace(c)){
  323. Bungetc(&in);
  324. break;
  325. }
  326. }
  327. }
  328. /*
  329. * print at start of line
  330. */
  331. void
  332. printsol(char *fmt, ...)
  333. {
  334. va_list arg;
  335. if(quoting){
  336. Bputc(&out, '"');
  337. quoting = 0;
  338. }
  339. if(lastc != '\n')
  340. Bputc(&out, '\n');
  341. va_start(arg, fmt);
  342. Bvprint(&out, fmt, arg);
  343. va_end(arg);
  344. lastc = '\n';
  345. }
  346. void
  347. g_ignore(Goobie *g, char *arg)
  348. {
  349. USED(g); USED(arg);
  350. }
  351. void
  352. g_unexpected(Goobie *g, char *arg)
  353. {
  354. USED(arg);
  355. fprint(2, "unexpected %s ending\n", g->name);
  356. }
  357. void
  358. g_title(Goobie *g, char *arg)
  359. {
  360. USED(arg);
  361. printsol(".TL\n", g->name);
  362. }
  363. void
  364. g_p(Goobie *g, char *arg)
  365. {
  366. USED(arg);
  367. printsol(".LP\n", g->name);
  368. }
  369. void
  370. g_h(Goobie *g, char *arg)
  371. {
  372. USED(arg);
  373. printsol(".SH %c\n", g->name[1]);
  374. }
  375. void
  376. g_list(Goobie *g, char *arg)
  377. {
  378. USED(arg);
  379. if(lsp != SSIZE){
  380. switch(g->name[0]){
  381. case 'o':
  382. liststack[lsp].type = Lordered;
  383. liststack[lsp].ord = 0;
  384. break;
  385. default:
  386. liststack[lsp].type = Lunordered;
  387. break;
  388. }
  389. }
  390. lsp++;
  391. }
  392. void
  393. g_br(Goobie *g, char *arg)
  394. {
  395. USED(g); USED(arg);
  396. printsol(".br\n");
  397. }
  398. void
  399. g_li(Goobie *g, char *arg)
  400. {
  401. USED(g); USED(arg);
  402. if(lsp <= 0 || lsp > SSIZE){
  403. printsol(".IP \\(bu\n");
  404. return;
  405. }
  406. switch(liststack[lsp-1].type){
  407. case Lunordered:
  408. printsol(".IP \\(bu\n");
  409. break;
  410. case Lordered:
  411. printsol(".IP %d\n", ++liststack[lsp-1].ord);
  412. break;
  413. }
  414. }
  415. void
  416. g_listend(Goobie *g, char *arg)
  417. {
  418. USED(g); USED(arg);
  419. if(--lsp < 0)
  420. lsp = 0;
  421. printsol(".LP\n");
  422. }
  423. void
  424. g_display(Goobie *g, char *arg)
  425. {
  426. USED(g); USED(arg);
  427. printsol(".DS\n");
  428. }
  429. void
  430. g_pre(Goobie *g, char *arg)
  431. {
  432. USED(g); USED(arg);
  433. printsol(".DS L\n");
  434. inpre = 1;
  435. }
  436. void
  437. g_displayend(Goobie *g, char *arg)
  438. {
  439. USED(g); USED(arg);
  440. printsol(".DE\n");
  441. inpre = 0;
  442. }
  443. void
  444. g_fpush(Goobie *g, char *arg)
  445. {
  446. USED(arg);
  447. if(fsp < SSIZE)
  448. fontstack[fsp] = font;
  449. fsp++;
  450. switch(g->name[0]){
  451. case 'b':
  452. font = "B";
  453. break;
  454. case 'i':
  455. font = "I";
  456. break;
  457. case 'k': /* kbd */
  458. case 't': /* tt */
  459. font = "(CW";
  460. break;
  461. }
  462. Bprint(&out, "\\f%s", font);
  463. }
  464. void
  465. g_fpop(Goobie *g, char *arg)
  466. {
  467. USED(g); USED(arg);
  468. fsp--;
  469. if(fsp < SSIZE)
  470. font = fontstack[fsp];
  471. else
  472. font = "R";
  473. Bprint(&out, "\\f%s", font);
  474. }
  475. void
  476. g_indent(Goobie *g, char *arg)
  477. {
  478. USED(g); USED(arg);
  479. printsol(".RS\n");
  480. }
  481. void
  482. g_exdent(Goobie *g, char *arg)
  483. {
  484. USED(g); USED(arg);
  485. printsol(".RE\n");
  486. }
  487. void
  488. g_dt(Goobie *g, char *arg)
  489. {
  490. USED(g); USED(arg);
  491. printsol(".IP \"");
  492. quoting = 1;
  493. }
  494. void
  495. g_hr(Goobie *g, char *arg)
  496. {
  497. USED(g); USED(arg);
  498. printsol(".br\n");
  499. printsol("\\l'5i'\n");
  500. }
  501. /*
  502. <table border>
  503. <caption><font size="+1"><b>Cumulative Class Data</b></font></caption>
  504. <tr><th rowspan=2>DOSE<br>mg/kg</th><th colspan=2>PARALYSIS</th><th colspan=2>DEATH</th>
  505. </tr>
  506. <tr><th width=80>Number</th><th width=80>Percent</th><th width=80>Number</th><th width=80>Percent</th>
  507. </tr>
  508. <tr align=center>
  509. <td>0.1</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  510. </tr>
  511. <tr align=center>
  512. <td>0.2</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  513. </tr>
  514. <tr align=center>
  515. <td>0.3</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  516. </tr>
  517. <tr align=center>
  518. <td>0.4</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  519. </tr>
  520. <tr align=center>
  521. <td>0.5</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  522. </tr>
  523. <tr align=center>
  524. <td>0.6</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  525. </tr>
  526. <tr align=center>
  527. <td>0.7</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  528. </tr>
  529. <tr align=center>
  530. <td>0.8</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  531. </tr>
  532. <tr align=center>
  533. <td>0.8 oral</td><td><br></td> <td><br></td> <td><br></td> <td><br></td>
  534. </tr>
  535. </table>
  536. */
  537. void
  538. g_table(Goobie *g, char *arg)
  539. {
  540. USED(g); USED(arg);
  541. printsol(".TS\ncenter ;\n");
  542. }
  543. void
  544. g_tableend(Goobie *g, char *arg)
  545. {
  546. USED(g); USED(arg);
  547. printsol(".TE\n");
  548. }
  549. void
  550. g_caption(Goobie *g, char *arg)
  551. {
  552. USED(g); USED(arg);
  553. }
  554. void
  555. g_captionend(Goobie *g, char *arg)
  556. {
  557. USED(g); USED(arg);
  558. }