12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795 |
- #include <u.h>
- #include <libc.h>
- #include <bio.h>
/* sizing constants */
enum
{
	Nfont	= 11,	/* size of the font[] mount table; positions are 0..Nfont-1 */
	Wid	= 20,	/* tmac.anhtml sets page width to 20" so we can recognize .nf text */
};
- typedef uintptr Char;
- typedef struct Troffchar Troffchar;
- typedef struct Htmlchar Htmlchar;
- typedef struct Font Font;
- typedef struct HTMLfont HTMLfont;
- /*
- * a Char is >= 32 bits. low 16 bits are the rune. higher are attributes.
- * must be able to hold a pointer.
- */
/*
 * Attribute bit numbers: attribute X is the bit 1<<X in a Char.
 * The same numbers index the onattr[] and offattr[] tables.
 */
enum
{
	Italic	= 16,
	Bold	= 17,
	CW	= 18,
	Indent1	= 19,
	Indent2	= 20,
	Indent3	= 21,
	Heading	= 25,
	Anchor	= 26,	/* must be last */
};
/* magic emissions: Char values that flush treats as commands, not characters */
enum
{
	Estring	= 0,		/* the next Char slot holds a char* to print */
	Epp	= 1<<16,	/* emitted for manPP; flush prints a spacer table */
};
- int attrorder[] = { Indent1, Indent2, Indent3, Heading, Anchor, Italic, Bold, CW };
- int nest[10];
- int nnest;
/* one troff special-character name and the string emitted for it */
struct Troffchar
{
	char	*name;	/* key matched by troffchar() */
	char	*value;	/* string returned on a match */
};

/* one character that is emitted as a string rather than as itself */
struct Htmlchar
{
	char	*utf;	/* the character as UTF; main() decodes it into value */
	char	*name;	/* string emitted by emithtmlchar() in its place */
	int	value;	/* rune decoded from utf; sort and search key */
};
- #include "chars.h"
- struct Font{
- char *name;
- HTMLfont *htmlfont;
- };
- struct HTMLfont{
- char *name;
- char *htmlname;
- int bit;
- };
- /* R must be first; it's the default representation for fonts we don't recognize */
- HTMLfont htmlfonts[] =
- {
- "R", nil, 0,
- "LucidaSans", nil, 0,
- "I", "i", Italic,
- "LucidaSansI", "i", Italic,
- "CW", "tt", CW,
- "LucidaCW", "tt", CW,
- nil, nil,
- };
- #define TABLE "<table border=0 cellpadding=0 cellspacing=0>"
- char*
- onattr[8*sizeof(int)] =
- {
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- "<i>", /* italic */
- "<b>", /* bold */
- "<tt><font size=+1>", /* cw */
- "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n", /* indent1 */
- "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n", /* indent2 */
- "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n", /* indent3 */
- 0,
- 0,
- 0,
- "<p><font size=+1><b>", /* heading 25 */
- "<unused>", /* anchor 26 */
- };
- char*
- offattr[8*sizeof(int)] =
- {
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- "</i>", /* italic */
- "</b>", /* bold */
- "</font></tt>", /* cw */
- "<-/table>", /* indent1 */
- "<-/table>", /* indent2 */
- "<-/table>", /* indent3 */
- 0,
- 0,
- 0,
- "</b></font>", /* heading 25 */
- "</a>", /* anchor 26 */
- };
- Font *font[Nfont];
- Biobuf bout;
- int debug = 0;
- /* troff state */
- int page = 1;
- int ft = 1;
- int vp = 0;
- int hp = 0;
- int ps = 1;
- int res = 720;
- int didP = 0;
- int atnewline = 1;
- int prevlineH = 0;
- Char attr = 0; /* or'ed into each Char */
- Char *chars;
- int nchars;
- int nalloc;
- char** anchors; /* allocated in order */
- int nanchors;
- char *filename;
- int cno;
- char buf[8192];
- char *title = "Plan 9 man page";
- void process(Biobuf*, char*);
- void mountfont(int, char*);
- void switchfont(int);
- void header(char*);
- void flush(void);
- void trailer(void);
- void*
- emalloc(ulong n)
- {
- void *p;
- p = malloc(n);
- if(p == nil)
- sysfatal("malloc failed: %r");
- return p;
- }
- void*
- erealloc(void *p, ulong n)
- {
- p = realloc(p, n);
- if(p == nil)
- sysfatal("realloc failed: %r");
- return p;
- }
- char*
- estrdup(char *s)
- {
- char *t;
- t = strdup(s);
- if(t == nil)
- sysfatal("strdup failed: %r");
- return t;
- }
/*
 * Print the command synopsis on file descriptor 2 and exit
 * with status "usage".
 */
void
usage(void)
{
	static char synopsis[] = "usage: troff2html [-d] [-t title] [file ...]\n";

	fprint(2, "%s", synopsis);
	exits("usage");
}
- int
- hccmp(const void *va, const void *vb)
- {
- Htmlchar *a, *b;
- a = (Htmlchar*)va;
- b = (Htmlchar*)vb;
- return a->value - b->value;
- }
- void
- main(int argc, char *argv[])
- {
- int i;
- Biobuf in, *inp;
- Rune r;
- for(i=0; i<nelem(htmlchars); i++){
- chartorune(&r, htmlchars[i].utf);
- htmlchars[i].value = r;
- }
- qsort(htmlchars, nelem(htmlchars), sizeof(htmlchars[0]), hccmp);
- ARGBEGIN{
- case 't':
- title = ARGF();
- if(title == nil)
- usage();
- break;
- case 'd':
- debug++;
- break;
- default:
- usage();
- }ARGEND
- Binit(&bout, 1, OWRITE);
- if(argc == 0){
- header(title);
- Binit(&in, 0, OREAD);
- process(&in, "<stdin>");
- }else{
- header(title);
- for(i=0; i<argc; i++){
- inp = Bopen(argv[i], OREAD);
- if(inp == nil)
- sysfatal("can't open %s: %r", argv[i]);
- process(inp, argv[i]);
- Bterm(inp);
- }
- }
- flush();
- trailer();
- exits(nil);
- }
- void
- emitchar(Char c)
- {
- if(nalloc == nchars){
- nalloc += 10000;
- chars = realloc(chars, nalloc*sizeof(chars[0]));
- if(chars == nil)
- sysfatal("malloc failed: %r");
- }
- chars[nchars++] = c;
- }
- void
- emit(Rune r)
- {
- emitchar(r | attr);
- /*
- * Close man page references early, so that
- * .IR proof (1),
- * doesn't make the comma part of the link.
- */
- if(r == ')')
- attr &= ~(1<<Anchor);
- }
- void
- emitstr(char *s)
- {
- emitchar(Estring);
- emitchar((Char)s);
- }
- int indentlevel;
- int linelen;
- void
- iputrune(Biobuf *b, Rune r)
- {
- int i;
- if(linelen++ > 60 && r == ' ')
- r = '\n';
- Bputrune(b, r);
- if(r == '\n'){
- for(i=0; i<indentlevel; i++)
- Bprint(b, " ");
- linelen = 0;
- }
- }
- void
- iputs(Biobuf *b, char *s)
- {
- if(s[0]=='<' && s[1]=='+'){
- iputrune(b, '\n');
- Bprint(b, "<%s", s+2);
- indentlevel++;
- iputrune(b, '\n');
- }else if(s[0]=='<' && s[1]=='-'){
- indentlevel--;
- iputrune(b, '\n');
- Bprint(b, "<%s", s+2);
- iputrune(b, '\n');
- }else
- Bprint(b, "%s", s);
- }
- void
- setattr(Char a)
- {
- Char on, off;
- int i, j;
- on = a & ~attr;
- off = attr & ~a;
- /* walk up the nest stack until we reach something we need to turn off. */
- for(i=0; i<nnest; i++)
- if(off&(1<<nest[i]))
- break;
- /* turn off everything above that */
- for(j=nnest-1; j>=i; j--)
- iputs(&bout, offattr[nest[j]]);
- /* turn on everything we just turned off but didn't want to */
- for(j=i; j<nnest; j++)
- if(a&(1<<nest[j]))
- iputs(&bout, onattr[nest[j]]);
- else
- nest[j] = 0;
- /* shift the zeros (turned off things) up */
- for(i=j=0; i<nnest; i++)
- if(nest[i] != 0)
- nest[j++] = nest[i];
- nnest = j;
- /* now turn on the new attributes */
- for(i=0; i<nelem(attrorder); i++){
- j = attrorder[i];
- if(on&(1<<j)){
- if(j == Anchor)
- onattr[j] = anchors[nanchors++];
- iputs(&bout, onattr[j]);
- if(nnest >= nelem(nest))
- sysfatal("nesting too deep");
- nest[nnest++] = j;
- }
- }
- attr = a;
- }
- void
- flush(void)
- {
- int i;
- Char c, a;
- nanchors = 0;
- for(i=0; i<nchars; i++){
- c = chars[i];
- if(c == Estring){
- /* next word is string to print */
- iputs(&bout, (char*)chars[++i]);
- continue;
- }
- if(c == Epp){
- iputrune(&bout, '\n');
- iputs(&bout, TABLE "<tr height=5><td></table>");
- iputrune(&bout, '\n');
- continue;
- }
- a = c & ~0xFFFF;
- c &= 0xFFFF;
- /*
- * If we're going to something off after a space,
- * let's just turn it off before.
- */
- if(c == ' ' && i<nchars-1 && (chars[i+1]&0xFFFF) >= 32)
- a ^= a & ~chars[i+1];
- setattr(a);
- iputrune(&bout, c & 0xFFFF);
- }
- }
- void
- header(char *s)
- {
- Bprint(&bout, "<head>\n");
- Bprint(&bout, "<title>%s</title>\n", s);
- Bprint(&bout, "<meta content=\"text/html; charset=utf-8\" http-equiv=Content-Type>\n");
- Bprint(&bout, "</head>\n");
- Bprint(&bout, "<body bgcolor=#ffffff>\n");
- }
- void
- trailer(void)
- {
- #ifdef LUCENT
- Tm *t;
- t = localtime(time(nil));
- Bprint(&bout, TABLE "<tr height=20><td></table>\n");
- Bprint(&bout, "<font size=-1><a href=\"http://www.lucent.com/copyright.html\">\n");
- Bprint(&bout, "Copyright</A> © %d Lucent Technologies. All rights reserved.</font>\n", t->year+1900);
- #endif
- Bprint(&bout, "</body></html>\n");
- }
- int
- getc(Biobuf *b)
- {
- cno++;
- return Bgetrune(b);
- }
- void
- ungetc(Biobuf *b)
- {
- cno--;
- Bungetrune(b);
- }
- char*
- getline(Biobuf *b)
- {
- int i, c;
- for(i=0; i<sizeof buf; i++){
- c = getc(b);
- if(c == Beof)
- return nil;
- buf[i] = c;
- if(c == '\n'){
- buf[i] = '\0';
- break;
- }
- }
- return buf;
- }
- int
- getnum(Biobuf *b)
- {
- int i, c;
- i = 0;
- for(;;){
- c = getc(b);
- if(c<'0' || '9'<c){
- ungetc(b);
- break;
- }
- i = i*10 + (c-'0');
- }
- return i;
- }
- char*
- getstr(Biobuf *b)
- {
- int i, c;
- for(i=0; i<sizeof buf; i++){
- /* must get bytes not runes */
- cno++;
- c = Bgetc(b);
- if(c == Beof)
- return nil;
- buf[i] = c;
- if(c == '\n' || c==' ' || c=='\t'){
- ungetc(b);
- buf[i] = '\0';
- break;
- }
- }
- return buf;
- }
- int
- setnum(Biobuf *b, char *name, int min, int max)
- {
- int i;
- i = getnum(b);
- if(debug > 2)
- fprint(2, "set %s = %d\n", name, i);
- if(min<=i && i<max)
- return i;
- sysfatal("value of %s is %d; min %d max %d at %s:#%d", name, i, min, max, filename, cno);
- return i;
- }
- void
- xcmd(Biobuf *b)
- {
- char *p, *fld[16], buf[1024];
- int i, nfld;
- p = getline(b);
- if(p == nil)
- sysfatal("xcmd error: %r");
- if(debug)
- fprint(2, "x command '%s'\n", p);
- nfld = tokenize(p, fld, nelem(fld));
- if(nfld == 0)
- return;
- switch(fld[0][0]){
- case 'f':
- /* mount font */
- if(nfld != 3)
- break;
- i = atoi(fld[1]);
- if(i<0 || Nfont<=i)
- sysfatal("font %d out of range at %s:#%d", i, filename, cno);
- mountfont(i, fld[2]);
- return;
- case 'i':
- /* init */
- return;
- case 'r':
- if(nfld<2 || atoi(fld[1])!=res)
- sysfatal("typesetter has unexpected resolution %s", fld[1]? fld[1] : "<unspecified>");
- return;
- case 's':
- /* stop */
- return;
- case 't':
- /* trailer */
- return;
- case 'T':
- if(nfld!=2 || strcmp(fld[1], "utf")!=0)
- sysfatal("output for unknown typesetter type %s", fld[1]);
- return;
- case 'X':
- if(nfld<3 || strcmp(fld[1], "html")!=0)
- break;
- /* is it a man reference of the form cp(1)? */
- /* X manref start/end cp (1) */
- if(nfld==6 && strcmp(fld[2], "manref")==0){
- /* was the right macro; is it the right form? */
- if(strlen(fld[5])>=3 &&
- fld[5][0]=='(' && fld[5][2]==')' &&
- '0'<=fld[5][1] && fld[5][1]<='9'){
- if(strcmp(fld[3], "start") == 0){
- /* set anchor attribute and remember string */
- attr |= (1<<Anchor);
- snprint(buf, sizeof buf,
- "<a href=\"/magic/man2html/%c/%s\">",
- fld[5][1], fld[4]);
- nanchors++;
- anchors = erealloc(anchors, nanchors*sizeof(char*));
- anchors[nanchors-1] = estrdup(buf);
- }else if(strcmp(fld[3], "end") == 0)
- attr &= ~(1<<Anchor);
- }
- }else if(strcmp(fld[2], "manPP") == 0){
- didP = 1;
- emitchar(Epp);
- }else if(nfld<4 || strcmp(fld[2], "manref")!=0){
- if(nfld>2 && strcmp(fld[2], "<P>")==0){ /* avoid triggering extra <br> */
- didP = 1;
- /* clear all font attributes before paragraph */
- emitchar(' ' | (attr & ~(0xFFFF|((1<<Italic)|(1<<Bold)|(1<<CW)))));
- emitstr("<P>");
- /* next emittec char will turn font attributes back on */
- }else if(nfld>2 && strcmp(fld[2], "<H4>")==0)
- attr |= (1<<Heading);
- else if(nfld>2 && strcmp(fld[2], "</H4>")==0)
- attr &= ~(1<<Heading);
- else if(debug)
- fprint(2, "unknown in-line html %s... at %s:%#d\n",
- fld[2], filename, cno);
- }
- return;
- }
- if(debug)
- fprint(2, "unknown or badly formatted x command %s\n", fld[0]);
- }
- int
- lookup(int c, Htmlchar tab[], int ntab)
- {
- int low, high, mid;
- low = 0;
- high = ntab - 1;
- while(low <= high){
- mid = (low+high)/2;
- if(c < tab[mid].value)
- high = mid - 1;
- else if(c > tab[mid].value)
- low = mid + 1;
- else
- return mid;
- }
- return -1; /* no match */
- }
- void
- emithtmlchar(int r)
- {
- static char buf[10];
- int i;
- i = lookup(r, htmlchars, nelem(htmlchars));
- if(i >= 0)
- emitstr(htmlchars[i].name);
- else
- emit(r);
- }
- char*
- troffchar(char *s)
- {
- int i;
- for(i=0; troffchars[i].name!=nil; i++)
- if(strcmp(s, troffchars[i].name) == 0)
- return troffchars[i].value;
- return "??";
- }
- void
- indent(void)
- {
- int nind;
- didP = 0;
- if(atnewline){
- if(hp != prevlineH){
- prevlineH = hp;
- /* these most peculiar numbers appear in the troff -man output */
- nind = ((prevlineH-1*res)+323)/324;
- attr &= ~((1<<Indent1)|(1<<Indent2)|(1<<Indent3));
- if(nind >= 1)
- attr |= (1<<Indent1);
- if(nind >= 2)
- attr |= (1<<Indent2);
- if(nind >= 3)
- attr |= (1<<Indent3);
- }
- atnewline = 0;
- }
- }
- void
- process(Biobuf *b, char *name)
- {
- int c, r, v, i;
- char *p;
- cno = 0;
- prevlineH = res;
- filename = name;
- for(;;){
- c = getc(b);
- switch(c){
- case Beof:
- /* go to ground state */
- attr = 0;
- emit('\n');
- return;
- case '\n':
- break;
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- v = c-'0';
- c = getc(b);
- if(c<'0' || '9'<c)
- sysfatal("illegal character motion at %s:#%d", filename, cno);
- v = v*10 + (c-'0');
- hp += v;
- /* fall through to character case */
- case 'c':
- indent();
- r = getc(b);
- emithtmlchar(r);
- break;
- case 'D':
- /* draw line; ignore */
- do
- c = getc(b);
- while(c!='\n' && c!= Beof);
- break;
- case 'f':
- v = setnum(b, "font", 0, Nfont);
- switchfont(v);
- break;
- case 'h':
- v = setnum(b, "hpos", -20000, 20000);
- /* generate spaces if motion is large and within a line */
- if(!atnewline && v>2*72)
- for(i=0; i<v; i+=72)
- emitstr(" ");
- hp += v;
- break;
- case 'n':
- setnum(b, "n1", -10000, 10000);
- //Bprint(&bout, " N1=%d", v);
- getc(b); /* space separates */
- setnum(b, "n2", -10000, 10000);
- atnewline = 1;
- if(!didP && hp < (Wid-1)*res) /* if line is less than 19" long, probably need a line break */
- emitstr("<br>");
- emit('\n');
- break;
- case 'p':
- page = setnum(b, "ps", -10000, 10000);
- break;
- case 's':
- ps = setnum(b, "ps", 1, 1000);
- break;
- case 'v':
- vp += setnum(b, "vpos", -10000, 10000);
- /* BUG: ignore motion */
- break;
- case 'x':
- xcmd(b);
- break;
- case 'w':
- emit(' ');
- break;
- case 'C':
- indent();
- p = getstr(b);
- emitstr(troffchar(p));
- break;
- case 'H':
- hp = setnum(b, "hpos", 0, 20000);
- //Bprint(&bout, " H=%d ", hp);
- break;
- case 'V':
- vp = setnum(b, "vpos", 0, 10000);
- break;
- default:
- fprint(2, "dhtml: unknown directive %c(0x%.2ux) at %s:#%d\n", c, c, filename, cno);
- return;
- }
- }
- }
- HTMLfont*
- htmlfont(char *name)
- {
- int i;
- for(i=0; htmlfonts[i].name!=nil; i++)
- if(strcmp(name, htmlfonts[i].name) == 0)
- return &htmlfonts[i];
- return &htmlfonts[0];
- }
- void
- mountfont(int pos, char *name)
- {
- if(debug)
- fprint(2, "mount font %s on %d\n", name, pos);
- if(font[pos] != nil){
- free(font[pos]->name);
- free(font[pos]);
- }
- font[pos] = emalloc(sizeof(Font));
- font[pos]->name = estrdup(name);
- font[pos]->htmlfont = htmlfont(name);
- }
- void
- switchfont(int pos)
- {
- HTMLfont *hf;
- if(debug)
- fprint(2, "font change from %d (%s) to %d (%s)\n", ft, font[ft]->name, pos, font[pos]->name);
- if(pos == ft)
- return;
- hf = font[ft]->htmlfont;
- if(hf->bit != 0)
- attr &= ~(1<<hf->bit);
- ft = pos;
- hf = font[ft]->htmlfont;
- if(hf->bit != 0)
- attr |= (1<<hf->bit);
- }
|