123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331 |
- #include <u.h>
- #include <libc.h>
- #include <bio.h>
- #include <String.h>
- #include <ctype.h>
- #include <thread.h>
- #include "wiki.h"
- static Wpage*
- mkwtxt(int type, char *text)
- {
- Wpage *w;
- w = emalloc(sizeof(*w));
- w->type = type;
- w->text = text;
- setmalloctag(w, getcallerpc(&type));
- return w;
- }
/*
 * Rewrite s in place so that every run of whitespace becomes a
 * single space and whitespace at the end of the string is removed.
 * If cutbegin is non-zero, whitespace at the beginning is removed
 * too; if it is zero, a leading run is kept as one space (unless
 * the whole string is whitespace, in which case the result is empty).
 * The result is never longer than the input.  Returns s.
 */
char*
strcondense(char *s, int cutbegin)
{
	char *r, *w, *es;
	int inspace;

	es = s+strlen(s);
	/* starting "in a space" makes the loop swallow leading whitespace */
	inspace = cutbegin;
	for(r=w=s; *r; r++){
		/*
		 * The cast matters: bytes of UTF-8 sequences are negative
		 * where char is signed, and the ctype functions are only
		 * defined for unsigned char values (and EOF).
		 */
		if(isspace((unsigned char)*r)){
			if(!inspace){
				inspace = 1;
				*w++ = ' ';
			}
		}else{
			inspace = 0;
			*w++ = *r;
		}
	}
	assert(w <= es);
	/* the last thing written was a space: that is trailing whitespace */
	if(inspace && w>s)
		--w;
	*w = '\0';
	return s;
}
- /*
- * turn runs of Wplain into single Wplain.
- */
- static Wpage*
- wcondense(Wpage *wtxt)
- {
- Wpage *ow, *w;
- for(w=wtxt; w; ){
- if(w->type == Wplain)
- strcondense(w->text, 1);
- if(w->type != Wplain || w->next==nil
- || w->next->type != Wplain){
- w=w->next;
- continue;
- }
- w->text = erealloc(w->text, strlen(w->text)+1+strlen(w->next->text)+1);
- strcat(w->text, " ");
- strcat(w->text, w->next->text);
-
- free(w->next->text);
- ow = w->next;
- w->next = w->next->next;
- free(ow);
- }
- return wtxt;
- }
- /*
- * Parse a link, without the brackets.
- */
- static Wpage*
- mklink(char *s)
- {
- char *q;
- Wpage *w;
- for(q=s; *q && *q != '|'; q++)
- ;
- if(*q == '\0'){
- w = mkwtxt(Wlink, estrdup(strcondense(s, 1)));
- w->url = nil;
- }else{
- *q = '\0';
- w = mkwtxt(Wlink, estrdup(strcondense(s, 1)));
- w->url = estrdup(strcondense(q+1, 1));
- }
- setmalloctag(w, getcallerpc(&s));
- return w;
- }
- /*
- * Parse Wplains, inserting Wlink nodes where appropriate.
- */
- static Wpage*
- wlink(Wpage *wtxt)
- {
- char *p, *q, *r, *s;
- Wpage *w, *nw;
- for(w=wtxt; w; w=nw){
- nw = w->next;
- if(w->type != Wplain)
- continue;
- while(w->text[0]){
- p = w->text;
- for(q=p; *q && *q != '['; q++)
- ;
- if(*q == '\0')
- break;
- for(r=q; *r && *r != ']'; r++)
- ;
- if(*r == '\0')
- break;
- *q = '\0';
- *r = '\0';
- s = w->text;
- w->text = estrdup(w->text);
- w->next = mklink(q+1);
- w = w->next;
- w->next = mkwtxt(Wplain, estrdup(r+1));
- free(s);
- w = w->next;
- w->next = nw;
- }
- assert(w->next == nw);
- }
- return wtxt;
- }
/*
 * Report whether c may appear in a man page name: ASCII letters
 * and digits, '_', '-', '.', '/', and any byte with the high bit set
 * (part of a UTF-8 sequence).  High-bit bytes are accepted both as
 * negative values (signed char) and as values >= 0x80 (unsigned char),
 * so the result does not depend on the signedness of char.
 */
static int
ismanchar(int c)
{
	return ('a' <= c && c <= 'z')
		|| ('A' <= c && c <= 'Z')
		|| ('0' <= c && c <= '9')
		|| c=='_' || c=='-' || c=='.' || c=='/'
		|| c < 0 || c >= 0x80;	/* UTF */
}
- static Wpage*
- findmanref(char *p, char **beginp, char **endp)
- {
- char *q, *r;
- Wpage *w;
- q=p;
- for(;;){
- for(; q[0] && (q[0] != '(' || !isdigit(q[1]) || q[2] != ')'); q++)
- ;
- if(*q == '\0')
- break;
- for(r=q; r>p && ismanchar(r[-1]); r--)
- ;
- if(r==q){
- q += 3;
- continue;
- }
- *q = '\0';
- w = mkwtxt(Wman, estrdup(r));
- *beginp = r;
- *q = '(';
- w->section = q[1]-'0';
- *endp = q+3;
- setmalloctag(w, getcallerpc(&p));
- return w;
- }
- return nil;
- }
- /*
- * Parse Wplains, looking for man page references.
- * This should be done by using a plumb(6)-style
- * control file rather than hard-coding things here.
- */
- static Wpage*
- wman(Wpage *wtxt)
- {
- char *q, *r;
- Wpage *w, *mw, *nw;
- for(w=wtxt; w; w=nw){
- nw = w->next;
- if(w->type != Wplain)
- continue;
- while(w->text[0]){
- if((mw = findmanref(w->text, &q, &r)) == nil)
- break;
- *q = '\0';
- w->next = mw;
- w = w->next;
- w->next = mkwtxt(Wplain, estrdup(r));
- w = w->next;
- w->next = nw;
- }
- assert(w->next == nw);
- }
- return wtxt;
- }
- static int isheading(char *p) {
- Rune r;
- int hasupper=0;
- while(*p) {
- p+=chartorune(&r,p);
- if(isupperrune(r))
- hasupper=1;
- else if(islowerrune(r))
- return 0;
- }
- return hasupper;
- }
- Wpage*
- Brdpage(char *(*rdline)(void*,int), void *b)
- {
- char *p, *c;
- int waspara;
- Wpage *w, **pw;
- w = nil;
- pw = &w;
- waspara = 1;
- while((p = rdline(b, '\n')) != nil){
- if(p[0] != '!')
- p = strcondense(p, 1);
- if(p[0] == '\0'){
- if(waspara==0){
- waspara=1;
- *pw = mkwtxt(Wpara, nil);
- pw = &(*pw)->next;
- }
- continue;
- }
- waspara = 0;
- switch(p[0]){
- case '*':
- *pw = mkwtxt(Wbullet, nil);
- pw = &(*pw)->next;
- *pw = mkwtxt(Wplain, estrdup(p+1));
- pw = &(*pw)->next;
- break;
- case '!':
- *pw = mkwtxt(Wpre, estrdup(p[1]==' '?p+2:p+1));
- pw = &(*pw)->next;
- break;
- case '-':
- for(c = p; *c != '\0'; c++) {
- if(*c != '-') {
- c = p;
- break;
- }
- }
- if( (c-p) > 4) {
- *pw = mkwtxt(Whr, nil);
- pw = &(*pw)->next;
- break;
- }
- /* else fall thru */
- default:
- if(isheading(p)){
- *pw = mkwtxt(Wheading, estrdup(p));
- pw = &(*pw)->next;
- continue;
- }
- *pw = mkwtxt(Wplain, estrdup(p));
- pw = &(*pw)->next;
- break;
- }
- }
- if(w == nil)
- werrstr("empty page");
-
- *pw = nil;
- w = wcondense(w);
- w = wlink(w);
- w = wman(w);
- setmalloctag(w, getcallerpc(&rdline));
- return w;
- }
- void
- printpage(Wpage *w)
- {
- for(; w; w=w->next){
- switch(w->type){
- case Wpara:
- print("para\n");
- break;
- case Wheading:
- print("heading '%s'\n", w->text);
- break;
- case Wbullet:
- print("bullet\n");
- break;
- case Wlink:
- print("link '%s' '%s'\n", w->text, w->url);
- break;
- case Wman:
- print("man %d %s\n", w->section, w->text);
- break;
- case Wplain:
- print("plain '%s'\n", w->text);
- break;
- case Whr:
- print("hr\n");
- break;
- case Wpre:
- print("pre '%s'\n", w->text);
- break;
- }
- }
- }
|