parse.c 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include <String.h>
  5. #include <ctype.h>
  6. #include <thread.h>
  7. #include "wiki.h"
  8. static Wpage*
  9. mkwtxt(int type, char *text)
  10. {
  11. Wpage *w;
  12. w = emalloc(sizeof(*w));
  13. w->type = type;
  14. w->text = text;
  15. return w;
  16. }
  17. /*
  18. * turn runs of whitespace into single spaces,
  19. * eliminate whitespace at beginning and end.
  20. */
  21. char*
  22. strcondense(char *s, int cutbegin)
  23. {
  24. char *r, *w, *es;
  25. int inspace;
  26. es = s+strlen(s);
  27. inspace = cutbegin;
  28. for(r=w=s; *r; r++){
  29. if(isspace(*r)){
  30. if(!inspace){
  31. inspace=1;
  32. *w++ = ' ';
  33. }
  34. }else{
  35. inspace=0;
  36. *w++ = *r;
  37. }
  38. }
  39. assert(w <= es);
  40. if(inspace && w>s){
  41. --w;
  42. *w = '\0';
  43. }
  44. else
  45. *w = '\0';
  46. return s;
  47. }
  48. /*
  49. * turn runs of Wplain into single Wplain.
  50. */
  51. static Wpage*
  52. wcondense(Wpage *wtxt)
  53. {
  54. Wpage *ow, *w;
  55. for(w=wtxt; w; ){
  56. if(w->type == Wplain)
  57. strcondense(w->text, 1);
  58. if(w->type != Wplain || w->next==nil
  59. || w->next->type != Wplain){
  60. w=w->next;
  61. continue;
  62. }
  63. w->text = erealloc(w->text, strlen(w->text)+1+strlen(w->next->text)+1);
  64. strcat(w->text, " ");
  65. strcat(w->text, w->next->text);
  66. free(w->next->text);
  67. ow = w->next;
  68. w->next = w->next->next;
  69. free(ow);
  70. }
  71. return wtxt;
  72. }
  73. /*
  74. * Parse a link, without the brackets.
  75. */
  76. static Wpage*
  77. mklink(char *s)
  78. {
  79. char *q;
  80. Wpage *w;
  81. for(q=s; *q && *q != '|'; q++)
  82. ;
  83. if(*q == '\0'){
  84. w = mkwtxt(Wlink, estrdup(strcondense(s, 1)));
  85. w->url = nil;
  86. }else{
  87. *q = '\0';
  88. w = mkwtxt(Wlink, estrdup(strcondense(s, 1)));
  89. w->url = estrdup(strcondense(q+1, 1));
  90. }
  91. return w;
  92. }
  93. /*
  94. * Parse Wplains, inserting Wlink nodes where appropriate.
  95. */
  96. static Wpage*
  97. wlink(Wpage *wtxt)
  98. {
  99. char *p, *q, *r, *s;
  100. Wpage *w, *nw;
  101. for(w=wtxt; w; w=nw){
  102. nw = w->next;
  103. if(w->type != Wplain)
  104. continue;
  105. while(w->text[0]){
  106. p = w->text;
  107. for(q=p; *q && *q != '['; q++)
  108. ;
  109. if(*q == '\0')
  110. break;
  111. for(r=q; *r && *r != ']'; r++)
  112. ;
  113. if(*r == '\0')
  114. break;
  115. *q = '\0';
  116. *r = '\0';
  117. s = w->text;
  118. w->text = estrdup(w->text);
  119. w->next = mklink(q+1);
  120. w = w->next;
  121. w->next = mkwtxt(Wplain, estrdup(r+1));
  122. free(s);
  123. w = w->next;
  124. w->next = nw;
  125. }
  126. assert(w->next == nw);
  127. }
  128. return wtxt;
  129. }
  130. static int
  131. ismanchar(int c)
  132. {
  133. return ('a' <= c && c <= 'z')
  134. || ('A' <= c && c <= 'Z')
  135. || ('0' <= c && c <= '9')
  136. || c=='_' || c=='-' || c=='.' || c=='/'
  137. || (c < 0); /* UTF */
  138. }
  139. static Wpage*
  140. findmanref(char *p, char **beginp, char **endp)
  141. {
  142. char *q, *r;
  143. Wpage *w;
  144. q=p;
  145. for(;;){
  146. for(; q[0] && (q[0] != '(' || !isdigit(q[1]) || q[2] != ')'); q++)
  147. ;
  148. if(*q == '\0')
  149. break;
  150. for(r=q; r>p && ismanchar(r[-1]); r--)
  151. ;
  152. if(r==q){
  153. q += 3;
  154. continue;
  155. }
  156. *q = '\0';
  157. w = mkwtxt(Wman, estrdup(r));
  158. *beginp = r;
  159. *q = '(';
  160. w->section = q[1]-'0';
  161. *endp = q+3;
  162. return w;
  163. }
  164. return nil;
  165. }
  166. /*
  167. * Parse Wplains, looking for man page references.
  168. * This should be done by using a plumb(6)-style
  169. * control file rather than hard-coding things here.
  170. */
  171. static Wpage*
  172. wman(Wpage *wtxt)
  173. {
  174. char *q, *r;
  175. Wpage *w, *mw, *nw;
  176. for(w=wtxt; w; w=nw){
  177. nw = w->next;
  178. if(w->type != Wplain)
  179. continue;
  180. while(w->text[0]){
  181. if((mw = findmanref(w->text, &q, &r)) == nil)
  182. break;
  183. *q = '\0';
  184. w->next = mw;
  185. w = w->next;
  186. w->next = mkwtxt(Wplain, estrdup(r));
  187. w = w->next;
  188. w->next = nw;
  189. }
  190. assert(w->next == nw);
  191. }
  192. return wtxt;
  193. }
  194. static int isheading(char *p) {
  195. Rune r;
  196. int hasupper=0;
  197. while(*p) {
  198. p+=chartorune(&r,p);
  199. if(isupperrune(r))
  200. hasupper=1;
  201. else if(islowerrune(r))
  202. return 0;
  203. }
  204. return hasupper;
  205. }
  206. Wpage*
  207. Brdpage(char *(*rdline)(void*,int), void *b)
  208. {
  209. char *p;
  210. int waspara;
  211. Wpage *w, **pw;
  212. w = nil;
  213. pw = &w;
  214. waspara = 1;
  215. while((p = rdline(b, '\n')) != nil){
  216. if(p[0] != '!')
  217. p = strcondense(p, 1);
  218. if(p[0] == '\0'){
  219. if(waspara==0){
  220. waspara=1;
  221. *pw = mkwtxt(Wpara, nil);
  222. pw = &(*pw)->next;
  223. }
  224. continue;
  225. }
  226. waspara = 0;
  227. switch(p[0]){
  228. case '*':
  229. *pw = mkwtxt(Wbullet, nil);
  230. pw = &(*pw)->next;
  231. *pw = mkwtxt(Wplain, estrdup(p+1));
  232. pw = &(*pw)->next;
  233. break;
  234. case '!':
  235. *pw = mkwtxt(Wpre, estrdup(p[1]==' '?p+2:p+1));
  236. pw = &(*pw)->next;
  237. break;
  238. default:
  239. if(isheading(p)){
  240. *pw = mkwtxt(Wheading, estrdup(p));
  241. pw = &(*pw)->next;
  242. continue;
  243. }
  244. *pw = mkwtxt(Wplain, estrdup(p));
  245. pw = &(*pw)->next;
  246. break;
  247. }
  248. }
  249. if(w == nil)
  250. werrstr("empty page");
  251. *pw = nil;
  252. w = wcondense(w);
  253. w = wlink(w);
  254. w = wman(w);
  255. return w;
  256. }
  257. void
  258. printpage(Wpage *w)
  259. {
  260. for(; w; w=w->next){
  261. switch(w->type){
  262. case Wpara:
  263. print("para\n");
  264. break;
  265. case Wheading:
  266. print("heading '%s'\n", w->text);
  267. break;
  268. case Wbullet:
  269. print("bullet\n");
  270. break;
  271. case Wlink:
  272. print("link '%s' '%s'\n", w->text, w->url);
  273. break;
  274. case Wman:
  275. print("man %d %s\n", w->section, w->text);
  276. break;
  277. case Wplain:
  278. print("plain '%s'\n", w->text);
  279. break;
  280. case Wpre:
  281. print("pre '%s'\n", w->text);
  282. break;
  283. }
  284. }
  285. }