parse.c 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include <u.h>
  10. #include <libc.h>
  11. #include <bio.h>
  12. #include <String.h>
  13. #include <ctype.h>
  14. #include <thread.h>
  15. #include "wiki.h"
  16. static Wpage*
  17. mkwtxt(int type, char *text)
  18. {
  19. Wpage *w;
  20. w = emalloc(sizeof(*w));
  21. w->type = type;
  22. w->text = text;
  23. setmalloctag(w, getcallerpc(&type));
  24. return w;
  25. }
  /*
   * Collapse every run of whitespace in s into a single space, in place.
   * Trailing whitespace is always removed.  Leading whitespace is removed
   * only when cutbegin is non-zero; otherwise a leading run is kept as
   * one space.  The result is never longer than the input.  Returns s.
   */
  char*
  strcondense(char *s, int cutbegin)
  {
      char *r, *w, *es;
      int inspace;

      es = s+strlen(s);
      /* starting out "inside a run" makes the loop swallow leading blanks */
      inspace = cutbegin;
      for(r=w=s; *r; r++){
          /*
           * The ctype classifiers need an unsigned char value;
           * bytes of UTF-8 sequences are negative as plain char.
           */
          if(isspace((unsigned char)*r)){
              if(!inspace){
                  inspace = 1;
                  *w++ = ' ';
              }
          }else{
              inspace = 0;
              *w++ = *r;
          }
      }
      assert(w <= es);
      /* if the last thing written was a run's space, drop it */
      if(inspace && w>s)
          --w;
      *w = '\0';
      return s;
  }
  57. /*
  58. * turn runs of Wplain into single Wplain.
  59. */
  60. static Wpage*
  61. wcondense(Wpage *wtxt)
  62. {
  63. Wpage *ow, *w;
  64. for(w=wtxt; w; ){
  65. if(w->type == Wplain)
  66. strcondense(w->text, 1);
  67. if(w->type != Wplain || w->next==nil
  68. || w->next->type != Wplain){
  69. w=w->next;
  70. continue;
  71. }
  72. w->text = erealloc(w->text, strlen(w->text)+1+strlen(w->next->text)+1);
  73. strcat(w->text, " ");
  74. strcat(w->text, w->next->text);
  75. ow = w->next;
  76. w->next = ow->next;
  77. ow->next = nil;
  78. freepage(ow);
  79. }
  80. return wtxt;
  81. }
  82. /*
  83. * Parse a link, without the brackets.
  84. */
  85. static Wpage*
  86. mklink(char *s)
  87. {
  88. char *q;
  89. Wpage *w;
  90. for(q=s; *q && *q != '|'; q++)
  91. ;
  92. if(*q == '\0'){
  93. w = mkwtxt(Wlink, estrdup(strcondense(s, 1)));
  94. w->url = nil;
  95. }else{
  96. *q = '\0';
  97. w = mkwtxt(Wlink, estrdup(strcondense(s, 1)));
  98. w->url = estrdup(strcondense(q+1, 1));
  99. }
  100. setmalloctag(w, getcallerpc(&s));
  101. return w;
  102. }
  103. /*
  104. * Parse Wplains, inserting Wlink nodes where appropriate.
  105. */
  106. static Wpage*
  107. wlink(Wpage *wtxt)
  108. {
  109. char *p, *q, *r, *s;
  110. Wpage *w, *nw;
  111. for(w=wtxt; w; w=nw){
  112. nw = w->next;
  113. if(w->type != Wplain)
  114. continue;
  115. while(w->text[0]){
  116. p = w->text;
  117. for(q=p; *q && *q != '['; q++)
  118. ;
  119. if(*q == '\0')
  120. break;
  121. for(r=q; *r && *r != ']'; r++)
  122. ;
  123. if(*r == '\0')
  124. break;
  125. *q = '\0';
  126. *r = '\0';
  127. s = w->text;
  128. w->text = estrdup(w->text);
  129. w->next = mklink(q+1);
  130. w = w->next;
  131. w->next = mkwtxt(Wplain, estrdup(r+1));
  132. free(s);
  133. w = w->next;
  134. w->next = nw;
  135. }
  136. assert(w->next == nw);
  137. }
  138. return wtxt;
  139. }
  /*
   * Report whether c may be part of a manual page name: an ASCII letter
   * or digit, one of "_-./", or any non-ASCII byte (a piece of a UTF-8
   * sequence).  Non-ASCII bytes are accepted both as negative values
   * (plain char is signed) and as values above 0x7F (plain char is
   * unsigned), so the result does not depend on the signedness of char.
   * Returns 1 or 0.
   */
  static int
  ismanchar(int c)
  {
      if(c < 0 || c > 0x7F)
          return 1;    /* UTF */
      if(('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
          return 1;
      if('0' <= c && c <= '9')
          return 1;
      switch(c){
      case '_':
      case '-':
      case '.':
      case '/':
          return 1;
      }
      return 0;
  }
  149. static Wpage*
  150. findmanref(char *p, char **beginp, char **endp)
  151. {
  152. char *q, *r;
  153. Wpage *w;
  154. q=p;
  155. for(;;){
  156. for(; q[0] && (q[0] != '(' || !isdigit(q[1]) || q[2] != ')'); q++)
  157. ;
  158. if(*q == '\0')
  159. break;
  160. for(r=q; r>p && ismanchar(r[-1]); r--)
  161. ;
  162. if(r==q){
  163. q += 3;
  164. continue;
  165. }
  166. *q = '\0';
  167. w = mkwtxt(Wman, estrdup(r));
  168. *beginp = r;
  169. *q = '(';
  170. w->section = q[1]-'0';
  171. *endp = q+3;
  172. setmalloctag(w, getcallerpc(&p));
  173. return w;
  174. }
  175. return nil;
  176. }
  177. /*
  178. * Parse Wplains, looking for man page references.
  179. * This should be done by using a plumb(6)-style
  180. * control file rather than hard-coding things here.
  181. */
  182. static Wpage*
  183. wman(Wpage *wtxt)
  184. {
  185. char *q, *r;
  186. Wpage *w, *mw, *nw;
  187. for(w=wtxt; w; w=nw){
  188. nw = w->next;
  189. if(w->type != Wplain)
  190. continue;
  191. while(w->text[0]){
  192. if((mw = findmanref(w->text, &q, &r)) == nil)
  193. break;
  194. *q = '\0';
  195. w->next = mw;
  196. w = w->next;
  197. w->next = mkwtxt(Wplain, estrdup(r));
  198. w = w->next;
  199. w->next = nw;
  200. }
  201. assert(w->next == nw);
  202. }
  203. return wtxt;
  204. }
  205. static int isheading(char *p) {
  206. Rune r;
  207. int hasupper=0;
  208. while(*p) {
  209. p+=chartorune(&r,p);
  210. if(isupperrune(r))
  211. hasupper=1;
  212. else if(islowerrune(r))
  213. return 0;
  214. }
  215. return hasupper;
  216. }
  217. Wpage*
  218. Brdpage(char *(*rdline)(void*,int), void *b)
  219. {
  220. char *p, *c;
  221. int waspara;
  222. Wpage *w, **pw;
  223. w = nil;
  224. pw = &w;
  225. waspara = 1;
  226. while((p = rdline(b, '\n')) != nil){
  227. if(p[0] != '!')
  228. p = strcondense(p, 1);
  229. if(p[0] == '\0'){
  230. if(waspara==0){
  231. waspara=1;
  232. *pw = mkwtxt(Wpara, nil);
  233. pw = &(*pw)->next;
  234. }
  235. continue;
  236. }
  237. waspara = 0;
  238. switch(p[0]){
  239. case '*':
  240. *pw = mkwtxt(Wbullet, nil);
  241. pw = &(*pw)->next;
  242. *pw = mkwtxt(Wplain, estrdup(p+1));
  243. pw = &(*pw)->next;
  244. break;
  245. case '!':
  246. *pw = mkwtxt(Wpre, estrdup(p[1]==' '?p+2:p+1));
  247. pw = &(*pw)->next;
  248. break;
  249. case '-':
  250. for(c = p; *c != '\0'; c++) {
  251. if(*c != '-') {
  252. c = p;
  253. break;
  254. }
  255. }
  256. if( (c-p) > 4) {
  257. *pw = mkwtxt(Whr, nil);
  258. pw = &(*pw)->next;
  259. break;
  260. }
  261. /* else fall thru */
  262. default:
  263. if(isheading(p)){
  264. *pw = mkwtxt(Wheading, estrdup(p));
  265. pw = &(*pw)->next;
  266. continue;
  267. }
  268. *pw = mkwtxt(Wplain, estrdup(p));
  269. pw = &(*pw)->next;
  270. break;
  271. }
  272. }
  273. if(w == nil)
  274. werrstr("empty page");
  275. *pw = nil;
  276. w = wcondense(w);
  277. w = wlink(w);
  278. w = wman(w);
  279. setmalloctag(w, getcallerpc(&rdline));
  280. return w;
  281. }
  282. void
  283. printpage(Wpage *w)
  284. {
  285. for(; w; w=w->next){
  286. switch(w->type){
  287. case Wpara:
  288. print("para\n");
  289. break;
  290. case Wheading:
  291. print("heading '%s'\n", w->text);
  292. break;
  293. case Wbullet:
  294. print("bullet\n");
  295. break;
  296. case Wlink:
  297. print("link '%s' '%s'\n", w->text, w->url);
  298. break;
  299. case Wman:
  300. print("man %d %s\n", w->section, w->text);
  301. break;
  302. case Wplain:
  303. print("plain '%s'\n", w->text);
  304. break;
  305. case Whr:
  306. print("hr\n");
  307. break;
  308. case Wpre:
  309. print("pre '%s'\n", w->text);
  310. break;
  311. }
  312. }
  313. }