/* parse.c: parse wiki page text into a linked list of Wpage nodes. */
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include <String.h>
  5. #include <ctype.h>
  6. #include <thread.h>
  7. #include "wiki.h"
  8. static Wpage*
  9. mkwtxt(int type, char *text)
  10. {
  11. Wpage *w;
  12. w = emalloc(sizeof(*w));
  13. w->type = type;
  14. w->text = text;
  15. setmalloctag(w, getcallerpc(&type));
  16. return w;
  17. }
  18. /*
  19. * turn runs of whitespace into single spaces,
  20. * eliminate whitespace at beginning and end.
  21. */
  22. char*
  23. strcondense(char *s, int cutbegin)
  24. {
  25. char *r, *w, *es;
  26. int inspace;
  27. es = s+strlen(s);
  28. inspace = cutbegin;
  29. for(r=w=s; *r; r++){
  30. if(isspace(*r)){
  31. if(!inspace){
  32. inspace=1;
  33. *w++ = ' ';
  34. }
  35. }else{
  36. inspace=0;
  37. *w++ = *r;
  38. }
  39. }
  40. assert(w <= es);
  41. if(inspace && w>s){
  42. --w;
  43. *w = '\0';
  44. }
  45. else
  46. *w = '\0';
  47. return s;
  48. }
  49. /*
  50. * turn runs of Wplain into single Wplain.
  51. */
  52. static Wpage*
  53. wcondense(Wpage *wtxt)
  54. {
  55. Wpage *ow, *w;
  56. for(w=wtxt; w; ){
  57. if(w->type == Wplain)
  58. strcondense(w->text, 1);
  59. if(w->type != Wplain || w->next==nil
  60. || w->next->type != Wplain){
  61. w=w->next;
  62. continue;
  63. }
  64. w->text = erealloc(w->text, strlen(w->text)+1+strlen(w->next->text)+1);
  65. strcat(w->text, " ");
  66. strcat(w->text, w->next->text);
  67. ow = w->next;
  68. w->next = ow->next;
  69. ow->next = nil;
  70. freepage(ow);
  71. }
  72. return wtxt;
  73. }
  74. /*
  75. * Parse a link, without the brackets.
  76. */
  77. static Wpage*
  78. mklink(char *s)
  79. {
  80. char *q;
  81. Wpage *w;
  82. for(q=s; *q && *q != '|'; q++)
  83. ;
  84. if(*q == '\0'){
  85. w = mkwtxt(Wlink, estrdup(strcondense(s, 1)));
  86. w->url = nil;
  87. }else{
  88. *q = '\0';
  89. w = mkwtxt(Wlink, estrdup(strcondense(s, 1)));
  90. w->url = estrdup(strcondense(q+1, 1));
  91. }
  92. setmalloctag(w, getcallerpc(&s));
  93. return w;
  94. }
  95. /*
  96. * Parse Wplains, inserting Wlink nodes where appropriate.
  97. */
  98. static Wpage*
  99. wlink(Wpage *wtxt)
  100. {
  101. char *p, *q, *r, *s;
  102. Wpage *w, *nw;
  103. for(w=wtxt; w; w=nw){
  104. nw = w->next;
  105. if(w->type != Wplain)
  106. continue;
  107. while(w->text[0]){
  108. p = w->text;
  109. for(q=p; *q && *q != '['; q++)
  110. ;
  111. if(*q == '\0')
  112. break;
  113. for(r=q; *r && *r != ']'; r++)
  114. ;
  115. if(*r == '\0')
  116. break;
  117. *q = '\0';
  118. *r = '\0';
  119. s = w->text;
  120. w->text = estrdup(w->text);
  121. w->next = mklink(q+1);
  122. w = w->next;
  123. w->next = mkwtxt(Wplain, estrdup(r+1));
  124. free(s);
  125. w = w->next;
  126. w->next = nw;
  127. }
  128. assert(w->next == nw);
  129. }
  130. return wtxt;
  131. }
  /*
   * Report whether c is accepted as part of a manual page name:
   * an ASCII letter or digit, one of '_', '-', '.', '/', or any
   * negative value (a UTF byte held in a signed char).
   * Returns 1 if so, 0 otherwise.
   */
  static int
  ismanchar(int c)
  {
      if(c < 0)	/* UTF */
          return 1;
      if('a' <= c && c <= 'z')
          return 1;
      if('A' <= c && c <= 'Z')
          return 1;
      if('0' <= c && c <= '9')
          return 1;
      switch(c){
      case '_':
      case '-':
      case '.':
      case '/':
          return 1;
      default:
          return 0;
      }
  }
  141. static Wpage*
  142. findmanref(char *p, char **beginp, char **endp)
  143. {
  144. char *q, *r;
  145. Wpage *w;
  146. q=p;
  147. for(;;){
  148. for(; q[0] && (q[0] != '(' || !isdigit(q[1]) || q[2] != ')'); q++)
  149. ;
  150. if(*q == '\0')
  151. break;
  152. for(r=q; r>p && ismanchar(r[-1]); r--)
  153. ;
  154. if(r==q){
  155. q += 3;
  156. continue;
  157. }
  158. *q = '\0';
  159. w = mkwtxt(Wman, estrdup(r));
  160. *beginp = r;
  161. *q = '(';
  162. w->section = q[1]-'0';
  163. *endp = q+3;
  164. setmalloctag(w, getcallerpc(&p));
  165. return w;
  166. }
  167. return nil;
  168. }
  169. /*
  170. * Parse Wplains, looking for man page references.
  171. * This should be done by using a plumb(6)-style
  172. * control file rather than hard-coding things here.
  173. */
  174. static Wpage*
  175. wman(Wpage *wtxt)
  176. {
  177. char *q, *r;
  178. Wpage *w, *mw, *nw;
  179. for(w=wtxt; w; w=nw){
  180. nw = w->next;
  181. if(w->type != Wplain)
  182. continue;
  183. while(w->text[0]){
  184. if((mw = findmanref(w->text, &q, &r)) == nil)
  185. break;
  186. *q = '\0';
  187. w->next = mw;
  188. w = w->next;
  189. w->next = mkwtxt(Wplain, estrdup(r));
  190. w = w->next;
  191. w->next = nw;
  192. }
  193. assert(w->next == nw);
  194. }
  195. return wtxt;
  196. }
  197. static int isheading(char *p) {
  198. Rune r;
  199. int hasupper=0;
  200. while(*p) {
  201. p+=chartorune(&r,p);
  202. if(isupperrune(r))
  203. hasupper=1;
  204. else if(islowerrune(r))
  205. return 0;
  206. }
  207. return hasupper;
  208. }
  209. Wpage*
  210. Brdpage(char *(*rdline)(void*,int), void *b)
  211. {
  212. char *p, *c;
  213. int waspara;
  214. Wpage *w, **pw;
  215. w = nil;
  216. pw = &w;
  217. waspara = 1;
  218. while((p = rdline(b, '\n')) != nil){
  219. if(p[0] != '!')
  220. p = strcondense(p, 1);
  221. if(p[0] == '\0'){
  222. if(waspara==0){
  223. waspara=1;
  224. *pw = mkwtxt(Wpara, nil);
  225. pw = &(*pw)->next;
  226. }
  227. continue;
  228. }
  229. waspara = 0;
  230. switch(p[0]){
  231. case '*':
  232. *pw = mkwtxt(Wbullet, nil);
  233. pw = &(*pw)->next;
  234. *pw = mkwtxt(Wplain, estrdup(p+1));
  235. pw = &(*pw)->next;
  236. break;
  237. case '!':
  238. *pw = mkwtxt(Wpre, estrdup(p[1]==' '?p+2:p+1));
  239. pw = &(*pw)->next;
  240. break;
  241. case '-':
  242. for(c = p; *c != '\0'; c++) {
  243. if(*c != '-') {
  244. c = p;
  245. break;
  246. }
  247. }
  248. if( (c-p) > 4) {
  249. *pw = mkwtxt(Whr, nil);
  250. pw = &(*pw)->next;
  251. break;
  252. }
  253. /* else fall thru */
  254. default:
  255. if(isheading(p)){
  256. *pw = mkwtxt(Wheading, estrdup(p));
  257. pw = &(*pw)->next;
  258. continue;
  259. }
  260. *pw = mkwtxt(Wplain, estrdup(p));
  261. pw = &(*pw)->next;
  262. break;
  263. }
  264. }
  265. if(w == nil)
  266. werrstr("empty page");
  267. *pw = nil;
  268. w = wcondense(w);
  269. w = wlink(w);
  270. w = wman(w);
  271. setmalloctag(w, getcallerpc(&rdline));
  272. return w;
  273. }
  274. void
  275. printpage(Wpage *w)
  276. {
  277. for(; w; w=w->next){
  278. switch(w->type){
  279. case Wpara:
  280. print("para\n");
  281. break;
  282. case Wheading:
  283. print("heading '%s'\n", w->text);
  284. break;
  285. case Wbullet:
  286. print("bullet\n");
  287. break;
  288. case Wlink:
  289. print("link '%s' '%s'\n", w->text, w->url);
  290. break;
  291. case Wman:
  292. print("man %d %s\n", w->section, w->text);
  293. break;
  294. case Wplain:
  295. print("plain '%s'\n", w->text);
  296. break;
  297. case Whr:
  298. print("hr\n");
  299. break;
  300. case Wpre:
  301. print("pre '%s'\n", w->text);
  302. break;
  303. }
  304. }
  305. }