tokens.c 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include <u.h>
  10. #include <libc.h>
  11. #include <stdio.h>
  12. #include "cpp.h"
/*
 * Output staging buffer used by puttokens() and flushout().
 * puttokens() writes out the first OBS bytes as soon as wbp reaches
 * &wbuf[OBS] and slides the remainder down, so the second half only
 * ever holds the overshoot of the most recent append.
 */
static char wbuf[2*OBS];
/* next free byte in wbuf; wbp == wbuf means nothing is pending */
static char *wbp = wbuf;
  15. /*
  16. * 1 for tokens that don't need whitespace when they get inserted
  17. * by macro expansion
  18. */
  19. static char wstab[] = {
  20. 0, /* END */
  21. 0, /* UNCLASS */
  22. 0, /* NAME */
  23. 0, /* NUMBER */
  24. 0, /* STRING */
  25. 0, /* CCON */
  26. 1, /* NL */
  27. 0, /* WS */
  28. 0, /* DSHARP */
  29. 0, /* EQ */
  30. 0, /* NEQ */
  31. 0, /* LEQ */
  32. 0, /* GEQ */
  33. 0, /* LSH */
  34. 0, /* RSH */
  35. 0, /* LAND */
  36. 0, /* LOR */
  37. 0, /* PPLUS */
  38. 0, /* MMINUS */
  39. 0, /* ARROW */
  40. 1, /* SBRA */
  41. 1, /* SKET */
  42. 1, /* LP */
  43. 1, /* RP */
  44. 0, /* DOT */
  45. 0, /* AND */
  46. 0, /* STAR */
  47. 0, /* PLUS */
  48. 0, /* MINUS */
  49. 0, /* TILDE */
  50. 0, /* NOT */
  51. 0, /* SLASH */
  52. 0, /* PCT */
  53. 0, /* LT */
  54. 0, /* GT */
  55. 0, /* CIRC */
  56. 0, /* OR */
  57. 0, /* QUEST */
  58. 0, /* COLON */
  59. 0, /* ASGN */
  60. 1, /* COMMA */
  61. 0, /* SHARP */
  62. 1, /* SEMIC */
  63. 1, /* CBRA */
  64. 1, /* CKET */
  65. 0, /* ASPLUS */
  66. 0, /* ASMINUS */
  67. 0, /* ASSTAR */
  68. 0, /* ASSLASH */
  69. 0, /* ASPCT */
  70. 0, /* ASCIRC */
  71. 0, /* ASLSH */
  72. 0, /* ASRSH */
  73. 0, /* ASOR */
  74. 0, /* ASAND */
  75. 0, /* ELLIPS */
  76. 0, /* DSHARP1 */
  77. 0, /* NAME1 */
  78. 0, /* DEFINED */
  79. 0, /* UMINUS */
  80. };
  81. void
  82. maketokenrow(int size, Tokenrow *trp)
  83. {
  84. trp->max = size;
  85. if (size>0)
  86. trp->bp = (Token *)domalloc(size*sizeof(Token));
  87. else
  88. trp->bp = NULL;
  89. trp->tp = trp->bp;
  90. trp->lp = trp->bp;
  91. }
  92. Token *
  93. growtokenrow(Tokenrow *trp)
  94. {
  95. int ncur = trp->tp - trp->bp;
  96. int nlast = trp->lp - trp->bp;
  97. trp->max = 3*trp->max/2 + 1;
  98. trp->bp = (Token *)realloc(trp->bp, trp->max*sizeof(Token));
  99. trp->lp = &trp->bp[nlast];
  100. trp->tp = &trp->bp[ncur];
  101. return trp->lp;
  102. }
  103. /*
  104. * Compare a row of tokens, ignoring the content of WS; return !=0 if different
  105. */
  106. int
  107. comparetokens(Tokenrow *tr1, Tokenrow *tr2)
  108. {
  109. Token *tp1, *tp2;
  110. tp1 = tr1->tp;
  111. tp2 = tr2->tp;
  112. if (tr1->lp-tp1 != tr2->lp-tp2)
  113. return 1;
  114. for (; tp1<tr1->lp ; tp1++, tp2++) {
  115. if (tp1->type != tp2->type
  116. || (tp1->wslen==0) != (tp2->wslen==0)
  117. || tp1->len != tp2->len
  118. || strncmp((char*)tp1->t, (char*)tp2->t, tp1->len)!=0)
  119. return 1;
  120. }
  121. return 0;
  122. }
  123. /*
  124. * replace ntok tokens starting at dtr->tp with the contents of str.
  125. * tp ends up pointing just beyond the replacement.
  126. * Canonical whitespace is assured on each side.
  127. */
  128. void
  129. insertrow(Tokenrow *dtr, int ntok, Tokenrow *str)
  130. {
  131. int nrtok = rowlen(str);
  132. dtr->tp += ntok;
  133. adjustrow(dtr, nrtok-ntok);
  134. dtr->tp -= ntok;
  135. movetokenrow(dtr, str);
  136. makespace(dtr);
  137. dtr->tp += nrtok;
  138. makespace(dtr);
  139. }
  140. /*
  141. * make sure there is WS before trp->tp, if tokens might merge in the output
  142. */
  143. void
  144. makespace(Tokenrow *trp)
  145. {
  146. uint8_t *tt;
  147. Token *tp = trp->tp;
  148. if (tp >= trp->lp)
  149. return;
  150. if (tp->wslen) {
  151. if (tp->flag&XPWS
  152. && (wstab[tp->type] || trp->tp>trp->bp && wstab[(tp-1)->type])) {
  153. tp->wslen = 0;
  154. return;
  155. }
  156. tp->t[-1] = ' ';
  157. return;
  158. }
  159. if (wstab[tp->type] || trp->tp>trp->bp && wstab[(tp-1)->type])
  160. return;
  161. tt = newstring(tp->t, tp->len, 1);
  162. *tt++ = ' ';
  163. tp->t = tt;
  164. tp->wslen = 1;
  165. tp->flag |= XPWS;
  166. }
  167. /*
  168. * Copy an entire tokenrow into another, at tp.
  169. * It is assumed that there is enough space.
  170. * Not strictly conforming.
  171. */
  172. void
  173. movetokenrow(Tokenrow *dtr, Tokenrow *str)
  174. {
  175. int nby;
  176. /* nby = sizeof(Token) * (str->lp - str->bp); */
  177. nby = (char *)str->lp - (char *)str->bp;
  178. memmove(dtr->tp, str->bp, nby);
  179. }
  180. /*
  181. * Move the tokens in a row, starting at tr->tp, rightward by nt tokens;
  182. * nt may be negative (left move).
  183. * The row may need to be grown.
  184. * Non-strictly conforming because of the (char *), but easily fixed
  185. */
  186. void
  187. adjustrow(Tokenrow *trp, int nt)
  188. {
  189. int nby, size;
  190. if (nt==0)
  191. return;
  192. size = (trp->lp - trp->bp) + nt;
  193. while (size > trp->max)
  194. growtokenrow(trp);
  195. /* nby = sizeof(Token) * (trp->lp - trp->tp); */
  196. nby = (char *)trp->lp - (char *)trp->tp;
  197. if (nby)
  198. memmove(trp->tp+nt, trp->tp, nby);
  199. trp->lp += nt;
  200. }
  201. /*
  202. * Copy a row of tokens into the destination holder, allocating
  203. * the space for the contents. Return the destination.
  204. */
  205. Tokenrow *
  206. copytokenrow(Tokenrow *dtr, Tokenrow *str)
  207. {
  208. int len = rowlen(str);
  209. maketokenrow(len, dtr);
  210. movetokenrow(dtr, str);
  211. dtr->lp += len;
  212. return dtr;
  213. }
  214. /*
  215. * Produce a copy of a row of tokens. Start at trp->tp.
  216. * The value strings are copied as well. The first token
  217. * has WS available.
  218. */
  219. Tokenrow *
  220. normtokenrow(Tokenrow *trp)
  221. {
  222. Token *tp;
  223. Tokenrow *ntrp = new(Tokenrow);
  224. int len;
  225. len = trp->lp - trp->tp;
  226. if (len<=0)
  227. len = 1;
  228. maketokenrow(len, ntrp);
  229. for (tp=trp->tp; tp < trp->lp; tp++) {
  230. *ntrp->lp = *tp;
  231. if (tp->len) {
  232. ntrp->lp->t = newstring(tp->t, tp->len, 1);
  233. *ntrp->lp->t++ = ' ';
  234. if (tp->wslen)
  235. ntrp->lp->wslen = 1;
  236. }
  237. ntrp->lp++;
  238. }
  239. if (ntrp->lp > ntrp->bp)
  240. ntrp->bp->wslen = 0;
  241. return ntrp;
  242. }
  243. /*
  244. * Debugging
  245. */
  246. void
  247. peektokens(Tokenrow *trp, char *str)
  248. {
  249. Token *tp;
  250. int c;
  251. tp = trp->tp;
  252. flushout();
  253. if (str)
  254. fprintf(stderr, "%s ", str);
  255. if (tp<trp->bp || tp>trp->lp)
  256. fprintf(stderr, "(tp offset %d) ", tp-trp->bp);
  257. for (tp=trp->bp; tp<trp->lp && tp<trp->bp+32; tp++) {
  258. if (tp->type!=NL) {
  259. c = tp->t[tp->len];
  260. tp->t[tp->len] = 0;
  261. fprintf(stderr, "%s", tp->t, tp->len);
  262. tp->t[tp->len] = c;
  263. }
  264. if (tp->type==NAME) {
  265. fprintf(stderr, tp==trp->tp?"{*":"{");
  266. prhideset(tp->hideset);
  267. fprintf(stderr, "} ");
  268. } else
  269. fprintf(stderr, tp==trp->tp?"{%x*} ":"{%x} ", tp->type);
  270. }
  271. fprintf(stderr, "\n");
  272. fflush(stderr);
  273. }
  274. void
  275. puttokens(Tokenrow *trp)
  276. {
  277. Token *tp;
  278. int len;
  279. uint8_t *p;
  280. if (verbose)
  281. peektokens(trp, "");
  282. tp = trp->bp;
  283. for (; tp<trp->lp; tp++) {
  284. len = tp->len+tp->wslen;
  285. p = tp->t-tp->wslen;
  286. while (tp<trp->lp-1 && p+len == (tp+1)->t - (tp+1)->wslen) {
  287. tp++;
  288. len += tp->wslen+tp->len;
  289. }
  290. if (Mflag==0) {
  291. if (len>OBS/2) { /* handle giant token */
  292. if (wbp > wbuf)
  293. write(1, wbuf, wbp-wbuf);
  294. write(1, p, len);
  295. wbp = wbuf;
  296. } else {
  297. memcpy(wbp, p, len);
  298. wbp += len;
  299. }
  300. }
  301. if (wbp >= &wbuf[OBS]) {
  302. write(1, wbuf, OBS);
  303. if (wbp > &wbuf[OBS])
  304. memcpy(wbuf, wbuf+OBS, wbp - &wbuf[OBS]);
  305. wbp -= OBS;
  306. }
  307. }
  308. trp->tp = tp;
  309. if (cursource->fd==0)
  310. flushout();
  311. }
  312. void
  313. flushout(void)
  314. {
  315. if (wbp>wbuf) {
  316. write(1, wbuf, wbp-wbuf);
  317. wbp = wbuf;
  318. }
  319. }
  320. /*
  321. * turn a row into just a newline
  322. */
  323. void
  324. setempty(Tokenrow *trp)
  325. {
  326. trp->tp = trp->bp;
  327. trp->lp = trp->bp+1;
  328. *trp->bp = nltoken;
  329. }
  330. /*
  331. * generate a number
  332. */
  333. char *
  334. outnum(char *p, int n)
  335. {
  336. if (n>=10)
  337. p = outnum(p, n/10);
  338. *p++ = n%10 + '0';
  339. return p;
  340. }
  341. /*
  342. * allocate and initialize a new string from s, of length l, at offset o
  343. * Null terminated.
  344. */
  345. uint8_t *
  346. newstring(uint8_t *s, int l, int o)
  347. {
  348. uint8_t *ns = (uint8_t *)domalloc(l+o+1);
  349. ns[l+o] = '\0';
  350. return (uint8_t*)strncpy((char*)ns+o, (char*)s, l) - o;
  351. }