lex.c 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618
  1. /*
  2. * This file is part of the UCB release of Plan 9. It is subject to the license
  3. * terms in the LICENSE file found in the top-level directory of this
  4. * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
  5. * part of the UCB release of Plan 9, including this file, may be copied,
  6. * modified, propagated, or distributed except according to the terms contained
  7. * in the LICENSE file.
  8. */
  9. #include <u.h>
  10. #include <libc.h>
  11. #include <bio.h>
  12. #include <ctype.h>
  13. #include <mach.h>
  14. #define Extern extern
  15. #include "acid.h"
  16. #include "y.tab.h"
  17. struct keywd
  18. {
  19. char *name;
  20. int terminal;
  21. }
  22. keywds[] =
  23. {
  24. { "do", Tdo, },
  25. { "if", Tif, },
  26. { "then", Tthen, },
  27. { "else", Telse, },
  28. { "while", Twhile, },
  29. { "loop", Tloop, },
  30. { "head", Thead, },
  31. { "tail", Ttail, },
  32. { "append", Tappend, },
  33. { "defn", Tfn, },
  34. { "return", Tret, },
  35. { "local", Tlocal, },
  36. { "aggr", Tcomplex, },
  37. { "union", Tcomplex, },
  38. { "adt", Tcomplex, },
  39. { "complex", Tcomplex, },
  40. { "delete", Tdelete, },
  41. { "whatis", Twhat, },
  42. { "eval", Teval, },
  43. { "builtin", Tbuiltin, },
  44. { 0, 0 },
  45. };
  46. char cmap[256] =
  47. {
  48. ['0'] = '\0'+1,
  49. ['n'] = '\n'+1,
  50. ['r'] = '\r'+1,
  51. ['t'] = '\t'+1,
  52. ['b'] = '\b'+1,
  53. ['f'] = '\f'+1,
  54. ['a'] = '\a'+1,
  55. ['v'] = '\v'+1,
  56. ['\\'] = '\\'+1,
  57. ['"'] = '"'+1,
  58. };
  59. void
  60. kinit(void)
  61. {
  62. int i;
  63. for(i = 0; keywds[i].name; i++)
  64. enter(keywds[i].name, keywds[i].terminal);
  65. }
  66. typedef struct IOstack IOstack;
  67. struct IOstack
  68. {
  69. char *name;
  70. int line;
  71. char *text;
  72. char *ip;
  73. Biobuf *fin;
  74. IOstack *prev;
  75. };
  76. IOstack *lexio;
  77. void
  78. pushfile(char *file)
  79. {
  80. Biobuf *b;
  81. IOstack *io;
  82. if(file)
  83. b = Bopen(file, OREAD);
  84. else{
  85. b = Bopen("/fd/0", OREAD);
  86. file = "<stdin>";
  87. }
  88. if(b == 0)
  89. error("pushfile: %s: %r", file);
  90. io = malloc(sizeof(IOstack));
  91. if(io == 0)
  92. fatal("no memory");
  93. io->name = strdup(file);
  94. if(io->name == 0)
  95. fatal("no memory");
  96. io->line = line;
  97. line = 1;
  98. io->text = 0;
  99. io->fin = b;
  100. io->prev = lexio;
  101. lexio = io;
  102. }
  103. void
  104. pushstr(Node *s)
  105. {
  106. IOstack *io;
  107. io = malloc(sizeof(IOstack));
  108. if(io == 0)
  109. fatal("no memory");
  110. io->line = line;
  111. line = 1;
  112. io->name = strdup("<string>");
  113. if(io->name == 0)
  114. fatal("no memory");
  115. io->line = line;
  116. line = 1;
  117. io->text = strdup(s->store.string->string);
  118. if(io->text == 0)
  119. fatal("no memory");
  120. io->ip = io->text;
  121. io->fin = 0;
  122. io->prev = lexio;
  123. lexio = io;
  124. }
  125. void
  126. restartio(void)
  127. {
  128. Bflush(lexio->fin);
  129. Binit(lexio->fin, 0, OREAD);
  130. }
  131. int
  132. popio(void)
  133. {
  134. IOstack *s;
  135. if(lexio == 0)
  136. return 0;
  137. if(lexio->prev == 0){
  138. if(lexio->fin)
  139. restartio();
  140. return 0;
  141. }
  142. if(lexio->fin)
  143. Bterm(lexio->fin);
  144. else
  145. free(lexio->text);
  146. free(lexio->name);
  147. line = lexio->line;
  148. s = lexio;
  149. lexio = s->prev;
  150. free(s);
  151. return 1;
  152. }
  153. int
  154. Lfmt(Fmt *f)
  155. {
  156. int i;
  157. char buf[1024];
  158. IOstack *e;
  159. e = lexio;
  160. if(e) {
  161. i = snprint(buf, sizeof(buf), "%s:%d", e->name, line);
  162. while(e->prev) {
  163. e = e->prev;
  164. if(initialising && e->prev == 0)
  165. break;
  166. i += snprint(buf+i, sizeof(buf)-i, " [%s:%d]", e->name, e->line);
  167. }
  168. } else
  169. snprint(buf, sizeof(buf), "no file:0");
  170. fmtstrcpy(f, buf);
  171. return 0;
  172. }
  173. void
  174. unlexc(int s)
  175. {
  176. if(s == '\n')
  177. line--;
  178. if(lexio->fin)
  179. Bungetc(lexio->fin);
  180. else
  181. lexio->ip--;
  182. }
  183. int
  184. lexc(void)
  185. {
  186. int c;
  187. if(lexio->fin) {
  188. c = Bgetc(lexio->fin);
  189. if(gotint)
  190. error("interrupt");
  191. return c;
  192. }
  193. c = *lexio->ip++;
  194. if(c == 0)
  195. return -1;
  196. return c;
  197. }
  198. int
  199. escchar(i8 c)
  200. {
  201. int n;
  202. char buf[Strsize];
  203. if(c >= '0' && c <= '9') {
  204. n = 1;
  205. buf[0] = c;
  206. for(;;) {
  207. c = lexc();
  208. if(c == Eof)
  209. error("%d: <eof> in escape sequence", line);
  210. if(strchr("0123456789xX", c) == 0) {
  211. unlexc(c);
  212. break;
  213. }
  214. if(n >= Strsize)
  215. error("string escape too long");
  216. buf[n++] = c;
  217. }
  218. buf[n] = '\0';
  219. return strtol(buf, 0, 0);
  220. }
  221. n = cmap[c];
  222. if(n == 0)
  223. return c;
  224. return n-1;
  225. }
  226. void
  227. eatstring(void)
  228. {
  229. int esc, c, cnt;
  230. char buf[Strsize];
  231. esc = 0;
  232. for(cnt = 0;;) {
  233. c = lexc();
  234. switch(c) {
  235. case Eof:
  236. error("%d: <eof> in string constant", line);
  237. case '\n':
  238. error("newline in string constant");
  239. goto done;
  240. case '\\':
  241. if(esc)
  242. goto Default;
  243. esc = 1;
  244. break;
  245. case '"':
  246. if(esc == 0)
  247. goto done;
  248. /* Fall through */
  249. default:
  250. Default:
  251. if(esc) {
  252. c = escchar(c);
  253. esc = 0;
  254. }
  255. buf[cnt++] = c;
  256. break;
  257. }
  258. if(cnt >= Strsize)
  259. error("string token too long");
  260. }
  261. done:
  262. buf[cnt] = '\0';
  263. yylval.string = strnode(buf);
  264. }
  265. void
  266. eatnl(void)
  267. {
  268. int c;
  269. line++;
  270. for(;;) {
  271. c = lexc();
  272. if(c == Eof)
  273. error("eof in comment");
  274. if(c == '\n')
  275. return;
  276. }
  277. }
  278. int
  279. yylex(void)
  280. {
  281. int c;
  282. extern char vfmt[];
  283. loop:
  284. Bflush(bout);
  285. c = lexc();
  286. switch(c) {
  287. case Eof:
  288. if(gotint) {
  289. gotint = 0;
  290. stacked = 0;
  291. Bprint(bout, "\nacid: ");
  292. goto loop;
  293. }
  294. return Eof;
  295. case '"':
  296. eatstring();
  297. return Tstring;
  298. case ' ':
  299. case '\t':
  300. goto loop;
  301. case '\n':
  302. line++;
  303. if(interactive == 0)
  304. goto loop;
  305. if(stacked) {
  306. print("\t");
  307. goto loop;
  308. }
  309. return ';';
  310. case '.':
  311. c = lexc();
  312. unlexc(c);
  313. if(isdigit(c))
  314. return numsym('.');
  315. return '.';
  316. case '(':
  317. case ')':
  318. case '[':
  319. case ']':
  320. case ';':
  321. case ':':
  322. case ',':
  323. case '~':
  324. case '?':
  325. case '*':
  326. case '@':
  327. case '^':
  328. case '%':
  329. return c;
  330. case '{':
  331. stacked++;
  332. return c;
  333. case '}':
  334. stacked--;
  335. return c;
  336. case '\\':
  337. c = lexc();
  338. if(strchr(vfmt, c) == 0) {
  339. unlexc(c);
  340. return '\\';
  341. }
  342. yylval.ival = c;
  343. return Tfmt;
  344. case '!':
  345. c = lexc();
  346. if(c == '=')
  347. return Tneq;
  348. unlexc(c);
  349. return '!';
  350. case '+':
  351. c = lexc();
  352. if(c == '+')
  353. return Tinc;
  354. unlexc(c);
  355. return '+';
  356. case '/':
  357. c = lexc();
  358. if(c == '/') {
  359. eatnl();
  360. goto loop;
  361. }
  362. unlexc(c);
  363. return '/';
  364. case '\'':
  365. c = lexc();
  366. if(c == '\\')
  367. yylval.ival = escchar(lexc());
  368. else
  369. yylval.ival = c;
  370. c = lexc();
  371. if(c != '\'') {
  372. error("missing '");
  373. unlexc(c);
  374. }
  375. return Tconst;
  376. case '&':
  377. c = lexc();
  378. if(c == '&')
  379. return Tandand;
  380. unlexc(c);
  381. return '&';
  382. case '=':
  383. c = lexc();
  384. if(c == '=')
  385. return Teq;
  386. unlexc(c);
  387. return '=';
  388. case '|':
  389. c = lexc();
  390. if(c == '|')
  391. return Toror;
  392. unlexc(c);
  393. return '|';
  394. case '<':
  395. c = lexc();
  396. if(c == '=')
  397. return Tleq;
  398. if(c == '<')
  399. return Tlsh;
  400. unlexc(c);
  401. return '<';
  402. case '>':
  403. c = lexc();
  404. if(c == '=')
  405. return Tgeq;
  406. if(c == '>')
  407. return Trsh;
  408. unlexc(c);
  409. return '>';
  410. case '-':
  411. c = lexc();
  412. if(c == '>')
  413. return Tindir;
  414. if(c == '-')
  415. return Tdec;
  416. unlexc(c);
  417. return '-';
  418. default:
  419. return numsym(c);
  420. }
  421. }
  422. int
  423. numsym(char first)
  424. {
  425. int c, isbin, isfloat, ishex;
  426. char *sel, *p;
  427. Lsym *s;
  428. symbol[0] = first;
  429. p = symbol;
  430. ishex = 0;
  431. isbin = 0;
  432. isfloat = 0;
  433. if(first == '.')
  434. isfloat = 1;
  435. if(isdigit(*p++) || isfloat) {
  436. for(;;) {
  437. c = lexc();
  438. if(c < 0)
  439. error("%d: <eof> eating symbols", line);
  440. if(c == '\n')
  441. line++;
  442. sel = "01234567890.xb";
  443. if(ishex)
  444. sel = "01234567890abcdefABCDEF";
  445. else if(isbin)
  446. sel = "01";
  447. else if(isfloat)
  448. sel = "01234567890eE-+";
  449. if(strchr(sel, c) == 0) {
  450. unlexc(c);
  451. break;
  452. }
  453. if(c == '.')
  454. isfloat = 1;
  455. if(!isbin && c == 'x')
  456. ishex = 1;
  457. if(!ishex && c == 'b')
  458. isbin = 1;
  459. *p++ = c;
  460. }
  461. *p = '\0';
  462. if(isfloat) {
  463. yylval.fval = atof(symbol);
  464. return Tfconst;
  465. }
  466. if(isbin)
  467. yylval.ival = strtoull(symbol+2, 0, 2);
  468. else
  469. yylval.ival = strtoull(symbol, 0, 0);
  470. return Tconst;
  471. }
  472. for(;;) {
  473. c = lexc();
  474. if(c < 0)
  475. error("%d <eof> eating symbols", line);
  476. if(c == '\n')
  477. line++;
  478. if(c != '_' && c != '$' && c <= '~' && !isalnum(c)) { /* checking against ~ lets UTF names through */
  479. unlexc(c);
  480. break;
  481. }
  482. *p++ = c;
  483. }
  484. *p = '\0';
  485. s = look(symbol);
  486. if(s == 0)
  487. s = enter(symbol, Tid);
  488. yylval.sym = s;
  489. return s->lexval;
  490. }
  491. Lsym*
  492. enter(char *name, int t)
  493. {
  494. Lsym *s;
  495. u32 h;
  496. char *p;
  497. Value *v;
  498. h = 0;
  499. for(p = name; *p; p++)
  500. h = h*3 + *p;
  501. h %= Hashsize;
  502. s = gmalloc(sizeof(Lsym));
  503. memset(s, 0, sizeof(Lsym));
  504. s->name = strdup(name);
  505. s->hash = hash[h];
  506. hash[h] = s;
  507. s->lexval = t;
  508. v = gmalloc(sizeof(Value));
  509. s->v = v;
  510. v->store.fmt = 'X';
  511. v->type = TINT;
  512. memset(v, 0, sizeof(Value));
  513. return s;
  514. }
  515. Lsym*
  516. look(char *name)
  517. {
  518. Lsym *s;
  519. u32 h;
  520. char *p;
  521. h = 0;
  522. for(p = name; *p; p++)
  523. h = h*3 + *p;
  524. h %= Hashsize;
  525. for(s = hash[h]; s; s = s->hash)
  526. if(strcmp(name, s->name) == 0)
  527. return s;
  528. return 0;
  529. }
  530. Lsym*
  531. mkvar(char *s)
  532. {
  533. Lsym *l;
  534. l = look(s);
  535. if(l == 0)
  536. l = enter(s, Tid);
  537. return l;
  538. }