lex.c 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include <ctype.h>
  5. #include <mach.h>
  6. #define Extern extern
  7. #include "acid.h"
  8. #include "y.tab.h"
  9. struct keywd
  10. {
  11. char *name;
  12. int terminal;
  13. }
  14. keywds[] =
  15. {
  16. "do", Tdo,
  17. "if", Tif,
  18. "then", Tthen,
  19. "else", Telse,
  20. "while", Twhile,
  21. "loop", Tloop,
  22. "head", Thead,
  23. "tail", Ttail,
  24. "append", Tappend,
  25. "defn", Tfn,
  26. "return", Tret,
  27. "local", Tlocal,
  28. "aggr", Tcomplex,
  29. "union", Tcomplex,
  30. "adt", Tcomplex,
  31. "complex", Tcomplex,
  32. "delete", Tdelete,
  33. "whatis", Twhat,
  34. "eval", Teval,
  35. "builtin", Tbuiltin,
  36. 0, 0
  37. };
  /*
   * Escape translation table used by escchar(): indexed by the character
   * that follows a backslash, each non-zero entry holds the translated
   * character plus one.  The +1 bias lets 0 mean "no translation", in
   * which case escchar() returns the character unchanged.
   */
  char cmap[256] =
  {
      ['0']  = '\0'+1,
      ['n']  = '\n'+1,
      ['r']  = '\r'+1,
      ['t']  = '\t'+1,
      ['b']  = '\b'+1,
      ['f']  = '\f'+1,
      ['a']  = '\a'+1,
      ['v']  = '\v'+1,
      ['\\'] = '\\'+1,
      ['"']  = '"'+1,
  };
  51. void
  52. kinit(void)
  53. {
  54. int i;
  55. for(i = 0; keywds[i].name; i++)
  56. enter(keywds[i].name, keywds[i].terminal);
  57. }
  58. typedef struct IOstack IOstack;
  59. struct IOstack
  60. {
  61. char *name;
  62. int line;
  63. char *text;
  64. char *ip;
  65. Biobuf *fin;
  66. IOstack *prev;
  67. };
  68. IOstack *lexio;
  69. void
  70. pushfile(char *file)
  71. {
  72. Biobuf *b;
  73. IOstack *io;
  74. if(file)
  75. b = Bopen(file, OREAD);
  76. else{
  77. b = Bopen("/fd/0", OREAD);
  78. file = "<stdin>";
  79. }
  80. if(b == 0)
  81. error("pushfile: %s: %r", file);
  82. io = malloc(sizeof(IOstack));
  83. if(io == 0)
  84. fatal("no memory");
  85. io->name = strdup(file);
  86. if(io->name == 0)
  87. fatal("no memory");
  88. io->line = line;
  89. line = 1;
  90. io->text = 0;
  91. io->fin = b;
  92. io->prev = lexio;
  93. lexio = io;
  94. }
  95. void
  96. pushstr(Node *s)
  97. {
  98. IOstack *io;
  99. io = malloc(sizeof(IOstack));
  100. if(io == 0)
  101. fatal("no memory");
  102. io->line = line;
  103. line = 1;
  104. io->name = strdup("<string>");
  105. if(io->name == 0)
  106. fatal("no memory");
  107. io->line = line;
  108. line = 1;
  109. io->text = strdup(s->string->string);
  110. if(io->text == 0)
  111. fatal("no memory");
  112. io->ip = io->text;
  113. io->fin = 0;
  114. io->prev = lexio;
  115. lexio = io;
  116. }
  117. void
  118. restartio(void)
  119. {
  120. Bflush(lexio->fin);
  121. Binit(lexio->fin, 0, OREAD);
  122. }
  123. int
  124. popio(void)
  125. {
  126. IOstack *s;
  127. if(lexio == 0)
  128. return 0;
  129. if(lexio->prev == 0){
  130. if(lexio->fin)
  131. restartio();
  132. return 0;
  133. }
  134. if(lexio->fin)
  135. Bterm(lexio->fin);
  136. else
  137. free(lexio->text);
  138. free(lexio->name);
  139. line = lexio->line;
  140. s = lexio;
  141. lexio = s->prev;
  142. free(s);
  143. return 1;
  144. }
  145. int
  146. Lfmt(Fmt *f)
  147. {
  148. int i;
  149. char buf[1024];
  150. IOstack *e;
  151. e = lexio;
  152. if(e) {
  153. i = snprint(buf, sizeof(buf), "%s:%d", e->name, line);
  154. while(e->prev) {
  155. e = e->prev;
  156. if(initialising && e->prev == 0)
  157. break;
  158. i += snprint(buf+i, sizeof(buf)-i, " [%s:%d]", e->name, e->line);
  159. }
  160. } else
  161. snprint(buf, sizeof(buf), "no file:0");
  162. fmtstrcpy(f, buf);
  163. return 0;
  164. }
  165. void
  166. unlexc(int s)
  167. {
  168. if(s == '\n')
  169. line--;
  170. if(lexio->fin)
  171. Bungetc(lexio->fin);
  172. else
  173. lexio->ip--;
  174. }
  175. int
  176. lexc(void)
  177. {
  178. int c;
  179. if(lexio->fin) {
  180. c = Bgetc(lexio->fin);
  181. if(gotint)
  182. error("interrupt");
  183. return c;
  184. }
  185. c = *lexio->ip++;
  186. if(c == 0)
  187. return -1;
  188. return c;
  189. }
  190. int
  191. escchar(char c)
  192. {
  193. int n;
  194. char buf[Strsize];
  195. if(c >= '0' && c <= '9') {
  196. n = 1;
  197. buf[0] = c;
  198. for(;;) {
  199. c = lexc();
  200. if(c == Eof)
  201. error("%d: <eof> in escape sequence", line);
  202. if(strchr("0123456789xX", c) == 0) {
  203. unlexc(c);
  204. break;
  205. }
  206. if(n >= Strsize)
  207. error("string escape too long");
  208. buf[n++] = c;
  209. }
  210. buf[n] = '\0';
  211. return strtol(buf, 0, 0);
  212. }
  213. n = cmap[c];
  214. if(n == 0)
  215. return c;
  216. return n-1;
  217. }
  218. void
  219. eatstring(void)
  220. {
  221. int esc, c, cnt;
  222. char buf[Strsize];
  223. esc = 0;
  224. for(cnt = 0;;) {
  225. c = lexc();
  226. switch(c) {
  227. case Eof:
  228. error("%d: <eof> in string constant", line);
  229. case '\n':
  230. error("newline in string constant");
  231. goto done;
  232. case '\\':
  233. if(esc)
  234. goto Default;
  235. esc = 1;
  236. break;
  237. case '"':
  238. if(esc == 0)
  239. goto done;
  240. /* Fall through */
  241. default:
  242. Default:
  243. if(esc) {
  244. c = escchar(c);
  245. esc = 0;
  246. }
  247. buf[cnt++] = c;
  248. break;
  249. }
  250. if(cnt >= Strsize)
  251. error("string token too long");
  252. }
  253. done:
  254. buf[cnt] = '\0';
  255. yylval.string = strnode(buf);
  256. }
  257. void
  258. eatnl(void)
  259. {
  260. int c;
  261. line++;
  262. for(;;) {
  263. c = lexc();
  264. if(c == Eof)
  265. error("eof in comment");
  266. if(c == '\n')
  267. return;
  268. }
  269. }
  270. int
  271. yylex(void)
  272. {
  273. int c;
  274. extern char vfmt[];
  275. loop:
  276. Bflush(bout);
  277. c = lexc();
  278. switch(c) {
  279. case Eof:
  280. if(gotint) {
  281. gotint = 0;
  282. stacked = 0;
  283. Bprint(bout, "\nacid: ");
  284. goto loop;
  285. }
  286. return Eof;
  287. case '"':
  288. eatstring();
  289. return Tstring;
  290. case ' ':
  291. case '\t':
  292. goto loop;
  293. case '\n':
  294. line++;
  295. if(interactive == 0)
  296. goto loop;
  297. if(stacked) {
  298. print("\t");
  299. goto loop;
  300. }
  301. return ';';
  302. case '.':
  303. c = lexc();
  304. unlexc(c);
  305. if(isdigit(c))
  306. return numsym('.');
  307. return '.';
  308. case '(':
  309. case ')':
  310. case '[':
  311. case ']':
  312. case ';':
  313. case ':':
  314. case ',':
  315. case '~':
  316. case '?':
  317. case '*':
  318. case '@':
  319. case '^':
  320. case '%':
  321. return c;
  322. case '{':
  323. stacked++;
  324. return c;
  325. case '}':
  326. stacked--;
  327. return c;
  328. case '\\':
  329. c = lexc();
  330. if(strchr(vfmt, c) == 0) {
  331. unlexc(c);
  332. return '\\';
  333. }
  334. yylval.ival = c;
  335. return Tfmt;
  336. case '!':
  337. c = lexc();
  338. if(c == '=')
  339. return Tneq;
  340. unlexc(c);
  341. return '!';
  342. case '+':
  343. c = lexc();
  344. if(c == '+')
  345. return Tinc;
  346. unlexc(c);
  347. return '+';
  348. case '/':
  349. c = lexc();
  350. if(c == '/') {
  351. eatnl();
  352. goto loop;
  353. }
  354. unlexc(c);
  355. return '/';
  356. case '\'':
  357. c = lexc();
  358. if(c == '\\')
  359. yylval.ival = escchar(lexc());
  360. else
  361. yylval.ival = c;
  362. c = lexc();
  363. if(c != '\'') {
  364. error("missing '");
  365. unlexc(c);
  366. }
  367. return Tconst;
  368. case '&':
  369. c = lexc();
  370. if(c == '&')
  371. return Tandand;
  372. unlexc(c);
  373. return '&';
  374. case '=':
  375. c = lexc();
  376. if(c == '=')
  377. return Teq;
  378. unlexc(c);
  379. return '=';
  380. case '|':
  381. c = lexc();
  382. if(c == '|')
  383. return Toror;
  384. unlexc(c);
  385. return '|';
  386. case '<':
  387. c = lexc();
  388. if(c == '=')
  389. return Tleq;
  390. if(c == '<')
  391. return Tlsh;
  392. unlexc(c);
  393. return '<';
  394. case '>':
  395. c = lexc();
  396. if(c == '=')
  397. return Tgeq;
  398. if(c == '>')
  399. return Trsh;
  400. unlexc(c);
  401. return '>';
  402. case '-':
  403. c = lexc();
  404. if(c == '>')
  405. return Tindir;
  406. if(c == '-')
  407. return Tdec;
  408. unlexc(c);
  409. return '-';
  410. default:
  411. return numsym(c);
  412. }
  413. }
  414. int
  415. numsym(char first)
  416. {
  417. int c, isbin, isfloat, ishex;
  418. char *sel, *p;
  419. Lsym *s;
  420. symbol[0] = first;
  421. p = symbol;
  422. ishex = 0;
  423. isbin = 0;
  424. isfloat = 0;
  425. if(first == '.')
  426. isfloat = 1;
  427. if(isdigit(*p++) || isfloat) {
  428. for(;;) {
  429. c = lexc();
  430. if(c < 0)
  431. error("%d: <eof> eating symbols", line);
  432. if(c == '\n')
  433. line++;
  434. sel = "01234567890.xb";
  435. if(ishex)
  436. sel = "01234567890abcdefABCDEF";
  437. else if(isbin)
  438. sel = "01";
  439. else if(isfloat)
  440. sel = "01234567890eE-+";
  441. if(strchr(sel, c) == 0) {
  442. unlexc(c);
  443. break;
  444. }
  445. if(c == '.')
  446. isfloat = 1;
  447. if(!isbin && c == 'x')
  448. ishex = 1;
  449. if(!ishex && c == 'b')
  450. isbin = 1;
  451. *p++ = c;
  452. }
  453. *p = '\0';
  454. if(isfloat) {
  455. yylval.fval = atof(symbol);
  456. return Tfconst;
  457. }
  458. if(isbin)
  459. yylval.ival = strtoull(symbol+2, 0, 2);
  460. else
  461. yylval.ival = strtoull(symbol, 0, 0);
  462. return Tconst;
  463. }
  464. for(;;) {
  465. c = lexc();
  466. if(c < 0)
  467. error("%d <eof> eating symbols", line);
  468. if(c == '\n')
  469. line++;
  470. if(c != '_' && c != '$' && c <= '~' && !isalnum(c)) { /* checking against ~ lets UTF names through */
  471. unlexc(c);
  472. break;
  473. }
  474. *p++ = c;
  475. }
  476. *p = '\0';
  477. s = look(symbol);
  478. if(s == 0)
  479. s = enter(symbol, Tid);
  480. yylval.sym = s;
  481. return s->lexval;
  482. }
  483. Lsym*
  484. enter(char *name, int t)
  485. {
  486. Lsym *s;
  487. uint h;
  488. char *p;
  489. Value *v;
  490. h = 0;
  491. for(p = name; *p; p++)
  492. h = h*3 + *p;
  493. h %= Hashsize;
  494. s = gmalloc(sizeof(Lsym));
  495. memset(s, 0, sizeof(Lsym));
  496. s->name = strdup(name);
  497. s->hash = hash[h];
  498. hash[h] = s;
  499. s->lexval = t;
  500. v = gmalloc(sizeof(Value));
  501. s->v = v;
  502. v->fmt = 'X';
  503. v->type = TINT;
  504. memset(v, 0, sizeof(Value));
  505. return s;
  506. }
  507. Lsym*
  508. look(char *name)
  509. {
  510. Lsym *s;
  511. uint h;
  512. char *p;
  513. h = 0;
  514. for(p = name; *p; p++)
  515. h = h*3 + *p;
  516. h %= Hashsize;
  517. for(s = hash[h]; s; s = s->hash)
  518. if(strcmp(name, s->name) == 0)
  519. return s;
  520. return 0;
  521. }
  522. Lsym*
  523. mkvar(char *s)
  524. {
  525. Lsym *l;
  526. l = look(s);
  527. if(l == 0)
  528. l = enter(s, Tid);
  529. return l;
  530. }