lex.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include <ctype.h>
  5. #include <mach.h>
  6. #define Extern extern
  7. #include "acid.h"
  8. #include "y.tab.h"
  9. struct keywd
  10. {
  11. char *name;
  12. int terminal;
  13. }
  14. keywds[] =
  15. {
  16. "do", Tdo,
  17. "if", Tif,
  18. "then", Tthen,
  19. "else", Telse,
  20. "while", Twhile,
  21. "loop", Tloop,
  22. "head", Thead,
  23. "tail", Ttail,
  24. "append", Tappend,
  25. "defn", Tfn,
  26. "return", Tret,
  27. "local", Tlocal,
  28. "aggr", Tcomplex,
  29. "union", Tcomplex,
  30. "adt", Tcomplex,
  31. "complex", Tcomplex,
  32. "delete", Tdelete,
  33. "whatis", Twhat,
  34. "eval", Teval,
  35. "builtin", Tbuiltin,
  36. 0, 0
  37. };
  /*
   * Escape map used by escchar(): indexed by the character that
   * follows a backslash.  A non-zero entry holds the replacement
   * character plus one, so that zero can mean "no mapping".
   */
  char cmap[256] =
  {
      /* punctuation that stands for itself */
      ['"']   '"'+1,
      ['\\']  '\\'+1,
      /* NUL */
      ['0']   '\0'+1,
      /* control characters */
      ['a']   '\a'+1,
      ['b']   '\b'+1,
      ['f']   '\f'+1,
      ['n']   '\n'+1,
      ['r']   '\r'+1,
      ['t']   '\t'+1,
      ['v']   '\v'+1,
  };
  51. void
  52. kinit(void)
  53. {
  54. int i;
  55. for(i = 0; keywds[i].name; i++)
  56. enter(keywds[i].name, keywds[i].terminal);
  57. }
  58. typedef struct IOstack IOstack;
  59. struct IOstack
  60. {
  61. char *name;
  62. int line;
  63. char *text;
  64. char *ip;
  65. Biobuf *fin;
  66. IOstack *prev;
  67. };
  68. IOstack *lexio;
  69. void
  70. pushfile(char *file)
  71. {
  72. Biobuf *b;
  73. IOstack *io;
  74. if(file)
  75. b = Bopen(file, OREAD);
  76. else{
  77. b = Bopen("/fd/0", OREAD);
  78. file = "<stdin>";
  79. }
  80. if(b == 0)
  81. error("pushfile: %s: %r", file);
  82. io = malloc(sizeof(IOstack));
  83. if(io == 0)
  84. fatal("no memory");
  85. io->name = strdup(file);
  86. if(io->name == 0)
  87. fatal("no memory");
  88. io->line = line;
  89. line = 1;
  90. io->text = 0;
  91. io->fin = b;
  92. io->prev = lexio;
  93. lexio = io;
  94. }
  95. void
  96. pushstr(Node *s)
  97. {
  98. IOstack *io;
  99. io = malloc(sizeof(IOstack));
  100. if(io == 0)
  101. fatal("no memory");
  102. io->line = line;
  103. line = 1;
  104. io->name = strdup("<string>");
  105. if(io->name == 0)
  106. fatal("no memory");
  107. io->line = line;
  108. line = 1;
  109. io->text = strdup(s->string->string);
  110. if(io->text == 0)
  111. fatal("no memory");
  112. io->ip = io->text;
  113. io->fin = 0;
  114. io->prev = lexio;
  115. lexio = io;
  116. }
  117. void
  118. restartio(void)
  119. {
  120. Bflush(lexio->fin);
  121. Binit(lexio->fin, 0, OREAD);
  122. }
  123. int
  124. popio(void)
  125. {
  126. IOstack *s;
  127. if(lexio == 0)
  128. return 0;
  129. if(lexio->prev == 0){
  130. if(lexio->fin)
  131. restartio();
  132. return 0;
  133. }
  134. if(lexio->fin)
  135. Bterm(lexio->fin);
  136. else
  137. free(lexio->text);
  138. free(lexio->name);
  139. line = lexio->line;
  140. s = lexio;
  141. lexio = s->prev;
  142. free(s);
  143. return 1;
  144. }
  145. int
  146. Lfmt(Fmt *f)
  147. {
  148. int i;
  149. char buf[1024];
  150. IOstack *e;
  151. e = lexio;
  152. if(e) {
  153. i = sprint(buf, "%s:%d", e->name, line);
  154. while(e->prev) {
  155. e = e->prev;
  156. if(initialising && e->prev == 0)
  157. break;
  158. i += sprint(buf+i, " [%s:%d]", e->name, e->line);
  159. }
  160. } else
  161. sprint(buf, "no file:0");
  162. fmtstrcpy(f, buf);
  163. return 0;
  164. }
  165. void
  166. unlexc(int s)
  167. {
  168. if(s == '\n')
  169. line--;
  170. if(lexio->fin)
  171. Bungetc(lexio->fin);
  172. else
  173. lexio->ip--;
  174. }
  175. int
  176. lexc(void)
  177. {
  178. int c;
  179. if(lexio->fin) {
  180. c = Bgetc(lexio->fin);
  181. if(gotint)
  182. error("interrupt");
  183. return c;
  184. }
  185. c = *lexio->ip++;
  186. if(c == 0)
  187. return -1;
  188. return c;
  189. }
  190. int
  191. escchar(char c)
  192. {
  193. int n;
  194. char buf[Strsize];
  195. if(c >= '0' && c <= '9') {
  196. n = 1;
  197. buf[0] = c;
  198. for(;;) {
  199. c = lexc();
  200. if(c == Eof)
  201. error("%d: <eof> in escape sequence", line);
  202. if(strchr("0123456789xX", c) == 0) {
  203. unlexc(c);
  204. break;
  205. }
  206. buf[n++] = c;
  207. }
  208. buf[n] = '\0';
  209. return strtol(buf, 0, 0);
  210. }
  211. n = cmap[c];
  212. if(n == 0)
  213. return c;
  214. return n-1;
  215. }
  216. void
  217. eatstring(void)
  218. {
  219. int esc, c, cnt;
  220. char buf[Strsize];
  221. esc = 0;
  222. for(cnt = 0;;) {
  223. c = lexc();
  224. switch(c) {
  225. case Eof:
  226. error("%d: <eof> in string constant", line);
  227. case '\n':
  228. error("newline in string constant");
  229. goto done;
  230. case '\\':
  231. if(esc)
  232. goto Default;
  233. esc = 1;
  234. break;
  235. case '"':
  236. if(esc == 0)
  237. goto done;
  238. /* Fall through */
  239. default:
  240. Default:
  241. if(esc) {
  242. c = escchar(c);
  243. esc = 0;
  244. }
  245. buf[cnt++] = c;
  246. break;
  247. }
  248. if(cnt >= Strsize)
  249. error("string token too long");
  250. }
  251. done:
  252. buf[cnt] = '\0';
  253. yylval.string = strnode(buf);
  254. }
  255. void
  256. eatnl(void)
  257. {
  258. int c;
  259. line++;
  260. for(;;) {
  261. c = lexc();
  262. if(c == Eof)
  263. error("eof in comment");
  264. if(c == '\n')
  265. return;
  266. }
  267. }
  268. int
  269. yylex(void)
  270. {
  271. int c;
  272. extern char vfmt[];
  273. loop:
  274. Bflush(bout);
  275. c = lexc();
  276. switch(c) {
  277. case Eof:
  278. if(gotint) {
  279. gotint = 0;
  280. stacked = 0;
  281. Bprint(bout, "\nacid: ");
  282. goto loop;
  283. }
  284. return Eof;
  285. case '"':
  286. eatstring();
  287. return Tstring;
  288. case ' ':
  289. case '\t':
  290. goto loop;
  291. case '\n':
  292. line++;
  293. if(interactive == 0)
  294. goto loop;
  295. if(stacked) {
  296. print("\t");
  297. goto loop;
  298. }
  299. return ';';
  300. case '.':
  301. c = lexc();
  302. unlexc(c);
  303. if(isdigit(c))
  304. return numsym('.');
  305. return '.';
  306. case '(':
  307. case ')':
  308. case '[':
  309. case ']':
  310. case ';':
  311. case ':':
  312. case ',':
  313. case '~':
  314. case '?':
  315. case '*':
  316. case '@':
  317. case '^':
  318. case '%':
  319. return c;
  320. case '{':
  321. stacked++;
  322. return c;
  323. case '}':
  324. stacked--;
  325. return c;
  326. case '\\':
  327. c = lexc();
  328. if(strchr(vfmt, c) == 0) {
  329. unlexc(c);
  330. return '\\';
  331. }
  332. yylval.ival = c;
  333. return Tfmt;
  334. case '!':
  335. c = lexc();
  336. if(c == '=')
  337. return Tneq;
  338. unlexc(c);
  339. return '!';
  340. case '+':
  341. c = lexc();
  342. if(c == '+')
  343. return Tinc;
  344. unlexc(c);
  345. return '+';
  346. case '/':
  347. c = lexc();
  348. if(c == '/') {
  349. eatnl();
  350. goto loop;
  351. }
  352. unlexc(c);
  353. return '/';
  354. case '\'':
  355. c = lexc();
  356. if(c == '\\')
  357. yylval.ival = escchar(lexc());
  358. else
  359. yylval.ival = c;
  360. c = lexc();
  361. if(c != '\'') {
  362. error("missing '");
  363. unlexc(c);
  364. }
  365. return Tconst;
  366. case '&':
  367. c = lexc();
  368. if(c == '&')
  369. return Tandand;
  370. unlexc(c);
  371. return '&';
  372. case '=':
  373. c = lexc();
  374. if(c == '=')
  375. return Teq;
  376. unlexc(c);
  377. return '=';
  378. case '|':
  379. c = lexc();
  380. if(c == '|')
  381. return Toror;
  382. unlexc(c);
  383. return '|';
  384. case '<':
  385. c = lexc();
  386. if(c == '=')
  387. return Tleq;
  388. if(c == '<')
  389. return Tlsh;
  390. unlexc(c);
  391. return '<';
  392. case '>':
  393. c = lexc();
  394. if(c == '=')
  395. return Tgeq;
  396. if(c == '>')
  397. return Trsh;
  398. unlexc(c);
  399. return '>';
  400. case '-':
  401. c = lexc();
  402. if(c == '>')
  403. return Tindir;
  404. if(c == '-')
  405. return Tdec;
  406. unlexc(c);
  407. return '-';
  408. default:
  409. return numsym(c);
  410. }
  411. }
  412. int
  413. numsym(char first)
  414. {
  415. int c, isbin, isfloat, ishex;
  416. char *sel, *p;
  417. Lsym *s;
  418. symbol[0] = first;
  419. p = symbol;
  420. ishex = 0;
  421. isbin = 0;
  422. isfloat = 0;
  423. if(first == '.')
  424. isfloat = 1;
  425. if(isdigit(*p++) || isfloat) {
  426. for(;;) {
  427. c = lexc();
  428. if(c < 0)
  429. error("%d: <eof> eating symbols", line);
  430. if(c == '\n')
  431. line++;
  432. sel = "01234567890.xb";
  433. if(ishex)
  434. sel = "01234567890abcdefABCDEF";
  435. else if(isbin)
  436. sel = "01";
  437. else if(isfloat)
  438. sel = "01234567890eE-+";
  439. if(strchr(sel, c) == 0) {
  440. unlexc(c);
  441. break;
  442. }
  443. if(c == '.')
  444. isfloat = 1;
  445. if(!isbin && c == 'x')
  446. ishex = 1;
  447. if(!ishex && c == 'b')
  448. isbin = 1;
  449. *p++ = c;
  450. }
  451. *p = '\0';
  452. if(isfloat) {
  453. yylval.fval = atof(symbol);
  454. return Tfconst;
  455. }
  456. if(isbin)
  457. yylval.ival = strtoul(symbol+2, 0, 2);
  458. else
  459. yylval.ival = strtoul(symbol, 0, 0);
  460. return Tconst;
  461. }
  462. for(;;) {
  463. c = lexc();
  464. if(c < 0)
  465. error("%d <eof> eating symbols", line);
  466. if(c == '\n')
  467. line++;
  468. if(c != '_' && c != '$' && c <= '~' && !isalnum(c)) { /* checking against ~ lets UTF names through */
  469. unlexc(c);
  470. break;
  471. }
  472. *p++ = c;
  473. }
  474. *p = '\0';
  475. s = look(symbol);
  476. if(s == 0)
  477. s = enter(symbol, Tid);
  478. yylval.sym = s;
  479. return s->lexval;
  480. }
  481. Lsym*
  482. enter(char *name, int t)
  483. {
  484. Lsym *s;
  485. ulong h;
  486. char *p;
  487. Value *v;
  488. h = 0;
  489. for(p = name; *p; p++)
  490. h = h*3 + *p;
  491. h %= Hashsize;
  492. s = gmalloc(sizeof(Lsym));
  493. memset(s, 0, sizeof(Lsym));
  494. s->name = strdup(name);
  495. s->hash = hash[h];
  496. hash[h] = s;
  497. s->lexval = t;
  498. v = gmalloc(sizeof(Value));
  499. s->v = v;
  500. v->fmt = 'X';
  501. v->type = TINT;
  502. memset(v, 0, sizeof(Value));
  503. return s;
  504. }
  505. Lsym*
  506. look(char *name)
  507. {
  508. Lsym *s;
  509. ulong h;
  510. char *p;
  511. h = 0;
  512. for(p = name; *p; p++)
  513. h = h*3 + *p;
  514. h %= Hashsize;
  515. for(s = hash[h]; s; s = s->hash)
  516. if(strcmp(name, s->name) == 0)
  517. return s;
  518. return 0;
  519. }
  520. Lsym*
  521. mkvar(char *s)
  522. {
  523. Lsym *l;
  524. l = look(s);
  525. if(l == 0)
  526. l = enter(s, Tid);
  527. return l;
  528. }