lex.c 22 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453
  1. #define Extern
  2. #include "limbo.h"
  3. #include "y.tab.h"
  4. enum
  5. {
  6. Leof = -1,
  7. Linestart = 0,
  8. Mlower = 1,
  9. Mupper = 2,
  10. Munder = 4,
  11. Malpha = Mupper|Mlower|Munder,
  12. Mdigit = 8,
  13. Msign = 16,
  14. Mexp = 32,
  15. Mhex = 64,
  16. Mradix = 128,
  17. HashSize = 1024,
  18. MaxPath = 4096
  19. };
  20. typedef struct Keywd Keywd;
  21. struct Keywd
  22. {
  23. char *name;
  24. int token;
  25. };
  26. File **files; /* files making up the module, sorted by absolute line */
  27. int nfiles;
  28. static int lenfiles;
  29. static int lastfile; /* index of last file looked up */
  30. static char *incpath[MaxIncPath];
  31. static Sym *symbols[HashSize];
  32. static Sym *strings[HashSize];
  33. static char map[256];
  34. static Biobuf *bin;
  35. static Line linestack[MaxInclude];
  36. static int lineno;
  37. static int linepos;
  38. static int bstack;
  39. static int ineof;
  40. static int lasttok;
  41. static YYSTYPE lastyylval;
  42. static char srcdir[MaxPath];
  43. static Keywd keywords[] =
  44. {
  45. "adt", Ladt,
  46. "alt", Lalt,
  47. "array", Larray,
  48. "big", Ltid,
  49. "break", Lbreak,
  50. "byte", Ltid,
  51. "case", Lcase,
  52. "chan", Lchan,
  53. "con", Lcon,
  54. "continue", Lcont,
  55. "cyclic", Lcyclic,
  56. "do", Ldo,
  57. "dynamic", Ldynamic,
  58. "else", Lelse,
  59. "exception", Lexcept,
  60. "exit", Lexit,
  61. "fixed", Lfix,
  62. "fn", Lfn,
  63. "for", Lfor,
  64. "hd", Lhd,
  65. "if", Lif,
  66. "implement", Limplement,
  67. "import", Limport,
  68. "include", Linclude,
  69. "int", Ltid,
  70. "len", Llen,
  71. "list", Llist,
  72. "load", Lload,
  73. "module", Lmodule,
  74. "nil", Lnil,
  75. "of", Lof,
  76. "or", Lor,
  77. "pick", Lpick,
  78. "raise", Lraise,
  79. "raises", Lraises,
  80. "real", Ltid,
  81. "ref", Lref,
  82. "return", Lreturn,
  83. "self", Lself,
  84. "spawn", Lspawn,
  85. "string", Ltid,
  86. "tagof", Ltagof,
  87. "tl", Ltl,
  88. "to", Lto,
  89. "type", Ltype,
  90. "while", Lwhile,
  91. 0,
  92. };
  93. static Keywd tokwords[] =
  94. {
  95. "&=", Landeq,
  96. "|=", Loreq,
  97. "^=", Lxoreq,
  98. "<<=", Llsheq,
  99. ">>=", Lrsheq,
  100. "+=", Laddeq,
  101. "-=", Lsubeq,
  102. "*=", Lmuleq,
  103. "/=", Ldiveq,
  104. "%=", Lmodeq,
  105. "**=", Lexpeq,
  106. ":=", Ldeclas,
  107. "||", Loror,
  108. "&&", Landand,
  109. "::", Lcons,
  110. "==", Leq,
  111. "!=", Lneq,
  112. "<=", Lleq,
  113. ">=", Lgeq,
  114. "<<", Llsh,
  115. ">>", Lrsh,
  116. "<-", Lcomm,
  117. "++", Linc,
  118. "--", Ldec,
  119. "->", Lmdot,
  120. "=>", Llabs,
  121. "**", Lexp,
  122. "EOF", Leof,
  123. "eof", Beof,
  124. 0,
  125. };
  126. void
  127. lexinit(void)
  128. {
  129. Keywd *k;
  130. int i;
  131. for(i = 0; i < 256; i++){
  132. if(i == '_' || i > 0xa0)
  133. map[i] |= Munder;
  134. if(i >= 'A' && i <= 'Z')
  135. map[i] |= Mupper;
  136. if(i >= 'a' && i <= 'z')
  137. map[i] |= Mlower;
  138. if(i >= 'A' && i <= 'F' || i >= 'a' && i <= 'f')
  139. map[i] |= Mhex;
  140. if(i == 'e' || i == 'E')
  141. map[i] |= Mexp;
  142. if(i == 'r' || i == 'R')
  143. map[i] |= Mradix;
  144. if(i == '-' || i == '+')
  145. map[i] |= Msign;
  146. if(i >= '0' && i <= '9')
  147. map[i] |= Mdigit;
  148. }
  149. memset(escmap, -1, sizeof(escmap));
  150. escmap['\''] = '\'';
  151. unescmap['\''] = '\'';
  152. escmap['"'] = '"';
  153. unescmap['"'] = '"';
  154. escmap['\\'] = '\\';
  155. unescmap['\\'] = '\\';
  156. escmap['a'] = '\a';
  157. unescmap['\a'] = 'a';
  158. escmap['b'] = '\b';
  159. unescmap['\b'] = 'b';
  160. escmap['f'] = '\f';
  161. unescmap['\f'] = 'f';
  162. escmap['n'] = '\n';
  163. unescmap['\n'] = 'n';
  164. escmap['r'] = '\r';
  165. unescmap['\r'] = 'r';
  166. escmap['t'] = '\t';
  167. unescmap['\t'] = 't';
  168. escmap['v'] = '\v';
  169. unescmap['\v'] = 'v';
  170. escmap['0'] = '\0';
  171. unescmap['\0'] = '0';
  172. for(k = keywords; k->name != nil; k++)
  173. enter(k->name, k->token);
  174. }
  175. int
  176. cmap(int c)
  177. {
  178. if(c<0)
  179. return 0;
  180. if(c<256)
  181. return map[c];
  182. return Mlower;
  183. }
  184. void
  185. lexstart(char *in)
  186. {
  187. char *p;
  188. ineof = 0;
  189. bstack = 0;
  190. nfiles = 0;
  191. lastfile = 0;
  192. addfile(mkfile(strdup(in), 1, 0, -1, nil, 0, -1));
  193. bin = bins[bstack];
  194. lineno = 1;
  195. linepos = Linestart;
  196. secpy(srcdir, srcdir+MaxPath, in);
  197. p = strrchr(srcdir, '/');
  198. if(p == nil)
  199. srcdir[0] = '\0';
  200. else
  201. p[1] = '\0';
  202. }
  203. static int
  204. Getc(void)
  205. {
  206. int c;
  207. if(ineof)
  208. return Beof;
  209. c = BGETC(bin);
  210. if(c == Beof)
  211. ineof = 1;
  212. linepos++;
  213. return c;
  214. }
  215. static void
  216. unGetc(void)
  217. {
  218. if(ineof)
  219. return;
  220. Bungetc(bin);
  221. linepos--;
  222. }
  223. static int
  224. getrune(void)
  225. {
  226. int c;
  227. if(ineof)
  228. return Beof;
  229. c = Bgetrune(bin);
  230. if(c == Beof)
  231. ineof = 1;
  232. linepos++;
  233. return c;
  234. }
  235. static void
  236. ungetrune(void)
  237. {
  238. if(ineof)
  239. return;
  240. Bungetrune(bin);
  241. linepos--;
  242. }
  243. void
  244. addinclude(char *s)
  245. {
  246. int i;
  247. for(i = 0; i < MaxIncPath; i++){
  248. if(incpath[i] == 0){
  249. incpath[i] = s;
  250. return;
  251. }
  252. }
  253. fatal("out of include path space");
  254. }
  255. File*
  256. mkfile(char *name, int abs, int off, int in, char *act, int actoff, int sbl)
  257. {
  258. File *f;
  259. f = allocmem(sizeof *f);
  260. f->name = name;
  261. f->abs = abs;
  262. f->off = off;
  263. f->in = in;
  264. f->act = act;
  265. f->actoff = actoff;
  266. f->sbl = sbl;
  267. return f;
  268. }
  269. int
  270. addfile(File *f)
  271. {
  272. if(nfiles >= lenfiles){
  273. lenfiles = nfiles+32;
  274. files = reallocmem(files, lenfiles*sizeof(File*));
  275. }
  276. files[nfiles] = f;
  277. return nfiles++;
  278. }
  279. void
  280. includef(Sym *file)
  281. {
  282. Biobuf *b;
  283. char *p, buf[MaxPath];
  284. int i;
  285. linestack[bstack].line = lineno;
  286. linestack[bstack].pos = linepos;
  287. bstack++;
  288. if(bstack >= MaxInclude)
  289. fatal("%L: include file depth too great", curline());
  290. p = "";
  291. if(file->name[0] != '/')
  292. p = srcdir;
  293. seprint(buf, buf+sizeof(buf), "%s%s", p, file->name);
  294. b = Bopen(buf, OREAD);
  295. for(i = 0; b == nil && i < MaxIncPath && incpath[i] != nil && file->name[0] != '/'; i++){
  296. seprint(buf, buf+sizeof(buf), "%s/%s", incpath[i], file->name);
  297. b = Bopen(buf, OREAD);
  298. }
  299. bins[bstack] = b;
  300. if(bins[bstack] == nil){
  301. yyerror("can't include %s: %r", file->name);
  302. bstack--;
  303. }else{
  304. addfile(mkfile(strdup(buf), lineno+1, -lineno, lineno, nil, 0, -1));
  305. lineno++;
  306. linepos = Linestart;
  307. }
  308. bin = bins[bstack];
  309. }
  310. /*
  311. * we hit eof in the current file
  312. * revert to the file which included it.
  313. */
  314. static void
  315. popinclude(void)
  316. {
  317. Fline fl;
  318. File *f;
  319. int oline, opos, ln;
  320. ineof = 0;
  321. bstack--;
  322. bin = bins[bstack];
  323. oline = linestack[bstack].line;
  324. opos = linestack[bstack].pos;
  325. fl = fline(oline);
  326. f = fl.file;
  327. ln = fl.line;
  328. lineno++;
  329. linepos = opos;
  330. addfile(mkfile(f->name, lineno, ln-lineno, f->in, f->act, f->actoff, -1));
  331. }
  332. /*
  333. * convert an absolute Line into a file and line within the file
  334. */
  335. Fline
  336. fline(int absline)
  337. {
  338. Fline fl;
  339. int l, r, m, s;
  340. if(absline < files[lastfile]->abs
  341. || lastfile+1 < nfiles && absline >= files[lastfile+1]->abs){
  342. lastfile = 0;
  343. l = 0;
  344. r = nfiles - 1;
  345. while(l <= r){
  346. m = (r + l) / 2;
  347. s = files[m]->abs;
  348. if(s <= absline){
  349. l = m + 1;
  350. lastfile = m;
  351. }else
  352. r = m - 1;
  353. }
  354. }
  355. fl.file = files[lastfile];
  356. fl.line = absline + files[lastfile]->off;
  357. return fl;
  358. }
  359. /*
  360. * read a comment
  361. */
  362. static int
  363. lexcom(void)
  364. {
  365. File *f;
  366. char buf[StrSize], *s, *t, *act;
  367. int i, n, c, actline;
  368. i = 0;
  369. while((c = Getc()) != '\n'){
  370. if(c == Beof)
  371. return -1;
  372. if(i < sizeof(buf)-1)
  373. buf[i++] = c;
  374. }
  375. buf[i] = 0;
  376. lineno++;
  377. linepos = Linestart;
  378. if(strncmp(buf, "line ", 5) != 0 && strncmp(buf, "line\t", 5) != 0)
  379. return 0;
  380. for(s = buf+5; *s == ' ' || *s == '\t'; s++)
  381. ;
  382. if(!(cmap(*s) & Mdigit))
  383. return 0;
  384. n = 0;
  385. for(; cmap(c = *s) & Mdigit; s++)
  386. n = n * 10 + c - '0';
  387. for(; *s == ' ' || *s == '\t'; s++)
  388. ;
  389. if(*s != '"')
  390. return 0;
  391. s++;
  392. t = strchr(s, '"');
  393. if(t == nil || t[1] != '\0')
  394. return 0;
  395. *t = '\0';
  396. f = files[nfiles - 1];
  397. if(n == f->off+lineno && strcmp(s, f->name) == 0)
  398. return 1;
  399. act = f->name;
  400. actline = lineno + f->off;
  401. if(f->act != nil){
  402. actline += f->actoff;
  403. act = f->act;
  404. }
  405. addfile(mkfile(strdup(s), lineno, n-lineno, f->in, act, actline - n, -1));
  406. return 1;
  407. }
  408. Line
  409. curline(void)
  410. {
  411. Line line;
  412. line.line = lineno;
  413. line.pos = linepos;
  414. return line;
  415. }
  416. int
  417. lineconv(Fmt *f)
  418. {
  419. Fline fl;
  420. File *file;
  421. Line inl, line;
  422. char buf[StrSize], *s;
  423. line = va_arg(f->args, Line);
  424. if(line.line < 0)
  425. return fmtstrcpy(f, "<noline>");
  426. fl = fline(line.line);
  427. file = fl.file;
  428. s = seprint(buf, buf+sizeof(buf), "%s:%d", file->name, fl.line);
  429. if(file->act != nil)
  430. s = seprint(s, buf+sizeof(buf), " [ %s:%d ]", file->act, file->actoff+fl.line);
  431. if(file->in >= 0){
  432. inl.line = file->in;
  433. inl.pos = 0;
  434. seprint(s, buf+sizeof(buf), ": %L", inl);
  435. }
  436. return fmtstrcpy(f, buf);
  437. }
  438. static char*
  439. posconv(char *s, char *e, Line line)
  440. {
  441. Fline fl;
  442. if(line.line < 0)
  443. return secpy(s, e, "nopos");
  444. fl = fline(line.line);
  445. return seprint(s, e, "%s:%d.%d", fl.file->name, fl.line, line.pos);
  446. }
  447. int
  448. srcconv(Fmt *f)
  449. {
  450. Src src;
  451. char buf[StrSize], *s;
  452. src = va_arg(f->args, Src);
  453. s = posconv(buf, buf+sizeof(buf), src.start);
  454. s = secpy(s, buf+sizeof(buf), ",");
  455. posconv(s, buf+sizeof(buf), src.stop);
  456. return fmtstrcpy(f, buf);
  457. }
  458. int
  459. lexid(int c)
  460. {
  461. Sym *sym;
  462. char id[StrSize*UTFmax+1], *p;
  463. Rune r;
  464. int i, t;
  465. p = id;
  466. i = 0;
  467. for(;;){
  468. if(i < StrSize){
  469. if(c < Runeself)
  470. *p++ = c;
  471. else{
  472. r = c;
  473. p += runetochar(p, &r);
  474. }
  475. i++;
  476. }
  477. c = getrune();
  478. if(c == Beof
  479. || !(cmap(c) & (Malpha|Mdigit))){
  480. ungetrune();
  481. break;
  482. }
  483. }
  484. *p = '\0';
  485. sym = enter(id, Lid);
  486. t = sym->token;
  487. if(t == Lid || t == Ltid)
  488. yylval.tok.v.idval = sym;
  489. return t;
  490. }
  491. Long
  492. strtoi(char *t, int base)
  493. {
  494. char *s;
  495. Long v;
  496. int c, neg, ck;
  497. neg = 0;
  498. if(t[0] == '-'){
  499. neg = 1;
  500. t++;
  501. }else if(t[0] == '+')
  502. t++;
  503. v = 0;
  504. for(s = t; c = *s; s++){
  505. ck = cmap(c);
  506. if(ck & Mdigit)
  507. c -= '0';
  508. else if(ck & Mlower)
  509. c = c - 'a' + 10;
  510. else if(ck & Mupper)
  511. c = c - 'A' + 10;
  512. if(c >= base){
  513. yyerror("digit '%c' not radix %d", *s, base);
  514. return -1;
  515. }
  516. v = v * base + c;
  517. }
  518. if(neg)
  519. return -v;
  520. return v;
  521. }
  522. static int
  523. digit(int c, int base)
  524. {
  525. int cc, ck;
  526. cc = c;
  527. ck = cmap(c);
  528. if(ck & Mdigit)
  529. c -= '0';
  530. else if(ck & Mlower)
  531. c = c - 'a' + 10;
  532. else if(ck & Mupper)
  533. c = c - 'A' + 10;
  534. else if(ck & Munder)
  535. {}
  536. else
  537. return -1;
  538. if(c >= base)
  539. yyerror("digit '%c' not radix %d", cc, base);
  540. return c;
  541. }
  542. double
  543. strtodb(char *t, int base)
  544. {
  545. double num, dem;
  546. int neg, eneg, dig, exp, c, d;
  547. num = 0;
  548. neg = 0;
  549. dig = 0;
  550. exp = 0;
  551. eneg = 0;
  552. c = *t++;
  553. if(c == '-' || c == '+'){
  554. if(c == '-')
  555. neg = 1;
  556. c = *t++;
  557. }
  558. while((d = digit(c, base)) >= 0){
  559. num = num*base + d;
  560. c = *t++;
  561. }
  562. if(c == '.')
  563. c = *t++;
  564. while((d = digit(c, base)) >= 0){
  565. num = num*base + d;
  566. dig++;
  567. c = *t++;
  568. }
  569. if(c == 'e' || c == 'E'){
  570. c = *t++;
  571. if(c == '-' || c == '+'){
  572. if(c == '-'){
  573. dig = -dig;
  574. eneg = 1;
  575. }
  576. c = *t++;
  577. }
  578. while((d = digit(c, base)) >= 0){
  579. exp = exp*base + d;
  580. c = *t++;
  581. }
  582. }
  583. exp -= dig;
  584. if(exp < 0){
  585. exp = -exp;
  586. eneg = !eneg;
  587. }
  588. dem = rpow(base, exp);
  589. if(eneg)
  590. num /= dem;
  591. else
  592. num *= dem;
  593. if(neg)
  594. return -num;
  595. return num;
  596. }
  597. /*
  598. * parse a numeric identifier
  599. * format [0-9]+(r[0-9A-Za-z]+)?
  600. * or ([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][+-]?[0-9]+)?
  601. */
  602. int
  603. lexnum(int c)
  604. {
  605. char buf[StrSize], *base;
  606. enum { Int, Radix, RadixSeen, Frac, ExpSeen, ExpSignSeen, Exp, FracB } state;
  607. double d;
  608. Long v;
  609. int i, ck;
  610. i = 0;
  611. buf[i++] = c;
  612. state = Int;
  613. if(c == '.')
  614. state = Frac;
  615. base = nil;
  616. for(;;){
  617. c = Getc();
  618. if(c == Beof){
  619. yyerror("end of file in numeric constant");
  620. return Leof;
  621. }
  622. ck = cmap(c);
  623. switch(state){
  624. case Int:
  625. if(ck & Mdigit)
  626. break;
  627. if(ck & Mexp){
  628. state = ExpSeen;
  629. break;
  630. }
  631. if(ck & Mradix){
  632. base = &buf[i];
  633. state = RadixSeen;
  634. break;
  635. }
  636. if(c == '.'){
  637. state = Frac;
  638. break;
  639. }
  640. goto done;
  641. case RadixSeen:
  642. case Radix:
  643. if(ck & (Mdigit|Malpha)){
  644. state = Radix;
  645. break;
  646. }
  647. if(c == '.'){
  648. state = FracB;
  649. break;
  650. }
  651. goto done;
  652. case Frac:
  653. if(ck & Mdigit)
  654. break;
  655. if(ck & Mexp)
  656. state = ExpSeen;
  657. else
  658. goto done;
  659. break;
  660. case FracB:
  661. if(ck & (Mdigit|Malpha))
  662. break;
  663. goto done;
  664. case ExpSeen:
  665. if(ck & Msign){
  666. state = ExpSignSeen;
  667. break;
  668. }
  669. /* fall through */
  670. case ExpSignSeen:
  671. case Exp:
  672. if(ck & Mdigit){
  673. state = Exp;
  674. break;
  675. }
  676. goto done;
  677. }
  678. if(i < StrSize-1)
  679. buf[i++] = c;
  680. }
  681. done:
  682. buf[i] = 0;
  683. unGetc();
  684. switch(state){
  685. default:
  686. yyerror("malformed numerical constant '%s'", buf);
  687. yylval.tok.v.ival = 0;
  688. return Lconst;
  689. case Radix:
  690. *base++ = '\0';
  691. v = strtoi(buf, 10);
  692. if(v < 0)
  693. break;
  694. if(v < 2 || v > 36){
  695. yyerror("radix '%s' must be between 2 and 36", buf);
  696. break;
  697. }
  698. v = strtoi(base, v);
  699. break;
  700. case Int:
  701. v = strtoi(buf, 10);
  702. break;
  703. case Frac:
  704. case Exp:
  705. d = strtod(buf, nil);
  706. yylval.tok.v.rval = d;
  707. return Lrconst;
  708. case FracB:
  709. *base++ = '\0';
  710. v = strtoi(buf, 10);
  711. if(v < 0)
  712. break;
  713. if(v < 2 || v > 36){
  714. yyerror("radix '%s' must be between 2 and 36", buf);
  715. break;
  716. }
  717. d = strtodb(base, v);
  718. yylval.tok.v.rval = d;
  719. return Lrconst;
  720. }
  721. yylval.tok.v.ival = v;
  722. return Lconst;
  723. }
  724. int
  725. escchar(void)
  726. {
  727. char buf[4+1];
  728. int c, i;
  729. c = getrune();
  730. if(c == Beof)
  731. return Beof;
  732. if(c == 'u'){
  733. for(i = 0; i < 4; i++){
  734. c = getrune();
  735. if(c == Beof || !(cmap(c) & (Mdigit|Mhex))){
  736. yyerror("malformed \\u escape sequence");
  737. ungetrune();
  738. break;
  739. }
  740. buf[i] = c;
  741. }
  742. buf[i] = 0;
  743. return strtoul(buf, 0, 16);
  744. }
  745. if(c < 256 && (i = escmap[c]) >= 0)
  746. return i;
  747. yyerror("unrecognized escape \\%C", c);
  748. return c;
  749. }
  750. void
  751. lexstring(int israw)
  752. {
  753. char *str;
  754. int c, t, startlno;
  755. Rune r;
  756. int len, alloc;
  757. alloc = 32;
  758. len = 0;
  759. str = allocmem(alloc * sizeof(str));
  760. startlno = lineno;
  761. for(;;){
  762. c = getrune();
  763. if(israw){
  764. switch(c){
  765. case '`':
  766. yylval.tok.v.idval = enterstring(str, len);
  767. return;
  768. case '\n':
  769. lineno++;
  770. linepos = Linestart;
  771. break;
  772. case Beof:
  773. t = lineno;
  774. lineno = startlno;
  775. yyerror("end of file in raw string constant");
  776. lineno = t;
  777. yylval.tok.v.idval = enterstring(str, len);
  778. return;
  779. }
  780. }else{
  781. switch(c){
  782. case '\\':
  783. c = escchar();
  784. if(c != Beof)
  785. break;
  786. /* fall through */
  787. case Beof:
  788. yyerror("end of file in string constant");
  789. yylval.tok.v.idval = enterstring(str, len);
  790. return;
  791. case '\n':
  792. yyerror("newline in string constant");
  793. lineno++;
  794. linepos = Linestart;
  795. yylval.tok.v.idval = enterstring(str, len);
  796. return;
  797. case '"':
  798. yylval.tok.v.idval = enterstring(str, len);
  799. return;
  800. }
  801. }
  802. while(len+UTFmax+1 >= alloc){
  803. alloc += 32;
  804. str = reallocmem(str, alloc * sizeof(str));
  805. }
  806. r = c;
  807. len += runetochar(&str[len], &r);
  808. str[len] = '\0';
  809. }
  810. }
  811. static int
  812. lex(void)
  813. {
  814. int c;
  815. loop:
  816. yylval.tok.src.start.line = lineno;
  817. yylval.tok.src.start.pos = linepos;
  818. c = getrune(); /* ehg: outside switch() to avoid bug in VisualC++5.0 */
  819. switch(c){
  820. case Beof:
  821. Bterm(bin);
  822. if(bstack == 0)
  823. return Leof;
  824. popinclude();
  825. break;
  826. case '#':
  827. if(lexcom() < 0){
  828. Bterm(bin);
  829. if(bstack == 0)
  830. return Leof;
  831. popinclude();
  832. }
  833. break;
  834. case '\n':
  835. lineno++;
  836. linepos = Linestart;
  837. goto loop;
  838. case ' ':
  839. case '\t':
  840. case '\r':
  841. case '\v':
  842. case '\f':
  843. goto loop;
  844. case '"':
  845. lexstring(0);
  846. return Lsconst;
  847. case '`':
  848. lexstring(1);
  849. return Lsconst;
  850. case '\'':
  851. c = getrune();
  852. if(c == '\\')
  853. c = escchar();
  854. if(c == Beof){
  855. yyerror("end of file in character constant");
  856. return Beof;
  857. }else
  858. yylval.tok.v.ival = c;
  859. c = Getc();
  860. if(c != '\'') {
  861. yyerror("missing closing '");
  862. unGetc();
  863. }
  864. return Lconst;
  865. case '(':
  866. case ')':
  867. case '[':
  868. case ']':
  869. case '{':
  870. case '}':
  871. case ',':
  872. case ';':
  873. case '~':
  874. return c;
  875. case ':':
  876. c = Getc();
  877. if(c == ':')
  878. return Lcons;
  879. if(c == '=')
  880. return Ldeclas;
  881. unGetc();
  882. return ':';
  883. case '.':
  884. c = Getc();
  885. unGetc();
  886. if(c != Beof && (cmap(c) & Mdigit))
  887. return lexnum('.');
  888. return '.';
  889. case '|':
  890. c = Getc();
  891. if(c == '=')
  892. return Loreq;
  893. if(c == '|')
  894. return Loror;
  895. unGetc();
  896. return '|';
  897. case '&':
  898. c = Getc();
  899. if(c == '=')
  900. return Landeq;
  901. if(c == '&')
  902. return Landand;
  903. unGetc();
  904. return '&';
  905. case '^':
  906. c = Getc();
  907. if(c == '=')
  908. return Lxoreq;
  909. unGetc();
  910. return '^';
  911. case '*':
  912. c = Getc();
  913. if(c == '=')
  914. return Lmuleq;
  915. if(c == '*'){
  916. c = Getc();
  917. if(c == '=')
  918. return Lexpeq;
  919. unGetc();
  920. return Lexp;
  921. }
  922. unGetc();
  923. return '*';
  924. case '/':
  925. c = Getc();
  926. if(c == '=')
  927. return Ldiveq;
  928. unGetc();
  929. return '/';
  930. case '%':
  931. c = Getc();
  932. if(c == '=')
  933. return Lmodeq;
  934. unGetc();
  935. return '%';
  936. case '=':
  937. c = Getc();
  938. if(c == '=')
  939. return Leq;
  940. if(c == '>')
  941. return Llabs;
  942. unGetc();
  943. return '=';
  944. case '!':
  945. c = Getc();
  946. if(c == '=')
  947. return Lneq;
  948. unGetc();
  949. return '!';
  950. case '>':
  951. c = Getc();
  952. if(c == '=')
  953. return Lgeq;
  954. if(c == '>'){
  955. c = Getc();
  956. if(c == '=')
  957. return Lrsheq;
  958. unGetc();
  959. return Lrsh;
  960. }
  961. unGetc();
  962. return '>';
  963. case '<':
  964. c = Getc();
  965. if(c == '=')
  966. return Lleq;
  967. if(c == '-')
  968. return Lcomm;
  969. if(c == '<'){
  970. c = Getc();
  971. if(c == '=')
  972. return Llsheq;
  973. unGetc();
  974. return Llsh;
  975. }
  976. unGetc();
  977. return '<';
  978. case '+':
  979. c = Getc();
  980. if(c == '=')
  981. return Laddeq;
  982. if(c == '+')
  983. return Linc;
  984. unGetc();
  985. return '+';
  986. case '-':
  987. c = Getc();
  988. if(c == '=')
  989. return Lsubeq;
  990. if(c == '-')
  991. return Ldec;
  992. if(c == '>')
  993. return Lmdot;
  994. unGetc();
  995. return '-';
  996. case '1': case '2': case '3': case '4': case '5':
  997. case '0': case '6': case '7': case '8': case '9':
  998. return lexnum(c);
  999. default:
  1000. if(cmap(c) & Malpha)
  1001. return lexid(c);
  1002. yyerror("unknown character %c", c);
  1003. break;
  1004. }
  1005. goto loop;
  1006. }
  1007. int
  1008. yylex(void)
  1009. {
  1010. int t;
  1011. t = lex();
  1012. yylval.tok.src.stop.line = lineno;
  1013. yylval.tok.src.stop.pos = linepos;
  1014. lasttok = t;
  1015. lastyylval = yylval;
  1016. return t;
  1017. }
  1018. static char*
  1019. toksp(int t)
  1020. {
  1021. Keywd *k;
  1022. static char buf[256];
  1023. switch(t){
  1024. case Lconst:
  1025. snprint(buf, sizeof(buf), "%lld", lastyylval.tok.v.ival);
  1026. return buf;
  1027. case Lrconst:
  1028. snprint(buf, sizeof(buf), "%f", lastyylval.tok.v.rval);
  1029. return buf;
  1030. case Lsconst:
  1031. snprint(buf, sizeof(buf), "\"%s\"", lastyylval.tok.v.idval->name);
  1032. return buf;
  1033. case Ltid:
  1034. case Lid:
  1035. return lastyylval.tok.v.idval->name;
  1036. }
  1037. for(k = keywords; k->name != nil; k++)
  1038. if(t == k->token)
  1039. return k->name;
  1040. for(k = tokwords; k->name != nil; k++)
  1041. if(t == k->token)
  1042. return k->name;
  1043. if(t < 0 || t > 255)
  1044. fatal("bad token %d in toksp()", t);
  1045. buf[0] = t;
  1046. buf[1] = '\0';
  1047. return buf;
  1048. }
  1049. Sym*
  1050. enterstring(char *str, int n)
  1051. {
  1052. Sym *s;
  1053. char *p, *e;
  1054. ulong h;
  1055. int c, c0;
  1056. e = str + n;
  1057. h = 0;
  1058. for(p = str; p < e; p++){
  1059. c = *p;
  1060. c ^= c << 6;
  1061. h += (c << 11) ^ (c >> 1);
  1062. c = *p;
  1063. h ^= (c << 14) + (c << 7) + (c << 4) + c;
  1064. }
  1065. c0 = str[0];
  1066. h %= HashSize;
  1067. for(s = strings[h]; s != nil; s = s->next){
  1068. if(s->name[0] == c0 && s->len == n && memcmp(s->name, str, n) == 0){
  1069. free(str);
  1070. return s;
  1071. }
  1072. }
  1073. if(n == 0)
  1074. return enter("", 0);
  1075. s = allocmem(sizeof(Sym));
  1076. memset(s, 0, sizeof(Sym));
  1077. s->name = str;
  1078. s->len = n;
  1079. s->next = strings[h];
  1080. strings[h] = s;
  1081. return s;
  1082. }
  1083. int
  1084. symcmp(Sym *s, Sym *t)
  1085. {
  1086. int n, c;
  1087. n = s->len;
  1088. if(n > t->len)
  1089. n = t->len;
  1090. c = memcmp(s->name, t->name, n);
  1091. if(c == 0)
  1092. return s->len - t->len;
  1093. return c;
  1094. }
  1095. Sym*
  1096. stringcat(Sym *s, Sym *t)
  1097. {
  1098. char *str;
  1099. int n;
  1100. n = s->len + t->len;
  1101. str = allocmem(n+1);
  1102. memmove(str, s->name, s->len);
  1103. memmove(str+s->len, t->name, t->len);
  1104. str[n] = '\0';
  1105. return enterstring(str, n);
  1106. }
  1107. Sym*
  1108. enter(char *name, int token)
  1109. {
  1110. Sym *s;
  1111. char *p;
  1112. ulong h;
  1113. int c0, c, n;
  1114. c0 = name[0];
  1115. h = 0;
  1116. for(p = name; c = *p; p++){
  1117. c ^= c << 6;
  1118. h += (c << 11) ^ (c >> 1);
  1119. c = *p;
  1120. h ^= (c << 14) + (c << 7) + (c << 4) + c;
  1121. }
  1122. n = p - name;
  1123. h %= HashSize;
  1124. for(s = symbols[h]; s != nil; s = s->next)
  1125. if(s->name[0] == c0 && strcmp(s->name, name) == 0)
  1126. return s;
  1127. s = allocmem(sizeof(Sym));
  1128. memset(s, 0, sizeof(Sym));
  1129. s->hash = h;
  1130. s->name = allocmem(n+1);
  1131. memmove(s->name, name, n+1);
  1132. if(token == 0)
  1133. token = Lid;
  1134. s->token = token;
  1135. s->next = symbols[h];
  1136. s->len = n;
  1137. symbols[h] = s;
  1138. return s;
  1139. }
  1140. char*
  1141. stringpr(char *buf, char *end, Sym *sym)
  1142. {
  1143. char sb[30], *s, *p;
  1144. int i, c, n;
  1145. s = sym->name;
  1146. n = sym->len;
  1147. if(n > 10)
  1148. n = 10;
  1149. p = sb;
  1150. *p++ = '"';
  1151. for(i = 0; i < n; i++){
  1152. c = s[i];
  1153. switch(c){
  1154. case '\\':
  1155. case '"':
  1156. case '\n':
  1157. case '\r':
  1158. case '\t':
  1159. case '\b':
  1160. case '\a':
  1161. case '\v':
  1162. case '\0':
  1163. *p++ = '\\';
  1164. *p++ = unescmap[c];
  1165. break;
  1166. default:
  1167. *p++ = c;
  1168. break;
  1169. }
  1170. }
  1171. if(n != sym->len){
  1172. *p++ = '.';
  1173. *p++ = '.';
  1174. *p++ = '.';
  1175. }
  1176. *p++ = '"';
  1177. *p = 0;
  1178. return secpy(buf, end, sb);
  1179. }
  1180. void
  1181. warn(Line line, char *fmt, ...)
  1182. {
  1183. char buf[4096];
  1184. va_list arg;
  1185. if(errors || !dowarn)
  1186. return;
  1187. va_start(arg, fmt);
  1188. vseprint(buf, buf+sizeof(buf), fmt, arg);
  1189. va_end(arg);
  1190. fprint(2, "%L: warning: %s\n", line, buf);
  1191. }
  1192. void
  1193. nwarn(Node *n, char *fmt, ...)
  1194. {
  1195. char buf[4096];
  1196. va_list arg;
  1197. if(errors || !dowarn)
  1198. return;
  1199. va_start(arg, fmt);
  1200. vseprint(buf, buf+sizeof(buf), fmt, arg);
  1201. va_end(arg);
  1202. fprint(2, "%L: warning: %s\n", n->src.start, buf);
  1203. }
  1204. void
  1205. error(Line line, char *fmt, ...)
  1206. {
  1207. char buf[4096];
  1208. va_list arg;
  1209. errors++;
  1210. if(errors >= maxerr){
  1211. if(errors == maxerr)
  1212. fprint(2, "too many errors, stopping\n");
  1213. return;
  1214. }
  1215. va_start(arg, fmt);
  1216. vseprint(buf, buf+sizeof(buf), fmt, arg);
  1217. va_end(arg);
  1218. fprint(2, "%L: %s\n", line, buf);
  1219. }
  1220. void
  1221. nerror(Node *n, char *fmt, ...)
  1222. {
  1223. char buf[4096];
  1224. va_list arg;
  1225. errors++;
  1226. if(errors >= maxerr){
  1227. if(errors == maxerr)
  1228. fprint(2, "too many errors, stopping\n");
  1229. return;
  1230. }
  1231. va_start(arg, fmt);
  1232. vseprint(buf, buf+sizeof(buf), fmt, arg);
  1233. va_end(arg);
  1234. fprint(2, "%L: %s\n", n->src.start, buf);
  1235. }
  1236. void
  1237. yyerror(char *fmt, ...)
  1238. {
  1239. char buf[4096];
  1240. va_list arg;
  1241. errors++;
  1242. if(errors >= maxerr){
  1243. if(errors == maxerr)
  1244. fprint(2, "too many errors, stopping\n");
  1245. return;
  1246. }
  1247. va_start(arg, fmt);
  1248. vseprint(buf, buf+sizeof(buf), fmt, arg);
  1249. va_end(arg);
  1250. if(lasttok != 0)
  1251. fprint(2, "%L: near ` %s ` : %s\n", curline(), toksp(lasttok), buf);
  1252. else
  1253. fprint(2, "%L: %s\n", curline(), buf);
  1254. }
  1255. void
  1256. fatal(char *fmt, ...)
  1257. {
  1258. char buf[4096];
  1259. va_list arg;
  1260. if(errors == 0 || isfatal){
  1261. va_start(arg, fmt);
  1262. vseprint(buf, buf+sizeof(buf), fmt, arg);
  1263. va_end(arg);
  1264. fprint(2, "fatal limbo compiler error: %s\n", buf);
  1265. }
  1266. if(bout != nil)
  1267. remove(outfile);
  1268. if(bsym != nil)
  1269. remove(symfile);
  1270. if(isfatal)
  1271. abort();
  1272. exits(buf);
  1273. }
  1274. int
  1275. gfltconv(Fmt *f)
  1276. {
  1277. double d;
  1278. char buf[32];
  1279. d = va_arg(f->args, double);
  1280. g_fmt(buf, d, 'e');
  1281. return fmtstrcpy(f, buf);
  1282. }
  1283. char*
  1284. secpy(char *p, char *e, char *s)
  1285. {
  1286. int c;
  1287. if(p == e){
  1288. p[-1] = '\0';
  1289. return p;
  1290. }
  1291. for(; c = *s; s++){
  1292. *p++ = c;
  1293. if(p == e){
  1294. p[-1] = '\0';
  1295. return p;
  1296. }
  1297. }
  1298. *p = '\0';
  1299. return p;
  1300. }
  1301. char*
  1302. seprint(char *buf, char *end, char *fmt, ...)
  1303. {
  1304. va_list arg;
  1305. if(buf == end)
  1306. return buf;
  1307. va_start(arg, fmt);
  1308. buf = vseprint(buf, end, fmt, arg);
  1309. va_end(arg);
  1310. return buf;
  1311. }
  1312. void*
  1313. allocmem(ulong n)
  1314. {
  1315. void *p;
  1316. p = malloc(n);
  1317. if(p == nil)
  1318. fatal("out of memory");
  1319. return p;
  1320. }
  1321. void*
  1322. reallocmem(void *p, ulong n)
  1323. {
  1324. if(p == nil)
  1325. p = malloc(n);
  1326. else
  1327. p = realloc(p, n);
  1328. if(p == nil)
  1329. fatal("out of memory");
  1330. return p;
  1331. }