123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568 |
- /****************************************************************
- Copyright (C) Lucent Technologies 1997
- All Rights Reserved
- Permission to use, copy, modify, and distribute this software and
- its documentation for any purpose and without fee is hereby
- granted, provided that the above copyright notice appear in all
- copies and that both that the copyright notice and this
- permission notice and warranty disclaimer appear in supporting
- documentation, and that the name Lucent Technologies or any of
- its entities not be used in advertising or publicity pertaining
- to distribution of the software without specific, written prior
- permission.
- LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
- IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
- SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
- IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
- ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
- THIS SOFTWARE.
- ****************************************************************/
- #include <u.h>
- #include <lib9.h>
- #include <chartypes.h>
- #include <bio.h>
- #include "awk.h"
- #include "y.tab.h"
- extern YYSTYPE yylval;
- extern int infunc;
- int lineno = 1;
- int bracecnt = 0;
- int brackcnt = 0;
- int parencnt = 0;
/* One reserved word or built-in function name known to the lexer. */
typedef struct Keyword {
	char	*word;	/* spelling; the key binsearch() compares against */
	int	sub;	/* value word() stores in yylval.i */
	int	type;	/* token code word() returns to the parser */
} Keyword;
- Keyword keywords[] ={ /* keep sorted: binary searched */
- { "BEGIN", XBEGIN, XBEGIN },
- { "END", XEND, XEND },
- { "NF", VARNF, VARNF },
- { "atan2", FATAN, BLTIN },
- { "break", BREAK, BREAK },
- { "close", CLOSE, CLOSE },
- { "continue", CONTINUE, CONTINUE },
- { "cos", FCOS, BLTIN },
- { "delete", DELETE, DELETE },
- { "do", DO, DO },
- { "else", ELSE, ELSE },
- { "exit", EXIT, EXIT },
- { "exp", FEXP, BLTIN },
- { "fflush", FFLUSH, BLTIN },
- { "for", FOR, FOR },
- { "func", FUNC, FUNC },
- { "function", FUNC, FUNC },
- { "getline", GETLINE, GETLINE },
- { "gsub", GSUB, GSUB },
- { "if", IF, IF },
- { "in", IN, IN },
- { "index", INDEX, INDEX },
- { "int", FINT, BLTIN },
- { "length", FLENGTH, BLTIN },
- { "log", FLOG, BLTIN },
- { "match", MATCHFCN, MATCHFCN },
- { "next", NEXT, NEXT },
- { "nextfile", NEXTFILE, NEXTFILE },
- { "print", PRINT, PRINT },
- { "printf", PRINTF, PRINTF },
- { "rand", FRAND, BLTIN },
- { "return", RETURN, RETURN },
- { "sin", FSIN, BLTIN },
- { "split", SPLIT, SPLIT },
- { "sprintf", SPRINTF, SPRINTF },
- { "sqrt", FSQRT, BLTIN },
- { "srand", FSRAND, BLTIN },
- { "sub", SUB, SUB },
- { "substr", SUBSTR, SUBSTR },
- { "system", FSYSTEM, BLTIN },
- { "tolower", FTOLOWER, BLTIN },
- { "toupper", FTOUPPER, BLTIN },
- { "utf", FUTF, BLTIN },
- { "while", WHILE, WHILE },
- };
/*
 * RET(x): return token x from the lexer.
 * With DEBUG defined and dbg set, the token name is printed first.
 * In that variant x is expanded twice, so it must be free of side effects.
 */
#ifdef DEBUG
#	define	RET(x)	{ if(dbg)print("lex %s\n", tokname(x)); return(x); }
#else
#	define	RET(x)	return(x)
#endif
/*
 * peek: return the next input character without consuming it.
 * The character is fetched with input() and pushed straight back with
 * unput(), so the next input() delivers it again.
 */
int peek(void)
{
	int next;

	next = input();
	unput(next);
	return next;
}
/*
 * gettok: read the next raw token into the caller's growable buffer.
 *
 * pbuf, psz: address and size of the buffer.  adjbuf() may move and
 *	enlarge it, so both are written back once a name or number has
 *	been collected.
 *
 * Returns 0 at end of input (the buffer is left untouched), otherwise the
 * first character of the token:
 *   - a character that is not alphanumeric, '.' or '_' is returned as is,
 *     with the buffer holding just that character;
 *   - a letter or '_' starts a name; the buffer receives the identifier;
 *   - a digit or '.' starts a number; the buffer receives the prefix that
 *     strtod() accepts and the remainder is pushed back onto the input.
 *
 * NOTE(review): when strtod() accepts nothing (e.g. a lone '.'), the whole
 * text is pushed back, the buffer becomes empty and 0 is returned, which
 * yylex() treats as end of input.
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/* always keep room for this character and the NUL */
			if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
				FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;	/* terminate even when the name runs into end of input */
	} else {	/* it's a number */
		char *rem;

		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* always keep room for this character and the NUL */
			if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
				FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, (const char**)&rem);	/* parse the number */
		unputstr(rem);		/* put rest back for later */
		rem[0] = 0;		/* keep only the part strtod() accepted */
	}
	*pbuf = buf;
	*psz = sz;
	return buf[0];
}
/* helpers defined further down */
int	word(char *);
int	string(void);
int	regexpr(void);

/* one-shot flags consulted at the top of yylex() */
int	sc	= 0;	/* 1 => return a } right now */
int	reg	= 0;	/* 1 => return a REGEXPR now */
- int yylex(void)
- {
- int c;
- static char *buf = 0;
- static int bufsize = 500;
- if (buf == 0 && (buf = (char *) malloc(bufsize)) == nil)
- FATAL( "out of space in yylex" );
- if (sc) {
- sc = 0;
- RET('}');
- }
- if (reg) {
- reg = 0;
- return regexpr();
- }
- for (;;) {
- c = gettok(&buf, &bufsize);
- if (c == 0)
- return 0;
- if (isalpha(c) || c == '_')
- return word(buf);
- if (isdigit(c) || c == '.') {
- yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
- /* should this also have STR set? */
- RET(NUMBER);
- }
-
- yylval.i = c;
- switch (c) {
- case '\n': /* {EOL} */
- RET(NL);
- case '\r': /* assume \n is coming */
- case ' ': /* {WS}+ */
- case '\t':
- break;
- case '#': /* #.* strip comments */
- while ((c = input()) != '\n' && c != 0)
- ;
- unput(c);
- break;
- case ';':
- RET(';');
- case '\\':
- if (peek() == '\n') {
- input();
- } else if (peek() == '\r') {
- input(); input(); /* \n */
- lineno++;
- } else {
- RET(c);
- }
- break;
- case '&':
- if (peek() == '&') {
- input(); RET(AND);
- } else
- RET('&');
- case '|':
- if (peek() == '|') {
- input(); RET(BOR);
- } else
- RET('|');
- case '!':
- if (peek() == '=') {
- input(); yylval.i = NE; RET(NE);
- } else if (peek() == '~') {
- input(); yylval.i = NOTMATCH; RET(MATCHOP);
- } else
- RET(NOT);
- case '~':
- yylval.i = MATCH;
- RET(MATCHOP);
- case '<':
- if (peek() == '=') {
- input(); yylval.i = LE; RET(LE);
- } else {
- yylval.i = LT; RET(LT);
- }
- case '=':
- if (peek() == '=') {
- input(); yylval.i = EQ; RET(EQ);
- } else {
- yylval.i = ASSIGN; RET(ASGNOP);
- }
- case '>':
- if (peek() == '=') {
- input(); yylval.i = GE; RET(GE);
- } else if (peek() == '>') {
- input(); yylval.i = APPEND; RET(APPEND);
- } else {
- yylval.i = GT; RET(GT);
- }
- case '+':
- if (peek() == '+') {
- input(); yylval.i = INCR; RET(INCR);
- } else if (peek() == '=') {
- input(); yylval.i = ADDEQ; RET(ASGNOP);
- } else
- RET('+');
- case '-':
- if (peek() == '-') {
- input(); yylval.i = DECR; RET(DECR);
- } else if (peek() == '=') {
- input(); yylval.i = SUBEQ; RET(ASGNOP);
- } else
- RET('-');
- case '*':
- if (peek() == '=') { /* *= */
- input(); yylval.i = MULTEQ; RET(ASGNOP);
- } else if (peek() == '*') { /* ** or **= */
- input(); /* eat 2nd * */
- if (peek() == '=') {
- input(); yylval.i = POWEQ; RET(ASGNOP);
- } else {
- RET(POWER);
- }
- } else
- RET('*');
- case '/':
- RET('/');
- case '%':
- if (peek() == '=') {
- input(); yylval.i = MODEQ; RET(ASGNOP);
- } else
- RET('%');
- case '^':
- if (peek() == '=') {
- input(); yylval.i = POWEQ; RET(ASGNOP);
- } else
- RET(POWER);
-
- case '$':
- /* BUG: awkward, if not wrong */
- c = gettok(&buf, &bufsize);
- if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
- unputstr(buf);
- RET(INDIRECT);
- } else if (isalpha(c)) {
- if (strcmp(buf, "NF") == 0) { /* very special */
- unputstr("(NF)");
- RET(INDIRECT);
- }
- yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
- RET(IVAR);
- } else {
- unputstr(buf);
- RET(INDIRECT);
- }
-
- case '}':
- if (--bracecnt < 0)
- SYNTAX( "extra }" );
- sc = 1;
- RET(';');
- case ']':
- if (--brackcnt < 0)
- SYNTAX( "extra ]" );
- RET(']');
- case ')':
- if (--parencnt < 0)
- SYNTAX( "extra )" );
- RET(')');
- case '{':
- bracecnt++;
- RET('{');
- case '[':
- brackcnt++;
- RET('[');
- case '(':
- parencnt++;
- RET('(');
-
- case '"':
- return string(); /* BUG: should be like tran.c ? */
-
- default:
- RET(c);
- }
- }
- }
- int string(void)
- {
- int c, n;
- char *s, *bp;
- static char *buf = 0;
- static int bufsz = 500;
- if (buf == 0 && (buf = (char *) malloc(bufsz)) == nil)
- FATAL("out of space for strings");
- for (bp = buf; (c = input()) != '"'; ) {
- if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0))
- FATAL("out of space for string %.10s...", buf);
- switch (c) {
- case '\n':
- case '\r':
- case 0:
- SYNTAX( "non-terminated string %.10s...", buf );
- lineno++;
- RET(0);
- case '\\':
- c = input();
- switch (c) {
- case '"': *bp++ = '"'; break;
- case 'n': *bp++ = '\n'; break;
- case 't': *bp++ = '\t'; break;
- case 'f': *bp++ = '\f'; break;
- case 'r': *bp++ = '\r'; break;
- case 'b': *bp++ = '\b'; break;
- case 'v': *bp++ = '\v'; break;
- case 'a': *bp++ = '\007'; break;
- case '\\': *bp++ = '\\'; break;
- case '0': case '1': case '2': /* octal: \d \dd \ddd */
- case '3': case '4': case '5': case '6': case '7':
- n = c - '0';
- if ((c = peek()) >= '0' && c < '8') {
- n = 8 * n + input() - '0';
- if ((c = peek()) >= '0' && c < '8')
- n = 8 * n + input() - '0';
- }
- *bp++ = n;
- break;
- case 'x': /* hex \x0-9a-fA-F + */
- { char xbuf[100], *px;
- for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
- if (isdigit(c)
- || (c >= 'a' && c <= 'f')
- || (c >= 'A' && c <= 'F'))
- *px++ = c;
- else
- break;
- }
- *px = 0;
- unput(c);
- n = strtol(xbuf, nil, 16);
- *bp++ = n;
- break;
- }
- default:
- *bp++ = c;
- break;
- }
- break;
- default:
- *bp++ = c;
- break;
- }
- }
- *bp = 0;
- s = tostring(buf);
- *bp++ = ' '; *bp++ = 0;
- yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
- RET(STRING);
- }
- int binsearch(char *w, Keyword *kp, int n)
- {
- int cond, low, mid, high;
- low = 0;
- high = n - 1;
- while (low <= high) {
- mid = (low + high) / 2;
- if ((cond = strcmp(w, kp[mid].word)) < 0)
- high = mid - 1;
- else if (cond > 0)
- low = mid + 1;
- else
- return mid;
- }
- return -1;
- }
- int word(char *w)
- {
- Keyword *kp;
- int c, n;
- n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
- kp = keywords + n;
- if (n != -1) { /* found in table */
- yylval.i = kp->sub;
- switch (kp->type) { /* special handling */
- case FSYSTEM:
- if (safe)
- SYNTAX( "system is unsafe" );
- RET(kp->type);
- case FUNC:
- if (infunc)
- SYNTAX( "illegal nested function" );
- RET(kp->type);
- case RETURN:
- if (!infunc)
- SYNTAX( "return not in function" );
- RET(kp->type);
- case VARNF:
- yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
- RET(VARNF);
- default:
- RET(kp->type);
- }
- }
- c = peek(); /* look for '(' */
- if (c != '(' && infunc && (n=isarg(w)) >= 0) {
- yylval.i = n;
- RET(ARG);
- } else {
- yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
- if (c == '(') {
- RET(CALL);
- } else {
- RET(VAR);
- }
- }
- }
- void startreg(void) /* next call to yyles will return a regular expression */
- {
- reg = 1;
- }
- int regexpr(void)
- {
- int c;
- static char *buf = 0;
- static int bufsz = 500;
- char *bp;
- if (buf == 0 && (buf = (char *) malloc(bufsz)) == nil)
- FATAL("out of space for rex expr");
- bp = buf;
- for ( ; (c = input()) != '/' && c != 0; ) {
- if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, 0))
- FATAL("out of space for reg expr %.10s...", buf);
- if (c == '\n') {
- SYNTAX( "newline in regular expression %.10s...", buf );
- unput('\n');
- break;
- } else if (c == '\\') {
- *bp++ = '\\';
- *bp++ = input();
- } else {
- *bp++ = c;
- }
- }
- *bp = 0;
- yylval.s = tostring(buf);
- unput('/');
- RET(REGEXPR);
- }
- /* low-level lexical stuff, sort of inherited from lex */
- char ebuf[300];
- char *ep = ebuf;
- char yysbuf[100]; /* pushback buffer */
- char *yysptr = yysbuf;
- Biobuf *yyin;
- int input(void) /* get next lexical input character */
- {
- int c;
- extern char *lexprog;
- if (yysptr > yysbuf)
- c = *--yysptr;
- else if (lexprog != nil) { /* awk '...' */
- if ((c = *lexprog) != 0)
- lexprog++;
- } else /* awk -f ... */
- c = pgetc();
- if (c == '\n')
- lineno++;
- else if (c == Beof)
- c = 0;
- if (ep >= ebuf + sizeof ebuf)
- ep = ebuf;
- return *ep++ = c;
- }
- void unput(int c) /* put lexical character back on input */
- {
- if (c == '\n')
- lineno--;
- if (yysptr >= yysbuf + sizeof(yysbuf))
- FATAL("pushed back too much: %.20s...", yysbuf);
- *yysptr++ = c;
- if (--ep < ebuf)
- ep = ebuf + sizeof(ebuf) - 1;
- }
/*
 * unputstr: push the whole string s back onto the input.
 * Characters are pushed last to first, so input() reads them back in
 * their original order.  An empty string pushes nothing.
 */
void unputstr(char *s)	/* put a string back on input */
{
	char *p;

	for (p = s + strlen(s); p > s; )
		unput(*--p);
}
|