123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378 |
- #include "rc.h"
- #include "exec.h"
- #include "io.h"
- #include "getflags.h"
- #include "fns.h"
- int getnext(void);	/* forward declaration: nextc calls getnext before its definition appears */
/*
 * Report whether c may appear inside an unquoted word.
 * Returns 0 for EOF, for NUL, and for any of the shell's
 * delimiter characters listed below; 1 otherwise.
 */
int
wordchr(int c)
{
	char *delims = "\n \t#;&|^$=`'{}()<>";

	if(c==EOF)
		return 0;
	/* strchr also matches the terminating NUL, so c==0 is not a word character */
	return strchr(delims, c)==0;
}
/*
 * Report whether c may appear in a variable name following `$'.
 * Anything at or below space (including EOF and NUL) is rejected,
 * as is the punctuation listed below; everything else is accepted.
 *
 * An earlier version accepted only letters, digits, `_' and `*':
 *	return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
 *		|| c=='_' || c=='*';
 */
int
idchr(int c)
{
	if(c<=' ')
		return 0;
	return strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c)==0;
}
- int future = EOF;	/* one-character lookahead filled by nextc; EOF means nothing is buffered */
- int doprompt = 1;	/* nonzero: getnext calls pprompt before reading the next character */
- int inquote;	/* set by yylex while reading a quoted word; getnext then leaves backslash alone */
- int incomm;	/* set by skipwhite while skipping a # comment; stops backslash-newline continuation */
- /*
- * Look ahead in the input stream
- */
- int
- nextc(void)
- {
- if(future==EOF)
- future = getnext();
- return future;
- }
- /*
- * Consume the lookahead character.
- */
- int
- advance(void)
- {
- int c = nextc();
- lastc = future;
- future = EOF;
- return c;
- }
- /*
- * read a character from the input stream
- */
- int
- getnext(void)
- {
- int c;
- static int peekc = EOF;
- if(peekc!=EOF){
- c = peekc;
- peekc = EOF;
- return c;
- }
- if(runq->eof)
- return EOF;
- if(doprompt)
- pprompt();
- c = rchr(runq->cmdfd);
- if(!inquote && c=='\\'){
- c = rchr(runq->cmdfd);
- if(c=='\n' && !incomm){ /* don't continue a comment */
- doprompt = 1;
- c=' ';
- }
- else{
- peekc = c;
- c='\\';
- }
- }
- doprompt = doprompt || c=='\n' || c==EOF;
- if(c==EOF)
- runq->eof++;
- else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);
- return c;
- }
- void
- pprompt(void)
- {
- var *prompt;
- if(runq->iflag){
- pstr(err, promptstr);
- flush(err);
- prompt = vlook("prompt");
- if(prompt->val && prompt->val->next)
- promptstr = prompt->val->next->word;
- else
- promptstr="\t";
- }
- runq->lineno++;
- doprompt = 0;
- }
- void
- skipwhite(void)
- {
- int c;
- for(;;){
- c = nextc();
- /* Why did this used to be if(!inquote && c=='#') ?? */
- if(c=='#'){
- incomm = 1;
- for(;;){
- c = nextc();
- if(c=='\n' || c==EOF) {
- incomm = 0;
- break;
- }
- advance();
- }
- }
- if(c==' ' || c=='\t')
- advance();
- else return;
- }
- }
/*
 * Skip white space, comments and any number of newlines, leaving the
 * lookahead at the first character of the next non-empty input.
 */
void
skipnl(void)
{
	skipwhite();
	while(nextc()=='\n'){
		advance();
		skipwhite();
	}
}
/*
 * If the lookahead character is c, consume it and return 1;
 * otherwise leave the input untouched and return 0.
 */
int
nextis(int c)
{
	if(nextc()!=c)
		return 0;
	advance();
	return 1;
}
- char*
- addtok(char *p, int val)
- {
- if(p==0)
- return 0;
- if(p==&tok[NTOK-1]){
- *p = 0;
- yyerror("token buffer too short");
- return 0;
- }
- *p++=val;
- return p;
- }
/*
 * Append character c to the token at p.  If c begins a two- or
 * three-byte sequence (as judged by twobyte/threebyte), the one or
 * two following input bytes are consumed and appended as well.
 * Returns the new end of the token, or 0 once addtok has failed.
 */
char*
addutf(char *p, int c)
{
	int more;

	p = addtok(p, c);
	if(twobyte(c))		/* 2-byte escape */
		more = 1;
	else if(threebyte(c))	/* 3-byte escape */
		more = 2;
	else
		more = 0;
	while(more-- > 0)
		p = addtok(p, advance());
	return p;
}
- int lastdol; /* was the last token read '$' or '$#' or '$"'?  If so, yylex ends the following word at the first non-idchr character instead of the first non-wordchr one */
- int lastword; /* was the last token read a word or compound word terminator?  If so, yylex turns a following `(' into SUB or inserts `^' before a following word */
- int
- yylex(void)
- {
- int c, d = nextc();
- char *w = tok;
- struct tree *t;
- yylval.tree = 0;
- /*
- * Embarassing sneakiness: if the last token read was a quoted or unquoted
- * WORD then we alter the meaning of what follows. If the next character
- * is `(', we return SUB (a subscript paren) and consume the `('. Otherwise,
- * if the next character is the first character of a simple or compound word,
- * we insert a `^' before it.
- */
- if(lastword){
- lastword = 0;
- if(d=='('){
- advance();
- strcpy(tok, "( [SUB]");
- return SUB;
- }
- if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
- strcpy(tok, "^");
- return '^';
- }
- }
- inquote = 0;
- skipwhite();
- switch(c = advance()){
- case EOF:
- lastdol = 0;
- strcpy(tok, "EOF");
- return EOF;
- case '$':
- lastdol = 1;
- if(nextis('#')){
- strcpy(tok, "$#");
- return COUNT;
- }
- if(nextis('"')){
- strcpy(tok, "$\"");
- return '"';
- }
- strcpy(tok, "$");
- return '$';
- case '&':
- lastdol = 0;
- if(nextis('&')){
- skipnl();
- strcpy(tok, "&&");
- return ANDAND;
- }
- strcpy(tok, "&");
- return '&';
- case '|':
- lastdol = 0;
- if(nextis(c)){
- skipnl();
- strcpy(tok, "||");
- return OROR;
- }
- case '<':
- case '>':
- lastdol = 0;
- /*
- * funny redirection tokens:
- * redir: arrow | arrow '[' fd ']'
- * arrow: '<' | '<<' | '>' | '>>' | '|'
- * fd: digit | digit '=' | digit '=' digit
- * digit: '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
- * some possibilities are nonsensical and get a message.
- */
- *w++=c;
- t = newtree();
- switch(c){
- case '|':
- t->type = PIPE;
- t->fd0 = 1;
- t->fd1 = 0;
- break;
- case '>':
- t->type = REDIR;
- if(nextis(c)){
- t->rtype = APPEND;
- *w++=c;
- }
- else t->rtype = WRITE;
- t->fd0 = 1;
- break;
- case '<':
- t->type = REDIR;
- if(nextis(c)){
- t->rtype = HERE;
- *w++=c;
- } else if (nextis('>')){
- t->rtype = RDWR;
- *w++=c;
- } else t->rtype = READ;
- t->fd0 = 0;
- break;
- }
- if(nextis('[')){
- *w++='[';
- c = advance();
- *w++=c;
- if(c<'0' || '9'<c){
- RedirErr:
- *w = 0;
- yyerror(t->type==PIPE?"pipe syntax"
- :"redirection syntax");
- return EOF;
- }
- t->fd0 = 0;
- do{
- t->fd0 = t->fd0*10+c-'0';
- *w++=c;
- c = advance();
- }while('0'<=c && c<='9');
- if(c=='='){
- *w++='=';
- if(t->type==REDIR)
- t->type = DUP;
- c = advance();
- if('0'<=c && c<='9'){
- t->rtype = DUPFD;
- t->fd1 = t->fd0;
- t->fd0 = 0;
- do{
- t->fd0 = t->fd0*10+c-'0';
- *w++=c;
- c = advance();
- }while('0'<=c && c<='9');
- }
- else{
- if(t->type==PIPE)
- goto RedirErr;
- t->rtype = CLOSE;
- }
- }
- if(c!=']'
- || t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
- goto RedirErr;
- *w++=']';
- }
- *w='\0';
- yylval.tree = t;
- if(t->type==PIPE)
- skipnl();
- return t->type;
- case '\'':
- lastdol = 0;
- lastword = 1;
- inquote = 1;
- for(;;){
- c = advance();
- if(c==EOF)
- break;
- if(c=='\''){
- if(nextc()!='\'')
- break;
- advance();
- }
- w = addutf(w, c);
- }
- if(w!=0)
- *w='\0';
- t = token(tok, WORD);
- t->quoted = 1;
- yylval.tree = t;
- return t->type;
- }
- if(!wordchr(c)){
- lastdol = 0;
- tok[0] = c;
- tok[1]='\0';
- return c;
- }
- for(;;){
- /* next line should have (char)c==GLOB, but ken's compiler is broken */
- if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB)
- w = addtok(w, GLOB);
- w = addutf(w, c);
- c = nextc();
- if(lastdol?!idchr(c):!wordchr(c)) break;
- advance();
- }
- lastword = 1;
- lastdol = 0;
- if(w!=0)
- *w='\0';
- t = klook(tok);
- if(t->type!=WORD)
- lastword = 0;
- t->quoted = 0;
- yylval.tree = t;
- return t->type;
- }
|