lex.c 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. #include "rc.h"
  2. #include "exec.h"
  3. #include "io.h"
  4. #include "getflags.h"
  5. #include "fns.h"
  6. int getnext(void);
  /*
   * Report whether c may appear in an unquoted word: anything other than
   * EOF, NUL, white space and the syntactically significant characters
   * listed in the string below.
   */
  int wordchr(int c)
  {
      if(c==EOF)
          return 0;
      /* strchr also matches the terminating NUL, so wordchr(0) is 0 */
      return strchr("\n \t#;&|^$=`'{}()<>", c)==0;
  }
  /*
   * Report whether c may appear in a variable name.
   * Formerly only letters, digits, `_' and `*' qualified:
   *	'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
   *	|| c=='_' || c=='*'
   * Now every character above space qualifies unless it is one of the
   * punctuation characters in the table below (`_' and `*' are not in
   * the table, so they still qualify).
   */
  int idchr(int c)
  {
      static const char notid[]="!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~";

      if(c<=' ')
          return 0;
      return strchr(notid, c)==0;
  }
  20. int future=EOF;	/* one-character lookahead filled by nextc(); EOF means empty */
  21. int doprompt=1;	/* nonzero when getnext() must call pprompt() before reading */
  22. int inquote;	/* set by yylex inside '...'; getnext() then leaves backslashes alone */
  23. /*
  24. * Look ahead in the input stream
  25. */
  26. int nextc(void){
  27. if(future==EOF) future=getnext();
  28. return future;
  29. }
  30. /*
  31. * Consume the lookahead character.
  32. */
  33. int advance(void){
  34. int c=nextc();
  35. lastc=future;
  36. future=EOF;
  37. return c;
  38. }
  39. /*
  40. * read a character from the input stream
  41. */
  42. int getnext(void){
  43. register int c;
  44. static peekc=EOF;
  45. if(peekc!=EOF){
  46. c=peekc;
  47. peekc=EOF;
  48. return c;
  49. }
  50. if(runq->eof) return EOF;
  51. if(doprompt) pprompt();
  52. c=rchr(runq->cmdfd);
  53. if(!inquote && c=='\\'){
  54. c=rchr(runq->cmdfd);
  55. if(c=='\n'){
  56. doprompt=1;
  57. c=' ';
  58. }
  59. else{
  60. peekc=c;
  61. c='\\';
  62. }
  63. }
  64. doprompt=doprompt || c=='\n' || c==EOF;
  65. if(c==EOF) runq->eof++;
  66. else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);
  67. return c;
  68. }
  69. void pprompt(void){
  70. var *prompt;
  71. if(runq->iflag){
  72. pstr(err, promptstr);
  73. flush(err);
  74. prompt=vlook("prompt");
  75. if(prompt->val && prompt->val->next)
  76. promptstr=prompt->val->next->word;
  77. else
  78. promptstr="\t";
  79. }
  80. runq->lineno++;
  81. doprompt=0;
  82. }
  /*
   * Skip blanks, tabs and a trailing `#' comment.
   * On return the lookahead is the first character that is neither;
   * after a comment that is the newline or EOF ending it, which is
   * left unread.
   */
  void skipwhite(void)
  {
      int ch;

      for(;;){
          ch=nextc();
          if(ch=='#'){	/* Why did this used to be if(!inquote && c=='#') ?? */
              /* discard the comment text but not its terminator */
              while((ch=nextc())!='\n' && ch!=EOF)
                  advance();
              return;
          }
          if(ch!=' ' && ch!='\t')
              return;
          advance();
      }
  }
  /*
   * Skip white space, comments and any number of newlines, stopping at
   * the first character that starts something else.
   */
  void skipnl(void)
  {
      skipwhite();
      while(nextc()=='\n'){
          advance();
          skipwhite();
      }
  }
  /*
   * If the lookahead character equals c, consume it and return 1;
   * otherwise leave the input untouched and return 0.
   */
  int nextis(int c)
  {
      if(nextc()!=c)
          return 0;
      advance();
      return 1;
  }
  114. char *addtok(char *p, int val){
  115. if(p==0) return 0;
  116. if(p==&tok[NTOK]){
  117. *p=0;
  118. yyerror("token buffer too short");
  119. return 0;
  120. }
  121. *p++=val;
  122. return p;
  123. }
  /*
   * Append character c to the token and, when c is the lead byte of a
   * two- or three-byte sequence (per the twobyte/threebyte tests), pull
   * the continuation bytes from the input and append them as well.
   * The continuation bytes are consumed even if the token buffer has
   * already overflowed (p is null).  Returns the new end pointer as
   * given by addtok.
   */
  char *addutf(char *p, int c)
  {
      int more;	/* continuation bytes still to copy */

      p=addtok(p, c);
      if(twobyte(c))
          more=1;
      else if(threebyte(c))
          more=2;
      else
          more=0;
      while(more-->0)
          p=addtok(p, advance());
      return p;
  }
  134. int lastdol; /* was the last token read '$' or '$#' or '"'? */
  135. int lastword; /* was the last token read a word or compound word terminator? */
  136. int yylex(void){
  137. register int c, d=nextc();
  138. register char *w=tok;
  139. register struct tree *t;
  140. yylval.tree=0;
  141. /*
  142. * Embarassing sneakiness: if the last token read was a quoted or unquoted
  143. * WORD then we alter the meaning of what follows. If the next character
  144. * is `(', we return SUB (a subscript paren) and consume the `('. Otherwise,
  145. * if the next character is the first character of a simple or compound word,
  146. * we insert a `^' before it.
  147. */
  148. if(lastword){
  149. lastword=0;
  150. if(d=='('){
  151. advance();
  152. strcpy(tok, "( [SUB]");
  153. return SUB;
  154. }
  155. if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
  156. strcpy(tok, "^");
  157. return '^';
  158. }
  159. }
  160. inquote=0;
  161. skipwhite();
  162. switch(c=advance()){
  163. case EOF:
  164. lastdol=0;
  165. strcpy(tok, "EOF");
  166. return EOF;
  167. case '$':
  168. lastdol=1;
  169. if(nextis('#')){
  170. strcpy(tok, "$#");
  171. return COUNT;
  172. }
  173. if(nextis('"')){
  174. strcpy(tok, "$\"");
  175. return '"';
  176. }
  177. strcpy(tok, "$");
  178. return '$';
  179. case '&':
  180. lastdol=0;
  181. if(nextis('&')){
  182. skipnl();
  183. strcpy(tok, "&&");
  184. return ANDAND;
  185. }
  186. strcpy(tok, "&");
  187. return '&';
  188. case '|':
  189. lastdol=0;
  190. if(nextis(c)){
  191. skipnl();
  192. strcpy(tok, "||");
  193. return OROR;
  194. }
  195. case '<':
  196. case '>':
  197. lastdol=0;
  198. /*
  199. * funny redirection tokens:
  200. * redir: arrow | arrow '[' fd ']'
  201. * arrow: '<' | '<<' | '>' | '>>' | '|'
  202. * fd: digit | digit '=' | digit '=' digit
  203. * digit: '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
  204. * some possibilities are nonsensical and get a message.
  205. */
  206. *w++=c;
  207. t=newtree();
  208. switch(c){
  209. case '|':
  210. t->type=PIPE;
  211. t->fd0=1;
  212. t->fd1=0;
  213. break;
  214. case '>':
  215. t->type=REDIR;
  216. if(nextis(c)){
  217. t->rtype=APPEND;
  218. *w++=c;
  219. }
  220. else t->rtype=WRITE;
  221. t->fd0=1;
  222. break;
  223. case '<':
  224. t->type=REDIR;
  225. if(nextis(c)){
  226. t->rtype=HERE;
  227. *w++=c;
  228. }
  229. else t->rtype=READ;
  230. t->fd0=0;
  231. break;
  232. }
  233. if(nextis('[')){
  234. *w++='[';
  235. c=advance();
  236. *w++=c;
  237. if(c<'0' || '9'<c){
  238. RedirErr:
  239. *w=0;
  240. yyerror(t->type==PIPE?"pipe syntax"
  241. :"redirection syntax");
  242. return EOF;
  243. }
  244. t->fd0=0;
  245. do{
  246. t->fd0=t->fd0*10+c-'0';
  247. *w++=c;
  248. c=advance();
  249. }while('0'<=c && c<='9');
  250. if(c=='='){
  251. *w++='=';
  252. if(t->type==REDIR) t->type=DUP;
  253. c=advance();
  254. if('0'<=c && c<='9'){
  255. t->rtype=DUPFD;
  256. t->fd1=t->fd0;
  257. t->fd0=0;
  258. do{
  259. t->fd0=t->fd0*10+c-'0';
  260. *w++=c;
  261. c=advance();
  262. }while('0'<=c && c<='9');
  263. }
  264. else{
  265. if(t->type==PIPE) goto RedirErr;
  266. t->rtype=CLOSE;
  267. }
  268. }
  269. if(c!=']'
  270. || t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
  271. goto RedirErr;
  272. *w++=']';
  273. }
  274. *w='\0';
  275. yylval.tree=t;
  276. if(t->type==PIPE) skipnl();
  277. return t->type;
  278. case '\'':
  279. lastdol=0;
  280. lastword=1;
  281. inquote=1;
  282. for(;;){
  283. c=advance();
  284. if(c==EOF) break;
  285. if(c=='\''){
  286. if(nextc()!='\'')
  287. break;
  288. advance();
  289. }
  290. w=addutf(w, c);
  291. }
  292. if(w!=0) *w='\0';
  293. t=token(tok, WORD);
  294. t->quoted=1;
  295. yylval.tree=t;
  296. return t->type;
  297. }
  298. if(!wordchr(c)){
  299. lastdol=0;
  300. tok[0]=c;
  301. tok[1]='\0';
  302. return c;
  303. }
  304. for(;;){
  305. /* next line should have (char)c==GLOB, but ken's compiler is broken */
  306. if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB)
  307. w=addtok(w, GLOB);
  308. w=addutf(w, c);
  309. c=nextc();
  310. if(lastdol?!idchr(c):!wordchr(c)) break;
  311. advance();
  312. }
  313. lastword=1;
  314. lastdol=0;
  315. if(w!=0) *w='\0';
  316. t=klook(tok);
  317. if(t->type!=WORD) lastword=0;
  318. t->quoted=0;
  319. yylval.tree=t;
  320. return t->type;
  321. }