lex.c 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. #include "rc.h"
  2. #include "exec.h"
  3. #include "io.h"
  4. #include "getflags.h"
  5. #include "fns.h"
  6. int getnext(void);
  7. int
  8. wordchr(int c)
  9. {
  10. return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF;
  11. }
  12. int
  13. idchr(int c)
  14. {
  15. /*
  16. * Formerly:
  17. * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
  18. * || c=='_' || c=='*';
  19. */
  20. return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c);
  21. }
  22. int future = EOF;
  23. int doprompt = 1;
  24. int inquote;
  25. /*
  26. * Look ahead in the input stream
  27. */
  28. int
  29. nextc(void)
  30. {
  31. if(future==EOF)
  32. future = getnext();
  33. return future;
  34. }
  35. /*
  36. * Consume the lookahead character.
  37. */
  38. int
  39. advance(void)
  40. {
  41. int c = nextc();
  42. lastc = future;
  43. future = EOF;
  44. return c;
  45. }
  46. /*
  47. * read a character from the input stream
  48. */
  49. int
  50. getnext(void)
  51. {
  52. int c;
  53. static peekc = EOF;
  54. if(peekc!=EOF){
  55. c = peekc;
  56. peekc = EOF;
  57. return c;
  58. }
  59. if(runq->eof)
  60. return EOF;
  61. if(doprompt)
  62. pprompt();
  63. c = rchr(runq->cmdfd);
  64. if(!inquote && c=='\\'){
  65. c = rchr(runq->cmdfd);
  66. if(c=='\n'){
  67. doprompt = 1;
  68. c=' ';
  69. }
  70. else{
  71. peekc = c;
  72. c='\\';
  73. }
  74. }
  75. doprompt = doprompt || c=='\n' || c==EOF;
  76. if(c==EOF)
  77. runq->eof++;
  78. else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);
  79. return c;
  80. }
  81. void
  82. pprompt(void)
  83. {
  84. var *prompt;
  85. if(runq->iflag){
  86. pstr(err, promptstr);
  87. flush(err);
  88. prompt = vlook("prompt");
  89. if(prompt->val && prompt->val->next)
  90. promptstr = prompt->val->next->word;
  91. else
  92. promptstr="\t";
  93. }
  94. runq->lineno++;
  95. doprompt = 0;
  96. }
  97. void
  98. skipwhite(void)
  99. {
  100. int c;
  101. for(;;){
  102. c = nextc();
  103. if(c=='#'){ /* Why did this used to be if(!inquote && c=='#') ?? */
  104. for(;;){
  105. c = nextc();
  106. if(c=='\n' || c==EOF)
  107. break;
  108. advance();
  109. }
  110. }
  111. if(c==' ' || c=='\t')
  112. advance();
  113. else return;
  114. }
  115. }
  116. void
  117. skipnl(void)
  118. {
  119. int c;
  120. for(;;){
  121. skipwhite();
  122. c = nextc();
  123. if(c!='\n')
  124. return;
  125. advance();
  126. }
  127. }
  128. int
  129. nextis(int c)
  130. {
  131. if(nextc()==c){
  132. advance();
  133. return 1;
  134. }
  135. return 0;
  136. }
  137. char*
  138. addtok(char *p, int val)
  139. {
  140. if(p==0)
  141. return 0;
  142. if(p==&tok[NTOK]){
  143. *p = 0;
  144. yyerror("token buffer too short");
  145. return 0;
  146. }
  147. *p++=val;
  148. return p;
  149. }
  150. char*
  151. addutf(char *p, int c)
  152. {
  153. p = addtok(p, c);
  154. if(twobyte(c)) /* 2-byte escape */
  155. return addtok(p, advance());
  156. if(threebyte(c)){ /* 3-byte escape */
  157. p = addtok(p, advance());
  158. return addtok(p, advance());
  159. }
  160. return p;
  161. }
  162. int lastdol; /* was the last token read '$' or '$#' or '"'? */
  163. int lastword; /* was the last token read a word or compound word terminator? */
  164. int
  165. yylex(void)
  166. {
  167. int c, d = nextc();
  168. char *w = tok;
  169. struct tree *t;
  170. yylval.tree = 0;
  171. /*
  172. * Embarassing sneakiness: if the last token read was a quoted or unquoted
  173. * WORD then we alter the meaning of what follows. If the next character
  174. * is `(', we return SUB (a subscript paren) and consume the `('. Otherwise,
  175. * if the next character is the first character of a simple or compound word,
  176. * we insert a `^' before it.
  177. */
  178. if(lastword){
  179. lastword = 0;
  180. if(d=='('){
  181. advance();
  182. strcpy(tok, "( [SUB]");
  183. return SUB;
  184. }
  185. if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
  186. strcpy(tok, "^");
  187. return '^';
  188. }
  189. }
  190. inquote = 0;
  191. skipwhite();
  192. switch(c = advance()){
  193. case EOF:
  194. lastdol = 0;
  195. strcpy(tok, "EOF");
  196. return EOF;
  197. case '$':
  198. lastdol = 1;
  199. if(nextis('#')){
  200. strcpy(tok, "$#");
  201. return COUNT;
  202. }
  203. if(nextis('"')){
  204. strcpy(tok, "$\"");
  205. return '"';
  206. }
  207. strcpy(tok, "$");
  208. return '$';
  209. case '&':
  210. lastdol = 0;
  211. if(nextis('&')){
  212. skipnl();
  213. strcpy(tok, "&&");
  214. return ANDAND;
  215. }
  216. strcpy(tok, "&");
  217. return '&';
  218. case '|':
  219. lastdol = 0;
  220. if(nextis(c)){
  221. skipnl();
  222. strcpy(tok, "||");
  223. return OROR;
  224. }
  225. case '<':
  226. case '>':
  227. lastdol = 0;
  228. /*
  229. * funny redirection tokens:
  230. * redir: arrow | arrow '[' fd ']'
  231. * arrow: '<' | '<<' | '>' | '>>' | '|'
  232. * fd: digit | digit '=' | digit '=' digit
  233. * digit: '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
  234. * some possibilities are nonsensical and get a message.
  235. */
  236. *w++=c;
  237. t = newtree();
  238. switch(c){
  239. case '|':
  240. t->type = PIPE;
  241. t->fd0 = 1;
  242. t->fd1 = 0;
  243. break;
  244. case '>':
  245. t->type = REDIR;
  246. if(nextis(c)){
  247. t->rtype = APPEND;
  248. *w++=c;
  249. }
  250. else t->rtype = WRITE;
  251. t->fd0 = 1;
  252. break;
  253. case '<':
  254. t->type = REDIR;
  255. if(nextis(c)){
  256. t->rtype = HERE;
  257. *w++=c;
  258. } else if (nextis('>')){
  259. t->rtype = RDWR;
  260. *w++=c;
  261. } else t->rtype = READ;
  262. t->fd0 = 0;
  263. break;
  264. }
  265. if(nextis('[')){
  266. *w++='[';
  267. c = advance();
  268. *w++=c;
  269. if(c<'0' || '9'<c){
  270. RedirErr:
  271. *w = 0;
  272. yyerror(t->type==PIPE?"pipe syntax"
  273. :"redirection syntax");
  274. return EOF;
  275. }
  276. t->fd0 = 0;
  277. do{
  278. t->fd0 = t->fd0*10+c-'0';
  279. *w++=c;
  280. c = advance();
  281. }while('0'<=c && c<='9');
  282. if(c=='='){
  283. *w++='=';
  284. if(t->type==REDIR)
  285. t->type = DUP;
  286. c = advance();
  287. if('0'<=c && c<='9'){
  288. t->rtype = DUPFD;
  289. t->fd1 = t->fd0;
  290. t->fd0 = 0;
  291. do{
  292. t->fd0 = t->fd0*10+c-'0';
  293. *w++=c;
  294. c = advance();
  295. }while('0'<=c && c<='9');
  296. }
  297. else{
  298. if(t->type==PIPE)
  299. goto RedirErr;
  300. t->rtype = CLOSE;
  301. }
  302. }
  303. if(c!=']'
  304. || t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
  305. goto RedirErr;
  306. *w++=']';
  307. }
  308. *w='\0';
  309. yylval.tree = t;
  310. if(t->type==PIPE)
  311. skipnl();
  312. return t->type;
  313. case '\'':
  314. lastdol = 0;
  315. lastword = 1;
  316. inquote = 1;
  317. for(;;){
  318. c = advance();
  319. if(c==EOF)
  320. break;
  321. if(c=='\''){
  322. if(nextc()!='\'')
  323. break;
  324. advance();
  325. }
  326. w = addutf(w, c);
  327. }
  328. if(w!=0)
  329. *w='\0';
  330. t = token(tok, WORD);
  331. t->quoted = 1;
  332. yylval.tree = t;
  333. return t->type;
  334. }
  335. if(!wordchr(c)){
  336. lastdol = 0;
  337. tok[0] = c;
  338. tok[1]='\0';
  339. return c;
  340. }
  341. for(;;){
  342. /* next line should have (char)c==GLOB, but ken's compiler is broken */
  343. if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB)
  344. w = addtok(w, GLOB);
  345. w = addutf(w, c);
  346. c = nextc();
  347. if(lastdol?!idchr(c):!wordchr(c)) break;
  348. advance();
  349. }
  350. lastword = 1;
  351. lastdol = 0;
  352. if(w!=0)
  353. *w='\0';
  354. t = klook(tok);
  355. if(t->type!=WORD)
  356. lastword = 0;
  357. t->quoted = 0;
  358. yylval.tree = t;
  359. return t->type;
  360. }