lex.c 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. #include "rc.h"
  2. #include "exec.h"
  3. #include "io.h"
  4. #include "getflags.h"
  5. #include "fns.h"
  6. int getnext(void);
  7. int
  8. wordchr(int c)
  9. {
  10. return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF;
  11. }
  12. int
  13. idchr(int c)
  14. {
  15. /*
  16. * Formerly:
  17. * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
  18. * || c=='_' || c=='*';
  19. */
  20. return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c);
  21. }
int future = EOF;	/* one-character lookahead filled by nextc and cleared by advance; EOF means nothing is buffered */
int doprompt = 1;	/* nonzero: getnext calls pprompt before reading its next character */
int inquote;		/* set by yylex while reading a quoted word; getnext then does no backslash processing */
int incomm;		/* set by skipwhite while skipping a # comment; getnext then does not join lines on backslash-newline */
  26. /*
  27. * Look ahead in the input stream
  28. */
  29. int
  30. nextc(void)
  31. {
  32. if(future==EOF)
  33. future = getnext();
  34. return future;
  35. }
  36. /*
  37. * Consume the lookahead character.
  38. */
  39. int
  40. advance(void)
  41. {
  42. int c = nextc();
  43. lastc = future;
  44. future = EOF;
  45. return c;
  46. }
  47. /*
  48. * read a character from the input stream
  49. */
  50. int
  51. getnext(void)
  52. {
  53. int c;
  54. static int peekc = EOF;
  55. if(peekc!=EOF){
  56. c = peekc;
  57. peekc = EOF;
  58. return c;
  59. }
  60. if(runq->eof)
  61. return EOF;
  62. if(doprompt)
  63. pprompt();
  64. c = rchr(runq->cmdfd);
  65. if(!inquote && c=='\\'){
  66. c = rchr(runq->cmdfd);
  67. if(c=='\n' && !incomm){ /* don't continue a comment */
  68. doprompt = 1;
  69. c=' ';
  70. }
  71. else{
  72. peekc = c;
  73. c='\\';
  74. }
  75. }
  76. doprompt = doprompt || c=='\n' || c==EOF;
  77. if(c==EOF)
  78. runq->eof++;
  79. else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);
  80. return c;
  81. }
  82. void
  83. pprompt(void)
  84. {
  85. var *prompt;
  86. if(runq->iflag){
  87. pstr(err, promptstr);
  88. flush(err);
  89. prompt = vlook("prompt");
  90. if(prompt->val && prompt->val->next)
  91. promptstr = prompt->val->next->word;
  92. else
  93. promptstr="\t";
  94. }
  95. runq->lineno++;
  96. doprompt = 0;
  97. }
  98. void
  99. skipwhite(void)
  100. {
  101. int c;
  102. for(;;){
  103. c = nextc();
  104. /* Why did this used to be if(!inquote && c=='#') ?? */
  105. if(c=='#'){
  106. incomm = 1;
  107. for(;;){
  108. c = nextc();
  109. if(c=='\n' || c==EOF) {
  110. incomm = 0;
  111. break;
  112. }
  113. advance();
  114. }
  115. }
  116. if(c==' ' || c=='\t')
  117. advance();
  118. else return;
  119. }
  120. }
  121. void
  122. skipnl(void)
  123. {
  124. int c;
  125. for(;;){
  126. skipwhite();
  127. c = nextc();
  128. if(c!='\n')
  129. return;
  130. advance();
  131. }
  132. }
  133. int
  134. nextis(int c)
  135. {
  136. if(nextc()==c){
  137. advance();
  138. return 1;
  139. }
  140. return 0;
  141. }
  142. char*
  143. addtok(char *p, int val)
  144. {
  145. if(p==0)
  146. return 0;
  147. if(p==&tok[NTOK-1]){
  148. *p = 0;
  149. yyerror("token buffer too short");
  150. return 0;
  151. }
  152. *p++=val;
  153. return p;
  154. }
  155. char*
  156. addutf(char *p, int c)
  157. {
  158. p = addtok(p, c);
  159. if(twobyte(c)) /* 2-byte escape */
  160. return addtok(p, advance());
  161. if(threebyte(c)){ /* 3-byte escape */
  162. p = addtok(p, advance());
  163. return addtok(p, advance());
  164. }
  165. return p;
  166. }
int lastdol; /* was the last token read '$' or '$#' or '$"'? if so yylex ends the next word with idchr instead of wordchr */
int lastword; /* was the last token read a word or compound word terminator? if so yylex turns a following '(' into SUB or inserts '^' before a following word */
  169. int
  170. yylex(void)
  171. {
  172. int c, d = nextc();
  173. char *w = tok;
  174. struct tree *t;
  175. yylval.tree = 0;
  176. /*
  177. * Embarassing sneakiness: if the last token read was a quoted or unquoted
  178. * WORD then we alter the meaning of what follows. If the next character
  179. * is `(', we return SUB (a subscript paren) and consume the `('. Otherwise,
  180. * if the next character is the first character of a simple or compound word,
  181. * we insert a `^' before it.
  182. */
  183. if(lastword){
  184. lastword = 0;
  185. if(d=='('){
  186. advance();
  187. strcpy(tok, "( [SUB]");
  188. return SUB;
  189. }
  190. if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
  191. strcpy(tok, "^");
  192. return '^';
  193. }
  194. }
  195. inquote = 0;
  196. skipwhite();
  197. switch(c = advance()){
  198. case EOF:
  199. lastdol = 0;
  200. strcpy(tok, "EOF");
  201. return EOF;
  202. case '$':
  203. lastdol = 1;
  204. if(nextis('#')){
  205. strcpy(tok, "$#");
  206. return COUNT;
  207. }
  208. if(nextis('"')){
  209. strcpy(tok, "$\"");
  210. return '"';
  211. }
  212. strcpy(tok, "$");
  213. return '$';
  214. case '&':
  215. lastdol = 0;
  216. if(nextis('&')){
  217. skipnl();
  218. strcpy(tok, "&&");
  219. return ANDAND;
  220. }
  221. strcpy(tok, "&");
  222. return '&';
  223. case '|':
  224. lastdol = 0;
  225. if(nextis(c)){
  226. skipnl();
  227. strcpy(tok, "||");
  228. return OROR;
  229. }
  230. case '<':
  231. case '>':
  232. lastdol = 0;
  233. /*
  234. * funny redirection tokens:
  235. * redir: arrow | arrow '[' fd ']'
  236. * arrow: '<' | '<<' | '>' | '>>' | '|'
  237. * fd: digit | digit '=' | digit '=' digit
  238. * digit: '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
  239. * some possibilities are nonsensical and get a message.
  240. */
  241. *w++=c;
  242. t = newtree();
  243. switch(c){
  244. case '|':
  245. t->type = PIPE;
  246. t->fd0 = 1;
  247. t->fd1 = 0;
  248. break;
  249. case '>':
  250. t->type = REDIR;
  251. if(nextis(c)){
  252. t->rtype = APPEND;
  253. *w++=c;
  254. }
  255. else t->rtype = WRITE;
  256. t->fd0 = 1;
  257. break;
  258. case '<':
  259. t->type = REDIR;
  260. if(nextis(c)){
  261. t->rtype = HERE;
  262. *w++=c;
  263. } else if (nextis('>')){
  264. t->rtype = RDWR;
  265. *w++=c;
  266. } else t->rtype = READ;
  267. t->fd0 = 0;
  268. break;
  269. }
  270. if(nextis('[')){
  271. *w++='[';
  272. c = advance();
  273. *w++=c;
  274. if(c<'0' || '9'<c){
  275. RedirErr:
  276. *w = 0;
  277. yyerror(t->type==PIPE?"pipe syntax"
  278. :"redirection syntax");
  279. return EOF;
  280. }
  281. t->fd0 = 0;
  282. do{
  283. t->fd0 = t->fd0*10+c-'0';
  284. *w++=c;
  285. c = advance();
  286. }while('0'<=c && c<='9');
  287. if(c=='='){
  288. *w++='=';
  289. if(t->type==REDIR)
  290. t->type = DUP;
  291. c = advance();
  292. if('0'<=c && c<='9'){
  293. t->rtype = DUPFD;
  294. t->fd1 = t->fd0;
  295. t->fd0 = 0;
  296. do{
  297. t->fd0 = t->fd0*10+c-'0';
  298. *w++=c;
  299. c = advance();
  300. }while('0'<=c && c<='9');
  301. }
  302. else{
  303. if(t->type==PIPE)
  304. goto RedirErr;
  305. t->rtype = CLOSE;
  306. }
  307. }
  308. if(c!=']'
  309. || t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
  310. goto RedirErr;
  311. *w++=']';
  312. }
  313. *w='\0';
  314. yylval.tree = t;
  315. if(t->type==PIPE)
  316. skipnl();
  317. return t->type;
  318. case '\'':
  319. lastdol = 0;
  320. lastword = 1;
  321. inquote = 1;
  322. for(;;){
  323. c = advance();
  324. if(c==EOF)
  325. break;
  326. if(c=='\''){
  327. if(nextc()!='\'')
  328. break;
  329. advance();
  330. }
  331. w = addutf(w, c);
  332. }
  333. if(w!=0)
  334. *w='\0';
  335. t = token(tok, WORD);
  336. t->quoted = 1;
  337. yylval.tree = t;
  338. return t->type;
  339. }
  340. if(!wordchr(c)){
  341. lastdol = 0;
  342. tok[0] = c;
  343. tok[1]='\0';
  344. return c;
  345. }
  346. for(;;){
  347. /* next line should have (char)c==GLOB, but ken's compiler is broken */
  348. if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB)
  349. w = addtok(w, GLOB);
  350. w = addutf(w, c);
  351. c = nextc();
  352. if(lastdol?!idchr(c):!wordchr(c)) break;
  353. advance();
  354. }
  355. lastword = 1;
  356. lastdol = 0;
  357. if(w!=0)
  358. *w='\0';
  359. t = klook(tok);
  360. if(t->type!=WORD)
  361. lastword = 0;
  362. t->quoted = 0;
  363. yylval.tree = t;
  364. return t->type;
  365. }