lex.c 63 KB


  1. /*++
  2. Copyright (c) 2013 Minoca Corp. All Rights Reserved
  3. Module Name:
  4. lex.c
  5. Abstract:
  6. This module implements the lexical tokenizer for the shell.
  7. Author:
  8. Evan Green 5-Jun-2013
  9. Environment:
  10. User Mode
  11. --*/
  12. //
  13. // ------------------------------------------------------------------- Includes
  14. //
  15. #include <assert.h>
  16. #include <ctype.h>
  17. #include <errno.h>
  18. #include <stdarg.h>
  19. #include <stdlib.h>
  20. #include <string.h>
  21. #include <unistd.h>
  22. #include "sh.h"
  23. #include "shparse.h"
  24. #include "../swlib.h"
  25. //
  26. // --------------------------------------------------------------------- Macros
  27. //
  28. //
  29. // This macro puts a character back into the input stream.
  30. //
  //
  // Pushes one character back so the next character fetch returns it again.
  // EOF is never pushed back. Only a single character of push-back is
  // supported, which the assert enforces. Un-reading a newline decrements the
  // line number because the fetch routine increments it again when the newline
  // is handed back out.
  //
  // The body is wrapped in do { } while (0) so the macro expands to exactly one
  // statement and composes safely with if/else. Invoke it with a trailing
  // semicolon. Note that _Character is evaluated more than once, so it must not
  // have side effects.
  //

  #define SHELL_LEXER_UNPUT(_Shell, _Character)                                \
      do {                                                                     \
          if ((_Character) != EOF) {                                           \
              assert((_Shell)->Lexer.UnputCharacterValid == FALSE);            \
              (_Shell)->Lexer.UnputCharacter = (_Character);                   \
              (_Shell)->Lexer.UnputCharacterValid = TRUE;                      \
              if ((_Character) == '\n') {                                      \
                  (_Shell)->Lexer.LineNumber -= 1;                             \
              }                                                                \
          }                                                                    \
      } while (0)
  42. //
  43. // ---------------------------------------------------------------- Definitions
  44. //
  45. //
  46. // ------------------------------------------------------ Data Type Definitions
  47. //
  //
  // Classifies the syntactic form of an expansion, as determined by
  // ShScanPastExpansion from the characters that open it.
  //

  typedef enum _EXPANSION_SYNTAX {
      ExpansionSyntaxInvalid = 0,         // No form has been identified.
      ExpansionSyntaxName,                // $name or ~name: runs of name characters.
      ExpansionSyntaxBackquote,           // `...`
      ExpansionSyntaxCurlyBrace,          // ${...}
      ExpansionSyntaxParentheses,         // $(...)
      ExpansionSyntaxDoubleParentheses    // $((...))
  } EXPANSION_SYNTAX, *PEXPANSION_SYNTAX;
  56. //
  57. // ----------------------------------------------- Internal Function Prototypes
  58. //
  59. BOOL
  60. ShGetInputCharacter (
  61. PSHELL Shell,
  62. PINT Character
  63. );
  64. BOOL
  65. ShGetAnyInputCharacter (
  66. PSHELL Shell,
  67. PINT Character
  68. );
  69. BOOL
  70. ShAddCharacterToTokenBuffer (
  71. PSHELL Shell,
  72. CHAR Character
  73. );
  74. BOOL
  75. ShScanExpansion (
  76. PSHELL Shell,
  77. INT Character
  78. );
  79. VOID
  80. ShCheckForReservedWord (
  81. PSHELL Shell
  82. );
  83. BOOL
  84. ShScanPendingHereDocuments (
  85. PSHELL Shell
  86. );
  87. BOOL
  88. ShScanHereDocument (
  89. PSHELL Shell,
  90. PSHELL_HERE_DOCUMENT HereDocument
  91. );
  92. VOID
  93. ShLexerError (
  94. PSHELL Shell,
  95. PSTR Format,
  96. ...
  97. );
  98. //
  99. // -------------------------------------------------------------------- Globals
  100. //
  101. //
  102. // Set this to TRUE to have the lexer print out each token it grabs.
  103. //
  // ShGetToken checks this flag on its way out and, when it is set, traces the
  // token it produced (or a failure message with the current line number).
  104. BOOL ShDebugLexer = FALSE;
  105. //
  106. // Define the characters that must be explicitly escaped when inside double
  107. // quotes. This also applies for single quotes, but with the addition of a
  108. // backslash. This is null terminated so it is a legitimate string.
  109. //
  110. CHAR ShQuoteEscapeCharacters[] = {
  111. '!',
  112. '*',
  113. '?',
  114. '[',
  115. '=',
  116. '~',
  117. ':',
  118. '/',
  119. '-',
  120. ']',
  121. SHELL_CONTROL_QUOTE,
  122. SHELL_CONTROL_ESCAPE,
  123. '\0',
  124. };
  125. //
  126. // Define the names of all the tokens.
  127. //
  128. PSTR ShTokenStrings[] = {
  129. "WORD",
  130. "ASSIGNMENT_WORD",
  131. "NAME",
  132. "IO_NUMBER",
  133. "DOUBLE_AND",
  134. "DOUBLE_OR",
  135. "DOUBLE_SEMICOLON",
  136. "DOUBLE_LESS_THAN",
  137. "DOUBLE_GREATER_THAN",
  138. "LESS_THAN_AND",
  139. "GREATER_THAN_AND",
  140. "LESS_THAN_GREATER_THAN",
  141. "DOUBLE_LESS_THAN_DASH",
  142. "CLOBBER",
  143. "IF",
  144. "THEN",
  145. "ELSE",
  146. "ELIF",
  147. "FI",
  148. "DO",
  149. "DONE",
  150. "CASE",
  151. "ESAC",
  152. "WHILE",
  153. "UNTIL",
  154. "FOR",
  155. "TOKEN_IN",
  156. };
  157. //
  158. // ------------------------------------------------------------------ Functions
  159. //
  160. BOOL
  161. ShInitializeLexer (
  162. PSHELL_LEXER_STATE Lexer,
  163. FILE *InputFile,
  164. PSTR InputBuffer,
  165. UINTN InputBufferSize
  166. )
  167. /*++
  168. Routine Description:
  169. This routine initializes the shell lexer state.
  170. Arguments:
  171. Lexer - Supplies a pointer to the lexer state.
  172. InputFile - Supplies an optional pointer to the input file.
  173. InputBuffer - Supplies an optional pointer to the input buffer to use. If
  174. no buffer is provided one will be created, otherwise the provided one
  175. will be copied.
  176. InputBufferSize - Supplies the size of the provided input buffer in bytes
  177. including the null terminator.
  178. Return Value:
  179. TRUE on success.
  180. FALSE on failure.
  181. --*/
  182. {
  183. BOOL Result;
  184. Result = FALSE;
  185. memset(Lexer, 0, sizeof(SHELL_LEXER_STATE));
  186. Lexer->TokenType = -1;
  187. Lexer->InputFile = InputFile;
  188. Lexer->LineNumber = 1;
  189. INITIALIZE_LIST_HEAD(&(Lexer->HereDocumentList));
  190. if (InputBuffer != NULL) {
  191. Lexer->InputBuffer = malloc(InputBufferSize);
  192. if (Lexer->InputBuffer == NULL) {
  193. goto InitializeLexerEnd;
  194. }
  195. memcpy(Lexer->InputBuffer, InputBuffer, InputBufferSize);
  196. Lexer->InputBufferSize = InputBufferSize;
  197. Lexer->InputBufferCapacity = InputBufferSize;
  198. } else {
  199. Lexer->InputBuffer = malloc(DEFAULT_INPUT_BUFFER_SIZE);
  200. if (Lexer->InputBuffer == NULL) {
  201. goto InitializeLexerEnd;
  202. }
  203. Lexer->InputBufferCapacity = DEFAULT_INPUT_BUFFER_SIZE;
  204. }
  205. Lexer->TokenBuffer = malloc(DEFAULT_TOKEN_BUFFER_SIZE);
  206. if (Lexer->TokenBuffer == NULL) {
  207. goto InitializeLexerEnd;
  208. }
  209. Lexer->TokenBufferCapacity = DEFAULT_TOKEN_BUFFER_SIZE;
  210. Result = TRUE;
  211. InitializeLexerEnd:
  212. if (Result == FALSE) {
  213. if (Lexer->InputBuffer != NULL) {
  214. free(Lexer->InputBuffer);
  215. }
  216. if (Lexer->TokenBuffer != NULL) {
  217. free(Lexer->TokenBuffer);
  218. }
  219. }
  220. return Result;
  221. }
  222. VOID
  223. ShDestroyLexer (
  224. PSHELL_LEXER_STATE Lexer
  225. )
  226. /*++
  227. Routine Description:
  228. This routine tears down the shell lexer state.
  229. Arguments:
  230. Lexer - Supplies a pointer to the lexer state.
  231. Return Value:
  232. None.
  233. --*/
  234. {
  235. if (Lexer->InputBuffer != NULL) {
  236. free(Lexer->InputBuffer);
  237. Lexer->InputBuffer = NULL;
  238. }
  239. if (Lexer->TokenBuffer != NULL) {
  240. free(Lexer->TokenBuffer);
  241. Lexer->TokenBuffer = NULL;
  242. }
  243. if (Lexer->InputFile != NULL) {
  244. if (Lexer->InputFile != stdin) {
  245. fclose(Lexer->InputFile);
  246. }
  247. Lexer->InputFile = NULL;
  248. }
  249. return;
  250. }
  251. BOOL
  252. ShGetToken (
  253. PSHELL Shell,
  254. BOOL FirstCommandToken
  255. )
  256. /*++
  257. Routine Description:
  258. This routine fetches the next token out of the shell input.
  259. Arguments:
  260. Shell - Supplies a pointer to the shell to read from.
  261. FirstCommandToken - Supplies a boolean indicating if this token could be
  262. the first word in a command, in which case alias substitution will be
  263. enabled.
  264. Return Value:
  265. TRUE on success. The next token will be written into the shell structure.
  266. FALSE on failure.
  267. --*/
  268. {
  269. BOOL AddCharacter;
  270. INT Character;
  271. UINTN CharacterIndex;
  272. BOOL Delimit;
  273. BOOL InComment;
  274. BOOL IsOperator;
  275. PSHELL_LEXER_STATE Lexer;
  276. CHAR Quote;
  277. ULONG QuoteLineNumber;
  278. BOOL Result;
  279. BOOL UnputCharacter;
  280. Delimit = FALSE;
  281. InComment = FALSE;
  282. IsOperator = FALSE;
  283. Lexer = &(Shell->Lexer);
  284. Lexer->TokenType = -1;
  285. Lexer->TokenBufferSize = 0;
  286. Lexer->LastAlias = NULL;
  287. Quote = 0;
  288. QuoteLineNumber = 0;
  289. while (TRUE) {
  290. AddCharacter = TRUE;
  291. UnputCharacter = FALSE;
  292. Result = ShGetInputCharacter(Shell, &Character);
  293. if (Result == FALSE) {
  294. goto GetTokenEnd;
  295. }
  296. //
  297. // If inside a quote of some kind, scan according to those rules.
  298. // Single quotes are only ended by another single quote. Double quotes
  299. // are ended by an unescaped double quote.
  300. //
  301. if ((Quote != 0) && (Character != '\\')) {
  302. assert((Quote == '"') || (Quote == '\''));
  303. //
  304. // Watch out for unterminated quotes.
  305. //
  306. if (Character == EOF) {
  307. ShLexerError(Shell,
  308. "Unterminated string starting at line %d.\n",
  309. QuoteLineNumber);
  310. Result = FALSE;
  311. goto GetTokenEnd;
  312. }
  313. //
  314. // Escape the magic characters when in quotes to make them unmagic.
  315. //
  316. if ((strchr(ShQuoteEscapeCharacters, Character) != NULL) ||
  317. ((Quote == '\'') &&
  318. ((Character == '$') || (Character == '`')))) {
  319. Result = ShAddCharacterToTokenBuffer(Shell,
  320. SHELL_CONTROL_ESCAPE);
  321. if (Result == FALSE) {
  322. goto GetTokenEnd;
  323. }
  324. }
  325. if (Quote == '\'') {
  326. if (Character == '\'') {
  327. Quote = 0;
  328. Character = SHELL_CONTROL_QUOTE;
  329. }
  330. } else if (Quote == '"') {
  331. if (Character == '"') {
  332. Quote = 0;
  333. Character = SHELL_CONTROL_QUOTE;
  334. } else if ((Character == '`') || (Character == '$')) {
  335. Result = ShScanExpansion(Shell, Character);
  336. if (Result == FALSE) {
  337. goto GetTokenEnd;
  338. }
  339. AddCharacter = FALSE;
  340. }
  341. }
  342. //
  343. // If inside a comment, wait for a newline. When the newline comes,
  344. // put it back so it gets the full newline treatment on the next pass.
  345. //
  346. } else if (InComment != FALSE) {
  347. AddCharacter = FALSE;
  348. if (Character == '\n') {
  349. UnputCharacter = TRUE;
  350. InComment = FALSE;
  351. } else if (Character == EOF) {
  352. InComment = FALSE;
  353. }
  354. //
  355. // If the end of the input is found, delimit the current token, or
  356. // return it by itself.
  357. //
  358. } else if (Character == EOF) {
  359. Delimit = TRUE;
  360. if (Lexer->TokenBufferSize != 0) {
  361. AddCharacter = FALSE;
  362. if (Character != EOF) {
  363. UnputCharacter = TRUE;
  364. }
  365. } else {
  366. Lexer->TokenType = TOKEN_END_OF_FILE;
  367. }
  368. //
  369. // If the previous character was an operator and this one can glom on,
  370. // then do it.
  371. //
  372. } else if (IsOperator != FALSE) {
  373. assert(Lexer->TokenBufferSize != 0);
  374. IsOperator = FALSE;
  375. Delimit = TRUE;
  376. //
  377. // This is the second byte, so look at the first.
  378. //
  379. if (Lexer->TokenBufferSize == 1) {
  380. switch (Lexer->TokenBuffer[0]) {
  381. //
  382. // Allow <<, <&, <>, and <<-.
  383. //
  384. case '<':
  385. if (Character == '&') {
  386. Lexer->TokenType = TOKEN_LESS_THAN_AND;
  387. } else if (Character == '<') {
  388. Delimit = FALSE;
  389. IsOperator = TRUE;
  390. } else if (Character == '>') {
  391. Lexer->TokenType = TOKEN_LESS_THAN_GREATER_THAN;
  392. } else {
  393. AddCharacter = FALSE;
  394. UnputCharacter = TRUE;
  395. }
  396. break;
  397. //
  398. // Allow >>, >&, and >|.
  399. //
  400. case '>':
  401. if (Character == '&') {
  402. Lexer->TokenType = TOKEN_GREATER_THAN_AND;
  403. } else if (Character == '|') {
  404. Lexer->TokenType = TOKEN_CLOBBER;
  405. } else if (Character == '>') {
  406. Lexer->TokenType = TOKEN_DOUBLE_GREATER_THAN;
  407. } else {
  408. AddCharacter = FALSE;
  409. UnputCharacter = TRUE;
  410. }
  411. break;
  412. //
  413. // Allow for ;;.
  414. //
  415. case ';':
  416. if (Character == ';') {
  417. Lexer->TokenType = TOKEN_DOUBLE_SEMICOLON;
  418. } else {
  419. AddCharacter = FALSE;
  420. UnputCharacter = TRUE;
  421. }
  422. break;
  423. //
  424. // Allow for &&.
  425. //
  426. case '&':
  427. if (Character == '&') {
  428. Lexer->TokenType = TOKEN_DOUBLE_AND;
  429. } else {
  430. AddCharacter = FALSE;
  431. UnputCharacter = TRUE;
  432. }
  433. break;
  434. //
  435. // Allow for ||.
  436. //
  437. case '|':
  438. if (Character == '|') {
  439. Lexer->TokenType = TOKEN_DOUBLE_OR;
  440. } else {
  441. AddCharacter = FALSE;
  442. UnputCharacter = TRUE;
  443. }
  444. break;
  445. default:
  446. assert(FALSE);
  447. Result = FALSE;
  448. goto GetTokenEnd;
  449. }
  450. //
  451. // The only three character operator is <<-.
  452. //
  453. } else {
  454. assert(Lexer->TokenBufferSize == 2);
  455. assert((Lexer->TokenBuffer[0] == '<') &&
  456. (Lexer->TokenBuffer[1] == '<'));
  457. if (Character == '-') {
  458. Lexer->TokenType = TOKEN_DOUBLE_LESS_THAN_DASH;
  459. } else {
  460. Lexer->TokenType = TOKEN_DOUBLE_LESS_THAN;
  461. AddCharacter = FALSE;
  462. UnputCharacter = TRUE;
  463. }
  464. }
  465. //
  466. // Watch out for the beginning of a quoted section.
  467. //
  468. } else if ((Character == '\'') || (Character == '"')) {
  469. Quote = Character;
  470. Character = SHELL_CONTROL_QUOTE;
  471. QuoteLineNumber = Lexer->LineNumber;
  472. Lexer->TokenType = TOKEN_WORD;
  473. //
  474. // If it's a backslash, escape the next character, or prepare a line
  475. // continuation. This logic is entered even if inside a quoted region.
  476. //
  477. } else if (Character == '\\') {
  478. if (Quote == '\'') {
  479. //
  480. // In single quotes, the backslash is escaped and literal.
  481. //
  482. Result = ShAddCharacterToTokenBuffer(Shell,
  483. SHELL_CONTROL_ESCAPE);
  484. if (Result == FALSE) {
  485. goto GetTokenEnd;
  486. }
  487. //
  488. // Not in single quotes, so look at the next character.
  489. //
  490. } else {
  491. Result = ShGetInputCharacter(Shell, &Character);
  492. if (Result == FALSE) {
  493. goto GetTokenEnd;
  494. }
  495. if (Character == EOF) {
  496. Character = '\\';
  497. //
  498. // If it's a newline, then it's a line continuation, so just
  499. // swallow the backslash and add the newline as a normal
  500. // character (ie do nothing).
  501. //
  502. } else if (Character == '\n') {
  503. if (Quote == 0) {
  504. ShPrintPrompt(Shell, 2);
  505. }
  506. AddCharacter = FALSE;
  507. } else {
  508. //
  509. // If inside double quotes and the backslash isn't quoting
  510. // anything, then add it as a literal.
  511. //
  512. if ((Quote == '"') && (Character != '\\') &&
  513. (Character != '`') && (Character != '$') &&
  514. (Character != '"')) {
  515. Result = ShAddCharacterToTokenBuffer(Shell, '\\');
  516. if (Result == FALSE) {
  517. goto GetTokenEnd;
  518. }
  519. }
  520. //
  521. // Escape the next character, whatever it may be.
  522. //
  523. Result = ShAddCharacterToTokenBuffer(Shell,
  524. SHELL_CONTROL_ESCAPE);
  525. if (Result == FALSE) {
  526. goto GetTokenEnd;
  527. }
  528. }
  529. if ((Character != '\n') && (Lexer->TokenType == -1)) {
  530. Lexer->TokenType = TOKEN_WORD;
  531. }
  532. }
  533. //
  534. // If it's an unquoted dollar sign or backquote, scan past the
  535. // following expansion. The expansion does not delimit the token.
  536. //
  537. } else if ((Character == '$') || (Character == '`')) {
  538. Lexer->TokenType = TOKEN_WORD;
  539. Result = ShScanExpansion(Shell, Character);
  540. if (Result == FALSE) {
  541. goto GetTokenEnd;
  542. }
  543. AddCharacter = FALSE;
  544. //
  545. // Check for a new operator. Lump newlines in here too since their
  546. // processing is about the same. Notice that bang and the braces aren't
  547. // in here, as they're recognized at the token level rather than the
  548. // lexical level.
  549. //
  550. } else if ((Character == '&') || (Character == '|') ||
  551. (Character == ';') || (Character == '<') ||
  552. (Character == '>') || (Character == ')') ||
  553. (Character == '(') || (Character == '\n')) {
  554. //
  555. // If there was a previous token, delimit it now.
  556. //
  557. if (Lexer->TokenBufferSize != 0) {
  558. Delimit = TRUE;
  559. AddCharacter = FALSE;
  560. UnputCharacter = TRUE;
  561. //
  562. // If this is a redirection symbol and everything in the token
  563. // is a digit, then this is an I/O number token.
  564. //
  565. if ((Lexer->TokenType == TOKEN_WORD) &&
  566. ((Character == '>') || (Character == '<'))) {
  567. for (CharacterIndex = 0;
  568. CharacterIndex < Lexer->TokenBufferSize;
  569. CharacterIndex += 1) {
  570. if ((Lexer->TokenBuffer[CharacterIndex] < '0') ||
  571. (Lexer->TokenBuffer[CharacterIndex] > '9')) {
  572. break;
  573. }
  574. }
  575. if (CharacterIndex == Lexer->TokenBufferSize) {
  576. Lexer->TokenType = TOKEN_IO_NUMBER;
  577. }
  578. }
  579. //
  580. // The token buffer is empty, this operator is up. If there's a
  581. // possibility that it's a multi-character operator, then don't
  582. // delimit right away.
  583. //
  584. } else {
  585. Lexer->TokenType = Character;
  586. if ((Character == '>') || (Character == '<') ||
  587. (Character == '&') || (Character == '|') ||
  588. (Character == ';')) {
  589. IsOperator = TRUE;
  590. } else {
  591. Delimit = TRUE;
  592. //
  593. // If this is a newline, parse out any pending here
  594. // documents.
  595. //
  596. if (Character == '\n') {
  597. Result = ShScanPendingHereDocuments(Shell);
  598. if (Result == FALSE) {
  599. goto GetTokenEnd;
  600. }
  601. }
  602. }
  603. }
  604. //
  605. // If it's an unquoted space, any token containing the previous
  606. // character is delimited, and the blank is discarded.
  607. //
  608. } else if (isspace(Character)) {
  609. AddCharacter = FALSE;
  610. if (Lexer->TokenBufferSize != 0) {
  611. Delimit = TRUE;
  612. }
  613. //
  614. // Look out for a comment. Comments can only start if there's not
  615. // already a word in progress.
  616. //
  617. } else if ((Lexer->TokenBufferSize == 0) && (Character == '#')) {
  618. AddCharacter = FALSE;
  619. InComment = TRUE;
  620. //
  621. // It doesn't fit any other interesting case, so it's just a word.
  622. //
  623. } else {
  624. if (Lexer->TokenType == -1) {
  625. Lexer->TokenType = TOKEN_WORD;
  626. }
  627. //
  628. // If it's a control character, escape it.
  629. //
  630. if ((Character == SHELL_CONTROL_QUOTE) ||
  631. (Character == SHELL_CONTROL_ESCAPE)) {
  632. Result = ShAddCharacterToTokenBuffer(Shell,
  633. SHELL_CONTROL_ESCAPE);
  634. if (Result == FALSE) {
  635. goto GetTokenEnd;
  636. }
  637. }
  638. }
  639. if ((Quote != 0) && (Character == '\n')) {
  640. ShPrintPrompt(Shell, 2);
  641. }
  642. //
  643. // Add the character if desired.
  644. //
  645. if (AddCharacter != FALSE) {
  646. Result = ShAddCharacterToTokenBuffer(Shell, Character);
  647. if (Result == FALSE) {
  648. goto GetTokenEnd;
  649. }
  650. }
  651. if (UnputCharacter != FALSE) {
  652. assert(AddCharacter == FALSE);
  653. SHELL_LEXER_UNPUT(Shell, Character);
  654. }
  655. //
  656. // If the token is over, null terminate it, put back this character,
  657. // and break out.
  658. //
  659. if (Delimit != FALSE) {
  660. Delimit = FALSE;
  661. Result = ShAddCharacterToTokenBuffer(Shell, '\0');
  662. if (Result == FALSE) {
  663. goto GetTokenEnd;
  664. }
  665. if (Lexer->TokenType == TOKEN_WORD) {
  666. ShCheckForReservedWord(Shell);
  667. }
  668. //
  669. // If it's still just a word but has an equals in it, it's an
  670. // assignment word. It could also be a ! } or { if it's just that
  671. // character.
  672. //
  673. if (Lexer->TokenType == TOKEN_WORD) {
  674. if (strchr(Lexer->TokenBuffer, '=') != NULL) {
  675. Lexer->TokenType = TOKEN_ASSIGNMENT_WORD;
  676. } else if (Lexer->TokenBufferSize == 2) {
  677. if (Lexer->TokenBuffer[0] == '!') {
  678. Lexer->TokenType = '!';
  679. } else if (Lexer->TokenBuffer[0] == '{') {
  680. Lexer->TokenType = '{';
  681. } else if (Lexer->TokenBuffer[0] == '}') {
  682. Lexer->TokenType = '}';
  683. }
  684. }
  685. }
  686. //
  687. // If even after all that it's still a word and it's the first
  688. // word of the command, perform alias substitution.
  689. //
  690. if ((FirstCommandToken != FALSE) &&
  691. (Lexer->TokenType == TOKEN_WORD)) {
  692. Result = ShPerformAliasSubstitution(Shell);
  693. if (Result == FALSE) {
  694. goto GetTokenEnd;
  695. }
  696. } else {
  697. assert(Shell->Lexer.TokenType != -1);
  698. }
  699. //
  700. // If alias substitution didn't kill this token, then break out
  701. // and return it.
  702. //
  703. if (Shell->Lexer.TokenType != -1) {
  704. break;
  705. } else {
  706. Character = 0;
  707. }
  708. }
  709. }
  710. GetTokenEnd:
  711. assert((Result == FALSE) || (Lexer->TokenType != -1));
  712. if (ShDebugLexer != FALSE) {
  713. if (Result != FALSE) {
  714. if (Lexer->TokenType == TOKEN_END_OF_FILE) {
  715. ShPrintTrace(Shell, "Reached end of file.\n");
  716. } else if (Lexer->TokenType < 0xFF) {
  717. if (Lexer->TokenType < 0x20) {
  718. if (Lexer->TokenType == '\n') {
  719. ShPrintTrace(Shell,
  720. "%20s: Line %d\n",
  721. "<newline>",
  722. Lexer->LineNumber);
  723. } else {
  724. ShPrintTrace(Shell, "%20d: \n", Lexer->TokenType);
  725. }
  726. } else {
  727. ShPrintTrace(Shell,
  728. "%20c: %s\n",
  729. Lexer->TokenType,
  730. Lexer->TokenBuffer);
  731. }
  732. } else {
  733. assert(Lexer->TokenType >= TOKEN_WORD);
  734. ShPrintTrace(Shell,
  735. "%20s: %s\n",
  736. ShTokenStrings[Lexer->TokenType - TOKEN_WORD],
  737. Lexer->TokenBuffer);
  738. }
  739. } else {
  740. ShPrintTrace(Shell,
  741. "Error: Failed to parse token at line %d.\n",
  742. Lexer->LineNumber);
  743. }
  744. }
  745. return Result;
  746. }
  747. BOOL
  748. ShScanPastExpansion (
  749. PSTR String,
  750. UINTN StringSize,
  751. PUINTN ExpansionSize
  752. )
  753. /*++
  754. Routine Description:
  755. This routine is called to find the end of an expansion.
  756. Arguments:
  757. String - Supplies a pointer to the string at an expansion to scan.
  758. StringSize - Supplies the number of bytes in the string.
  759. ExpansionSize - Supplies a pointer where the size of the expansion in
  760. bytes will be returned.
  761. Return Value:
  762. TRUE on success. The extent of the expansion will be added to the token
  763. buffer.
  764. FALSE on failure.
  765. --*/
  766. {
  767. CHAR Character;
  768. UINTN Index;
  769. UINTN InnerExpansionSize;
  770. UINTN OpenCount;
  771. CHAR Quote;
  772. BOOL RecognizeComments;
  773. BOOL RecognizeQuotes;
  774. BOOL Result;
  775. EXPANSION_SYNTAX Syntax;
  776. BOOL WasBackslash;
  777. BOOL WasName;
  778. BOOL WasParentheses;
  779. Index = 1;
  780. OpenCount = 0;
  781. //
  782. // Figure out what type of expansion this is.
  783. //
  784. assert(StringSize != 0);
  785. assert((*String == '$') || (*String == '`') || (*String == '~'));
  786. RecognizeComments = TRUE;
  787. RecognizeQuotes = TRUE;
  788. if (*String == '`') {
  789. Syntax = ExpansionSyntaxBackquote;
  790. RecognizeComments = FALSE;
  791. RecognizeQuotes = FALSE;
  792. } else if (*String == '~') {
  793. Syntax = ExpansionSyntaxName;
  794. } else {
  795. assert(*String == '$');
  796. Character = String[Index];
  797. //
  798. // If it was a digit or a special parameter, then that's all there is
  799. // to it.
  800. //
  801. if (((Character >= '0') && (Character <= '9')) ||
  802. (Character == '@') || (Character == '*') || (Character == '#') ||
  803. (Character == '?') || (Character == '-') || (Character == '$') ||
  804. (Character == '!')) {
  805. *ExpansionSize = Index + 1;
  806. return TRUE;
  807. }
  808. //
  809. // It shouldn't be the end of file.
  810. //
  811. if (Character == '\0') {
  812. *ExpansionSize = Index;
  813. return TRUE;
  814. //
  815. // Note if it's a single curly.
  816. //
  817. } else if (Character == '{') {
  818. Syntax = ExpansionSyntaxCurlyBrace;
  819. RecognizeComments = FALSE;
  820. //
  821. // Note if it's a single parentheses. It could also be a double
  822. // parentheses.
  823. //
  824. } else if (Character == '(') {
  825. Syntax = ExpansionSyntaxParentheses;
  826. Index += 1;
  827. if (Index == StringSize) {
  828. return FALSE;
  829. }
  830. Character = String[Index];
  831. if (Character == '\0') {
  832. return FALSE;
  833. } else if (Character == '(') {
  834. Syntax = ExpansionSyntaxDoubleParentheses;
  835. Index += 1;
  836. }
  837. //
  838. // The only other option is it's a raw name.
  839. //
  840. } else if (SHELL_NAME_FIRST_CHARACTER(Character) != FALSE) {
  841. Syntax = ExpansionSyntaxName;
  842. //
  843. // Something funky is following the dollar sign.
  844. //
  845. } else {
  846. *ExpansionSize = 0;
  847. return TRUE;
  848. }
  849. }
  850. if (Syntax == ExpansionSyntaxName) {
  851. RecognizeComments = FALSE;
  852. RecognizeQuotes = FALSE;
  853. }
  854. //
  855. // Loop looking at characters until the parameter is finished.
  856. //
  857. Quote = 0;
  858. WasBackslash = FALSE;
  859. WasParentheses = FALSE;
  860. WasName = FALSE;
  861. while (TRUE) {
  862. Character = String[Index];
  863. //
  864. // If quoting is in progress, look for the end.
  865. //
  866. if (Quote != 0) {
  867. if ((Quote == '\'') || (Quote == SHELL_CONTROL_QUOTE)) {
  868. if (Character == Quote) {
  869. Quote = 0;
  870. }
  871. } else if (Quote == '"') {
  872. if ((WasBackslash == FALSE) && (Character == '"')) {
  873. Quote = 0;
  874. }
  875. } else if (Quote == '#') {
  876. if (Character == '\n') {
  877. Quote = 0;
  878. }
  879. } else {
  880. assert((Quote == '\\') || (Quote == SHELL_CONTROL_ESCAPE));
  881. Quote = 0;
  882. }
  883. //
  884. // If eligible for quotes, look for quotes starting.
  885. //
  886. } else if ((RecognizeQuotes != FALSE) &&
  887. ((Character == '\'') || (Character == '"') ||
  888. (Character == '\\') ||
  889. (Character == SHELL_CONTROL_QUOTE) ||
  890. (Character == SHELL_CONTROL_ESCAPE))) {
  891. Quote = Character;
  892. //
  893. // If eligible for comments, look for comments starting.
  894. //
  895. } else if ((RecognizeComments != FALSE) && (Character == '#') &&
  896. (WasName == FALSE)) {
  897. Quote = Character;
  898. //
  899. // No quotes or comments, look for the end expansion character.
  900. //
  901. } else {
  902. switch (Syntax) {
  903. case ExpansionSyntaxName:
  904. if (!SHELL_NAME_CHARACTER(Character)) {
  905. *ExpansionSize = Index;
  906. return TRUE;
  907. }
  908. break;
  909. case ExpansionSyntaxBackquote:
  910. if ((Character == '`') && (WasBackslash == FALSE)) {
  911. *ExpansionSize = Index + 1;
  912. return TRUE;
  913. }
  914. break;
  915. case ExpansionSyntaxCurlyBrace:
  916. if (Character == '}') {
  917. *ExpansionSize = Index + 1;
  918. return TRUE;
  919. }
  920. break;
  921. case ExpansionSyntaxParentheses:
  922. if (Character == '(') {
  923. OpenCount += 1;
  924. } else if (Character == ')') {
  925. if (OpenCount == 0) {
  926. *ExpansionSize = Index + 1;
  927. return TRUE;
  928. } else {
  929. OpenCount -= 1;
  930. }
  931. }
  932. break;
  933. case ExpansionSyntaxDoubleParentheses:
  934. if (Character == ')') {
  935. if (OpenCount != 0) {
  936. OpenCount -= 1;
  937. } else {
  938. if (WasParentheses != FALSE) {
  939. *ExpansionSize = Index + 1;
  940. return TRUE;
  941. } else {
  942. WasParentheses = TRUE;
  943. }
  944. }
  945. } else {
  946. WasParentheses = FALSE;
  947. if (Character == '(') {
  948. OpenCount += 1;
  949. }
  950. }
  951. break;
  952. default:
  953. assert(FALSE);
  954. return FALSE;
  955. }
  956. //
  957. // Look for a new expansion beginning.
  958. //
  959. if (((Character == '$') || (Character == '`')) &&
  960. (Syntax != ExpansionSyntaxBackquote)) {
  961. Result = ShScanPastExpansion(String + Index,
  962. StringSize - Index,
  963. &InnerExpansionSize);
  964. if (Result == FALSE) {
  965. return FALSE;
  966. }
  967. if (Index == StringSize) {
  968. return FALSE;
  969. }
  970. if (InnerExpansionSize == 0) {
  971. InnerExpansionSize = 1;
  972. }
  973. Index += InnerExpansionSize;
  974. WasBackslash = FALSE;
  975. WasParentheses = FALSE;
  976. WasName = FALSE;
  977. continue;
  978. }
  979. }
  980. if (Character == '\\') {
  981. WasBackslash = !WasBackslash;
  982. } else if (Character == SHELL_CONTROL_ESCAPE) {
  983. WasBackslash = TRUE;
  984. } else {
  985. WasBackslash = FALSE;
  986. }
  987. if (SHELL_NAME_CHARACTER(Character)) {
  988. WasName = TRUE;
  989. } else {
  990. WasName = FALSE;
  991. }
  992. Index += 1;
  993. if (Index == StringSize) {
  994. return FALSE;
  995. }
  996. }
  997. return FALSE;
  998. }
  999. //
  1000. // --------------------------------------------------------- Internal Functions
  1001. //
  1002. BOOL
  1003. ShGetInputCharacter (
  1004. PSHELL Shell,
  1005. PINT Character
  1006. )
  1007. /*++
  1008. Routine Description:
  1009. This routine gets a character from the input stream.
  1010. Arguments:
  1011. Shell - Supplies a pointer to the shell to read from.
  1012. Character - Supplies a pointer where the character will be returned on
  1013. success.
  1014. Return Value:
  1015. TRUE on success.
  1016. FALSE on failure.
  1017. --*/
  1018. {
  1019. BOOL Result;
  1020. do {
  1021. Result = ShGetAnyInputCharacter(Shell, Character);
  1022. } while ((Result != FALSE) &&
  1023. ((*Character == '\r') || (*Character == '\0')));
  1024. return Result;
  1025. }
  1026. BOOL
  1027. ShGetAnyInputCharacter (
  1028. PSHELL Shell,
  1029. PINT Character
  1030. )
  1031. /*++
  1032. Routine Description:
  1033. This routine gets a character from the input stream.
  1034. Arguments:
  1035. Shell - Supplies a pointer to the shell to read from.
  1036. Character - Supplies a pointer where the character will be returned on
  1037. success.
  1038. Return Value:
  1039. TRUE on success.
  1040. FALSE on failure.
  1041. --*/
  1042. {
  1043. ssize_t BytesRead;
  1044. size_t BytesToRead;
  1045. PSHELL_LEXER_STATE Lexer;
  1046. PSTR NewInputBuffer;
  1047. ULONG NewInputBufferSize;
  1048. BOOL Result;
  1049. Lexer = &(Shell->Lexer);
  1050. if (Lexer->UnputCharacterValid != FALSE) {
  1051. *Character = Lexer->UnputCharacter;
  1052. if (*Character == '\n') {
  1053. Lexer->LineNumber += 1;
  1054. }
  1055. Lexer->UnputCharacterValid = FALSE;
  1056. return TRUE;
  1057. }
  1058. //
  1059. // If there's more in the buffer, return that.
  1060. //
  1061. if (Lexer->InputBufferNextIndex < Lexer->InputBufferSize) {
  1062. *Character = Lexer->InputBuffer[Lexer->InputBufferNextIndex];
  1063. Lexer->InputBufferNextIndex += 1;
  1064. goto GetInputCharacterEnd;
  1065. }
  1066. //
  1067. // If there is no file, donezo.
  1068. //
  1069. if (((Shell->Options & SHELL_OPTION_READ_FROM_STDIN) == 0) &&
  1070. (Lexer->InputFile == NULL)) {
  1071. *Character = EOF;
  1072. goto GetInputCharacterEnd;
  1073. }
  1074. if ((Shell->Options & SHELL_OPTION_INPUT_BUFFER_ONLY) != 0) {
  1075. *Character = EOF;
  1076. goto GetInputCharacterEnd;
  1077. }
  1078. //
  1079. // Read from the file, or do fancy line-based input for interactive shells.
  1080. //
  1081. if ((Shell->Options & SHELL_OPTION_RAW_INPUT) != 0) {
  1082. Result = ShReadLine(Shell, &NewInputBuffer, &NewInputBufferSize);
  1083. if (Result == FALSE) {
  1084. return FALSE;
  1085. }
  1086. //
  1087. // Change the null terminator into a newline.
  1088. //
  1089. if ((NewInputBufferSize != 0) &&
  1090. (NewInputBuffer[NewInputBufferSize - 1] == '\0')) {
  1091. NewInputBuffer[NewInputBufferSize - 1] = '\n';
  1092. }
  1093. if (Lexer->InputBuffer != NULL) {
  1094. free(Lexer->InputBuffer);
  1095. Lexer->InputBuffer = NewInputBuffer;
  1096. Lexer->InputBufferCapacity = NewInputBufferSize;
  1097. }
  1098. BytesRead = NewInputBufferSize;
  1099. } else {
  1100. if ((Shell->Options & SHELL_OPTION_INTERACTIVE) != 0) {
  1101. BytesToRead = 1;
  1102. } else {
  1103. BytesToRead = Lexer->InputBufferCapacity;
  1104. }
  1105. //
  1106. // Read using a file stream.
  1107. //
  1108. if (Lexer->InputFile != NULL) {
  1109. do {
  1110. BytesRead = fread(Lexer->InputBuffer,
  1111. 1,
  1112. BytesToRead,
  1113. Lexer->InputFile);
  1114. } while ((BytesRead == 0) && (errno == EINTR));
  1115. if (BytesRead <= 0) {
  1116. if (feof(Lexer->InputFile) != 0) {
  1117. *Character = EOF;
  1118. goto GetInputCharacterEnd;
  1119. }
  1120. return FALSE;
  1121. }
  1122. //
  1123. // If reading from standard in, read directly from the descriptor.
  1124. //
  1125. } else {
  1126. assert((Shell->Options & SHELL_OPTION_READ_FROM_STDIN) != 0);
  1127. do {
  1128. BytesRead = read(STDIN_FILENO, Lexer->InputBuffer, BytesToRead);
  1129. } while ((BytesRead < 0) && (errno == EINTR));
  1130. if (BytesRead <= 0) {
  1131. if (BytesRead == 0) {
  1132. *Character = EOF;
  1133. goto GetInputCharacterEnd;
  1134. }
  1135. return FALSE;
  1136. }
  1137. }
  1138. }
  1139. Lexer->InputBufferSize = BytesRead;
  1140. *Character = Lexer->InputBuffer[0];
  1141. Lexer->InputBufferNextIndex = 1;
  1142. GetInputCharacterEnd:
  1143. if ((*Character != 0) &&
  1144. ((Shell->Options & SHELL_OPTION_DISPLAY_INPUT) != 0)) {
  1145. if (*Character == EOF) {
  1146. ShPrintTrace(Shell, "<EOF>");
  1147. } else {
  1148. ShPrintTrace(Shell, "%c", *Character);
  1149. }
  1150. }
  1151. if (*Character == '\n') {
  1152. Lexer->LineNumber += 1;
  1153. }
  1154. return TRUE;
  1155. }
  1156. BOOL
  1157. ShAddCharacterToTokenBuffer (
  1158. PSHELL Shell,
  1159. CHAR Character
  1160. )
  1161. /*++
  1162. Routine Description:
  1163. This routine adds the given character to the token buffer, expanding it if
  1164. necessary.
  1165. Arguments:
  1166. Shell - Supplies a pointer to the shell to operate on.
  1167. Character - Supplies the character to add.
  1168. Return Value:
  1169. TRUE on success.
  1170. FALSE on failure.
  1171. --*/
  1172. {
  1173. PSHELL_LEXER_STATE Lexer;
  1174. UINTN NewCapacity;
  1175. Lexer = &(Shell->Lexer);
  1176. if (Lexer->TokenBufferSize < Lexer->TokenBufferCapacity) {
  1177. Lexer->TokenBuffer[Lexer->TokenBufferSize] = Character;
  1178. Lexer->TokenBufferSize += 1;
  1179. return TRUE;
  1180. }
  1181. //
  1182. // Bummer, the buffer needs to be reallocated.
  1183. //
  1184. NewCapacity = Lexer->TokenBufferCapacity * 2;
  1185. Lexer->TokenBuffer = realloc(Lexer->TokenBuffer, NewCapacity);
  1186. if (Lexer->TokenBuffer == NULL) {
  1187. printf("Error: Failed to allocate %d bytes for expanded token "
  1188. "buffer.\n");
  1189. return FALSE;
  1190. }
  1191. //
  1192. // Now add the byte.
  1193. //
  1194. Lexer->TokenBufferCapacity = NewCapacity;
  1195. Lexer->TokenBuffer[Lexer->TokenBufferSize] = Character;
  1196. Lexer->TokenBufferSize += 1;
  1197. return TRUE;
  1198. }
  1199. BOOL
  1200. ShScanExpansion (
  1201. PSHELL Shell,
  1202. INT Character
  1203. )
  1204. /*++
  1205. Routine Description:
  1206. This routine is called when the lexer finds a dollar sign. It recursively
  1207. scans the inside of an expansion such as `...`, $param, ${...}, $(...), and
  1208. $((...)).
  1209. Arguments:
  1210. Shell - Supplies a pointer to the shell to read from.
  1211. Character - Supplies the initial character that caused entry into this
  1212. function. It is assumed that this character has not yet been added to
  1213. the token buffer.
  1214. Return Value:
  1215. TRUE on success. The extent of the expansion will be added to the token
  1216. buffer.
  1217. FALSE on failure.
  1218. --*/
  1219. {
  1220. BOOL AddCharacter;
  1221. BOOL InComment;
  1222. BOOL InWord;
  1223. CHAR LastCharacter;
  1224. PSHELL_LEXER_STATE Lexer;
  1225. ULONG OpenCount;
  1226. CHAR Quote;
  1227. BOOL Result;
  1228. BOOL Stop;
  1229. EXPANSION_SYNTAX Syntax;
  1230. BOOL WasParentheses;
  1231. InComment = FALSE;
  1232. Lexer = &(Shell->Lexer);
  1233. OpenCount = 0;
  1234. Quote = 0;
  1235. WasParentheses = FALSE;
  1236. //
  1237. // First add the dollar sign or backquote to the token buffer.
  1238. //
  1239. Result = ShAddCharacterToTokenBuffer(Shell, Character);
  1240. if (Result == FALSE) {
  1241. return FALSE;
  1242. }
  1243. //
  1244. // Figure out what type of expansion this is.
  1245. //
  1246. if (Character == '`') {
  1247. Syntax = ExpansionSyntaxBackquote;
  1248. } else {
  1249. assert(Character == '$');
  1250. //
  1251. // Get the next character to learn more.
  1252. //
  1253. Result = ShGetInputCharacter(Shell, &Character);
  1254. if ((Result == FALSE) || (Character == EOF)) {
  1255. return TRUE;
  1256. }
  1257. Result = ShAddCharacterToTokenBuffer(Shell, Character);
  1258. if (Result == FALSE) {
  1259. return FALSE;
  1260. }
  1261. //
  1262. // If it was a digit or a special parameter, then that's all there is
  1263. // to it.
  1264. //
  1265. if (((Character >= '0') && (Character <= '9')) ||
  1266. (Character == '@') || (Character == '*') || (Character == '#') ||
  1267. (Character == '?') || (Character == '-') || (Character == '$') ||
  1268. (Character == '!')) {
  1269. return TRUE;
  1270. }
  1271. //
  1272. // Note if it's a single curly.
  1273. //
  1274. if (Character == '{') {
  1275. Syntax = ExpansionSyntaxCurlyBrace;
  1276. //
  1277. // Note if it's a single parentheses. It could also be a double
  1278. // parentheses.
  1279. //
  1280. } else if (Character == '(') {
  1281. Syntax = ExpansionSyntaxParentheses;
  1282. Result = ShGetInputCharacter(Shell, &Character);
  1283. if ((Result == FALSE) || (Character == EOF)) {
  1284. return FALSE;
  1285. }
  1286. Result = ShAddCharacterToTokenBuffer(Shell, Character);
  1287. if (Result == FALSE) {
  1288. return FALSE;
  1289. }
  1290. if (Character == '\0') {
  1291. return FALSE;
  1292. } else if (Character == '(') {
  1293. Syntax = ExpansionSyntaxDoubleParentheses;
  1294. } else {
  1295. SHELL_LEXER_UNPUT(Shell, Character);
  1296. assert(Lexer->TokenBufferSize != 0);
  1297. Lexer->TokenBufferSize -= 1;
  1298. }
  1299. //
  1300. // The only other option is it's a raw name.
  1301. //
  1302. } else if (SHELL_NAME_CHARACTER(Character) != FALSE) {
  1303. Syntax = ExpansionSyntaxName;
  1304. //
  1305. // Something funky is following the dollar sign, this isn't really
  1306. // an expansion.
  1307. //
  1308. } else {
  1309. SHELL_LEXER_UNPUT(Shell, Character);
  1310. assert(Lexer->TokenBufferSize != 0);
  1311. Lexer->TokenBufferSize -= 1;
  1312. return TRUE;
  1313. }
  1314. }
  1315. //
  1316. // Loop getting input until this expansion is over.
  1317. //
  1318. Stop = FALSE;
  1319. while (Stop == FALSE) {
  1320. AddCharacter = TRUE;
  1321. Result = ShGetInputCharacter(Shell, &Character);
  1322. if (Result == FALSE) {
  1323. return FALSE;
  1324. }
  1325. //
  1326. // If inside a quote of some kind, scan according to those rules.
  1327. // Single quotes are only ended by another single quote. Double quotes
  1328. // are ended by an unescaped double quote.
  1329. //
  1330. if ((Quote != 0) && (Character != '\\')) {
  1331. assert((Quote == '"') || (Quote == '\''));
  1332. //
  1333. // Escape the magic characters when in quotes to make them unmagic.
  1334. //
  1335. if (Syntax == ExpansionSyntaxCurlyBrace) {
  1336. if ((strchr(ShQuoteEscapeCharacters, Character) != NULL) ||
  1337. ((Quote == '\'') &&
  1338. ((Character == '$') || (Character == '`')))) {
  1339. Result = ShAddCharacterToTokenBuffer(Shell,
  1340. SHELL_CONTROL_ESCAPE);
  1341. if (Result == FALSE) {
  1342. return FALSE;
  1343. }
  1344. }
  1345. }
  1346. if (Quote == '\'') {
  1347. if (Character == '\'') {
  1348. Quote = 0;
  1349. if (Syntax == ExpansionSyntaxCurlyBrace) {
  1350. Character = SHELL_CONTROL_QUOTE;
  1351. }
  1352. }
  1353. } else if (Quote == '"') {
  1354. if (Character == '"') {
  1355. Quote = 0;
  1356. if (Syntax == ExpansionSyntaxCurlyBrace) {
  1357. Character = SHELL_CONTROL_QUOTE;
  1358. }
  1359. }
  1360. }
  1361. //
  1362. // If inside a comment, wait for a newline. When the newline comes,
  1363. // it's not really handled any differently other than it would break up
  1364. // two parentheses in a row.
  1365. //
  1366. } else if (InComment != FALSE) {
  1367. AddCharacter = FALSE;
  1368. if (Character == '\n') {
  1369. AddCharacter = TRUE;
  1370. WasParentheses = FALSE;
  1371. InComment = FALSE;
  1372. }
  1373. //
  1374. // If it's a backslash, escape the next character, or prepare a line
  1375. // continuation. This logic is entered even if inside a quoted region.
  1376. //
  1377. } else if ((Character == '\\') && (Syntax != ExpansionSyntaxName)) {
  1378. if (Quote == '\'') {
  1379. //
  1380. // In single quotes, the backslash is escaped and literal.
  1381. //
  1382. if (Syntax == ExpansionSyntaxCurlyBrace) {
  1383. Result = ShAddCharacterToTokenBuffer(Shell,
  1384. SHELL_CONTROL_ESCAPE);
  1385. if (Result == FALSE) {
  1386. return FALSE;
  1387. }
  1388. }
  1389. //
  1390. // Not in single quotes, so look at the next character.
  1391. //
  1392. } else {
  1393. Result = ShGetInputCharacter(Shell, &Character);
  1394. if (Result == FALSE) {
  1395. return FALSE;
  1396. }
  1397. //
  1398. // If it's a newline, then it's a line continuation, so just
  1399. // swallow the backslash and newline.
  1400. //
  1401. if (Character == '\n') {
  1402. AddCharacter = FALSE;
  1403. } else {
  1404. if (Syntax == ExpansionSyntaxCurlyBrace) {
  1405. //
  1406. // If inside double quotes and the backslash isn't
  1407. // quoting anything, then add it as a literal.
  1408. //
  1409. if ((Quote == '"') && (Character != '\\') &&
  1410. (Character != '`') && (Character != '$') &&
  1411. (Character != '"')) {
  1412. Result = ShAddCharacterToTokenBuffer(Shell, '\\');
  1413. if (Result == FALSE) {
  1414. return FALSE;
  1415. }
  1416. }
  1417. //
  1418. // Escape the next character, whatever it may be.
  1419. //
  1420. Result = ShAddCharacterToTokenBuffer(
  1421. Shell,
  1422. SHELL_CONTROL_ESCAPE);
  1423. if (Result == FALSE) {
  1424. return FALSE;
  1425. }
  1426. //
  1427. // Pass everything through for non-curly expansion, as it
  1428. // gets reinterpreted inside the subshell.
  1429. //
  1430. } else {
  1431. Result = ShAddCharacterToTokenBuffer(Shell, '\\');
  1432. if (Result == FALSE) {
  1433. return FALSE;
  1434. }
  1435. }
  1436. }
  1437. }
  1438. //
  1439. // Look for the elusive closing sequence.
  1440. //
  1441. } else {
  1442. switch (Syntax) {
  1443. case ExpansionSyntaxName:
  1444. if (SHELL_NAME_CHARACTER(Character) == FALSE) {
  1445. Stop = TRUE;
  1446. AddCharacter = FALSE;
  1447. SHELL_LEXER_UNPUT(Shell, Character);
  1448. }
  1449. break;
  1450. case ExpansionSyntaxBackquote:
  1451. if (Character == '`') {
  1452. Stop = TRUE;
  1453. }
  1454. break;
  1455. case ExpansionSyntaxCurlyBrace:
  1456. case ExpansionSyntaxParentheses:
  1457. if ((Syntax == ExpansionSyntaxParentheses) &&
  1458. (Character == '(')) {
  1459. OpenCount += 1;
  1460. } else if ((Syntax == ExpansionSyntaxParentheses) &&
  1461. (Character == ')')) {
  1462. if (OpenCount != 0) {
  1463. OpenCount -= 1;
  1464. } else {
  1465. Stop = TRUE;
  1466. break;
  1467. }
  1468. //
  1469. // Note that curly braces don't allow recursion or quotes
  1470. // inside the variable name, but they can be in the
  1471. // post-variable-name part (ie ${myvar+"other$var"}).
  1472. //
  1473. } else if ((Syntax == ExpansionSyntaxCurlyBrace) &&
  1474. (Character == '}')) {
  1475. Stop = TRUE;
  1476. break;
  1477. }
  1478. //
  1479. // Watch out for quotes starting.
  1480. //
  1481. if ((Character == '"') || (Character == '\'')) {
  1482. Quote = Character;
  1483. if (Syntax == ExpansionSyntaxCurlyBrace) {
  1484. Character = SHELL_CONTROL_QUOTE;
  1485. }
  1486. //
  1487. // If it's a dollar sign or backquote, recurse into another
  1488. // expansion.
  1489. //
  1490. } else if ((Character == '$') || (Character == '`')) {
  1491. AddCharacter = FALSE;
  1492. Result = ShScanExpansion(Shell, Character);
  1493. if (Result == FALSE) {
  1494. return FALSE;
  1495. }
  1496. //
  1497. // Watch out for a comment beginning, but only if it's not
  1498. // already in the middle of a word. Don't do this inside
  1499. // curly brace expansions, as something like ${#a} means
  1500. // "length of a".
  1501. //
  1502. } else if ((Character == '#') &&
  1503. (Syntax != ExpansionSyntaxCurlyBrace)) {
  1504. InWord = FALSE;
  1505. assert(Lexer->TokenBufferSize != 0);
  1506. LastCharacter =
  1507. Lexer->TokenBuffer[Lexer->TokenBufferSize - 1];
  1508. if (SHELL_NAME_CHARACTER(LastCharacter) != FALSE) {
  1509. InWord = TRUE;
  1510. }
  1511. if (InWord == FALSE) {
  1512. InComment = TRUE;
  1513. AddCharacter = FALSE;
  1514. }
  1515. }
  1516. break;
  1517. case ExpansionSyntaxDoubleParentheses:
  1518. if (Character == ')') {
  1519. if (OpenCount != 0) {
  1520. OpenCount -= 1;
  1521. } else {
  1522. if (WasParentheses != FALSE) {
  1523. Stop = TRUE;
  1524. } else {
  1525. WasParentheses = TRUE;
  1526. }
  1527. }
  1528. } else {
  1529. WasParentheses = FALSE;
  1530. if (Character == '(') {
  1531. OpenCount += 1;
  1532. }
  1533. }
  1534. break;
  1535. default:
  1536. assert(FALSE);
  1537. return FALSE;
  1538. }
  1539. }
  1540. if ((Character == '\0') || (Character == EOF)) {
  1541. AddCharacter = FALSE;
  1542. }
  1543. if (AddCharacter != FALSE) {
  1544. Result = ShAddCharacterToTokenBuffer(Shell, Character);
  1545. if (Result == FALSE) {
  1546. return FALSE;
  1547. }
  1548. }
  1549. if (Stop != FALSE) {
  1550. break;
  1551. }
  1552. if (Character == '\n') {
  1553. ShPrintPrompt(Shell, 2);
  1554. }
  1555. if ((Character == '\0') || (Character == EOF)) {
  1556. return TRUE;
  1557. }
  1558. }
  1559. return TRUE;
  1560. }
  1561. VOID
  1562. ShCheckForReservedWord (
  1563. PSHELL Shell
  1564. )
  1565. /*++
  1566. Routine Description:
  1567. This routine is called immediately before returning what would otherwise
  1568. be a WORD token from the lexer. It checks against the reserved words of
  1569. the shell language and fixes up the token if it matches.
  1570. Arguments:
  1571. Shell - Supplies a pointer to the shell about to return a WORD.
  1572. Return Value:
  1573. None.
  1574. --*/
  1575. {
  1576. PSTR Word;
  1577. assert(Shell->Lexer.TokenType == TOKEN_WORD);
  1578. Word = Shell->Lexer.TokenBuffer;
  1579. switch (Word[0]) {
  1580. case 'c':
  1581. if (strcmp(Word + 1, "ase") == 0) {
  1582. Shell->Lexer.TokenType = TOKEN_CASE;
  1583. }
  1584. break;
  1585. case 'd':
  1586. if (strcmp(Word + 1, "o") == 0) {
  1587. Shell->Lexer.TokenType = TOKEN_DO;
  1588. } else if (strcmp(Word + 1, "one") == 0) {
  1589. Shell->Lexer.TokenType = TOKEN_DONE;
  1590. }
  1591. break;
  1592. case 'e':
  1593. if (strcmp(Word + 1, "sac") == 0) {
  1594. Shell->Lexer.TokenType = TOKEN_ESAC;
  1595. } else if (strcmp(Word + 1, "lse") == 0) {
  1596. Shell->Lexer.TokenType = TOKEN_ELSE;
  1597. } else if (strcmp(Word + 1, "lif") == 0) {
  1598. Shell->Lexer.TokenType = TOKEN_ELIF;
  1599. }
  1600. break;
  1601. case 'f':
  1602. if (strcmp(Word + 1, "i") == 0) {
  1603. Shell->Lexer.TokenType = TOKEN_FI;
  1604. } else if (strcmp(Word + 1, "or") == 0) {
  1605. Shell->Lexer.TokenType = TOKEN_FOR;
  1606. }
  1607. break;
  1608. case 'i':
  1609. if (strcmp(Word + 1, "f") == 0) {
  1610. Shell->Lexer.TokenType = TOKEN_IF;
  1611. } else if (strcmp(Word + 1, "n") == 0) {
  1612. Shell->Lexer.TokenType = TOKEN_IN;
  1613. }
  1614. break;
  1615. case 't':
  1616. if (strcmp(Word + 1, "hen") == 0) {
  1617. Shell->Lexer.TokenType = TOKEN_THEN;
  1618. }
  1619. break;
  1620. case 'u':
  1621. if (strcmp(Word + 1, "ntil") == 0) {
  1622. Shell->Lexer.TokenType = TOKEN_UNTIL;
  1623. }
  1624. break;
  1625. case 'w':
  1626. if (strcmp(Word + 1, "hile") == 0) {
  1627. Shell->Lexer.TokenType = TOKEN_WHILE;
  1628. }
  1629. break;
  1630. }
  1631. return;
  1632. }
  1633. BOOL
  1634. ShScanPendingHereDocuments (
  1635. PSHELL Shell
  1636. )
  1637. /*++
  1638. Routine Description:
  1639. This routine scans any pending here documents that are starting now.
  1640. Arguments:
  1641. Shell - Supplies a pointer to the shell.
  1642. Return Value:
  1643. TRUE on success or if there are no here documents to scan.
  1644. FALSE on failure.
  1645. --*/
  1646. {
  1647. PSHELL_HERE_DOCUMENT HereDocument;
  1648. BOOL Result;
  1649. while (LIST_EMPTY(&(Shell->Lexer.HereDocumentList)) == FALSE) {
  1650. HereDocument = LIST_VALUE(Shell->Lexer.HereDocumentList.Next,
  1651. SHELL_HERE_DOCUMENT,
  1652. ListEntry);
  1653. Result = ShScanHereDocument(Shell, HereDocument);
  1654. if (Result == FALSE) {
  1655. return FALSE;
  1656. }
  1657. LIST_REMOVE(&(HereDocument->ListEntry));
  1658. HereDocument->ListEntry.Next = NULL;
  1659. }
  1660. return TRUE;
  1661. }
  1662. BOOL
  1663. ShScanHereDocument (
  1664. PSHELL Shell,
  1665. PSHELL_HERE_DOCUMENT HereDocument
  1666. )
  1667. /*++
  1668. Routine Description:
  1669. This routine scans out the contents of a here document from the shell
  1670. input.
  1671. Arguments:
  1672. Shell - Supplies a pointer to the shell.
  1673. HereDocument - Supplies a pointer to the here document to fill out.
  1674. Return Value:
  1675. TRUE on success or if there are no here documents to scan.
  1676. FALSE on failure.
  1677. --*/
  1678. {
  1679. ULONG BeginLineNumber;
  1680. BOOL BeginningOfLine;
  1681. INT Character;
  1682. UINTN EndWordSize;
  1683. PSHELL_LEXER_STATE Lexer;
  1684. PSTR Line;
  1685. UINTN LineBegin;
  1686. UINTN LineSize;
  1687. BOOL Result;
  1688. INT StringDifference;
  1689. BOOL WasBackslash;
  1690. EndWordSize = HereDocument->EndWordSize;
  1691. Lexer = &(Shell->Lexer);
  1692. BeginLineNumber = Lexer->LineNumber;
  1693. LineBegin = 0;
  1694. //
  1695. // This routine borrows the token buffer, so there had better be nothing
  1696. // in it.
  1697. //
  1698. assert(Lexer->TokenBufferSize == 0);
  1699. //
  1700. // If it's going to be expanded, simulate the whole thing being in double
  1701. // quotes so that control characters inside variable expansions get
  1702. // escaped during expansion.
  1703. //
  1704. if (HereDocument->EndWordWasQuoted == FALSE) {
  1705. Result = ShAddCharacterToTokenBuffer(Shell, SHELL_CONTROL_QUOTE);
  1706. if (Result == FALSE) {
  1707. return FALSE;
  1708. }
  1709. LineBegin = 1;
  1710. }
  1711. ShPrintPrompt(Shell, 2);
  1712. WasBackslash = FALSE;
  1713. BeginningOfLine = TRUE;
  1714. while (TRUE) {
  1715. Result = ShGetInputCharacter(Shell, &Character);
  1716. if (Result == FALSE) {
  1717. ShLexerError(Shell,
  1718. "Unterminated here document at line %d.\n",
  1719. BeginLineNumber);
  1720. return FALSE;
  1721. }
  1722. if ((Character == '\n') || (Character == EOF) ||
  1723. (Character == '\0')) {
  1724. //
  1725. // If there was a backslash, remove both the newline and the
  1726. // backslash. Don't do this if the original end word was
  1727. // quoted in any way.
  1728. //
  1729. if ((HereDocument->EndWordWasQuoted == FALSE) &&
  1730. (WasBackslash != FALSE)) {
  1731. assert(Lexer->TokenBufferSize != 0);
  1732. Lexer->TokenBufferSize -= 1;
  1733. WasBackslash = FALSE;
  1734. ShPrintPrompt(Shell, 2);
  1735. if (Character == EOF) {
  1736. break;
  1737. }
  1738. continue;
  1739. }
  1740. //
  1741. // It's not a backslash, this is a complete line. It needs to
  1742. // be checked against the ending line. Null terminate it and
  1743. // compare strings.
  1744. //
  1745. Result = ShAddCharacterToTokenBuffer(Shell, '\0');
  1746. if (Result == FALSE) {
  1747. return FALSE;
  1748. }
  1749. assert(Lexer->TokenBufferSize > LineBegin);
  1750. Line = Lexer->TokenBuffer + LineBegin;
  1751. LineSize = Lexer->TokenBufferSize - LineBegin - 1;
  1752. while ((LineSize != 0) && (Line[LineSize - 1] == '\r')) {
  1753. LineSize -= 1;
  1754. }
  1755. if ((LineSize == 0) || (LineSize != EndWordSize - 1)) {
  1756. StringDifference = 1;
  1757. } else {
  1758. StringDifference = strncmp(Line,
  1759. HereDocument->EndWord,
  1760. LineSize);
  1761. }
  1762. //
  1763. // If the line matched, then throw out this line, as it was the
  1764. // terminating word.
  1765. //
  1766. if (StringDifference == 0) {
  1767. Lexer->TokenBufferSize = LineBegin;
  1768. Result = ShAddCharacterToTokenBuffer(Shell, '\0');
  1769. if (Result == FALSE) {
  1770. return FALSE;
  1771. }
  1772. HereDocument->Document =
  1773. SwStringDuplicate(Lexer->TokenBuffer,
  1774. Lexer->TokenBufferSize);
  1775. if (HereDocument->Document == NULL) {
  1776. return FALSE;
  1777. }
  1778. HereDocument->DocumentSize = Lexer->TokenBufferSize;
  1779. Lexer->TokenBufferSize = 0;
  1780. break;
  1781. //
  1782. // If it didn't match, then remove null terminator and reset the
  1783. // line beginning to be right after the newline.
  1784. //
  1785. } else {
  1786. assert(Lexer->TokenBufferSize != 0);
  1787. LineBegin = Lexer->TokenBufferSize;
  1788. Lexer->TokenBufferSize -= 1;
  1789. }
  1790. ShPrintPrompt(Shell, 2);
  1791. BeginningOfLine = TRUE;
  1792. //
  1793. // If this was not an EOF, null, newline, or tab, then this is
  1794. // not the beginning of the line.
  1795. //
  1796. } else if (Character != '\t') {
  1797. BeginningOfLine = FALSE;
  1798. //
  1799. // Watch out for expansions.
  1800. //
  1801. if (HereDocument->EndWordWasQuoted == FALSE) {
  1802. //
  1803. // Just like in double quotes, some characters need to be
  1804. // escaped if preceded by a backslash.
  1805. //
  1806. if ((Character == '$') || (Character == '`') ||
  1807. (Character == '\\')) {
  1808. if (WasBackslash != FALSE) {
  1809. assert(Lexer->TokenBufferSize != 0);
  1810. Lexer->TokenBuffer[Lexer->TokenBufferSize - 1] =
  1811. SHELL_CONTROL_ESCAPE;
  1812. //
  1813. // For unescaped $ and `, scan through an expansion.
  1814. //
  1815. } else if (Character != '\\') {
  1816. Result = ShScanExpansion(Shell, Character);
  1817. if (Result == FALSE) {
  1818. return FALSE;
  1819. }
  1820. continue;
  1821. }
  1822. //
  1823. // Quote the magic characters.
  1824. //
  1825. } else if ((Character == SHELL_CONTROL_QUOTE) ||
  1826. (Character == SHELL_CONTROL_ESCAPE)) {
  1827. Result = ShAddCharacterToTokenBuffer(Shell,
  1828. SHELL_CONTROL_ESCAPE);
  1829. if (Result == FALSE) {
  1830. return FALSE;
  1831. }
  1832. }
  1833. }
  1834. }
  1835. if (Character == '\\') {
  1836. WasBackslash = !WasBackslash;
  1837. } else {
  1838. WasBackslash = FALSE;
  1839. }
  1840. if (Character == EOF) {
  1841. return FALSE;
  1842. }
  1843. //
  1844. // Potentially strip leading tabs from the beginning of every line
  1845. // including the one with the ending word.
  1846. //
  1847. if ((BeginningOfLine != FALSE) &&
  1848. (Character == '\t') && (HereDocument->StripLeadingTabs != FALSE)) {
  1849. continue;
  1850. }
  1851. Result = ShAddCharacterToTokenBuffer(Shell, Character);
  1852. if (Result == FALSE) {
  1853. return FALSE;
  1854. }
  1855. }
  1856. return TRUE;
  1857. }
  1858. VOID
  1859. ShLexerError (
  1860. PSHELL Shell,
  1861. PSTR Format,
  1862. ...
  1863. )
  1864. /*++
  1865. Routine Description:
  1866. This routine prints a shell lexer error to standard error.
  1867. Arguments:
  1868. Shell - Supplies a pointer to the shell.
  1869. Format - Supplies the printf style format string.
  1870. ... - Supplies the remaining arguments to the printf string.
  1871. Return Value:
  1872. TRUE if the string has a quoting character in it.
  1873. FALSE if the string is clean.
  1874. --*/
  1875. {
  1876. va_list ArgumentList;
  1877. PSHELL_LEXER_STATE Lexer;
  1878. Lexer = &(Shell->Lexer);
  1879. fprintf(stderr, "sh: %d: ", Lexer->LineNumber);
  1880. va_start(ArgumentList, Format);
  1881. vfprintf(stderr, Format, ArgumentList);
  1882. va_end(ArgumentList);
  1883. if (Lexer->TokenBufferSize != 0) {
  1884. if (Lexer->TokenBuffer[Lexer->TokenBufferSize - 1] != '\0') {
  1885. if (Lexer->TokenBufferCapacity > Lexer->TokenBufferSize) {
  1886. Lexer->TokenBuffer[Lexer->TokenBufferSize] = '\0';
  1887. } else {
  1888. Lexer->TokenBuffer[Lexer->TokenBufferSize - 1] = '\0';
  1889. }
  1890. }
  1891. fprintf(stderr, ".\nToken: %s.", Shell->Lexer.TokenBuffer);
  1892. }
  1893. return;
  1894. }