12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543 |
- /*++
- Copyright (c) 2013 Minoca Corp. All Rights Reserved
- Module Name:
- lex.c
- Abstract:
- This module implements the lexical tokenizer for the shell.
- Author:
- Evan Green 5-Jun-2013
- Environment:
- User Mode
- --*/
- //
- // ------------------------------------------------------------------- Includes
- //
- #include <assert.h>
- #include <ctype.h>
- #include <errno.h>
- #include <stdarg.h>
- #include <stdlib.h>
- #include <string.h>
- #include <unistd.h>
- #include "sh.h"
- #include "shparse.h"
- #include "../swlib.h"
- //
- // --------------------------------------------------------------------- Macros
- //
- //
- // This macro puts a character back into the input stream.
- //
- #define SHELL_LEXER_UNPUT(_Shell, _Character) \
- if ((_Character) != EOF) { \
- \
- assert((_Shell)->Lexer.UnputCharacterValid == FALSE); \
- \
- (_Shell)->Lexer.UnputCharacter = (_Character); \
- (_Shell)->Lexer.UnputCharacterValid = TRUE; \
- if ((_Character) == '\n') { \
- (_Shell)->Lexer.LineNumber -= 1; \
- } \
- }
- //
- // ---------------------------------------------------------------- Definitions
- //
- //
- // ------------------------------------------------------ Data Type Definitions
- //
- typedef enum _EXPANSION_SYNTAX {
- ExpansionSyntaxInvalid,
- ExpansionSyntaxName,
- ExpansionSyntaxBackquote,
- ExpansionSyntaxCurlyBrace,
- ExpansionSyntaxParentheses,
- ExpansionSyntaxDoubleParentheses
- } EXPANSION_SYNTAX, *PEXPANSION_SYNTAX;
- //
- // ----------------------------------------------- Internal Function Prototypes
- //
- BOOL
- ShGetInputCharacter (
- PSHELL Shell,
- PINT Character
- );
- BOOL
- ShGetAnyInputCharacter (
- PSHELL Shell,
- PINT Character
- );
- BOOL
- ShAddCharacterToTokenBuffer (
- PSHELL Shell,
- CHAR Character
- );
- BOOL
- ShScanExpansion (
- PSHELL Shell,
- INT Character
- );
- VOID
- ShCheckForReservedWord (
- PSHELL Shell
- );
- BOOL
- ShScanPendingHereDocuments (
- PSHELL Shell
- );
- BOOL
- ShScanHereDocument (
- PSHELL Shell,
- PSHELL_HERE_DOCUMENT HereDocument
- );
- VOID
- ShLexerError (
- PSHELL Shell,
- PSTR Format,
- ...
- );
- //
- // -------------------------------------------------------------------- Globals
- //
- //
- // Set this to TRUE to have the lexer print out each token it grabs.
- //
- BOOL ShDebugLexer = FALSE;
- //
- // Define the characters that must be explictly escaped when inside double
- // quotes. This also applies for single quotes, but with the addition of a
- // backslash. This is null terminated so it is a legitimate string.
- //
- CHAR ShQuoteEscapeCharacters[] = {
- '!',
- '*',
- '?',
- '[',
- '=',
- '~',
- ':',
- '/',
- '-',
- ']',
- SHELL_CONTROL_QUOTE,
- SHELL_CONTROL_ESCAPE,
- '\0',
- };
- //
- // Define the names of all the tokens.
- //
- PSTR ShTokenStrings[] = {
- "WORD",
- "ASSIGNMENT_WORD",
- "NAME",
- "IO_NUMBER",
- "DOUBLE_AND",
- "DOUBLE_OR",
- "DOUBLE_SEMICOLON",
- "DOUBLE_LESS_THAN",
- "DOUBLE_GREATER_THAN",
- "LESS_THAN_AND",
- "GREATER_THAN_AND",
- "LESS_THAN_GREATER_THAN",
- "DOUBLE_LESS_THAN_DASH",
- "CLOBBER",
- "IF",
- "THEN",
- "ELSE",
- "ELIF",
- "FI",
- "DO",
- "DONE",
- "CASE",
- "ESAC",
- "WHILE",
- "UNTIL",
- "FOR",
- "TOKEN_IN",
- };
- //
- // ------------------------------------------------------------------ Functions
- //
- BOOL
- ShInitializeLexer (
- PSHELL_LEXER_STATE Lexer,
- FILE *InputFile,
- PSTR InputBuffer,
- UINTN InputBufferSize
- )
- /*++
- Routine Description:
- This routine initializes the shell lexer state.
- Arguments:
- Lexer - Supplies a pointer to the lexer state.
- InputFile - Supplies an optional pointer to the input file.
- InputBuffer - Supplies an optional pointer to the input buffer to use. If
- no buffer is provided one will be created, otherwise the provided one
- will be copied.
- InputBufferSize - Supplies the size of the provided input buffer in bytes
- including the null terminator.
- Return Value:
- TRUE on success.
- FALSE on failure.
- --*/
- {
- BOOL Result;
- Result = FALSE;
- memset(Lexer, 0, sizeof(SHELL_LEXER_STATE));
- Lexer->TokenType = -1;
- Lexer->InputFile = InputFile;
- Lexer->LineNumber = 1;
- INITIALIZE_LIST_HEAD(&(Lexer->HereDocumentList));
- if (InputBuffer != NULL) {
- Lexer->InputBuffer = malloc(InputBufferSize);
- if (Lexer->InputBuffer == NULL) {
- goto InitializeLexerEnd;
- }
- memcpy(Lexer->InputBuffer, InputBuffer, InputBufferSize);
- Lexer->InputBufferSize = InputBufferSize;
- Lexer->InputBufferCapacity = InputBufferSize;
- } else {
- Lexer->InputBuffer = malloc(DEFAULT_INPUT_BUFFER_SIZE);
- if (Lexer->InputBuffer == NULL) {
- goto InitializeLexerEnd;
- }
- Lexer->InputBufferCapacity = DEFAULT_INPUT_BUFFER_SIZE;
- }
- Lexer->TokenBuffer = malloc(DEFAULT_TOKEN_BUFFER_SIZE);
- if (Lexer->TokenBuffer == NULL) {
- goto InitializeLexerEnd;
- }
- Lexer->TokenBufferCapacity = DEFAULT_TOKEN_BUFFER_SIZE;
- Result = TRUE;
- InitializeLexerEnd:
- if (Result == FALSE) {
- if (Lexer->InputBuffer != NULL) {
- free(Lexer->InputBuffer);
- }
- if (Lexer->TokenBuffer != NULL) {
- free(Lexer->TokenBuffer);
- }
- }
- return Result;
- }
- VOID
- ShDestroyLexer (
- PSHELL_LEXER_STATE Lexer
- )
- /*++
- Routine Description:
- This routine tears down the shell lexer state.
- Arguments:
- Lexer - Supplies a pointer to the lexer state.
- Return Value:
- None.
- --*/
- {
- if (Lexer->InputBuffer != NULL) {
- free(Lexer->InputBuffer);
- Lexer->InputBuffer = NULL;
- }
- if (Lexer->TokenBuffer != NULL) {
- free(Lexer->TokenBuffer);
- Lexer->TokenBuffer = NULL;
- }
- if (Lexer->InputFile != NULL) {
- if (Lexer->InputFile != stdin) {
- fclose(Lexer->InputFile);
- }
- Lexer->InputFile = NULL;
- }
- return;
- }
- BOOL
- ShGetToken (
- PSHELL Shell,
- BOOL FirstCommandToken
- )
- /*++
- Routine Description:
- This routine fetches the next token out of the shell input.
- Arguments:
- Shell - Supplies a pointer to the shell to read from.
- FirstCommandToken - Supplies a boolean indicating if this token could be
- the first word in a command, in which case alias substitution will be
- enabled.
- Return Value:
- TRUE on success. The next token will be written into the shell structure.
- FALSE on failure.
- --*/
- {
- BOOL AddCharacter;
- INT Character;
- UINTN CharacterIndex;
- BOOL Delimit;
- BOOL InComment;
- BOOL IsOperator;
- PSHELL_LEXER_STATE Lexer;
- CHAR Quote;
- ULONG QuoteLineNumber;
- BOOL Result;
- BOOL UnputCharacter;
- Delimit = FALSE;
- InComment = FALSE;
- IsOperator = FALSE;
- Lexer = &(Shell->Lexer);
- Lexer->TokenType = -1;
- Lexer->TokenBufferSize = 0;
- Lexer->LastAlias = NULL;
- Quote = 0;
- QuoteLineNumber = 0;
- while (TRUE) {
- AddCharacter = TRUE;
- UnputCharacter = FALSE;
- Result = ShGetInputCharacter(Shell, &Character);
- if (Result == FALSE) {
- goto GetTokenEnd;
- }
- //
- // If inside a quote of some kind, scan according to those rules.
- // Single quotes are only ended by another single quote. Double quotes
- // are ended by an unescaped double quote.
- //
- if ((Quote != 0) && (Character != '\\')) {
- assert((Quote == '"') || (Quote == '\''));
- //
- // Watch out for unterminated quotes.
- //
- if (Character == EOF) {
- ShLexerError(Shell,
- "Unterminated string starting at line %d.\n",
- QuoteLineNumber);
- Result = FALSE;
- goto GetTokenEnd;
- }
- //
- // Escape the magic characters when in quotes to make them unmagic.
- //
- if ((strchr(ShQuoteEscapeCharacters, Character) != NULL) ||
- ((Quote == '\'') &&
- ((Character == '$') || (Character == '`')))) {
- Result = ShAddCharacterToTokenBuffer(Shell,
- SHELL_CONTROL_ESCAPE);
- if (Result == FALSE) {
- goto GetTokenEnd;
- }
- }
- if (Quote == '\'') {
- if (Character == '\'') {
- Quote = 0;
- Character = SHELL_CONTROL_QUOTE;
- }
- } else if (Quote == '"') {
- if (Character == '"') {
- Quote = 0;
- Character = SHELL_CONTROL_QUOTE;
- } else if ((Character == '`') || (Character == '$')) {
- Result = ShScanExpansion(Shell, Character);
- if (Result == FALSE) {
- goto GetTokenEnd;
- }
- AddCharacter = FALSE;
- }
- }
- //
- // If inside a comment, wait for a newline. When the newline comes,
- // put it back so it gets the full newline treatment on the next pass.
- //
- } else if (InComment != FALSE) {
- AddCharacter = FALSE;
- if (Character == '\n') {
- UnputCharacter = TRUE;
- InComment = FALSE;
- } else if (Character == EOF) {
- InComment = FALSE;
- }
- //
- // If the end of the input is found, delimit the current token, or
- // return it by itself.
- //
- } else if (Character == EOF) {
- Delimit = TRUE;
- if (Lexer->TokenBufferSize != 0) {
- AddCharacter = FALSE;
- if (Character != EOF) {
- UnputCharacter = TRUE;
- }
- } else {
- Lexer->TokenType = TOKEN_END_OF_FILE;
- }
- //
- // If the previous character was an operator and this one can glom on,
- // then do it.
- //
- } else if (IsOperator != FALSE) {
- assert(Lexer->TokenBufferSize != 0);
- IsOperator = FALSE;
- Delimit = TRUE;
- //
- // This is the second byte, so look at the first.
- //
- if (Lexer->TokenBufferSize == 1) {
- switch (Lexer->TokenBuffer[0]) {
- //
- // Allow <<, <&, <>, and <<-.
- //
- case '<':
- if (Character == '&') {
- Lexer->TokenType = TOKEN_LESS_THAN_AND;
- } else if (Character == '<') {
- Delimit = FALSE;
- IsOperator = TRUE;
- } else if (Character == '>') {
- Lexer->TokenType = TOKEN_LESS_THAN_GREATER_THAN;
- } else {
- AddCharacter = FALSE;
- UnputCharacter = TRUE;
- }
- break;
- //
- // Allow >>, >&, and >|.
- //
- case '>':
- if (Character == '&') {
- Lexer->TokenType = TOKEN_GREATER_THAN_AND;
- } else if (Character == '|') {
- Lexer->TokenType = TOKEN_CLOBBER;
- } else if (Character == '>') {
- Lexer->TokenType = TOKEN_DOUBLE_GREATER_THAN;
- } else {
- AddCharacter = FALSE;
- UnputCharacter = TRUE;
- }
- break;
- //
- // Allow for ;;.
- //
- case ';':
- if (Character == ';') {
- Lexer->TokenType = TOKEN_DOUBLE_SEMICOLON;
- } else {
- AddCharacter = FALSE;
- UnputCharacter = TRUE;
- }
- break;
- //
- // Allow for &&.
- //
- case '&':
- if (Character == '&') {
- Lexer->TokenType = TOKEN_DOUBLE_AND;
- } else {
- AddCharacter = FALSE;
- UnputCharacter = TRUE;
- }
- break;
- //
- // Allow for ||.
- //
- case '|':
- if (Character == '|') {
- Lexer->TokenType = TOKEN_DOUBLE_OR;
- } else {
- AddCharacter = FALSE;
- UnputCharacter = TRUE;
- }
- break;
- default:
- assert(FALSE);
- Result = FALSE;
- goto GetTokenEnd;
- }
- //
- // The only three character operator is <<-.
- //
- } else {
- assert(Lexer->TokenBufferSize == 2);
- assert((Lexer->TokenBuffer[0] == '<') &&
- (Lexer->TokenBuffer[1] == '<'));
- if (Character == '-') {
- Lexer->TokenType = TOKEN_DOUBLE_LESS_THAN_DASH;
- } else {
- Lexer->TokenType = TOKEN_DOUBLE_LESS_THAN;
- AddCharacter = FALSE;
- UnputCharacter = TRUE;
- }
- }
- //
- // Watch out for the beginning of a quoted section.
- //
- } else if ((Character == '\'') || (Character == '"')) {
- Quote = Character;
- Character = SHELL_CONTROL_QUOTE;
- QuoteLineNumber = Lexer->LineNumber;
- Lexer->TokenType = TOKEN_WORD;
- //
- // If it's a backslash, escape the next character, or prepare a line
- // continuation. This logic is entered even if inside a quoted region.
- //
- } else if (Character == '\\') {
- if (Quote == '\'') {
- //
- // In single quotes, the backslash is escaped and literal.
- //
- Result = ShAddCharacterToTokenBuffer(Shell,
- SHELL_CONTROL_ESCAPE);
- if (Result == FALSE) {
- goto GetTokenEnd;
- }
- //
- // Not in single quotes, so look at the next character.
- //
- } else {
- Result = ShGetInputCharacter(Shell, &Character);
- if (Result == FALSE) {
- goto GetTokenEnd;
- }
- if (Character == EOF) {
- Character = '\\';
- //
- // If it's a newline, then it's a line continuation, so just
- // swallow the backslash and add the newline as a normal
- // character (ie do nothing).
- //
- } else if (Character == '\n') {
- if (Quote == 0) {
- ShPrintPrompt(Shell, 2);
- }
- AddCharacter = FALSE;
- } else {
- //
- // If inside double quotes and the backslash isn't quoting
- // anything, then add it as a literal.
- //
- if ((Quote == '"') && (Character != '\\') &&
- (Character != '`') && (Character != '$') &&
- (Character != '"')) {
- Result = ShAddCharacterToTokenBuffer(Shell, '\\');
- if (Result == FALSE) {
- goto GetTokenEnd;
- }
- }
- //
- // Escape the next character, whatever it may be.
- //
- Result = ShAddCharacterToTokenBuffer(Shell,
- SHELL_CONTROL_ESCAPE);
- if (Result == FALSE) {
- goto GetTokenEnd;
- }
- }
- if ((Character != '\n') && (Lexer->TokenType == -1)) {
- Lexer->TokenType = TOKEN_WORD;
- }
- }
- //
- // If it's an unquoted dollar sign or backquote, scan past the
- // following expansion. The expansion does not delimit the token.
- //
- } else if ((Character == '$') || (Character == '`')) {
- Lexer->TokenType = TOKEN_WORD;
- Result = ShScanExpansion(Shell, Character);
- if (Result == FALSE) {
- goto GetTokenEnd;
- }
- AddCharacter = FALSE;
- //
- // Check for a new operator. Lump newlines in here too since their
- // processing is about the same. Notice that bang and the braces aren't
- // in here, as they're recognized at the token level rather than the
- // lexical level.
- //
- } else if ((Character == '&') || (Character == '|') ||
- (Character == ';') || (Character == '<') ||
- (Character == '>') || (Character == ')') ||
- (Character == '(') || (Character == '\n')) {
- //
- // If there was a previous token, delimit it now.
- //
- if (Lexer->TokenBufferSize != 0) {
- Delimit = TRUE;
- AddCharacter = FALSE;
- UnputCharacter = TRUE;
- //
- // If this is a redirection symbol and everything in the token
- // is a digit, then this is an I/O number token.
- //
- if ((Lexer->TokenType == TOKEN_WORD) &&
- ((Character == '>') || (Character == '<'))) {
- for (CharacterIndex = 0;
- CharacterIndex < Lexer->TokenBufferSize;
- CharacterIndex += 1) {
- if ((Lexer->TokenBuffer[CharacterIndex] < '0') ||
- (Lexer->TokenBuffer[CharacterIndex] > '9')) {
- break;
- }
- }
- if (CharacterIndex == Lexer->TokenBufferSize) {
- Lexer->TokenType = TOKEN_IO_NUMBER;
- }
- }
- //
- // The token buffer is empty, this operator is up. If there's a
- // possibility that it's a multi-character operator, then don't
- // delimit right away.
- //
- } else {
- Lexer->TokenType = Character;
- if ((Character == '>') || (Character == '<') ||
- (Character == '&') || (Character == '|') ||
- (Character == ';')) {
- IsOperator = TRUE;
- } else {
- Delimit = TRUE;
- //
- // If this is a newline, parse out any pending here
- // documents.
- //
- if (Character == '\n') {
- Result = ShScanPendingHereDocuments(Shell);
- if (Result == FALSE) {
- goto GetTokenEnd;
- }
- }
- }
- }
- //
- // If it's an unquoted space, any token containing the previous
- // character is delimited, and the blank is discarded.
- //
- } else if (isspace(Character)) {
- AddCharacter = FALSE;
- if (Lexer->TokenBufferSize != 0) {
- Delimit = TRUE;
- }
- //
- // Look out for a comment. Comments can only start if there's not
- // already a word in progress.
- //
- } else if ((Lexer->TokenBufferSize == 0) && (Character == '#')) {
- AddCharacter = FALSE;
- InComment = TRUE;
- //
- // It doesn't fit any other interesting case, so it's just a word.
- //
- } else {
- if (Lexer->TokenType == -1) {
- Lexer->TokenType = TOKEN_WORD;
- }
- //
- // If it's a control character, escape it.
- //
- if ((Character == SHELL_CONTROL_QUOTE) ||
- (Character == SHELL_CONTROL_ESCAPE)) {
- Result = ShAddCharacterToTokenBuffer(Shell,
- SHELL_CONTROL_ESCAPE);
- if (Result == FALSE) {
- goto GetTokenEnd;
- }
- }
- }
- if ((Quote != 0) && (Character == '\n')) {
- ShPrintPrompt(Shell, 2);
- }
- //
- // Add the character if desired.
- //
- if (AddCharacter != FALSE) {
- Result = ShAddCharacterToTokenBuffer(Shell, Character);
- if (Result == FALSE) {
- goto GetTokenEnd;
- }
- }
- if (UnputCharacter != FALSE) {
- assert(AddCharacter == FALSE);
- SHELL_LEXER_UNPUT(Shell, Character);
- }
- //
- // If the token is over, null terminate it, put back this character,
- // and break out.
- //
- if (Delimit != FALSE) {
- Delimit = FALSE;
- Result = ShAddCharacterToTokenBuffer(Shell, '\0');
- if (Result == FALSE) {
- goto GetTokenEnd;
- }
- if (Lexer->TokenType == TOKEN_WORD) {
- ShCheckForReservedWord(Shell);
- }
- //
- // If it's still just a word but has an equals in it, it's an
- // assignment word. It could also be a ! } or { if it's just that
- // character.
- //
- if (Lexer->TokenType == TOKEN_WORD) {
- if (strchr(Lexer->TokenBuffer, '=') != NULL) {
- Lexer->TokenType = TOKEN_ASSIGNMENT_WORD;
- } else if (Lexer->TokenBufferSize == 2) {
- if (Lexer->TokenBuffer[0] == '!') {
- Lexer->TokenType = '!';
- } else if (Lexer->TokenBuffer[0] == '{') {
- Lexer->TokenType = '{';
- } else if (Lexer->TokenBuffer[0] == '}') {
- Lexer->TokenType = '}';
- }
- }
- }
- //
- // If even after all that it's still a word and it's the first
- // word of the command, perform alias substitution.
- //
- if ((FirstCommandToken != FALSE) &&
- (Lexer->TokenType == TOKEN_WORD)) {
- Result = ShPerformAliasSubstitution(Shell);
- if (Result == FALSE) {
- goto GetTokenEnd;
- }
- } else {
- assert(Shell->Lexer.TokenType != -1);
- }
- //
- // If alias substitution didn't kill this token, then break out
- // and return it.
- //
- if (Shell->Lexer.TokenType != -1) {
- break;
- } else {
- Character = 0;
- }
- }
- }
- GetTokenEnd:
- assert((Result == FALSE) || (Lexer->TokenType != -1));
- if (ShDebugLexer != FALSE) {
- if (Result != FALSE) {
- if (Lexer->TokenType == TOKEN_END_OF_FILE) {
- ShPrintTrace(Shell, "Reached end of file.\n");
- } else if (Lexer->TokenType < 0xFF) {
- if (Lexer->TokenType < 0x20) {
- if (Lexer->TokenType == '\n') {
- ShPrintTrace(Shell,
- "%20s: Line %d\n",
- "<newline>",
- Lexer->LineNumber);
- } else {
- ShPrintTrace(Shell, "%20d: \n", Lexer->TokenType);
- }
- } else {
- ShPrintTrace(Shell,
- "%20c: %s\n",
- Lexer->TokenType,
- Lexer->TokenBuffer);
- }
- } else {
- assert(Lexer->TokenType >= TOKEN_WORD);
- ShPrintTrace(Shell,
- "%20s: %s\n",
- ShTokenStrings[Lexer->TokenType - TOKEN_WORD],
- Lexer->TokenBuffer);
- }
- } else {
- ShPrintTrace(Shell,
- "Error: Failed to parse token at line %d.\n",
- Lexer->LineNumber);
- }
- }
- return Result;
- }
- BOOL
- ShScanPastExpansion (
- PSTR String,
- UINTN StringSize,
- PUINTN ExpansionSize
- )
- /*++
- Routine Description:
- This routine is called to find the end of an expansion.
- Arguments:
- String - Supplies a pointer to the string at an expansion to scan.
- StringSize - Supplies the number of bytes in the string.
- ExpansionSize - Supplies a pointer where the size of the expansion in
- bytes will be returned.
- Return Value:
- TRUE on success. The extent of the expansion will be added to the token
- buffer.
- FALSE on failure.
- --*/
- {
- CHAR Character;
- UINTN Index;
- UINTN InnerExpansionSize;
- UINTN OpenCount;
- CHAR Quote;
- BOOL RecognizeComments;
- BOOL RecognizeQuotes;
- BOOL Result;
- EXPANSION_SYNTAX Syntax;
- BOOL WasBackslash;
- BOOL WasName;
- BOOL WasParentheses;
- Index = 1;
- OpenCount = 0;
- //
- // Figure out what type of expansion this is.
- //
- assert(StringSize != 0);
- assert((*String == '$') || (*String == '`') || (*String == '~'));
- RecognizeComments = TRUE;
- RecognizeQuotes = TRUE;
- if (*String == '`') {
- Syntax = ExpansionSyntaxBackquote;
- RecognizeComments = FALSE;
- RecognizeQuotes = FALSE;
- } else if (*String == '~') {
- Syntax = ExpansionSyntaxName;
- } else {
- assert(*String == '$');
- Character = String[Index];
- //
- // If it was a digit or a special parameter, then that's all there is
- // to it.
- //
- if (((Character >= '0') && (Character <= '9')) ||
- (Character == '@') || (Character == '*') || (Character == '#') ||
- (Character == '?') || (Character == '-') || (Character == '$') ||
- (Character == '!')) {
- *ExpansionSize = Index + 1;
- return TRUE;
- }
- //
- // It shouldn't be the end of file.
- //
- if (Character == '\0') {
- *ExpansionSize = Index;
- return TRUE;
- //
- // Note if it's a single curly.
- //
- } else if (Character == '{') {
- Syntax = ExpansionSyntaxCurlyBrace;
- RecognizeComments = FALSE;
- //
- // Note if it's a single parentheses. It could also be a double
- // parentheses.
- //
- } else if (Character == '(') {
- Syntax = ExpansionSyntaxParentheses;
- Index += 1;
- if (Index == StringSize) {
- return FALSE;
- }
- Character = String[Index];
- if (Character == '\0') {
- return FALSE;
- } else if (Character == '(') {
- Syntax = ExpansionSyntaxDoubleParentheses;
- Index += 1;
- }
- //
- // The only other option is it's a raw name.
- //
- } else if (SHELL_NAME_FIRST_CHARACTER(Character) != FALSE) {
- Syntax = ExpansionSyntaxName;
- //
- // Something funky is following the dollar sign.
- //
- } else {
- *ExpansionSize = 0;
- return TRUE;
- }
- }
- if (Syntax == ExpansionSyntaxName) {
- RecognizeComments = FALSE;
- RecognizeQuotes = FALSE;
- }
- //
- // Loop looking at characters until the parameter is finished.
- //
- Quote = 0;
- WasBackslash = FALSE;
- WasParentheses = FALSE;
- WasName = FALSE;
- while (TRUE) {
- Character = String[Index];
- //
- // If quoting is in progress, look for the end.
- //
- if (Quote != 0) {
- if ((Quote == '\'') || (Quote == SHELL_CONTROL_QUOTE)) {
- if (Character == Quote) {
- Quote = 0;
- }
- } else if (Quote == '"') {
- if ((WasBackslash == FALSE) && (Character == '"')) {
- Quote = 0;
- }
- } else if (Quote == '#') {
- if (Character == '\n') {
- Quote = 0;
- }
- } else {
- assert((Quote == '\\') || (Quote == SHELL_CONTROL_ESCAPE));
- Quote = 0;
- }
- //
- // If eligible for quotes, look for quotes starting.
- //
- } else if ((RecognizeQuotes != FALSE) &&
- ((Character == '\'') || (Character == '"') ||
- (Character == '\\') ||
- (Character == SHELL_CONTROL_QUOTE) ||
- (Character == SHELL_CONTROL_ESCAPE))) {
- Quote = Character;
- //
- // If eligible for comments, look for comments starting.
- //
- } else if ((RecognizeComments != FALSE) && (Character == '#') &&
- (WasName == FALSE)) {
- Quote = Character;
- //
- // No quotes or comments, look for the end expansion character.
- //
- } else {
- switch (Syntax) {
- case ExpansionSyntaxName:
- if (!SHELL_NAME_CHARACTER(Character)) {
- *ExpansionSize = Index;
- return TRUE;
- }
- break;
- case ExpansionSyntaxBackquote:
- if ((Character == '`') && (WasBackslash == FALSE)) {
- *ExpansionSize = Index + 1;
- return TRUE;
- }
- break;
- case ExpansionSyntaxCurlyBrace:
- if (Character == '}') {
- *ExpansionSize = Index + 1;
- return TRUE;
- }
- break;
- case ExpansionSyntaxParentheses:
- if (Character == '(') {
- OpenCount += 1;
- } else if (Character == ')') {
- if (OpenCount == 0) {
- *ExpansionSize = Index + 1;
- return TRUE;
- } else {
- OpenCount -= 1;
- }
- }
- break;
- case ExpansionSyntaxDoubleParentheses:
- if (Character == ')') {
- if (OpenCount != 0) {
- OpenCount -= 1;
- } else {
- if (WasParentheses != FALSE) {
- *ExpansionSize = Index + 1;
- return TRUE;
- } else {
- WasParentheses = TRUE;
- }
- }
- } else {
- WasParentheses = FALSE;
- if (Character == '(') {
- OpenCount += 1;
- }
- }
- break;
- default:
- assert(FALSE);
- return FALSE;
- }
- //
- // Look for a new expansion beginning.
- //
- if (((Character == '$') || (Character == '`')) &&
- (Syntax != ExpansionSyntaxBackquote)) {
- Result = ShScanPastExpansion(String + Index,
- StringSize - Index,
- &InnerExpansionSize);
- if (Result == FALSE) {
- return FALSE;
- }
- if (Index == StringSize) {
- return FALSE;
- }
- if (InnerExpansionSize == 0) {
- InnerExpansionSize = 1;
- }
- Index += InnerExpansionSize;
- WasBackslash = FALSE;
- WasParentheses = FALSE;
- WasName = FALSE;
- continue;
- }
- }
- if (Character == '\\') {
- WasBackslash = !WasBackslash;
- } else if (Character == SHELL_CONTROL_ESCAPE) {
- WasBackslash = TRUE;
- } else {
- WasBackslash = FALSE;
- }
- if (SHELL_NAME_CHARACTER(Character)) {
- WasName = TRUE;
- } else {
- WasName = FALSE;
- }
- Index += 1;
- if (Index == StringSize) {
- return FALSE;
- }
- }
- return FALSE;
- }
- //
- // --------------------------------------------------------- Internal Functions
- //
- BOOL
- ShGetInputCharacter (
- PSHELL Shell,
- PINT Character
- )
- /*++
- Routine Description:
- This routine gets a character from the input stream.
- Arguments:
- Shell - Supplies a pointer to the shell to read from.
- Character - Supplies a pointer where the character will be returned on
- success.
- Return Value:
- TRUE on success.
- FALSE on failure.
- --*/
- {
- BOOL Result;
- do {
- Result = ShGetAnyInputCharacter(Shell, Character);
- } while ((Result != FALSE) &&
- ((*Character == '\r') || (*Character == '\0')));
- return Result;
- }
- BOOL
- ShGetAnyInputCharacter (
- PSHELL Shell,
- PINT Character
- )
- /*++
- Routine Description:
- This routine gets a character from the input stream.
- Arguments:
- Shell - Supplies a pointer to the shell to read from.
- Character - Supplies a pointer where the character will be returned on
- success.
- Return Value:
- TRUE on success.
- FALSE on failure.
- --*/
- {
- ssize_t BytesRead;
- size_t BytesToRead;
- PSHELL_LEXER_STATE Lexer;
- PSTR NewInputBuffer;
- ULONG NewInputBufferSize;
- BOOL Result;
- Lexer = &(Shell->Lexer);
- if (Lexer->UnputCharacterValid != FALSE) {
- *Character = Lexer->UnputCharacter;
- if (*Character == '\n') {
- Lexer->LineNumber += 1;
- }
- Lexer->UnputCharacterValid = FALSE;
- return TRUE;
- }
- //
- // If there's more in the buffer, return that.
- //
- if (Lexer->InputBufferNextIndex < Lexer->InputBufferSize) {
- *Character = Lexer->InputBuffer[Lexer->InputBufferNextIndex];
- Lexer->InputBufferNextIndex += 1;
- goto GetInputCharacterEnd;
- }
- //
- // If there is no file, donezo.
- //
- if (((Shell->Options & SHELL_OPTION_READ_FROM_STDIN) == 0) &&
- (Lexer->InputFile == NULL)) {
- *Character = EOF;
- goto GetInputCharacterEnd;
- }
- if ((Shell->Options & SHELL_OPTION_INPUT_BUFFER_ONLY) != 0) {
- *Character = EOF;
- goto GetInputCharacterEnd;
- }
- //
- // Read from the file, or do fancy line-based input for interactive shells.
- //
- if ((Shell->Options & SHELL_OPTION_RAW_INPUT) != 0) {
- Result = ShReadLine(Shell, &NewInputBuffer, &NewInputBufferSize);
- if (Result == FALSE) {
- return FALSE;
- }
- //
- // Change the null terminator into a newline.
- //
- if ((NewInputBufferSize != 0) &&
- (NewInputBuffer[NewInputBufferSize - 1] == '\0')) {
- NewInputBuffer[NewInputBufferSize - 1] = '\n';
- }
- if (Lexer->InputBuffer != NULL) {
- free(Lexer->InputBuffer);
- Lexer->InputBuffer = NewInputBuffer;
- Lexer->InputBufferCapacity = NewInputBufferSize;
- }
- BytesRead = NewInputBufferSize;
- } else {
- if ((Shell->Options & SHELL_OPTION_INTERACTIVE) != 0) {
- BytesToRead = 1;
- } else {
- BytesToRead = Lexer->InputBufferCapacity;
- }
- //
- // Read using a file stream.
- //
- if (Lexer->InputFile != NULL) {
- do {
- BytesRead = fread(Lexer->InputBuffer,
- 1,
- BytesToRead,
- Lexer->InputFile);
- } while ((BytesRead == 0) && (errno == EINTR));
- if (BytesRead <= 0) {
- if (feof(Lexer->InputFile) != 0) {
- *Character = EOF;
- goto GetInputCharacterEnd;
- }
- return FALSE;
- }
- //
- // If reading from standard in, read directly from the descriptor.
- //
- } else {
- assert((Shell->Options & SHELL_OPTION_READ_FROM_STDIN) != 0);
- do {
- BytesRead = read(STDIN_FILENO, Lexer->InputBuffer, BytesToRead);
- } while ((BytesRead < 0) && (errno == EINTR));
- if (BytesRead <= 0) {
- if (BytesRead == 0) {
- *Character = EOF;
- goto GetInputCharacterEnd;
- }
- return FALSE;
- }
- }
- }
- Lexer->InputBufferSize = BytesRead;
- *Character = Lexer->InputBuffer[0];
- Lexer->InputBufferNextIndex = 1;
- GetInputCharacterEnd:
- if ((*Character != 0) &&
- ((Shell->Options & SHELL_OPTION_DISPLAY_INPUT) != 0)) {
- if (*Character == EOF) {
- ShPrintTrace(Shell, "<EOF>");
- } else {
- ShPrintTrace(Shell, "%c", *Character);
- }
- }
- if (*Character == '\n') {
- Lexer->LineNumber += 1;
- }
- return TRUE;
- }
- BOOL
- ShAddCharacterToTokenBuffer (
- PSHELL Shell,
- CHAR Character
- )
- /*++
- Routine Description:
- This routine adds the given character to the token buffer, expanding it if
- necessary.
- Arguments:
- Shell - Supplies a pointer to the shell to operate on.
- Character - Supplies the character to add.
- Return Value:
- TRUE on success.
- FALSE on failure.
- --*/
- {
- PSHELL_LEXER_STATE Lexer;
- UINTN NewCapacity;
- Lexer = &(Shell->Lexer);
- if (Lexer->TokenBufferSize < Lexer->TokenBufferCapacity) {
- Lexer->TokenBuffer[Lexer->TokenBufferSize] = Character;
- Lexer->TokenBufferSize += 1;
- return TRUE;
- }
- //
- // Bummer, the buffer needs to be reallocated.
- //
- NewCapacity = Lexer->TokenBufferCapacity * 2;
- Lexer->TokenBuffer = realloc(Lexer->TokenBuffer, NewCapacity);
- if (Lexer->TokenBuffer == NULL) {
- printf("Error: Failed to allocate %d bytes for expanded token "
- "buffer.\n");
- return FALSE;
- }
- //
- // Now add the byte.
- //
- Lexer->TokenBufferCapacity = NewCapacity;
- Lexer->TokenBuffer[Lexer->TokenBufferSize] = Character;
- Lexer->TokenBufferSize += 1;
- return TRUE;
- }
- BOOL
- ShScanExpansion (
- PSHELL Shell,
- INT Character
- )
- /*++
- Routine Description:
- This routine is called when the lexer finds a dollar sign. It recursively
- scans the inside of an expansion such as `...`, $param, ${...}, $(...), and
- $((...)).
- Arguments:
- Shell - Supplies a pointer to the shell to read from.
- Character - Supplies the initial character that caused entry into this
- function. It is assumed that this character has not yet been added to
- the token buffer.
- Return Value:
- TRUE on success. The extent of the expansion will be added to the token
- buffer.
- FALSE on failure.
- --*/
- {
- BOOL AddCharacter;
- BOOL InComment;
- BOOL InWord;
- CHAR LastCharacter;
- PSHELL_LEXER_STATE Lexer;
- ULONG OpenCount;
- CHAR Quote;
- BOOL Result;
- BOOL Stop;
- EXPANSION_SYNTAX Syntax;
- BOOL WasParentheses;
- InComment = FALSE;
- Lexer = &(Shell->Lexer);
- OpenCount = 0;
- Quote = 0;
- WasParentheses = FALSE;
- //
- // First add the dollar sign or backquote to the token buffer.
- //
- Result = ShAddCharacterToTokenBuffer(Shell, Character);
- if (Result == FALSE) {
- return FALSE;
- }
- //
- // Figure out what type of expansion this is.
- //
- if (Character == '`') {
- Syntax = ExpansionSyntaxBackquote;
- } else {
- assert(Character == '$');
- //
- // Get the next character to learn more.
- //
- Result = ShGetInputCharacter(Shell, &Character);
- if ((Result == FALSE) || (Character == EOF)) {
- return TRUE;
- }
- Result = ShAddCharacterToTokenBuffer(Shell, Character);
- if (Result == FALSE) {
- return FALSE;
- }
- //
- // If it was a digit or a special parameter, then that's all there is
- // to it.
- //
- if (((Character >= '0') && (Character <= '9')) ||
- (Character == '@') || (Character == '*') || (Character == '#') ||
- (Character == '?') || (Character == '-') || (Character == '$') ||
- (Character == '!')) {
- return TRUE;
- }
- //
- // Note if it's a single curly.
- //
- if (Character == '{') {
- Syntax = ExpansionSyntaxCurlyBrace;
- //
- // Note if it's a single parentheses. It could also be a double
- // parentheses.
- //
- } else if (Character == '(') {
- Syntax = ExpansionSyntaxParentheses;
- Result = ShGetInputCharacter(Shell, &Character);
- if ((Result == FALSE) || (Character == EOF)) {
- return FALSE;
- }
- Result = ShAddCharacterToTokenBuffer(Shell, Character);
- if (Result == FALSE) {
- return FALSE;
- }
- if (Character == '\0') {
- return FALSE;
- } else if (Character == '(') {
- Syntax = ExpansionSyntaxDoubleParentheses;
- } else {
- SHELL_LEXER_UNPUT(Shell, Character);
- assert(Lexer->TokenBufferSize != 0);
- Lexer->TokenBufferSize -= 1;
- }
- //
- // The only other option is it's a raw name.
- //
- } else if (SHELL_NAME_CHARACTER(Character) != FALSE) {
- Syntax = ExpansionSyntaxName;
- //
- // Something funky is following the dollar sign, this isn't really
- // an expansion.
- //
- } else {
- SHELL_LEXER_UNPUT(Shell, Character);
- assert(Lexer->TokenBufferSize != 0);
- Lexer->TokenBufferSize -= 1;
- return TRUE;
- }
- }
- //
- // Loop getting input until this expansion is over.
- //
- Stop = FALSE;
- while (Stop == FALSE) {
- AddCharacter = TRUE;
- Result = ShGetInputCharacter(Shell, &Character);
- if (Result == FALSE) {
- return FALSE;
- }
- //
- // If inside a quote of some kind, scan according to those rules.
- // Single quotes are only ended by another single quote. Double quotes
- // are ended by an unescaped double quote.
- //
- if ((Quote != 0) && (Character != '\\')) {
- assert((Quote == '"') || (Quote == '\''));
- //
- // Escape the magic characters when in quotes to make them unmagic.
- //
- if (Syntax == ExpansionSyntaxCurlyBrace) {
- if ((strchr(ShQuoteEscapeCharacters, Character) != NULL) ||
- ((Quote == '\'') &&
- ((Character == '$') || (Character == '`')))) {
- Result = ShAddCharacterToTokenBuffer(Shell,
- SHELL_CONTROL_ESCAPE);
- if (Result == FALSE) {
- return FALSE;
- }
- }
- }
- if (Quote == '\'') {
- if (Character == '\'') {
- Quote = 0;
- if (Syntax == ExpansionSyntaxCurlyBrace) {
- Character = SHELL_CONTROL_QUOTE;
- }
- }
- } else if (Quote == '"') {
- if (Character == '"') {
- Quote = 0;
- if (Syntax == ExpansionSyntaxCurlyBrace) {
- Character = SHELL_CONTROL_QUOTE;
- }
- }
- }
- //
- // If inside a comment, wait for a newline. When the newline comes,
- // it's not really handled any differently other than it would break up
- // two parentheses in a row.
- //
- } else if (InComment != FALSE) {
- AddCharacter = FALSE;
- if (Character == '\n') {
- AddCharacter = TRUE;
- WasParentheses = FALSE;
- InComment = FALSE;
- }
- //
- // If it's a backslash, escape the next character, or prepare a line
- // continuation. This logic is entered even if inside a quoted region.
- //
- } else if ((Character == '\\') && (Syntax != ExpansionSyntaxName)) {
- if (Quote == '\'') {
- //
- // In single quotes, the backslash is escaped and literal.
- //
- if (Syntax == ExpansionSyntaxCurlyBrace) {
- Result = ShAddCharacterToTokenBuffer(Shell,
- SHELL_CONTROL_ESCAPE);
- if (Result == FALSE) {
- return FALSE;
- }
- }
- //
- // Not in single quotes, so look at the next character.
- //
- } else {
- Result = ShGetInputCharacter(Shell, &Character);
- if (Result == FALSE) {
- return FALSE;
- }
- //
- // If it's a newline, then it's a line continuation, so just
- // swallow the backslash and newline.
- //
- if (Character == '\n') {
- AddCharacter = FALSE;
- } else {
- if (Syntax == ExpansionSyntaxCurlyBrace) {
- //
- // If inside double quotes and the backslash isn't
- // quoting anything, then add it as a literal.
- //
- if ((Quote == '"') && (Character != '\\') &&
- (Character != '`') && (Character != '$') &&
- (Character != '"')) {
- Result = ShAddCharacterToTokenBuffer(Shell, '\\');
- if (Result == FALSE) {
- return FALSE;
- }
- }
- //
- // Escape the next character, whatever it may be.
- //
- Result = ShAddCharacterToTokenBuffer(
- Shell,
- SHELL_CONTROL_ESCAPE);
- if (Result == FALSE) {
- return FALSE;
- }
- //
- // Pass everything through for non-curly expansion, as it
- // gets reinterpreted inside the subshell.
- //
- } else {
- Result = ShAddCharacterToTokenBuffer(Shell, '\\');
- if (Result == FALSE) {
- return FALSE;
- }
- }
- }
- }
- //
- // Look for the elusive closing sequence.
- //
- } else {
- switch (Syntax) {
- case ExpansionSyntaxName:
- if (SHELL_NAME_CHARACTER(Character) == FALSE) {
- Stop = TRUE;
- AddCharacter = FALSE;
- SHELL_LEXER_UNPUT(Shell, Character);
- }
- break;
- case ExpansionSyntaxBackquote:
- if (Character == '`') {
- Stop = TRUE;
- }
- break;
- case ExpansionSyntaxCurlyBrace:
- case ExpansionSyntaxParentheses:
- if ((Syntax == ExpansionSyntaxParentheses) &&
- (Character == '(')) {
- OpenCount += 1;
- } else if ((Syntax == ExpansionSyntaxParentheses) &&
- (Character == ')')) {
- if (OpenCount != 0) {
- OpenCount -= 1;
- } else {
- Stop = TRUE;
- break;
- }
- //
- // Note that curly braces don't allow recursion or quotes
- // inside the variable name, but they can be in the
- // post-variable-name part (ie ${myvar+"other$var"}).
- //
- } else if ((Syntax == ExpansionSyntaxCurlyBrace) &&
- (Character == '}')) {
- Stop = TRUE;
- break;
- }
- //
- // Watch out for quotes starting.
- //
- if ((Character == '"') || (Character == '\'')) {
- Quote = Character;
- if (Syntax == ExpansionSyntaxCurlyBrace) {
- Character = SHELL_CONTROL_QUOTE;
- }
- //
- // If it's a dollar sign or backquote, recurse into another
- // expansion.
- //
- } else if ((Character == '$') || (Character == '`')) {
- AddCharacter = FALSE;
- Result = ShScanExpansion(Shell, Character);
- if (Result == FALSE) {
- return FALSE;
- }
- //
- // Watch out for a comment beginning, but only if it's not
- // already in the middle of a word. Don't do this inside
- // curly brace expansions, as something like ${#a} means
- // "length of a".
- //
- } else if ((Character == '#') &&
- (Syntax != ExpansionSyntaxCurlyBrace)) {
- InWord = FALSE;
- assert(Lexer->TokenBufferSize != 0);
- LastCharacter =
- Lexer->TokenBuffer[Lexer->TokenBufferSize - 1];
- if (SHELL_NAME_CHARACTER(LastCharacter) != FALSE) {
- InWord = TRUE;
- }
- if (InWord == FALSE) {
- InComment = TRUE;
- AddCharacter = FALSE;
- }
- }
- break;
- case ExpansionSyntaxDoubleParentheses:
- if (Character == ')') {
- if (OpenCount != 0) {
- OpenCount -= 1;
- } else {
- if (WasParentheses != FALSE) {
- Stop = TRUE;
- } else {
- WasParentheses = TRUE;
- }
- }
- } else {
- WasParentheses = FALSE;
- if (Character == '(') {
- OpenCount += 1;
- }
- }
- break;
- default:
- assert(FALSE);
- return FALSE;
- }
- }
- if ((Character == '\0') || (Character == EOF)) {
- AddCharacter = FALSE;
- }
- if (AddCharacter != FALSE) {
- Result = ShAddCharacterToTokenBuffer(Shell, Character);
- if (Result == FALSE) {
- return FALSE;
- }
- }
- if (Stop != FALSE) {
- break;
- }
- if (Character == '\n') {
- ShPrintPrompt(Shell, 2);
- }
- if ((Character == '\0') || (Character == EOF)) {
- return TRUE;
- }
- }
- return TRUE;
- }
- VOID
- ShCheckForReservedWord (
- PSHELL Shell
- )
- /*++
- Routine Description:
- This routine is called immediately before returning what would otherwise
- be a WORD token from the lexer. It checks against the reserved words of
- the shell language and fixes up the token if it matches.
- Arguments:
- Shell - Supplies a pointer to the shell about to return a WORD.
- Return Value:
- None.
- --*/
- {
- PSTR Word;
- assert(Shell->Lexer.TokenType == TOKEN_WORD);
- Word = Shell->Lexer.TokenBuffer;
- switch (Word[0]) {
- case 'c':
- if (strcmp(Word + 1, "ase") == 0) {
- Shell->Lexer.TokenType = TOKEN_CASE;
- }
- break;
- case 'd':
- if (strcmp(Word + 1, "o") == 0) {
- Shell->Lexer.TokenType = TOKEN_DO;
- } else if (strcmp(Word + 1, "one") == 0) {
- Shell->Lexer.TokenType = TOKEN_DONE;
- }
- break;
- case 'e':
- if (strcmp(Word + 1, "sac") == 0) {
- Shell->Lexer.TokenType = TOKEN_ESAC;
- } else if (strcmp(Word + 1, "lse") == 0) {
- Shell->Lexer.TokenType = TOKEN_ELSE;
- } else if (strcmp(Word + 1, "lif") == 0) {
- Shell->Lexer.TokenType = TOKEN_ELIF;
- }
- break;
- case 'f':
- if (strcmp(Word + 1, "i") == 0) {
- Shell->Lexer.TokenType = TOKEN_FI;
- } else if (strcmp(Word + 1, "or") == 0) {
- Shell->Lexer.TokenType = TOKEN_FOR;
- }
- break;
- case 'i':
- if (strcmp(Word + 1, "f") == 0) {
- Shell->Lexer.TokenType = TOKEN_IF;
- } else if (strcmp(Word + 1, "n") == 0) {
- Shell->Lexer.TokenType = TOKEN_IN;
- }
- break;
- case 't':
- if (strcmp(Word + 1, "hen") == 0) {
- Shell->Lexer.TokenType = TOKEN_THEN;
- }
- break;
- case 'u':
- if (strcmp(Word + 1, "ntil") == 0) {
- Shell->Lexer.TokenType = TOKEN_UNTIL;
- }
- break;
- case 'w':
- if (strcmp(Word + 1, "hile") == 0) {
- Shell->Lexer.TokenType = TOKEN_WHILE;
- }
- break;
- }
- return;
- }
- BOOL
- ShScanPendingHereDocuments (
- PSHELL Shell
- )
- /*++
- Routine Description:
- This routine scans any pending here documents that are starting now.
- Arguments:
- Shell - Supplies a pointer to the shell.
- Return Value:
- TRUE on success or if there are no here documents to scan.
- FALSE on failure.
- --*/
- {
- PSHELL_HERE_DOCUMENT HereDocument;
- BOOL Result;
- while (LIST_EMPTY(&(Shell->Lexer.HereDocumentList)) == FALSE) {
- HereDocument = LIST_VALUE(Shell->Lexer.HereDocumentList.Next,
- SHELL_HERE_DOCUMENT,
- ListEntry);
- Result = ShScanHereDocument(Shell, HereDocument);
- if (Result == FALSE) {
- return FALSE;
- }
- LIST_REMOVE(&(HereDocument->ListEntry));
- HereDocument->ListEntry.Next = NULL;
- }
- return TRUE;
- }
- BOOL
- ShScanHereDocument (
- PSHELL Shell,
- PSHELL_HERE_DOCUMENT HereDocument
- )
- /*++
- Routine Description:
- This routine scans out the contents of a here document from the shell
- input.
- Arguments:
- Shell - Supplies a pointer to the shell.
- HereDocument - Supplies a pointer to the here document to fill out.
- Return Value:
- TRUE on success or if there are no here documents to scan.
- FALSE on failure.
- --*/
- {
- ULONG BeginLineNumber;
- BOOL BeginningOfLine;
- INT Character;
- UINTN EndWordSize;
- PSHELL_LEXER_STATE Lexer;
- PSTR Line;
- UINTN LineBegin;
- UINTN LineSize;
- BOOL Result;
- INT StringDifference;
- BOOL WasBackslash;
- EndWordSize = HereDocument->EndWordSize;
- Lexer = &(Shell->Lexer);
- BeginLineNumber = Lexer->LineNumber;
- LineBegin = 0;
- //
- // This routine borrows the token buffer, so there had better be nothing
- // in it.
- //
- assert(Lexer->TokenBufferSize == 0);
- //
- // If it's going to be expanded, simulate the whole thing being in double
- // quotes so that control characters inside variable expansions get
- // escaped during expansion.
- //
- if (HereDocument->EndWordWasQuoted == FALSE) {
- Result = ShAddCharacterToTokenBuffer(Shell, SHELL_CONTROL_QUOTE);
- if (Result == FALSE) {
- return FALSE;
- }
- LineBegin = 1;
- }
- ShPrintPrompt(Shell, 2);
- WasBackslash = FALSE;
- BeginningOfLine = TRUE;
- while (TRUE) {
- Result = ShGetInputCharacter(Shell, &Character);
- if (Result == FALSE) {
- ShLexerError(Shell,
- "Unterminated here document at line %d.\n",
- BeginLineNumber);
- return FALSE;
- }
- if ((Character == '\n') || (Character == EOF) ||
- (Character == '\0')) {
- //
- // If there was a backslash, remove both the newline and the
- // backslash. Don't do this if the original end word was
- // quoted in any way.
- //
- if ((HereDocument->EndWordWasQuoted == FALSE) &&
- (WasBackslash != FALSE)) {
- assert(Lexer->TokenBufferSize != 0);
- Lexer->TokenBufferSize -= 1;
- WasBackslash = FALSE;
- ShPrintPrompt(Shell, 2);
- if (Character == EOF) {
- break;
- }
- continue;
- }
- //
- // It's not a backslash, this is a complete line. It needs to
- // be checked against the ending line. Null terminate it and
- // compare strings.
- //
- Result = ShAddCharacterToTokenBuffer(Shell, '\0');
- if (Result == FALSE) {
- return FALSE;
- }
- assert(Lexer->TokenBufferSize > LineBegin);
- Line = Lexer->TokenBuffer + LineBegin;
- LineSize = Lexer->TokenBufferSize - LineBegin - 1;
- while ((LineSize != 0) && (Line[LineSize - 1] == '\r')) {
- LineSize -= 1;
- }
- if ((LineSize == 0) || (LineSize != EndWordSize - 1)) {
- StringDifference = 1;
- } else {
- StringDifference = strncmp(Line,
- HereDocument->EndWord,
- LineSize);
- }
- //
- // If the line matched, then throw out this line, as it was the
- // terminating word.
- //
- if (StringDifference == 0) {
- Lexer->TokenBufferSize = LineBegin;
- Result = ShAddCharacterToTokenBuffer(Shell, '\0');
- if (Result == FALSE) {
- return FALSE;
- }
- HereDocument->Document =
- SwStringDuplicate(Lexer->TokenBuffer,
- Lexer->TokenBufferSize);
- if (HereDocument->Document == NULL) {
- return FALSE;
- }
- HereDocument->DocumentSize = Lexer->TokenBufferSize;
- Lexer->TokenBufferSize = 0;
- break;
- //
- // If it didn't match, then remove null terminator and reset the
- // line beginning to be right after the newline.
- //
- } else {
- assert(Lexer->TokenBufferSize != 0);
- LineBegin = Lexer->TokenBufferSize;
- Lexer->TokenBufferSize -= 1;
- }
- ShPrintPrompt(Shell, 2);
- BeginningOfLine = TRUE;
- //
- // If this was not an EOF, null, newline, or tab, then this is
- // not the beginning of the line.
- //
- } else if (Character != '\t') {
- BeginningOfLine = FALSE;
- //
- // Watch out for expansions.
- //
- if (HereDocument->EndWordWasQuoted == FALSE) {
- //
- // Just like in double quotes, some characters need to be
- // escaped if preceded by a backslash.
- //
- if ((Character == '$') || (Character == '`') ||
- (Character == '\\')) {
- if (WasBackslash != FALSE) {
- assert(Lexer->TokenBufferSize != 0);
- Lexer->TokenBuffer[Lexer->TokenBufferSize - 1] =
- SHELL_CONTROL_ESCAPE;
- //
- // For unescaped $ and `, scan through an expansion.
- //
- } else if (Character != '\\') {
- Result = ShScanExpansion(Shell, Character);
- if (Result == FALSE) {
- return FALSE;
- }
- continue;
- }
- //
- // Quote the magic characters.
- //
- } else if ((Character == SHELL_CONTROL_QUOTE) ||
- (Character == SHELL_CONTROL_ESCAPE)) {
- Result = ShAddCharacterToTokenBuffer(Shell,
- SHELL_CONTROL_ESCAPE);
- if (Result == FALSE) {
- return FALSE;
- }
- }
- }
- }
- if (Character == '\\') {
- WasBackslash = !WasBackslash;
- } else {
- WasBackslash = FALSE;
- }
- if (Character == EOF) {
- return FALSE;
- }
- //
- // Potentially strip leading tabs from the beginning of every line
- // including the one with the ending word.
- //
- if ((BeginningOfLine != FALSE) &&
- (Character == '\t') && (HereDocument->StripLeadingTabs != FALSE)) {
- continue;
- }
- Result = ShAddCharacterToTokenBuffer(Shell, Character);
- if (Result == FALSE) {
- return FALSE;
- }
- }
- return TRUE;
- }
- VOID
- ShLexerError (
- PSHELL Shell,
- PSTR Format,
- ...
- )
- /*++
- Routine Description:
- This routine prints a shell lexer error to standard error.
- Arguments:
- Shell - Supplies a pointer to the shell.
- Format - Supplies the printf style format string.
- ... - Supplies the remaining arguments to the printf string.
- Return Value:
- TRUE if the string has a quoting character in it.
- FALSE if the string is clean.
- --*/
- {
- va_list ArgumentList;
- PSHELL_LEXER_STATE Lexer;
- Lexer = &(Shell->Lexer);
- fprintf(stderr, "sh: %d: ", Lexer->LineNumber);
- va_start(ArgumentList, Format);
- vfprintf(stderr, Format, ArgumentList);
- va_end(ArgumentList);
- if (Lexer->TokenBufferSize != 0) {
- if (Lexer->TokenBuffer[Lexer->TokenBufferSize - 1] != '\0') {
- if (Lexer->TokenBufferCapacity > Lexer->TokenBufferSize) {
- Lexer->TokenBuffer[Lexer->TokenBufferSize] = '\0';
- } else {
- Lexer->TokenBuffer[Lexer->TokenBufferSize - 1] = '\0';
- }
- }
- fprintf(stderr, ".\nToken: %s.", Shell->Lexer.TokenBuffer);
- }
- return;
- }
|