regexcmp.c 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585
  1. /*++
  2. Copyright (c) 2013 Minoca Corp.
  3. This file is licensed under the terms of the GNU General Public License
  4. version 3. Alternative licensing terms are available. Contact
  5. info@minocacorp.com for details. See the LICENSE file at the root of this
  6. project for complete licensing information.
  7. Module Name:
  8. regexcmp.c
  9. Abstract:
  10. This module implements support for compiling regular expressions.
  11. Author:
  12. Evan Green 8-Jul-2013
  13. Environment:
  14. User Mode C Library
  15. --*/
  16. //
  17. // ------------------------------------------------------------------- Includes
  18. //
  19. #define LIBC_API __DLLEXPORT
  20. #include <minoca/lib/types.h>
  21. #include <assert.h>
  22. #include <ctype.h>
  23. #include <regex.h>
  24. #include <stdio.h>
  25. #include <stdlib.h>
  26. #include <string.h>
  27. #include "regexp.h"
  28. //
  29. // ---------------------------------------------------------------- Definitions
  30. //
  31. //
  32. // Define the maximum size of a string returned by regerror.
  33. //
  34. #define REGULAR_EXPRESSION_ERROR_STRING_MAX_SIZE 512
  35. //
  36. // Define regular expression tokens.
  37. //
  38. #define TOKEN_ESCAPED_OPEN_PARENTHESES 512
  39. #define TOKEN_ESCAPED_CLOSE_PARENTHESES 513
  40. #define TOKEN_ESCAPED_OPEN_BRACE 514
  41. #define TOKEN_ESCAPED_CLOSE_BRACE 515
  42. #define TOKEN_QUOTED_CHARACTER 516
  43. #define TOKEN_BACK_REFERENCE 517
  44. //
  45. // Define bracket expression tokens.
  46. //
  47. #define TOKEN_OPEN_EQUAL 550
  48. #define TOKEN_EQUAL_CLOSE 551
  49. #define TOKEN_OPEN_DOT 552
  50. #define TOKEN_DOT_CLOSE 553
  51. #define TOKEN_OPEN_COLON 554
  52. #define TOKEN_COLON_CLOSE 555
  53. //
  54. // Define the initial buffer size for regular expression strings.
  55. //
  56. #define REGULAR_EXPRESSION_INITIAL_STRING_SIZE 16
  57. //
  58. // ------------------------------------------------------ Data Type Definitions
  59. //
  60. /*++
  61. Structure Description:
  62. This structure defines the lexer state for the regular expression compiler.
  63. Members:
  64. Input - Stores the input string.
  65. InputSize - Stores the size of the input string in bytes including the
  66. null terminator.
  67. NextInput - Stores the offset of the next character to grab.
  68. Token - Stores the current token.
  69. ActiveSubexpressionCount - Stores the number of subexpressions currently
  70. being parsed (the number of close parentheses to treat as special
  71. characters).
  72. --*/
  73. typedef struct _REGULAR_EXPRESSION_LEXER {
  74. PSTR Input;
  75. ULONG InputSize;
  76. ULONG NextInput;
  77. ULONG Token;
  78. ULONG ActiveSubexpressionCount;
  79. } REGULAR_EXPRESSION_LEXER, *PREGULAR_EXPRESSION_LEXER;
  80. //
  81. // ----------------------------------------------- Internal Function Prototypes
  82. //
  83. REGULAR_EXPRESSION_STATUS
  84. ClpCompileRegularExpression (
  85. PSTR Pattern,
  86. ULONG Flags,
  87. PREGULAR_EXPRESSION *Expression
  88. );
  89. VOID
  90. ClpDestroyRegularExpression (
  91. PREGULAR_EXPRESSION Expression
  92. );
  93. REGULAR_EXPRESSION_STATUS
  94. ClpParseCompleteBasicRegularExpression (
  95. PREGULAR_EXPRESSION_LEXER Lexer,
  96. PREGULAR_EXPRESSION Expression
  97. );
  98. REGULAR_EXPRESSION_STATUS
  99. ClpParseBasicRegularExpression (
  100. PREGULAR_EXPRESSION_LEXER Lexer,
  101. PREGULAR_EXPRESSION Expression,
  102. PREGULAR_EXPRESSION_ENTRY ParentEntry
  103. );
  104. REGULAR_EXPRESSION_STATUS
  105. ClpParseSimpleExpression (
  106. PREGULAR_EXPRESSION_LEXER Lexer,
  107. PREGULAR_EXPRESSION Expression,
  108. PREGULAR_EXPRESSION_ENTRY *Entry
  109. );
  110. REGULAR_EXPRESSION_STATUS
  111. ClpParseExtendedRegularExpression (
  112. PREGULAR_EXPRESSION_LEXER Lexer,
  113. PREGULAR_EXPRESSION Expression,
  114. PREGULAR_EXPRESSION_ENTRY ParentEntry
  115. );
  116. REGULAR_EXPRESSION_STATUS
  117. ClpParseExtendedRegularExpressionBranch (
  118. PREGULAR_EXPRESSION_LEXER Lexer,
  119. PREGULAR_EXPRESSION Expression,
  120. PREGULAR_EXPRESSION_ENTRY ParentEntry
  121. );
  122. REGULAR_EXPRESSION_STATUS
  123. ClpParseExtendedExpression (
  124. PREGULAR_EXPRESSION_LEXER Lexer,
  125. PREGULAR_EXPRESSION Expression,
  126. PREGULAR_EXPRESSION_ENTRY *Entry
  127. );
  128. REGULAR_EXPRESSION_STATUS
  129. ClpParseBracketExpression (
  130. PREGULAR_EXPRESSION_LEXER Lexer,
  131. PREGULAR_EXPRESSION Expression,
  132. PREGULAR_EXPRESSION_ENTRY Entry
  133. );
  134. REGULAR_EXPRESSION_STATUS
  135. ClpParseRegularExpressionDuplicationSymbol (
  136. PREGULAR_EXPRESSION_LEXER Lexer,
  137. PREGULAR_EXPRESSION Expression,
  138. PREGULAR_EXPRESSION_ENTRY Entry
  139. );
  140. REGULAR_EXPRESSION_STATUS
  141. ClpParseRegularExpressionDuplicationCount (
  142. PREGULAR_EXPRESSION_LEXER Lexer,
  143. PREGULAR_EXPRESSION Expression,
  144. PREGULAR_EXPRESSION_ENTRY Entry
  145. );
  146. REGULAR_EXPRESSION_STATUS
  147. ClpGetRegularExpressionToken (
  148. PREGULAR_EXPRESSION_LEXER Lexer,
  149. PREGULAR_EXPRESSION Expression
  150. );
  151. REGULAR_EXPRESSION_STATUS
  152. ClpGetBracketExpressionToken (
  153. PREGULAR_EXPRESSION_LEXER Lexer,
  154. PREGULAR_EXPRESSION Expression
  155. );
  156. PREGULAR_EXPRESSION_ENTRY
  157. ClpCreateRegularExpressionEntry (
  158. REGEX_ENTRY_TYPE Type
  159. );
  160. VOID
  161. ClpDestroyRegularExpressionEntry (
  162. PREGULAR_EXPRESSION_ENTRY Entry
  163. );
  164. BOOL
  165. ClpAppendRegularExpressionString (
  166. PREGULAR_EXPRESSION_STRING String,
  167. PSTR Data,
  168. ULONG Size
  169. );
  170. //
  171. // -------------------------------------------------------------------- Globals
  172. //
  173. //
  174. // ------------------------------------------------------------------ Functions
  175. //
  176. LIBC_API
  177. int
  178. regcomp (
  179. regex_t *RegularExpression,
  180. const char *Pattern,
  181. int Flags
  182. )
  183. /*++
  184. Routine Description:
  185. This routine compiles a regular expression.
  186. Arguments:
  187. RegularExpression - Supplies a pointer to the regular expression structure
  188. where the compiled form will reside on success.
  189. Pattern - Supplies a pointer to the pattern input string.
  190. Flags - Supplies a bitfield of flags governing the behavior of the regular
  191. expression. See some REG_* definitions.
  192. Return Value:
  193. 0 on success.
  194. Returns a REG_* status code on failure.
  195. --*/
  196. {
  197. PREGULAR_EXPRESSION CompiledExpression;
  198. REGULAR_EXPRESSION_STATUS Status;
  199. RegularExpression->re_nsub = 0;
  200. RegularExpression->re_data = NULL;
  201. Status = ClpCompileRegularExpression((PSTR)Pattern,
  202. Flags,
  203. &CompiledExpression);
  204. if (Status != RegexStatusSuccess) {
  205. return Status;
  206. }
  207. RegularExpression->re_nsub = CompiledExpression->SubexpressionCount;
  208. RegularExpression->re_data = CompiledExpression;
  209. return 0;
  210. }
  211. LIBC_API
  212. void
  213. regfree (
  214. regex_t *RegularExpression
  215. )
  216. /*++
  217. Routine Description:
  218. This routine destroys and frees all resources associated with a compiled
  219. regular expression.
  220. Arguments:
  221. RegularExpression - Supplies a pointer to the regular expression structure
  222. to destroy. The caller owns the structure itself, this routine just
  223. guts all the innards.
  224. Return Value:
  225. None.
  226. --*/
  227. {
  228. if (RegularExpression == NULL) {
  229. return;
  230. }
  231. ClpDestroyRegularExpression(RegularExpression->re_data);
  232. RegularExpression->re_data = NULL;
  233. RegularExpression->re_nsub = 0;
  234. return;
  235. }
  236. LIBC_API
  237. size_t
  238. regerror (
  239. int ErrorCode,
  240. const regex_t *Expression,
  241. char *Buffer,
  242. size_t BufferSize
  243. )
  244. /*++
  245. Routine Description:
  246. This routine returns error information about what went wrong trying to
  247. compile the regular expression.
  248. Arguments:
  249. ErrorCode - Supplies the error code returned from a regular expression
  250. token.
  251. Expression - Supplies an optional pointer to the expression.
  252. Buffer - Supplies a pointer to a buffer where the error string will be
  253. returned, always null terminated.
  254. BufferSize - Supplies the size of the buffer in bytes.
  255. Return Value:
  256. Returns the number of bytes needed to hold the entire error string,
  257. including the null terminator. If the return value is greater than the
  258. supplied size, then the buffer will be truncated and null terminated.
  259. --*/
  260. {
  261. int BytesPrinted;
  262. PSTR ErrorString;
  263. switch (ErrorCode) {
  264. case 0:
  265. ErrorString = "No error";
  266. break;
  267. case REG_NOMATCH:
  268. ErrorString = "No match";
  269. break;
  270. case REG_BADPAT:
  271. ErrorString = "Bad pattern";
  272. break;
  273. case REG_ECOLLATE:
  274. ErrorString = "Invalid collating element";
  275. break;
  276. case REG_ECTYPE:
  277. ErrorString = "Invalid character class";
  278. break;
  279. case REG_EESCAPE:
  280. ErrorString = "Dangling escape character";
  281. break;
  282. case REG_ESUBREG:
  283. ErrorString = "Invalid subexpression";
  284. break;
  285. case REG_EBRACK:
  286. ErrorString = "Square bracket imbalance";
  287. break;
  288. case REG_EPAREN:
  289. ErrorString = "Parentheses imbalance";
  290. break;
  291. case REG_BADBR:
  292. ErrorString = "Invalid curly braces";
  293. break;
  294. case REG_ERANGE:
  295. ErrorString = "Invalid range expression";
  296. break;
  297. case REG_ESPACE:
  298. ErrorString = "Out of memory";
  299. break;
  300. case REG_BADRPT:
  301. ErrorString = "Bad repeat expression";
  302. break;
  303. default:
  304. ErrorString = "Unknown error";
  305. break;
  306. }
  307. BytesPrinted = snprintf(Buffer, BufferSize, "%s", ErrorString);
  308. if (BytesPrinted < 0) {
  309. BytesPrinted = REGULAR_EXPRESSION_ERROR_STRING_MAX_SIZE;
  310. if ((Buffer != NULL) && (BufferSize != 0)) {
  311. *Buffer = '\0';
  312. }
  313. } else if (BytesPrinted == BufferSize) {
  314. Buffer[BufferSize - 1] = '\0';
  315. BytesPrinted = REGULAR_EXPRESSION_ERROR_STRING_MAX_SIZE;
  316. } else {
  317. //
  318. // Include the null terminator.
  319. //
  320. BytesPrinted += 1;
  321. }
  322. return BytesPrinted;
  323. }
  324. //
  325. // --------------------------------------------------------- Internal Functions
  326. //
  327. REGULAR_EXPRESSION_STATUS
  328. ClpCompileRegularExpression (
  329. PSTR Pattern,
  330. ULONG Flags,
  331. PREGULAR_EXPRESSION *Expression
  332. )
  333. /*++
  334. Routine Description:
  335. This routine compiles a regular expression.
  336. Arguments:
  337. Pattern - Supplies a pointer to the pattern input string.
  338. Flags - Supplies a bitfield of flags governing the behavior of the regular
  339. expression. See some REG_* definitions.
  340. Expression - Supplies a pointer where a pointer to the regular expression
  341. will be returned on success. The caller is responsible for calling the
  342. appropriate destroy routine for this structure.
  343. Return Value:
  344. Regular expression status code.
  345. --*/
  346. {
  347. REGULAR_EXPRESSION_LEXER Lexer;
  348. PREGULAR_EXPRESSION Result;
  349. REGULAR_EXPRESSION_STATUS Status;
  350. //
  351. // Allocate and initialize the basic structures.
  352. //
  353. Result = malloc(sizeof(REGULAR_EXPRESSION));
  354. if (Result == NULL) {
  355. Status = RegexStatusNoMemory;
  356. goto CompileRegularExpressionEnd;
  357. }
  358. memset(Result, 0, sizeof(REGULAR_EXPRESSION));
  359. Result->BaseEntry.Type = RegexEntrySubexpression;
  360. INITIALIZE_LIST_HEAD(&(Result->BaseEntry.ChildList));
  361. Result->BaseEntry.DuplicateMin = 1;
  362. Result->BaseEntry.DuplicateMax = 1;
  363. Result->Flags = Flags;
  364. memset(&Lexer, 0, sizeof(REGULAR_EXPRESSION_LEXER));
  365. Lexer.Input = Pattern;
  366. Lexer.InputSize = strlen(Pattern) + 1;
  367. //
  368. // Prime the lexer.
  369. //
  370. Status = ClpGetRegularExpressionToken(&Lexer, Result);
  371. if (Status != RegexStatusSuccess) {
  372. goto CompileRegularExpressionEnd;
  373. }
  374. if ((Flags & REG_EXTENDED) != 0) {
  375. Status = ClpParseExtendedRegularExpression(&Lexer,
  376. Result,
  377. &(Result->BaseEntry));
  378. } else {
  379. Status = ClpParseCompleteBasicRegularExpression(&Lexer, Result);
  380. }
  381. if (Status != RegexStatusSuccess) {
  382. goto CompileRegularExpressionEnd;
  383. }
  384. //
  385. // Fail if this isn't the end.
  386. //
  387. if (Lexer.Token != '\0') {
  388. Status = RegexStatusBadPattern;
  389. goto CompileRegularExpressionEnd;
  390. }
  391. CompileRegularExpressionEnd:
  392. if (Status != RegexStatusSuccess) {
  393. if (Result != NULL) {
  394. ClpDestroyRegularExpression(Result);
  395. Result = NULL;
  396. }
  397. }
  398. *Expression = Result;
  399. return Status;
  400. }
  401. VOID
  402. ClpDestroyRegularExpression (
  403. PREGULAR_EXPRESSION Expression
  404. )
  405. /*++
  406. Routine Description:
  407. This routine destroys a regular expression structure.
  408. Arguments:
  409. Expression - Supplies a pointer to the regular expression structure to
  410. destroy.
  411. Return Value:
  412. None.
  413. --*/
  414. {
  415. PREGULAR_EXPRESSION_ENTRY Entry;
  416. if (Expression == NULL) {
  417. return;
  418. }
  419. while (LIST_EMPTY(&(Expression->BaseEntry.ChildList)) == FALSE) {
  420. Entry = LIST_VALUE(Expression->BaseEntry.ChildList.Next,
  421. REGULAR_EXPRESSION_ENTRY,
  422. ListEntry);
  423. LIST_REMOVE(&(Entry->ListEntry));
  424. ClpDestroyRegularExpressionEntry(Entry);
  425. }
  426. free(Expression);
  427. return;
  428. }
  429. REGULAR_EXPRESSION_STATUS
  430. ClpParseCompleteBasicRegularExpression (
  431. PREGULAR_EXPRESSION_LEXER Lexer,
  432. PREGULAR_EXPRESSION Expression
  433. )
  434. /*++
  435. Routine Description:
  436. This routine compiles a basic regular expression.
  437. Arguments:
  438. Lexer - Supplies a pointer to the initialized lexer.
  439. Expression - Supplies the expression to compile on.
  440. Return Value:
  441. Regular expression status code.
  442. --*/
  443. {
  444. ULONG EntryFlags;
  445. REGULAR_EXPRESSION_STATUS Status;
  446. EntryFlags = 0;
  447. //
  448. // Parse an optional left anchor (^).
  449. //
  450. if (Lexer->Token == '^') {
  451. EntryFlags |= REGULAR_EXPRESSION_ANCHORED_LEFT;
  452. Status = ClpGetRegularExpressionToken(Lexer, Expression);
  453. if (Status != RegexStatusSuccess) {
  454. goto ParseBasicRegularExpressionEnd;
  455. }
  456. }
  457. //
  458. // Parse an expression.
  459. //
  460. Status = ClpParseBasicRegularExpression(Lexer,
  461. Expression,
  462. &(Expression->BaseEntry));
  463. if (Status != RegexStatusSuccess) {
  464. goto ParseBasicRegularExpressionEnd;
  465. }
  466. //
  467. // Parse an optional right anchor.
  468. //
  469. if (Lexer->Token == '$') {
  470. EntryFlags |= REGULAR_EXPRESSION_ANCHORED_RIGHT;
  471. Status = ClpGetRegularExpressionToken(Lexer, Expression);
  472. if (Status != RegexStatusSuccess) {
  473. goto ParseBasicRegularExpressionEnd;
  474. }
  475. }
  476. Expression->BaseEntry.Flags = EntryFlags;
  477. ParseBasicRegularExpressionEnd:
  478. return Status;
  479. }
  480. REGULAR_EXPRESSION_STATUS
  481. ClpParseBasicRegularExpression (
  482. PREGULAR_EXPRESSION_LEXER Lexer,
  483. PREGULAR_EXPRESSION Expression,
  484. PREGULAR_EXPRESSION_ENTRY ParentEntry
  485. )
  486. /*++
  487. Routine Description:
  488. This routine parses a de-anchored expression for a basic regular
  489. expression.
  490. Arguments:
  491. Lexer - Supplies a pointer to the initialized lexer.
  492. Expression - Supplies the expression to compile on.
  493. ParentEntry - Supplies a pointer to the regular expression entry this entry
  494. belongs under.
  495. Return Value:
  496. Regular expression status code.
  497. --*/
  498. {
  499. PREGULAR_EXPRESSION_ENTRY Entry;
  500. PREGULAR_EXPRESSION_ENTRY PreviousEntry;
  501. BOOL Result;
  502. REGULAR_EXPRESSION_STATUS Status;
  503. Entry = NULL;
  504. PreviousEntry = NULL;
  505. //
  506. // Loop parsing simple expressions and duplication symbols.
  507. //
  508. while (TRUE) {
  509. Status = ClpParseSimpleExpression(Lexer, Expression, &Entry);
  510. if (Status != RegexStatusSuccess) {
  511. goto ParseRegularExpressionEnd;
  512. }
  513. if (Entry == NULL) {
  514. //
  515. // Don't allow repeat symbols coming up if there was no entry,
  516. // which could happen if the first input is a repeat character.
  517. //
  518. if ((Lexer->Token == '*') ||
  519. (Lexer->Token == TOKEN_ESCAPED_OPEN_BRACE)) {
  520. Status = RegexStatusInvalidRepeat;
  521. }
  522. break;
  523. }
  524. Status = ClpParseRegularExpressionDuplicationSymbol(Lexer,
  525. Expression,
  526. Entry);
  527. if (Status != RegexStatusSuccess) {
  528. goto ParseRegularExpressionEnd;
  529. }
  530. //
  531. // Here's a little optimization. If this entry is just an ordinary
  532. // character and the last one is too, combine them into one entry to
  533. // avoid a gigantic chain of single character expression entries.
  534. // Watch out not to do this if either one had duplicate symbols on it
  535. // (ie "f*a" can't just be combined to "fa").
  536. //
  537. if ((Entry->Type == RegexEntryOrdinaryCharacters) &&
  538. (Entry->DuplicateMin == 1) && (Entry->DuplicateMax == 1) &&
  539. (PreviousEntry != NULL) &&
  540. (PreviousEntry->Type == RegexEntryOrdinaryCharacters) &&
  541. (PreviousEntry->DuplicateMin == 1) &&
  542. (PreviousEntry->DuplicateMax == 1)) {
  543. Result = ClpAppendRegularExpressionString(
  544. &(PreviousEntry->U.String),
  545. Entry->U.String.Data,
  546. Entry->U.String.Size);
  547. if (Result == FALSE) {
  548. Status = RegexStatusNoMemory;
  549. goto ParseRegularExpressionEnd;
  550. }
  551. ClpDestroyRegularExpressionEntry(Entry);
  552. Entry = NULL;
  553. continue;
  554. }
  555. //
  556. // Add this expression entry to the parent.
  557. //
  558. Entry->Parent = ParentEntry;
  559. INSERT_BEFORE(&(Entry->ListEntry), &(ParentEntry->ChildList));
  560. PreviousEntry = Entry;
  561. Entry = NULL;
  562. }
  563. ParseRegularExpressionEnd:
  564. if (Entry != NULL) {
  565. ClpDestroyRegularExpressionEntry(Entry);
  566. }
  567. return Status;
  568. }
  569. REGULAR_EXPRESSION_STATUS
  570. ClpParseSimpleExpression (
  571. PREGULAR_EXPRESSION_LEXER Lexer,
  572. PREGULAR_EXPRESSION Expression,
  573. PREGULAR_EXPRESSION_ENTRY *Entry
  574. )
  575. /*++
  576. Routine Description:
  577. This routine parses a simple expression for a basic regular expression.
  578. Arguments:
  579. Lexer - Supplies a pointer to the initialized lexer.
  580. Expression - Supplies the expression to compile on.
  581. Entry - Supplies a pointer where the regular expression entry will be
  582. returned on success.
  583. Return Value:
  584. Regular expression status code.
  585. --*/
  586. {
  587. CHAR Character;
  588. PREGULAR_EXPRESSION_ENTRY NewEntry;
  589. BOOL Result;
  590. REGULAR_EXPRESSION_STATUS Status;
  591. Status = RegexStatusSuccess;
  592. NewEntry = ClpCreateRegularExpressionEntry(RegexEntryInvalid);
  593. if (NewEntry == NULL) {
  594. Status = RegexStatusNoMemory;
  595. goto ParseSimpleExpressionEnd;
  596. }
  597. switch (Lexer->Token) {
  598. case '.':
  599. NewEntry->Type = RegexEntryAnyCharacter;
  600. break;
  601. case '[':
  602. Status = ClpParseBracketExpression(Lexer, Expression, NewEntry);
  603. if (Status != RegexStatusSuccess) {
  604. goto ParseSimpleExpressionEnd;
  605. }
  606. break;
  607. //
  608. // Parse a subexpression.
  609. //
  610. case TOKEN_ESCAPED_OPEN_PARENTHESES:
  611. NewEntry->Type = RegexEntrySubexpression;
  612. //
  613. // Zoom past the open parentheses.
  614. //
  615. Status = ClpGetRegularExpressionToken(Lexer, Expression);
  616. if (Status != RegexStatusSuccess) {
  617. goto ParseSimpleExpressionEnd;
  618. }
  619. //
  620. // Take a subexpression number and parse a subexpression.
  621. //
  622. Expression->SubexpressionCount += 1;
  623. NewEntry->U.SubexpressionNumber = Expression->SubexpressionCount;
  624. Status = ClpParseBasicRegularExpression(Lexer,
  625. Expression,
  626. NewEntry);
  627. if (Status != RegexStatusSuccess) {
  628. goto ParseSimpleExpressionEnd;
  629. }
  630. //
  631. // Get the close parentheses and swallow it.
  632. //
  633. if (Lexer->Token != TOKEN_ESCAPED_CLOSE_PARENTHESES) {
  634. Status = RegexStatusParenthesesImbalance;
  635. goto ParseSimpleExpressionEnd;
  636. }
  637. break;
  638. //
  639. // Parse a quoted character like a normal character.
  640. //
  641. case TOKEN_QUOTED_CHARACTER:
  642. NewEntry->Type = RegexEntryOrdinaryCharacters;
  643. assert(Lexer->NextInput != 0);
  644. Character = Lexer->Input[Lexer->NextInput - 1];
  645. Result = ClpAppendRegularExpressionString(&(NewEntry->U.String),
  646. &Character,
  647. 1);
  648. if (Result == FALSE) {
  649. goto ParseSimpleExpressionEnd;
  650. }
  651. break;
  652. //
  653. // Parse a back reference.
  654. //
  655. case TOKEN_BACK_REFERENCE:
  656. NewEntry->Type = RegexEntryBackReference;
  657. assert(Lexer->NextInput != 0);
  658. Character = Lexer->Input[Lexer->NextInput - 1];
  659. //
  660. // The lexer shouldn't be returning the back reference token unless
  661. // it's valid.
  662. //
  663. assert((Character >= '1') && (Character <= '9'));
  664. NewEntry->U.BackReferenceNumber = Character - '0';
  665. if (NewEntry->U.BackReferenceNumber > Expression->SubexpressionCount) {
  666. Status = RegexStatusInvalidSubexpression;
  667. goto ParseSimpleExpressionEnd;
  668. }
  669. break;
  670. //
  671. // Some items are not simple expression entries by themselves. As an oddity,
  672. // one of these characters at the beginning of a basic regular expression
  673. // is considered a normal character.
  674. //
  675. case '*':
  676. case TOKEN_ESCAPED_CLOSE_PARENTHESES:
  677. case TOKEN_ESCAPED_OPEN_BRACE:
  678. case TOKEN_ESCAPED_CLOSE_BRACE:
  679. case '\0':
  680. if ((Lexer->Token == '\0') || (Lexer->NextInput != 1)) {
  681. ClpDestroyRegularExpressionEntry(NewEntry);
  682. NewEntry = NULL;
  683. goto ParseSimpleExpressionEnd;
  684. }
  685. //
  686. // Fall through.
  687. //
  688. //
  689. // This must be an ordinary character.
  690. //
  691. default:
  692. assert(Lexer->Token < MAX_UCHAR);
  693. NewEntry->Type = RegexEntryOrdinaryCharacters;
  694. Character = Lexer->Token;
  695. //
  696. // Watch out for a dollar sign at the end, which is actually an anchor.
  697. //
  698. if ((Character == '$') &&
  699. ((Lexer->NextInput >= Lexer->InputSize) ||
  700. (Lexer->Input[Lexer->NextInput] == '\0'))) {
  701. ClpDestroyRegularExpressionEntry(NewEntry);
  702. NewEntry = NULL;
  703. goto ParseSimpleExpressionEnd;
  704. }
  705. Result = ClpAppendRegularExpressionString(&(NewEntry->U.String),
  706. &Character,
  707. 1);
  708. if (Result == FALSE) {
  709. goto ParseSimpleExpressionEnd;
  710. }
  711. break;
  712. }
  713. //
  714. // Swallow the token that has just been dealt with.
  715. //
  716. Status = ClpGetRegularExpressionToken(Lexer, Expression);
  717. if (Status != RegexStatusSuccess) {
  718. goto ParseSimpleExpressionEnd;
  719. }
  720. ParseSimpleExpressionEnd:
  721. if (Status != RegexStatusSuccess) {
  722. if (NewEntry != NULL) {
  723. ClpDestroyRegularExpressionEntry(NewEntry);
  724. NewEntry = NULL;
  725. }
  726. }
  727. *Entry = NewEntry;
  728. return Status;
  729. }
  730. REGULAR_EXPRESSION_STATUS
  731. ClpParseExtendedRegularExpression (
  732. PREGULAR_EXPRESSION_LEXER Lexer,
  733. PREGULAR_EXPRESSION Expression,
  734. PREGULAR_EXPRESSION_ENTRY ParentEntry
  735. )
  736. /*++
  737. Routine Description:
  738. This routine parses an extended regular expression.
  739. Arguments:
  740. Lexer - Supplies a pointer to the initialized lexer.
  741. Expression - Supplies the expression to compile on.
  742. ParentEntry - Supplies a pointer to the parent any new expression entries
  743. should be placed under.
  744. Return Value:
  745. Regular expression status code.
  746. --*/
  747. {
  748. PREGULAR_EXPRESSION_ENTRY Branch;
  749. PREGULAR_EXPRESSION_ENTRY Child;
  750. PREGULAR_EXPRESSION_ENTRY Entry;
  751. REGULAR_EXPRESSION_STATUS Status;
  752. Entry = NULL;
  753. //
  754. // Create the branch umbrella.
  755. //
  756. Branch = ClpCreateRegularExpressionEntry(RegexEntryBranch);
  757. if (Branch == NULL) {
  758. Status = RegexStatusNoMemory;
  759. goto ParseExtendedRegularExpressionEnd;
  760. }
  761. //
  762. // Loop creating the branch options.
  763. //
  764. while (TRUE) {
  765. //
  766. // Create a branch entry to contain the upcoming expression.
  767. //
  768. Entry = ClpCreateRegularExpressionEntry(RegexEntryBranchOption);
  769. if (Entry == NULL) {
  770. Status = RegexStatusNoMemory;
  771. goto ParseExtendedRegularExpressionEnd;
  772. }
  773. //
  774. // Parse out the contents of the branch.
  775. //
  776. Status = ClpParseExtendedRegularExpressionBranch(Lexer,
  777. Expression,
  778. Entry);
  779. if (Status != RegexStatusSuccess) {
  780. goto ParseExtendedRegularExpressionEnd;
  781. }
  782. //
  783. // Add this branch to the parent.
  784. //
  785. Entry->Parent = Branch;
  786. INSERT_BEFORE(&(Entry->ListEntry), &(Branch->ChildList));
  787. Entry = NULL;
  788. //
  789. // Stop if there's not more.
  790. //
  791. if (Lexer->Token != '|') {
  792. break;
  793. }
  794. //
  795. // Get past that pipe and go around again.
  796. //
  797. Status = ClpGetRegularExpressionToken(Lexer, Expression);
  798. if (Status != RegexStatusSuccess) {
  799. goto ParseExtendedRegularExpressionEnd;
  800. }
  801. }
  802. //
  803. // If there's only one branch option, pull all the children off of the
  804. // branch option and stick them on the parent. Then the branch and branch
  805. // option entries can be destroyed.
  806. //
  807. assert(LIST_EMPTY(&(Branch->ChildList)) == FALSE);
  808. if (Branch->ChildList.Next->Next == &(Branch->ChildList)) {
  809. Entry = LIST_VALUE(Branch->ChildList.Next,
  810. REGULAR_EXPRESSION_ENTRY,
  811. ListEntry);
  812. while (LIST_EMPTY(&(Entry->ChildList)) == FALSE) {
  813. Child = LIST_VALUE(Entry->ChildList.Next,
  814. REGULAR_EXPRESSION_ENTRY,
  815. ListEntry);
  816. LIST_REMOVE(&(Child->ListEntry));
  817. INSERT_BEFORE(&(Child->ListEntry), &(ParentEntry->ChildList));
  818. Child->Parent = ParentEntry;
  819. }
  820. ClpDestroyRegularExpressionEntry(Branch);
  821. Entry = NULL;
  822. Branch = NULL;
  823. //
  824. // There are multiple branch options, so put the branch entry on the parent.
  825. //
  826. } else {
  827. Branch->Parent = ParentEntry;
  828. INSERT_BEFORE(&(Branch->ListEntry), &(ParentEntry->ChildList));
  829. }
  830. ParseExtendedRegularExpressionEnd:
  831. if (Status != RegexStatusSuccess) {
  832. if (Branch != NULL) {
  833. ClpDestroyRegularExpressionEntry(Branch);
  834. }
  835. }
  836. if (Entry != NULL) {
  837. ClpDestroyRegularExpressionEntry(Entry);
  838. }
  839. return Status;
  840. }
  841. REGULAR_EXPRESSION_STATUS
  842. ClpParseExtendedRegularExpressionBranch (
  843. PREGULAR_EXPRESSION_LEXER Lexer,
  844. PREGULAR_EXPRESSION Expression,
  845. PREGULAR_EXPRESSION_ENTRY ParentEntry
  846. )
  847. /*++
  848. Routine Description:
  849. This routine parses a single extended regular expression branch (ie the
  850. expression "a|b|c" has three branches, this routine parses just one
  851. of them).
  852. Arguments:
  853. Lexer - Supplies a pointer to the initialized lexer.
  854. Expression - Supplies the expression to compile on.
  855. ParentEntry - Supplies a pointer to the parent any new expression entries
  856. should be placed under.
  857. Return Value:
  858. Regular expression status code.
  859. --*/
  860. {
  861. PREGULAR_EXPRESSION_ENTRY Entry;
  862. PREGULAR_EXPRESSION_ENTRY PreviousEntry;
  863. BOOL Result;
  864. REGULAR_EXPRESSION_STATUS Status;
  865. Entry = NULL;
  866. PreviousEntry = NULL;
  867. //
  868. // Loop parsing simple expressions and duplication symbols.
  869. //
  870. while (TRUE) {
  871. Status = ClpParseExtendedExpression(Lexer, Expression, &Entry);
  872. if (Status != RegexStatusSuccess) {
  873. goto ParseRegularExpressionEnd;
  874. }
  875. if (Entry == NULL) {
  876. //
  877. // Don't allow repeat symbols coming up if there was no entry,
  878. // which could happen if the first input is a repeat character.
  879. //
  880. if ((Lexer->Token == '*') || (Lexer->Token == '+') ||
  881. (Lexer->Token == '?') || (Lexer->Token == '{')) {
  882. Status = RegexStatusInvalidRepeat;
  883. }
  884. break;
  885. }
  886. Status = ClpParseRegularExpressionDuplicationSymbol(Lexer,
  887. Expression,
  888. Entry);
  889. if (Status != RegexStatusSuccess) {
  890. goto ParseRegularExpressionEnd;
  891. }
  892. //
  893. // Here's a little optimization. If this entry is just an ordinary
  894. // character and the last one is too, combine them into one entry to
  895. // avoid a gigantic chain of single character expression entries.
  896. // Watch out not to do this if either one had duplicate symbols on it
  897. // (ie "f*a" can't just be combined to "fa").
  898. //
  899. if ((Entry->Type == RegexEntryOrdinaryCharacters) &&
  900. (Entry->DuplicateMin == 1) && (Entry->DuplicateMax == 1) &&
  901. (PreviousEntry != NULL) &&
  902. (PreviousEntry->Type == RegexEntryOrdinaryCharacters) &&
  903. (PreviousEntry->DuplicateMin == 1) &&
  904. (PreviousEntry->DuplicateMax == 1)) {
  905. Result = ClpAppendRegularExpressionString(
  906. &(PreviousEntry->U.String),
  907. Entry->U.String.Data,
  908. Entry->U.String.Size);
  909. if (Result == FALSE) {
  910. Status = RegexStatusNoMemory;
  911. goto ParseRegularExpressionEnd;
  912. }
  913. ClpDestroyRegularExpressionEntry(Entry);
  914. Entry = NULL;
  915. continue;
  916. }
  917. //
  918. // Add this expression entry to the parent.
  919. //
  920. Entry->Parent = ParentEntry;
  921. INSERT_BEFORE(&(Entry->ListEntry), &(ParentEntry->ChildList));
  922. PreviousEntry = Entry;
  923. Entry = NULL;
  924. }
  925. ParseRegularExpressionEnd:
  926. if (Entry != NULL) {
  927. ClpDestroyRegularExpressionEntry(Entry);
  928. }
  929. return Status;
  930. }
  931. REGULAR_EXPRESSION_STATUS
  932. ClpParseExtendedExpression (
  933. PREGULAR_EXPRESSION_LEXER Lexer,
  934. PREGULAR_EXPRESSION Expression,
  935. PREGULAR_EXPRESSION_ENTRY *Entry
  936. )
  937. /*++
  938. Routine Description:
  939. This routine parses a base expression ("ERE_expression" as described in the
  940. specs) for an extended regular expression.
  941. Arguments:
  942. Lexer - Supplies a pointer to the initialized lexer.
  943. Expression - Supplies the expression to compile on.
  944. Entry - Supplies a pointer where the regular expression entry will be
  945. returned on success.
  946. Return Value:
  947. Regular expression status code.
  948. --*/
  949. {
  950. CHAR Character;
  951. PREGULAR_EXPRESSION_ENTRY NewEntry;
  952. BOOL Result;
  953. REGULAR_EXPRESSION_STATUS Status;
  954. Status = RegexStatusSuccess;
  955. NewEntry = ClpCreateRegularExpressionEntry(RegexEntryInvalid);
  956. if (NewEntry == NULL) {
  957. Status = RegexStatusNoMemory;
  958. goto ParseExtendedExpressionEnd;
  959. }
  960. switch (Lexer->Token) {
  961. case '.':
  962. NewEntry->Type = RegexEntryAnyCharacter;
  963. break;
  964. case '^':
  965. NewEntry->Type = RegexEntryStringBegin;
  966. break;
  967. case '$':
  968. NewEntry->Type = RegexEntryStringEnd;
  969. break;
  970. case '[':
  971. Status = ClpParseBracketExpression(Lexer, Expression, NewEntry);
  972. if (Status != RegexStatusSuccess) {
  973. goto ParseExtendedExpressionEnd;
  974. }
  975. break;
  976. //
  977. // Parse a subexpression.
  978. //
  979. case '(':
  980. NewEntry->Type = RegexEntrySubexpression;
  981. //
  982. // Zoom past the open parentheses.
  983. //
  984. Status = ClpGetRegularExpressionToken(Lexer, Expression);
  985. if (Status != RegexStatusSuccess) {
  986. goto ParseExtendedExpressionEnd;
  987. }
  988. //
  989. // Take a subexpression number and parse the subexpression.
  990. //
  991. Lexer->ActiveSubexpressionCount += 1;
  992. Expression->SubexpressionCount += 1;
  993. NewEntry->U.SubexpressionNumber = Expression->SubexpressionCount;
  994. Status = ClpParseExtendedRegularExpression(Lexer,
  995. Expression,
  996. NewEntry);
  997. if (Status != RegexStatusSuccess) {
  998. goto ParseExtendedExpressionEnd;
  999. }
  1000. //
  1001. // Verify the close parentheses.
  1002. //
  1003. if (Lexer->Token != ')') {
  1004. Status = RegexStatusParenthesesImbalance;
  1005. goto ParseExtendedExpressionEnd;
  1006. }
  1007. Lexer->ActiveSubexpressionCount -= 1;
  1008. break;
  1009. //
  1010. // Parse a quoted character like a normal character.
  1011. //
  1012. case TOKEN_QUOTED_CHARACTER:
  1013. NewEntry->Type = RegexEntryOrdinaryCharacters;
  1014. assert(Lexer->NextInput != 0);
  1015. Character = Lexer->Input[Lexer->NextInput - 1];
  1016. Result = ClpAppendRegularExpressionString(&(NewEntry->U.String),
  1017. &Character,
  1018. 1);
  1019. if (Result == FALSE) {
  1020. goto ParseExtendedExpressionEnd;
  1021. }
  1022. break;
  1023. //
  1024. // Parse a back reference.
  1025. //
  1026. case TOKEN_BACK_REFERENCE:
  1027. NewEntry->Type = RegexEntryBackReference;
  1028. assert(Lexer->NextInput != 0);
  1029. Character = Lexer->Input[Lexer->NextInput - 1];
  1030. //
  1031. // The lexer shouldn't be returning the back reference token unless
  1032. // it's valid.
  1033. //
  1034. assert((Character >= '0') && (Character <= '9'));
  1035. NewEntry->U.BackReferenceNumber = Character - '0';
  1036. if (NewEntry->U.BackReferenceNumber > Expression->SubexpressionCount) {
  1037. Status = RegexStatusInvalidSubexpression;
  1038. goto ParseExtendedExpressionEnd;
  1039. }
  1040. break;
  1041. //
  1042. // Some items are not simple expression entries by themselves.
  1043. //
  1044. case '*':
  1045. case '+':
  1046. case '?':
  1047. case '{':
  1048. case '|':
  1049. case '\0':
  1050. ClpDestroyRegularExpressionEntry(NewEntry);
  1051. NewEntry = NULL;
  1052. goto ParseExtendedExpressionEnd;
  1053. //
  1054. // This must be an ordinary character.
  1055. //
  1056. default:
  1057. assert(Lexer->Token < MAX_UCHAR);
  1058. NewEntry->Type = RegexEntryOrdinaryCharacters;
  1059. Character = Lexer->Token;
  1060. //
  1061. // Watch out for a close parentheses if there are active open ones.
  1062. //
  1063. if ((Character == ')') && (Lexer->ActiveSubexpressionCount != 0)) {
  1064. ClpDestroyRegularExpressionEntry(NewEntry);
  1065. NewEntry = NULL;
  1066. goto ParseExtendedExpressionEnd;
  1067. }
  1068. Result = ClpAppendRegularExpressionString(&(NewEntry->U.String),
  1069. &Character,
  1070. 1);
  1071. if (Result == FALSE) {
  1072. goto ParseExtendedExpressionEnd;
  1073. }
  1074. break;
  1075. }
  1076. //
  1077. // Swallow the token that has just been dealt with.
  1078. //
  1079. Status = ClpGetRegularExpressionToken(Lexer, Expression);
  1080. if (Status != RegexStatusSuccess) {
  1081. goto ParseExtendedExpressionEnd;
  1082. }
  1083. ParseExtendedExpressionEnd:
  1084. if (Status != RegexStatusSuccess) {
  1085. if (NewEntry != NULL) {
  1086. ClpDestroyRegularExpressionEntry(NewEntry);
  1087. NewEntry = NULL;
  1088. }
  1089. }
  1090. *Entry = NewEntry;
  1091. return Status;
  1092. }
  1093. REGULAR_EXPRESSION_STATUS
  1094. ClpParseBracketExpression (
  1095. PREGULAR_EXPRESSION_LEXER Lexer,
  1096. PREGULAR_EXPRESSION Expression,
  1097. PREGULAR_EXPRESSION_ENTRY Entry
  1098. )
  1099. /*++
  1100. Routine Description:
  1101. This routine parses a bracket expression, which expresses a set of
  1102. characters or collating elements that satisfy the expression.
  1103. Arguments:
  1104. Lexer - Supplies a pointer to the initialized lexer.
  1105. Expression - Supplies the expression to compile on.
  1106. Entry - Supplies a pointer to the regular expression bracket entry.
  1107. Return Value:
  1108. Regular expression status code.
  1109. --*/
  1110. {
  1111. PREGULAR_BRACKET_ENTRY BracketEntry;
  1112. BRACKET_EXPRESSION_TYPE BracketType;
  1113. CHAR PreviousCharacter;
  1114. PREGULAR_EXPRESSION_STRING RegularCharacters;
  1115. BOOL Result;
  1116. REGULAR_EXPRESSION_STATUS Status;
  1117. BOOL Stop;
  1118. PSTR String;
  1119. assert(Lexer->Token == '[');
  1120. assert(Lexer->NextInput != 0);
  1121. //
  1122. // See if this is a start-of-word or end-of-word, and not actually a
  1123. // bracket expression.
  1124. //
  1125. String = Lexer->Input + Lexer->NextInput - 1;
  1126. if (strncmp(String, "[[:<:]]", 7) == 0) {
  1127. Entry->Type = RegexEntryStartOfWord;
  1128. Lexer->Input += 6;
  1129. Status = RegexStatusSuccess;
  1130. goto ParseBracketExpressionEnd;
  1131. } else if (strncmp(String, "[[:>:]]", 7) == 0) {
  1132. Entry->Type = RegexEntryEndOfWord;
  1133. Lexer->Input += 6;
  1134. Status = RegexStatusSuccess;
  1135. goto ParseBracketExpressionEnd;
  1136. }
  1137. Entry->Type = RegexEntryBracketExpression;
  1138. INITIALIZE_LIST_HEAD(&(Entry->U.BracketExpression.EntryList));
  1139. //
  1140. // Swallow the open bracket.
  1141. //
  1142. Status = ClpGetBracketExpressionToken(Lexer, Expression);
  1143. if (Status != RegexStatusSuccess) {
  1144. goto ParseBracketExpressionEnd;
  1145. }
  1146. RegularCharacters = &(Entry->U.BracketExpression.RegularCharacters);
  1147. //
  1148. // A circumflex negates the whole expression (ie matches on characters
  1149. // *not* in this set.
  1150. //
  1151. if (Lexer->Token == '^') {
  1152. Entry->Flags |= REGULAR_EXPRESSION_NEGATED;
  1153. Status = ClpGetBracketExpressionToken(Lexer, Expression);
  1154. if (Status != RegexStatusSuccess) {
  1155. goto ParseBracketExpressionEnd;
  1156. }
  1157. }
  1158. //
  1159. // A closing bracket or minus here is treated as an ordinary character.
  1160. //
  1161. if ((Lexer->Token == ']') || (Lexer->Token == '-')) {
  1162. Result = ClpAppendRegularExpressionString(RegularCharacters,
  1163. (PSTR)&(Lexer->Token),
  1164. 1);
  1165. if (Result == FALSE) {
  1166. Status = RegexStatusNoMemory;
  1167. goto ParseBracketExpressionEnd;
  1168. }
  1169. Status = ClpGetBracketExpressionToken(Lexer, Expression);
  1170. if (Status != RegexStatusSuccess) {
  1171. goto ParseBracketExpressionEnd;
  1172. }
  1173. }
  1174. //
  1175. // Loop adding characters to this bracket expression.
  1176. //
  1177. PreviousCharacter = 0;
  1178. Stop = FALSE;
  1179. while (TRUE) {
  1180. switch (Lexer->Token) {
  1181. case TOKEN_OPEN_COLON:
  1182. String = Lexer->Input + Lexer->NextInput;
  1183. BracketType = BracketExpressionInvalid;
  1184. if (strncmp(String, "alnum", 5) == 0) {
  1185. BracketType = BracketExpressionCharacterClassAlphanumeric;
  1186. Lexer->NextInput += 5;
  1187. } else if (strncmp(String, "alpha", 5) == 0) {
  1188. BracketType = BracketExpressionCharacterClassAlphabetic;
  1189. Lexer->NextInput += 5;
  1190. } else if (strncmp(String, "blank", 5) == 0) {
  1191. BracketType = BracketExpressionCharacterClassBlank;
  1192. Lexer->NextInput += 5;
  1193. } else if (strncmp(String, "cntrl", 5) == 0) {
  1194. BracketType = BracketExpressionCharacterClassControl;
  1195. Lexer->NextInput += 5;
  1196. } else if (strncmp(String, "digit", 5) == 0) {
  1197. BracketType = BracketExpressionCharacterClassDigit;
  1198. Lexer->NextInput += 5;
  1199. } else if (strncmp(String, "graph", 5) == 0) {
  1200. BracketType = BracketExpressionCharacterClassGraph;
  1201. Lexer->NextInput += 5;
  1202. } else if (strncmp(String, "lower", 5) == 0) {
  1203. BracketType = BracketExpressionCharacterClassLowercase;
  1204. Lexer->NextInput += 5;
  1205. } else if (strncmp(String, "print", 5) == 0) {
  1206. BracketType = BracketExpressionCharacterClassPrintable;
  1207. Lexer->NextInput += 5;
  1208. } else if (strncmp(String, "punct", 5) == 0) {
  1209. BracketType = BracketExpressionCharacterClassPunctuation;
  1210. Lexer->NextInput += 5;
  1211. } else if (strncmp(String, "space", 5) == 0) {
  1212. BracketType = BracketExpressionCharacterClassSpace;
  1213. Lexer->NextInput += 5;
  1214. } else if (strncmp(String, "upper", 5) == 0) {
  1215. BracketType = BracketExpressionCharacterClassUppercase;
  1216. Lexer->NextInput += 5;
  1217. } else if (strncmp(String, "xdigit", 6) == 0) {
  1218. BracketType = BracketExpressionCharacterClassHexDigit;
  1219. Lexer->NextInput += 6;
  1220. } else if (strncmp(String, "name", 4) == 0) {
  1221. BracketType = BracketExpressionCharacterClassName;
  1222. Lexer->NextInput += 5;
  1223. }
  1224. if (BracketType == BracketExpressionInvalid) {
  1225. Status = RegexStatusBadCharacterClass;
  1226. goto ParseBracketExpressionEnd;
  1227. }
  1228. BracketEntry = malloc(sizeof(REGULAR_BRACKET_ENTRY));
  1229. if (BracketEntry == NULL) {
  1230. Status = RegexStatusNoMemory;
  1231. goto ParseBracketExpressionEnd;
  1232. }
  1233. memset(BracketEntry, 0, sizeof(REGULAR_BRACKET_ENTRY));
  1234. BracketEntry->Type = BracketType;
  1235. INSERT_BEFORE(&(BracketEntry->ListEntry),
  1236. &(Entry->U.BracketExpression.EntryList));
  1237. //
  1238. // Swallow up the colon close.
  1239. //
  1240. Status = ClpGetBracketExpressionToken(Lexer, Expression);
  1241. if (Status != RegexStatusSuccess) {
  1242. goto ParseBracketExpressionEnd;
  1243. }
  1244. if (Lexer->Token != ':') {
  1245. Status = RegexStatusBadPattern;
  1246. goto ParseBracketExpressionEnd;
  1247. }
  1248. Status = ClpGetBracketExpressionToken(Lexer, Expression);
  1249. if (Status != RegexStatusSuccess) {
  1250. goto ParseBracketExpressionEnd;
  1251. }
  1252. if (Lexer->Token != ']') {
  1253. Status = RegexStatusBadPattern;
  1254. goto ParseBracketExpressionEnd;
  1255. }
  1256. break;
  1257. case TOKEN_OPEN_DOT:
  1258. printf("regex: Collating element support not implemented!\n");
  1259. //
  1260. // Spin until the dot close.
  1261. //
  1262. while ((PreviousCharacter != '.') || (Lexer->Token != ']')) {
  1263. if (Lexer->Token == '\0') {
  1264. Status = RegexStatusBracketImbalance;
  1265. goto ParseBracketExpressionEnd;
  1266. }
  1267. PreviousCharacter = (UCHAR)Lexer->Token;
  1268. Status = ClpGetBracketExpressionToken(Lexer, Expression);
  1269. if (Status != RegexStatusSuccess) {
  1270. goto ParseBracketExpressionEnd;
  1271. }
  1272. }
  1273. break;
  1274. case TOKEN_OPEN_EQUAL:
  1275. printf("regex: Equivalence class support not implemented!\n");
  1276. //
  1277. // Spin until the equal close.
  1278. //
  1279. while ((PreviousCharacter != '=') || (Lexer->Token != ']')) {
  1280. if (Lexer->Token == '\0') {
  1281. Status = RegexStatusBracketImbalance;
  1282. goto ParseBracketExpressionEnd;
  1283. }
  1284. PreviousCharacter = (UCHAR)Lexer->Token;
  1285. Status = ClpGetBracketExpressionToken(Lexer, Expression);
  1286. if (Status != RegexStatusSuccess) {
  1287. goto ParseBracketExpressionEnd;
  1288. }
  1289. }
  1290. break;
  1291. case ']':
  1292. Stop = TRUE;
  1293. break;
  1294. case '\0':
  1295. Status = RegexStatusBracketImbalance;
  1296. goto ParseBracketExpressionEnd;
  1297. default:
  1298. if (Lexer->Token > MAX_UCHAR) {
  1299. Status = RegexStatusBadPattern;
  1300. goto ParseBracketExpressionEnd;
  1301. }
  1302. //
  1303. // If the previous character was a -, this is actually a range.
  1304. // Pull the dash and first character off of the regular characters
  1305. // list, and create a range.
  1306. //
  1307. if (PreviousCharacter == '-') {
  1308. BracketEntry = malloc(sizeof(REGULAR_BRACKET_ENTRY));
  1309. if (BracketEntry == NULL) {
  1310. Status = RegexStatusNoMemory;
  1311. goto ParseBracketExpressionEnd;
  1312. }
  1313. memset(BracketEntry, 0, sizeof(REGULAR_BRACKET_ENTRY));
  1314. BracketEntry->Type = BracketExpressionRange;
  1315. assert(RegularCharacters->Size >= 2);
  1316. BracketEntry->U.Range.Minimum =
  1317. RegularCharacters->Data[RegularCharacters->Size - 2];
  1318. RegularCharacters->Size -= 2;
  1319. BracketEntry->U.Range.Maximum = Lexer->Token;
  1320. INSERT_BEFORE(&(BracketEntry->ListEntry),
  1321. &(Entry->U.BracketExpression.EntryList));
  1322. //
  1323. // This is a regular character and not part of a range (or at least
  1324. // the beginning character of the range. Add it to the regular
  1325. // character string.
  1326. //
  1327. } else {
  1328. Result = ClpAppendRegularExpressionString(RegularCharacters,
  1329. (PSTR)&(Lexer->Token),
  1330. 1);
  1331. if (Result == FALSE) {
  1332. Status = RegexStatusNoMemory;
  1333. goto ParseBracketExpressionEnd;
  1334. }
  1335. }
  1336. break;
  1337. }
  1338. if (Stop != FALSE) {
  1339. break;
  1340. }
  1341. if (Lexer->Token < MAX_UCHAR) {
  1342. PreviousCharacter = (CHAR)Lexer->Token;
  1343. }
  1344. Status = ClpGetBracketExpressionToken(Lexer, Expression);
  1345. if (Status != RegexStatusSuccess) {
  1346. goto ParseBracketExpressionEnd;
  1347. }
  1348. }
  1349. ParseBracketExpressionEnd:
  1350. return Status;
  1351. }
  1352. REGULAR_EXPRESSION_STATUS
  1353. ClpParseRegularExpressionDuplicationSymbol (
  1354. PREGULAR_EXPRESSION_LEXER Lexer,
  1355. PREGULAR_EXPRESSION Expression,
  1356. PREGULAR_EXPRESSION_ENTRY Entry
  1357. )
  1358. /*++
  1359. Routine Description:
  1360. This routine parses any optional duplication symbols for the regular
  1361. expression.
  1362. Arguments:
  1363. Lexer - Supplies a pointer to the initialized lexer.
  1364. Expression - Supplies the expression to compile on.
  1365. Entry - Supplies a pointer where the regular expression entry will be
  1366. returned on success.
  1367. Return Value:
  1368. Regular expression status code.
  1369. --*/
  1370. {
  1371. REGULAR_EXPRESSION_STATUS Status;
  1372. BOOL SwallowToken;
  1373. Status = RegexStatusSuccess;
  1374. SwallowToken = FALSE;
  1375. while (TRUE) {
  1376. //
  1377. // Stars are pretty easy, they're just zero or more occurrences.
  1378. //
  1379. if (Lexer->Token == '*') {
  1380. Entry->DuplicateMin = 0;
  1381. Entry->DuplicateMax = -1;
  1382. SwallowToken = TRUE;
  1383. //
  1384. // Extended regular expressions are fancy in that they have a couple
  1385. // extra duplication options.
  1386. //
  1387. } else if ((Expression->Flags & REG_EXTENDED) != 0) {
  1388. //
  1389. // Handle a +, which is one or more occurrences.
  1390. //
  1391. if (Lexer->Token == '+') {
  1392. if (Entry->DuplicateMin > 1) {
  1393. Entry->DuplicateMin = 1;
  1394. }
  1395. Entry->DuplicateMax = -1;
  1396. SwallowToken = TRUE;
  1397. //
  1398. // Handle a ?, which is zero or one occurrences.
  1399. //
  1400. } else if (Lexer->Token == '?') {
  1401. Entry->DuplicateMin = 0;
  1402. Entry->DuplicateMax = 1;
  1403. SwallowToken = TRUE;
  1404. //
  1405. // Handle an opening curly brace, which just like the escaped curly
  1406. // brace in basic regular expressions in that it takes the form {M},
  1407. // {M,}, or {M,N} specifying the minimum and maximum number of
  1408. // occurrences (inclusive).
  1409. //
  1410. } else if (Lexer->Token == '{') {
  1411. Status = ClpParseRegularExpressionDuplicationCount(Lexer,
  1412. Expression,
  1413. Entry);
  1414. if (Status != RegexStatusSuccess) {
  1415. goto ParseRegularExpressionDuplicationSymbolEnd;
  1416. }
  1417. } else {
  1418. Status = RegexStatusSuccess;
  1419. goto ParseRegularExpressionDuplicationSymbolEnd;
  1420. }
  1421. //
  1422. // Basic expressions only. The only other duplication basic expressions
  1423. // can do is backquote braces.
  1424. //
  1425. } else {
  1426. if (Lexer->Token != TOKEN_ESCAPED_OPEN_BRACE) {
  1427. Status = RegexStatusSuccess;
  1428. goto ParseRegularExpressionDuplicationSymbolEnd;
  1429. }
  1430. //
  1431. // Parse the duplication range.
  1432. //
  1433. Status = ClpParseRegularExpressionDuplicationCount(Lexer,
  1434. Expression,
  1435. Entry);
  1436. if (Status != RegexStatusSuccess) {
  1437. goto ParseRegularExpressionDuplicationSymbolEnd;
  1438. }
  1439. }
  1440. if (SwallowToken != FALSE) {
  1441. SwallowToken = FALSE;
  1442. Status = ClpGetRegularExpressionToken(Lexer, Expression);
  1443. if (Status != RegexStatusSuccess) {
  1444. goto ParseRegularExpressionDuplicationSymbolEnd;
  1445. }
  1446. }
  1447. }
  1448. ParseRegularExpressionDuplicationSymbolEnd:
  1449. if (SwallowToken != FALSE) {
  1450. assert(Status == RegexStatusSuccess);
  1451. Status = ClpGetRegularExpressionToken(Lexer, Expression);
  1452. }
  1453. return Status;
  1454. }
  1455. REGULAR_EXPRESSION_STATUS
  1456. ClpParseRegularExpressionDuplicationCount (
  1457. PREGULAR_EXPRESSION_LEXER Lexer,
  1458. PREGULAR_EXPRESSION Expression,
  1459. PREGULAR_EXPRESSION_ENTRY Entry
  1460. )
  1461. /*++
  1462. Routine Description:
  1463. This routine parses a duplication count, which takes the form "M", "M,", or
  1464. "M,N", followed by either a close curly brace or close escaped curly brace
  1465. depending on whether extended mode is on or not.
  1466. Arguments:
  1467. Lexer - Supplies a pointer to the initialized lexer.
  1468. Expression - Supplies the expression to compile on.
  1469. Entry - Supplies a pointer where the regular expression entry will be
  1470. returned on success.
  1471. Return Value:
  1472. Regular expression status code.
  1473. --*/
  1474. {
  1475. PSTR AfterScan;
  1476. PSTR BeforeScan;
  1477. LONG Begin;
  1478. CHAR Character;
  1479. LONG End;
  1480. REGULAR_EXPRESSION_STATUS Status;
  1481. End = Entry->DuplicateMax;
  1482. //
  1483. // Get the first number.
  1484. //
  1485. BeforeScan = Lexer->Input + Lexer->NextInput;
  1486. Begin = strtol(BeforeScan, &AfterScan, 10);
  1487. if ((Begin < 0) || (AfterScan == BeforeScan)) {
  1488. Status = RegexStatusInvalidBraces;
  1489. goto ClpParseRegularExpressionDuplicationCountEnd;
  1490. }
  1491. End = Begin;
  1492. Lexer->NextInput += (UINTN)AfterScan - (UINTN)BeforeScan;
  1493. assert(Lexer->NextInput <= Lexer->InputSize);
  1494. while ((Lexer->NextInput < Lexer->InputSize) &&
  1495. (isspace(Lexer->Input[Lexer->NextInput]))) {
  1496. Lexer->NextInput += 1;
  1497. }
  1498. if (Lexer->NextInput == Lexer->InputSize) {
  1499. Status = RegexStatusInvalidBraces;
  1500. goto ClpParseRegularExpressionDuplicationCountEnd;
  1501. }
  1502. //
  1503. // If there's a comma, swallow that and get an optional second number.
  1504. //
  1505. if (Lexer->Input[Lexer->NextInput] == ',') {
  1506. Lexer->NextInput += 1;
  1507. while ((Lexer->NextInput < Lexer->InputSize) &&
  1508. (isspace(Lexer->Input[Lexer->NextInput]))) {
  1509. Lexer->NextInput += 1;
  1510. }
  1511. if (Lexer->NextInput == Lexer->InputSize) {
  1512. Status = RegexStatusInvalidBraces;
  1513. goto ClpParseRegularExpressionDuplicationCountEnd;
  1514. }
  1515. Character = Lexer->Input[Lexer->NextInput];
  1516. if ((Character >= '1') && (Character <= '9')) {
  1517. BeforeScan = Lexer->Input + Lexer->NextInput;
  1518. End = strtol(BeforeScan, &AfterScan, 10);
  1519. if ((End < 0) || (AfterScan == BeforeScan)) {
  1520. Status = RegexStatusInvalidBraces;
  1521. goto ClpParseRegularExpressionDuplicationCountEnd;
  1522. }
  1523. Lexer->NextInput += (UINTN)AfterScan - (UINTN)BeforeScan;
  1524. assert(Lexer->NextInput <= Lexer->InputSize);
  1525. while ((Lexer->NextInput < Lexer->InputSize) &&
  1526. (isspace(Lexer->Input[Lexer->NextInput]))) {
  1527. Lexer->NextInput += 1;
  1528. }
  1529. if (Lexer->NextInput == Lexer->InputSize) {
  1530. Status = RegexStatusInvalidBraces;
  1531. goto ClpParseRegularExpressionDuplicationCountEnd;
  1532. }
  1533. //
  1534. // In the {M,} form, the pattern matches at least M times with no
  1535. // upper limit.
  1536. //
  1537. } else {
  1538. End = -1;
  1539. }
  1540. }
  1541. //
  1542. // Now, get the next token and verify that it's a closing brace.
  1543. //
  1544. Status = ClpGetRegularExpressionToken(Lexer, Expression);
  1545. if (Status != RegexStatusSuccess) {
  1546. goto ClpParseRegularExpressionDuplicationCountEnd;
  1547. }
  1548. if ((Expression->Flags & REG_EXTENDED) != 0) {
  1549. if (Lexer->Token != '}') {
  1550. Status = RegexStatusInvalidBraces;
  1551. goto ClpParseRegularExpressionDuplicationCountEnd;
  1552. }
  1553. } else {
  1554. if (Lexer->Token != TOKEN_ESCAPED_CLOSE_BRACE) {
  1555. Status = RegexStatusInvalidBraces;
  1556. goto ClpParseRegularExpressionDuplicationCountEnd;
  1557. }
  1558. }
  1559. //
  1560. // Swallow that ending token.
  1561. //
  1562. Status = ClpGetRegularExpressionToken(Lexer, Expression);
  1563. if (Status != RegexStatusSuccess) {
  1564. goto ClpParseRegularExpressionDuplicationCountEnd;
  1565. }
  1566. //
  1567. // Watch out for a backwards range.
  1568. //
  1569. if ((End != -1) && (End < Begin)) {
  1570. Status = RegexStatusInvalidBraces;
  1571. goto ClpParseRegularExpressionDuplicationCountEnd;
  1572. }
  1573. Status = RegexStatusSuccess;
  1574. ClpParseRegularExpressionDuplicationCountEnd:
  1575. if (Status == RegexStatusSuccess) {
  1576. if ((Begin < Entry->DuplicateMin) || (Entry->DuplicateMin == 1)) {
  1577. Entry->DuplicateMin = Begin;
  1578. }
  1579. if ((Entry->DuplicateMax != -1) && (End > Entry->DuplicateMax)) {
  1580. Entry->DuplicateMax = End;
  1581. }
  1582. }
  1583. return Status;
  1584. }
  1585. REGULAR_EXPRESSION_STATUS
  1586. ClpGetRegularExpressionToken (
  1587. PREGULAR_EXPRESSION_LEXER Lexer,
  1588. PREGULAR_EXPRESSION Expression
  1589. )
  1590. /*++
  1591. Routine Description:
  1592. This routine gets the next token out of the regular expression input.
  1593. Arguments:
  1594. Lexer - Supplies a pointer to the initialized lexer. The token will be
  1595. placed in the input lexer state.
  1596. Expression - Supplies the expression to compile on.
  1597. Return Value:
  1598. Regular expression status code.
  1599. --*/
  1600. {
  1601. CHAR Character;
  1602. REGULAR_EXPRESSION_STATUS Status;
  1603. Status = RegexStatusSuccess;
  1604. //
  1605. // Watch out for the end.
  1606. //
  1607. if ((Lexer->NextInput == Lexer->InputSize) ||
  1608. (Lexer->Input[Lexer->NextInput] == '\0')) {
  1609. Lexer->Token = '\0';
  1610. goto GetRegularExpressionTokenEnd;
  1611. }
  1612. Character = Lexer->Input[Lexer->NextInput];
  1613. Lexer->NextInput += 1;
  1614. //
  1615. // If it's just a regular character, send it on.
  1616. //
  1617. if (Character != '\\') {
  1618. Lexer->Token = (UCHAR)Character;
  1619. goto GetRegularExpressionTokenEnd;
  1620. }
  1621. //
  1622. // If this was the end, that's a dangling escape.
  1623. //
  1624. if ((Lexer->NextInput == Lexer->InputSize) ||
  1625. (Lexer->Input[Lexer->NextInput] == '\0')) {
  1626. Status = RegexStatusTrailingEscape;
  1627. goto GetRegularExpressionTokenEnd;
  1628. }
  1629. Character = Lexer->Input[Lexer->NextInput];
  1630. Lexer->NextInput += 1;
  1631. //
  1632. // Back references work in both regular and extended.
  1633. //
  1634. if ((Character >= '1') && (Character <= '9')) {
  1635. Lexer->Token = TOKEN_BACK_REFERENCE;
  1636. goto GetRegularExpressionTokenEnd;
  1637. //
  1638. // Some quoted characters are common to both basic and extended regular
  1639. // expressions.
  1640. //
  1641. } else if ((Character == '^') || (Character == '.') || (Character == '*') ||
  1642. (Character == '[') || (Character == '$') || (Character == ']') ||
  1643. (Character == '\\')) {
  1644. Lexer->Token = TOKEN_QUOTED_CHARACTER;
  1645. goto GetRegularExpressionTokenEnd;
  1646. }
  1647. //
  1648. // Check for tokens specific to extended regular expressions.
  1649. //
  1650. if ((Expression->Flags & REG_EXTENDED) != 0) {
  1651. //
  1652. // Some quoted characters are only quoted in extended regular
  1653. // expressions.
  1654. //
  1655. if ((Character == '(') || (Character == ')') || (Character == '|') ||
  1656. (Character == '+') || (Character == '?') || (Character == '{') ||
  1657. (Character == '}')) {
  1658. Lexer->Token = TOKEN_QUOTED_CHARACTER;
  1659. goto GetRegularExpressionTokenEnd;
  1660. }
  1661. //
  1662. // Basic regular expression syntax only.
  1663. //
  1664. } else {
  1665. if (Character == '(') {
  1666. Lexer->Token = TOKEN_ESCAPED_OPEN_PARENTHESES;
  1667. goto GetRegularExpressionTokenEnd;
  1668. } else if (Character == ')') {
  1669. Lexer->Token = TOKEN_ESCAPED_CLOSE_PARENTHESES;
  1670. goto GetRegularExpressionTokenEnd;
  1671. } else if (Character == '{') {
  1672. Lexer->Token = TOKEN_ESCAPED_OPEN_BRACE;
  1673. goto GetRegularExpressionTokenEnd;
  1674. } else if (Character == '}') {
  1675. Lexer->Token = TOKEN_ESCAPED_CLOSE_BRACE;
  1676. goto GetRegularExpressionTokenEnd;
  1677. }
  1678. }
  1679. //
  1680. // If it's quoting a special character, back up and send the backslash
  1681. // through directly.
  1682. //
  1683. if ((Character == '0') || (Character == 'n') || (Character == 'r') ||
  1684. (Character == 'f') || (Character == 't') || (Character == 'v') ||
  1685. (Character == 'b') || (Character == 'a')) {
  1686. Lexer->NextInput -= 1;
  1687. Lexer->Token = '\\';
  1688. goto GetRegularExpressionTokenEnd;
  1689. }
  1690. //
  1691. // This backslash doesn't seem to be quoting anything. Just serve up the
  1692. // next character.
  1693. //
  1694. Lexer->Token = (CHAR)Character;
  1695. GetRegularExpressionTokenEnd:
  1696. return Status;
  1697. }
  1698. REGULAR_EXPRESSION_STATUS
  1699. ClpGetBracketExpressionToken (
  1700. PREGULAR_EXPRESSION_LEXER Lexer,
  1701. PREGULAR_EXPRESSION Expression
  1702. )
  1703. /*++
  1704. Routine Description:
  1705. This routine gets the next token out of the regular expression input.
  1706. Arguments:
  1707. Lexer - Supplies a pointer to the initialized lexer. The token will be
  1708. placed in the input lexer state.
  1709. Expression - Supplies the expression to compile on.
  1710. Return Value:
  1711. Regular expression status code.
  1712. --*/
  1713. {
  1714. CHAR Character;
  1715. REGULAR_EXPRESSION_STATUS Status;
  1716. Status = RegexStatusSuccess;
  1717. //
  1718. // Watch out for the end.
  1719. //
  1720. if ((Lexer->NextInput == Lexer->InputSize) ||
  1721. (Lexer->Input[Lexer->NextInput] == '\0')) {
  1722. Lexer->Token = '\0';
  1723. goto GetBracketExpressionTokenEnd;
  1724. }
  1725. Character = Lexer->Input[Lexer->NextInput];
  1726. Lexer->NextInput += 1;
  1727. //
  1728. // If it's just a regular character, send it on.
  1729. //
  1730. if (Character != '[') {
  1731. Lexer->Token = (UCHAR)Character;
  1732. goto GetBracketExpressionTokenEnd;
  1733. }
  1734. //
  1735. // If this was the end, that's a dangling open bracket.
  1736. //
  1737. if ((Lexer->NextInput == Lexer->InputSize) ||
  1738. (Lexer->Input[Lexer->NextInput] == '\0')) {
  1739. Status = RegexStatusBracketImbalance;
  1740. goto GetBracketExpressionTokenEnd;
  1741. }
  1742. Character = Lexer->Input[Lexer->NextInput];
  1743. if (Character == '=') {
  1744. Lexer->Token = TOKEN_OPEN_EQUAL;
  1745. Lexer->NextInput += 1;
  1746. } else if (Character == '.') {
  1747. Lexer->Token = TOKEN_OPEN_DOT;
  1748. Lexer->NextInput += 1;
  1749. } else if (Character == ':') {
  1750. Lexer->Token = TOKEN_OPEN_COLON;
  1751. Lexer->NextInput += 1;
  1752. } else {
  1753. //
  1754. // This is just a plain Jane open bracket.
  1755. //
  1756. Lexer->Token = '[';
  1757. }
  1758. GetBracketExpressionTokenEnd:
  1759. return Status;
  1760. }
  1761. PREGULAR_EXPRESSION_ENTRY
  1762. ClpCreateRegularExpressionEntry (
  1763. REGEX_ENTRY_TYPE Type
  1764. )
  1765. /*++
  1766. Routine Description:
  1767. This routine creates a new regular expression entry of the given type.
  1768. Arguments:
  1769. Type - Supplies the type of entry to create.
  1770. Return Value:
  1771. Returns a pointer to the created entry on success.
  1772. NULL on allocation failure.
  1773. --*/
  1774. {
  1775. PREGULAR_EXPRESSION_ENTRY Entry;
  1776. Entry = malloc(sizeof(REGULAR_EXPRESSION_ENTRY));
  1777. if (Entry == NULL) {
  1778. return NULL;
  1779. }
  1780. memset(Entry, 0, sizeof(REGULAR_EXPRESSION_ENTRY));
  1781. Entry->Type = Type;
  1782. Entry->DuplicateMin = 1;
  1783. Entry->DuplicateMax = 1;
  1784. INITIALIZE_LIST_HEAD(&(Entry->ChildList));
  1785. return Entry;
  1786. }
  1787. VOID
  1788. ClpDestroyRegularExpressionEntry (
  1789. PREGULAR_EXPRESSION_ENTRY Entry
  1790. )
  1791. /*++
  1792. Routine Description:
  1793. This routine destroys a regular expression entry and recursively all of its
  1794. children. It assumes the entry has already been removed from any parent
  1795. list.
  1796. Arguments:
  1797. Entry - Supplies a pointer to the entry to destroy.
  1798. Return Value:
  1799. None.
  1800. --*/
  1801. {
  1802. PREGULAR_BRACKET_ENTRY BracketEntry;
  1803. PREGULAR_EXPRESSION_ENTRY Subentry;
  1804. switch (Entry->Type) {
  1805. case RegexEntryOrdinaryCharacters:
  1806. if (Entry->U.String.Data != NULL) {
  1807. free(Entry->U.String.Data);
  1808. }
  1809. break;
  1810. case RegexEntryInvalid:
  1811. case RegexEntryAnyCharacter:
  1812. case RegexEntryBackReference:
  1813. case RegexEntryStringBegin:
  1814. case RegexEntryStringEnd:
  1815. case RegexEntryBranch:
  1816. case RegexEntryBranchOption:
  1817. case RegexEntrySubexpression:
  1818. case RegexEntryStartOfWord:
  1819. case RegexEntryEndOfWord:
  1820. break;
  1821. case RegexEntryBracketExpression:
  1822. if (Entry->U.BracketExpression.RegularCharacters.Data != NULL) {
  1823. free(Entry->U.BracketExpression.RegularCharacters.Data);
  1824. }
  1825. while (LIST_EMPTY(&(Entry->U.BracketExpression.EntryList)) == FALSE) {
  1826. BracketEntry = LIST_VALUE(Entry->U.BracketExpression.EntryList.Next,
  1827. REGULAR_BRACKET_ENTRY,
  1828. ListEntry);
  1829. LIST_REMOVE(&(BracketEntry->ListEntry));
  1830. free(BracketEntry);
  1831. }
  1832. break;
  1833. default:
  1834. assert(FALSE);
  1835. return;
  1836. }
  1837. while (LIST_EMPTY(&(Entry->ChildList)) == FALSE) {
  1838. Subentry = LIST_VALUE(Entry->ChildList.Next,
  1839. REGULAR_EXPRESSION_ENTRY,
  1840. ListEntry);
  1841. LIST_REMOVE(&(Subentry->ListEntry));
  1842. ClpDestroyRegularExpressionEntry(Subentry);
  1843. }
  1844. free(Entry);
  1845. return;
  1846. }
  1847. BOOL
  1848. ClpAppendRegularExpressionString (
  1849. PREGULAR_EXPRESSION_STRING String,
  1850. PSTR Data,
  1851. ULONG Size
  1852. )
  1853. /*++
  1854. Routine Description:
  1855. This routine appends data onto a regular expression string.
  1856. Arguments:
  1857. String - Supplies a pointer to the string to append to.
  1858. Data - Supplies a pointer to the data to append.
  1859. Size - Supplies the number of bytes in the data buffer.
  1860. Return Value:
  1861. TRUE on success.
  1862. FALSE on allocation failure.
  1863. --*/
  1864. {
  1865. PVOID NewBuffer;
  1866. ULONG NewCapacity;
  1867. //
  1868. // Reallocate the string if needed.
  1869. //
  1870. if (String->Size + Size > String->Capacity) {
  1871. NewCapacity = String->Capacity;
  1872. if (NewCapacity == 0) {
  1873. NewCapacity = REGULAR_EXPRESSION_INITIAL_STRING_SIZE;
  1874. }
  1875. while (String->Size + Size > NewCapacity) {
  1876. NewCapacity *= 2;
  1877. }
  1878. NewBuffer = realloc(String->Data, NewCapacity);
  1879. if (NewBuffer == NULL) {
  1880. String->Capacity = 0;
  1881. return FALSE;
  1882. }
  1883. String->Data = NewBuffer;
  1884. String->Capacity = NewCapacity;
  1885. }
  1886. //
  1887. // Copy the new bytes in.
  1888. //
  1889. memcpy(String->Data + String->Size, Data, Size);
  1890. String->Size += Size;
  1891. return TRUE;
  1892. }