regextst.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041
  1. /*++
  2. Copyright (c) 2013 Minoca Corp.
  3. This file is licensed under the terms of the GNU General Public License
  4. version 3. Alternative licensing terms are available. Contact
  5. info@minocacorp.com for details. See the LICENSE file at the root of this
  6. project for complete licensing information.
  7. Module Name:
  8. regextst.c
  9. Abstract:
  10. This module implements the tests for the regular expression support within
  11. the C library.
  12. Author:
  13. Evan Green 9-Jul-2013
  14. Environment:
  15. Test
  16. --*/
  17. //
  18. // ------------------------------------------------------------------- Includes
  19. //
  20. #include "testc.h"
  21. #include <regex.h>
  22. #include <stdio.h>
  23. #include <stdlib.h>
  24. #include <string.h>
  25. //
  26. // ---------------------------------------------------------------- Definitions
  27. //
  28. //
  29. // Define the match count array size.
  30. //
  31. #define REGEX_TEST_MATCH_COUNT 5
  32. //
  33. // ------------------------------------------------------ Data Type Definitions
  34. //
  35. typedef struct _REGEX_EXECUTION_TEST_CASE {
  36. PSTR Pattern;
  37. INT CompileFlags;
  38. PSTR Input;
  39. INT InputFlags;
  40. INT ExecutionResult;
  41. regmatch_t ExpectedMatch[REGEX_TEST_MATCH_COUNT];
  42. } REGEX_EXECUTION_TEST_CASE, *PREGEX_EXECUTION_TEST_CASE;
  43. typedef struct _REGEX_COMPILE_TEST_CASE {
  44. PSTR Pattern;
  45. INT CompileFlags;
  46. INT SubexpressionCount;
  47. INT Result;
  48. } REGEX_COMPILE_TEST_CASE, *PREGEX_COMPILE_TEST_CASE;
  49. typedef struct _REGEX_ERROR_STRING {
  50. INT Code;
  51. PSTR String;
  52. } REGEX_ERROR_STRING, *PREGEX_ERROR_STRING;
  53. //
  54. // ----------------------------------------------- Internal Function Prototypes
  55. //
  56. BOOL
  57. TestRegularExpressionExecutionCase (
  58. ULONG Index,
  59. PREGEX_EXECUTION_TEST_CASE Case
  60. );
  61. BOOL
  62. TestRegularExpressionCompileCase (
  63. ULONG Index,
  64. PREGEX_COMPILE_TEST_CASE Case
  65. );
  66. PSTR
  67. TestRegexGetErrorCodeString (
  68. INT Code
  69. );
  70. //
  71. // -------------------------------------------------------------------- Globals
  72. //
  73. //
  74. // Define the execution test cases.
  75. //
  76. REGEX_EXECUTION_TEST_CASE RegexExecutionTestCases[] = {
  77. //
  78. // An empty pattern should match anything.
  79. //
  80. {
  81. "", 0,
  82. "a", 0,
  83. 0,
  84. {{0, 0}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  85. },
  86. //
  87. // A beginning circumflex should match anything.
  88. //
  89. {
  90. "^", 0,
  91. "", 0,
  92. 0,
  93. {{0, 0}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  94. },
  95. //
  96. // A beginning dollar sign should match anything.
  97. //
  98. {
  99. "$", 0,
  100. "a", 0,
  101. 0,
  102. {{1, 1}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  103. },
  104. //
  105. // A circumflex and dollar sign should match just the empty string.
  106. //
  107. {
  108. "^$", 0,
  109. "", 0,
  110. 0,
  111. {{0, 0}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  112. },
  113. {
  114. "^$", 0,
  115. "a", 0,
  116. REG_NOMATCH,
  117. {{-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  118. },
  119. //
  120. // Circumflexes should anchor things to the beginning and dollar signs to
  121. // the end.
  122. //
  123. {
  124. "^abc", 0,
  125. "aabc", 0,
  126. REG_NOMATCH,
  127. {{-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  128. },
  129. {
  130. "abc$", 0,
  131. "abcabc", 0,
  132. 0,
  133. {{3, 6}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  134. },
  135. {
  136. "^abc$", 0,
  137. "abc", 0,
  138. 0,
  139. {{0, 3}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  140. },
  141. //
  142. // Try a basic but comprehensive pattern.
  143. //
  144. {
  145. "a.cd\\(ef\\)(g)h\\{2\\}hi\\{1,4\\}ij*k*.*mno\\*\\*"
  146. "{}pq\\(rs\\)\\(\\1\\2\\)*^$[tuv][]xw][^ab]*z", 0,
  147. "00abcdef(g)hhhiiiiijjjmno**{}pqrsefrsefrs^$twxyz123", 0,
  148. 0,
  149. {{2, 48}, {6, 8}, {31, 33}, {37, 41}, {-1, -1}},
  150. },
  151. //
  152. // Try nested subexpressions.
  153. //
  154. {
  155. "^\\(abcd\\(e*fg\\(hi\\(\\)j\\)\\)kl\\)\\(\\3\\)$", 0,
  156. "abcdeeefghijklhij", 0,
  157. 0,
  158. {{0, 17}, {0, 14}, {4, 12}, {9, 12}, {11, 11}},
  159. },
  160. //
  161. // Try the same nested subexpression except have an outer subexpression
  162. // fail after the inner ones succeed to make sure those inner ones get
  163. // cleared out. The difference here is the last character in the input.
  164. //
  165. {
  166. "^\\(abcd\\(e*fg\\(hi\\(\\)j\\)\\)kl\\)\\(\\3\\)$", 0,
  167. "abcdeeefghijklhi", 0,
  168. REG_NOMATCH,
  169. {{-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  170. },
  171. //
  172. // Try some subexpressions that go beyond the size of the array.
  173. //
  174. {
  175. "+?\\(a\\)*.\\(b*\\)\\(c\\)*\\([def*]*\\)\\(g\\)\\(h\\)\\([ij]\\)*|", 0,
  176. "+?abbbccced**dfghiiijj|klm", 0,
  177. 0,
  178. {{0, 23}, {2, 3}, {4, 6}, {8, 9}, {9, 15}},
  179. },
  180. //
  181. // Try the "not EOL" and "not BOL" flags.
  182. //
  183. {
  184. "^abc$", 0,
  185. "abc", REG_NOTBOL,
  186. REG_NOMATCH,
  187. {{-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  188. },
  189. {
  190. "^abc$", 0,
  191. "abc", REG_NOTEOL,
  192. REG_NOMATCH,
  193. {{-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  194. },
  195. //
  196. // Try out the newline flag.
  197. //
  198. {
  199. "^abc$", REG_NEWLINE,
  200. "abc\nabc\nh", REG_NOTBOL | REG_NOTEOL,
  201. 0,
  202. {{4, 7}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  203. },
  204. {
  205. "abc$", REG_NEWLINE,
  206. "abcd\nabc\n123", REG_NOTBOL | REG_NOTEOL,
  207. 0,
  208. {{5, 8}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  209. },
  210. //
  211. // Try out the no-sub flag.
  212. //
  213. {
  214. "^\\(a\\)\\(b\\)\\(c\\)$", REG_NOSUB,
  215. "abc", 0,
  216. 0,
  217. {{-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  218. },
  219. //
  220. // Try out the ignore case flag.
  221. //
  222. {
  223. "abcdef12!!!%$^*6*\\.*4", REG_ICASE,
  224. "aAaaaAbCDef12!!!%$...456", 0,
  225. 0,
  226. {{5, 22}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  227. },
  228. //
  229. // Try out some bracket patterns.
  230. //
  231. {
  232. "\\([ABC]\\{2,6\\}\\)ABC.*\\([[:digit:]]\\).*\\([[:alpha:]]\\).*"
  233. "\\([[:blank:]]\\).*[[:cntrl:]].*[[:graph:]].*[[:print:]].*[[:punct:]]"
  234. ".*[[:space:]].*[[:upper:]].*[[:lower:]].*[[:xdigit:]]456", 0,
  235. "aBCAABC 7 xzz \t7 . AAz F456", REG_NOTBOL | REG_NOTEOL,
  236. 0,
  237. {{1, 33}, {1, 4}, {9, 10}, {14, 15}, {15, 16}},
  238. },
  239. //
  240. // Extended mode tests.
  241. //
  242. //
  243. // An empty pattern should match anything.
  244. //
  245. {
  246. "", REG_EXTENDED,
  247. "a", 0,
  248. 0,
  249. {{0, 0}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  250. },
  251. //
  252. // A beginning circumflex should match anything.
  253. //
  254. {
  255. "^", REG_EXTENDED,
  256. "", 0,
  257. 0,
  258. {{0, 0}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  259. },
  260. //
  261. // A beginning dollar sign should match anything.
  262. //
  263. {
  264. "$", REG_EXTENDED,
  265. "a", 0,
  266. 0,
  267. {{1, 1}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  268. },
  269. //
  270. // A circumflex and dollar sign should match just the empty string.
  271. //
  272. {
  273. "^$", REG_EXTENDED,
  274. "", 0,
  275. 0,
  276. {{0, 0}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  277. },
  278. {
  279. "^$", REG_EXTENDED,
  280. "a", 0,
  281. REG_NOMATCH,
  282. {{-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  283. },
  284. //
  285. // Circumflex and dollar sign should be usable from within the regex.
  286. //
  287. {
  288. "f*^abc$g*", REG_EXTENDED,
  289. "abc", 0,
  290. 0,
  291. {{0, 3}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  292. },
  293. //
  294. // Try out some extended features.
  295. //
  296. {
  297. "^(ab){1,2}cd[^ef[:digit:]]+7 ?([][:digit:]]{2})", REG_EXTENDED,
  298. "ababcdxx7 0]", 0,
  299. 0,
  300. {{0, 12}, {2, 4}, {10, 12}, {-1, -1}, {-1, -1}},
  301. },
  302. //
  303. // The plus should give one or more. Question mark should be zero or one.
  304. //
  305. {
  306. "ba+c", REG_EXTENDED,
  307. "bc", 0,
  308. REG_NOMATCH,
  309. {{-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  310. },
  311. {
  312. "ba+cD?", REG_EXTENDED | REG_ICASE,
  313. "0bAAcde", 0,
  314. 0,
  315. {{1, 6}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  316. },
  317. {
  318. "da?a", REG_EXTENDED | REG_ICASE,
  319. "ccdAa", 0,
  320. 0,
  321. {{2, 5}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  322. },
  323. //
  324. // Try out branches.
  325. //
  326. {
  327. "(abc)|(de(f*)|g)|h", REG_EXTENDED | REG_ICASE,
  328. "000deg", 0,
  329. 0,
  330. {{3, 5}, {-1, -1}, {3, 5}, {5, 5}, {-1, -1}},
  331. },
  332. {
  333. "(abc)|(de(f*)|g)|h", REG_EXTENDED | REG_ICASE,
  334. "h", 0,
  335. 0,
  336. {{0, 1}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  337. },
  338. //
  339. // Try out some escape characters.
  340. //
  341. {
  342. "(o\\(\\)o\\{\\}s\\*d\\.b\\\\q\\?\\^p\\+s\\[\\]p\\|)|a",
  343. REG_EXTENDED | REG_ICASE,
  344. "o()o{}s*d.b\\q?^p+s[]p|", 0,
  345. 0,
  346. {{0, 22}, {0, 22}, {-1, -1}, {-1, -1}, {-1, -1}},
  347. },
  348. {
  349. "(o\\(\\)o\\{\\}s\\*d\\.b\\\\q\\?\\^p\\+s\\[\\]p\\|)|a",
  350. REG_EXTENDED | REG_ICASE,
  351. "A", 0,
  352. 0,
  353. {{0, 1}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  354. },
  355. //
  356. // Try a repeat that didn't make the minimum count.
  357. //
  358. {
  359. "(ab){3,5}", REG_EXTENDED | REG_ICASE,
  360. "abab", 0,
  361. REG_NOMATCH,
  362. {{-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  363. },
  364. //
  365. // Dots shouldn't swallow newlines if they're on.
  366. //
  367. {
  368. "a*ab.+", REG_EXTENDED | REG_NEWLINE,
  369. "caab \ncd", 0,
  370. 0,
  371. {{1, 5}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  372. },
  373. //
  374. // Back references should still work even with the nosub flag.
  375. //
  376. {
  377. "(.)bcd\\1+", REG_EXTENDED | REG_NOSUB,
  378. "abcdaaab", 0,
  379. 0,
  380. {{-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  381. },
  382. //
  383. // Bracket expressions should also honor the ignore case flag.
  384. //
  385. {
  386. "[[:lower:]][ABC][[:upper:]]", REG_EXTENDED | REG_ICASE,
  387. "Xcd", 0,
  388. 0,
  389. {{0, 3}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  390. },
  391. //
  392. // Close parentheses are normal if not opened first.
  393. //
  394. {
  395. "1)", REG_EXTENDED,
  396. "(1)", 0,
  397. 0,
  398. {{1, 3}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  399. },
  400. //
  401. // Apparently stars can override pluses, and some other
  402. // overrides are valid too.
  403. //
  404. {
  405. "0+*", REG_EXTENDED,
  406. "000+++", 0,
  407. 0,
  408. {{0, 3}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  409. },
  410. {
  411. "AS?+", REG_EXTENDED,
  412. "BASSS", 0,
  413. 0,
  414. {{1, 5}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  415. },
  416. {
  417. "AS?+", REG_EXTENDED,
  418. "BA", 0,
  419. 0,
  420. {{1, 2}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  421. },
  422. {
  423. "A*{5}", REG_EXTENDED,
  424. "AAAAAAAA", 0,
  425. 0,
  426. {{0, 8}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  427. },
  428. {
  429. "A*{5}", REG_EXTENDED,
  430. "B", 0,
  431. 0,
  432. {{0, 0}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  433. },
  434. {
  435. "(A|AB)+C", REG_EXTENDED,
  436. "ABABC", 0,
  437. 0,
  438. {{0, 5}, {2, 4}, {-1, -1}, {-1, -1}, {-1, -1}},
  439. },
  440. {
  441. "(AC|A)+C+", REG_EXTENDED,
  442. "ACACC", 0,
  443. 0,
  444. {{0, 5}, {2, 4}, {-1, -1}, {-1, -1}, {-1, -1}},
  445. },
  446. //
  447. // TODO: The commented out cases are what other C libraries would see.
  448. // This implementation finds shorter versions due to its backtracking
  449. // nature. Consider implementing a NFA/DFA regex implementation, which
  450. // would then enable these cases.
  451. //
  452. #if 0
  453. {
  454. "(A|AC)+C+", REG_EXTENDED,
  455. "ACACC", 0,
  456. 0,
  457. {{0, 5}, {2, 3}, {-1, -1}, {-1, -1}, {-1, -1}},
  458. },
  459. {
  460. "(A|AB){2,5}A*", REG_EXTENDED,
  461. "AAAABA", 0,
  462. 0,
  463. {{0, 6}, {5, 6}, {-1, -1}, {-1, -1}, {-1, -1}},
  464. },
  465. #endif
  466. //
  467. // Test that backtracking properly refills subexpressions with the old
  468. // choices. In this case, subexpressions 1 and 2 need to be refreshed after
  469. // backing out of a failed third repeat.
  470. //
  471. {
  472. "(((A|B)|(C|D)))+D", REG_EXTENDED,
  473. "ACD", 0,
  474. 0,
  475. {{0, 3}, {1, 2}, {1, 2}, {0, 1}, {1, 2}},
  476. },
  477. //
  478. // Test that repeated emptiness won't send it into conniptions.
  479. //
  480. {
  481. "A()*B(())+(C||)*", REG_EXTENDED,
  482. "AB", 0,
  483. 0,
  484. {{0, 2}, {1, 1}, {2, 2}, {2, 2}, {2, 2}},
  485. },
  486. //
  487. // Try an open ended repeat count.
  488. //
  489. {
  490. "AB\\{2,\\}", 0,
  491. "ABBBBC", 0,
  492. 0,
  493. {{0, 5}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  494. },
  495. //
  496. // Try a beginning of word that works.
  497. //
  498. {
  499. "[[:<:]](AB) [[:<:]](C)", REG_EXTENDED,
  500. "AB C", 0,
  501. 0,
  502. {{0, 4}, {0, 2}, {3, 4}, {-1, -1}, {-1, -1}},
  503. },
  504. //
  505. // Try a beginning of word that doesn't work.
  506. //
  507. {
  508. "[[:<:]]AB[[:<:]]C", 0,
  509. "ABC", 0,
  510. REG_NOMATCH,
  511. {{-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  512. },
  513. //
  514. // Try a beginning of word that works.
  515. //
  516. {
  517. "(AB)[[:>:]] (C)[[:>:]]", REG_EXTENDED,
  518. "AB C", 0,
  519. 0,
  520. {{0, 4}, {0, 2}, {3, 4}, {-1, -1}, {-1, -1}},
  521. },
  522. //
  523. // Try a beginning of word that doesn't work.
  524. //
  525. {
  526. "[[:>:]]AB C", REG_EXTENDED,
  527. "AB C", 0,
  528. REG_NOMATCH,
  529. {{-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}},
  530. },
  531. };
  532. //
  533. // Define the compile test cases.
  534. //
  535. REGEX_COMPILE_TEST_CASE RegexCompileTestCases[] = {
  536. //
  537. // Some basic but cornery cases that should all compile.
  538. //
  539. {"", 0, 0, 0},
  540. {"$", 0, 0, 0},
  541. {"^^^^^$$$$$", REG_EXTENDED, 0, 0},
  542. {"^^^^^$$$$$", 0, 0, 0},
  543. {"(1)(2)(3)(4)(5)(6)(7)(8)(9)(A)\\9\\5\\1.", REG_EXTENDED, 10, 0},
  544. {"a{ 0, 0 }", REG_EXTENDED, 10, REG_BADBR},
  545. {"[[:alpha:][:alnum:][:blank:][:cntrl:][:digit:][:graph:][:lower:]"
  546. "[:print:][:punct:][:space:][:upper:][:xdigit:]]", 0, 0},
  547. {"]]]", REG_EXTENDED, 0},
  548. {"(((((((((((((((((((((((((((((())))))))))))))))))))))))))))))",
  549. REG_EXTENDED, 30, 0},
  550. {")", REG_EXTENDED, 0, 0},
  551. {"\\(abc\\(d*e\\(f*\\)g\\)dd\\)\\(\\)", 0, 4, 0},
  552. {"\\(abc\\(d*e\\(f*\\)g\\)dd\\)\\(\\)", REG_EXTENDED, 0, 0},
  553. //
  554. // Back references are only valid between 1 and 9, and must already have a
  555. // valid subexpression.
  556. //
  557. {"(asdf)\\2", 0, 1, REG_ESUBREG},
  558. {"(asdf)\\99", 0, 1, REG_ESUBREG},
  559. //
  560. // Invalid braces.
  561. //
  562. {"a{asdf}", REG_EXTENDED, 0, REG_BADBR},
  563. {"a{4,,}", REG_EXTENDED, 0, REG_BADBR},
  564. {"a{0,-3}", REG_EXTENDED, 0, REG_BADBR},
  565. {"a{-999}", REG_EXTENDED, 0, REG_BADBR},
  566. {"a{-1,-3}", REG_EXTENDED, 0, REG_BADBR},
  567. {"a{6000, ASDF}", REG_EXTENDED, 0, REG_BADBR},
  568. {"a{ 4 , 4 ,}", REG_EXTENDED, 0, REG_BADBR},
  569. {"a{5,3}", REG_EXTENDED, 0, REG_BADBR},
  570. //
  571. // Parentheses imbalance.
  572. //
  573. {"(1((3))\\)", REG_EXTENDED, 0, REG_EPAREN},
  574. {"(1", REG_EXTENDED, 0, REG_EPAREN},
  575. {"\\(2", 0, 0, REG_EPAREN},
  576. //
  577. // Bad character class.
  578. //
  579. {"[[:poopy:]]", REG_EXTENDED, 0, REG_ECTYPE},
  580. {"[[:ALPHA:]]", REG_EXTENDED, 0, REG_ECTYPE},
  581. //
  582. // Bad brackets.
  583. //
  584. {"[[:alpha:]", REG_EXTENDED, 0, REG_EBRACK},
  585. {"[]asdf", REG_EXTENDED, 0, REG_EBRACK},
  586. //
  587. // Trailing escape.
  588. //
  589. {"asdf\\", REG_EXTENDED, 0, REG_EESCAPE},
  590. //
  591. // Bad repeat.
  592. //
  593. {"*", REG_EXTENDED, 0, REG_BADRPT},
  594. {"*?", REG_EXTENDED, 0, REG_BADRPT},
  595. {"??", REG_EXTENDED, 0, REG_BADRPT},
  596. {"{6}", REG_EXTENDED, 0, REG_BADRPT},
  597. {"+", REG_EXTENDED, 0, REG_BADRPT},
  598. };
  599. REGEX_ERROR_STRING RegexErrorStrings[] = {
  600. {0, "SUCCESS"},
  601. {REG_NOMATCH, "REG_NOMATCH"},
  602. {REG_BADPAT, "REG_BADPAT"},
  603. {REG_ECOLLATE, "REG_ECOLLATE"},
  604. {REG_ECTYPE, "REG_ECTYPE"},
  605. {REG_EESCAPE, "REG_EESCAPE"},
  606. {REG_ESUBREG, "REG_ESUBREG"},
  607. {REG_EBRACK, "REG_EBRACK"},
  608. {REG_EPAREN, "REG_EPAREN"},
  609. {REG_BADBR, "REG_BADBR"},
  610. {REG_ERANGE, "REG_ERANGE"},
  611. {REG_ESPACE, "REG_ESPACE"},
  612. {REG_BADRPT, "REG_BADRPT"}
  613. };
  614. //
  615. // ------------------------------------------------------------------ Functions
  616. //
  617. ULONG
  618. TestRegularExpressions (
  619. VOID
  620. )
  621. /*++
  622. Routine Description:
  623. This routine implements the entry point for the regular expression tests.
  624. Arguments:
  625. None.
  626. Return Value:
  627. Returns the count of test failures.
  628. --*/
  629. {
  630. ULONG Failures;
  631. BOOL Result;
  632. ULONG TestCount;
  633. ULONG TestIndex;
  634. Failures = 0;
  635. //
  636. // Run the compile tests.
  637. //
  638. TestCount = sizeof(RegexCompileTestCases) /
  639. sizeof(RegexCompileTestCases[0]);
  640. for (TestIndex = 0; TestIndex < TestCount; TestIndex += 1) {
  641. Result = TestRegularExpressionCompileCase(
  642. TestIndex,
  643. &(RegexCompileTestCases[TestIndex]));
  644. if (Result == FALSE) {
  645. Failures += 1;
  646. }
  647. }
  648. //
  649. // Run the execution tests.
  650. //
  651. TestCount = sizeof(RegexExecutionTestCases) /
  652. sizeof(RegexExecutionTestCases[0]);
  653. for (TestIndex = 0; TestIndex < TestCount; TestIndex += 1) {
  654. Result = TestRegularExpressionExecutionCase(
  655. TestIndex,
  656. &(RegexExecutionTestCases[TestIndex]));
  657. if (Result == FALSE) {
  658. printf("Case %d Failed\n", TestIndex);
  659. Failures += 1;
  660. }
  661. }
  662. return Failures;
  663. }
  664. //
  665. // --------------------------------------------------------- Internal Functions
  666. //
  667. BOOL
  668. TestRegularExpressionExecutionCase (
  669. ULONG Index,
  670. PREGEX_EXECUTION_TEST_CASE Case
  671. )
  672. /*++
  673. Routine Description:
  674. This routine performs a regular expression execution test.
  675. Arguments:
  676. Index - Supplies the test case number.
  677. Case - Supplies a pointer to the test case.
  678. Return Value:
  679. TRUE on success.
  680. FALSE on failure.
  681. --*/
  682. {
  683. regex_t Expression;
  684. regmatch_t Match[REGEX_TEST_MATCH_COUNT];
  685. size_t MatchIndex;
  686. int Result;
  687. BOOL Status;
  688. Status = FALSE;
  689. for (MatchIndex = 0; MatchIndex < REGEX_TEST_MATCH_COUNT; MatchIndex += 1) {
  690. Match[MatchIndex].rm_so = -1;
  691. Match[MatchIndex].rm_eo = -1;
  692. }
  693. //
  694. // Compile the regular expression.
  695. //
  696. Result = regcomp(&Expression, Case->Pattern, Case->CompileFlags);
  697. if (Result != 0) {
  698. printf("Error: Failed to compile regex \"%s\".\n", Case->Pattern);
  699. goto TestRegularExpressionCaseEnd;
  700. }
  701. //
  702. // Run the test case.
  703. //
  704. Result = regexec(&Expression,
  705. Case->Input,
  706. REGEX_TEST_MATCH_COUNT,
  707. Match,
  708. Case->InputFlags);
  709. if (Result != Case->ExecutionResult) {
  710. printf("Error: regexec returned %d instead of expected result %d.\n",
  711. Result,
  712. Case->ExecutionResult);
  713. }
  714. //
  715. // Compare the matches.
  716. //
  717. Status = TRUE;
  718. for (MatchIndex = 0; MatchIndex < REGEX_TEST_MATCH_COUNT; MatchIndex += 1) {
  719. if ((Match[MatchIndex].rm_so !=
  720. Case->ExpectedMatch[MatchIndex].rm_so) ||
  721. (Match[MatchIndex].rm_eo !=
  722. Case->ExpectedMatch[MatchIndex].rm_eo)) {
  723. printf("Error: Regex test match %d failed.\n", MatchIndex);
  724. Status = FALSE;
  725. }
  726. }
  727. TestRegularExpressionCaseEnd:
  728. if (Status == FALSE) {
  729. printf("Regex test %d failed.\n"
  730. "Pattern: \"%s\", Flags 0x%x.\n"
  731. "Input: \"%s\", len %d, Flags 0x%x.\n"
  732. "Ruler: 0 1 2 3 4 5\n"
  733. "Expected Result: %d.\n",
  734. Index,
  735. Case->Pattern,
  736. Case->CompileFlags,
  737. Case->Input,
  738. strlen(Case->Input),
  739. Case->InputFlags,
  740. Case->ExecutionResult);
  741. for (MatchIndex = 0;
  742. MatchIndex < REGEX_TEST_MATCH_COUNT;
  743. MatchIndex += 1) {
  744. printf("Match %d: Expected {%d, %d}, got {%d, %d}\n",
  745. MatchIndex,
  746. Case->ExpectedMatch[MatchIndex].rm_so,
  747. Case->ExpectedMatch[MatchIndex].rm_eo,
  748. Match[MatchIndex].rm_so,
  749. Match[MatchIndex].rm_eo);
  750. }
  751. printf("------------------------------------\n");
  752. }
  753. regfree(&Expression);
  754. return Status;
  755. }
  756. BOOL
  757. TestRegularExpressionCompileCase (
  758. ULONG Index,
  759. PREGEX_COMPILE_TEST_CASE Case
  760. )
  761. /*++
  762. Routine Description:
  763. This routine performs a regular expression compile test.
  764. Arguments:
  765. Index - Supplies the test case number.
  766. Case - Supplies a pointer to the test case.
  767. Return Value:
  768. TRUE on success.
  769. FALSE on failure.
  770. --*/
  771. {
  772. regex_t Expression;
  773. INT Result;
  774. BOOL Status;
  775. Status = TRUE;
  776. Expression.re_nsub = 0;
  777. Result = regcomp(&Expression, Case->Pattern, Case->CompileFlags);
  778. if ((Result != Case->Result) ||
  779. ((Result == 0) && (Expression.re_nsub != Case->SubexpressionCount))) {
  780. printf("Regex compile test case %d failed.\n"
  781. "Pattern: \"%s\", Flags 0x%x.\n"
  782. "Expected Result %d (%s), got %d (%s).\n"
  783. "Expected %d subexpressions, got %d.\n",
  784. Index,
  785. Case->Pattern,
  786. Case->CompileFlags,
  787. Case->Result,
  788. TestRegexGetErrorCodeString(Case->Result),
  789. Result,
  790. TestRegexGetErrorCodeString(Result),
  791. (INT)Case->SubexpressionCount,
  792. (INT)Expression.re_nsub);
  793. Status = FALSE;
  794. }
  795. if (Result == 0) {
  796. regfree(&Expression);
  797. }
  798. return Status;
  799. }
  800. PSTR
  801. TestRegexGetErrorCodeString (
  802. INT Code
  803. )
  804. /*++
  805. Routine Description:
  806. This routine returns the string version of the given error code.
  807. Arguments:
  808. Code - Supplies the REG_* error code (or zero).
  809. Return Value:
  810. Returns a string of that error code that can be printed.
  811. --*/
  812. {
  813. ULONG Count;
  814. ULONG Index;
  815. Count = sizeof(RegexErrorStrings) / sizeof(RegexErrorStrings[0]);
  816. for (Index = 0; Index < Count; Index += 1) {
  817. if (RegexErrorStrings[Index].Code == Code) {
  818. return RegexErrorStrings[Index].String;
  819. }
  820. }
  821. return "Unknown Error";
  822. }