grep.c 39 KB


  1. /*++
  2. Copyright (c) 2013 Minoca Corp. All Rights Reserved
  3. Module Name:
  4. grep.c
  5. Abstract:
  6. This module implements support for the grep utility.
  7. Author:
  8. Evan Green 17-Jul-2013
  9. Environment:
  10. POSIX
  11. --*/
  12. //
  13. // ------------------------------------------------------------------- Includes
  14. //
  15. #include <minoca/lib/types.h>
  16. #include <assert.h>
  17. #include <ctype.h>
  18. #include <errno.h>
  19. #include <fcntl.h>
  20. #include <getopt.h>
  21. #include <regex.h>
  22. #include <stdlib.h>
  23. #include <string.h>
  24. #include <unistd.h>
  25. #include "swlib.h"
  26. //
  27. // ---------------------------------------------------------------- Definitions
  28. //
  //
  // Define the version reported by --version.
  //

  #define GREP_VERSION_MAJOR 1
  #define GREP_VERSION_MINOR 0

  //
  // Define the help text printed by --help.
  //

  #define GREP_USAGE \
      "usage: grep [-E | -F][-c | -l | -q][-insvx] [-e pattern_list]...\n" \
      " [-f pattern_file]...[file]...\n" \
      " grep [-E | -F][-c | -l | -q][-insvx] pattern_list [file]...\n" \
      "The grep utility searches for a text pattern in one or more text files.\n"\
      "Options are:\n" \
      " -E, --extended-regexp -- Use extended regular expressions.\n" \
      " -F, --fixed-strings -- Use fixed strings, not regular expressions.\n" \
      " -c, --count -- Write only a count of selected lines to standard out.\n" \
      " -e, --regexp pattern_list -- Specifies the pattern list to search " \
      "for.\n" \
      " -f, --file pattern_file -- Specifies a file containing patterns to \n" \
      " search for.\n" \
      " -H, --with-filename -- Print the filename for each match.\n" \
      " -h, --no-filename -- Do not print the filename for each match.\n" \
      " -i, --ignore-case -- Ignore case when searching.\n" \
      " -l, --files-with-matches -- Write only the names of the files \n" \
      " searched and matched.\n" \
      " -n, --line-number -- Write the line number before each match.\n" \
      " -q, --quiet, --silent -- Quiet, write nothing to standard out.\n" \
      " -R, -r, --recursive -- Scan the contents of any directories found.\n" \
      " -s, --no-messages -- Suppress errors for nonexistent and unreadable " \
      "files.\n" \
      " -v, --invert-match -- Select lines NOT matching any of the specified " \
      "patterns.\n" \
      " -x, --line-regexp -- Consider only input lines that use all \n" \
      " characters in the line to match the pattern.\n" \
      " --help -- Show this help.\n" \
      " --version -- Show the version information.\n"

  //
  // Define the short option string handed to getopt_long. Options followed by
  // a colon take an argument.
  //

  #define GREP_OPTIONS_STRING "EFce:f:HhilnqRrsvxV"

  //
  // Define the value returned by getopt_long for --help, which has no short
  // option letter. It sits outside the range of a single character.
  //

  #define GREP_HELP 256
  62. //
  63. // Define the chunk size grep reads in.
  64. //
  65. #define GREP_READ_BLOCK_SIZE 1024
  66. #define GREP_INITIAL_LINE_SIZE 16
  67. //
  68. // Define grep options.
  69. //
  70. //
  71. // Set this option to use extended regular expressions.
  72. //
  73. #define GREP_OPTION_EXTENDED_EXPRESSIONS 0x00000001
  74. //
  75. // Set this option to match using fixed strings, not regular expressions.
  76. //
  77. #define GREP_OPTION_FIXED_STRINGS 0x00000002
  78. //
  79. // Set this option to print only the count of selected lines.
  80. //
  81. #define GREP_OPTION_LINE_COUNT 0x00000004
  82. //
  83. // Set this option to ignore case within a pattern.
  84. //
  85. #define GREP_OPTION_IGNORE_CASE 0x00000008
  86. //
  87. // Set this option to print the file name with each match.
  88. //
  89. #define GREP_OPTION_PRINT_FILE_NAMES 0x00000010
  90. //
  91. // Set this option to write line numbers for each match.
  92. //
  93. #define GREP_OPTION_PRINT_LINE_NUMBERS 0x00000020
  94. //
  95. // Set this option to suppress all output.
  96. //
  97. #define GREP_OPTION_QUIET 0x00000040
  98. //
  99. // Set this option to suppress errors for nonexistent and unreadable files.
  100. //
  101. #define GREP_OPTION_SUPPRESS_BLAND_ERRORS 0x00000080
  102. //
  103. // Set this option to select only lines that do not match any pattern.
  104. //
  105. #define GREP_OPTION_NEGATE_SEARCH 0x00000100
  106. //
  107. // Set this option to consider only input lines that use all characters in
  108. // the line to match the pattern.
  109. //
  110. #define GREP_OPTION_FULL_LINE_ONLY 0x00000200
  111. //
  112. // Set this option to scan inside directories.
  113. //
  114. #define GREP_OPTION_RECURSIVE 0x00000400
  115. //
  116. // Set this option to suppress printing the match itself.
  117. //
  118. #define GREP_OPTION_SUPPRESS_MATCH_PRINT 0x00000800
  119. //
  120. // Define the maximum recursion depth for traversing into directories.
  121. //
  122. #define GREP_MAX_RECURSION_DEPTH 300
  123. //
  124. // ------------------------------------------------------ Data Type Definitions
  125. //
  126. /*++
  127. Structure Description:
  128. This structure defines a grep input file.
  129. Members:
  130. ListEntry - Stores pointers to the next and previous input entries.
  131. FileName - Stores the heap-allocated name of the file.
  132. File - Stores the open file pointer, or NULL if the file has not been
  133. opened yet or could not be opened. The standard input entry uses stdin.
  134. Binary - Stores a boolean indicating if this file is a binary file or not.
  135. --*/
  136. typedef struct _GREP_INPUT {
  137. LIST_ENTRY ListEntry;
  138. PSTR FileName;
  139. FILE *File;
  140. BOOL Binary;
  141. } GREP_INPUT, *PGREP_INPUT;
  142. /*++
  143. Structure Description:
  144. This structure defines a single grep search pattern.
  145. Members:
  146. ListEntry - Stores pointers to the next and previous pattern entries.
  147. Pattern - Stores the heap-allocated pattern string. It is freed and set to
      NULL once the pattern has been compiled into the expression.
  148. Expression - Stores the compiled regular expression structure, which is
      only released with regfree when the pattern string is NULL.
  149. --*/
  150. typedef struct _GREP_PATTERN {
  151. LIST_ENTRY ListEntry;
  152. PSTR Pattern;
  153. regex_t Expression;
  154. } GREP_PATTERN, *PGREP_PATTERN;
  155. /*++
  156. Structure Description:
  157. This structure defines the context for an instantiation of the grep
  158. utility.
  159. Members:
  160. InputList - Stores the head of the list of input files (GREP_INPUT
      entries) to process.
  161. PatternList - Stores the head of the list of patterns (GREP_PATTERN
      entries) to search for.
  162. Options - Stores the application options as a bitmask. See GREP_OPTION_*
      definitions.
  163. --*/
  164. typedef struct _GREP_CONTEXT {
  165. LIST_ENTRY InputList;
  166. LIST_ENTRY PatternList;
  167. ULONG Options;
  168. } GREP_CONTEXT, *PGREP_CONTEXT;
  169. //
  170. // ----------------------------------------------- Internal Function Prototypes
  171. //
  172. INT
  173. GrepParsePatternFile (
  174. PGREP_CONTEXT Context,
  175. PSTR Path
  176. );
  177. INT
  178. GrepParsePatternList (
  179. PGREP_CONTEXT Context,
  180. PSTR String
  181. );
  182. INT
  183. GrepCompileRegularExpressions (
  184. PGREP_CONTEXT Context
  185. );
  186. INT
  187. GrepAddInputFile (
  188. PGREP_CONTEXT Context,
  189. PSTR Path,
  190. ULONG RecursionLevel
  191. );
  192. INT
  193. GrepProcessInput (
  194. PGREP_CONTEXT Context
  195. );
  196. INT
  197. GrepProcessInputEntry (
  198. PGREP_CONTEXT Context,
  199. PGREP_INPUT Input,
  200. PSTR *Buffer,
  201. size_t *BufferSize
  202. );
  203. INT
  204. GrepReadLine (
  205. PGREP_CONTEXT Context,
  206. PGREP_INPUT Input,
  207. PSTR *Buffer,
  208. size_t *BufferSize
  209. );
  210. BOOL
  211. GrepMatchPattern (
  212. PGREP_CONTEXT Context,
  213. PSTR Input,
  214. PGREP_PATTERN Pattern
  215. );
  216. BOOL
  217. GrepMatchFixedString (
  218. PGREP_CONTEXT Context,
  219. PSTR Input,
  220. PGREP_PATTERN Pattern
  221. );
  222. //
  223. // -------------------------------------------------------------------- Globals
  224. //
  //
  // Long option table handed to getopt_long. Every long option returns the
  // value of its short option letter, except --help, which returns GREP_HELP.
  // Both --quiet and --silent map to 'q'. The all-zero entry terminates the
  // table.
  //
  225. struct option GrepLongOptions[] = {
  226. {"extended-regexp", no_argument, 0, 'E'},
  227. {"fixed-strings", no_argument, 0, 'F'},
  228. {"count", no_argument, 0, 'c'},
  229. {"regexp", required_argument, 0, 'e'},
  230. {"file", required_argument, 0, 'f'},
  231. {"with-filename", no_argument, 0, 'H'},
  232. {"no-filename", no_argument, 0, 'h'},
  233. {"ignore-case", no_argument, 0, 'i'},
  234. {"files-with-matches", no_argument, 0, 'l'},
  235. {"line-number", no_argument, 0, 'n'},
  236. {"quiet", no_argument, 0, 'q'},
  237. {"recursive", no_argument, 0, 'R'},
  238. {"silent", no_argument, 0, 'q'},
  239. {"no-messages", no_argument, 0, 's'},
  240. {"invert-match", no_argument, 0, 'v'},
  241. {"line-regexp", no_argument, 0, 'x'},
  242. {"help", no_argument, 0, GREP_HELP},
  243. {"version", no_argument, 0, 'V'},
  244. {NULL, 0, 0, 0}
  245. };
  246. //
  247. // ------------------------------------------------------------------ Functions
  248. //
  249. INT
  250. GrepMain (
  251. INT ArgumentCount,
  252. CHAR **Arguments
  253. )
  254. /*++
  255. Routine Description:
  256. This routine implements the main entry point for the grep utility, which
  257. searches for a pattern within a file.
  258. Arguments:
  259. ArgumentCount - Supplies the number of arguments on the command line.
  260. Arguments - Supplies an array of pointers to strings representing the
  261. arguments.
  262. Return Value:
  263. 0 on success.
  264. Non-zero on failure.
  265. --*/
  266. {
  267. PSTR Argument;
  268. INT ArgumentIndex;
  269. GREP_CONTEXT Context;
  270. PSTR FirstSource;
  271. PGREP_INPUT InputEntry;
  272. INT Option;
  273. PGREP_PATTERN Pattern;
  274. BOOL PatternsRead;
  275. BOOL ReadFromStandardIn;
  276. PSTR SecondSource;
  277. INT Status;
  278. BOOL SuppressFileName;
  279. memset(&Context, 0, sizeof(GREP_CONTEXT));
  280. INITIALIZE_LIST_HEAD(&(Context.InputList));
  281. INITIALIZE_LIST_HEAD(&(Context.PatternList));
  282. Status = 0;
  283. //
  284. // Process the control arguments.
  285. //
  286. FirstSource = NULL;
  287. SecondSource = NULL;
  288. PatternsRead = FALSE;
  289. SuppressFileName = FALSE;
  290. while (TRUE) {
  291. Option = getopt_long(ArgumentCount,
  292. Arguments,
  293. GREP_OPTIONS_STRING,
  294. GrepLongOptions,
  295. NULL);
  296. if (Option == -1) {
  297. break;
  298. }
  299. if ((Option == '?') || (Option == ':')) {
  300. Status = 1;
  301. goto MainEnd;
  302. }
  303. switch (Option) {
  304. case 'E':
  305. Context.Options |= GREP_OPTION_EXTENDED_EXPRESSIONS;
  306. if ((Context.Options & GREP_OPTION_FIXED_STRINGS) != 0) {
  307. SwPrintError(0, NULL, "Conflicting matchers specified");
  308. Status = 2;
  309. goto MainEnd;
  310. }
  311. break;
  312. case 'F':
  313. Context.Options |= GREP_OPTION_FIXED_STRINGS;
  314. if ((Context.Options &
  315. GREP_OPTION_EXTENDED_EXPRESSIONS) != 0) {
  316. SwPrintError(0, NULL, "Conflicting matchers specified");
  317. Status = 2;
  318. goto MainEnd;
  319. }
  320. break;
  321. case 'c':
  322. Context.Options |= GREP_OPTION_LINE_COUNT;
  323. break;
  324. case 'e':
  325. PatternsRead = TRUE;
  326. Argument = optarg;
  327. assert(Argument != NULL);
  328. Status = GrepParsePatternList(&Context, Argument);
  329. if (Status != 0) {
  330. goto MainEnd;
  331. }
  332. break;
  333. case 'f':
  334. PatternsRead = TRUE;
  335. Argument = optarg;
  336. assert(Argument != NULL);
  337. Status = GrepParsePatternFile(&Context, Argument);
  338. if (Status != 0) {
  339. goto MainEnd;
  340. }
  341. break;
  342. case 'h':
  343. Context.Options &= ~GREP_OPTION_PRINT_FILE_NAMES;
  344. SuppressFileName = TRUE;
  345. break;
  346. case 'H':
  347. Context.Options |= GREP_OPTION_PRINT_FILE_NAMES;
  348. break;
  349. case 'i':
  350. Context.Options |= GREP_OPTION_IGNORE_CASE;
  351. break;
  352. case 'l':
  353. Context.Options |= GREP_OPTION_PRINT_FILE_NAMES |
  354. GREP_OPTION_SUPPRESS_MATCH_PRINT;
  355. break;
  356. case 'n':
  357. Context.Options |= GREP_OPTION_PRINT_LINE_NUMBERS;
  358. break;
  359. case 'q':
  360. Context.Options |= GREP_OPTION_QUIET;
  361. break;
  362. case 'r':
  363. case 'R':
  364. Context.Options |= GREP_OPTION_RECURSIVE;
  365. break;
  366. case 's':
  367. Context.Options |= GREP_OPTION_SUPPRESS_BLAND_ERRORS;
  368. break;
  369. case 'v':
  370. Context.Options |= GREP_OPTION_NEGATE_SEARCH;
  371. break;
  372. case 'x':
  373. Context.Options |= GREP_OPTION_FULL_LINE_ONLY;
  374. break;
  375. case 'V':
  376. SwPrintVersion(GREP_VERSION_MAJOR, GREP_VERSION_MINOR);
  377. return 1;
  378. case GREP_HELP:
  379. printf(GREP_USAGE);
  380. return 1;
  381. default:
  382. assert(FALSE);
  383. Status = 1;
  384. goto MainEnd;
  385. }
  386. }
  387. ArgumentIndex = optind;
  388. if (ArgumentIndex < ArgumentCount) {
  389. FirstSource = Arguments[ArgumentIndex];
  390. if (ArgumentIndex + 1 < ArgumentCount) {
  391. SecondSource = Arguments[ArgumentIndex + 1];
  392. }
  393. }
  394. //
  395. // If there were no scripts read, the first non-control argument is the
  396. // script.
  397. //
  398. ReadFromStandardIn = TRUE;
  399. if (PatternsRead == FALSE) {
  400. if (FirstSource == NULL) {
  401. SwPrintError(0, NULL, "Argument expected. Try --help for usage");
  402. Status = 2;
  403. goto MainEnd;
  404. }
  405. Status = GrepParsePatternList(&Context, FirstSource);
  406. if (Status != 0) {
  407. goto MainEnd;
  408. }
  409. if (SecondSource != NULL) {
  410. ReadFromStandardIn = FALSE;
  411. }
  412. } else if (FirstSource != NULL) {
  413. ReadFromStandardIn = FALSE;
  414. }
  415. Status = GrepCompileRegularExpressions(&Context);
  416. if (Status != 0) {
  417. goto MainEnd;
  418. }
  419. if (ReadFromStandardIn != FALSE) {
  420. //
  421. // Create a single input entry for standard in.
  422. //
  423. InputEntry = malloc(sizeof(GREP_INPUT));
  424. if (InputEntry == NULL) {
  425. Status = ENOMEM;
  426. goto MainEnd;
  427. }
  428. InputEntry->File = stdin;
  429. InputEntry->FileName = strdup("(standard in)");
  430. if (InputEntry->FileName == NULL) {
  431. Status = ENOMEM;
  432. goto MainEnd;
  433. }
  434. InputEntry->Binary = FALSE;
  435. INSERT_BEFORE(&(InputEntry->ListEntry), &(Context.InputList));
  436. Status = GrepProcessInput(&Context);
  437. goto MainEnd;
  438. }
  439. //
  440. // Loop through the remaining arguments to create the input entries.
  441. //
  442. while (ArgumentIndex < ArgumentCount) {
  443. Argument = Arguments[ArgumentIndex];
  444. ArgumentIndex += 1;
  445. //
  446. // Skip over the script itself.
  447. //
  448. if ((PatternsRead == FALSE) && (Argument == FirstSource)) {
  449. continue;
  450. }
  451. Status = GrepAddInputFile(&Context, Argument, 0);
  452. if (Status != 0) {
  453. goto MainEnd;
  454. }
  455. }
  456. //
  457. // If there are multiple files, print the file names, unless explicitly
  458. // told not to.
  459. //
  460. if ((Context.InputList.Next != Context.InputList.Previous) &&
  461. (SuppressFileName == FALSE)) {
  462. Context.Options |= GREP_OPTION_PRINT_FILE_NAMES;
  463. }
  464. //
  465. // Let grep process all this.
  466. //
  467. Status = GrepProcessInput(&Context);
  468. MainEnd:
  469. while (LIST_EMPTY(&(Context.InputList)) == FALSE) {
  470. InputEntry = LIST_VALUE(Context.InputList.Next, GREP_INPUT, ListEntry);
  471. LIST_REMOVE(&(InputEntry->ListEntry));
  472. if ((InputEntry->File != stdin) && (InputEntry->File != NULL)) {
  473. fclose(InputEntry->File);
  474. }
  475. if (InputEntry->FileName != NULL) {
  476. free(InputEntry->FileName);
  477. }
  478. free(InputEntry);
  479. }
  480. while (LIST_EMPTY(&(Context.PatternList)) == FALSE) {
  481. Pattern = LIST_VALUE(Context.PatternList.Next, GREP_PATTERN, ListEntry);
  482. LIST_REMOVE(&(Pattern->ListEntry));
  483. if (Pattern->Pattern != NULL) {
  484. free(Pattern->Pattern);
  485. } else {
  486. regfree(&(Pattern->Expression));
  487. }
  488. free(Pattern);
  489. }
  490. return Status;
  491. }
  492. INT
  493. EgrepMain (
  494. INT ArgumentCount,
  495. CHAR **Arguments
  496. )
  497. /*++
  498. Routine Description:
  499. This routine implements the main entry point for the egrep utility, which
  500. searches for a pattern within a file. It is equivalent to grep -E.
  501. Arguments:
  502. ArgumentCount - Supplies the number of arguments on the command line.
  503. Arguments - Supplies an array of pointers to strings representing the
  504. arguments.
  505. Return Value:
  506. 0 on success.
  507. Non-zero on failure.
  508. --*/
  509. {
  510. INT ArgumentIndex;
  511. PSTR *NewArguments;
  512. INT Result;
  513. assert(ArgumentCount >= 1);
  514. NewArguments = malloc((ArgumentCount + 2) * sizeof(PSTR));
  515. if (NewArguments == NULL) {
  516. return ENOMEM;
  517. }
  518. NewArguments[0] = Arguments[0];
  519. NewArguments[1] = "-E";
  520. for (ArgumentIndex = 1; ArgumentIndex < ArgumentCount; ArgumentIndex += 1) {
  521. NewArguments[1 + ArgumentIndex] = Arguments[ArgumentIndex];
  522. }
  523. NewArguments[ArgumentCount + 1] = NULL;
  524. Result = GrepMain(ArgumentCount + 1, NewArguments);
  525. free(NewArguments);
  526. return Result;
  527. }
  528. INT
  529. FgrepMain (
  530. INT ArgumentCount,
  531. CHAR **Arguments
  532. )
  533. /*++
  534. Routine Description:
  535. This routine implements the main entry point for the fgrep utility, which
  536. searches for a pattern within a file. It is equivalent to grep -f.
  537. Arguments:
  538. ArgumentCount - Supplies the number of arguments on the command line.
  539. Arguments - Supplies an array of pointers to strings representing the
  540. arguments.
  541. Return Value:
  542. 0 on success.
  543. Non-zero on failure.
  544. --*/
  545. {
  546. INT ArgumentIndex;
  547. PSTR *NewArguments;
  548. INT Result;
  549. assert(ArgumentCount >= 1);
  550. NewArguments = malloc((ArgumentCount + 2) * sizeof(PSTR));
  551. if (NewArguments == NULL) {
  552. return ENOMEM;
  553. }
  554. NewArguments[0] = Arguments[0];
  555. NewArguments[1] = "-F";
  556. for (ArgumentIndex = 1; ArgumentIndex < ArgumentCount; ArgumentIndex += 1) {
  557. NewArguments[1 + ArgumentIndex] = Arguments[ArgumentIndex];
  558. }
  559. NewArguments[ArgumentCount + 1] = NULL;
  560. Result = GrepMain(ArgumentCount + 1, NewArguments);
  561. free(NewArguments);
  562. return Result;
  563. }
  564. //
  565. // --------------------------------------------------------- Internal Functions
  566. //
  567. INT
  568. GrepParsePatternFile (
  569. PGREP_CONTEXT Context,
  570. PSTR Path
  571. )
  572. /*++
  573. Routine Description:
  574. This routine reads a pattern list file.
  575. Arguments:
  576. Context - Supplies a pointer to the application context.
  577. Path - Supplies a pointer to a string containing the path of the file to
  578. read in.
  579. Return Value:
  580. 0 on success.
  581. Non-zero on failure.
  582. --*/
  583. {
  584. PCHAR Buffer;
  585. size_t BufferSize;
  586. ssize_t BytesRead;
  587. int File;
  588. PCHAR NewBuffer;
  589. int Status;
  590. size_t TotalBytesRead;
  591. Buffer = NULL;
  592. File = -1;
  593. TotalBytesRead = 0;
  594. //
  595. // Allocate an initial buffer.
  596. //
  597. BufferSize = GREP_READ_BLOCK_SIZE;
  598. Buffer = malloc(GREP_READ_BLOCK_SIZE);
  599. if (Buffer == NULL) {
  600. Status = ENOMEM;
  601. goto ReadFileInEnd;
  602. }
  603. File = open(Path, O_RDONLY | O_BINARY);
  604. if (File < 0) {
  605. if ((Context->Options & GREP_OPTION_SUPPRESS_BLAND_ERRORS) != 0) {
  606. Status = 0;
  607. } else {
  608. Status = errno;
  609. }
  610. goto ReadFileInEnd;
  611. }
  612. //
  613. // Loop reading the entire pattern file in.
  614. //
  615. while (TRUE) {
  616. do {
  617. BytesRead = read(File,
  618. Buffer + TotalBytesRead,
  619. BufferSize - TotalBytesRead - 1);
  620. } while ((BytesRead < 0) && (errno == EINTR));
  621. if (BytesRead < 0) {
  622. Status = errno;
  623. goto ReadFileInEnd;
  624. }
  625. TotalBytesRead += BytesRead;
  626. if (BytesRead == 0) {
  627. break;
  628. }
  629. //
  630. // If there's not at least a block's worth in the buffer, double the
  631. // buffer size.
  632. //
  633. if (BufferSize - TotalBytesRead < GREP_READ_BLOCK_SIZE) {
  634. BufferSize *= 2;
  635. NewBuffer = realloc(Buffer, BufferSize);
  636. if (NewBuffer != NULL) {
  637. Buffer = NewBuffer;
  638. } else {
  639. Status = ENOMEM;
  640. goto ReadFileInEnd;
  641. }
  642. }
  643. }
  644. //
  645. // Null terminate the string. The loop ensures there's at least one more
  646. // space open.
  647. //
  648. assert(TotalBytesRead < BufferSize);
  649. Buffer[TotalBytesRead] = '\0';
  650. //
  651. // If something was read, parse it.
  652. //
  653. if (TotalBytesRead != 0) {
  654. Status = GrepParsePatternList(Context, Buffer);
  655. if (Status != 0) {
  656. goto ReadFileInEnd;
  657. }
  658. }
  659. Status = 0;
  660. ReadFileInEnd:
  661. if (Buffer != NULL) {
  662. free(Buffer);
  663. }
  664. return Status;
  665. }
  666. INT
  667. GrepParsePatternList (
  668. PGREP_CONTEXT Context,
  669. PSTR String
  670. )
  671. /*++
  672. Routine Description:
  673. This routine reads in a string, splits it on newlines, and creates
  674. pattern entries for it.
  675. Arguments:
  676. Context - Supplies a pointer to the application context.
  677. String - Supplies the string to split.
  678. Return Value:
  679. 0 on success.
  680. Non-zero on failure.
  681. --*/
  682. {
  683. PSTR CurrentLine;
  684. size_t LineLength;
  685. PSTR NextLine;
  686. PGREP_PATTERN Pattern;
  687. INT Status;
  688. Pattern = NULL;
  689. //
  690. // Loop splitting lines.
  691. //
  692. CurrentLine = String;
  693. while (TRUE) {
  694. NextLine = strchr(CurrentLine, '\n');
  695. if (NextLine != NULL) {
  696. LineLength = (UINTN)NextLine - (UINTN)CurrentLine;
  697. } else {
  698. LineLength = strlen(CurrentLine);
  699. if (LineLength == 0) {
  700. break;
  701. }
  702. }
  703. Pattern = malloc(sizeof(GREP_PATTERN));
  704. if (Pattern == NULL) {
  705. Status = ENOMEM;
  706. goto ParsePatternListEnd;
  707. }
  708. memset(Pattern, 0, sizeof(GREP_PATTERN));
  709. Pattern->Pattern = malloc(LineLength + 1);
  710. if (Pattern->Pattern == NULL) {
  711. Status = ENOMEM;
  712. goto ParsePatternListEnd;
  713. }
  714. if (LineLength != 0) {
  715. memcpy(Pattern->Pattern, CurrentLine, LineLength);
  716. }
  717. Pattern->Pattern[LineLength] = '\0';
  718. INSERT_BEFORE(&(Pattern->ListEntry), &(Context->PatternList));
  719. Pattern = NULL;
  720. if (NextLine == NULL) {
  721. break;
  722. }
  723. CurrentLine = NextLine + 1;
  724. }
  725. Status = 0;
  726. ParsePatternListEnd:
  727. if (Pattern != NULL) {
  728. if (Pattern->Pattern != NULL) {
  729. free(Pattern->Pattern);
  730. }
  731. free(Pattern);
  732. }
  733. return Status;
  734. }
  735. INT
  736. GrepCompileRegularExpressions (
  737. PGREP_CONTEXT Context
  738. )
  739. /*++
  740. Routine Description:
  741. This routine compiles all regular expression patterns if appropriate.
  742. Arguments:
  743. Context - Supplies a pointer to the application context.
  744. Return Value:
  745. 0 on success.
  746. Non-zero on failure.
  747. --*/
  748. {
  749. INT CompileFlags;
  750. PLIST_ENTRY CurrentEntry;
  751. PSTR ErrorString;
  752. size_t ErrorStringSize;
  753. PGREP_PATTERN Pattern;
  754. INT Status;
  755. //
  756. // Skip this if they're just fixed strings and not regular expressions.
  757. //
  758. if ((Context->Options & GREP_OPTION_FIXED_STRINGS) != 0) {
  759. return 0;
  760. }
  761. //
  762. // Figure out the compile flags.
  763. //
  764. CompileFlags = REG_NOSUB;
  765. if ((Context->Options & GREP_OPTION_EXTENDED_EXPRESSIONS) != 0) {
  766. CompileFlags |= REG_EXTENDED;
  767. }
  768. if ((Context->Options & GREP_OPTION_IGNORE_CASE) != 0) {
  769. CompileFlags |= REG_ICASE;
  770. }
  771. CurrentEntry = Context->PatternList.Next;
  772. while (CurrentEntry != &(Context->PatternList)) {
  773. Pattern = LIST_VALUE(CurrentEntry, GREP_PATTERN, ListEntry);
  774. CurrentEntry = CurrentEntry->Next;
  775. Status = regcomp(&(Pattern->Expression),
  776. Pattern->Pattern,
  777. CompileFlags);
  778. if (Status != 0) {
  779. ErrorStringSize = regerror(Status, &(Pattern->Expression), NULL, 0);
  780. ErrorString = malloc(ErrorStringSize);
  781. if (ErrorString != NULL) {
  782. regerror(Status,
  783. &(Pattern->Expression),
  784. ErrorString,
  785. ErrorStringSize);
  786. SwPrintError(0,
  787. NULL,
  788. "Invalid regular expression '%s': %s",
  789. Pattern->Pattern,
  790. ErrorString);
  791. Status = 3;
  792. goto CompileRegularExpressionsEnd;
  793. }
  794. }
  795. //
  796. // Free the pattern both because it's no longer needed and to indicate
  797. // there's a valid compiled regular expression there.
  798. //
  799. free(Pattern->Pattern);
  800. Pattern->Pattern = NULL;
  801. }
  802. Status = 0;
  803. CompileRegularExpressionsEnd:
  804. return Status;
  805. }
  806. INT
  807. GrepAddInputFile (
  808. PGREP_CONTEXT Context,
  809. PSTR Path,
  810. ULONG RecursionLevel
  811. )
  812. /*++
  813. Routine Description:
  814. This routine adds a file to the list of files grep should process.
  815. Arguments:
  816. Context - Supplies a pointer to the application context.
  817. Path - Supplies a pointer to the file path to add.
  818. RecursionLevel - Supplies the recursion depth of this function.
  819. Return Value:
  820. 0 on success.
  821. Non-zero on failure.
  822. --*/
  823. {
  824. PSTR AppendedPath;
  825. ULONG AppendedPathSize;
  826. DIR *Directory;
  827. struct dirent Entry;
  828. struct dirent *EntryPointer;
  829. PGREP_INPUT InputEntry;
  830. struct stat Stat;
  831. INT Status;
  832. Directory = NULL;
  833. InputEntry = NULL;
  834. Status = SwStat(Path, TRUE, &Stat);
  835. if (Status != 0) {
  836. Status = errno;
  837. SwPrintError(Status, Path, "Unable to stat");
  838. goto AddInputFileEnd;
  839. }
  840. if (S_ISDIR(Stat.st_mode)) {
  841. //
  842. // Skip it unless recursive mode is on.
  843. //
  844. if ((Context->Options & GREP_OPTION_RECURSIVE) == 0) {
  845. Status = 0;
  846. goto AddInputFileEnd;
  847. }
  848. if (RecursionLevel >= GREP_MAX_RECURSION_DEPTH) {
  849. SwPrintError(Status, Path, "Max recursion depth reached");
  850. Status = ELOOP;
  851. goto AddInputFileEnd;
  852. }
  853. Directory = opendir(Path);
  854. if (Directory == NULL) {
  855. Status = errno;
  856. SwPrintError(Status, Path, "Unable to open directory");
  857. goto AddInputFileEnd;
  858. }
  859. //
  860. // Loop through all entries in the directory.
  861. //
  862. while (TRUE) {
  863. Status = SwReadDirectory(Directory, &Entry, &EntryPointer);
  864. if (Status != 0) {
  865. SwPrintError(Status, Path, "Unable to read directory");
  866. goto AddInputFileEnd;
  867. }
  868. if (EntryPointer == NULL) {
  869. break;
  870. }
  871. if ((strcmp(Entry.d_name, ".") == 0) ||
  872. (strcmp(Entry.d_name, "..") == 0)) {
  873. continue;
  874. }
  875. Status = SwAppendPath(Path,
  876. strlen(Path) + 1,
  877. Entry.d_name,
  878. strlen(Entry.d_name) + 1,
  879. &AppendedPath,
  880. &AppendedPathSize);
  881. if (Status == FALSE) {
  882. Status = ENOMEM;
  883. goto AddInputFileEnd;
  884. }
  885. Status = GrepAddInputFile(Context,
  886. AppendedPath,
  887. RecursionLevel + 1);
  888. free(AppendedPath);
  889. if (Status != 0) {
  890. goto AddInputFileEnd;
  891. }
  892. }
  893. //
  894. // This is not a directory, add it as an input.
  895. //
  896. } else {
  897. InputEntry = malloc(sizeof(GREP_INPUT));
  898. if (InputEntry == NULL) {
  899. Status = ENOMEM;
  900. goto AddInputFileEnd;
  901. }
  902. memset(InputEntry, 0, sizeof(GREP_INPUT));
  903. InputEntry->FileName = strdup(Path);
  904. if (InputEntry->FileName == NULL) {
  905. Status = ENOMEM;
  906. goto AddInputFileEnd;
  907. }
  908. InputEntry->Binary = FALSE;
  909. INSERT_BEFORE(&(InputEntry->ListEntry), &(Context->InputList));
  910. InputEntry = NULL;
  911. }
  912. Status = 0;
  913. AddInputFileEnd:
  914. if (Directory != NULL) {
  915. closedir(Directory);
  916. }
  917. return Status;
  918. }
  919. INT
  920. GrepProcessInput (
  921. PGREP_CONTEXT Context
  922. )
  923. /*++
  924. Routine Description:
  925. This routine compiles all regular expression patterns if appropriate.
  926. Arguments:
  927. Context - Supplies a pointer to the application context.
  928. Return Value:
  929. 0 on success.
  930. Non-zero on failure.
  931. --*/
  932. {
  933. PLIST_ENTRY CurrentEntry;
  934. BOOL FileOpened;
  935. PGREP_INPUT Input;
  936. PSTR LineBuffer;
  937. size_t LineBufferSize;
  938. INT Status;
  939. INT TotalStatus;
  940. LineBuffer = NULL;
  941. LineBufferSize = 0;
  942. TotalStatus = 1;
  943. //
  944. // Just loop through each input.
  945. //
  946. CurrentEntry = Context->InputList.Next;
  947. while (CurrentEntry != &(Context->InputList)) {
  948. Input = LIST_VALUE(CurrentEntry, GREP_INPUT, ListEntry);
  949. CurrentEntry = CurrentEntry->Next;
  950. FileOpened = FALSE;
  951. if (Input->File == NULL) {
  952. Input->File = fopen(Input->FileName, "r");
  953. if (Input->File == NULL) {
  954. if ((Context->Options &
  955. GREP_OPTION_SUPPRESS_BLAND_ERRORS) == 0) {
  956. Status = errno;
  957. SwPrintError(Status, Input->FileName, "Unable to open");
  958. goto ProcessInputEnd;
  959. }
  960. }
  961. FileOpened = TRUE;
  962. }
  963. Status = GrepProcessInputEntry(Context,
  964. Input,
  965. &LineBuffer,
  966. &LineBufferSize);
  967. if (FileOpened != FALSE) {
  968. fclose(Input->File);
  969. Input->File = NULL;
  970. }
  971. if (Status == 0) {
  972. if (TotalStatus == 1) {
  973. TotalStatus = 0;
  974. }
  975. } else if (Status > 1) {
  976. TotalStatus = Status;
  977. }
  978. }
  979. ProcessInputEnd:
  980. if (LineBuffer != NULL) {
  981. free(LineBuffer);
  982. }
  983. return TotalStatus;
  984. }
  985. INT
  986. GrepProcessInputEntry (
  987. PGREP_CONTEXT Context,
  988. PGREP_INPUT Input,
  989. PSTR *Buffer,
  990. size_t *BufferSize
  991. )
  992. /*++
  993. Routine Description:
  994. This routine compiles all regular expression patterns if appropriate.
  995. Arguments:
  996. Context - Supplies a pointer to the application context.
  997. Input - Supplies a pointer to the input entry.
  998. Buffer - Supplies a pointer that on input contains a buffer. On output,
  999. returns a potentially reallocated buffer.
  1000. BufferSize - Supplies a pointer that on input contains the size of the
  1001. input buffer. On output, returns the potentially expanded size of the
  1002. buffer.
  1003. Return Value:
  1004. 0 if the input matched.
  1005. 1 if the input did not match.
  1006. Other error codes on failure.
  1007. --*/
  1008. {
  1009. PLIST_ENTRY CurrentEntry;
  1010. ULONG LineNumber;
  1011. BOOL Match;
  1012. ULONG MatchCount;
  1013. PGREP_PATTERN Pattern;
  1014. INT Status;
  1015. LineNumber = 1;
  1016. MatchCount = 0;
  1017. //
  1018. // Loop across every line.
  1019. //
  1020. while (TRUE) {
  1021. Status = GrepReadLine(Context, Input, Buffer, BufferSize);
  1022. if (Status == EOF) {
  1023. Status = 0;
  1024. break;
  1025. } else if (Status != 0) {
  1026. goto ProcessInputEntryEnd;
  1027. }
  1028. CurrentEntry = Context->PatternList.Next;
  1029. while (CurrentEntry != &(Context->PatternList)) {
  1030. Pattern = LIST_VALUE(CurrentEntry, GREP_PATTERN, ListEntry);
  1031. CurrentEntry = CurrentEntry->Next;
  1032. Match = GrepMatchPattern(Context, *Buffer, Pattern);
  1033. if (Match != FALSE) {
  1034. MatchCount += 1;
  1035. }
  1036. //
  1037. // If it didn't match, keep going.
  1038. //
  1039. if (Match == FALSE) {
  1040. continue;
  1041. }
  1042. if ((Context->Options & GREP_OPTION_QUIET) != 0) {
  1043. continue;
  1044. }
  1045. //
  1046. // If there are more than one file elements and the file name was
  1047. // not already printed, precede the match with the file name.
  1048. //
  1049. if ((Context->Options & GREP_OPTION_PRINT_FILE_NAMES) != 0) {
  1050. if ((Context->Options &
  1051. GREP_OPTION_SUPPRESS_MATCH_PRINT) != 0) {
  1052. printf("%s\n", Input->FileName);
  1053. break;
  1054. } else {
  1055. printf("%s:", Input->FileName);
  1056. }
  1057. }
  1058. //
  1059. // With line counts only, just keep going.
  1060. //
  1061. if ((Context->Options & GREP_OPTION_LINE_COUNT) != 0) {
  1062. continue;
  1063. }
  1064. if (Input->Binary != FALSE) {
  1065. printf("Binary file %s matches.\n", Input->FileName);
  1066. break;
  1067. }
  1068. //
  1069. // If a line number is desired, print that too.
  1070. //
  1071. if ((Context->Options & GREP_OPTION_PRINT_LINE_NUMBERS) != 0) {
  1072. printf("%d:", LineNumber);
  1073. }
  1074. //
  1075. // Print the line itself.
  1076. //
  1077. printf("%s\n", *Buffer);
  1078. }
  1079. LineNumber += 1;
  1080. if ((MatchCount != 0) &&
  1081. ((Input->Binary != FALSE) ||
  1082. ((Context->Options & GREP_OPTION_SUPPRESS_MATCH_PRINT) != 0))) {
  1083. break;
  1084. }
  1085. }
  1086. //
  1087. // Print the count if desired.
  1088. //
  1089. if (((Context->Options & GREP_OPTION_LINE_COUNT) != 0) &&
  1090. ((Context->Options & GREP_OPTION_QUIET) == 0) &&
  1091. ((Context->Options & GREP_OPTION_SUPPRESS_MATCH_PRINT) == 0)) {
  1092. printf("%d\n", MatchCount);
  1093. }
  1094. Status = 0;
  1095. ProcessInputEntryEnd:
  1096. if ((Status == 0) && (MatchCount == 0)) {
  1097. Status = 1;
  1098. }
  1099. return Status;
  1100. }
  1101. INT
  1102. GrepReadLine (
  1103. PGREP_CONTEXT Context,
  1104. PGREP_INPUT Input,
  1105. PSTR *Buffer,
  1106. size_t *BufferSize
  1107. )
  1108. /*++
  1109. Routine Description:
  1110. This routine reads a new line into the given buffer.
  1111. Arguments:
  1112. Context - Supplies a pointer to the application context.
  1113. Input - Supplies a pointer to the input to read from.
  1114. Buffer - Supplies a pointer that on input contains a buffer. On output,
  1115. returns the buffer containing the line, which may be reallocated from
  1116. the original.
  1117. BufferSize - Supplies a pointer that on input contains the size of the
  1118. input buffer. On output, returns the potentially expanded size of the
  1119. buffer. Note that this returns the size of the allocation, not the
  1120. length of the string. The string is null terminated.
  1121. Return Value:
  1122. 0 on success.
  1123. Non-zero on failure.
  1124. EOF if the end of the file was hit.
  1125. --*/
  1126. {
  1127. INT Character;
  1128. size_t CharacterCount;
  1129. PSTR NewBuffer;
  1130. INT Status;
  1131. PSTR String;
  1132. size_t StringSize;
  1133. CharacterCount = 0;
  1134. String = *Buffer;
  1135. StringSize = *BufferSize;
  1136. //
  1137. // Loop reading characters.
  1138. //
  1139. while (TRUE) {
  1140. //
  1141. // Ensure that the string is big enough to take two more characters.
  1142. //
  1143. if (CharacterCount + 2 > StringSize) {
  1144. if (StringSize == 0) {
  1145. StringSize = GREP_INITIAL_LINE_SIZE;
  1146. } else {
  1147. StringSize *= 2;
  1148. }
  1149. assert(StringSize >= CharacterCount + 2);
  1150. NewBuffer = realloc(String, StringSize);
  1151. if (NewBuffer == NULL) {
  1152. Status = ENOMEM;
  1153. goto ReadLineEnd;
  1154. }
  1155. String = NewBuffer;
  1156. }
  1157. //
  1158. // Get a new character. If it's the end of the file, terminate this
  1159. // line, or if this line is empty, return EOF overall.
  1160. //
  1161. Character = fgetc(Input->File);
  1162. if (Character == EOF) {
  1163. if (CharacterCount != 0) {
  1164. break;
  1165. }
  1166. Status = EOF;
  1167. goto ReadLineEnd;
  1168. } else {
  1169. if (Character == '\0') {
  1170. Input->Binary = TRUE;
  1171. }
  1172. //
  1173. // Skip over any null terminators at the beginning.
  1174. //
  1175. if ((Character == '\0') && (CharacterCount == 0)) {
  1176. continue;
  1177. }
  1178. if ((Character == '\n') || (Character == '\0')) {
  1179. break;
  1180. } else {
  1181. String[CharacterCount] = Character;
  1182. CharacterCount += 1;
  1183. }
  1184. }
  1185. }
  1186. String[CharacterCount] = '\0';
  1187. Status = 0;
  1188. ReadLineEnd:
  1189. *Buffer = String;
  1190. *BufferSize = StringSize;
  1191. return Status;
  1192. }
  1193. BOOL
  1194. GrepMatchPattern (
  1195. PGREP_CONTEXT Context,
  1196. PSTR Input,
  1197. PGREP_PATTERN Pattern
  1198. )
  1199. /*++
  1200. Routine Description:
  1201. This routine determines if the given input line matches a grep pattern,
  1202. and prints out match information if it does.
  1203. Arguments:
  1204. Context - Supplies a pointer to the application context.
  1205. Input - Supplies a pointer to the null terminated input line.
  1206. Pattern - Supplies a pointer to the pattern to match against.
  1207. Return Value:
  1208. TRUE if the pattern matched the input.
  1209. FALSE if there was no matched.
  1210. --*/
  1211. {
  1212. regmatch_t ExpressionMatch;
  1213. BOOL Match;
  1214. INT Status;
  1215. Match = FALSE;
  1216. //
  1217. // First figure out if the pattern matched.
  1218. //
  1219. if ((Context->Options & GREP_OPTION_FIXED_STRINGS) != 0) {
  1220. Match = GrepMatchFixedString(Context, Input, Pattern);
  1221. } else {
  1222. Status = regexec(&(Pattern->Expression), Input, 1, &ExpressionMatch, 0);
  1223. if (Status == 0) {
  1224. Match = TRUE;
  1225. if ((Context->Options & GREP_OPTION_FULL_LINE_ONLY) != 0) {
  1226. if ((ExpressionMatch.rm_so != 0) ||
  1227. (Input[ExpressionMatch.rm_eo - 1] != '\0')) {
  1228. Match = FALSE;
  1229. }
  1230. }
  1231. }
  1232. }
  1233. if ((Context->Options & GREP_OPTION_NEGATE_SEARCH) != 0) {
  1234. Match = !Match;
  1235. }
  1236. return Match;
  1237. }
  1238. BOOL
  1239. GrepMatchFixedString (
  1240. PGREP_CONTEXT Context,
  1241. PSTR Input,
  1242. PGREP_PATTERN Pattern
  1243. )
  1244. /*++
  1245. Routine Description:
  1246. This routine attempts to match against a fixed string pattern.
  1247. Arguments:
  1248. Context - Supplies a pointer to the application context.
  1249. Input - Supplies a pointer to the null terminated input line.
  1250. Pattern - Supplies a pointer to the pattern to match against.
  1251. Return Value:
  1252. TRUE if the pattern matched the input.
  1253. FALSE if there was no matched.
  1254. --*/
  1255. {
  1256. ULONG BeginIndex;
  1257. BOOL IgnoreCase;
  1258. BOOL Match;
  1259. PSTR PatternString;
  1260. ULONG SearchIndex;
  1261. IgnoreCase = FALSE;
  1262. if ((Context->Options & GREP_OPTION_IGNORE_CASE) != 0) {
  1263. IgnoreCase = TRUE;
  1264. }
  1265. Match = FALSE;
  1266. PatternString = Pattern->Pattern;
  1267. SearchIndex = 0;
  1268. BeginIndex = 0;
  1269. while (Input[BeginIndex] != '\0') {
  1270. SearchIndex = 0;
  1271. //
  1272. // This seems like a mess, but isn't so bad. Loop as long as:
  1273. // 1. The pattern hasn't ended, AND
  1274. // 2. Either:
  1275. // a. The pattern matches the input, OR
  1276. // b. "Ignore case" is on and the lowercase versions of the pattern
  1277. // and inputs match.
  1278. //
  1279. // See, not so bad.
  1280. //
  1281. while ((PatternString[SearchIndex] != '\0') &&
  1282. ((Input[BeginIndex + SearchIndex] ==
  1283. PatternString[SearchIndex]) ||
  1284. ((IgnoreCase != FALSE) &&
  1285. (tolower(Input[BeginIndex + SearchIndex]) ==
  1286. tolower(PatternString[SearchIndex]))))) {
  1287. SearchIndex += 1;
  1288. }
  1289. if (PatternString[SearchIndex] == '\0') {
  1290. Match = TRUE;
  1291. break;
  1292. }
  1293. BeginIndex += 1;
  1294. }
  1295. //
  1296. // If there's a match and it's required to use up the whole line, see that
  1297. // it does. That case could have been optimized by stopping the loop above
  1298. // early, but it's expected to be an uncommonly used flag, so adding the
  1299. // extra compare in every loop iteration seemed worse.
  1300. //
  1301. if ((Match != FALSE) &&
  1302. ((Context->Options & GREP_OPTION_FULL_LINE_ONLY) != 0)) {
  1303. if ((BeginIndex != 0) || (Input[BeginIndex + SearchIndex] != '\0')) {
  1304. Match = FALSE;
  1305. }
  1306. }
  1307. return Match;
  1308. }