regexp.h 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. /*++
  2. Copyright (c) 2013 Minoca Corp.
  3. This file is licensed under the terms of the GNU General Public License
  4. version 3. Alternative licensing terms are available. Contact
  5. info@minocacorp.com for details. See the LICENSE file at the root of this
  6. project for complete licensing information.
  7. Module Name:
  8. regexp.h
  9. Abstract:
  10. This header contains private definitions for implementing support for
  11. Regular Expressions.
  12. Author:
  13. Evan Green 8-Jul-2013
  14. --*/
  15. //
  16. // ------------------------------------------------------------------- Includes
  17. //
  18. //
  19. // --------------------------------------------------------------------- Macros
  20. //
  21. //
  22. // This macro matches the "name" character class, which is uppercase letters,
  23. // lowercase letters, digits, and underscore.
  24. //
  25. #define REGULAR_EXPRESSION_IS_NAME(_Character) \
  26. ((isupper(_Character)) || (islower(_Character)) || \
  27. (isdigit(_Character)) || ((_Character) == '_'))
  28. //
  29. // ---------------------------------------------------------------- Definitions
  30. //
  31. //
  32. // Regular expression internal flags.
  33. //
  34. #define REGULAR_EXPRESSION_ANCHORED_LEFT 0x00000001
  35. #define REGULAR_EXPRESSION_ANCHORED_RIGHT 0x00000002
  36. #define REGULAR_EXPRESSION_NEGATED 0x00000004
  37. //
  38. // ------------------------------------------------------ Data Type Definitions
  39. //
  40. typedef enum _REGULAR_EXPRESSION_STATUS {
  41. RegexStatusSuccess,
  42. RegexStatusNoMatch = REG_NOMATCH,
  43. RegexStatusBadPattern = REG_BADPAT,
  44. RegexStatusBadCollatingElement = REG_ECOLLATE,
  45. RegexStatusBadCharacterClass = REG_ECTYPE,
  46. RegexStatusTrailingEscape = REG_EESCAPE,
  47. RegexStatusInvalidSubexpression = REG_ESUBREG,
  48. RegexStatusBracketImbalance = REG_EBRACK,
  49. RegexStatusParenthesesImbalance = REG_EPAREN,
  50. RegexStatusInvalidBraces = REG_BADBR,
  51. RegexStatusBadRange = REG_ERANGE,
  52. RegexStatusNoMemory = REG_ESPACE,
  53. RegexStatusInvalidRepeat = REG_BADRPT,
  54. } REGULAR_EXPRESSION_STATUS, *PREGULAR_EXPRESSION_STATUS;
  55. typedef enum _REGEX_ENTRY_TYPE {
  56. RegexEntryInvalid,
  57. RegexEntryOrdinaryCharacters,
  58. RegexEntryAnyCharacter,
  59. RegexEntryBackReference,
  60. RegexEntrySubexpression,
  61. RegexEntryBracketExpression,
  62. RegexEntryStringBegin,
  63. RegexEntryStringEnd,
  64. RegexEntryBranch,
  65. RegexEntryBranchOption,
  66. RegexEntryStartOfWord,
  67. RegexEntryEndOfWord,
  68. } REGEX_ENTRY_TYPE, *PREGEX_ENTRY_TYPE;
  69. typedef enum _BRACKET_EXPRESSION_TYPE {
  70. BracketExpressionInvalid,
  71. BracketExpressionSingleCharacters,
  72. BracketExpressionRange,
  73. BracketExpressionCharacterClassAlphanumeric,
  74. BracketExpressionCharacterClassAlphabetic,
  75. BracketExpressionCharacterClassBlank,
  76. BracketExpressionCharacterClassControl,
  77. BracketExpressionCharacterClassDigit,
  78. BracketExpressionCharacterClassGraph,
  79. BracketExpressionCharacterClassLowercase,
  80. BracketExpressionCharacterClassPrintable,
  81. BracketExpressionCharacterClassPunctuation,
  82. BracketExpressionCharacterClassSpace,
  83. BracketExpressionCharacterClassUppercase,
  84. BracketExpressionCharacterClassHexDigit,
  85. BracketExpressionCharacterClassName
  86. } BRACKET_EXPRESSION_TYPE, *PBRACKET_EXPRESSION_TYPE;
  87. /*++
  88. Structure Description:
  89. This structure defines a string in a regular expression used for storing
  90. characters (ordinary or set).
  91. Members:
  92. Data - Supplies a pointer to the buffer containing the string characters.
  93. Size - Supplies the number of valid bytes in the buffer.
  94. Capacity - Supplies the size of the buffer allocation.
  95. --*/
  96. typedef struct _REGULAR_EXPRESSION_STRING {
  97. PSTR Data;
  98. ULONG Size;
  99. ULONG Capacity;
  100. } REGULAR_EXPRESSION_STRING, *PREGULAR_EXPRESSION_STRING;
  101. /*++
  102. Structure Description:
  103. This structure defines a bracket expression embedded within a regular
  104. expression.
  105. Members:
  106. Minimum - Stores the minimum character, inclusive.
  107. Maximum - Stores the maximum character, inclusive.
  108. --*/
  109. typedef struct _REGULAR_BRACKET_EXPRESSION_RANGE {
  110. INT Minimum;
  111. INT Maximum;
  112. } REGULAR_BRACKET_EXPRESSION_RANGE, *PREGULAR_BRACKET_EXPRESSION_RANGE;
  113. /*++
  114. Structure Description:
  115. This structure defines a bracket expression embedded within a regular
  116. expression.
  117. Members:
  118. ListEntry - Stores pointers to the next and previous bracket entries in the
  119. expression.
  120. Type - Stores the type of bracket expression this entry represents.
  121. Range - Stores the range for range expressions.
  122. --*/
  123. typedef struct _REGULAR_BRACKET_ENTRY {
  124. LIST_ENTRY ListEntry;
  125. BRACKET_EXPRESSION_TYPE Type;
  126. union {
  127. REGULAR_BRACKET_EXPRESSION_RANGE Range;
  128. } U;
  129. } REGULAR_BRACKET_ENTRY, *PREGULAR_BRACKET_ENTRY;
  130. /*++
  131. Structure Description:
  132. This structure defines a bracket expression embedded within a regular
  133. expression.
  134. Members:
  135. RegularCharacters - Stores the string containing the regular characters in
  136. the bracket expression.
  137. EntryList - Stores the list of bracket entries, which contains things like
  138. ranges and character classes.
  139. --*/
  140. typedef struct _REGULAR_BRACKET_EXPRESSION {
  141. REGULAR_EXPRESSION_STRING RegularCharacters;
  142. LIST_ENTRY EntryList;
  143. } REGULAR_BRACKET_EXPRESSION, *PREGULAR_BRACKET_EXPRESSION;
  144. typedef struct _REGULAR_EXPRESSION_ENTRY
  145. REGULAR_EXPRESSION_ENTRY, *PREGULAR_EXPRESSION_ENTRY;
  146. /*++
  147. Structure Description:
  148. This structure defines an entry within a regular expression.
  149. Members:
  150. ListEntry - Stores pointers to the next and previous entries in the
  151. regular expression.
  152. Type - Stores the type of regular expression entry.
  153. Flags - Stores flags describing the behavior of the entry. See
  154. REGULAR_EXPRESSION_* definitions.
  155. DuplicateMin - Stores the minimum number of occurrences of the entry.
  156. DuplicateMax - Stores the maximum number of occurrences of the entry.
  157. Supply -1 for infinite recurrences.
  158. ChildList - Stores the list of child expression entries in this node.
  159. Parent - Stores the optional parent node.
  160. String - Stores the string for ordinary characters.
  161. BackReferenceNumber - Stores the subexpression index being referred to in
  162. a back reference.
  163. SubexpressionNumber - Stores the index of this subexpression, starting from
  164. 1.
  165. BracketExpression - Stores the bracker expression information for bracket
  166. expressions.
  167. --*/
  168. struct _REGULAR_EXPRESSION_ENTRY {
  169. LIST_ENTRY ListEntry;
  170. REGEX_ENTRY_TYPE Type;
  171. ULONG Flags;
  172. ULONG DuplicateMin;
  173. ULONG DuplicateMax;
  174. LIST_ENTRY ChildList;
  175. PREGULAR_EXPRESSION_ENTRY Parent;
  176. union {
  177. REGULAR_EXPRESSION_STRING String;
  178. ULONG BackReferenceNumber;
  179. ULONG SubexpressionNumber;
  180. REGULAR_BRACKET_EXPRESSION BracketExpression;
  181. } U;
  182. };
  183. /*++
  184. Structure Description:
  185. This structure defines the internal regular expression representation.
  186. Members:
  187. SubexpressionCount - Stores the number of sub expressions.
  188. Flags - Stores the flags of the regular expression.
  189. BaseEntry - Stores the initial subexpression entry, a slightly modified
  190. subexpression.
  191. --*/
  192. typedef struct _REGULAR_EXPRESSION {
  193. ULONG SubexpressionCount;
  194. ULONG Flags;
  195. REGULAR_EXPRESSION_ENTRY BaseEntry;
  196. } REGULAR_EXPRESSION, *PREGULAR_EXPRESSION;
  197. //
  198. // -------------------------------------------------------------------- Globals
  199. //
  200. //
  201. // -------------------------------------------------------- Function Prototypes
  202. //