regex.h 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. /*++
  2. Copyright (c) 2013 Minoca Corp.
  3. This file is licensed under the terms of the GNU General Public License
  4. version 3. Alternative licensing terms are available. Contact
  5. info@minocacorp.com for details. See the LICENSE file at the root of this
  6. project for complete licensing information.
  7. Module Name:
  8. regex.h
  9. Abstract:
  10. This header contains definitions for compiling and executing Regular
  11. Expressions.
  12. Author:
  13. Evan Green 8-Jul-2013
  14. --*/
  15. #ifndef _REGEX_H
  16. #define _REGEX_H
  17. //
  18. // ------------------------------------------------------------------- Includes
  19. //
  20. //
  21. // Define this to prevent it from getting defined as an import.
  22. //
  23. #ifndef LIBC_API
  24. #define LIBC_API
  25. #endif
  26. #include <stddef.h>
  27. //
  28. // ---------------------------------------------------------------- Definitions
  29. //
  30. #ifdef __cplusplus
  31. extern "C" {
  32. #endif
  33. //
  34. // Define flags to send into the regular expression compile function.
  35. //
  36. //
  37. // Set this flag to use extended regular expressions, which recognize extra
  38. // symbols like |, +, and ?.
  39. //
  40. #define REG_EXTENDED 0x00000001
  41. //
  42. // Set this flag to ignore case in the match.
  43. //
  44. #define REG_ICASE 0x00000002
  45. //
  46. // Set this flag to only report success/failure during execution, and not save
  47. // the match offsets.
  48. //
  49. #define REG_NOSUB 0x00000004
  50. //
  51. // Set this flag to change newline behavior such that:
  52. // 1. Newlines don't match a . expression or any form of a non-matching list.
  53. // 2. A circumflex (^) will match any zero length string immediately after a
  54. // newline, regardless of the setting of REG_NOTBOL.
  55. // 3. A dollar sign will match any zero length string before a newline,
  56. // regardless of the setting of REG_NOTEOL.
  57. //
  58. #define REG_NEWLINE 0x00000008
  59. //
  60. // Define flags to pass into the execution of regular expressions.
  61. //
  62. //
  63. // Set this flag to indicate that the beginning of this string is not the
  64. // beginning of the line, so a circumflex (^) used as an anchor should not
  65. // match.
  66. //
  67. #define REG_NOTBOL 0x00000001
  68. //
  69. // Set this flag to indicate that the end of this string is not the end of the
  70. // line, so a dollar sign ($) used as an anchor should not match.
  71. //
  72. #define REG_NOTEOL 0x00000002
  73. //
  74. // Define regular expression status codes.
  75. //
  76. //
  77. // The regular expression failed to match.
  78. //
  79. #define REG_NOMATCH 1
  80. //
  81. // The regular expression pattern was invalid.
  82. //
  83. #define REG_BADPAT 2
  84. //
  85. // An invalid collating element was referenced.
  86. //
  87. #define REG_ECOLLATE 3
  88. //
  89. // An invalid character class type was referenced.
  90. //
  91. #define REG_ECTYPE 4
  92. //
  93. // A trailing backslash (\) was found in the pattern.
  94. //
  95. #define REG_EESCAPE 5
  96. //
  97. // A number in "\digit" is invalid or in error.
  98. //
  99. #define REG_ESUBREG 6
  100. //
  101. // There is a square bracket [] imbalance.
  102. //
  103. #define REG_EBRACK 7
  104. //
  105. // There is a \(\) or () imbalance.
  106. //
  107. #define REG_EPAREN 8
  108. //
  109. // The contents of \{\} are invalid: either not a number, too large of a number,
  110. // more than two numbers, or the first number was larger than the second.
  111. //
  112. #define REG_BADBR 9
  113. //
  114. // The endpoint in a range expression is invalid.
  115. //
  116. #define REG_ERANGE 10
  117. //
  118. // The system failed a necessary memory allocation.
  119. //
  120. #define REG_ESPACE 11
  121. //
  122. // A '?', '*', or '+' was not preceded by a valid regular expression.
  123. //
  124. #define REG_BADRPT 12
  125. //
  126. // ------------------------------------------------------ Data Type Definitions
  127. //
  128. //
  129. // Define the type used for offsets into strings in regular expressions.
  130. //
  131. typedef int regoff_t;
  132. /*++
  133. Structure Description:
  134. This structure defines the regular expression structure.
  135. Members:
  136. re_nsub - Stores the number of subexpressions in the regular expression.
  137. re_data - Stores an opaque pointer to the remainder of the regular
  138. expression data.
  139. --*/
  140. typedef struct _regex_t {
  141. size_t re_nsub;
  142. void *re_data;
  143. } regex_t;
  144. /*++
  145. Structure Description:
  146. This structure defines the regular expression match structure.
  147. Members:
  148. rm_so - Stores the starting offset of the regular expression.
  149. rm_eo - Stores one beyond the ending offset of the regular expression.
  150. --*/
  151. typedef struct _regmatch_t {
  152. regoff_t rm_so;
  153. regoff_t rm_eo;
  154. } regmatch_t;
  155. //
  156. // -------------------------------------------------------------------- Globals
  157. //
  158. //
  159. // -------------------------------------------------------- Function Prototypes
  160. //
  161. LIBC_API
  162. int
  163. regcomp (
  164. regex_t *RegularExpression,
  165. const char *Pattern,
  166. int Flags
  167. );
  168. /*++
  169. Routine Description:
  170. This routine compiles a regular expression.
  171. Arguments:
  172. RegularExpression - Supplies a pointer to the regular expression structure
  173. where the compiled form will reside on success.
  174. Pattern - Supplies a pointer to the pattern input string.
  175. Flags - Supplies a bitfield of flags governing the behavior of the regular
  176. expression. See some REG_* definitions.
  177. Return Value:
  178. 0 on success.
  179. Returns a REG_* status code on failure.
  180. --*/
  181. LIBC_API
  182. int
  183. regexec (
  184. const regex_t *RegularExpression,
  185. const char *String,
  186. size_t MatchArraySize,
  187. regmatch_t Match[],
  188. int Flags
  189. );
  190. /*++
  191. Routine Description:
  192. This routine executes a regular expression, performing a search of the
  193. given string to see if it matches the regular expression.
  194. Arguments:
  195. RegularExpression - Supplies a pointer to the compiled regular expression.
  196. String - Supplies a pointer to the string to check for a match.
  197. MatchArraySize - Supplies the number of elements in the match array
  198. parameter. Supply zero and the match array parameter will be ignored.
  199. Match - Supplies an optional pointer to an array where the string indices of
  200. the match and its subexpressions will be returned.
  201. Flags - Supplies a bitfield of flags governing the search. See some REG_*
  202. definitions (specifically REG_NOTBOL and REG_NOTEOL).
  203. Return Value:
  204. 0 on successful completion (there was a match).
  205. REG_NOMATCH if there was no match.
  206. --*/
  207. LIBC_API
  208. void
  209. regfree (
  210. regex_t *RegularExpression
  211. );
  212. /*++
  213. Routine Description:
  214. This routine destroys and frees all resources associated with a compiled
  215. regular expression.
  216. Arguments:
  217. RegularExpression - Supplies a pointer to the regular expression structure
  218. to destroy. The caller owns the structure itself, this routine just
  219. guts all the innards.
  220. Return Value:
  221. None.
  222. --*/
  223. LIBC_API
  224. size_t
  225. regerror (
  226. int ErrorCode,
  227. const regex_t *Expression,
  228. char *Buffer,
  229. size_t BufferSize
  230. );
  231. /*++
  232. Routine Description:
  233. This routine returns error information about what went wrong trying to
  234. compile the regular expression.
  235. Arguments:
  236. ErrorCode - Supplies the error code returned from a regular expression
  237. token.
  238. Expression - Supplies an optional pointer to the expression.
  239. Buffer - Supplies a pointer to a buffer where the error string will be
  240. returned, always null terminated.
  241. BufferSize - Supplies the size of the buffer in bytes.
  242. Return Value:
  243. Returns the number of bytes needed to hold the entire error string,
  244. including the null terminator. If the return value is greater than the
  245. supplied size, then the buffer will be truncated and null terminated.
  246. --*/
  247. #ifdef __cplusplus
  248. }
  249. #endif
  250. #endif