regex.h 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. /*++
  2. Copyright (c) 2013 Minoca Corp.
  3. This file is licensed under the terms of the GNU Lesser General Public
  4. License version 3. Alternative licensing terms are available. Contact
  5. info@minocacorp.com for details.
  6. Module Name:
  7. regex.h
  8. Abstract:
  9. This header contains definitions for compiling and executing Regular
  10. Expressions.
  11. Author:
  12. Evan Green 8-Jul-2013
  13. --*/
  14. #ifndef _REGEX_H
  15. #define _REGEX_H
  16. //
  17. // ------------------------------------------------------------------- Includes
  18. //
  19. #include <libcbase.h>
  20. #include <stddef.h>
  21. //
  22. // ---------------------------------------------------------------- Definitions
  23. //
  24. #ifdef __cplusplus
  25. extern "C" {
  26. #endif
  //
  // Compile flags. These bits are combined into the flags bitfield handed to
  // the regular expression compile function.
  //

  //
  // Selects extended regular expression syntax, which additionally recognizes
  // symbols such as |, +, and ?.
  //

  #define REG_EXTENDED 0x00000001

  //
  // Makes the match case-insensitive.
  //

  #define REG_ICASE 0x00000002

  //
  // Requests a plain success/failure answer from execution; the match offsets
  // are not saved.
  //

  #define REG_NOSUB 0x00000004

  //
  // Alters how newlines are treated:
  // 1. A newline is never matched by a . expression or by any form of a
  //    non-matching list.
  // 2. A circumflex (^) matches any zero length string immediately after a
  //    newline, whether or not REG_NOTBOL is set.
  // 3. A dollar sign ($) matches any zero length string immediately before a
  //    newline, whether or not REG_NOTEOL is set.
  //

  #define REG_NEWLINE 0x00000008
  //
  // Execution flags. These bits are combined into the flags bitfield handed to
  // the regular expression execution function. Their numeric values overlap
  // with the compile flags, so the two sets must not be mixed.
  //

  //
  // Declares that the start of the supplied string is not the start of a line,
  // so a circumflex (^) acting as an anchor should not match there.
  //

  #define REG_NOTBOL 0x00000001

  //
  // Declares that the end of the supplied string is not the end of a line, so
  // a dollar sign ($) acting as an anchor should not match there.
  //

  #define REG_NOTEOL 0x00000002
  //
  // Regular expression status codes. Zero means success; every code below is
  // a distinct non-zero failure value.
  //

  //
  // The regular expression failed to match.
  //

  #define REG_NOMATCH 1

  //
  // The regular expression pattern was invalid.
  //

  #define REG_BADPAT 2

  //
  // An invalid collating element was referenced.
  //

  #define REG_ECOLLATE 3

  //
  // An invalid character class type was referenced.
  //

  #define REG_ECTYPE 4

  //
  // A trailing backslash (\) was found in the pattern.
  //

  #define REG_EESCAPE 5

  //
  // A number in "\digit" is invalid or in error.
  //

  #define REG_ESUBREG 6

  //
  // There is a square bracket [] imbalance.
  //

  #define REG_EBRACK 7

  //
  // There is a \(\) or () imbalance.
  //

  #define REG_EPAREN 8

  //
  // The contents of \{\} are invalid: either not a number, too large of a
  // number, more than two numbers, or the first number was larger than the
  // second.
  //

  #define REG_BADBR 9

  //
  // The endpoint in a range expression is invalid.
  //

  #define REG_ERANGE 10

  //
  // The system failed a necessary memory allocation.
  //

  #define REG_ESPACE 11

  //
  // A '?', '*', or '+' was not preceded by a valid regular expression.
  //

  #define REG_BADRPT 12

  //
  // There is a \{\} imbalance. POSIX requires <regex.h> to define this name,
  // so it is provided to let portable code compile. It takes the next unused
  // value so that none of the existing codes change.
  // NOTE(review): this header does not show whether the implementation ever
  // reports this code or has an error string for it - confirm in the library.
  //

  #define REG_EBRACE 13
  119. //
  120. // ------------------------------------------------------ Data Type Definitions
  121. //
  //
  // Offsets into a string being searched by a regular expression are
  // expressed in this signed type.
  // NOTE(review): POSIX asks that regoff_t be able to hold the largest
  // ptrdiff_t or ssize_t value, and int may be narrower than that on 64-bit
  // targets. Widening it would change the layout of regmatch_t, so confirm
  // against the library implementation before changing it.
  //

  typedef int regoff_t;
  /*++

  Structure Description:

      This structure holds a compiled regular expression.

  Members:

      re_nsub - Stores the count of subexpressions contained in the regular
          expression.

      re_data - Stores an opaque pointer to the rest of the regular expression
          data.

  --*/

  typedef struct _regex_t {
      size_t re_nsub;
      void *re_data;
  } regex_t;
  138. /*++
  139. Structure Description:
  140. This structure defines the regular expression match structure.
  141. Members:
  142. rm_so - Stores the starting offset of the regular expression.
  143. rm_eo - Stores one beyond the ending offset of the regular expression.
  144. --*/
  145. typedef struct _regmatch_t {
  146. regoff_t rm_so;
  147. regoff_t rm_eo;
  148. } regmatch_t;
  149. //
  150. // -------------------------------------------------------------------- Globals
  151. //
  152. //
  153. // -------------------------------------------------------- Function Prototypes
  154. //
  155. LIBC_API
  156. int
  157. regcomp (
  158. regex_t *RegularExpression,
  159. const char *Pattern,
  160. int Flags
  161. );
  162. /*++
  163. Routine Description:
  164. This routine compiles a regular expression.
  165. Arguments:
  166. RegularExpression - Supplies a pointer to the regular expression structure
  167. where the compiled form will reside on success.
  168. Pattern - Supplies a pointer to the pattern input string.
  169. Flags - Supplies a bitfield of flags governing the behavior of the regular
  170. expression. See some REG_* definitions.
  171. Return Value:
  172. 0 on success.
  173. Returns a REG_* status code on failure.
  174. --*/
  175. LIBC_API
  176. int
  177. regexec (
  178. const regex_t *RegularExpression,
  179. const char *String,
  180. size_t MatchArraySize,
  181. regmatch_t Match[],
  182. int Flags
  183. );
  184. /*++
  185. Routine Description:
  186. This routine executes a regular expression, performing a search of the
  187. given string to see if it matches the regular expression.
  188. Arguments:
  189. RegularExpression - Supplies a pointer to the compiled regular expression.
  190. String - Supplies a pointer to the string to check for a match.
  191. MatchArraySize - Supplies the number of elements in the match array
  192. parameter. Supply zero and the match array parameter will be ignored.
  193. Match - Supplies an optional pointer to an array where the string indices of
  194. the match and its subexpressions will be returned.
  195. Flags - Supplies a bitfield of flags governing the search. See some REG_*
  196. definitions (specifically REG_NOTBOL and REG_NOTEOL).
  197. Return Value:
  198. 0 on successful completion (there was a match).
  199. REG_NOMATCH if there was no match.
  200. --*/
  201. LIBC_API
  202. void
  203. regfree (
  204. regex_t *RegularExpression
  205. );
  206. /*++
  207. Routine Description:
  208. This routine destroys and frees all resources associated with a compiled
  209. regular expression.
  210. Arguments:
  211. RegularExpression - Supplies a pointer to the regular expression structure
  212. to destroy. The caller owns the structure itself, this routine just
  213. guts all the innards.
  214. Return Value:
  215. None.
  216. --*/
  217. LIBC_API
  218. size_t
  219. regerror (
  220. int ErrorCode,
  221. const regex_t *Expression,
  222. char *Buffer,
  223. size_t BufferSize
  224. );
  225. /*++
  226. Routine Description:
  227. This routine returns error information about what went wrong trying to
  228. compile the regular expression.
  229. Arguments:
  230. ErrorCode - Supplies the error code returned from a regular expression
  231. token.
  232. Expression - Supplies an optional pointer to the expression.
  233. Buffer - Supplies a pointer to a buffer where the error string will be
  234. returned, always null terminated.
  235. BufferSize - Supplies the size of the buffer in bytes.
  236. Return Value:
  237. Returns the number of bytes needed to hold the entire error string,
  238. including the null terminator. If the return value is greater than the
  239. supplied size, then the buffer will be truncated and null terminated.
  240. --*/
  241. #ifdef __cplusplus
  242. }
  243. #endif
  244. #endif