regexp 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. .TH REGEXP 2
  2. .SH NAME
  3. regcomp, regcomplit, regcompnl, regexec, regsub, rregexec, rregsub, regerror \- regular expression
  4. .SH SYNOPSIS
  5. .B #include <u.h>
  6. .br
  7. .B #include <libc.h>
  8. .br
  9. .B #include <regexp.h>
  10. .PP
  11. .ta \w'\fLRegprog 'u
  12. .B
  13. Reprog *regcomp(char *exp)
  14. .PP
  15. .B
  16. Reprog *regcomplit(char *exp)
  17. .PP
  18. .B
  19. Reprog *regcompnl(char *exp)
  20. .PP
  21. .nf
  22. .B
  23. int regexec(Reprog *prog, char *string, Resub *match, int msize)
  24. .PP
  25. .nf
  26. .B
  27. void regsub(char *source, char *dest, int dlen, Resub *match, int msize)
  28. .PP
  29. .nf
  30. .B
  31. int rregexec(Reprog *prog, Rune *string, Resub *match, int msize)
  32. .PP
  33. .nf
  34. .B
  35. void rregsub(Rune *source, Rune *dest, int dlen, Resub *match, int msize)
  36. .PP
  37. .B
  38. void regerror(char *msg)
  39. .SH DESCRIPTION
  40. .I Regcomp
  41. compiles a
  42. regular expression and returns
  43. a pointer to the generated description.
  44. The space is allocated by
  45. .IR malloc (2)
  46. and may be released by
  47. .IR free .
  48. Regular expressions are exactly as in
  49. .IR regexp (6).
  50. .PP
  51. .I Regcomplit
  52. is like
  53. .I regcomp
  54. except that all characters are treated literally.
  55. .I Regcompnl
  56. is like
  57. .I regcomp
  58. except that the
  59. .B .
  60. metacharacter matches all characters, including newlines.
  61. .PP
  62. .I Regexec
  63. matches a null-terminated
  64. .I string
  65. against the compiled regular expression in
  66. .IR prog .
  67. If it matches,
  68. .I regexec
  69. returns
  70. .B 1
  71. and fills in the array
  72. .I match
  73. with character pointers to the substrings of
  74. .I string
  75. that correspond to the
  76. parenthesized subexpressions of
  77. .IR exp :
  78. .BI match[ i ].sp
  79. points to the beginning and
  80. .BI match[ i ].ep
  81. points just beyond
  82. the end of the
  83. .IR i th
  84. substring.
  85. (Subexpression
  86. .I i
  87. begins at the
  88. .IR i th
  89. left parenthesis, counting from 1.)
  90. Pointers in
  91. .B match[0]
  92. pick out the substring that corresponds to
  93. the whole regular expression.
  94. Unused elements of
  95. .I match
  96. are filled with zeros.
  97. Matches involving
  98. .LR * ,
  99. .LR + ,
  100. and
  101. .L ?
  102. are extended as far as possible.
  103. The number of array elements in
  104. .I match
  105. is given by
  106. .IR msize .
  107. The structure of elements of
  108. .I match
  109. is:
  110. .IP
  111. .EX
  112. typedef struct {
  113. union {
  114. char *sp;
  115. Rune *rsp;
  116. };
  117. union {
  118. char *ep;
  119. Rune *rep;
  120. };
  121. } Resub;
  122. .EE
  123. .LP
  124. If
  125. .B match[0].sp
  126. is nonzero on entry,
  127. .I regexec
  128. starts matching at that point within
  129. .IR string .
  130. If
  131. .B match[0].ep
  132. is nonzero on entry,
  133. the last character matched is the one
  134. preceding that point.
  135. .PP
  136. .I Regsub
  137. places in
  138. .I dest
  139. a substitution instance of
  140. .I source
  141. in the context of the last
  142. .I regexec
  143. performed using
  144. .IR match .
  145. Each instance of
  146. .BI \e n\f1,
  147. where
  148. .I n
  149. is a digit, is replaced by the
  150. string delimited by
  151. .BI match[ n ].sp
  152. and
  153. .BI match[ n ].ep\f1.
  154. Each instance of
  155. .L &
  156. is replaced by the string delimited by
  157. .B match[0].sp
  158. and
  159. .BR match[0].ep .
  160. The substitution will always be null-terminated and
  161. trimmed to fit into \fIdlen\fP bytes.
  162. .PP
  163. .IR Regerror ,
  164. called whenever an error is detected in
  165. .IR regcomp ,
  166. writes the string
  167. .I msg
  168. on the standard error file and exits.
  169. .I Regerror
  170. can be replaced to perform
  171. special error processing.
  172. If the user-supplied
  173. .I regerror
  174. returns rather than exits,
  175. .I regcomp
  176. will return 0.
  177. .PP
  178. .I Rregexec
  179. and
  180. .I rregsub
  181. are variants of
  182. .I regexec
  183. and
  184. .I regsub
  185. that use strings of
  186. .B Runes
  187. instead of strings of
  188. .BR chars .
  189. With these routines, the
  190. .I rsp
  191. and
  192. .I rep
  193. fields of the
  194. .I match
  195. array elements should be used.
  196. .SH SOURCE
  197. .B /sys/src/libregexp
  198. .SH "SEE ALSO"
  199. .IR grep (1)
  200. .SH DIAGNOSTICS
  201. .I Regcomp
  202. returns
  203. .B 0
  204. for an illegal expression
  205. or other failure.
  206. .I Regexec
  207. returns 0
  208. if
  209. .I string
  210. is not matched.
  211. .SH BUGS
  212. There is no way to specify or match a NUL character; NULs terminate patterns and strings.