regex_internal_lib.h 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. /*
  2. This file is part of GNUnet
  3. Copyright (C) 2012, 2013 GNUnet e.V.
  4. GNUnet is free software: you can redistribute it and/or modify it
  5. under the terms of the GNU Affero General Public License as published
  6. by the Free Software Foundation, either version 3 of the License,
  7. or (at your option) any later version.
  8. GNUnet is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Affero General Public License for more details.
  12. You should have received a copy of the GNU Affero General Public License
  13. along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. SPDX-License-Identifier: AGPL-3.0-or-later
  15. */
  16. /**
  17. * @file regex/regex_internal_lib.h
  18. * @brief library to parse regular expressions into dfa
  19. * @author Maximilian Szengel
  20. */
  21. #ifndef REGEX_INTERNAL_LIB_H
  22. #define REGEX_INTERNAL_LIB_H
  23. #include "gnunet_util_lib.h"
  24. #include "gnunet_dht_service.h"
  25. #include "gnunet_statistics_service.h"
  26. #include "regex_block_lib.h"
  27. #ifdef __cplusplus
  28. extern "C"
  29. {
  30. #if 0 /* keep Emacsens' auto-indent happy */
  31. }
  32. #endif
  33. #endif
  34. /**
  35. * Automaton (NFA/DFA) representation. Opaque: only forward-declared in this header.
  36. */
  37. struct REGEX_INTERNAL_Automaton;
  38. /**
  39. * Construct a DFA for the given 'regex' of length 'len'.
  40. *
  41. * Path compression means that, for example, a DFA o -> a -> b -> c -> o will be
  42. * compressed to o -> abc -> o. Note that @a max_path_len influences the
  43. * non-determinism of states of the resulting NFA in the DHT (number of outgoing
  44. * edges with the same label). For example, for an application that stores IPv4
  45. * addresses as bitstrings it could make sense to limit the path compression to
  46. * 4 or 8.
  47. *
  48. * @param regex regular expression string.
  49. * @param len length of the regular expression.
  50. * @param max_path_len limit the path compression length to the
  51. * given value. If set to 1, no path compression is applied. Set to 0 for
  52. * maximal possible path compression (generally not desirable).
  53. * @return DFA, needs to be freed using #REGEX_INTERNAL_automaton_destroy().
  54. */
  55. struct REGEX_INTERNAL_Automaton *
  56. REGEX_INTERNAL_construct_dfa (const char *regex,
  57. const size_t len,
  58. unsigned int max_path_len);
  59. /**
  60. * Free the memory allocated by constructing the REGEX_INTERNAL_Automaton
  61. * data structure.
  62. *
  63. * @param a automaton to be destroyed.
  64. */
  65. void
  66. REGEX_INTERNAL_automaton_destroy (struct REGEX_INTERNAL_Automaton *a);
  67. /**
  68. * Evaluate the given 'string' against the given compiled regex.
  69. *
  70. * @param a automaton.
  71. * @param string string to check.
  72. *
  73. * @return 0 if @a string matches, non-zero otherwise.
  74. */
  75. int
  76. REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a,
  77. const char *string);
  78. /**
  79. * Get the first key for the given @a input_string. This hashes
  80. * the first x bits of the @a input_string, where x is the value returned.
  81. *
  82. * @param input_string string to derive the key from.
  83. * @param string_len length of the @a input_string.
  84. * @param key pointer to where to write the hash code.
  85. * @return number of bits of @a input_string that have been consumed
  86. * to construct the key.
  87. */
  88. size_t
  89. REGEX_INTERNAL_get_first_key (const char *input_string,
  90. size_t string_len,
  91. struct GNUNET_HashCode *key);
  92. /**
  93. * Iterator callback type taken by #REGEX_INTERNAL_iterate_all_edges() and #REGEX_INTERNAL_iterate_reachable_edges().
  94. *
  95. * @param cls closure.
  96. * @param key hash for current state.
  97. * @param proof proof for current state.
  98. * @param accepting #GNUNET_YES if this is an accepting state, #GNUNET_NO if not.
  99. * @param num_edges number of edges leaving current state.
  100. * @param edges edges leaving current state.
  101. */
  102. typedef void
  103. (*REGEX_INTERNAL_KeyIterator)(void *cls,
  104. const struct GNUNET_HashCode *key,
  105. const char *proof,
  106. int accepting,
  107. unsigned int num_edges,
  108. const struct REGEX_BLOCK_Edge *edges);
  109. /**
  110. * Iterate over all edges starting from the start state of automaton 'a',
  111. * calling the iterator for each edge.
  112. *
  113. * @param a automaton.
  114. * @param iterator iterator called for each edge.
  115. * @param iterator_cls closure.
  116. */
  117. void
  118. REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a,
  119. REGEX_INTERNAL_KeyIterator iterator,
  120. void *iterator_cls);
  121. /**
  122. * Iterate over all edges of automaton 'a' that are reachable from a state with
  123. * a proof of at least #GNUNET_REGEX_INITIAL_BYTES characters.
  124. *
  125. * The iterator is called for each such edge.
  126. *
  127. * @param a automaton.
  128. * @param iterator iterator called for each reachable edge.
  129. * @param iterator_cls closure.
  130. */
  131. void
  132. REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a,
  133. REGEX_INTERNAL_KeyIterator iterator,
  134. void *iterator_cls);
  135. /**
  136. * Handle to store cached data about a regex announcement. Opaque: only forward-declared in this header.
  137. */
  138. struct REGEX_INTERNAL_Announcement;
  139. /**
  140. * Handle to store data about a regex search. Opaque: only forward-declared in this header.
  141. */
  142. struct REGEX_INTERNAL_Search;
  143. /**
  144. * Announce a regular expression: put all states of the automaton in the DHT.
  145. * Does not free resources; #REGEX_INTERNAL_announce_cancel() must be called for that.
  146. *
  147. * @param dht An existing and valid DHT service handle. CANNOT be NULL.
  148. * @param priv Our private key; must remain valid until the announcement is cancelled.
  149. * @param regex Regular expression to announce.
  150. * @param compression How many characters may be squeezed into a single edge.
  151. * @param stats Optional statistics handle to report usage. Can be NULL.
  152. * @return Handle to reuse or free cached resources.
  153. * Must be freed by calling #REGEX_INTERNAL_announce_cancel().
  154. */
  155. struct REGEX_INTERNAL_Announcement *
  156. REGEX_INTERNAL_announce (struct GNUNET_DHT_Handle *dht,
  157. const struct GNUNET_CRYPTO_EddsaPrivateKey *priv,
  158. const char *regex,
  159. uint16_t compression,
  160. struct GNUNET_STATISTICS_Handle *stats);
  161. /**
  162. * Re-announce a previously announced regular expression.
  163. * Uses caching to speed up the process.
  164. *
  165. * @param h Handle returned by a previous #REGEX_INTERNAL_announce() call.
  166. */
  167. void
  168. REGEX_INTERNAL_reannounce (struct REGEX_INTERNAL_Announcement *h);
  169. /**
  170. * Clear all cached data used by a regex announcement.
  171. * Does not close the DHT connection.
  172. *
  173. * @param h Handle returned by a previous #REGEX_INTERNAL_announce() call.
  174. */
  175. void
  176. REGEX_INTERNAL_announce_cancel (struct REGEX_INTERNAL_Announcement *h);
  177. /**
  178. * Search callback function; the type of the callback given to #REGEX_INTERNAL_search().
  179. *
  180. * @param cls Closure provided in #REGEX_INTERNAL_search().
  181. * @param id Peer providing a regex that matches the string.
  182. * @param get_path Path of the get request.
  183. * @param get_path_length Length of @a get_path.
  184. * @param put_path Path of the put request.
  185. * @param put_path_length Length of the @a put_path.
  186. */
  187. typedef void
  188. (*REGEX_INTERNAL_Found)(void *cls,
  189. const struct GNUNET_PeerIdentity *id,
  190. const struct GNUNET_PeerIdentity *get_path,
  191. unsigned int get_path_length,
  192. const struct GNUNET_PeerIdentity *put_path,
  193. unsigned int put_path_length);
  194. /**
  195. * Search the DHT for a peer offering a regex that matches a given string.
  196. * The search runs until #REGEX_INTERNAL_search_cancel() is called, even if results
  197. * are returned.
  198. *
  199. * @param dht An existing and valid DHT service handle.
  200. * @param string String to match against the regexes in the DHT.
  201. * @param callback Callback for found peers.
  202. * @param callback_cls Closure for @c callback.
  203. * @param stats Optional statistics handle to report usage. Can be NULL.
  204. * @return Handle to stop search and free resources.
  205. * Must be freed by calling #REGEX_INTERNAL_search_cancel().
  206. */
  207. struct REGEX_INTERNAL_Search *
  208. REGEX_INTERNAL_search (struct GNUNET_DHT_Handle *dht,
  209. const char *string,
  210. REGEX_INTERNAL_Found callback,
  211. void *callback_cls,
  212. struct GNUNET_STATISTICS_Handle *stats);
  213. /**
  214. * Stop search and free all data used by a #REGEX_INTERNAL_search() call.
  215. * Does not close the DHT connection.
  216. *
  217. * @param h Handle returned by a previous #REGEX_INTERNAL_search() call.
  218. */
  219. void
  220. REGEX_INTERNAL_search_cancel (struct REGEX_INTERNAL_Search *h);
  221. #if 0 /* keep Emacsens' auto-indent happy */
  222. {
  223. #endif
  224. #ifdef __cplusplus
  225. }
  226. #endif
  227. /* end of regex_internal_lib.h */
  228. #endif