gnunet_strings_lib.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626
  1. /*
  2. This file is part of GNUnet.
  3. Copyright (C) 2001-2013 Christian Grothoff (and other contributing authors)
  4. GNUnet is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published
  6. by the Free Software Foundation; either version 3, or (at your
  7. option) any later version.
  8. GNUnet is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with GNUnet; see the file COPYING. If not, write to the
  14. Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  15. Boston, MA 02110-1301, USA.
  16. */
  17. /**
  18. * @author Christian Grothoff
  19. * @author Krista Bennett
  20. * @author Gerd Knorr <kraxel@bytesex.org>
  21. * @author Ioana Patrascu
  22. * @author Tzvetan Horozov
  23. *
  24. * @file
  25. * Strings and string handling functions
  26. *
  27. * @defgroup strings Strings library
  28. * Strings and string handling functions, including malloc and string tokenizing.
  29. * @{
  30. */
  31. #ifndef GNUNET_STRINGS_LIB_H
  32. #define GNUNET_STRINGS_LIB_H
  33. /* we need size_t, and since it can be both unsigned int
  34. or unsigned long long, this IS platform dependent;
  35. but "stdlib.h" should be portable 'enough' to be
  36. unconditionally available... */
  37. #include <stdlib.h>
  38. #ifdef __cplusplus
  39. extern "C"
  40. {
  41. #if 0 /* keep Emacsens' auto-indent happy */
  42. }
  43. #endif
  44. #endif
  45. #include "gnunet_time_lib.h"
  46. /**
  47. * Convert a given fancy human-readable size to bytes.
  48. *
  49. * @param fancy_size human readable string (e.g. "1 MB")
  50. * @param size set to the size in bytes
  51. * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
  52. */
  53. int
  54. GNUNET_STRINGS_fancy_size_to_bytes (const char *fancy_size,
  55. unsigned long long *size);
  56. /**
  57. * Convert a given fancy human-readable time to our internal
  58. * relative-time representation.
  59. *
  60. * @param fancy_time human readable string (e.g. "1 minute")
  61. * @param rtime set to the relative time
  62. * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
  63. */
  64. int
  65. GNUNET_STRINGS_fancy_time_to_relative (const char *fancy_time,
  66. struct GNUNET_TIME_Relative *rtime);
  67. /**
  68. * @ingroup time
  69. * Convert a given fancy human-readable time to our internal
  70. * absolute-time representation. The human-readable time is expected
  71. * to be in local time, whereas the returned value will be in UTC.
  72. *
  73. * @param fancy_time human readable string (format: %Y-%m-%d %H:%M:%S)
  74. * @param atime set to the absolute time
  75. * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
  76. */
  77. int
  78. GNUNET_STRINGS_fancy_time_to_absolute (const char *fancy_time,
  79. struct GNUNET_TIME_Absolute *atime);
  80. /**
  81. * Convert a given file size (in bytes) into a fancy human-readable format.
  82. *
  83. * @param size number of bytes
  84. * @return fancy representation of the size (possibly rounded) for humans. NOTE(review): ownership of the returned string is not stated here -- confirm whether the caller must free it.
  85. */
  86. char *
  87. GNUNET_STRINGS_byte_size_fancy (unsigned long long size);
  88. /**
  89. * Convert the @a len bytes long character sequence
  90. * given in @a input, which is in the given input charset,
  91. * to a string in the given output charset.
  92. *
  93. * @param input input string
  94. * @param len number of bytes in @a input
  95. * @param input_charset character set used for @a input
  96. * @param output_charset desired character set for the return value
  97. * @return the converted string (0-terminated),
  98. * if conversion fails, a copy of the original
  99. * string is returned.
  100. */
  101. char *
  102. GNUNET_STRINGS_conv (const char *input, size_t len,
  103. const char *input_charset,
  104. const char *output_charset);
  105. /**
  106. * Convert the @a len bytes long character sequence
  107. * given in @a input, which is in the given charset,
  108. * to UTF-8.
  109. *
  110. * @param input the input string (not necessarily 0-terminated)
  111. * @param len the number of bytes in the @a input
  112. * @param charset character set to convert from
  113. * @return the converted string (0-terminated)
  114. */
  115. char *
  116. GNUNET_STRINGS_to_utf8 (const char *input,
  117. size_t len,
  118. const char *charset);
  119. /**
  120. * Convert the @a len bytes long UTF-8 string
  121. * given in @a input to the given charset.
  122. *
  123. * @param input the input string (not necessarily 0-terminated)
  124. * @param len the number of bytes in the @a input
  125. * @param charset character set to convert to
  126. * @return the converted string (0-terminated),
  127. * if conversion fails, a copy of the original
  128. * string is returned.
  129. */
  130. char *
  131. GNUNET_STRINGS_from_utf8 (const char *input,
  132. size_t len,
  133. const char *charset);
  134. /**
  135. * Convert the UTF-8 input string to lower case.
  136. * The @a output buffer is supplied by the caller and needs to be allocated appropriately.
  137. *
  138. * @param input input string
  139. * @param output output buffer that receives the lower-case string
  140. */
  141. void
  142. GNUNET_STRINGS_utf8_tolower (const char *input,
  143. char *output);
  144. /**
  145. * Convert the UTF-8 input string to upper case.
  146. * The @a output buffer is supplied by the caller and needs to be allocated appropriately.
  147. *
  148. * @param input input string
  149. * @param output output buffer that receives the upper-case string
  150. */
  151. void
  152. GNUNET_STRINGS_utf8_toupper (const char *input,
  153. char *output);
  154. /**
  155. * Complete filename (a la shell) from abbreviation.
  156. *
  157. * @param fil the name of the file, may contain ~/ or
  158. * be relative to the current directory
  159. * @return the full file name,
  160. * NULL is returned on error
  161. */
  162. char *
  163. GNUNET_STRINGS_filename_expand (const char *fil);
  164. /**
  165. * Fill a buffer of the given size with @a count 0-terminated strings
  166. * (given as varargs). If @a buffer is NULL, only compute the amount
  167. * of space required (sum of "strlen(arg)+1").
  168. *
  169. * Unlike using "snprintf" with "%s", this function will add
  170. * 0-terminators after each string. The
  171. * #GNUNET_STRINGS_buffer_tokenize() function can be used to parse the
  172. * buffer back into individual strings.
  173. *
  174. * @param buffer the buffer to fill with strings, can
  175. * be NULL in which case only the necessary
  176. * amount of space will be calculated
  177. * @param size number of bytes available in @a buffer
  178. * @param count number of strings that follow
  179. * @param ... @a count 0-terminated strings to copy to @a buffer
  180. * @return number of bytes written to the buffer
  181. * (or number of bytes that would have been written)
  182. */
  183. size_t
  184. GNUNET_STRINGS_buffer_fill (char *buffer,
  185. size_t size,
  186. unsigned int count,
  187. ...);
  188. /**
  189. * Given a buffer of a given size, find @a count 0-terminated strings
  190. * in the buffer and store their locations through the @a count varargs
  191. * pointers, each of type "const char **".
  192. *
  193. * @param buffer the buffer to parse
  194. * @param size size of the @a buffer
  195. * @param count number of strings to locate
  196. * @param ... pointers to where to store the strings
  197. * @return offset of the character after the last 0-termination
  198. * in the buffer, or 0 on error. NOTE(review): returned as unsigned int although @a size is a size_t.
  199. */
  200. unsigned int
  201. GNUNET_STRINGS_buffer_tokenize (const char *buffer,
  202. size_t size,
  203. unsigned int count, ...);
  204. /**
  205. * @ingroup time
  206. * Like `asctime`, but for GNUnet time. Converts a GNUnet internal
  207. * absolute time (which is in UTC) to a string in local time.
  208. * Note that the returned value will be overwritten if this function
  209. * is called again, so copy it if it must outlive the next call.
  210. *
  211. * @param t the absolute time to convert
  212. * @return timestamp in human-readable form in local time
  213. */
  214. const char *
  215. GNUNET_STRINGS_absolute_time_to_string (struct GNUNET_TIME_Absolute t);
  216. /**
  217. * @ingroup time
  218. * Give relative time in human-readable fancy format.
  219. * This is one of the very few calls in the entire API that is
  220. * NOT reentrant!
  221. *
  222. * @param delta relative time to convert (documented here as milliseconds; NOTE(review): confirm the unit against gnunet_time_lib.h)
  223. * @param do_round are we allowed to round a bit?
  224. * @return string in human-readable form
  225. */
  226. const char *
  227. GNUNET_STRINGS_relative_time_to_string (struct GNUNET_TIME_Relative delta,
  228. int do_round);
  229. /**
  230. * "man basename"
  231. * Returns a pointer to a part of @a filename (allocates nothing)!
  232. *
  233. * @param filename filename to extract basename from
  234. * @return short (base) name of the file (that is, everything following the
  235. * last directory separator in @a filename). If @a filename ends with a
  236. * directory separator, the result will be a zero-length string.
  237. * If @a filename has no directory separators, the result is @a filename
  238. * itself.
  239. */
  240. const char *
  241. GNUNET_STRINGS_get_short_name (const char *filename);
  242. /**
  243. * Convert binary data to ASCII encoding using Base32Hex (RFC 4648).
  244. * Does not append a 0-terminator, but returns a pointer to the place where
  245. * it should be placed, if needed. NOTE(review): GNUNET_STRINGS_data_to_string_alloc() is documented as base32crockford -- confirm which alphabet applies.
  246. *
  247. * @param data data to encode
  248. * @param size size of @a data (in bytes)
  249. * @param out buffer to fill
  250. * @param out_size size of the buffer @a out. Must be large enough to hold
  251. * ((size*8) + (((size*8) % 5) > 0 ? 5 - ((size*8) % 5) : 0)) / 5 bytes, i.e. (size*8)/5 rounded up
  252. * @return pointer to the next byte in @a out or NULL on error.
  253. */
  254. char *
  255. GNUNET_STRINGS_data_to_string (const void *data,
  256. size_t size,
  257. char *out,
  258. size_t out_size);
  259. /**
  260. * Return the base32crockford encoding of the given buffer.
  261. *
  262. * The returned string will be freshly allocated, and must be freed
  263. * with #GNUNET_free().
  264. *
  265. * @param buf buffer with data
  266. * @param size size of the buffer @a buf
  267. * @return freshly allocated, 0-terminated string
  268. */
  269. char *
  270. GNUNET_STRINGS_data_to_string_alloc (const void *buf,
  271. size_t size);
  272. /**
  273. * Convert Base32Hex encoding back to data.
  274. * @a out_size must exactly match the size of the data before it was encoded.
  275. *
  276. * @param enc the encoding
  277. * @param enclen number of characters in @a enc (without 0-terminator, which can be missing)
  278. * @param out location where to store the decoded data
  279. * @param out_size size of the output buffer @a out
  280. * @return #GNUNET_OK on success, #GNUNET_SYSERR if result has the wrong encoding
  281. */
  282. int
  283. GNUNET_STRINGS_string_to_data (const char *enc,
  284. size_t enclen,
  285. void *out,
  286. size_t out_size);
  287. /**
  288. * Encode into Base64.
  289. *
  290. * @param data the data to encode
  291. * @param len the length of the input @a data
  292. * @param output where to store the encoded result (*output should be NULL
  293. * on entry; the result is allocated by this function)
  294. * @return the size of the output
  295. */
  296. size_t
  297. GNUNET_STRINGS_base64_encode (const char *data, size_t len, char **output);
  298. /**
  299. * Decode from Base64.
  300. *
  301. * @param data the data to decode
  302. * @param len the length of the input @a data
  303. * @param output where to store the decoded result (*output should be NULL
  304. * on entry; the result is allocated by this function)
  305. * @return the size of the output
  306. */
  307. size_t
  308. GNUNET_STRINGS_base64_decode (const char *data, size_t len, char **output);
  309. /**
  310. * Parse a path that might be a URI.
  311. *
  312. * @param path path to parse. Must be 0-terminated.
  313. * @param scheme_part a pointer to 'char *' where a pointer to a string that
  314. * represents the URI scheme will be stored. Can be NULL. The string is
  315. * allocated by the function, and should be freed by #GNUNET_free() when
  316. * it is no longer needed.
  317. * @param path_part a pointer to 'const char *' where a pointer to the path
  318. * part of the URI will be stored. Can be NULL. Points to the same block
  319. * of memory as @a path, and thus must not be freed. Might point to '\0',
  320. * if path part is zero-length.
  321. * @return #GNUNET_YES if it's a URI, #GNUNET_NO otherwise. If @a path is not
  322. * a URI, '*scheme_part' and '*path_part' will remain unchanged
  323. * (if they weren't NULL).
  324. */
  325. int
  326. GNUNET_STRINGS_parse_uri (const char *path,
  327. char **scheme_part,
  328. const char **path_part);
  329. /**
  330. * Check whether @a filename is absolute or not, and whether it is a URI.
  331. *
  332. * @param filename filename to check
  333. * @param can_be_uri #GNUNET_YES to check for being a URI, #GNUNET_NO to
  334. * assume it's not a URI
  335. * @param r_is_uri a pointer to an int that is set to #GNUNET_YES if @a filename
  336. * is a URI and to #GNUNET_NO otherwise. Can be NULL. If @a can_be_uri is
  337. * not #GNUNET_YES, *r_is_uri is set to #GNUNET_NO.
  338. * @param r_uri_scheme a pointer to a char * that is set to a pointer to URI scheme.
  339. * The string is allocated by the function, and should be freed with
  340. * #GNUNET_free(). Can be NULL.
  341. * @return #GNUNET_YES if @a filename is absolute, #GNUNET_NO otherwise.
  342. */
  343. int
  344. GNUNET_STRINGS_path_is_absolute (const char *filename,
  345. int can_be_uri,
  346. int *r_is_uri,
  347. char **r_uri_scheme);
  348. /**
  349. * Flags for what we should check a file for; each value is a distinct bit.
  350. */
  351. enum GNUNET_STRINGS_FilenameCheck
  352. {
  353. /**
  354. * Check that the file exists.
  355. */
  356. GNUNET_STRINGS_CHECK_EXISTS = 0x00000001,
  357. /**
  358. * Check that the file is a directory.
  359. */
  360. GNUNET_STRINGS_CHECK_IS_DIRECTORY = 0x00000002,
  361. /**
  362. * Check that the file is a link.
  363. */
  364. GNUNET_STRINGS_CHECK_IS_LINK = 0x00000004,
  365. /**
  366. * Check that the path is an absolute path.
  367. */
  368. GNUNET_STRINGS_CHECK_IS_ABSOLUTE = 0x00000008
  369. };
  370. /**
  371. * Perform checks on @a filename. FIXME: some duplication with
  372. * "GNUNET_DISK_"-APIs. We should unify those.
  373. *
  374. * @param filename file to check
  375. * @param checks checks to perform (see enum GNUNET_STRINGS_FilenameCheck)
  376. * @return #GNUNET_YES if all checks pass, #GNUNET_NO if at least one of them
  377. * fails, #GNUNET_SYSERR when a check can't be performed
  378. */
  379. int
  380. GNUNET_STRINGS_check_filename (const char *filename,
  381. enum GNUNET_STRINGS_FilenameCheck checks);
  382. /**
  383. * Tries to convert @a zt_addr string to an IPv6 address.
  384. * The string is expected to have the format "[ABCD::01]:80".
  385. *
  386. * @param zt_addr 0-terminated string. Documented as "may be mangled by the function", although it is declared const here (NOTE(review): confirm).
  387. * @param addrlen length of @a zt_addr (not counting 0-terminator).
  388. * @param r_buf a buffer to fill. Initially gets filled with zeroes,
  389. * then its sin6_port, sin6_family and sin6_addr are set appropriately.
  390. * @return #GNUNET_OK if conversion succeeded. #GNUNET_SYSERR otherwise, in which
  391. * case the contents of @a r_buf are undefined.
  392. */
  393. int
  394. GNUNET_STRINGS_to_address_ipv6 (const char *zt_addr,
  395. uint16_t addrlen,
  396. struct sockaddr_in6 *r_buf);
  397. /**
  398. * Tries to convert @a zt_addr string to an IPv4 address.
  399. * The string is expected to have the format "1.2.3.4:80".
  400. *
  401. * @param zt_addr 0-terminated string. Documented as "may be mangled by the function", although it is declared const here (NOTE(review): confirm).
  402. * @param addrlen length of @a zt_addr (not counting 0-terminator).
  403. * @param r_buf a buffer to fill.
  404. * @return #GNUNET_OK if conversion succeeded. #GNUNET_SYSERR otherwise, in which case
  405. * the contents of @a r_buf are undefined.
  406. */
  407. int
  408. GNUNET_STRINGS_to_address_ipv4 (const char *zt_addr,
  409. uint16_t addrlen,
  410. struct sockaddr_in *r_buf);
  411. /**
  412. * Tries to convert @a addr string to an IP (v4 or v6) address.
  413. * Will automatically decide whether to treat @a addr as a v4 or v6 address.
  414. *
  415. * @param addr a string, may not be 0-terminated.
  416. * @param addrlen number of bytes in @a addr (if @a addr is 0-terminated,
  417. * 0-terminator should not be counted towards @a addrlen).
  418. * @param r_buf a buffer to fill.
  419. * @return #GNUNET_OK if conversion succeeded. #GNUNET_SYSERR otherwise, in which
  420. * case the contents of @a r_buf are undefined.
  421. */
  422. int
  423. GNUNET_STRINGS_to_address_ip (const char *addr,
  424. uint16_t addrlen,
  425. struct sockaddr_storage *r_buf);
  426. /**
  427. * Returns UTF-8 encoded arguments. Does nothing (returns a copy of
  428. * @a argc and @a argv) on any platform other than W32. Returned @a
  429. * argv has `u8argv[u8argc] == NULL`. Returned @a argv is a single
  430. * memory block, and can be freed with a single #GNUNET_free() call.
  431. *
  432. * @param argc argc (as given by main())
  433. * @param argv argv (as given by main())
  434. * @param u8argc a location to store new argc in (though it's the same as @a argc)
  435. * @param u8argv a location to store new argv in
  436. * @return #GNUNET_OK on success, #GNUNET_SYSERR on failure
  437. */
  438. int
  439. GNUNET_STRINGS_get_utf8_args (int argc,
  440. char *const *argv,
  441. int *u8argc,
  442. char *const **u8argv);
  443. /* ***************** IPv4/IPv6 parsing ****************** */
  444. struct GNUNET_STRINGS_PortPolicy
  445. {
  446. /**
  447. * Start of the port range (0 if none given).
  448. */
  449. uint16_t start_port;
  450. /**
  451. * End of the port range (0 if none given).
  452. */
  453. uint16_t end_port;
  454. /**
  455. * #GNUNET_YES if the port range should be negated
  456. * ("!" in the policy string).
  457. */
  458. int negate_portrange;
  459. };
  460. /**
  461. * @brief IPv4 network in CIDR notation.
  462. */
  463. struct GNUNET_STRINGS_IPv4NetworkPolicy
  464. {
  465. /**
  466. * IPv4 address of the network.
  467. */
  468. struct in_addr network;
  469. /**
  470. * IPv4 netmask of the network.
  471. */
  472. struct in_addr netmask;
  473. /**
  474. * Policy for port access.
  475. */
  476. struct GNUNET_STRINGS_PortPolicy pp;
  477. };
  478. /**
  479. * @brief IPv6 network in CIDR notation.
  480. */
  481. struct GNUNET_STRINGS_IPv6NetworkPolicy
  482. {
  483. /**
  484. * IPv6 address of the network.
  485. */
  486. struct in6_addr network;
  487. /**
  488. * IPv6 netmask of the network.
  489. */
  490. struct in6_addr netmask;
  491. /**
  492. * Policy for port access.
  493. */
  494. struct GNUNET_STRINGS_PortPolicy pp;
  495. };
  496. /**
  497. * Parse an IPv4 network policy. The argument specifies a list of
  498. * subnets. The format is <tt>(network[/netmask][:[!]SPORT-DPORT];)*</tt>
  499. * (no whitespace, must be terminated with a semicolon). The network
  500. * must be given in dotted-decimal notation. The netmask can be given
  501. * in CIDR notation (/16) or in dotted-decimal (/255.255.0.0).
  502. *
  503. * @param routeListX a string specifying the IPv4 subnets
  504. * @return the converted list, terminated with all zeros;
  505. * NULL if the syntax is flawed
  506. */
  507. struct GNUNET_STRINGS_IPv4NetworkPolicy *
  508. GNUNET_STRINGS_parse_ipv4_policy (const char *routeListX);
  509. /**
  510. * Parse an IPv6 network policy. The argument specifies a list of
  511. * subnets. The format is <tt>(network[/netmask[:[!]SPORT[-DPORT]]];)*</tt>
  512. * (no whitespace, must be terminated with a semicolon). The network
  513. * must be given in colon-hex notation. The netmask must be given in
  514. * CIDR notation (/16) or can be omitted to specify a single host.
  515. * Note that the netmask is mandatory if ports are specified.
  516. *
  517. * @param routeListX a string specifying the policy
  518. * @return the converted list, 0-terminated, NULL if the syntax is flawed
  519. */
  520. struct GNUNET_STRINGS_IPv6NetworkPolicy *
  521. GNUNET_STRINGS_parse_ipv6_policy (const char *routeListX);
  522. #if 0 /* keep Emacsens' auto-indent happy */
  523. {
  524. #endif
  525. #ifdef __cplusplus
  526. }
  527. #endif
  528. /* ifndef GNUNET_STRINGS_LIB_H */
  529. #endif
  530. /** @} */ /* end of group */
  531. /* end of gnunet_strings_lib.h */