fs_directory.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642
  1. /*
  2. This file is part of GNUnet.
  3. (C) 2003, 2004, 2006, 2009 Christian Grothoff (and other contributing authors)
  4. GNUnet is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published
  6. by the Free Software Foundation; either version 3, or (at your
  7. option) any later version.
  8. GNUnet is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with GNUnet; see the file COPYING. If not, write to the
  14. Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  15. Boston, MA 02111-1307, USA.
  16. */
  17. /**
  18. * @file fs/fs_directory.c
  19. * @brief Helper functions for building directories.
  20. * @author Christian Grothoff
  21. *
  22. * TODO:
  23. * - modify directory builder API to support incremental
  24. * generation of directories (to allow directories that
  25. * would not fit into memory to be created)
  26. * - modify directory processor API to support incremental
  27. * iteration over FULL directories (without missing entries)
  28. * to allow access to directories that do not fit entirely
  29. * into memory
  30. */
  31. #include "platform.h"
  32. #include "gnunet_fs_service.h"
  33. #include "fs_api.h"
  34. /**
  35. * String that is used to indicate that a file
  36. * is a GNUnet directory.
  37. */
  38. #define GNUNET_DIRECTORY_MAGIC "\211GND\r\n\032\n"
  39. /**
  40. * Does the meta-data claim that this is a directory?
  41. * Checks if the mime-type is that of a GNUnet directory.
  42. *
  43. * @return #GNUNET_YES if it is, #GNUNET_NO if it is not, #GNUNET_SYSERR if
  44. * we have no mime-type information (treat as #GNUNET_NO)
  45. */
  46. int
  47. GNUNET_FS_meta_data_test_for_directory (const struct GNUNET_CONTAINER_MetaData *md)
  48. {
  49. char *mime;
  50. int ret;
  51. if (NULL == md)
  52. return GNUNET_SYSERR;
  53. mime = GNUNET_CONTAINER_meta_data_get_by_type (md, EXTRACTOR_METATYPE_MIMETYPE);
  54. if (NULL == mime)
  55. return GNUNET_SYSERR;
  56. ret = (0 == strcasecmp (mime, GNUNET_FS_DIRECTORY_MIME)) ? GNUNET_YES : GNUNET_NO;
  57. GNUNET_free (mime);
  58. return ret;
  59. }
  60. /**
  61. * Set the MIMETYPE information for the given
  62. * metadata to "application/gnunet-directory".
  63. *
  64. * @param md metadata to add mimetype to
  65. */
  66. void
  67. GNUNET_FS_meta_data_make_directory (struct GNUNET_CONTAINER_MetaData *md)
  68. {
  69. char *mime;
  70. mime =
  71. GNUNET_CONTAINER_meta_data_get_by_type (md, EXTRACTOR_METATYPE_MIMETYPE);
  72. if (mime != NULL)
  73. {
  74. GNUNET_break (0 == strcmp (mime, GNUNET_FS_DIRECTORY_MIME));
  75. GNUNET_free (mime);
  76. return;
  77. }
  78. GNUNET_CONTAINER_meta_data_insert (md, "<gnunet>",
  79. EXTRACTOR_METATYPE_MIMETYPE,
  80. EXTRACTOR_METAFORMAT_UTF8, "text/plain",
  81. GNUNET_FS_DIRECTORY_MIME,
  82. strlen (GNUNET_FS_DIRECTORY_MIME) + 1);
  83. }
  84. /**
  85. * Closure for 'find_full_data'.
  86. */
  87. struct GetFullDataClosure
  88. {
  89. /**
  90. * Extracted binary meta data.
  91. */
  92. void *data;
  93. /**
  94. * Number of bytes stored in data.
  95. */
  96. size_t size;
  97. };
  98. /**
  99. * Type of a function that libextractor calls for each
  100. * meta data item found.
  101. *
  102. * @param cls closure (user-defined)
  103. * @param plugin_name name of the plugin that produced this value;
  104. * special values can be used (i.e. '&lt;zlib&gt;' for zlib being
  105. * used in the main libextractor library and yielding
  106. * meta data).
  107. * @param type libextractor-type describing the meta data
  108. * @param format basic format information about data
  109. * @param data_mime_type mime-type of data (not of the original file);
  110. * can be NULL (if mime-type is not known)
  111. * @param data actual meta-data found
  112. * @param data_len number of bytes in data
  113. * @return 0 to continue extracting, 1 to abort
  114. */
  115. static int
  116. find_full_data (void *cls, const char *plugin_name,
  117. enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format,
  118. const char *data_mime_type, const char *data, size_t data_len)
  119. {
  120. struct GetFullDataClosure *gfdc = cls;
  121. if (type == EXTRACTOR_METATYPE_GNUNET_FULL_DATA)
  122. {
  123. gfdc->size = data_len;
  124. if (data_len > 0)
  125. {
  126. gfdc->data = GNUNET_malloc (data_len);
  127. memcpy (gfdc->data, data, data_len);
  128. }
  129. return 1;
  130. }
  131. return 0;
  132. }
  133. /**
  134. * Iterate over all entries in a directory. Note that directories
  135. * are structured such that it is possible to iterate over the
  136. * individual blocks as well as over the entire directory. Thus
  137. * a client can call this function on the buffer in the
  138. * GNUNET_FS_ProgressCallback. Also, directories can optionally
  139. * include the contents of (small) files embedded in the directory
  140. * itself; for those files, the processor may be given the
  141. * contents of the file directly by this function.
  142. * <p>
  143. *
  144. * Note that this function maybe called on parts of directories. Thus
  145. * parser errors should not be reported _at all_ (with GNUNET_break).
  146. * Still, if some entries can be recovered despite these parsing
  147. * errors, the function should try to do this.
  148. *
  149. * @param size number of bytes in data
  150. * @param data pointer to the beginning of the directory
  151. * @param offset offset of data in the directory
  152. * @param dep function to call on each entry
  153. * @param dep_cls closure for dep
  154. * @return GNUNET_OK if this could be a block in a directory,
  155. * GNUNET_NO if this could be part of a directory (but not 100% OK)
  156. * GNUNET_SYSERR if 'data' does not represent a directory
  157. */
  158. int
  159. GNUNET_FS_directory_list_contents (size_t size, const void *data,
  160. uint64_t offset,
  161. GNUNET_FS_DirectoryEntryProcessor dep,
  162. void *dep_cls)
  163. {
  164. struct GetFullDataClosure full_data;
  165. const char *cdata = data;
  166. char *emsg;
  167. uint64_t pos;
  168. uint64_t align;
  169. uint32_t mdSize;
  170. uint64_t epos;
  171. struct GNUNET_FS_Uri *uri;
  172. struct GNUNET_CONTAINER_MetaData *md;
  173. char *filename;
  174. if ((offset == 0) &&
  175. ((size < 8 + sizeof (uint32_t)) ||
  176. (0 != memcmp (cdata, GNUNET_FS_DIRECTORY_MAGIC, 8))))
  177. return GNUNET_SYSERR;
  178. pos = offset;
  179. if (offset == 0)
  180. {
  181. memcpy (&mdSize, &cdata[8], sizeof (uint32_t));
  182. mdSize = ntohl (mdSize);
  183. if (mdSize > size - 8 - sizeof (uint32_t))
  184. {
  185. /* invalid size */
  186. GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
  187. _("MAGIC mismatch. This is not a GNUnet directory.\n"));
  188. return GNUNET_SYSERR;
  189. }
  190. md = GNUNET_CONTAINER_meta_data_deserialize (&cdata[8 + sizeof (uint32_t)],
  191. mdSize);
  192. if (md == NULL)
  193. {
  194. GNUNET_break (0);
  195. return GNUNET_SYSERR; /* malformed ! */
  196. }
  197. dep (dep_cls, NULL, NULL, md, 0, NULL);
  198. GNUNET_CONTAINER_meta_data_destroy (md);
  199. pos = 8 + sizeof (uint32_t) + mdSize;
  200. }
  201. while (pos < size)
  202. {
  203. /* find end of URI */
  204. if (cdata[pos] == '\0')
  205. {
  206. /* URI is never empty, must be end of block,
  207. * skip to next alignment */
  208. align = ((pos / DBLOCK_SIZE) + 1) * DBLOCK_SIZE;
  209. if (align == pos)
  210. {
  211. /* if we were already aligned, still skip a block! */
  212. align += DBLOCK_SIZE;
  213. }
  214. pos = align;
  215. if (pos >= size)
  216. {
  217. /* malformed - or partial download... */
  218. break;
  219. }
  220. }
  221. epos = pos;
  222. while ((epos < size) && (cdata[epos] != '\0'))
  223. epos++;
  224. if (epos >= size)
  225. return GNUNET_NO; /* malformed - or partial download */
  226. uri = GNUNET_FS_uri_parse (&cdata[pos], &emsg);
  227. pos = epos + 1;
  228. if (uri == NULL)
  229. {
  230. GNUNET_free (emsg);
  231. pos--; /* go back to '\0' to force going to next alignment */
  232. continue;
  233. }
  234. if (GNUNET_FS_uri_test_ksk (uri))
  235. {
  236. GNUNET_FS_uri_destroy (uri);
  237. GNUNET_break (0);
  238. return GNUNET_NO; /* illegal in directory! */
  239. }
  240. memcpy (&mdSize, &cdata[pos], sizeof (uint32_t));
  241. mdSize = ntohl (mdSize);
  242. pos += sizeof (uint32_t);
  243. if (pos + mdSize > size)
  244. {
  245. GNUNET_FS_uri_destroy (uri);
  246. return GNUNET_NO; /* malformed - or partial download */
  247. }
  248. md = GNUNET_CONTAINER_meta_data_deserialize (&cdata[pos], mdSize);
  249. if (md == NULL)
  250. {
  251. GNUNET_FS_uri_destroy (uri);
  252. GNUNET_break (0);
  253. return GNUNET_NO; /* malformed ! */
  254. }
  255. pos += mdSize;
  256. filename =
  257. GNUNET_CONTAINER_meta_data_get_by_type (md,
  258. EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME);
  259. full_data.size = 0;
  260. full_data.data = NULL;
  261. GNUNET_CONTAINER_meta_data_iterate (md, &find_full_data, &full_data);
  262. if (dep != NULL)
  263. {
  264. dep (dep_cls, filename, uri, md, full_data.size, full_data.data);
  265. }
  266. GNUNET_free_non_null (full_data.data);
  267. GNUNET_free_non_null (filename);
  268. GNUNET_CONTAINER_meta_data_destroy (md);
  269. GNUNET_FS_uri_destroy (uri);
  270. }
  271. return GNUNET_OK;
  272. }
  273. /**
  274. * Entries in the directory (builder).
  275. */
  276. struct BuilderEntry
  277. {
  278. /**
  279. * This is a linked list.
  280. */
  281. struct BuilderEntry *next;
  282. /**
  283. * Length of this entry.
  284. */
  285. size_t len;
  286. };
  287. /**
  288. * Internal state of a directory builder.
  289. */
  290. struct GNUNET_FS_DirectoryBuilder
  291. {
  292. /**
  293. * Meta-data for the directory itself.
  294. */
  295. struct GNUNET_CONTAINER_MetaData *meta;
  296. /**
  297. * Head of linked list of entries.
  298. */
  299. struct BuilderEntry *head;
  300. /**
  301. * Number of entires in the directory.
  302. */
  303. unsigned int count;
  304. };
  305. /**
  306. * Create a directory builder.
  307. *
  308. * @param mdir metadata for the directory
  309. */
  310. struct GNUNET_FS_DirectoryBuilder *
  311. GNUNET_FS_directory_builder_create (const struct GNUNET_CONTAINER_MetaData
  312. *mdir)
  313. {
  314. struct GNUNET_FS_DirectoryBuilder *ret;
  315. ret = GNUNET_new (struct GNUNET_FS_DirectoryBuilder);
  316. if (mdir != NULL)
  317. ret->meta = GNUNET_CONTAINER_meta_data_duplicate (mdir);
  318. else
  319. ret->meta = GNUNET_CONTAINER_meta_data_create ();
  320. GNUNET_FS_meta_data_make_directory (ret->meta);
  321. return ret;
  322. }
  323. /**
  324. * Add an entry to a directory.
  325. *
  326. * @param bld directory to extend
  327. * @param uri uri of the entry (must not be a KSK)
  328. * @param md metadata of the entry
  329. * @param data raw data of the entry, can be NULL, otherwise
  330. * data must point to exactly the number of bytes specified
  331. * by the uri which must be of type LOC or CHK
  332. */
  333. void
  334. GNUNET_FS_directory_builder_add (struct GNUNET_FS_DirectoryBuilder *bld,
  335. const struct GNUNET_FS_Uri *uri,
  336. const struct GNUNET_CONTAINER_MetaData *md,
  337. const void *data)
  338. {
  339. struct GNUNET_FS_Uri *curi;
  340. struct BuilderEntry *e;
  341. uint64_t fsize;
  342. uint32_t big;
  343. ssize_t ret;
  344. size_t mds;
  345. size_t mdxs;
  346. char *uris;
  347. char *ser;
  348. char *sptr;
  349. size_t slen;
  350. struct GNUNET_CONTAINER_MetaData *meta;
  351. const struct GNUNET_CONTAINER_MetaData *meta_use;
  352. GNUNET_assert (!GNUNET_FS_uri_test_ksk (uri));
  353. if (NULL != data)
  354. {
  355. GNUNET_assert (!GNUNET_FS_uri_test_sks (uri));
  356. if (GNUNET_FS_uri_test_chk (uri))
  357. {
  358. fsize = GNUNET_FS_uri_chk_get_file_size (uri);
  359. }
  360. else
  361. {
  362. curi = GNUNET_FS_uri_loc_get_uri (uri);
  363. GNUNET_assert (NULL != curi);
  364. fsize = GNUNET_FS_uri_chk_get_file_size (curi);
  365. GNUNET_FS_uri_destroy (curi);
  366. }
  367. }
  368. else
  369. {
  370. fsize = 0; /* not given */
  371. }
  372. if (fsize > MAX_INLINE_SIZE)
  373. fsize = 0; /* too large */
  374. uris = GNUNET_FS_uri_to_string (uri);
  375. slen = strlen (uris) + 1;
  376. mds = GNUNET_CONTAINER_meta_data_get_serialized_size (md);
  377. meta_use = md;
  378. meta = NULL;
  379. if (fsize > 0)
  380. {
  381. meta = GNUNET_CONTAINER_meta_data_duplicate (md);
  382. GNUNET_CONTAINER_meta_data_insert (meta, "<gnunet>",
  383. EXTRACTOR_METATYPE_GNUNET_FULL_DATA,
  384. EXTRACTOR_METAFORMAT_BINARY, NULL, data,
  385. fsize);
  386. mdxs = GNUNET_CONTAINER_meta_data_get_serialized_size (meta);
  387. if ((slen + sizeof (uint32_t) + mdxs - 1) / DBLOCK_SIZE ==
  388. (slen + sizeof (uint32_t) + mds - 1) / DBLOCK_SIZE)
  389. {
  390. /* adding full data would not cause us to cross
  391. * additional blocks, so add it! */
  392. meta_use = meta;
  393. mds = mdxs;
  394. }
  395. }
  396. if (mds > GNUNET_MAX_MALLOC_CHECKED / 2)
  397. mds = GNUNET_MAX_MALLOC_CHECKED / 2;
  398. e = GNUNET_malloc (sizeof (struct BuilderEntry) + slen + mds +
  399. sizeof (uint32_t));
  400. ser = (char *) &e[1];
  401. memcpy (ser, uris, slen);
  402. GNUNET_free (uris);
  403. sptr = &ser[slen + sizeof (uint32_t)];
  404. ret =
  405. GNUNET_CONTAINER_meta_data_serialize (meta_use, &sptr, mds,
  406. GNUNET_CONTAINER_META_DATA_SERIALIZE_PART);
  407. if (NULL != meta)
  408. GNUNET_CONTAINER_meta_data_destroy (meta);
  409. if (ret == -1)
  410. mds = 0;
  411. else
  412. mds = ret;
  413. big = htonl (mds);
  414. memcpy (&ser[slen], &big, sizeof (uint32_t));
  415. e->len = slen + sizeof (uint32_t) + mds;
  416. e->next = bld->head;
  417. bld->head = e;
  418. bld->count++;
  419. }
  420. /**
  421. * Given the start and end position of a block of
  422. * data, return the end position of that data
  423. * after alignment to the DBLOCK_SIZE.
  424. */
  425. static size_t
  426. do_align (size_t start_position, size_t end_position)
  427. {
  428. size_t align;
  429. align = (end_position / DBLOCK_SIZE) * DBLOCK_SIZE;
  430. if ((start_position < align) && (end_position > align))
  431. return align + end_position - start_position;
  432. return end_position;
  433. }
  434. /**
  435. * Compute a permuation of the blocks to
  436. * minimize the cost of alignment. Greedy packer.
  437. *
  438. * @param start starting position for the first block
  439. * @param count size of the two arrays
  440. * @param sizes the sizes of the individual blocks
  441. * @param perm the permutation of the blocks (updated)
  442. */
  443. static void
  444. block_align (size_t start, unsigned int count, const size_t * sizes,
  445. unsigned int *perm)
  446. {
  447. unsigned int i;
  448. unsigned int j;
  449. unsigned int tmp;
  450. unsigned int best;
  451. ssize_t badness;
  452. size_t cpos;
  453. size_t cend;
  454. ssize_t cbad;
  455. unsigned int cval;
  456. cpos = start;
  457. for (i = 0; i < count; i++)
  458. {
  459. start = cpos;
  460. badness = 0x7FFFFFFF;
  461. best = -1;
  462. for (j = i; j < count; j++)
  463. {
  464. cval = perm[j];
  465. cend = cpos + sizes[cval];
  466. if (cpos % DBLOCK_SIZE == 0)
  467. {
  468. /* prefer placing the largest blocks first */
  469. cbad = -(cend % DBLOCK_SIZE);
  470. }
  471. else
  472. {
  473. if (cpos / DBLOCK_SIZE == cend / DBLOCK_SIZE)
  474. {
  475. /* Data fits into the same block! Prefer small left-overs! */
  476. cbad = DBLOCK_SIZE - cend % DBLOCK_SIZE;
  477. }
  478. else
  479. {
  480. /* Would have to waste space to re-align, add big factor, this
  481. * case is a real loss (proportional to space wasted)! */
  482. cbad = DBLOCK_SIZE * (DBLOCK_SIZE - cpos % DBLOCK_SIZE);
  483. }
  484. }
  485. if (cbad < badness)
  486. {
  487. best = j;
  488. badness = cbad;
  489. }
  490. }
  491. GNUNET_assert (best != -1);
  492. tmp = perm[i];
  493. perm[i] = perm[best];
  494. perm[best] = tmp;
  495. cpos += sizes[perm[i]];
  496. cpos = do_align (start, cpos);
  497. }
  498. }
  499. /**
  500. * Finish building the directory. Frees the
  501. * builder context and returns the directory
  502. * in-memory.
  503. *
  504. * @param bld directory to finish
  505. * @param rsize set to the number of bytes needed
  506. * @param rdata set to the encoded directory
  507. * @return GNUNET_OK on success
  508. */
  509. int
  510. GNUNET_FS_directory_builder_finish (struct GNUNET_FS_DirectoryBuilder *bld,
  511. size_t * rsize, void **rdata)
  512. {
  513. char *data;
  514. char *sptr;
  515. size_t *sizes;
  516. unsigned int *perm;
  517. unsigned int i;
  518. unsigned int j;
  519. struct BuilderEntry *pos;
  520. struct BuilderEntry **bes;
  521. size_t size;
  522. size_t psize;
  523. size_t off;
  524. ssize_t ret;
  525. uint32_t big;
  526. size = strlen (GNUNET_DIRECTORY_MAGIC) + sizeof (uint32_t);
  527. size += GNUNET_CONTAINER_meta_data_get_serialized_size (bld->meta);
  528. sizes = NULL;
  529. perm = NULL;
  530. bes = NULL;
  531. if (0 < bld->count)
  532. {
  533. sizes = GNUNET_malloc (bld->count * sizeof (size_t));
  534. perm = GNUNET_malloc (bld->count * sizeof (unsigned int));
  535. bes = GNUNET_malloc (bld->count * sizeof (struct BuilderEntry *));
  536. pos = bld->head;
  537. for (i = 0; i < bld->count; i++)
  538. {
  539. perm[i] = i;
  540. bes[i] = pos;
  541. sizes[i] = pos->len;
  542. pos = pos->next;
  543. }
  544. block_align (size, bld->count, sizes, perm);
  545. /* compute final size with alignment */
  546. for (i = 0; i < bld->count; i++)
  547. {
  548. psize = size;
  549. size += sizes[perm[i]];
  550. size = do_align (psize, size);
  551. }
  552. }
  553. *rsize = size;
  554. data = GNUNET_malloc_large (size);
  555. if (data == NULL)
  556. {
  557. GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc");
  558. *rsize = 0;
  559. *rdata = NULL;
  560. GNUNET_free_non_null (sizes);
  561. GNUNET_free_non_null (perm);
  562. GNUNET_free_non_null (bes);
  563. return GNUNET_SYSERR;
  564. }
  565. *rdata = data;
  566. memcpy (data, GNUNET_DIRECTORY_MAGIC, strlen (GNUNET_DIRECTORY_MAGIC));
  567. off = strlen (GNUNET_DIRECTORY_MAGIC);
  568. sptr = &data[off + sizeof (uint32_t)];
  569. ret =
  570. GNUNET_CONTAINER_meta_data_serialize (bld->meta, &sptr,
  571. size - off - sizeof (uint32_t),
  572. GNUNET_CONTAINER_META_DATA_SERIALIZE_FULL);
  573. GNUNET_assert (ret != -1);
  574. big = htonl (ret);
  575. memcpy (&data[off], &big, sizeof (uint32_t));
  576. off += sizeof (uint32_t) + ret;
  577. for (j = 0; j < bld->count; j++)
  578. {
  579. i = perm[j];
  580. psize = off;
  581. off += sizes[i];
  582. off = do_align (psize, off);
  583. memcpy (&data[off - sizes[i]], &(bes[i])[1], sizes[i]);
  584. GNUNET_free (bes[i]);
  585. }
  586. GNUNET_free_non_null (sizes);
  587. GNUNET_free_non_null (perm);
  588. GNUNET_free_non_null (bes);
  589. GNUNET_assert (off == size);
  590. GNUNET_CONTAINER_meta_data_destroy (bld->meta);
  591. GNUNET_free (bld);
  592. return GNUNET_OK;
  593. }
  594. /* end of fs_directory.c */