fs_dirmetascan.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. /*
  2. This file is part of GNUnet
  3. Copyright (C) 2005-2012 GNUnet e.V.
  4. GNUnet is free software: you can redistribute it and/or modify it
  5. under the terms of the GNU Affero General Public License as published
  6. by the Free Software Foundation, either version 3 of the License,
  7. or (at your option) any later version.
  8. GNUnet is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Affero General Public License for more details.
  12. You should have received a copy of the GNU Affero General Public License
  13. along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. SPDX-License-Identifier: AGPL3.0-or-later
  15. */
  16. /**
  17. * @file fs/fs_dirmetascan.c
  18. * @brief code to asynchronously build a 'struct GNUNET_FS_ShareTreeItem'
  19. * from an on-disk directory for publishing; uses the 'gnunet-helper-fs-publish' helper process.
  20. * @author LRN
  21. * @author Christian Grothoff
  22. */
  23. #include "platform.h"
  24. #include "gnunet_fs_service.h"
  25. #include "gnunet_scheduler_lib.h"
  26. #include <pthread.h>
  27. /**
  28. * An opaque structure a pointer to which is returned to the
  29. * caller to be used to control the scanner.
  30. */
  31. struct GNUNET_FS_DirScanner
  32. {
  33. /**
  34. * Helper process.
  35. */
  36. struct GNUNET_HELPER_Handle *helper;
  37. /**
  38. * Expanded filename (as given by the scan initiator).
  39. * The scanner thread stores a copy here, and frees it when it finishes.
  40. */
  41. char *filename_expanded;
  42. /**
  43. * Second argument to helper process.
  44. */
  45. char *ex_arg;
  46. /**
  47. * The function that will be called every time there's a progress
  48. * message.
  49. */
  50. GNUNET_FS_DirScannerProgressCallback progress_callback;
  51. /**
  52. * A closure for progress_callback.
  53. */
  54. void *progress_callback_cls;
  55. /**
  56. * After the scan is finished, it will contain a pointer to the
  57. * top-level directory entry in the directory tree built by the
  58. * scanner.
  59. */
  60. struct GNUNET_FS_ShareTreeItem *toplevel;
  61. /**
  62. * Current position during processing.
  63. */
  64. struct GNUNET_FS_ShareTreeItem *pos;
  65. /**
  66. * Task scheduled when we are done.
  67. */
  68. struct GNUNET_SCHEDULER_Task *stop_task;
  69. /**
  70. * Arguments for helper.
  71. */
  72. char *args[4];
  73. };
  74. /**
  75. * Abort the scan. Must not be called from within the progress_callback
  76. * function.
  77. *
  78. * @param ds directory scanner structure
  79. */
  80. void
  81. GNUNET_FS_directory_scan_abort (struct GNUNET_FS_DirScanner *ds)
  82. {
  83. /* terminate helper */
  84. if (NULL != ds->helper)
  85. GNUNET_HELPER_stop (ds->helper, GNUNET_NO);
  86. /* free resources */
  87. if (NULL != ds->toplevel)
  88. GNUNET_FS_share_tree_free (ds->toplevel);
  89. if (NULL != ds->stop_task)
  90. GNUNET_SCHEDULER_cancel (ds->stop_task);
  91. GNUNET_free (ds->ex_arg);
  92. GNUNET_free (ds->filename_expanded);
  93. GNUNET_free (ds);
  94. }
  95. /**
  96. * Obtain the result of the scan after the scan has signalled
  97. * completion. Must not be called prior to completion. The 'ds' is
  98. * freed as part of this call.
  99. *
  100. * @param ds directory scanner structure
  101. * @return the results of the scan (a directory tree)
  102. */
  103. struct GNUNET_FS_ShareTreeItem *
  104. GNUNET_FS_directory_scan_get_result (struct GNUNET_FS_DirScanner *ds)
  105. {
  106. struct GNUNET_FS_ShareTreeItem *result;
  107. /* check that we're actually done */
  108. GNUNET_assert (NULL == ds->helper);
  109. /* preserve result */
  110. result = ds->toplevel;
  111. ds->toplevel = NULL;
  112. GNUNET_FS_directory_scan_abort (ds);
  113. return result;
  114. }
  115. /**
  116. * Move in the directory from the given position to the next file
  117. * in DFS traversal.
  118. *
  119. * @param pos current position
  120. * @return next file, NULL for none
  121. */
  122. static struct GNUNET_FS_ShareTreeItem *
  123. advance (struct GNUNET_FS_ShareTreeItem *pos)
  124. {
  125. int moved;
  126. GNUNET_assert (NULL != pos);
  127. moved = 0; /* must not terminate, even on file, otherwise "normal" */
  128. while ((pos->is_directory == GNUNET_YES) || (0 == moved))
  129. {
  130. if ((moved != -1) && (NULL != pos->children_head))
  131. {
  132. pos = pos->children_head;
  133. moved = 1; /* can terminate if file */
  134. continue;
  135. }
  136. if (NULL != pos->next)
  137. {
  138. pos = pos->next;
  139. moved = 1; /* can terminate if file */
  140. continue;
  141. }
  142. if (NULL != pos->parent)
  143. {
  144. pos = pos->parent;
  145. moved = -1; /* force move to 'next' or 'parent' */
  146. continue;
  147. }
  148. /* no more options, end of traversal */
  149. return NULL;
  150. }
  151. return pos;
  152. }
  153. /**
  154. * Add another child node to the tree.
  155. *
  156. * @param parent parent of the child, NULL for top level
  157. * @param filename name of the file or directory
  158. * @param is_directory GNUNET_YES for directories
  159. * @return new entry that was just created
  160. */
  161. static struct GNUNET_FS_ShareTreeItem *
  162. expand_tree (struct GNUNET_FS_ShareTreeItem *parent,
  163. const char *filename,
  164. int is_directory)
  165. {
  166. struct GNUNET_FS_ShareTreeItem *chld;
  167. size_t slen;
  168. chld = GNUNET_new (struct GNUNET_FS_ShareTreeItem);
  169. chld->parent = parent;
  170. chld->filename = GNUNET_strdup (filename);
  171. GNUNET_asprintf (&chld->short_filename,
  172. "%s%s",
  173. GNUNET_STRINGS_get_short_name (filename),
  174. is_directory == GNUNET_YES ? "/" : "");
  175. /* make sure we do not end with '//' */
  176. slen = strlen (chld->short_filename);
  177. if ((slen >= 2) && (chld->short_filename[slen - 1] == '/') &&
  178. (chld->short_filename[slen - 2] == '/'))
  179. chld->short_filename[slen - 1] = '\0';
  180. chld->is_directory = is_directory;
  181. if (NULL != parent)
  182. GNUNET_CONTAINER_DLL_insert (parent->children_head,
  183. parent->children_tail,
  184. chld);
  185. return chld;
  186. }
  187. /**
  188. * Task run last to shut everything down.
  189. *
  190. * @param cls the 'struct GNUNET_FS_DirScanner'
  191. */
  192. static void
  193. finish_scan (void *cls)
  194. {
  195. struct GNUNET_FS_DirScanner *ds = cls;
  196. ds->stop_task = NULL;
  197. if (NULL != ds->helper)
  198. {
  199. GNUNET_HELPER_stop (ds->helper, GNUNET_NO);
  200. ds->helper = NULL;
  201. }
  202. ds->progress_callback (ds->progress_callback_cls,
  203. NULL,
  204. GNUNET_SYSERR,
  205. GNUNET_FS_DIRSCANNER_FINISHED);
  206. }
  207. /**
  208. * Called every time there is data to read from the scanner.
  209. * Calls the scanner progress handler.
  210. *
  211. * @param cls the closure (directory scanner object)
  212. * @param msg message from the helper process
  213. * @return #GNUNET_OK on success,
  214. * #GNUNET_NO to stop further processing (no error)
  215. * #GNUNET_SYSERR to stop further processing with error
  216. */
  217. static int
  218. process_helper_msgs (void *cls, const struct GNUNET_MessageHeader *msg)
  219. {
  220. struct GNUNET_FS_DirScanner *ds = cls;
  221. const char *filename;
  222. size_t left;
  223. #if 0
  224. fprintf (stderr,
  225. "DMS parses %u-byte message of type %u\n",
  226. (unsigned int) ntohs (msg->size),
  227. (unsigned int) ntohs (msg->type));
  228. #endif
  229. left = ntohs (msg->size) - sizeof(struct GNUNET_MessageHeader);
  230. filename = (const char *) &msg[1];
  231. switch (ntohs (msg->type))
  232. {
  233. case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_FILE:
  234. if (filename[left - 1] != '\0')
  235. {
  236. GNUNET_break (0);
  237. break;
  238. }
  239. ds->progress_callback (ds->progress_callback_cls,
  240. filename,
  241. GNUNET_NO,
  242. GNUNET_FS_DIRSCANNER_FILE_START);
  243. if (NULL == ds->toplevel)
  244. {
  245. ds->toplevel = expand_tree (ds->pos, filename, GNUNET_NO);
  246. }
  247. else
  248. {
  249. GNUNET_assert (NULL != ds->pos);
  250. (void) expand_tree (ds->pos, filename, GNUNET_NO);
  251. }
  252. return GNUNET_OK;
  253. case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY:
  254. if (filename[left - 1] != '\0')
  255. {
  256. GNUNET_break (0);
  257. break;
  258. }
  259. if (0 == strcmp ("..", filename))
  260. {
  261. if (NULL == ds->pos)
  262. {
  263. GNUNET_break (0);
  264. break;
  265. }
  266. ds->pos = ds->pos->parent;
  267. return GNUNET_OK;
  268. }
  269. ds->progress_callback (ds->progress_callback_cls,
  270. filename,
  271. GNUNET_YES,
  272. GNUNET_FS_DIRSCANNER_FILE_START);
  273. ds->pos = expand_tree (ds->pos, filename, GNUNET_YES);
  274. if (NULL == ds->toplevel)
  275. ds->toplevel = ds->pos;
  276. return GNUNET_OK;
  277. case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR:
  278. break;
  279. case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_SKIP_FILE:
  280. if ('\0' != filename[left - 1])
  281. break;
  282. ds->progress_callback (ds->progress_callback_cls,
  283. filename,
  284. GNUNET_SYSERR,
  285. GNUNET_FS_DIRSCANNER_FILE_IGNORED);
  286. return GNUNET_OK;
  287. case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_COUNTING_DONE:
  288. if (0 != left)
  289. {
  290. GNUNET_break (0);
  291. break;
  292. }
  293. if (NULL == ds->toplevel)
  294. break;
  295. ds->progress_callback (ds->progress_callback_cls,
  296. NULL,
  297. GNUNET_SYSERR,
  298. GNUNET_FS_DIRSCANNER_ALL_COUNTED);
  299. ds->pos = ds->toplevel;
  300. if (GNUNET_YES == ds->pos->is_directory)
  301. ds->pos = advance (ds->pos);
  302. return GNUNET_OK;
  303. case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA: {
  304. size_t nlen;
  305. const char *end;
  306. if (NULL == ds->pos)
  307. {
  308. GNUNET_break (0);
  309. break;
  310. }
  311. end = memchr (filename, 0, left);
  312. if (NULL == end)
  313. {
  314. GNUNET_break (0);
  315. break;
  316. }
  317. end++;
  318. nlen = end - filename;
  319. left -= nlen;
  320. if (0 != strcmp (filename, ds->pos->filename))
  321. {
  322. GNUNET_break (0);
  323. break;
  324. }
  325. ds->progress_callback (ds->progress_callback_cls,
  326. filename,
  327. GNUNET_YES,
  328. GNUNET_FS_DIRSCANNER_EXTRACT_FINISHED);
  329. if (0 < left)
  330. {
  331. ds->pos->meta = GNUNET_CONTAINER_meta_data_deserialize (end, left);
  332. if (NULL == ds->pos->meta)
  333. {
  334. GNUNET_break (0);
  335. break;
  336. }
  337. /* having full filenames is too dangerous; always make sure we clean them up */
  338. GNUNET_CONTAINER_meta_data_delete (ds->pos->meta,
  339. EXTRACTOR_METATYPE_FILENAME,
  340. NULL,
  341. 0);
  342. /* instead, put in our 'safer' original filename */
  343. GNUNET_CONTAINER_meta_data_insert (ds->pos->meta,
  344. "<libgnunetfs>",
  345. EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
  346. EXTRACTOR_METAFORMAT_UTF8,
  347. "text/plain",
  348. ds->pos->short_filename,
  349. strlen (ds->pos->short_filename)
  350. + 1);
  351. }
  352. ds->pos->ksk_uri = GNUNET_FS_uri_ksk_create_from_meta_data (
  353. ds->pos->meta);
  354. ds->pos = advance (ds->pos);
  355. return GNUNET_OK;
  356. }
  357. case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_FINISHED:
  358. if (NULL != ds->pos)
  359. {
  360. GNUNET_break (0);
  361. break;
  362. }
  363. if (0 != left)
  364. {
  365. GNUNET_break (0);
  366. break;
  367. }
  368. if (NULL == ds->toplevel)
  369. break;
  370. ds->stop_task = GNUNET_SCHEDULER_add_now (&finish_scan, ds);
  371. return GNUNET_OK;
  372. default:
  373. GNUNET_break (0);
  374. break;
  375. }
  376. ds->progress_callback (ds->progress_callback_cls,
  377. NULL,
  378. GNUNET_SYSERR,
  379. GNUNET_FS_DIRSCANNER_INTERNAL_ERROR);
  380. return GNUNET_OK;
  381. }
  382. /**
  383. * Function called if our helper process died.
  384. *
  385. * @param cls the 'struct GNUNET_FS_DirScanner' callback.
  386. */
  387. static void
  388. helper_died_cb (void *cls)
  389. {
  390. struct GNUNET_FS_DirScanner *ds = cls;
  391. ds->helper = NULL;
  392. if (NULL != ds->stop_task)
  393. return; /* normal death, was finished */
  394. ds->progress_callback (ds->progress_callback_cls,
  395. NULL,
  396. GNUNET_SYSERR,
  397. GNUNET_FS_DIRSCANNER_INTERNAL_ERROR);
  398. }
  399. /**
  400. * Start a directory scanner thread.
  401. *
  402. * @param filename name of the directory to scan
  403. * @param disable_extractor #GNUNET_YES to not run libextractor on files (only
  404. * build a tree)
  405. * @param ex if not NULL, must be a list of extra plugins for extractor
  406. * @param cb the callback to call when there are scanning progress messages
  407. * @param cb_cls closure for 'cb'
  408. * @return directory scanner object to be used for controlling the scanner
  409. */
  410. struct GNUNET_FS_DirScanner *
  411. GNUNET_FS_directory_scan_start (const char *filename,
  412. int disable_extractor,
  413. const char *ex,
  414. GNUNET_FS_DirScannerProgressCallback cb,
  415. void *cb_cls)
  416. {
  417. struct stat sbuf;
  418. char *filename_expanded;
  419. struct GNUNET_FS_DirScanner *ds;
  420. if (0 != stat (filename, &sbuf))
  421. return NULL;
  422. filename_expanded = GNUNET_STRINGS_filename_expand (filename);
  423. if (NULL == filename_expanded)
  424. return NULL;
  425. GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
  426. "Starting to scan directory `%s'\n",
  427. filename_expanded);
  428. ds = GNUNET_new (struct GNUNET_FS_DirScanner);
  429. ds->progress_callback = cb;
  430. ds->progress_callback_cls = cb_cls;
  431. ds->filename_expanded = filename_expanded;
  432. if (disable_extractor)
  433. ds->ex_arg = GNUNET_strdup ("-");
  434. else
  435. ds->ex_arg = (NULL != ex) ? GNUNET_strdup (ex) : NULL;
  436. ds->args[0] = "gnunet-helper-fs-publish";
  437. ds->args[1] = ds->filename_expanded;
  438. ds->args[2] = ds->ex_arg;
  439. ds->args[3] = NULL;
  440. ds->helper = GNUNET_HELPER_start (GNUNET_NO,
  441. "gnunet-helper-fs-publish",
  442. ds->args,
  443. &process_helper_msgs,
  444. &helper_died_cb,
  445. ds);
  446. if (NULL == ds->helper)
  447. {
  448. GNUNET_free (filename_expanded);
  449. GNUNET_free (ds);
  450. return NULL;
  451. }
  452. return ds;
  453. }
  454. /* end of fs_dirmetascan.c */