fs_dirmetascan.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493
  1. /*
  2. This file is part of GNUnet
  3. (C) 2005-2012 Christian Grothoff (and other contributing authors)
  4. GNUnet is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published
  6. by the Free Software Foundation; either version 3, or (at your
  7. option) any later version.
  8. GNUnet is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with GNUnet; see the file COPYING. If not, write to the
  14. Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  15. Boston, MA 02111-1307, USA.
  16. */
  17. /**
  18. * @file fs/fs_dirmetascan.c
  19. * @brief code to asynchronously build a 'struct GNUNET_FS_ShareTreeItem'
  20. * from an on-disk directory for publishing; use the 'gnunet-helper-fs-publish'.
  21. * @author LRN
  22. * @author Christian Grothoff
  23. */
  24. #include "platform.h"
  25. #include "gnunet_fs_service.h"
  26. #include "gnunet_scheduler_lib.h"
  27. #include <pthread.h>
  28. /**
  29. * An opaque structure a pointer to which is returned to the
  30. * caller to be used to control the scanner.
  31. */
  32. struct GNUNET_FS_DirScanner
  33. {
  34. /**
  35. * Helper process.
  36. */
  37. struct GNUNET_HELPER_Handle *helper;
  38. /**
  39. * Expanded filename (as given by the scan initiator).
  40. * The scanner thread stores a copy here, and frees it when it finishes.
  41. */
  42. char *filename_expanded;
  43. /**
  44. * Second argument to helper process.
  45. */
  46. char *ex_arg;
  47. /**
  48. * The function that will be called every time there's a progress
  49. * message.
  50. */
  51. GNUNET_FS_DirScannerProgressCallback progress_callback;
  52. /**
  53. * A closure for progress_callback.
  54. */
  55. void *progress_callback_cls;
  56. /**
  57. * After the scan is finished, it will contain a pointer to the
  58. * top-level directory entry in the directory tree built by the
  59. * scanner.
  60. */
  61. struct GNUNET_FS_ShareTreeItem *toplevel;
  62. /**
  63. * Current position during processing.
  64. */
  65. struct GNUNET_FS_ShareTreeItem *pos;
  66. /**
  67. * Task scheduled when we are done.
  68. */
  69. GNUNET_SCHEDULER_TaskIdentifier stop_task;
  70. /**
  71. * Arguments for helper.
  72. */
  73. char *args[4];
  74. };
  75. /**
  76. * Abort the scan. Must not be called from within the progress_callback
  77. * function.
  78. *
  79. * @param ds directory scanner structure
  80. */
  81. void
  82. GNUNET_FS_directory_scan_abort (struct GNUNET_FS_DirScanner *ds)
  83. {
  84. /* terminate helper */
  85. if (NULL != ds->helper)
  86. GNUNET_HELPER_stop (ds->helper, GNUNET_NO);
  87. /* free resources */
  88. if (NULL != ds->toplevel)
  89. GNUNET_FS_share_tree_free (ds->toplevel);
  90. if (GNUNET_SCHEDULER_NO_TASK != ds->stop_task)
  91. GNUNET_SCHEDULER_cancel (ds->stop_task);
  92. GNUNET_free_non_null (ds->ex_arg);
  93. GNUNET_free (ds->filename_expanded);
  94. GNUNET_free (ds);
  95. }
  96. /**
  97. * Obtain the result of the scan after the scan has signalled
  98. * completion. Must not be called prior to completion. The 'ds' is
  99. * freed as part of this call.
  100. *
  101. * @param ds directory scanner structure
  102. * @return the results of the scan (a directory tree)
  103. */
  104. struct GNUNET_FS_ShareTreeItem *
  105. GNUNET_FS_directory_scan_get_result (struct GNUNET_FS_DirScanner *ds)
  106. {
  107. struct GNUNET_FS_ShareTreeItem *result;
  108. /* check that we're actually done */
  109. GNUNET_assert (NULL == ds->helper);
  110. /* preserve result */
  111. result = ds->toplevel;
  112. ds->toplevel = NULL;
  113. GNUNET_FS_directory_scan_abort (ds);
  114. return result;
  115. }
  116. /**
  117. * Move in the directory from the given position to the next file
  118. * in DFS traversal.
  119. *
  120. * @param pos current position
  121. * @return next file, NULL for none
  122. */
  123. static struct GNUNET_FS_ShareTreeItem *
  124. advance (struct GNUNET_FS_ShareTreeItem *pos)
  125. {
  126. int moved;
  127. GNUNET_assert (NULL != pos);
  128. moved = 0; /* must not terminate, even on file, otherwise "normal" */
  129. while ( (pos->is_directory == GNUNET_YES) ||
  130. (0 == moved) )
  131. {
  132. if ( (moved != -1) &&
  133. (NULL != pos->children_head) )
  134. {
  135. pos = pos->children_head;
  136. moved = 1; /* can terminate if file */
  137. continue;
  138. }
  139. if (NULL != pos->next)
  140. {
  141. pos = pos->next;
  142. moved = 1; /* can terminate if file */
  143. continue;
  144. }
  145. if (NULL != pos->parent)
  146. {
  147. pos = pos->parent;
  148. moved = -1; /* force move to 'next' or 'parent' */
  149. continue;
  150. }
  151. /* no more options, end of traversal */
  152. return NULL;
  153. }
  154. return pos;
  155. }
  156. /**
  157. * Add another child node to the tree.
  158. *
  159. * @param parent parent of the child, NULL for top level
  160. * @param filename name of the file or directory
  161. * @param is_directory GNUNET_YES for directories
  162. * @return new entry that was just created
  163. */
  164. static struct GNUNET_FS_ShareTreeItem *
  165. expand_tree (struct GNUNET_FS_ShareTreeItem *parent,
  166. const char *filename,
  167. int is_directory)
  168. {
  169. struct GNUNET_FS_ShareTreeItem *chld;
  170. size_t slen;
  171. chld = GNUNET_new (struct GNUNET_FS_ShareTreeItem);
  172. chld->parent = parent;
  173. chld->filename = GNUNET_strdup (filename);
  174. GNUNET_asprintf (&chld->short_filename,
  175. "%s%s",
  176. GNUNET_STRINGS_get_short_name (filename),
  177. is_directory == GNUNET_YES ? "/" : "");
  178. /* make sure we do not end with '//' */
  179. slen = strlen (chld->short_filename);
  180. if ( (slen >= 2) &&
  181. (chld->short_filename[slen-1] == '/') &&
  182. (chld->short_filename[slen-2] == '/') )
  183. chld->short_filename[slen-1] = '\0';
  184. chld->is_directory = is_directory;
  185. if (NULL != parent)
  186. GNUNET_CONTAINER_DLL_insert (parent->children_head,
  187. parent->children_tail,
  188. chld);
  189. return chld;
  190. }
  191. /**
  192. * Task run last to shut everything down.
  193. *
  194. * @param cls the 'struct GNUNET_FS_DirScanner'
  195. * @param tc unused
  196. */
  197. static void
  198. finish_scan (void *cls,
  199. const struct GNUNET_SCHEDULER_TaskContext *tc)
  200. {
  201. struct GNUNET_FS_DirScanner *ds = cls;
  202. ds->stop_task = GNUNET_SCHEDULER_NO_TASK;
  203. if (NULL != ds->helper)
  204. {
  205. GNUNET_HELPER_stop (ds->helper, GNUNET_NO);
  206. ds->helper = NULL;
  207. }
  208. ds->progress_callback (ds->progress_callback_cls,
  209. NULL, GNUNET_SYSERR,
  210. GNUNET_FS_DIRSCANNER_FINISHED);
  211. }
  212. /**
  213. * Called every time there is data to read from the scanner.
  214. * Calls the scanner progress handler.
  215. *
  216. * @param cls the closure (directory scanner object)
  217. * @param client always NULL
  218. * @param msg message from the helper process
  219. */
  220. static int
  221. process_helper_msgs (void *cls,
  222. void *client,
  223. const struct GNUNET_MessageHeader *msg)
  224. {
  225. struct GNUNET_FS_DirScanner *ds = cls;
  226. const char *filename;
  227. size_t left;
  228. #if 0
  229. fprintf (stderr, "DMS parses %u-byte message of type %u\n",
  230. (unsigned int) ntohs (msg->size),
  231. (unsigned int) ntohs (msg->type));
  232. #endif
  233. left = ntohs (msg->size) - sizeof (struct GNUNET_MessageHeader);
  234. filename = (const char*) &msg[1];
  235. switch (ntohs (msg->type))
  236. {
  237. case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_FILE:
  238. if (filename[left-1] != '\0')
  239. {
  240. GNUNET_break (0);
  241. break;
  242. }
  243. ds->progress_callback (ds->progress_callback_cls,
  244. filename, GNUNET_NO,
  245. GNUNET_FS_DIRSCANNER_FILE_START);
  246. if (NULL == ds->toplevel)
  247. ds->toplevel = expand_tree (ds->pos,
  248. filename, GNUNET_NO);
  249. else
  250. (void) expand_tree (ds->pos,
  251. filename, GNUNET_NO);
  252. return GNUNET_OK;
  253. case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY:
  254. if (filename[left-1] != '\0')
  255. {
  256. GNUNET_break (0);
  257. break;
  258. }
  259. if (0 == strcmp ("..", filename))
  260. {
  261. if (NULL == ds->pos)
  262. {
  263. GNUNET_break (0);
  264. break;
  265. }
  266. ds->pos = ds->pos->parent;
  267. return GNUNET_OK;
  268. }
  269. ds->progress_callback (ds->progress_callback_cls,
  270. filename, GNUNET_YES,
  271. GNUNET_FS_DIRSCANNER_FILE_START);
  272. ds->pos = expand_tree (ds->pos,
  273. filename, GNUNET_YES);
  274. if (NULL == ds->toplevel)
  275. ds->toplevel = ds->pos;
  276. return GNUNET_OK;
  277. case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR:
  278. break;
  279. case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_SKIP_FILE:
  280. if ('\0' != filename[left-1])
  281. break;
  282. ds->progress_callback (ds->progress_callback_cls,
  283. filename, GNUNET_SYSERR,
  284. GNUNET_FS_DIRSCANNER_FILE_IGNORED);
  285. return GNUNET_OK;
  286. case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_COUNTING_DONE:
  287. if (0 != left)
  288. {
  289. GNUNET_break (0);
  290. break;
  291. }
  292. if (NULL == ds->toplevel)
  293. {
  294. GNUNET_break (0);
  295. break;
  296. }
  297. ds->progress_callback (ds->progress_callback_cls,
  298. NULL, GNUNET_SYSERR,
  299. GNUNET_FS_DIRSCANNER_ALL_COUNTED);
  300. ds->pos = ds->toplevel;
  301. if (GNUNET_YES == ds->pos->is_directory)
  302. ds->pos = advance (ds->pos);
  303. return GNUNET_OK;
  304. case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA:
  305. {
  306. size_t nlen;
  307. const char *end;
  308. if (NULL == ds->pos)
  309. {
  310. GNUNET_break (0);
  311. break;
  312. }
  313. end = memchr (filename, 0, left);
  314. if (NULL == end)
  315. {
  316. GNUNET_break (0);
  317. break;
  318. }
  319. end++;
  320. nlen = end - filename;
  321. left -= nlen;
  322. if (0 != strcmp (filename,
  323. ds->pos->filename))
  324. {
  325. GNUNET_break (0);
  326. break;
  327. }
  328. ds->progress_callback (ds->progress_callback_cls,
  329. filename, GNUNET_YES,
  330. GNUNET_FS_DIRSCANNER_EXTRACT_FINISHED);
  331. if (0 < left)
  332. {
  333. ds->pos->meta = GNUNET_CONTAINER_meta_data_deserialize (end, left);
  334. if (NULL == ds->pos->meta)
  335. {
  336. GNUNET_break (0);
  337. break;
  338. }
  339. /* having full filenames is too dangerous; always make sure we clean them up */
  340. GNUNET_CONTAINER_meta_data_delete (ds->pos->meta,
  341. EXTRACTOR_METATYPE_FILENAME,
  342. NULL, 0);
  343. /* instead, put in our 'safer' original filename */
  344. GNUNET_CONTAINER_meta_data_insert (ds->pos->meta, "<libgnunetfs>",
  345. EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
  346. EXTRACTOR_METAFORMAT_UTF8, "text/plain",
  347. ds->pos->short_filename,
  348. strlen (ds->pos->short_filename) + 1);
  349. }
  350. ds->pos->ksk_uri = GNUNET_FS_uri_ksk_create_from_meta_data (ds->pos->meta);
  351. ds->pos = advance (ds->pos);
  352. return GNUNET_OK;
  353. }
  354. case GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_FINISHED:
  355. if (NULL != ds->pos)
  356. {
  357. GNUNET_break (0);
  358. break;
  359. }
  360. if (0 != left)
  361. {
  362. GNUNET_break (0);
  363. break;
  364. }
  365. if (NULL == ds->toplevel)
  366. {
  367. GNUNET_break (0);
  368. break;
  369. }
  370. ds->stop_task = GNUNET_SCHEDULER_add_now (&finish_scan,
  371. ds);
  372. return GNUNET_OK;
  373. default:
  374. GNUNET_break (0);
  375. break;
  376. }
  377. ds->progress_callback (ds->progress_callback_cls,
  378. NULL, GNUNET_SYSERR,
  379. GNUNET_FS_DIRSCANNER_INTERNAL_ERROR);
  380. return GNUNET_OK;
  381. }
  382. /**
  383. * Function called if our helper process died.
  384. *
  385. * @param cls the 'struct GNUNET_FS_DirScanner' callback.
  386. */
  387. static void
  388. helper_died_cb (void *cls)
  389. {
  390. struct GNUNET_FS_DirScanner *ds = cls;
  391. ds->helper = NULL;
  392. if (GNUNET_SCHEDULER_NO_TASK != ds->stop_task)
  393. return; /* normal death, was finished */
  394. ds->progress_callback (ds->progress_callback_cls,
  395. NULL, GNUNET_SYSERR,
  396. GNUNET_FS_DIRSCANNER_INTERNAL_ERROR);
  397. }
  398. /**
  399. * Start a directory scanner thread.
  400. *
  401. * @param filename name of the directory to scan
  402. * @param disable_extractor #GNUNET_YES to not to run libextractor on files (only build a tree)
  403. * @param ex if not NULL, must be a list of extra plugins for extractor
  404. * @param cb the callback to call when there are scanning progress messages
  405. * @param cb_cls closure for 'cb'
  406. * @return directory scanner object to be used for controlling the scanner
  407. */
  408. struct GNUNET_FS_DirScanner *
  409. GNUNET_FS_directory_scan_start (const char *filename,
  410. int disable_extractor, const char *ex,
  411. GNUNET_FS_DirScannerProgressCallback cb,
  412. void *cb_cls)
  413. {
  414. struct stat sbuf;
  415. char *filename_expanded;
  416. struct GNUNET_FS_DirScanner *ds;
  417. if (0 != STAT (filename, &sbuf))
  418. return NULL;
  419. filename_expanded = GNUNET_STRINGS_filename_expand (filename);
  420. if (NULL == filename_expanded)
  421. return NULL;
  422. GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
  423. "Starting to scan directory `%s'\n",
  424. filename_expanded);
  425. ds = GNUNET_new (struct GNUNET_FS_DirScanner);
  426. ds->progress_callback = cb;
  427. ds->progress_callback_cls = cb_cls;
  428. ds->filename_expanded = filename_expanded;
  429. if (disable_extractor)
  430. ds->ex_arg = GNUNET_strdup ("-");
  431. else
  432. ds->ex_arg = (NULL != ex) ? GNUNET_strdup (ex) : NULL;
  433. ds->args[0] = "gnunet-helper-fs-publish";
  434. ds->args[1] = ds->filename_expanded;
  435. ds->args[2] = ds->ex_arg;
  436. ds->args[3] = NULL;
  437. ds->helper = GNUNET_HELPER_start (GNUNET_NO,
  438. "gnunet-helper-fs-publish",
  439. ds->args,
  440. &process_helper_msgs,
  441. &helper_died_cb, ds);
  442. if (NULL == ds->helper)
  443. {
  444. GNUNET_free (filename_expanded);
  445. GNUNET_free (ds);
  446. return NULL;
  447. }
  448. return ds;
  449. }
  450. /* end of fs_dirmetascan.c */