gnunet-helper-fs-publish.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578
  1. /*
  2. This file is part of GNUnet.
  3. Copyright (C) 2012 GNUnet e.V.
  4. GNUnet is free software: you can redistribute it and/or modify it
  5. under the terms of the GNU Affero General Public License as published
  6. by the Free Software Foundation, either version 3 of the License,
  7. or (at your option) any later version.
  8. GNUnet is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Affero General Public License for more details.
  12. You should have received a copy of the GNU Affero General Public License
  13. along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. SPDX-License-Identifier: AGPL3.0-or-later
  15. */
  16. /**
  17. * @file src/fs/gnunet-helper-fs-publish.c
  18. * @brief Tool to help extract meta data asynchronously
  19. * @author Christian Grothoff
  20. *
  21. * This program will scan a directory for files with meta data
  22. * and report the results to stdout.
  23. */
  24. #include "platform.h"
  25. #include "gnunet_fs_service.h"
  26. /**
  27. * A node of a directory tree.
  28. */
  29. struct ScanTreeNode
  30. {
  31. /**
  32. * This is a doubly-linked list
  33. */
  34. struct ScanTreeNode *next;
  35. /**
  36. * This is a doubly-linked list
  37. */
  38. struct ScanTreeNode *prev;
  39. /**
  40. * Parent of this node, NULL for top-level entries.
  41. */
  42. struct ScanTreeNode *parent;
  43. /**
  44. * This is a doubly-linked tree
  45. * NULL for files and empty directories
  46. */
  47. struct ScanTreeNode *children_head;
  48. /**
  49. * This is a doubly-linked tree
  50. * NULL for files and empty directories
  51. */
  52. struct ScanTreeNode *children_tail;
  53. /**
  54. * Name of the file/directory
  55. */
  56. char *filename;
  57. /**
  58. * Size of the file (if it is a file), in bytes.
  59. * At the moment it is set to 0 for directories.
  60. */
  61. uint64_t file_size;
  62. /**
  63. * #GNUNET_YES if this is a directory
  64. */
  65. int is_directory;
  66. };
#if HAVE_LIBEXTRACTOR
/**
 * List of libextractor plugins to use for extracting meta data.
 * Filled in by main() unless extraction was disabled on the
 * command line; released again before main() returns.
 */
static struct EXTRACTOR_PluginList *plugins;
#endif

/**
 * File descriptor we use for IPC with the parent.  This is a
 * duplicate of the original stdout made in main(); descriptors 1 and
 * 2 themselves are re-bound to /dev/null afterwards.
 */
static int output_stream;
  77. #if HAVE_LIBEXTRACTOR
  78. /**
  79. * Add meta data that libextractor finds to our meta data
  80. * container.
  81. *
  82. * @param cls closure, our meta data container
  83. * @param plugin_name name of the plugin that produced this value;
  84. * special values can be used (i.e. '&lt;zlib&gt;' for zlib being
  85. * used in the main libextractor library and yielding
  86. * meta data).
  87. * @param type libextractor-type describing the meta data
  88. * @param format basic format information about data
  89. * @param data_mime_type mime-type of data (not of the original file);
  90. * can be NULL (if mime-type is not known)
  91. * @param data actual meta-data found
  92. * @param data_len number of bytes in @a data
  93. * @return always 0 to continue extracting
  94. */
  95. static int
  96. add_to_md (void *cls,
  97. const char *plugin_name,
  98. enum EXTRACTOR_MetaType type,
  99. enum EXTRACTOR_MetaFormat format,
  100. const char *data_mime_type,
  101. const char *data,
  102. size_t data_len)
  103. {
  104. struct GNUNET_CONTAINER_MetaData *md = cls;
  105. if (((EXTRACTOR_METAFORMAT_UTF8 == format) ||
  106. (EXTRACTOR_METAFORMAT_C_STRING == format)) &&
  107. ('\0' != data[data_len - 1]))
  108. {
  109. char zdata[data_len + 1];
  110. GNUNET_memcpy (zdata, data, data_len);
  111. zdata[data_len] = '\0';
  112. (void) GNUNET_CONTAINER_meta_data_insert (md,
  113. plugin_name,
  114. type,
  115. format,
  116. data_mime_type,
  117. zdata,
  118. data_len + 1);
  119. }
  120. else
  121. {
  122. (void) GNUNET_CONTAINER_meta_data_insert (md,
  123. plugin_name,
  124. type,
  125. format,
  126. data_mime_type,
  127. data,
  128. data_len);
  129. }
  130. return 0;
  131. }
  132. #endif
  133. /**
  134. * Free memory of the @a tree structure
  135. *
  136. * @param tree tree to free
  137. */
  138. static void
  139. free_tree (struct ScanTreeNode *tree)
  140. {
  141. struct ScanTreeNode *pos;
  142. while (NULL != (pos = tree->children_head))
  143. free_tree (pos);
  144. if (NULL != tree->parent)
  145. GNUNET_CONTAINER_DLL_remove (tree->parent->children_head,
  146. tree->parent->children_tail,
  147. tree);
  148. GNUNET_free (tree->filename);
  149. GNUNET_free (tree);
  150. }
  151. /**
  152. * Write @a size bytes from @a buf into the #output_stream.
  153. *
  154. * @param buf buffer with data to write
  155. * @param size number of bytes to write
  156. * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
  157. */
  158. static int
  159. write_all (const void *buf, size_t size)
  160. {
  161. const char *cbuf = buf;
  162. size_t total;
  163. ssize_t wr;
  164. total = 0;
  165. do
  166. {
  167. wr = write (output_stream, &cbuf[total], size - total);
  168. if (wr > 0)
  169. total += wr;
  170. }
  171. while ((wr > 0) && (total < size));
  172. if (wr <= 0)
  173. GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
  174. "Failed to write to stdout: %s\n",
  175. strerror (errno));
  176. return (total == size) ? GNUNET_OK : GNUNET_SYSERR;
  177. }
  178. /**
  179. * Write message to the master process.
  180. *
  181. * @param message_type message type to use
  182. * @param data data to append, NULL for none
  183. * @param data_length number of bytes in @a data
  184. * @return #GNUNET_SYSERR to stop scanning (the pipe was broken somehow)
  185. */
  186. static int
  187. write_message (uint16_t message_type, const char *data, size_t data_length)
  188. {
  189. struct GNUNET_MessageHeader hdr;
  190. #if 0
  191. fprintf (stderr,
  192. "Helper sends %u-byte message of type %u\n",
  193. (unsigned int) (sizeof(struct GNUNET_MessageHeader) + data_length),
  194. (unsigned int) message_type);
  195. #endif
  196. hdr.type = htons (message_type);
  197. hdr.size = htons (sizeof(struct GNUNET_MessageHeader) + data_length);
  198. if ((GNUNET_OK != write_all (&hdr, sizeof(hdr))) ||
  199. (GNUNET_OK != write_all (data, data_length)))
  200. return GNUNET_SYSERR;
  201. return GNUNET_OK;
  202. }
/**
 * Function called to (recursively) add all of the files in the
 * directory to the tree.  Called by the directory scanner to initiate
 * the scan.  Does NOT yet add any metadata.
 *
 * Forward declaration: scan_callback() calls this function, and this
 * function in turn passes scan_callback() to the directory scan, so
 * one of the two has to be declared ahead of its definition.
 *
 * @param filename file or directory to scan
 * @param dst where to store the resulting share tree item;
 *        NULL is stored in @a dst upon recoverable errors (#GNUNET_OK is returned)
 * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
 */
static int
preprocess_file (const char *filename, struct ScanTreeNode **dst);
  215. /**
  216. * Closure for the 'scan_callback'
  217. */
  218. struct RecursionContext
  219. {
  220. /**
  221. * Parent to add the files to.
  222. */
  223. struct ScanTreeNode *parent;
  224. /**
  225. * Flag to set to GNUNET_YES on serious errors.
  226. */
  227. int stop;
  228. };
  229. /**
  230. * Function called by the directory iterator to (recursively) add all
  231. * of the files in the directory to the tree. Called by the directory
  232. * scanner to initiate the scan. Does NOT yet add any metadata.
  233. *
  234. * @param cls the `struct RecursionContext`
  235. * @param filename file or directory to scan
  236. * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
  237. */
  238. static int
  239. scan_callback (void *cls, const char *filename)
  240. {
  241. struct RecursionContext *rc = cls;
  242. struct ScanTreeNode *chld;
  243. if (GNUNET_OK != preprocess_file (filename, &chld))
  244. {
  245. rc->stop = GNUNET_YES;
  246. return GNUNET_SYSERR;
  247. }
  248. if (NULL == chld)
  249. return GNUNET_OK;
  250. chld->parent = rc->parent;
  251. GNUNET_CONTAINER_DLL_insert (rc->parent->children_head,
  252. rc->parent->children_tail,
  253. chld);
  254. return GNUNET_OK;
  255. }
  256. /**
  257. * Function called to (recursively) add all of the files in the
  258. * directory to the tree. Called by the directory scanner to initiate
  259. * the scan. Does NOT yet add any metadata.
  260. *
  261. * @param filename file or directory to scan
  262. * @param dst where to store the resulting share tree item;
  263. * NULL is stored in @a dst upon recoverable errors (#GNUNET_OK is returned)
  264. * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
  265. */
  266. static int
  267. preprocess_file (const char *filename, struct ScanTreeNode **dst)
  268. {
  269. struct ScanTreeNode *item;
  270. struct stat sbuf;
  271. uint64_t fsize = 0;
  272. if ((0 != stat (filename, &sbuf)) ||
  273. ((! S_ISDIR (sbuf.st_mode)) &&
  274. (GNUNET_OK !=
  275. GNUNET_DISK_file_size (filename, &fsize, GNUNET_NO, GNUNET_YES))))
  276. {
  277. /* If the file doesn't exist (or is not stat-able for any other reason)
  278. skip it (but report it), but do continue. */
  279. if (GNUNET_OK !=
  280. write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_SKIP_FILE,
  281. filename,
  282. strlen (filename) + 1))
  283. return GNUNET_SYSERR;
  284. /* recoverable error, store 'NULL' in *dst */
  285. *dst = NULL;
  286. return GNUNET_OK;
  287. }
  288. /* Report the progress */
  289. if (
  290. GNUNET_OK !=
  291. write_message (S_ISDIR (sbuf.st_mode)
  292. ? GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY
  293. : GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_FILE,
  294. filename,
  295. strlen (filename) + 1))
  296. return GNUNET_SYSERR;
  297. item = GNUNET_new (struct ScanTreeNode);
  298. item->filename = GNUNET_strdup (filename);
  299. item->is_directory = (S_ISDIR (sbuf.st_mode)) ? GNUNET_YES : GNUNET_NO;
  300. item->file_size = fsize;
  301. if (GNUNET_YES == item->is_directory)
  302. {
  303. struct RecursionContext rc;
  304. rc.parent = item;
  305. rc.stop = GNUNET_NO;
  306. GNUNET_DISK_directory_scan (filename, &scan_callback, &rc);
  307. if (
  308. (GNUNET_YES == rc.stop) ||
  309. (GNUNET_OK !=
  310. write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY,
  311. "..",
  312. 3)))
  313. {
  314. free_tree (item);
  315. return GNUNET_SYSERR;
  316. }
  317. }
  318. *dst = item;
  319. return GNUNET_OK;
  320. }
  321. /**
  322. * Extract metadata from files.
  323. *
  324. * @param item entry we are processing
  325. * @return #GNUNET_OK on success, #GNUNET_SYSERR on fatal errors
  326. */
  327. static int
  328. extract_files (struct ScanTreeNode *item)
  329. {
  330. struct GNUNET_CONTAINER_MetaData *meta;
  331. ssize_t size;
  332. size_t slen;
  333. if (GNUNET_YES == item->is_directory)
  334. {
  335. /* for directories, we simply only descent, no extraction, no
  336. progress reporting */
  337. struct ScanTreeNode *pos;
  338. for (pos = item->children_head; NULL != pos; pos = pos->next)
  339. if (GNUNET_OK != extract_files (pos))
  340. return GNUNET_SYSERR;
  341. return GNUNET_OK;
  342. }
  343. /* this is the expensive operation, *afterwards* we'll check for aborts */
  344. meta = GNUNET_CONTAINER_meta_data_create ();
  345. #if HAVE_LIBEXTRACTOR
  346. EXTRACTOR_extract (plugins, item->filename, NULL, 0, &add_to_md, meta);
  347. #endif
  348. slen = strlen (item->filename) + 1;
  349. size = GNUNET_CONTAINER_meta_data_get_serialized_size (meta);
  350. if (-1 == size)
  351. {
  352. /* no meta data */
  353. GNUNET_CONTAINER_meta_data_destroy (meta);
  354. if (GNUNET_OK !=
  355. write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA,
  356. item->filename,
  357. slen))
  358. return GNUNET_SYSERR;
  359. return GNUNET_OK;
  360. }
  361. else if (size > (UINT16_MAX - sizeof(struct GNUNET_MessageHeader) - slen))
  362. {
  363. /* We can't transfer more than 64k bytes in one message. */
  364. size = UINT16_MAX - sizeof(struct GNUNET_MessageHeader) - slen;
  365. }
  366. {
  367. char buf[size + slen];
  368. char *dst = &buf[slen];
  369. GNUNET_memcpy (buf, item->filename, slen);
  370. size = GNUNET_CONTAINER_meta_data_serialize (
  371. meta,
  372. &dst,
  373. size,
  374. GNUNET_CONTAINER_META_DATA_SERIALIZE_PART);
  375. if (size < 0)
  376. {
  377. GNUNET_break (0);
  378. size = 0;
  379. }
  380. GNUNET_CONTAINER_meta_data_destroy (meta);
  381. if (GNUNET_OK !=
  382. write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA,
  383. buf,
  384. slen + size))
  385. return GNUNET_SYSERR;
  386. }
  387. return GNUNET_OK;
  388. }
  389. /**
  390. * Install a signal handler to ignore SIGPIPE.
  391. */
  392. static void
  393. ignore_sigpipe ()
  394. {
  395. struct sigaction oldsig;
  396. struct sigaction sig;
  397. memset (&sig, 0, sizeof(struct sigaction));
  398. sig.sa_handler = SIG_IGN;
  399. sigemptyset (&sig.sa_mask);
  400. #ifdef SA_INTERRUPT
  401. sig.sa_flags = SA_INTERRUPT; /* SunOS */
  402. #else
  403. sig.sa_flags = SA_RESTART;
  404. #endif
  405. if (0 != sigaction (SIGPIPE, &sig, &oldsig))
  406. fprintf (stderr,
  407. "Failed to install SIGPIPE handler: %s\n",
  408. strerror (errno));
  409. }
  410. /**
  411. * Turn the given file descriptor in to '/dev/null'.
  412. *
  413. * @param fd fd to bind to /dev/null
  414. * @param flags flags to use (O_RDONLY or O_WRONLY)
  415. */
  416. static void
  417. make_dev_zero (int fd, int flags)
  418. {
  419. int z;
  420. GNUNET_assert (0 == close (fd));
  421. z = open ("/dev/null", flags);
  422. GNUNET_assert (-1 != z);
  423. if (z == fd)
  424. return;
  425. GNUNET_break (fd == dup2 (z, fd));
  426. GNUNET_assert (0 == close (z));
  427. }
  428. /**
  429. * Main function of the helper process to extract meta data.
  430. *
  431. * @param argc should be 3
  432. * @param argv [0] our binary name
  433. * [1] name of the file or directory to process
  434. * [2] "-" to disable extraction, NULL for defaults,
  435. * otherwise custom plugins to load from LE
  436. * @return 0 on success
  437. */
  438. int
  439. main (int argc, char *const *argv)
  440. {
  441. const char *filename_expanded;
  442. const char *ex;
  443. struct ScanTreeNode *root;
  444. ignore_sigpipe ();
  445. /* move stdout to some other FD for IPC, bind
  446. stdout/stderr to /dev/null */
  447. output_stream = dup (1);
  448. make_dev_zero (1, O_WRONLY);
  449. make_dev_zero (2, O_WRONLY);
  450. /* parse command line */
  451. if ((3 != argc) && (2 != argc))
  452. {
  453. fprintf (stderr,
  454. "%s",
  455. "gnunet-helper-fs-publish needs exactly one or two arguments\n");
  456. return 1;
  457. }
  458. filename_expanded = argv[1];
  459. ex = argv[2];
  460. if ((NULL == ex) || (0 != strcmp (ex, "-")))
  461. {
  462. #if HAVE_LIBEXTRACTOR
  463. plugins = EXTRACTOR_plugin_add_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY);
  464. if (NULL != ex)
  465. plugins = EXTRACTOR_plugin_add_config (plugins,
  466. ex,
  467. EXTRACTOR_OPTION_DEFAULT_POLICY);
  468. #endif
  469. }
  470. /* scan tree to find out how much work there is to be done */
  471. if (GNUNET_OK != preprocess_file (filename_expanded, &root))
  472. {
  473. (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR, NULL, 0);
  474. #if HAVE_LIBEXTRACTOR
  475. EXTRACTOR_plugin_remove_all (plugins);
  476. #endif
  477. return 2;
  478. }
  479. /* signal that we're done counting files, so that a percentage of
  480. progress can now be calculated */
  481. if (GNUNET_OK !=
  482. write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_COUNTING_DONE,
  483. NULL,
  484. 0))
  485. {
  486. #if HAVE_LIBEXTRACTOR
  487. EXTRACTOR_plugin_remove_all (plugins);
  488. #endif
  489. return 3;
  490. }
  491. if (NULL != root)
  492. {
  493. if (GNUNET_OK != extract_files (root))
  494. {
  495. (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR,
  496. NULL,
  497. 0);
  498. free_tree (root);
  499. #if HAVE_LIBEXTRACTOR
  500. EXTRACTOR_plugin_remove_all (plugins);
  501. #endif
  502. return 4;
  503. }
  504. free_tree (root);
  505. }
  506. /* enable "clean" shutdown by telling parent that we are done */
  507. (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_FINISHED,
  508. NULL,
  509. 0);
  510. #if HAVE_LIBEXTRACTOR
  511. EXTRACTOR_plugin_remove_all (plugins);
  512. #endif
  513. return 0;
  514. }
  515. /* end of gnunet-helper-fs-publish.c */