123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578 |
/*
     This file is part of GNUnet.
     Copyright (C) 2012 GNUnet e.V.

     GNUnet is free software: you can redistribute it and/or modify it
     under the terms of the GNU Affero General Public License as published
     by the Free Software Foundation, either version 3 of the License,
     or (at your option) any later version.

     GNUnet is distributed in the hope that it will be useful, but
     WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     Affero General Public License for more details.

     You should have received a copy of the GNU Affero General Public License
     along with this program.  If not, see <http://www.gnu.org/licenses/>.

     SPDX-License-Identifier: AGPL3.0-or-later
 */
/**
 * @file src/fs/gnunet-helper-fs-publish.c
 * @brief Tool to help extract meta data asynchronously
 * @author Christian Grothoff
 *
 * This program scans a file or directory tree, extracts the meta data
 * of the files it finds and reports progress and results as messages
 * on stdout.
 */
- #include "platform.h"
- #include "gnunet_fs_service.h"
/**
 * One entry (file or directory) of the tree built while scanning.
 */
struct ScanTreeNode
{
  /**
   * Next sibling in the parent's doubly-linked list of children.
   */
  struct ScanTreeNode *next;

  /**
   * Previous sibling in the parent's doubly-linked list of children.
   */
  struct ScanTreeNode *prev;

  /**
   * Directory this entry belongs to, NULL for top-level entries.
   */
  struct ScanTreeNode *parent;

  /**
   * Head of the doubly-linked list of children;
   * NULL for files and empty directories.
   */
  struct ScanTreeNode *children_head;

  /**
   * Tail of the doubly-linked list of children;
   * NULL for files and empty directories.
   */
  struct ScanTreeNode *children_tail;

  /**
   * Name of the file or directory.
   */
  char *filename;

  /**
   * Size in bytes if this entry is a file.
   * Currently always 0 for directories.
   */
  uint64_t file_size;

  /**
   * #GNUNET_YES if this entry is a directory.
   */
  int is_directory;
};
/**
 * Descriptor on which messages for the parent process are written.
 */
static int output_stream;

#if HAVE_LIBEXTRACTOR
/**
 * The libextractor plugins used for meta data extraction.
 */
static struct EXTRACTOR_PluginList *plugins;
#endif
#if HAVE_LIBEXTRACTOR
/**
 * Callback handed to libextractor: adds one extracted item to our
 * meta data container.  Items in a string format (UTF-8 or C string)
 * that do not end in a 0-terminator are copied and terminated before
 * they are inserted; everything else is inserted unchanged.
 *
 * @param cls closure, our meta data container
 * @param plugin_name name of the plugin that produced this value;
 *        special values can be used (i.e. '<zlib>' for zlib being
 *        used in the main libextractor library and yielding
 *        meta data).
 * @param type libextractor-type describing the meta data
 * @param format basic format information about data
 * @param data_mime_type mime-type of data (not of the original file);
 *        can be NULL (if mime-type is not known)
 * @param data actual meta-data found
 * @param data_len number of bytes in @a data
 * @return always 0 to continue extracting
 */
static int
add_to_md (void *cls,
           const char *plugin_name,
           enum EXTRACTOR_MetaType type,
           enum EXTRACTOR_MetaFormat format,
           const char *data_mime_type,
           const char *data,
           size_t data_len)
{
  struct GNUNET_CONTAINER_MetaData *md = cls;
  int is_string = ((EXTRACTOR_METAFORMAT_UTF8 == format) ||
                   (EXTRACTOR_METAFORMAT_C_STRING == format));

  /* The length must be checked first: for an empty item,
     data[data_len - 1] would be an out-of-bounds read. */
  if (is_string &&
      ((0 == data_len) || ('\0' != data[data_len - 1])))
  {
    /* NOTE(review): the copy lives on the stack and is as large as the
       item libextractor handed us. */
    char zdata[data_len + 1];

    if (0 != data_len)
      GNUNET_memcpy (zdata, data, data_len);
    zdata[data_len] = '\0';
    (void) GNUNET_CONTAINER_meta_data_insert (md,
                                              plugin_name,
                                              type,
                                              format,
                                              data_mime_type,
                                              zdata,
                                              data_len + 1);
    return 0;
  }
  (void) GNUNET_CONTAINER_meta_data_insert (md,
                                            plugin_name,
                                            type,
                                            format,
                                            data_mime_type,
                                            data,
                                            data_len);
  return 0;
}
#endif
- /**
- * Free memory of the @a tree structure
- *
- * @param tree tree to free
- */
- static void
- free_tree (struct ScanTreeNode *tree)
- {
- struct ScanTreeNode *pos;
- while (NULL != (pos = tree->children_head))
- free_tree (pos);
- if (NULL != tree->parent)
- GNUNET_CONTAINER_DLL_remove (tree->parent->children_head,
- tree->parent->children_tail,
- tree);
- GNUNET_free (tree->filename);
- GNUNET_free (tree);
- }
- /**
- * Write @a size bytes from @a buf into the #output_stream.
- *
- * @param buf buffer with data to write
- * @param size number of bytes to write
- * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
- */
- static int
- write_all (const void *buf, size_t size)
- {
- const char *cbuf = buf;
- size_t total;
- ssize_t wr;
- total = 0;
- do
- {
- wr = write (output_stream, &cbuf[total], size - total);
- if (wr > 0)
- total += wr;
- }
- while ((wr > 0) && (total < size));
- if (wr <= 0)
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
- "Failed to write to stdout: %s\n",
- strerror (errno));
- return (total == size) ? GNUNET_OK : GNUNET_SYSERR;
- }
- /**
- * Write message to the master process.
- *
- * @param message_type message type to use
- * @param data data to append, NULL for none
- * @param data_length number of bytes in @a data
- * @return #GNUNET_SYSERR to stop scanning (the pipe was broken somehow)
- */
- static int
- write_message (uint16_t message_type, const char *data, size_t data_length)
- {
- struct GNUNET_MessageHeader hdr;
- #if 0
- fprintf (stderr,
- "Helper sends %u-byte message of type %u\n",
- (unsigned int) (sizeof(struct GNUNET_MessageHeader) + data_length),
- (unsigned int) message_type);
- #endif
- hdr.type = htons (message_type);
- hdr.size = htons (sizeof(struct GNUNET_MessageHeader) + data_length);
- if ((GNUNET_OK != write_all (&hdr, sizeof(hdr))) ||
- (GNUNET_OK != write_all (data, data_length)))
- return GNUNET_SYSERR;
- return GNUNET_OK;
- }
/**
 * Forward declaration, required because scan_callback() calls this
 * function while this function in turn passes scan_callback() to the
 * directory scanner.
 *
 * Adds @a filename (recursively, if it is a directory) to the tree.
 * Does NOT yet add any metadata.
 *
 * @param filename file or directory to scan
 * @param dst where to store the resulting share tree item;
 *         NULL is stored in @a dst upon recoverable errors
 *         (#GNUNET_OK is returned)
 * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
 */
static int preprocess_file (const char *filename, struct ScanTreeNode **dst);
/**
 * State shared between preprocess_file() and the scan_callback()
 * invocations for the entries of one directory.
 */
struct RecursionContext
{
  /**
   * Directory node that receives the entries found.
   */
  struct ScanTreeNode *parent;

  /**
   * Set to #GNUNET_YES once a serious error occurred.
   */
  int stop;
};
- /**
- * Function called by the directory iterator to (recursively) add all
- * of the files in the directory to the tree. Called by the directory
- * scanner to initiate the scan. Does NOT yet add any metadata.
- *
- * @param cls the `struct RecursionContext`
- * @param filename file or directory to scan
- * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
- */
- static int
- scan_callback (void *cls, const char *filename)
- {
- struct RecursionContext *rc = cls;
- struct ScanTreeNode *chld;
- if (GNUNET_OK != preprocess_file (filename, &chld))
- {
- rc->stop = GNUNET_YES;
- return GNUNET_SYSERR;
- }
- if (NULL == chld)
- return GNUNET_OK;
- chld->parent = rc->parent;
- GNUNET_CONTAINER_DLL_insert (rc->parent->children_head,
- rc->parent->children_tail,
- chld);
- return GNUNET_OK;
- }
- /**
- * Function called to (recursively) add all of the files in the
- * directory to the tree. Called by the directory scanner to initiate
- * the scan. Does NOT yet add any metadata.
- *
- * @param filename file or directory to scan
- * @param dst where to store the resulting share tree item;
- * NULL is stored in @a dst upon recoverable errors (#GNUNET_OK is returned)
- * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
- */
- static int
- preprocess_file (const char *filename, struct ScanTreeNode **dst)
- {
- struct ScanTreeNode *item;
- struct stat sbuf;
- uint64_t fsize = 0;
- if ((0 != stat (filename, &sbuf)) ||
- ((! S_ISDIR (sbuf.st_mode)) &&
- (GNUNET_OK !=
- GNUNET_DISK_file_size (filename, &fsize, GNUNET_NO, GNUNET_YES))))
- {
- /* If the file doesn't exist (or is not stat-able for any other reason)
- skip it (but report it), but do continue. */
- if (GNUNET_OK !=
- write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_SKIP_FILE,
- filename,
- strlen (filename) + 1))
- return GNUNET_SYSERR;
- /* recoverable error, store 'NULL' in *dst */
- *dst = NULL;
- return GNUNET_OK;
- }
- /* Report the progress */
- if (
- GNUNET_OK !=
- write_message (S_ISDIR (sbuf.st_mode)
- ? GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY
- : GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_FILE,
- filename,
- strlen (filename) + 1))
- return GNUNET_SYSERR;
- item = GNUNET_new (struct ScanTreeNode);
- item->filename = GNUNET_strdup (filename);
- item->is_directory = (S_ISDIR (sbuf.st_mode)) ? GNUNET_YES : GNUNET_NO;
- item->file_size = fsize;
- if (GNUNET_YES == item->is_directory)
- {
- struct RecursionContext rc;
- rc.parent = item;
- rc.stop = GNUNET_NO;
- GNUNET_DISK_directory_scan (filename, &scan_callback, &rc);
- if (
- (GNUNET_YES == rc.stop) ||
- (GNUNET_OK !=
- write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY,
- "..",
- 3)))
- {
- free_tree (item);
- return GNUNET_SYSERR;
- }
- }
- *dst = item;
- return GNUNET_OK;
- }
- /**
- * Extract metadata from files.
- *
- * @param item entry we are processing
- * @return #GNUNET_OK on success, #GNUNET_SYSERR on fatal errors
- */
- static int
- extract_files (struct ScanTreeNode *item)
- {
- struct GNUNET_CONTAINER_MetaData *meta;
- ssize_t size;
- size_t slen;
- if (GNUNET_YES == item->is_directory)
- {
- /* for directories, we simply only descent, no extraction, no
- progress reporting */
- struct ScanTreeNode *pos;
- for (pos = item->children_head; NULL != pos; pos = pos->next)
- if (GNUNET_OK != extract_files (pos))
- return GNUNET_SYSERR;
- return GNUNET_OK;
- }
- /* this is the expensive operation, *afterwards* we'll check for aborts */
- meta = GNUNET_CONTAINER_meta_data_create ();
- #if HAVE_LIBEXTRACTOR
- EXTRACTOR_extract (plugins, item->filename, NULL, 0, &add_to_md, meta);
- #endif
- slen = strlen (item->filename) + 1;
- size = GNUNET_CONTAINER_meta_data_get_serialized_size (meta);
- if (-1 == size)
- {
- /* no meta data */
- GNUNET_CONTAINER_meta_data_destroy (meta);
- if (GNUNET_OK !=
- write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA,
- item->filename,
- slen))
- return GNUNET_SYSERR;
- return GNUNET_OK;
- }
- else if (size > (UINT16_MAX - sizeof(struct GNUNET_MessageHeader) - slen))
- {
- /* We can't transfer more than 64k bytes in one message. */
- size = UINT16_MAX - sizeof(struct GNUNET_MessageHeader) - slen;
- }
- {
- char buf[size + slen];
- char *dst = &buf[slen];
- GNUNET_memcpy (buf, item->filename, slen);
- size = GNUNET_CONTAINER_meta_data_serialize (
- meta,
- &dst,
- size,
- GNUNET_CONTAINER_META_DATA_SERIALIZE_PART);
- if (size < 0)
- {
- GNUNET_break (0);
- size = 0;
- }
- GNUNET_CONTAINER_meta_data_destroy (meta);
- if (GNUNET_OK !=
- write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA,
- buf,
- slen + size))
- return GNUNET_SYSERR;
- }
- return GNUNET_OK;
- }
- /**
- * Install a signal handler to ignore SIGPIPE.
- */
- static void
- ignore_sigpipe ()
- {
- struct sigaction oldsig;
- struct sigaction sig;
- memset (&sig, 0, sizeof(struct sigaction));
- sig.sa_handler = SIG_IGN;
- sigemptyset (&sig.sa_mask);
- #ifdef SA_INTERRUPT
- sig.sa_flags = SA_INTERRUPT; /* SunOS */
- #else
- sig.sa_flags = SA_RESTART;
- #endif
- if (0 != sigaction (SIGPIPE, &sig, &oldsig))
- fprintf (stderr,
- "Failed to install SIGPIPE handler: %s\n",
- strerror (errno));
- }
/**
 * Rebind the given file descriptor to '/dev/null' (despite the
 * function's name, the device opened is '/dev/null').
 *
 * @param fd fd to bind to /dev/null
 * @param flags flags to use (O_RDONLY or O_WRONLY)
 */
static void
make_dev_zero (int fd, int flags)
{
  int null_fd;

  GNUNET_assert (0 == close (fd));
  null_fd = open ("/dev/null", flags);
  GNUNET_assert (-1 != null_fd);
  if (null_fd != fd)
  {
    /* open() picked a different descriptor: duplicate it onto
       @a fd and drop the temporary one */
    GNUNET_break (fd == dup2 (null_fd, fd));
    GNUNET_assert (0 == close (null_fd));
  }
}
- /**
- * Main function of the helper process to extract meta data.
- *
- * @param argc should be 3
- * @param argv [0] our binary name
- * [1] name of the file or directory to process
- * [2] "-" to disable extraction, NULL for defaults,
- * otherwise custom plugins to load from LE
- * @return 0 on success
- */
- int
- main (int argc, char *const *argv)
- {
- const char *filename_expanded;
- const char *ex;
- struct ScanTreeNode *root;
- ignore_sigpipe ();
- /* move stdout to some other FD for IPC, bind
- stdout/stderr to /dev/null */
- output_stream = dup (1);
- make_dev_zero (1, O_WRONLY);
- make_dev_zero (2, O_WRONLY);
- /* parse command line */
- if ((3 != argc) && (2 != argc))
- {
- fprintf (stderr,
- "%s",
- "gnunet-helper-fs-publish needs exactly one or two arguments\n");
- return 1;
- }
- filename_expanded = argv[1];
- ex = argv[2];
- if ((NULL == ex) || (0 != strcmp (ex, "-")))
- {
- #if HAVE_LIBEXTRACTOR
- plugins = EXTRACTOR_plugin_add_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY);
- if (NULL != ex)
- plugins = EXTRACTOR_plugin_add_config (plugins,
- ex,
- EXTRACTOR_OPTION_DEFAULT_POLICY);
- #endif
- }
- /* scan tree to find out how much work there is to be done */
- if (GNUNET_OK != preprocess_file (filename_expanded, &root))
- {
- (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR, NULL, 0);
- #if HAVE_LIBEXTRACTOR
- EXTRACTOR_plugin_remove_all (plugins);
- #endif
- return 2;
- }
- /* signal that we're done counting files, so that a percentage of
- progress can now be calculated */
- if (GNUNET_OK !=
- write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_COUNTING_DONE,
- NULL,
- 0))
- {
- #if HAVE_LIBEXTRACTOR
- EXTRACTOR_plugin_remove_all (plugins);
- #endif
- return 3;
- }
- if (NULL != root)
- {
- if (GNUNET_OK != extract_files (root))
- {
- (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR,
- NULL,
- 0);
- free_tree (root);
- #if HAVE_LIBEXTRACTOR
- EXTRACTOR_plugin_remove_all (plugins);
- #endif
- return 4;
- }
- free_tree (root);
- }
- /* enable "clean" shutdown by telling parent that we are done */
- (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_FINISHED,
- NULL,
- 0);
- #if HAVE_LIBEXTRACTOR
- EXTRACTOR_plugin_remove_all (plugins);
- #endif
- return 0;
- }
/* end of gnunet-helper-fs-publish.c */
|