123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455 |
- /*
- This file is part of GNUnet
- Copyright (C) 2005-2012 GNUnet e.V.
- GNUnet is free software: you can redistribute it and/or modify it
- under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation, either version 3 of the License,
- or (at your option) any later version.
- GNUnet is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- SPDX-License-Identifier: AGPL3.0-or-later
- */
- /**
- * @file fs/fs_sharetree.c
- * @brief code to manipulate the 'struct GNUNET_FS_ShareTreeItem' tree
- * @author LRN
- * @author Christian Grothoff
- */
- #include "platform.h"
- #include "gnunet_fs_service.h"
- #include "gnunet_scheduler_lib.h"
- #include <pthread.h>
- /**
- * Entry for each unique keyword to track how often
- * it occured. Contains the keyword and the counter.
- */
- struct KeywordCounter
- {
- /**
- * This is a doubly-linked list
- */
- struct KeywordCounter *prev;
- /**
- * This is a doubly-linked list
- */
- struct KeywordCounter *next;
- /**
- * Keyword that was found.
- */
- const char *value;
- /**
- * How many files have this keyword?
- */
- unsigned int count;
- };
- /**
- * Aggregate information we keep for meta data in each directory.
- */
- struct MetaCounter
- {
- /**
- * This is a doubly-linked list
- */
- struct MetaCounter *prev;
- /**
- * This is a doubly-linked list
- */
- struct MetaCounter *next;
- /**
- * Name of the plugin that provided that piece of metadata
- */
- const char *plugin_name;
- /**
- * MIME-type of the metadata itself
- */
- const char *data_mime_type;
- /**
- * The actual meta data.
- */
- const char *data;
- /**
- * Number of bytes in 'data'.
- */
- size_t data_size;
- /**
- * Type of the data
- */
- enum EXTRACTOR_MetaType type;
- /**
- * Format of the data
- */
- enum EXTRACTOR_MetaFormat format;
- /**
- * How many files have meta entries matching this value?
- * (type and format do not have to match).
- */
- unsigned int count;
- };
- /**
- * A structure that forms a singly-linked list that serves as a stack
- * for metadata-processing function.
- */
- struct TrimContext
- {
- /**
- * Map from the hash over the keyword to an 'struct KeywordCounter *'
- * counter that says how often this keyword was
- * encountered in the current directory.
- */
- struct GNUNET_CONTAINER_MultiHashMap *keywordcounter;
- /**
- * Map from the hash over the metadata to an 'struct MetaCounter *'
- * counter that says how often this metadata was
- * encountered in the current directory.
- */
- struct GNUNET_CONTAINER_MultiHashMap *metacounter;
- /**
- * Position we are currently manipulating.
- */
- struct GNUNET_FS_ShareTreeItem *pos;
- /**
- * Number of times an item has to be found to be moved to the parent.
- */
- unsigned int move_threshold;
- };
- /**
- * Add the given keyword to the keyword statistics tracker.
- *
- * @param cls the multihashmap we store the keyword counters in
- * @param keyword the keyword to count
- * @param is_mandatory ignored
- * @return always GNUNET_OK
- */
- static int
- add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
- {
- struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
- struct KeywordCounter *cnt;
- struct GNUNET_HashCode hc;
- size_t klen;
- klen = strlen (keyword) + 1;
- GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
- cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
- if (cnt == NULL)
- {
- cnt = GNUNET_malloc (sizeof(struct KeywordCounter) + klen);
- cnt->value = (const char *) &cnt[1];
- GNUNET_memcpy (&cnt[1], keyword, klen);
- GNUNET_assert (GNUNET_OK ==
- GNUNET_CONTAINER_multihashmap_put (mcm,
- &hc, cnt,
- GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY));
- }
- cnt->count++;
- return GNUNET_OK;
- }
- /**
- * Function called on each meta data item. Increments the
- * respective counter.
- *
- * @param cls the container multihashmap to update
- * @param plugin_name name of the plugin that produced this value;
- * special values can be used (i.e. '<zlib>' for zlib being
- * used in the main libextractor library and yielding
- * meta data).
- * @param type libextractor-type describing the meta data
- * @param format basic format information about data
- * @param data_mime_type mime-type of data (not of the original file);
- * can be NULL (if mime-type is not known)
- * @param data actual meta-data found
- * @param data_len number of bytes in data
- * @return 0 to continue extracting / iterating
- */
- static int
- add_to_meta_counter (void *cls, const char *plugin_name,
- enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat
- format,
- const char *data_mime_type, const char *data, size_t
- data_len)
- {
- struct GNUNET_CONTAINER_MultiHashMap *map = cls;
- struct GNUNET_HashCode key;
- struct MetaCounter *cnt;
- GNUNET_CRYPTO_hash (data, data_len, &key);
- cnt = GNUNET_CONTAINER_multihashmap_get (map, &key);
- if (NULL == cnt)
- {
- cnt = GNUNET_new (struct MetaCounter);
- cnt->data = data;
- cnt->data_size = data_len;
- cnt->plugin_name = plugin_name;
- cnt->type = type;
- cnt->format = format;
- cnt->data_mime_type = data_mime_type;
- GNUNET_assert (GNUNET_OK ==
- GNUNET_CONTAINER_multihashmap_put (map,
- &key, cnt,
- GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY));
- }
- cnt->count++;
- return 0;
- }
- /**
- * Remove keywords above the threshold.
- *
- * @param cls the 'struct TrimContext' with the pos to remove the keywords from
- * @param keyword the keyword to check
- * @param is_mandatory ignored
- * @return always GNUNET_OK
- */
- static int
- remove_high_frequency_keywords (void *cls, const char *keyword, int
- is_mandatory)
- {
- struct TrimContext *tc = cls;
- struct KeywordCounter *counter;
- struct GNUNET_HashCode hc;
- size_t klen;
- klen = strlen (keyword) + 1;
- GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
- counter = GNUNET_CONTAINER_multihashmap_get (tc->keywordcounter, &hc);
- GNUNET_assert (NULL != counter);
- if (counter->count < tc->move_threshold)
- return GNUNET_OK;
- GNUNET_FS_uri_ksk_remove_keyword (tc->pos->ksk_uri,
- counter->value);
- return GNUNET_OK;
- }
- /**
- * Move "frequent" keywords over to the target ksk uri, free the
- * counters.
- *
- * @param cls the 'struct TrimContext'
- * @param key key of the entry
- * @param value the 'struct KeywordCounter'
- * @return GNUNET_YES (always)
- */
- static int
- migrate_and_drop_keywords (void *cls, const struct GNUNET_HashCode *key,
- void *value)
- {
- struct TrimContext *tc = cls;
- struct KeywordCounter *counter = value;
- if (counter->count >= tc->move_threshold)
- {
- if (NULL == tc->pos->ksk_uri)
- tc->pos->ksk_uri = GNUNET_FS_uri_ksk_create_from_args (1,
- &counter->value);
- else
- GNUNET_FS_uri_ksk_add_keyword (tc->pos->ksk_uri, counter->value,
- GNUNET_NO);
- }
- GNUNET_assert (GNUNET_YES ==
- GNUNET_CONTAINER_multihashmap_remove (tc->keywordcounter,
- key,
- counter));
- GNUNET_free (counter);
- return GNUNET_YES;
- }
- /**
- * Copy "frequent" metadata items over to the
- * target metadata container, free the counters.
- *
- * @param cls the 'struct TrimContext'
- * @param key key of the entry
- * @param value the 'struct KeywordCounter'
- * @return GNUNET_YES (always)
- */
- static int
- migrate_and_drop_metadata (void *cls, const struct GNUNET_HashCode *key,
- void *value)
- {
- struct TrimContext *tc = cls;
- struct MetaCounter *counter = value;
- if (counter->count >= tc->move_threshold)
- {
- if (NULL == tc->pos->meta)
- tc->pos->meta = GNUNET_CONTAINER_meta_data_create ();
- GNUNET_CONTAINER_meta_data_insert (tc->pos->meta,
- counter->plugin_name,
- counter->type,
- counter->format,
- counter->data_mime_type, counter->data,
- counter->data_size);
- }
- GNUNET_assert (GNUNET_YES ==
- GNUNET_CONTAINER_multihashmap_remove (tc->metacounter,
- key,
- counter));
- GNUNET_free (counter);
- return GNUNET_YES;
- }
- /**
- * Process a share item tree, moving frequent keywords up and
- * copying frequent metadata up.
- *
- * @param tc trim context with hash maps to use
- * @param tree tree to trim
- */
- static void
- share_tree_trim (struct TrimContext *tc,
- struct GNUNET_FS_ShareTreeItem *tree)
- {
- struct GNUNET_FS_ShareTreeItem *pos;
- unsigned int num_children;
- /* first, trim all children */
- num_children = 0;
- for (pos = tree->children_head; NULL != pos; pos = pos->next)
- {
- share_tree_trim (tc, pos);
- num_children++;
- }
- /* consider adding filename to directory meta data */
- if (tree->is_directory == GNUNET_YES)
- {
- const char *user = getenv ("USER");
- if ((user == NULL) ||
- (0 != strncasecmp (user, tree->short_filename, strlen (user))))
- {
- /* only use filename if it doesn't match $USER */
- if (NULL == tree->meta)
- tree->meta = GNUNET_CONTAINER_meta_data_create ();
- GNUNET_CONTAINER_meta_data_insert (tree->meta, "<libgnunetfs>",
- EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain", tree->short_filename,
- strlen (tree->short_filename) + 1);
- }
- }
- if (1 >= num_children)
- return; /* nothing to trim */
- /* now, count keywords and meta data in children */
- for (pos = tree->children_head; NULL != pos; pos = pos->next)
- {
- if (NULL != pos->meta)
- GNUNET_CONTAINER_meta_data_iterate (pos->meta, &add_to_meta_counter,
- tc->metacounter);
- if (NULL != pos->ksk_uri)
- GNUNET_FS_uri_ksk_get_keywords (pos->ksk_uri, &add_to_keyword_counter,
- tc->keywordcounter);
- }
- /* calculate threshold for moving keywords / meta data */
- tc->move_threshold = 1 + (num_children / 2);
- /* remove high-frequency keywords from children */
- for (pos = tree->children_head; NULL != pos; pos = pos->next)
- {
- tc->pos = pos;
- if (NULL != pos->ksk_uri)
- {
- struct GNUNET_FS_Uri *ksk_uri_copy = GNUNET_FS_uri_dup (pos->ksk_uri);
- GNUNET_FS_uri_ksk_get_keywords (ksk_uri_copy,
- &remove_high_frequency_keywords, tc);
- GNUNET_FS_uri_destroy (ksk_uri_copy);
- }
- }
- /* add high-frequency meta data and keywords to parent */
- tc->pos = tree;
- GNUNET_CONTAINER_multihashmap_iterate (tc->keywordcounter,
- &migrate_and_drop_keywords,
- tc);
- GNUNET_CONTAINER_multihashmap_iterate (tc->metacounter,
- &migrate_and_drop_metadata,
- tc);
- }
- /**
- * Process a share item tree, moving frequent keywords up and
- * copying frequent metadata up.
- *
- * @param toplevel toplevel directory in the tree, returned by the scanner
- */
- void
- GNUNET_FS_share_tree_trim (struct GNUNET_FS_ShareTreeItem *toplevel)
- {
- struct TrimContext tc;
- if (toplevel == NULL)
- return;
- tc.keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO);
- tc.metacounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO);
- share_tree_trim (&tc, toplevel);
- GNUNET_CONTAINER_multihashmap_destroy (tc.keywordcounter);
- GNUNET_CONTAINER_multihashmap_destroy (tc.metacounter);
- }
- /**
- * Release memory of a share item tree.
- *
- * @param toplevel toplevel of the tree to be freed
- */
- void
- GNUNET_FS_share_tree_free (struct GNUNET_FS_ShareTreeItem *toplevel)
- {
- struct GNUNET_FS_ShareTreeItem *pos;
- while (NULL != (pos = toplevel->children_head))
- GNUNET_FS_share_tree_free (pos);
- if (NULL != toplevel->parent)
- GNUNET_CONTAINER_DLL_remove (toplevel->parent->children_head,
- toplevel->parent->children_tail,
- toplevel);
- if (NULL != toplevel->meta)
- GNUNET_CONTAINER_meta_data_destroy (toplevel->meta);
- if (NULL != toplevel->ksk_uri)
- GNUNET_FS_uri_destroy (toplevel->ksk_uri);
- GNUNET_free_non_null (toplevel->filename);
- GNUNET_free_non_null (toplevel->short_filename);
- GNUNET_free (toplevel);
- }
- /* end fs_sharetree.c */
|