123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343 |
- From: Alexander Duyck <alexander.h.duyck@redhat.com>
- Date: Wed, 31 Dec 2014 10:55:54 -0800
- Subject: [PATCH] fib_trie: Optimize fib_table_lookup to avoid wasting
- time on loops/variables
- This patch is meant to reduce the complexity of fib_table_lookup by reducing
- the number of variables to the bare minimum while still keeping the same, if
- not improved, functionality compared to the original.
- Most of this change was motivated by the desire to rid the function of
- chopped_off and current_prefix_length, as they actually added very little to
- the function since they only applied when computing the cindex. I was able
- to replace them mostly with just a check for the prefix match. As long as
- the prefix of the key and that of the node being tested are the same, we
- know we can search the tnode fully rather than just testing cindex 0.
- The second portion of the change ended up being a massive reordering.
- Originally the calls to check_leaf were up near the start of the loop, and
- the backtracing and descending into lower levels of tnodes came later. This
- didn't make much sense, as the structure of the tree means the leaves are
- always the last thing to be tested. As such, I reordered things so that we
- instead have a loop that will delve into the tree and only exit when we
- have either found a leaf or exhausted the tree. The advantage of
- rearranging things like this is that we can fully inline check_leaf, since
- there is now only one reference to it in the function.
- Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
- Signed-off-by: David S. Miller <davem@davemloft.net>
- ---
- --- a/net/ipv4/fib_trie.c
- +++ b/net/ipv4/fib_trie.c
- @@ -90,6 +90,9 @@ typedef unsigned int t_key;
- #define IS_TNODE(n) ((n)->bits)
- #define IS_LEAF(n) (!(n)->bits)
-
- +#define get_shift(_kv) (KEYLENGTH - (_kv)->pos - (_kv)->bits)
- +#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> get_shift(_kv))
- +
- struct tnode {
- t_key key;
- unsigned char bits; /* 2log(KEYLENGTH) bits needed */
- @@ -1281,7 +1284,7 @@ static int check_leaf(struct fib_table *
- continue;
- fib_alias_accessed(fa);
- err = fib_props[fa->fa_type].error;
- - if (err) {
- + if (unlikely(err < 0)) {
- #ifdef CONFIG_IP_FIB_TRIE_STATS
- this_cpu_inc(t->stats->semantic_match_passed);
- #endif
- @@ -1303,7 +1306,7 @@ static int check_leaf(struct fib_table *
- res->prefixlen = li->plen;
- res->nh_sel = nhsel;
- res->type = fa->fa_type;
- - res->scope = fa->fa_info->fib_scope;
- + res->scope = fi->fib_scope;
- res->fi = fi;
- res->table = tb;
- res->fa_head = &li->falh;
- @@ -1321,23 +1324,24 @@ static int check_leaf(struct fib_table *
- return 1;
- }
-
- +static inline t_key prefix_mismatch(t_key key, struct tnode *n)
- +{
- + t_key prefix = n->key;
- +
- + return (key ^ prefix) & (prefix | -prefix);
- +}
- +
- int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
- struct fib_result *res, int fib_flags)
- {
- - struct trie *t = (struct trie *) tb->tb_data;
- + struct trie *t = (struct trie *)tb->tb_data;
- #ifdef CONFIG_IP_FIB_TRIE_STATS
- struct trie_use_stats __percpu *stats = t->stats;
- #endif
- - int ret;
- - struct tnode *n;
- - struct tnode *pn;
- - unsigned int pos, bits;
- - t_key key = ntohl(flp->daddr);
- - unsigned int chopped_off;
- - t_key cindex = 0;
- - unsigned int current_prefix_length = KEYLENGTH;
- - struct tnode *cn;
- - t_key pref_mismatch;
- + const t_key key = ntohl(flp->daddr);
- + struct tnode *n, *pn;
- + t_key cindex;
- + int ret = 1;
-
- rcu_read_lock();
-
- @@ -1349,170 +1353,102 @@ int fib_table_lookup(struct fib_table *t
- this_cpu_inc(stats->gets);
- #endif
-
- - /* Just a leaf? */
- - if (IS_LEAF(n)) {
- - ret = check_leaf(tb, t, n, key, flp, res, fib_flags);
- - goto found;
- - }
- -
- pn = n;
- - chopped_off = 0;
- -
- - while (pn) {
- - pos = pn->pos;
- - bits = pn->bits;
- + cindex = 0;
-
- - if (!chopped_off)
- - cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
- - pos, bits);
- -
- - n = tnode_get_child_rcu(pn, cindex);
- -
- - if (n == NULL) {
- -#ifdef CONFIG_IP_FIB_TRIE_STATS
- - this_cpu_inc(stats->null_node_hit);
- -#endif
- - goto backtrace;
- - }
- + /* Step 1: Travel to the longest prefix match in the trie */
- + for (;;) {
- + unsigned long index = get_index(key, n);
- +
- + /* This bit of code is a bit tricky but it combines multiple
- + * checks into a single check. The prefix consists of the
- + * prefix plus zeros for the "bits" in the prefix. The index
- + * is the difference between the key and this value. From
- + * this we can actually derive several pieces of data.
- + * if !(index >> bits)
- + * we know the value is child index
- + * else
- + * we have a mismatch in skip bits and failed
- + */
- + if (index >> n->bits)
- + break;
-
- - if (IS_LEAF(n)) {
- - ret = check_leaf(tb, t, n, key, flp, res, fib_flags);
- - if (ret > 0)
- - goto backtrace;
- + /* we have found a leaf. Prefixes have already been compared */
- + if (IS_LEAF(n))
- goto found;
- - }
- -
- - cn = n;
-
- - /*
- - * It's a tnode, and we can do some extra checks here if we
- - * like, to avoid descending into a dead-end branch.
- - * This tnode is in the parent's child array at index
- - * key[p_pos..p_pos+p_bits] but potentially with some bits
- - * chopped off, so in reality the index may be just a
- - * subprefix, padded with zero at the end.
- - * We can also take a look at any skipped bits in this
- - * tnode - everything up to p_pos is supposed to be ok,
- - * and the non-chopped bits of the index (se previous
- - * paragraph) are also guaranteed ok, but the rest is
- - * considered unknown.
- - *
- - * The skipped bits are key[pos+bits..cn->pos].
- - */
- -
- - /* If current_prefix_length < pos+bits, we are already doing
- - * actual prefix matching, which means everything from
- - * pos+(bits-chopped_off) onward must be zero along some
- - * branch of this subtree - otherwise there is *no* valid
- - * prefix present. Here we can only check the skipped
- - * bits. Remember, since we have already indexed into the
- - * parent's child array, we know that the bits we chopped of
- - * *are* zero.
- + /* only record pn and cindex if we are going to be chopping
- + * bits later. Otherwise we are just wasting cycles.
- */
- -
- - /* NOTA BENE: Checking only skipped bits
- - for the new node here */
- -
- - if (current_prefix_length < pos+bits) {
- - if (tkey_extract_bits(cn->key, current_prefix_length,
- - cn->pos - current_prefix_length)
- - || !(cn->child[0]))
- - goto backtrace;
- + if (index) {
- + pn = n;
- + cindex = index;
- }
-
- - /*
- - * If chopped_off=0, the index is fully validated and we
- - * only need to look at the skipped bits for this, the new,
- - * tnode. What we actually want to do is to find out if
- - * these skipped bits match our key perfectly, or if we will
- - * have to count on finding a matching prefix further down,
- - * because if we do, we would like to have some way of
- - * verifying the existence of such a prefix at this point.
- - */
- -
- - /* The only thing we can do at this point is to verify that
- - * any such matching prefix can indeed be a prefix to our
- - * key, and if the bits in the node we are inspecting that
- - * do not match our key are not ZERO, this cannot be true.
- - * Thus, find out where there is a mismatch (before cn->pos)
- - * and verify that all the mismatching bits are zero in the
- - * new tnode's key.
- - */
- + n = rcu_dereference(n->child[index]);
- + if (unlikely(!n))
- + goto backtrace;
- + }
-
- - /*
- - * Note: We aren't very concerned about the piece of
- - * the key that precede pn->pos+pn->bits, since these
- - * have already been checked. The bits after cn->pos
- - * aren't checked since these are by definition
- - * "unknown" at this point. Thus, what we want to see
- - * is if we are about to enter the "prefix matching"
- - * state, and in that case verify that the skipped
- - * bits that will prevail throughout this subtree are
- - * zero, as they have to be if we are to find a
- - * matching prefix.
- + /* Step 2: Sort out leaves and begin backtracing for longest prefix */
- + for (;;) {
- + /* record the pointer where our next node pointer is stored */
- + struct tnode __rcu **cptr = n->child;
- +
- + /* This test verifies that none of the bits that differ
- + * between the key and the prefix exist in the region of
- + * the lsb and higher in the prefix.
- */
- + if (unlikely(prefix_mismatch(key, n)))
- + goto backtrace;
-
- - pref_mismatch = mask_pfx(cn->key ^ key, cn->pos);
- + /* exit out and process leaf */
- + if (unlikely(IS_LEAF(n)))
- + break;
-
- - /*
- - * In short: If skipped bits in this node do not match
- - * the search key, enter the "prefix matching"
- - * state.directly.
- + /* Don't bother recording parent info. Since we are in
- + * prefix match mode we will have to come back to wherever
- + * we started this traversal anyway
- */
- - if (pref_mismatch) {
- - /* fls(x) = __fls(x) + 1 */
- - int mp = KEYLENGTH - __fls(pref_mismatch) - 1;
- -
- - if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0)
- - goto backtrace;
- -
- - if (current_prefix_length >= cn->pos)
- - current_prefix_length = mp;
- - }
- -
- - pn = n; /* Descend */
- - chopped_off = 0;
- - continue;
-
- + while ((n = rcu_dereference(*cptr)) == NULL) {
- backtrace:
- - chopped_off++;
- -
- - /* As zero don't change the child key (cindex) */
- - while ((chopped_off <= pn->bits)
- - && !(cindex & (1<<(chopped_off-1))))
- - chopped_off++;
- -
- - /* Decrease current_... with bits chopped off */
- - if (current_prefix_length > pn->pos + pn->bits - chopped_off)
- - current_prefix_length = pn->pos + pn->bits
- - - chopped_off;
- -
- - /*
- - * Either we do the actual chop off according or if we have
- - * chopped off all bits in this tnode walk up to our parent.
- - */
- -
- - if (chopped_off <= pn->bits) {
- - cindex &= ~(1 << (chopped_off-1));
- - } else {
- - struct tnode *parent = node_parent_rcu(pn);
- - if (!parent)
- - goto failed;
- -
- - /* Get Child's index */
- - cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
- - pn = parent;
- - chopped_off = 0;
- -
- #ifdef CONFIG_IP_FIB_TRIE_STATS
- - this_cpu_inc(stats->backtrack);
- + if (!n)
- + this_cpu_inc(stats->null_node_hit);
- #endif
- - goto backtrace;
- + /* If we are at cindex 0 there are no more bits for
- + * us to strip at this level so we must ascend back
- + * up one level to see if there are any more bits to
- + * be stripped there.
- + */
- + while (!cindex) {
- + t_key pkey = pn->key;
- +
- + pn = node_parent_rcu(pn);
- + if (unlikely(!pn))
- + goto failed;
- +#ifdef CONFIG_IP_FIB_TRIE_STATS
- + this_cpu_inc(stats->backtrack);
- +#endif
- + /* Get Child's index */
- + cindex = get_index(pkey, pn);
- + }
- +
- + /* strip the least significant bit from the cindex */
- + cindex &= cindex - 1;
- +
- + /* grab pointer for next child node */
- + cptr = &pn->child[cindex];
- }
- }
- -failed:
- - ret = 1;
- +
- found:
- + /* Step 3: Process the leaf, if that fails fall back to backtracing */
- + ret = check_leaf(tb, t, n, key, flp, res, fib_flags);
- + if (unlikely(ret > 0))
- + goto backtrace;
- +failed:
- rcu_read_unlock();
- return ret;
- }
|