123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531 |
- Subject: netfilter: conntrack: cache route for forwarded connections
- ... to avoid per-packet FIB lookup if possible.
- The cached dst is re-used provided the input interface
- is the same as that of the previous packet in the same direction.
- If not, the cached dst is invalidated.
- For ipv6 we also need to store sernum, else dst_check doesn't work,
- pointed out by Eric Dumazet.
- This should speed up forwarding when conntrack is already in use
- anyway, especially when using reverse path filtering -- active RPF
- enforces two FIB lookups for each packet.
- Before the routing cache was removed this didn't matter, since RPF was
- performed only when the route cache didn't yield a result; without the
- route cache it comes at a higher price.
- Julian Anastasov suggested adding a NETDEV_UNREGISTER handler to
- avoid holding on to dsts of 'frozen' conntracks.
- Signed-off-by: Florian Westphal <fw@strlen.de>
- --- a/include/net/netfilter/nf_conntrack_extend.h
- +++ b/include/net/netfilter/nf_conntrack_extend.h
- @@ -30,6 +30,9 @@ enum nf_ct_ext_id {
- #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
- NF_CT_EXT_SYNPROXY,
- #endif
- +#if IS_ENABLED(CONFIG_NF_CONNTRACK_RTCACHE)
- + NF_CT_EXT_RTCACHE,
- +#endif
- NF_CT_EXT_NUM,
- };
-
- @@ -43,6 +46,7 @@ enum nf_ct_ext_id {
- #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout
- #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels
- #define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy
- +#define NF_CT_EXT_RTCACHE_TYPE struct nf_conn_rtcache
-
- /* Extensions: optional stuff which isn't permanently in struct. */
- struct nf_ct_ext {
- --- /dev/null
- +++ b/include/net/netfilter/nf_conntrack_rtcache.h
- @@ -0,0 +1,34 @@
- +#include <linux/gfp.h>
- +#include <net/netfilter/nf_conntrack.h>
- +#include <net/netfilter/nf_conntrack_extend.h>
- +
- +struct dst_entry;
- +
- +struct nf_conn_dst_cache { /* cached route for one conntrack direction */
- + struct dst_entry *dst; /* cached route entry, NULL while nothing is cached */
- + int iif; /* ifindex of the input device the entry was cached for, -1 if unset */
- +#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
- + u32 cookie; /* IPv6 only: fib6 node serial number recorded when dst was cached */
- +#endif
- +
- +};
- +
- +struct nf_conn_rtcache { /* payload of the NF_CT_EXT_RTCACHE conntrack extension */
- + struct nf_conn_dst_cache cached_dst[IP_CT_DIR_MAX]; /* indexed by enum ip_conntrack_dir */
- +};
- +
- +/* Return the rtcache extension attached to @ct, or NULL if there is none. */
- +static inline struct nf_conn_rtcache *nf_ct_rtcache_find(const struct nf_conn *ct)
- +{
- +#if !IS_ENABLED(CONFIG_NF_CONNTRACK_RTCACHE)
- +	return NULL;	/* extension support is compiled out */
- +#else
- +	return nf_ct_ext_find(ct, NF_CT_EXT_RTCACHE);
- +#endif
- +}
- +
- +static inline int nf_conn_rtcache_iif_get(const struct nf_conn_rtcache *rtc,
- + enum ip_conntrack_dir dir)
- +{
- + return rtc->cached_dst[dir].iif; /* input ifindex recorded for @dir */
- +}
- --- a/net/netfilter/Kconfig
- +++ b/net/netfilter/Kconfig
- @@ -114,6 +114,18 @@ config NF_CONNTRACK_EVENTS
-
- If unsure, say `N'.
-
- +config NF_CONNTRACK_RTCACHE
- + tristate "Cache route entries in conntrack objects"
- + depends on NETFILTER_ADVANCED
- + depends on NF_CONNTRACK
- + help
- + If this option is enabled, the connection tracking code will
- + cache routing information for each connection that is being
- + forwarded, at a cost of 32 bytes per conntrack object.
- +
- + To compile it as a module, choose M here. If unsure, say N.
- + The module will be called nf_conntrack_rtcache.
- +
- config NF_CONNTRACK_TIMEOUT
- bool 'Connection tracking timeout'
- depends on NETFILTER_ADVANCED
- --- a/net/netfilter/Makefile
- +++ b/net/netfilter/Makefile
- @@ -16,6 +16,9 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += n
- # connection tracking
- obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
-
- +# optional conntrack route cache extension
- +obj-$(CONFIG_NF_CONNTRACK_RTCACHE) += nf_conntrack_rtcache.o
- +
- # SCTP protocol connection tracking
- obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
- obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
- --- /dev/null
- +++ b/net/netfilter/nf_conntrack_rtcache.c
- @@ -0,0 +1,413 @@
- +/* route cache for netfilter.
- + *
- + * (C) 2014 Red Hat GmbH
- + *
- + * This program is free software; you can redistribute it and/or modify
- + * it under the terms of the GNU General Public License version 2 as
- + * published by the Free Software Foundation.
- + */
- +
- +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
- +
- +#include <linux/types.h>
- +#include <linux/netfilter.h>
- +#include <linux/skbuff.h>
- +#include <linux/stddef.h>
- +#include <linux/kernel.h>
- +#include <linux/netdevice.h>
- +#include <linux/export.h>
- +#include <linux/module.h>
- +
- +#include <net/dst.h>
- +
- +#include <net/netfilter/nf_conntrack.h>
- +#include <net/netfilter/nf_conntrack_core.h>
- +#include <net/netfilter/nf_conntrack_extend.h>
- +#include <net/netfilter/nf_conntrack_rtcache.h>
- +
- +#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
- +#include <net/ip6_fib.h>
- +#endif
- +
- +/* Drop the dst reference the cache holds for @dir (the dst may be NULL). */
- +static void __nf_conn_rtcache_destroy(struct nf_conn_rtcache *rtc,
- +				      enum ip_conntrack_dir dir)
- +{
- +	/* the slot's pointer is not cleared here, only the reference is put */
- +	dst_release(rtc->cached_dst[dir].dst);
- +}
- +
- +/* Extension destroy hook: release the cached dst of both directions, if any. */
- +static void nf_conn_rtcache_destroy(struct nf_conn *ct)
- +{
- +	struct nf_conn_rtcache *cache = nf_ct_rtcache_find(ct);
- +
- +	if (cache) {
- +		__nf_conn_rtcache_destroy(cache, IP_CT_DIR_ORIGINAL);
- +		__nf_conn_rtcache_destroy(cache, IP_CT_DIR_REPLY);
- +	}
- +}
- +
- +/* Attach an rtcache extension to @ct (best effort), both directions empty. */
- +static void nf_ct_rtcache_ext_add(struct nf_conn *ct)
- +{
- +	struct nf_conn_rtcache *cache;
- +	int dir;
- +
- +	cache = nf_ct_ext_add(ct, NF_CT_EXT_RTCACHE, GFP_ATOMIC);
- +	for (dir = 0; cache && dir < IP_CT_DIR_MAX; dir++) {
- +		cache->cached_dst[dir].iif = -1;
- +		cache->cached_dst[dir].dst = NULL;
- +	}
- +}
- +
- +/* Variant of nf_ct_rtcache_find() that yields NULL for untracked conntracks. */
- +static struct nf_conn_rtcache *
- +nf_ct_rtcache_find_usable(struct nf_conn *ct)
- +{
- +	return nf_ct_is_untracked(ct) ? NULL : nf_ct_rtcache_find(ct);
- +}
- +
- +static struct dst_entry *
- +nf_conn_rtcache_dst_get(const struct nf_conn_rtcache *rtc,
- + enum ip_conntrack_dir dir)
- +{
- + return rtc->cached_dst[dir].dst; /* cached dst for @dir (may be NULL); no reference is taken here */
- +}
- +
- +/* Cookie to pass to dst_check(): the fib6 node serial for IPv6, otherwise 0. */
- +static u32 nf_rtcache_get_cookie(int pf, const struct dst_entry *dst)
- +{
- +#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
- +	const struct rt6_info *route = (const struct rt6_info *)dst;
- +
- +	/* an IPv6 route that has no fib node also yields 0 */
- +	if (pf == NFPROTO_IPV6 && route->rt6i_node)
- +		return (u32)route->rt6i_node->fn_sernum;
- +#endif
- +	return 0;
- +}
- +
- +/* Record @dst and @iif as the cached route for direction @dir.
- + * A reference is taken on @dst and the previously cached entry is released.
- + */
- +static void nf_conn_rtcache_dst_set(int pf, struct nf_conn_rtcache *rtc,
- +				    struct dst_entry *dst,
- +				    enum ip_conntrack_dir dir, int iif)
- +{
- +	struct nf_conn_dst_cache *slot = &rtc->cached_dst[dir];
- +
- +	/* only store when the value actually changes */
- +	if (slot->iif != iif)
- +		slot->iif = iif;
- +
- +	if (slot->dst == dst)
- +		return;
- +
- +	dst_hold(dst);
- +	dst_release(xchg(&slot->dst, dst));
- +#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
- +	if (pf == NFPROTO_IPV6)
- +		slot->cookie = nf_rtcache_get_cookie(pf, dst);
- +#endif
- +}
- +
- +/* Empty the cache slot for @dir: release the cached dst and reset iif to -1. */
- +static void nf_conn_rtcache_dst_obsolete(struct nf_conn_rtcache *rtc,
- +					 enum ip_conntrack_dir dir)
- +{
- +	struct nf_conn_dst_cache *slot = &rtc->cached_dst[dir];
- +
- +	pr_debug("Invalidate iif %d for dir %d on cache %p\n",
- +		 slot->iif, dir, rtc);
- +
- +	dst_release(xchg(&slot->dst, NULL));
- +	slot->iif = -1;
- +}
- +
- +/* PRE_ROUTING hook body: attach the cached route to @skb if it is usable.
- + * Always returns NF_ACCEPT; without a usable entry the stack routes as usual.
- + */
- +static unsigned int nf_rtcache_in(u_int8_t pf, struct sk_buff *skb,
- +				  const struct nf_hook_state *state)
- +{
- +	struct nf_conn_rtcache *rtc;
- +	enum ip_conntrack_info ctinfo;
- +	enum ip_conntrack_dir dir;
- +	struct dst_entry *dst;
- +	struct nf_conn *ct;
- +	int iif;
- +	u32 cookie;
- +
- +	if (skb_dst(skb) || skb->sk)
- +		return NF_ACCEPT;
- +
- +	ct = nf_ct_get(skb, &ctinfo);
- +	if (!ct)
- +		return NF_ACCEPT;
- +
- +	rtc = nf_ct_rtcache_find_usable(ct);
- +	if (!rtc)
- +		return NF_ACCEPT;
- +
- +	/* if iif changes, don't use cache and let ip stack do route lookup.
- +	 * If rp_filter is enabled it might toss skb, so we don't want to
- +	 * avoid these checks.
- +	 */
- +	dir = CTINFO2DIR(ctinfo);
- +	iif = nf_conn_rtcache_iif_get(rtc, dir);
- +	if (state->in->ifindex != iif) {
- +		pr_debug("ct %p, iif %d, cached iif %d, skip cached entry\n",
- +			 ct, state->in->ifindex, iif);
- +		return NF_ACCEPT;
- +	}
- +	dst = nf_conn_rtcache_dst_get(rtc, dir);
- +	if (dst == NULL)
- +		return NF_ACCEPT;
- +
- +	/* NOTE(review): cookie is derived from dst here; the saved one is unused */
- +	cookie = nf_rtcache_get_cookie(pf, dst);
- +	dst = dst_check(dst, cookie);
- +	pr_debug("obtained dst %p for skb %p, cookie %u\n", dst, skb, cookie);
- +	if (likely(dst))
- +		skb_dst_set_noref(skb, dst);
- +	else
- +		nf_conn_rtcache_dst_obsolete(rtc, dir);
- +
- +	return NF_ACCEPT;
- +}
- +
- +/* FORWARD hook body: add the extension to unconfirmed conntracks and (re)fill
- + * the cache slot when the input interface differs from the recorded one.
- + */
- +static unsigned int nf_rtcache_forward(u_int8_t pf, struct sk_buff *skb,
- +				       const struct nf_hook_state *state)
- +{
- +	struct nf_conn_rtcache *rtc;
- +	enum ip_conntrack_info ctinfo;
- +	enum ip_conntrack_dir dir;
- +	struct nf_conn *ct;
- +	struct dst_entry *dst = skb_dst(skb);
- +	int iif;
- +
- +	ct = nf_ct_get(skb, &ctinfo);
- +	if (!ct)
- +		return NF_ACCEPT;
- +
- +	/* routes that go through an xfrm transformation are never cached */
- +	if (dst && dst_xfrm(dst))
- +		return NF_ACCEPT;
- +
- +	if (!nf_ct_is_confirmed(ct)) {
- +		if (!WARN_ON(nf_ct_rtcache_find(ct)))
- +			nf_ct_rtcache_ext_add(ct);
- +		return NF_ACCEPT;
- +	}
- +
- +	rtc = nf_ct_rtcache_find_usable(ct);
- +	if (!rtc)
- +		return NF_ACCEPT;
- +
- +	dir = CTINFO2DIR(ctinfo);
- +	iif = nf_conn_rtcache_iif_get(rtc, dir);
- +	pr_debug("ct %p, skb %p, dir %d, iif %d, cached iif %d\n",
- +		 ct, skb, dir, state->in->ifindex, iif);
- +	if (unlikely(state->in->ifindex != iif))
- +		nf_conn_rtcache_dst_set(pf, rtc, dst, dir, state->in->ifindex);
- +	return NF_ACCEPT;
- +}
- +
- +static unsigned int nf_rtcache_in4(void *priv,
- + struct sk_buff *skb,
- + const struct nf_hook_state *state)
- +{
- + return nf_rtcache_in(NFPROTO_IPV4, skb, state); /* IPv4 PRE_ROUTING entry point; priv is unused */
- +}
- +
- +static unsigned int nf_rtcache_forward4(void *priv,
- + struct sk_buff *skb,
- + const struct nf_hook_state *state)
- +{
- + return nf_rtcache_forward(NFPROTO_IPV4, skb, state); /* IPv4 FORWARD entry point; priv is unused */
- +}
- +
- +#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
- +static unsigned int nf_rtcache_in6(void *priv,
- + struct sk_buff *skb,
- + const struct nf_hook_state *state)
- +{
- + return nf_rtcache_in(NFPROTO_IPV6, skb, state); /* IPv6 PRE_ROUTING entry point; priv is unused */
- +}
- +
- +static unsigned int nf_rtcache_forward6(void *priv,
- + struct sk_buff *skb,
- + const struct nf_hook_state *state)
- +{
- + return nf_rtcache_forward(NFPROTO_IPV6, skb, state); /* IPv6 FORWARD entry point; priv is unused */
- +}
- +#endif
- +
- +/* Iterator callback: flush @ct's cache if it refers to the device in @data. */
- +static int nf_rtcache_dst_remove(struct nf_conn *ct, void *data)
- +{
- +	struct nf_conn_rtcache *cache = nf_ct_rtcache_find(ct);
- +	const struct net_device *gone = data;
- +
- +	if (!cache)
- +		return 0;
- +	if (cache->cached_dst[IP_CT_DIR_ORIGINAL].iif != gone->ifindex &&
- +	    cache->cached_dst[IP_CT_DIR_REPLY].iif != gone->ifindex)
- +		return 0;
- +
- +	nf_conn_rtcache_dst_obsolete(cache, IP_CT_DIR_ORIGINAL);
- +	nf_conn_rtcache_dst_obsolete(cache, IP_CT_DIR_REPLY);
- +	return 0;
- +}
- +
- +/* Netdevice notifier: on NETDEV_DOWN, flush cached routes tied to the device. */
- +static int nf_rtcache_netdev_event(struct notifier_block *this,
- +				   unsigned long event, void *ptr)
- +{
- +	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- +
- +	if (event != NETDEV_DOWN)
- +		return NOTIFY_DONE;
- +	nf_ct_iterate_cleanup(dev_net(dev), nf_rtcache_dst_remove, dev, 0, 0);
- +	return NOTIFY_DONE;
- +}
- +
- +static struct notifier_block nf_rtcache_notifier = {
- + .notifier_call = nf_rtcache_netdev_event, /* acts on NETDEV_DOWN only */
- +};
- +
- +static struct nf_hook_ops rtcache_ops[] = { /* every hook is registered at NF_IP_PRI_LAST */
- + { /* IPv4 PRE_ROUTING: try to reuse the cached route */
- + .hook = nf_rtcache_in4,
- + .pf = NFPROTO_IPV4,
- + .hooknum = NF_INET_PRE_ROUTING,
- + .priority = NF_IP_PRI_LAST,
- + },
- + { /* IPv4 FORWARD: add the extension / record the route */
- + .hook = nf_rtcache_forward4,
- + .pf = NFPROTO_IPV4,
- + .hooknum = NF_INET_FORWARD,
- + .priority = NF_IP_PRI_LAST,
- + },
- +#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
- + { /* IPv6 PRE_ROUTING: try to reuse the cached route */
- + .hook = nf_rtcache_in6,
- + .pf = NFPROTO_IPV6,
- + .hooknum = NF_INET_PRE_ROUTING,
- + .priority = NF_IP_PRI_LAST,
- + },
- + { /* IPv6 FORWARD: add the extension / record the route */
- + .hook = nf_rtcache_forward6,
- + .pf = NFPROTO_IPV6,
- + .hooknum = NF_INET_FORWARD,
- + .priority = NF_IP_PRI_LAST,
- + },
- +#endif
- +};
- +
- +static struct nf_ct_ext_type rtcache_extend __read_mostly = {
- + .len = sizeof(struct nf_conn_rtcache),
- + .align = __alignof__(struct nf_conn_rtcache),
- + .id = NF_CT_EXT_RTCACHE,
- + .destroy = nf_conn_rtcache_destroy, /* releases the cached dst of both directions */
- +};
- +
- +/* Module init: register the ct extension, the hooks and the netdev notifier. */
- +static int __init nf_conntrack_rtcache_init(void)
- +{
- +	int ret = nf_ct_extend_register(&rtcache_extend);
- +
- +	if (ret < 0) {
- +		pr_err("Unable to register extension\n"); /* pr_fmt adds the prefix */
- +		return ret;
- +	}
- +
- +	ret = nf_register_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops));
- +	if (ret < 0)
- +		goto err_extend;
- +
- +	ret = register_netdevice_notifier(&nf_rtcache_notifier);
- +	if (!ret)
- +		return 0;
- +
- +	nf_unregister_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops));
- +err_extend:
- +	nf_ct_extend_unregister(&rtcache_extend);
- +	return ret;
- +}
- +
- +/* Iterator callback: selects every conntrack that carries an rtcache. */
- +static int nf_rtcache_ext_remove(struct nf_conn *ct, void *data)
- +{
- +	/* @data is unused; the result is 1 when the extension is present */
- +	return nf_ct_rtcache_find(ct) != NULL;
- +}
- +
- +static bool __exit nf_conntrack_rtcache_wait_for_dying(struct net *net)
- +{
- + bool wait = false; /* becomes true if a dying conntrack of @net still has an rtcache */
- + int cpu;
- +
- + for_each_possible_cpu(cpu) {
- + struct nf_conntrack_tuple_hash *h;
- + struct hlist_nulls_node *n;
- + struct nf_conn *ct;
- + struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
- +
- + rcu_read_lock();
- + spin_lock_bh(&pcpu->lock); /* the per-cpu dying list is walked under its lock */
- +
- + hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
- + ct = nf_ct_tuplehash_to_ctrack(h);
- + if (nf_ct_rtcache_find(ct) != NULL) {
- + wait = true;
- + break; /* leaves this cpu's list only; the remaining cpus are still scanned */
- + }
- + }
- + spin_unlock_bh(&pcpu->lock);
- + rcu_read_unlock();
- + }
- +
- + return wait; /* caller sleeps and retries while this is true */
- +}
- +
- +static void __exit nf_conntrack_rtcache_fini(void)
- +{ /* module exit: undo init and wait until no conntrack carries the extension */
- + struct net *net;
- + int count = 0; /* number of 200 ms sleeps so far, summed over all namespaces */
- +
- + /* remove hooks so no new connections get rtcache extension */
- + nf_unregister_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops));
- +
- + synchronize_net();
- +
- + unregister_netdevice_notifier(&nf_rtcache_notifier);
- +
- + rtnl_lock(); /* held across both for_each_net() walks, including the sleeps */
- +
- + /* zap all conntracks with rtcache extension */
- + for_each_net(net)
- + nf_ct_iterate_cleanup(net, nf_rtcache_ext_remove, NULL, 0, 0);
- +
- + for_each_net(net) {
- + /* .. and make sure they're gone from dying list, too */
- + while (nf_conntrack_rtcache_wait_for_dying(net)) {
- + msleep(200);
- + WARN_ONCE(++count > 25, "Waiting for all rtcache conntracks to go away\n");
- + }
- + }
- +
- + rtnl_unlock();
- + synchronize_net();
- + nf_ct_extend_unregister(&rtcache_extend); /* last step, after the wait loop above */
- +}
- +module_init(nf_conntrack_rtcache_init);
- +module_exit(nf_conntrack_rtcache_fini);
- +
- +MODULE_LICENSE("GPL");
- +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
- +MODULE_DESCRIPTION("Conntrack route cache extension");
|