123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566 |
- From e3777dd42dc6f1b9cb099836707a3e7971dcf4df Mon Sep 17 00:00:00 2001
- From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
- Date: Wed, 13 Mar 2019 20:54:49 +0000
- Subject: [PATCH] net: sched: Introduce act_ctinfo action
- ctinfo is a new tc filter action module. It is designed to restore DSCPs
- stored in conntrack marks
- The feature is intended for use and has been found useful for restoring
- ingress classifications based on egress classifications across links
- that bleach or otherwise change DSCP, typically home ISP Internet links.
- Restoring DSCP on ingress on the WAN link allows qdiscs such as CAKE to
- shape inbound packets according to policies that are easier to implement
- on egress.
- Ingress classification is traditionally a challenging task since
- iptables rules haven't yet run and tc filter/eBPF programs are pre-NAT
- lookups, hence are unable to see internal IPv4 addresses as used on the
- typical home masquerading gateway.
- ctinfo understands the following parameters:
- dscp mask[/statemask]
- mask - a 32 bit mask of at least 6 contiguous bits where conndscp will
- place the DSCP in conntrack mark. The DSCP is left-shifted by the
- number of unset lower bits of the mask before storing into the mark
- field.
- statemask - a 32 bit mask of (usually) 1 bit length, outside the area
- specified by mask. This represents a conditional operation flag the
- DSCP is only restored if the flag is set. This is useful to implement a
- 'one shot' iptables based classification where the 'complicated'
- iptables rules are only run once to classify the connection on initial
- (egress) packet and subsequent packets are all marked/restored with the
- same DSCP. A mask of zero disables the conditional behaviour.
- optional parameters:
- zone - conntrack zone
- control - action related control (reclassify | pipe | drop | continue |
- ok | goto chain <CHAIN_INDEX>
- Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
- ---
- include/net/tc_act/tc_ctinfo.h | 33 +++
- include/uapi/linux/pkt_cls.h | 3 +-
- include/uapi/linux/tc_act/tc_ctinfo.h | 29 ++
- net/sched/Kconfig | 13 +
- net/sched/Makefile | 1 +
- net/sched/act_ctinfo.c | 394 ++++++++++++++++++++++++++
- 6 files changed, 472 insertions(+), 1 deletion(-)
- create mode 100644 include/net/tc_act/tc_ctinfo.h
- create mode 100644 include/uapi/linux/tc_act/tc_ctinfo.h
- create mode 100644 net/sched/act_ctinfo.c
- --- /dev/null
- +++ b/include/net/tc_act/tc_ctinfo.h
- @@ -0,0 +1,33 @@
- +/* SPDX-License-Identifier: GPL-2.0 */
- +#ifndef __NET_TC_CTINFO_H
- +#define __NET_TC_CTINFO_H
- +
- +#include <net/act_api.h>
- +
- +struct tcf_ctinfo_params {
- + struct rcu_head rcu;
- + struct net *net;
- + u32 dscpmask;
- + u32 dscpstatemask;
- + u32 cpmarkmask;
- + u16 zone;
- + u8 mode;
- + u8 dscpmaskshift;
- +};
- +
- +struct tcf_ctinfo {
- + struct tc_action common;
- + struct tcf_ctinfo_params __rcu *params;
- + u64 stats_dscp_set;
- + u64 stats_dscp_error;
- + u64 stats_cpmark_set;
- +};
- +
- +enum {
- + CTINFO_MODE_DSCP = BIT(0),
- + CTINFO_MODE_CPMARK = BIT(1)
- +};
- +
- +#define to_ctinfo(a) ((struct tcf_ctinfo *)a)
- +
- +#endif /* __NET_TC_CTINFO_H */
- --- a/include/uapi/linux/pkt_cls.h
- +++ b/include/uapi/linux/pkt_cls.h
- @@ -66,7 +66,8 @@ enum {
- TCA_ID_UNSPEC=0,
- TCA_ID_POLICE=1,
- /* other actions go here */
- - __TCA_ID_MAX=255
- + TCA_ID_CTINFO=27,
- + __TCA_ID_MAX = 255
- };
-
- #define TCA_ID_MAX __TCA_ID_MAX
- --- /dev/null
- +++ b/include/uapi/linux/tc_act/tc_ctinfo.h
- @@ -0,0 +1,29 @@
- +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
- +#ifndef __UAPI_TC_CTINFO_H
- +#define __UAPI_TC_CTINFO_H
- +
- +#include <linux/types.h>
- +#include <linux/pkt_cls.h>
- +
- +struct tc_ctinfo {
- + tc_gen;
- +};
- +
- +enum {
- + TCA_CTINFO_UNSPEC,
- + TCA_CTINFO_PAD,
- + TCA_CTINFO_TM,
- + TCA_CTINFO_ACT,
- + TCA_CTINFO_ZONE,
- + TCA_CTINFO_PARMS_DSCP_MASK,
- + TCA_CTINFO_PARMS_DSCP_STATEMASK,
- + TCA_CTINFO_PARMS_CPMARK_MASK,
- + TCA_CTINFO_STATS_DSCP_SET,
- + TCA_CTINFO_STATS_DSCP_ERROR,
- + TCA_CTINFO_STATS_CPMARK_SET,
- + __TCA_CTINFO_MAX
- +};
- +
- +#define TCA_CTINFO_MAX (__TCA_CTINFO_MAX - 1)
- +
- +#endif
- --- a/net/sched/Kconfig
- +++ b/net/sched/Kconfig
- @@ -769,6 +769,19 @@ config NET_ACT_CONNMARK
- To compile this code as a module, choose M here: the
- module will be called act_connmark.
-
- +config NET_ACT_CTINFO
- + tristate "Netfilter Connmark to DSCP Retriever"
- + depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
- + depends on NF_CONNTRACK && NF_CONNTRACK_MARK
- + help
- + Say Y here to allow transfer of a connmark stored DSCP into
- + ipv4/v6 diffserv
- +
- + If unsure, say N.
- +
- + To compile this code as a module, choose M here: the
- + module will be called act_ctinfo.
- +
- config NET_ACT_SKBMOD
- tristate "skb data modification action"
- depends on NET_CLS_ACT
- --- a/net/sched/Makefile
- +++ b/net/sched/Makefile
- @@ -21,6 +21,7 @@ obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
- obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o
- obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o
- obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o
- +obj-$(CONFIG_NET_ACT_CTINFO) += act_ctinfo.o
- obj-$(CONFIG_NET_ACT_SKBMOD) += act_skbmod.o
- obj-$(CONFIG_NET_ACT_IFE) += act_ife.o
- obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o
- --- /dev/null
- +++ b/net/sched/act_ctinfo.c
- @@ -0,0 +1,394 @@
- +// SPDX-License-Identifier: GPL-2.0+
- +/* net/sched/act_ctinfo.c netfilter ctinfo connmark actions
- + *
- + * Copyright (c) 2019 Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
- + */
- +
- +#include <linux/module.h>
- +#include <linux/init.h>
- +#include <linux/kernel.h>
- +#include <linux/skbuff.h>
- +#include <linux/rtnetlink.h>
- +#include <linux/pkt_cls.h>
- +#include <linux/ip.h>
- +#include <linux/ipv6.h>
- +#include <net/netlink.h>
- +#include <net/pkt_sched.h>
- +#include <net/act_api.h>
- +#include <net/pkt_cls.h>
- +#include <uapi/linux/tc_act/tc_ctinfo.h>
- +#include <net/tc_act/tc_ctinfo.h>
- +
- +#include <net/netfilter/nf_conntrack.h>
- +#include <net/netfilter/nf_conntrack_core.h>
- +#include <net/netfilter/nf_conntrack_ecache.h>
- +#include <net/netfilter/nf_conntrack_zones.h>
- +
- +static struct tc_action_ops act_ctinfo_ops;
- +static unsigned int ctinfo_net_id;
- +
- +static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
- + struct tcf_ctinfo_params *cp,
- + struct sk_buff *skb, int wlen, int proto)
- +{
- + u8 dscp,newdscp;
- +
- + newdscp = (((ct->mark & cp->dscpmask) >> cp->dscpmaskshift) << 2) &
- + ~INET_ECN_MASK;
- +
- + /* mark contains DSCP so restore DSCP bits from ct->mark into diffserv */
- + /* using overlimits stats to count how many DSCP updates */
- + switch (proto) {
- + case NFPROTO_IPV4:
- + dscp = ipv4_get_dsfield(ip_hdr(skb)) & ~INET_ECN_MASK;
- + if (dscp != newdscp) {
- + if (likely(!skb_try_make_writable(skb, wlen))) {
- + ipv4_change_dsfield(ip_hdr(skb),
- + INET_ECN_MASK,
- + newdscp);
- + ca->stats_dscp_set++;
- + } else {
- + ca->stats_dscp_error++;
- + }
- + }
- + break;
- + case NFPROTO_IPV6:
- + dscp = ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK;
- + if (dscp != newdscp) {
- + if (likely(!skb_try_make_writable(skb, wlen))) {
- + ipv6_change_dsfield(ipv6_hdr(skb),
- + INET_ECN_MASK,
- + newdscp);
- + ca->stats_dscp_set++;
- + } else {
- + ca->stats_dscp_error++;
- + }
- + }
- + break;
- + default:
- + break;
- + }
- +}
- +
- +static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
- + struct tcf_ctinfo_params *cp,
- + struct sk_buff *skb)
- +{
- + ca->stats_cpmark_set++;
- + skb->mark = ct->mark & cp->cpmarkmask;
- +}
- +
- +static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a,
- + struct tcf_result *res)
- +{
- + const struct nf_conntrack_tuple_hash *thash = NULL;
- + struct tcf_ctinfo *ca = to_ctinfo(a);
- + struct nf_conntrack_tuple tuple;
- + struct nf_conntrack_zone zone;
- + enum ip_conntrack_info ctinfo;
- + struct tcf_ctinfo_params *cp;
- + struct nf_conn *ct;
- + int proto, wlen;
- + int action;
- +
- + cp = rcu_dereference_bh(ca->params);
- +
- + tcf_lastuse_update(&ca->tcf_tm);
- + bstats_update(&ca->tcf_bstats, skb);
- + action = READ_ONCE(ca->tcf_action);
- +
- + wlen = skb_network_offset(skb);
- + if (tc_skb_protocol(skb) == htons(ETH_P_IP)) {
- + wlen += sizeof(struct iphdr);
- + if (!pskb_may_pull(skb, wlen))
- + goto out;
- +
- + proto = NFPROTO_IPV4;
- + } else if (tc_skb_protocol(skb) == htons(ETH_P_IPV6)) {
- + wlen += sizeof(struct ipv6hdr);
- + if (!pskb_may_pull(skb, wlen))
- + goto out;
- +
- + proto = NFPROTO_IPV6;
- + } else {
- + goto out;
- + }
- +
- + ct = nf_ct_get(skb, &ctinfo);
- + if (!ct) { /* look harder, usually ingress */
- + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
- + proto, cp->net, &tuple))
- + goto out;
- + zone.id = cp->zone;
- + zone.dir = NF_CT_DEFAULT_ZONE_DIR;
- +
- + thash = nf_conntrack_find_get(cp->net, &zone, &tuple);
- + if (!thash)
- + goto out;
- +
- + ct = nf_ct_tuplehash_to_ctrack(thash);
- + }
- +
- + if (cp->mode & CTINFO_MODE_DSCP)
- + if (!cp->dscpstatemask || (ct->mark & cp->dscpstatemask))
- + tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto);
- +
- + if (cp->mode & CTINFO_MODE_CPMARK)
- + tcf_ctinfo_cpmark_set(ct, ca, cp, skb);
- +
- + if (thash)
- + nf_ct_put(ct);
- +out:
- + return action;
- +}
- +
- +static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = {
- + [TCA_CTINFO_ACT] = { .len = sizeof(struct
- + tc_ctinfo) },
- + [TCA_CTINFO_ZONE] = { .type = NLA_U16 },
- + [TCA_CTINFO_PARMS_DSCP_MASK] = { .type = NLA_U32 },
- + [TCA_CTINFO_PARMS_DSCP_STATEMASK] = { .type = NLA_U32 },
- + [TCA_CTINFO_PARMS_CPMARK_MASK] = { .type = NLA_U32 },
- +};
- +
- +static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
- + struct nlattr *est, struct tc_action **a,
- + int ovr, int bind)
- +{
- + struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
- + struct nlattr *tb[TCA_CTINFO_MAX + 1];
- + struct tcf_ctinfo_params *cp_new;
- +/* struct tcf_chain *goto_ch = NULL; */
- + u32 dscpmask = 0, dscpstatemask;
- + struct tc_ctinfo *actparm;
- + struct tcf_ctinfo *ci;
- + u8 dscpmaskshift;
- + int ret = 0, err;
- +
- + if (!nla)
- + return -EINVAL;
- +
- + err = nla_parse_nested(tb, TCA_CTINFO_MAX, nla, ctinfo_policy, NULL);
- + if (err < 0)
- + return err;
- +
- + if (!tb[TCA_CTINFO_ACT])
- + return -EINVAL;
- + actparm = nla_data(tb[TCA_CTINFO_ACT]);
- +
- + /* do some basic validation here before dynamically allocating things */
- + /* that we would otherwise have to clean up. */
- + if (tb[TCA_CTINFO_PARMS_DSCP_MASK]) {
- + dscpmask = nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_MASK]);
- + /* need contiguous 6 bit mask */
- + dscpmaskshift = dscpmask ? __ffs(dscpmask) : 0;
- + if ((~0 & (dscpmask >> dscpmaskshift)) != 0x3f)
- + return -EINVAL;
- + dscpstatemask = tb[TCA_CTINFO_PARMS_DSCP_STATEMASK] ?
- + nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) : 0;
- + /* mask & statemask must not overlap */
- + if (dscpmask & dscpstatemask)
- + return -EINVAL;
- + }
- + /* done the validation:now to the actual action allocation */
- + err = tcf_idr_check(tn, actparm->index, a, bind);
- + if (!err) {
- + ret = tcf_idr_create(tn, actparm->index, est, a,
- + &act_ctinfo_ops, bind, false);
- + if (ret) {
- + /* tcf_idr_cleanup(tn, actparm->index); */
- + return ret;
- + }
- + ret = ACT_P_CREATED;
- + } else if (err > 0) {
- + if (bind) /* don't override defaults */
- + return 0;
- + if (!ovr) {
- + tcf_idr_release(*a, bind);
- + return -EEXIST;
- + }
- + } else {
- + return err;
- + }
- +
- +/* err = tcf_action_check_ctrlact(actparm->action, tp, &goto_ch, extack);
- + if (err < 0)
- + goto release_idr;
- + */
- +
- + ci = to_ctinfo(*a);
- +
- + cp_new = kzalloc(sizeof(*cp_new), GFP_KERNEL);
- + if (unlikely(!cp_new)) {
- + err = -ENOMEM;
- + goto put_chain;
- + }
- +
- + cp_new->net = net;
- + cp_new->zone = tb[TCA_CTINFO_ZONE] ?
- + nla_get_u16(tb[TCA_CTINFO_ZONE]) : 0;
- + if (dscpmask) {
- + cp_new->dscpmask = dscpmask;
- + cp_new->dscpmaskshift = dscpmaskshift;
- + cp_new->dscpstatemask = dscpstatemask;
- + cp_new->mode |= CTINFO_MODE_DSCP;
- + }
- +
- + if (tb[TCA_CTINFO_PARMS_CPMARK_MASK]) {
- + cp_new->cpmarkmask = nla_get_u32(tb[TCA_CTINFO_PARMS_CPMARK_MASK]);
- + cp_new->mode |= CTINFO_MODE_CPMARK;
- + }
- +
- + spin_lock_bh(&ci->tcf_lock);
- +/* goto_ch = tcf_action_set_ctrlact(*a, actparm->action, goto_ch); */
- + ci->tcf_action = actparm->action;
- + rcu_swap_protected(ci->params, cp_new,
- + lockdep_is_held(&ci->tcf_lock));
- + spin_unlock_bh(&ci->tcf_lock);
- +
- +/* if (goto_ch)
- + tcf_chain_put_by_act(goto_ch); */
- + if (cp_new)
- + kfree_rcu(cp_new, rcu);
- +
- + if (ret == ACT_P_CREATED)
- + tcf_idr_insert(tn, *a);
- +
- + return ret;
- +
- +put_chain:
- +/* if (goto_ch)
- + tcf_chain_put_by_act(goto_ch); */
- +/*release_idr:*/
- + tcf_idr_release(*a, bind);
- + return err;
- +}
- +
- +static int tcf_ctinfo_dump(struct sk_buff *skb, struct tc_action *a,
- + int bind, int ref)
- +{
- + struct tcf_ctinfo *ci = to_ctinfo(a);
- + struct tc_ctinfo opt = {
- + .index = ci->tcf_index,
- + .refcnt = ci->tcf_refcnt - ref,
- + .bindcnt = ci->tcf_bindcnt - bind,
- + };
- + unsigned char *b = skb_tail_pointer(skb);
- + struct tcf_ctinfo_params *cp;
- + struct tcf_t t;
- +
- + spin_lock_bh(&ci->tcf_lock);
- + cp = rcu_dereference_protected(ci->params,
- + lockdep_is_held(&ci->tcf_lock));
- +
- + tcf_tm_dump(&t, &ci->tcf_tm);
- + if (nla_put_64bit(skb, TCA_CTINFO_TM, sizeof(t), &t, TCA_CTINFO_PAD))
- + goto nla_put_failure;
- +
- + opt.action = ci->tcf_action;
- + if (nla_put(skb, TCA_CTINFO_ACT, sizeof(opt), &opt))
- + goto nla_put_failure;
- +
- + if (nla_put_u16(skb, TCA_CTINFO_ZONE, cp->zone))
- + goto nla_put_failure;
- +
- + if (cp->mode & CTINFO_MODE_DSCP) {
- + if (nla_put_u32(skb, TCA_CTINFO_PARMS_DSCP_MASK,
- + cp->dscpmask))
- + goto nla_put_failure;
- + if (nla_put_u32(skb, TCA_CTINFO_PARMS_DSCP_STATEMASK,
- + cp->dscpstatemask))
- + goto nla_put_failure;
- + }
- +
- + if (cp->mode & CTINFO_MODE_CPMARK) {
- + if (nla_put_u32(skb, TCA_CTINFO_PARMS_CPMARK_MASK,
- + cp->cpmarkmask))
- + goto nla_put_failure;
- + }
- +
- + if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_SET,
- + ci->stats_dscp_set, TCA_CTINFO_PAD))
- + goto nla_put_failure;
- +
- + if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_ERROR,
- + ci->stats_dscp_error, TCA_CTINFO_PAD))
- + goto nla_put_failure;
- +
- + if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_CPMARK_SET,
- + ci->stats_cpmark_set, TCA_CTINFO_PAD))
- + goto nla_put_failure;
- +
- + spin_unlock_bh(&ci->tcf_lock);
- + return skb->len;
- +
- +nla_put_failure:
- + spin_unlock_bh(&ci->tcf_lock);
- + nlmsg_trim(skb, b);
- + return -1;
- +}
- +
- +static int tcf_ctinfo_walker(struct net *net, struct sk_buff *skb,
- + struct netlink_callback *cb, int type,
- + const struct tc_action_ops *ops)
- +{
- + struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
- +
- + return tcf_generic_walker(tn, skb, cb, type, ops);
- +}
- +
- +static int tcf_ctinfo_search(struct net *net, struct tc_action **a, u32 index)
- +{
- + struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
- +
- + return tcf_idr_search(tn, a, index);
- +}
- +
- +static struct tc_action_ops act_ctinfo_ops = {
- + .kind = "ctinfo",
- + .type = TCA_ID_CTINFO,
- + .owner = THIS_MODULE,
- + .act = tcf_ctinfo_act,
- + .dump = tcf_ctinfo_dump,
- + .init = tcf_ctinfo_init,
- + .walk = tcf_ctinfo_walker,
- + .lookup = tcf_ctinfo_search,
- + .size = sizeof(struct tcf_ctinfo),
- +};
- +
- +static __net_init int ctinfo_init_net(struct net *net)
- +{
- + struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
- +
- + return tc_action_net_init(net, tn, &act_ctinfo_ops);
- +}
- +
- +static void __net_exit ctinfo_exit_net(struct net *net)
- +{
- + struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
- +
- + tc_action_net_exit(tn);
- +}
- +
- +static struct pernet_operations ctinfo_net_ops = {
- + .init = ctinfo_init_net,
- + .exit = ctinfo_exit_net,
- + .id = &ctinfo_net_id,
- + .size = sizeof(struct tc_action_net),
- +};
- +
- +static int __init ctinfo_init_module(void)
- +{
- + return tcf_register_action(&act_ctinfo_ops, &ctinfo_net_ops);
- +}
- +
- +static void __exit ctinfo_cleanup_module(void)
- +{
- + tcf_unregister_action(&act_ctinfo_ops, &ctinfo_net_ops);
- +}
- +
- +module_init(ctinfo_init_module);
- +module_exit(ctinfo_cleanup_module);
- +MODULE_AUTHOR("Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>");
- +MODULE_DESCRIPTION("Conntrack mark to DSCP restoring");
- +MODULE_LICENSE("GPL");
|