From dff8eadcab33209e040e77a5d56d5def04808144 Mon Sep 17 00:00:00 2001 From: Kevin Darbyshire-Bryant Date: Fri, 15 Mar 2019 09:35:37 +0000 Subject: [PATCH] tc: add support for action act_ctinfo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ctinfo is a tc action restoring data stored in conntrack marks to various fields. At present it has two independent modes of operation, restoration of DSCP into IPv4/v6 diffserv and restoration of conntrack marks into packet skb marks. It understands a number of parameters specific to this action in addition to the usual action syntax. Each operating mode is independent of the other so all options are optional, however not specifying at least one mode is a bit pointless. Usage: ... ctinfo [dscp mask [statemask]] [cpmark [mask]] [zone ZONE] [CONTROL] [index ] DSCP mode dscp enables copying of a DSCP stored in the conntrack mark into the ipv4/v6 diffserv field. The mask is a 32bit field and specifies where in the conntrack mark the DSCP value is located. It must be 6 contiguous bits long. eg. 0xfc000000 would restore the DSCP from the upper 6 bits of the conntrack mark. The DSCP copying may be optionally controlled by a statemask. The statemask is a 32bit field, usually with a single bit set and must not overlap the dscp mask. The DSCP restore operation will only take place if the corresponding bit/s in conntrack mark ANDed with the statemask yield a non zero result. eg. dscp 0xfc000000 0x01000000 would retrieve the DSCP from the top 6 bits, whilst using bit 25 as a flag to do so. Bit 26 is unused in this example. CPMARK mode cpmark enables copying of the conntrack mark to the packet skb mark. In this mode it is completely equivalent to the existing act_connmark action. Additional functionality is provided by the optional mask parameter, whereby the stored conntrack mark is logically ANDed with the cpmark mask before being stored into skb mark. 
This allows shared usage of the conntrack mark between applications. eg. cpmark 0x00ffffff would restore only the lower 24 bits of the conntrack mark, thus may be useful in the event that the upper 8 bits are used by the DSCP function. Usage: ... ctinfo [dscp mask [statemask]] [cpmark [mask]] [zone ZONE] [CONTROL] [index ] where : dscp MASK is the bitmask to restore DSCP STATEMASK is the bitmask to determine conditional restoring cpmark MASK mask applied to restored packet mark ZONE is the conntrack zone CONTROL := reclassify | pipe | drop | continue | ok | goto chain Signed-off-by: Kevin Darbyshire-Bryant Reviewed-by: Toke Høiland-Jørgensen --- include/uapi/linux/pkt_cls.h | 3 +- include/uapi/linux/tc_act/tc_ctinfo.h | 29 +++ man/man8/tc-ctinfo.8 | 170 ++++++++++++++++ tc/Makefile | 1 + tc/m_ctinfo.c | 268 ++++++++++++++++++++++++++ 5 files changed, 470 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/tc_act/tc_ctinfo.h create mode 100644 man/man8/tc-ctinfo.8 create mode 100644 tc/m_ctinfo.c diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 95d0db2a..a6e7e176 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -68,7 +68,8 @@ enum { TCA_ID_UNSPEC=0, TCA_ID_POLICE=1, /* other actions go here */ - __TCA_ID_MAX=255 + TCA_ID_CTINFO=27, + __TCA_ID_MAX = 255 }; #define TCA_ID_MAX __TCA_ID_MAX diff --git a/include/uapi/linux/tc_act/tc_ctinfo.h b/include/uapi/linux/tc_act/tc_ctinfo.h new file mode 100644 index 00000000..f5f26d95 --- /dev/null +++ b/include/uapi/linux/tc_act/tc_ctinfo.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __UAPI_TC_CTINFO_H +#define __UAPI_TC_CTINFO_H + +#include +#include + +struct tc_ctinfo { + tc_gen; +}; + +enum { + TCA_CTINFO_UNSPEC, + TCA_CTINFO_PAD, + TCA_CTINFO_TM, + TCA_CTINFO_ACT, + TCA_CTINFO_ZONE, + TCA_CTINFO_PARMS_DSCP_MASK, + TCA_CTINFO_PARMS_DSCP_STATEMASK, + TCA_CTINFO_PARMS_CPMARK_MASK, + 
TCA_CTINFO_STATS_DSCP_SET, + TCA_CTINFO_STATS_DSCP_ERROR, + TCA_CTINFO_STATS_CPMARK_SET, + __TCA_CTINFO_MAX +}; + +#define TCA_CTINFO_MAX (__TCA_CTINFO_MAX - 1) + +#endif diff --git a/man/man8/tc-ctinfo.8 b/man/man8/tc-ctinfo.8 new file mode 100644 index 00000000..096590d1 --- /dev/null +++ b/man/man8/tc-ctinfo.8 @@ -0,0 +1,170 @@ +.TH "ctinfo action in tc" 8 "4 Jun 2019" "iproute2" "Linux" +.SH NAME +ctinfo \- tc connmark processing action +.SH SYNOPSIS +.B tc ... action ctinfo +[ +.B dscp +MASK [STATEMASK] ] [ +.B cpmark +[MASK] ] [ +.B zone +ZONE ] [ +.B CONTROL +] [ +.B index + +] + +.SH DESCRIPTION +CTINFO (Conntrack Information) is a tc action for retrieving data from +conntrack marks into various fields. At present it has two independent +processing modes which may be viewed as sub-functions. + +DSCP mode copies a DSCP stored in conntrack's connmark into the IPv4/v6 diffserv +field. The copying may conditionally occur based on a flag also stored in the +connmark. DSCP mode was designed to assist in restoring packet classifications on +ingress, classifications which may then be used by qdiscs such as CAKE. It may be +used in any circumstance where ingress classification needs to be maintained across +links that otherwise bleach or remap according to their own policies. + +CPMARK (copymark) mode copies the conntrack connmark into the packet's mark field. Without +additional parameters it is functionally completely equivalent to the existing +connmark action. An optional mask may be specified to mask which bits of the +connmark are restored. This may be useful when DSCP and CPMARK modes are combined. + +Simple statistics (tc -s) on DSCP restores and CPMARK copies are maintained where values for +set indicate a count of packets altered for that mode. DSCP includes an error count +where the destination packet's diffserv field was unwriteable. 
+.SH PARAMETERS +.SS DSCP mode parameters: +.IP mask +A mask of 6 contiguous bits indicating where the DSCP value is located in the 32 bit +conntrack mark field. A mask must be provided for this mode. mask is a 32 bit +unsigned value. +.IP statemask +A mask of at least 1 bit indicating where a conditional restore flag is located in the +32 bit conntrack mark field. The statemask bit/s must NOT overlap the mask bits. The +DSCP will be restored if the conntrack mark logically ANDed with the statemask yields +a non-zero result. statemask is an optional unsigned 32 bit value. +.SS CPMARK mode parameters: +.IP mask +Store the logically ANDed result of conntrack mark and mask into the packet's mark +field. Default is 0xffffffff i.e. the whole mark field. mask is an optional unsigned 32 bit +value. +.SS Overall action parameters: +.IP zone +Specify the conntrack zone when doing conntrack lookups for packets. +zone is a 16bit unsigned decimal value. +Default is 0. +.IP CONTROL +The following keywords allow controlling how the tree of qdisc, classes, +filters and actions is further traversed after this action. +.RS +.TP +.B reclassify +Restart with the first filter in the current list. +.TP +.B pipe +Continue with the next action attached to the same filter. +.TP +.B drop +Drop the packet. +.TP +.B shot +synonym for +.B drop +.TP +.B continue +Continue classification with the next filter in line. +.TP +.B pass +Finish classification process and return to calling qdisc for further packet +processing. This is the default. +.RE +.IP index +Specify an index for this action in order to be able to identify it in later +commands. index is a 32bit unsigned decimal value. 
+.SH EXAMPLES +Example showing conditional restoration of DSCP on ingress via an IFB +.RS +.EX + +#Set up the IFB interface +.br +tc qdisc add dev ifb4eth0 handle ffff: ingress + +#Put CAKE qdisc on it +.br +tc qdisc add dev ifb4eth0 root cake bandwidth 40mbit + +#Set interface UP +.br +ip link set dev ifb4eth0 up + +#Add 2 actions, ctinfo to restore dscp & mirred to redirect the packets to IFB +.br +tc filter add dev eth0 parent ffff: protocol all prio 10 u32 \\ + match u32 0 0 flowid 1:1 action \\ + ctinfo dscp 0xfc000000 0x01000000 \\ + mirred egress redirect dev ifb4eth0 + +tc -s filter show dev eth0 ingress + + filter parent ffff: protocol all pref 10 u32 chain 0 + filter parent ffff: protocol all pref 10 u32 chain 0 fh 800: ht divisor 1 + filter parent ffff: protocol all pref 10 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 not_in_hw + match 00000000/00000000 at 0 + action order 1: ctinfo zone 0 pipe + index 2 ref 1 bind 1 dscp 0xfc000000 0x01000000 installed 72 sec used 0 sec DSCP set 1333 error 0 CPMARK set 0 + Action statistics: + Sent 658484 bytes 1833 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + + action order 2: mirred (Egress Redirect to device ifb4eth0) stolen + index 1 ref 1 bind 1 installed 72 sec used 0 sec + Action statistics: + Sent 658484 bytes 1833 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 +.EE +.RE + +Example showing conditional restoration of DSCP on egress + +This may appear nonsensical since iptables marking of egress packets is easy +to achieve, however the iptables flow classification rules may be extensive +and so some sort of set once and forget may be useful especially on cpu +constrained devices. 
+.RS +.EX + +# Send unmarked connections to a marking chain which needs to store a DSCP +and set statemask bit in the connmark +.br +iptables -t mangle -A POSTROUTING -o eth0 -m connmark \\ + --mark 0x00000000/0x01000000 -g CLASS_MARKING_CHAIN + +# Apply marked DSCP to the packets +.br +tc filter add dev eth0 protocol all prio 10 u32 \\ + match u32 0 0 flowid 1:1 action \\ + ctinfo dscp 0xfc000000 0x01000000 + +tc -s filter show dev eth0 + filter parent 800e: protocol all pref 10 u32 chain 0 + filter parent 800e: protocol all pref 10 u32 chain 0 fh 800: ht divisor 1 + filter parent 800e: protocol all pref 10 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 not_in_hw + match 00000000/00000000 at 0 + action order 1: ctinfo zone 0 pipe + index 1 ref 1 bind 1 dscp 0xfc000000 0x01000000 installed 7414 sec used 0 sec DSCP set 53404 error 0 CPMARK set 0 + Action statistics: + Sent 32890260 bytes 120441 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 +.br +.SH SEE ALSO +.BR tc (8), +.BR tc-cake (8) +.BR tc-connmark (8) +.BR tc-mirred (8) +.SH AUTHORS +ctinfo was written by Kevin Darbyshire-Bryant. diff --git a/tc/Makefile b/tc/Makefile index 2edaf2c8..ec93a9a1 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -48,6 +48,7 @@ TCMODULES += m_csum.o TCMODULES += m_simple.o TCMODULES += m_vlan.o TCMODULES += m_connmark.o +TCMODULES += m_ctinfo.o TCMODULES += m_bpf.o TCMODULES += m_tunnel_key.o TCMODULES += m_sample.o diff --git a/tc/m_ctinfo.c b/tc/m_ctinfo.c new file mode 100644 index 00000000..5e451f87 --- /dev/null +++ b/tc/m_ctinfo.c @@ -0,0 +1,268 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * m_ctinfo.c netfilter ctinfo mark action + * + * Copyright (c) 2019 Kevin Darbyshire-Bryant + */ + +#include +#include +#include +#include +#include "utils.h" +#include "tc_util.h" +#include + +static void +explain(void) +{ + fprintf(stderr, + "Usage: ... 
ctinfo [dscp mask [statemask]] [cpmark [mask]] [zone ZONE] [CONTROL] [index ]\n" + "where :\n" + "\tdscp MASK bitmask location of stored DSCP\n" + "\t STATEMASK bitmask to determine conditional restoring\n" + "\tcpmark MASK mask applied to mark on restoration\n" + "\tZONE is the conntrack zone\n" + "\tCONTROL := reclassify | pipe | drop | continue | ok |\n" + "\t goto chain \n"); +} + +static void +usage(void) +{ + explain(); + exit(-1); +} + +static int +parse_ctinfo(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, + struct nlmsghdr *n) +{ + unsigned int cpmarkmask = 0, dscpmask = 0, dscpstatemask = 0; + struct tc_ctinfo sel = {}; + unsigned short zone = 0; + char **argv = *argv_p; + struct rtattr *tail; + int argc = *argc_p; + int ok = 0; + __u8 i; + + while (argc > 0) { + if (matches(*argv, "ctinfo") == 0) { + ok = 1; + NEXT_ARG_FWD(); + } else if (matches(*argv, "help") == 0) { + usage(); + } else { + break; + } + + } + + if (!ok) { + explain(); + return -1; + } + + if (argc) { + if (matches(*argv, "dscp") == 0) { + NEXT_ARG(); + if (get_u32(&dscpmask, *argv, 0)) { + fprintf(stderr, + "ctinfo: Illegal dscp \"mask\"\n"); + return -1; + } + if (NEXT_ARG_OK()) { + NEXT_ARG_FWD(); + if (!get_u32(&dscpstatemask, *argv, 0)) + NEXT_ARG_FWD(); /* was a statemask */ + } else { + NEXT_ARG_FWD(); + } + } + } + + /* cpmark has optional mask parameter, so the next arg might not */ + /* exist, or it might be the next option, or it may actually be a */ + /* 32bit mask */ + if (argc) { + if (matches(*argv, "cpmark") == 0) { + cpmarkmask = ~0; + if (NEXT_ARG_OK()) { + NEXT_ARG_FWD(); + if (!get_u32(&cpmarkmask, *argv, 0)) + NEXT_ARG_FWD(); /* was a mask */ + } else { + NEXT_ARG_FWD(); + } + } + } + + if (argc) { + if (matches(*argv, "zone") == 0) { + NEXT_ARG(); + if (get_u16(&zone, *argv, 10)) { + fprintf(stderr, "ctinfo: Illegal \"zone\"\n"); + return -1; + } + NEXT_ARG_FWD(); + } + } + + parse_action_control_dflt(&argc, &argv, &sel.action, + false, 
TC_ACT_PIPE); + + if (argc) { + if (matches(*argv, "index") == 0) { + NEXT_ARG(); + if (get_u32(&sel.index, *argv, 10)) { + fprintf(stderr, "ctinfo: Illegal \"index\"\n"); + return -1; + } + NEXT_ARG_FWD(); + } + } + + if (dscpmask & dscpstatemask) { + fprintf(stderr, + "ctinfo: dscp mask & statemask must NOT overlap\n"); + return -1; + } + + i = ffs(dscpmask); + if (i && ((~0 & (dscpmask >> (i - 1))) != 0x3f)) { + fprintf(stderr, + "ctinfo: dscp mask must be 6 contiguous bits long\n"); + return -1; + } + + tail = addattr_nest(n, MAX_MSG, tca_id); + addattr_l(n, MAX_MSG, TCA_CTINFO_ACT, &sel, sizeof(sel)); + addattr16(n, MAX_MSG, TCA_CTINFO_ZONE, zone); + + if (dscpmask) + addattr32(n, MAX_MSG, + TCA_CTINFO_PARMS_DSCP_MASK, dscpmask); + + if (dscpstatemask) + addattr32(n, MAX_MSG, + TCA_CTINFO_PARMS_DSCP_STATEMASK, dscpstatemask); + + if (cpmarkmask) + addattr32(n, MAX_MSG, + TCA_CTINFO_PARMS_CPMARK_MASK, cpmarkmask); + + addattr_nest_end(n, tail); + + *argc_p = argc; + *argv_p = argv; + return 0; +} + +static void print_ctinfo_stats(FILE *f, struct rtattr *tb[TCA_CTINFO_MAX + 1]) +{ + struct tcf_t *tm; + + if (tb[TCA_CTINFO_TM]) { + tm = RTA_DATA(tb[TCA_CTINFO_TM]); + + print_tm(f, tm); + } + + if (tb[TCA_CTINFO_STATS_DSCP_SET]) + print_lluint(PRINT_ANY, "dscpset", " DSCP set %llu", + rta_getattr_u64(tb[TCA_CTINFO_STATS_DSCP_SET])); + if (tb[TCA_CTINFO_STATS_DSCP_ERROR]) + print_lluint(PRINT_ANY, "dscperror", " error %llu", + rta_getattr_u64(tb[TCA_CTINFO_STATS_DSCP_ERROR])); + + if (tb[TCA_CTINFO_STATS_CPMARK_SET]) + print_lluint(PRINT_ANY, "cpmarkset", " CPMARK set %llu", + rta_getattr_u64(tb[TCA_CTINFO_STATS_CPMARK_SET])); +} + +static int print_ctinfo(struct action_util *au, FILE *f, struct rtattr *arg) +{ + unsigned int cpmarkmask = ~0, dscpmask = 0, dscpstatemask = 0; + struct rtattr *tb[TCA_CTINFO_MAX + 1]; + unsigned short zone = 0; + struct tc_ctinfo *ci; + + if (arg == NULL) + return -1; + + parse_rtattr_nested(tb, TCA_CTINFO_MAX, arg); + if 
(!tb[TCA_CTINFO_ACT]) { + print_string(PRINT_FP, NULL, "%s", + "[NULL ctinfo action parameters]"); + return -1; + } + + ci = RTA_DATA(tb[TCA_CTINFO_ACT]); + + if (tb[TCA_CTINFO_PARMS_DSCP_MASK]) { + if (RTA_PAYLOAD(tb[TCA_CTINFO_PARMS_DSCP_MASK]) >= + sizeof(__u32)) + dscpmask = rta_getattr_u32( + tb[TCA_CTINFO_PARMS_DSCP_MASK]); + else + print_string(PRINT_FP, NULL, "%s", + "[invalid dscp mask parameter]"); + } + + if (tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) { + if (RTA_PAYLOAD(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) >= + sizeof(__u32)) + dscpstatemask = rta_getattr_u32( + tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]); + else + print_string(PRINT_FP, NULL, "%s", + "[invalid dscp statemask parameter]"); + } + + if (tb[TCA_CTINFO_PARMS_CPMARK_MASK]) { + if (RTA_PAYLOAD(tb[TCA_CTINFO_PARMS_CPMARK_MASK]) >= + sizeof(__u32)) + cpmarkmask = rta_getattr_u32( + tb[TCA_CTINFO_PARMS_CPMARK_MASK]); + else + print_string(PRINT_FP, NULL, "%s", + "[invalid cpmark mask parameter]"); + } + + if (tb[TCA_CTINFO_ZONE] && RTA_PAYLOAD(tb[TCA_CTINFO_ZONE]) >= + sizeof(__u16)) + zone = rta_getattr_u16(tb[TCA_CTINFO_ZONE]); + + print_string(PRINT_ANY, "kind", "%s ", "ctinfo"); + print_hu(PRINT_ANY, "zone", "zone %u", zone); + print_action_control(f, " ", ci->action, ""); + + print_string(PRINT_FP, NULL, "%s", _SL_); + print_uint(PRINT_ANY, "index", "\t index %u", ci->index); + print_int(PRINT_ANY, "ref", " ref %d", ci->refcnt); + print_int(PRINT_ANY, "bind", " bind %d", ci->bindcnt); + + if (tb[TCA_CTINFO_PARMS_DSCP_MASK]) { + print_0xhex(PRINT_ANY, "dscpmask", " dscp %#010llx", dscpmask); + print_0xhex(PRINT_ANY, "dscpstatemask", " %#010llx", + dscpstatemask); + } + + if (tb[TCA_CTINFO_PARMS_CPMARK_MASK]) + print_0xhex(PRINT_ANY, "cpmark", " cpmark %#010llx", + cpmarkmask); + + if (show_stats) + print_ctinfo_stats(f, tb); + + print_string(PRINT_FP, NULL, "%s", _SL_); + + return 0; +} + +struct action_util ctinfo_action_util = { + .id = "ctinfo", + .parse_aopt = parse_ctinfo, + .print_aopt = 
print_ctinfo, +}; -- 2.20.1 (Apple Git-117)