324-v4.16-netfilter-flow-table-support-for-IPv6.patch 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. From: Pablo Neira Ayuso <pablo@netfilter.org>
  2. Date: Sun, 7 Jan 2018 01:04:19 +0100
  3. Subject: [PATCH] netfilter: flow table support for IPv6
  4. This patch adds the IPv6 flow table type, that implements the datapath
  5. flow table to forward IPv6 traffic.
  6. This patch exports ip6_dst_mtu_forward() that is required to check for
  7. mtu to pass up packets that need PMTUD handling to the classic
  8. forwarding path.
  9. Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  10. ---
  11. create mode 100644 net/ipv6/netfilter/nf_flow_table_ipv6.c
  12. --- a/include/net/ipv6.h
  13. +++ b/include/net/ipv6.h
  14. @@ -860,6 +860,8 @@ static inline struct sk_buff *ip6_finish
  15. &inet6_sk(sk)->cork);
  16. }
  17. +unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);
  18. +
  19. int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
  20. struct flowi6 *fl6);
  21. struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
  22. --- a/net/ipv6/ip6_output.c
  23. +++ b/net/ipv6/ip6_output.c
  24. @@ -381,7 +381,7 @@ static inline int ip6_forward_finish(str
  25. return dst_output(net, sk, skb);
  26. }
  27. -static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
  28. +unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
  29. {
  30. unsigned int mtu;
  31. struct inet6_dev *idev;
  32. @@ -401,6 +401,7 @@ static unsigned int ip6_dst_mtu_forward(
  33. return mtu;
  34. }
  35. +EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
  36. static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
  37. {
  38. --- a/net/ipv6/netfilter/Kconfig
  39. +++ b/net/ipv6/netfilter/Kconfig
  40. @@ -99,6 +99,14 @@ config NFT_FIB_IPV6
  41. endif # NF_TABLES_IPV6
  42. endif # NF_TABLES
  43. +config NF_FLOW_TABLE_IPV6
  44. + select NF_FLOW_TABLE
  45. + tristate "Netfilter flow table IPv6 module"
  46. + help
  47. + This option adds the flow table IPv6 support.
  48. +
  49. + To compile it as a module, choose M here.
  50. +
  51. config NF_DUP_IPV6
  52. tristate "Netfilter IPv6 packet duplication to alternate destination"
  53. depends on !NF_CONNTRACK || NF_CONNTRACK
  54. --- a/net/ipv6/netfilter/Makefile
  55. +++ b/net/ipv6/netfilter/Makefile
  56. @@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redi
  57. obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
  58. obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
  59. +# flow table support
  60. +obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
  61. +
  62. # matches
  63. obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
  64. obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
  65. --- /dev/null
  66. +++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
  67. @@ -0,0 +1,277 @@
  68. +#include <linux/kernel.h>
  69. +#include <linux/init.h>
  70. +#include <linux/module.h>
  71. +#include <linux/netfilter.h>
  72. +#include <linux/rhashtable.h>
  73. +#include <linux/ipv6.h>
  74. +#include <linux/netdevice.h>
  75. +#include <linux/ipv6.h>
  76. +#include <net/ipv6.h>
  77. +#include <net/ip6_route.h>
  78. +#include <net/neighbour.h>
  79. +#include <net/netfilter/nf_flow_table.h>
  80. +#include <net/netfilter/nf_tables.h>
  81. +/* For layer 4 checksum field offset. */
  82. +#include <linux/tcp.h>
  83. +#include <linux/udp.h>
  84. +
  85. +static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
  86. + struct in6_addr *addr,
  87. + struct in6_addr *new_addr)
  88. +{ /* Adjust the TCP checksum at network-header offset thoff for an address change from *addr to *new_addr; returns 0, or -1 on failure. */
  89. + struct tcphdr *tcph;
  90. +
  91. + if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
  92. + skb_try_make_writable(skb, thoff + sizeof(*tcph)))
  93. + return -1; /* the TCP header could not be pulled or made writable */
  94. +
  95. + tcph = (void *)(skb_network_header(skb) + thoff); /* computed only after the pull/writable calls above */
  96. + inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
  97. + new_addr->s6_addr32, true); /* NOTE(review): 'true' presumably marks a pseudo-header update - confirm */
  98. +
  99. + return 0;
  100. +}
  101. +
  102. +static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
  103. + struct in6_addr *addr,
  104. + struct in6_addr *new_addr)
  105. +{ /* Adjust the UDP checksum at network-header offset thoff for an address change from *addr to *new_addr; returns 0, or -1 on failure. */
  106. + struct udphdr *udph;
  107. +
  108. + if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
  109. + skb_try_make_writable(skb, thoff + sizeof(*udph)))
  110. + return -1; /* the UDP header could not be pulled or made writable */
  111. +
  112. + udph = (void *)(skb_network_header(skb) + thoff); /* computed only after the pull/writable calls above */
  113. + if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { /* skipped when the checksum field is zero and ip_summed is not CHECKSUM_PARTIAL */
  114. + inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
  115. + new_addr->s6_addr32, true);
  116. + if (!udph->check) /* an updated checksum of zero is stored as CSUM_MANGLED_0 instead */
  117. + udph->check = CSUM_MANGLED_0;
  118. + }
  119. +
  120. + return 0;
  121. +}
  122. +
  123. +static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
  124. + unsigned int thoff, struct in6_addr *addr,
  125. + struct in6_addr *new_addr)
  126. +{ /* Dispatch the L4 checksum fix-up on ip6h->nexthdr after an address rewrite; returns 0 on success, -1 if the TCP/UDP helper fails. */
  127. + switch (ip6h->nexthdr) {
  128. + case IPPROTO_TCP:
  129. + if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
  130. + return -1; /* not NF_DROP: that verdict is 0, which the "< 0" tests in the callers would read as success */
  131. + break;
  132. + case IPPROTO_UDP:
  133. + if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
  134. + return -1; /* same reasoning as the TCP case */
  135. + break;
  136. + } /* any other next-header value gets no checksum fix-up and counts as success */
  137. +
  138. + return 0;
  139. +}
  140. +
  141. +static int nf_flow_snat_ipv6(const struct flow_offload *flow,
  142. + struct sk_buff *skb, struct ipv6hdr *ip6h,
  143. + unsigned int thoff,
  144. + enum flow_offload_tuple_dir dir)
  145. +{ /* Source-NAT address rewrite for direction dir, followed by the L4 checksum fix-up; returns -1 for an unknown direction. */
  146. + struct in6_addr addr, new_addr;
  147. +
  148. + switch (dir) {
  149. + case FLOW_OFFLOAD_DIR_ORIGINAL: /* original direction: the source address is replaced */
  150. + addr = ip6h->saddr;
  151. + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
  152. + ip6h->saddr = new_addr;
  153. + break;
  154. + case FLOW_OFFLOAD_DIR_REPLY: /* reply direction: the destination address is replaced */
  155. + addr = ip6h->daddr;
  156. + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
  157. + ip6h->daddr = new_addr;
  158. + break;
  159. + default:
  160. + return -1;
  161. + }
  162. +
  163. + return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); /* old and new address feed the checksum update */
  164. +}
  165. +
  166. +static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
  167. + struct sk_buff *skb, struct ipv6hdr *ip6h,
  168. + unsigned int thoff,
  169. + enum flow_offload_tuple_dir dir)
  170. +{ /* Destination-NAT address rewrite for direction dir, followed by the L4 checksum fix-up; returns -1 for an unknown direction. */
  171. + struct in6_addr addr, new_addr;
  172. +
  173. + switch (dir) {
  174. + case FLOW_OFFLOAD_DIR_ORIGINAL: /* original direction: the destination address is replaced */
  175. + addr = ip6h->daddr;
  176. + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
  177. + ip6h->daddr = new_addr;
  178. + break;
  179. + case FLOW_OFFLOAD_DIR_REPLY: /* reply direction: the source address is replaced */
  180. + addr = ip6h->saddr;
  181. + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
  182. + ip6h->saddr = new_addr;
  183. + break;
  184. + default:
  185. + return -1;
  186. + }
  187. +
  188. + return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); /* old and new address feed the checksum update */
  189. +}
  190. +
  191. +static int nf_flow_nat_ipv6(const struct flow_offload *flow,
  192. + struct sk_buff *skb,
  193. + enum flow_offload_tuple_dir dir)
  194. +{ /* Apply the SNAT and/or DNAT mangling selected by flow->flags for direction dir; returns 0, or -1 if any step fails. */
  195. + struct ipv6hdr *ip6h = ipv6_hdr(skb);
  196. + unsigned int thoff = sizeof(*ip6h); /* transport header is taken to start right after the fixed IPv6 header */
  197. +
  198. + if (flow->flags & FLOW_OFFLOAD_SNAT &&
  199. + (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
  200. + nf_flow_snat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0)) /* header re-read; presumably the port helper can reallocate it - verify against nf_flow_snat_port() */
  201. + return -1;
  202. + if (flow->flags & FLOW_OFFLOAD_DNAT &&
  203. + (nf_flow_dnat_port(flow, skb, thoff, ipv6_hdr(skb)->nexthdr, dir) < 0 || /* ip6h may be stale: the SNAT step pulls/unshares the skb in its TCP/UDP helpers */
  204. + nf_flow_dnat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
  205. + return -1;
  206. +
  207. + return 0;
  208. +}
  209. +
  210. +static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
  211. + struct flow_offload_tuple *tuple)
  212. +{ /* Fill *tuple from the packet's IPv6 and TCP/UDP headers plus dev->ifindex; returns 0, or -1 when the packet must not take the flow-table path. */
  213. + struct flow_ports *ports;
  214. + struct ipv6hdr *ip6h;
  215. + unsigned int thoff;
  216. +
  217. + if (!pskb_may_pull(skb, sizeof(*ip6h)))
  218. + return -1;
  219. +
  220. + ip6h = ipv6_hdr(skb);
  221. +
  222. + if ((ip6h->nexthdr != IPPROTO_TCP &&
  223. + ip6h->nexthdr != IPPROTO_UDP) || ip6h->hop_limit <= 1)
  224. + return -1; /* only TCP/UDP directly after the IPv6 header; packets about to expire are left to the classic path because the hook decrements hop_limit unconditionally */
  225. +
  226. + thoff = sizeof(*ip6h);
  227. + if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
  228. + return -1;
  229. + ip6h = ipv6_hdr(skb); /* reload: the pull above may have reallocated the header, leaving the old pointer stale */
  230. + ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
  231. +
  232. + tuple->src_v6 = ip6h->saddr;
  233. + tuple->dst_v6 = ip6h->daddr;
  234. + tuple->src_port = ports->source;
  235. + tuple->dst_port = ports->dest;
  236. + tuple->l3proto = AF_INET6;
  237. + tuple->l4proto = ip6h->nexthdr;
  238. + tuple->iifidx = dev->ifindex;
  239. +
  240. + return 0;
  241. +}
  242. +
  243. +/* Based on ip_exceeds_mtu(). */
  244. +static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
  245. +{ /* Returns true when skb is longer than mtu and is not a GSO packet that passes skb_gso_validate_mtu(). */
  246. + if (skb->len <= mtu)
  247. + return false; /* fits as is */
  248. +
  249. + if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
  250. + return false; /* GSO packet accepted by the validation helper */
  251. +
  252. + return true;
  253. +}
  254. +
  255. +static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
  256. +{ /* Returns true when skb exceeds the MTU that ip6_dst_mtu_forward() reports for rt->dst. */
  257. + u32 mtu;
  258. +
  259. + mtu = ip6_dst_mtu_forward(&rt->dst);
  260. + if (__nf_flow_exceeds_mtu(skb, mtu))
  261. + return true;
  262. +
  263. + return false;
  264. +}
  265. +
  266. +static unsigned int
  267. +nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
  268. + const struct nf_hook_state *state)
  269. +{ /* Netfilter hook: on a flow-table hit, mangle the packet and hand it to neigh_xmit() (NF_STOLEN); otherwise return NF_ACCEPT, or NF_DROP if mangling fails. */
  270. + struct flow_offload_tuple_rhash *tuplehash;
  271. + struct nf_flowtable *flow_table = priv; /* priv carries the flow table this hook serves */
  272. + struct flow_offload_tuple tuple = {};
  273. + enum flow_offload_tuple_dir dir;
  274. + struct flow_offload *flow;
  275. + struct net_device *outdev;
  276. + struct in6_addr *nexthop;
  277. + struct ipv6hdr *ip6h;
  278. + struct rt6_info *rt;
  279. +
  280. + if (skb->protocol != htons(ETH_P_IPV6))
  281. + return NF_ACCEPT;
  282. +
  283. + if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
  284. + return NF_ACCEPT; /* no usable TCP/UDP tuple could be extracted */
  285. +
  286. + tuplehash = flow_offload_lookup(flow_table, &tuple);
  287. + if (tuplehash == NULL)
  288. + return NF_ACCEPT; /* no flow entry for this tuple */
  289. +
  290. + outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
  291. + if (!outdev)
  292. + return NF_ACCEPT; /* output device not found */
  293. +
  294. + dir = tuplehash->tuple.dir;
  295. + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); /* recover the flow that embeds this tuplehash entry */
  296. +
  297. + rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
  298. + if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
  299. + return NF_ACCEPT; /* per the commit message, such packets need PMTUD handling in the classic forwarding path */
  300. +
  301. + if (skb_try_make_writable(skb, sizeof(*ip6h)))
  302. + return NF_DROP;
  303. +
  304. + if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
  305. + nf_flow_nat_ipv6(flow, skb, dir) < 0)
  306. + return NF_DROP;
  307. +
  308. + flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; /* refresh the flow's timeout on every forwarded packet */
  309. + ip6h = ipv6_hdr(skb); /* header pointer is fetched only after the writable/NAT steps */
  310. + ip6h->hop_limit--; /* NOTE(review): decremented unconditionally in this function */
  311. +
  312. + skb->dev = outdev;
  313. + nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); /* second argument is the opposite direction's source address */
  314. + neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb); /* NOTE(review): return value is not checked */
  315. +
  316. + return NF_STOLEN;
  317. +}
  318. +
  319. +static struct nf_flowtable_type flowtable_ipv6 = { /* flow table type registered for NFPROTO_IPV6 by this module */
  320. + .family = NFPROTO_IPV6,
  321. + .params = &nf_flow_offload_rhash_params,
  322. + .gc = nf_flow_offload_work_gc,
  323. + .hook = nf_flow_offload_ipv6_hook, /* the IPv6 datapath hook defined in this file */
  324. + .owner = THIS_MODULE,
  325. +};
  326. +
  327. +static int __init nf_flow_ipv6_module_init(void)
  328. +{ /* Module init: register the IPv6 flow table type; always returns 0. */
  329. + nft_register_flowtable_type(&flowtable_ipv6);
  330. +
  331. + return 0;
  332. +}
  333. +
  334. +static void __exit nf_flow_ipv6_module_exit(void)
  335. +{ /* Module exit: unregister the IPv6 flow table type registered at init. */
  336. + nft_unregister_flowtable_type(&flowtable_ipv6);
  337. +}
  338. +
  339. +module_init(nf_flow_ipv6_module_init);
  340. +module_exit(nf_flow_ipv6_module_exit);
  341. +
  342. +MODULE_LICENSE("GPL");
  343. +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
  344. +MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);