680-NET-skip-GRO-for-foreign-MAC-addresses.patch 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. Subject: NET: skip GRO for foreign MAC addresses
  2. For network drivers using napi_gro_receive, packets are run through GRO,
  3. even when the destination MAC address does not match, and they're supposed
  4. to be delivered to another host behind a different bridge port.
  5. This can be very expensive: for drivers without TSO or scatter-gather,
  6. the GRO merge can only be undone by copying the skb and checksumming
  7. it again.
  8. To track foreign MAC addresses cheaply, create a mask of the bits in
  9. which the MAC addresses of upper devices differ from the device's own
  10. address; a destination that differs outside this mask is foreign. This
  11. handles VLAN and bridge devices with (not too) different addresses.
  12. Signed-off-by: Felix Fietkau <nbd@nbd.name>
  13. --- a/net/core/dev.c
  14. +++ b/net/core/dev.c
  15. @@ -4256,6 +4256,9 @@ static enum gro_result dev_gro_receive(s
  16. enum gro_result ret;
  17. int grow;
  18. + if (skb->gro_skip)
  19. + goto normal;
  20. +
  21. if (!(skb->dev->features & NETIF_F_GRO))
  22. goto normal;
  23. @@ -5422,6 +5425,48 @@ static void __netdev_adjacent_dev_unlink
  24. &upper_dev->adj_list.lower);
  25. }
  26. +static void __netdev_addr_mask(unsigned char *mask, const unsigned char *addr,
  27. + struct net_device *dev)
  28. +{
  29. + int i;
  30. +
  31. + for (i = 0; i < dev->addr_len; i++)
  32. + mask[i] |= addr[i] ^ dev->dev_addr[i];
  33. +}
  34. +
  35. +static void __netdev_upper_mask(unsigned char *mask, struct net_device *dev,
  36. + struct net_device *lower)
  37. +{
  38. + struct net_device *cur;
  39. + struct list_head *iter;
  40. +
  41. + netdev_for_each_upper_dev_rcu(dev, cur, iter) {
  42. + __netdev_addr_mask(mask, cur->dev_addr, lower);
  43. + __netdev_upper_mask(mask, cur, lower);
  44. + }
  45. +}
  46. +
  47. +static void __netdev_update_addr_mask(struct net_device *dev)
  48. +{
  49. + unsigned char mask[MAX_ADDR_LEN];
  50. + struct net_device *cur;
  51. + struct list_head *iter;
  52. +
  53. + memset(mask, 0, sizeof(mask));
  54. + __netdev_upper_mask(mask, dev, dev);
  55. + memcpy(dev->local_addr_mask, mask, dev->addr_len);
  56. +
  57. + netdev_for_each_lower_dev(dev, cur, iter)
  58. + __netdev_update_addr_mask(cur);
  59. +}
  60. +
  61. +static void netdev_update_addr_mask(struct net_device *dev)
  62. +{
  63. + rcu_read_lock();
  64. + __netdev_update_addr_mask(dev);
  65. + rcu_read_unlock();
  66. +}
  67. +
  68. static int __netdev_upper_dev_link(struct net_device *dev,
  69. struct net_device *upper_dev, bool master,
  70. void *private)
  71. @@ -5493,6 +5538,7 @@ static int __netdev_upper_dev_link(struc
  72. goto rollback_lower_mesh;
  73. }
  74. + netdev_update_addr_mask(dev);
  75. call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
  76. &changeupper_info.info);
  77. return 0;
  78. @@ -5619,6 +5665,7 @@ void netdev_upper_dev_unlink(struct net_
  79. list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
  80. __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
  81. + netdev_update_addr_mask(dev);
  82. call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
  83. &changeupper_info.info);
  84. }
  85. @@ -6159,6 +6206,7 @@ int dev_set_mac_address(struct net_devic
  86. if (err)
  87. return err;
  88. dev->addr_assign_type = NET_ADDR_SET;
  89. + netdev_update_addr_mask(dev);
  90. call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
  91. add_device_randomness(dev->dev_addr, dev->addr_len);
  92. return 0;
  93. --- a/include/linux/netdevice.h
  94. +++ b/include/linux/netdevice.h
  95. @@ -1639,6 +1639,8 @@ struct net_device {
  96. struct netdev_hw_addr_list mc;
  97. struct netdev_hw_addr_list dev_addrs;
  98. + unsigned char local_addr_mask[MAX_ADDR_LEN];
  99. +
  100. #ifdef CONFIG_SYSFS
  101. struct kset *queues_kset;
  102. #endif
  103. --- a/include/linux/skbuff.h
  104. +++ b/include/linux/skbuff.h
  105. @@ -642,7 +642,8 @@ struct sk_buff {
  106. __u8 ipvs_property:1;
  107. __u8 inner_protocol_type:1;
  108. __u8 remcsum_offload:1;
  109. - /* 3 or 5 bit hole */
  110. + __u8 gro_skip:1;
  111. + /* 2 or 4 bit hole */
  112. #ifdef CONFIG_NET_SCHED
  113. __u16 tc_index; /* traffic control index */
  114. --- a/net/ethernet/eth.c
  115. +++ b/net/ethernet/eth.c
  116. @@ -140,6 +140,18 @@ u32 eth_get_headlen(void *data, unsigned
  117. }
  118. EXPORT_SYMBOL(eth_get_headlen);
  119. +static inline bool
  120. +eth_check_local_mask(const void *addr1, const void *addr2, const void *mask)
  121. +{
  122. + const u16 *a1 = addr1;
  123. + const u16 *a2 = addr2;
  124. + const u16 *m = mask;
  125. +
  126. + return (((a1[0] ^ a2[0]) & ~m[0]) |
  127. + ((a1[1] ^ a2[1]) & ~m[1]) |
  128. + ((a1[2] ^ a2[2]) & ~m[2]));
  129. +}
  130. +
  131. /**
  132. * eth_type_trans - determine the packet's protocol ID.
  133. * @skb: received socket data
  134. @@ -168,8 +180,12 @@ __be16 eth_type_trans(struct sk_buff *sk
  135. skb->pkt_type = PACKET_MULTICAST;
  136. }
  137. else if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
  138. - dev->dev_addr)))
  139. + dev->dev_addr))) {
  140. skb->pkt_type = PACKET_OTHERHOST;
  141. + if (eth_check_local_mask(eth->h_dest, dev->dev_addr,
  142. + dev->local_addr_mask))
  143. + skb->gro_skip = 1;
  144. + }
  145. /*
  146. * Some variants of DSA tagging don't have an ethertype field