024-7-net-reorganize-struct-sock-for-better-data-locality.patch 5.2 KB

From 9115e8cd2a0c6eaaa900c462721f12e1d45f326c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 3 Dec 2016 11:14:56 -0800
Subject: [PATCH 07/10] net: reorganize struct sock for better data locality

Group fields used in the TX path, and keep some cache lines mostly read
to permit sharing among cpus.

Gained two 4-byte holes on 64bit arches.

Added a placeholder for tcp tsq_flags, next to sk_wmem_alloc,
to speed up tcp_wfree() in the following patch.

I have not added ____cacheline_aligned_in_smp; this might be done later.
I prefer doing this once the inet and tcp/udp socket reorg is also done.
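
For reference, ____cacheline_aligned_in_smp aligns the annotated member
to a cache line boundary on SMP builds. A minimal sketch of how a later
patch could use it (hypothetical struct, not the actual sock layout):

	#include <linux/cache.h>
	#include <linux/types.h>

	struct example_sock {
		/* read-mostly fields, shared cleanly across cpus */
		int		rcvbuf;
		/* write-heavy TX fields start on a fresh cache line,
		 * so they cannot false-share with the fields above.
		 */
		atomic_t	wmem_alloc ____cacheline_aligned_in_smp;
	};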
Tested with both TCP and UDP.

UDP receiver performance under flood increased by ~20%:
accessing sk_filter/sk_wq/sk_napi_id no longer stalls, because sk_drops
was moved off that critical cache line, which is now mostly read and shared.

The resulting layout of the affected cache lines (pahole output):
/* --- cacheline 4 boundary (256 bytes) --- */
	unsigned int		sk_napi_id;		/* 0x100   0x4 */
	int			sk_rcvbuf;		/* 0x104   0x4 */
	struct sk_filter *	sk_filter;		/* 0x108   0x8 */
	union {
		struct socket_wq * sk_wq;		/*         0x8 */
		struct socket_wq * sk_wq_raw;		/*         0x8 */
	};						/* 0x110   0x8 */
	struct xfrm_policy *	sk_policy[2];		/* 0x118  0x10 */
	struct dst_entry *	sk_rx_dst;		/* 0x128   0x8 */
	struct dst_entry *	sk_dst_cache;		/* 0x130   0x8 */
	atomic_t		sk_omem_alloc;		/* 0x138   0x4 */
	int			sk_sndbuf;		/* 0x13c   0x4 */
/* --- cacheline 5 boundary (320 bytes) --- */
	int			sk_wmem_queued;		/* 0x140   0x4 */
	atomic_t		sk_wmem_alloc;		/* 0x144   0x4 */
	long unsigned int	sk_tsq_flags;		/* 0x148   0x8 */
	struct sk_buff *	sk_send_head;		/* 0x150   0x8 */
	struct sk_buff_head	sk_write_queue;		/* 0x158  0x18 */
	__s32			sk_peek_off;		/* 0x170   0x4 */
	int			sk_write_pending;	/* 0x174   0x4 */
	long int		sk_sndtimeo;		/* 0x178   0x8 */
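
Layout dumps like the one above come from pahole (e.g. "pahole -C sock
vmlinux"). A build-time check in the same spirit, pinning a field to the
cache line shown above with offsetof(), might look like the sketch below;
the 0x104 constant is simply the offset from the dump, nothing this patch
adds:

	#include <linux/bug.h>		/* BUILD_BUG_ON() */
	#include <linux/stddef.h>	/* offsetof() */
	#include <net/sock.h>

	static inline void check_sock_layout(void)
	{
		/* fails the build if sk_rcvbuf ever moves off 0x104 */
		BUILD_BUG_ON(offsetof(struct sock, sk_rcvbuf) != 0x104);
	}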
Signed-off-by: Eric Dumazet <edumazet@google.com>
Tested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 51 +++++++++++++++++++++++++++------------------------
 1 file changed, 27 insertions(+), 24 deletions(-)

--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -343,6 +343,9 @@ struct sock {
 #define sk_rxhash		__sk_common.skc_rxhash
 
 	socket_lock_t		sk_lock;
+	atomic_t		sk_drops;
+	int			sk_rcvlowat;
+	struct sk_buff_head	sk_error_queue;
 	struct sk_buff_head	sk_receive_queue;
 	/*
 	 * The backlog queue is special, it is always used with
@@ -359,14 +362,13 @@ struct sock {
 		struct sk_buff	*tail;
 	} sk_backlog;
 #define sk_rmem_alloc sk_backlog.rmem_alloc
-	int			sk_forward_alloc;
 
-	__u32			sk_txhash;
+	int			sk_forward_alloc;
 #ifdef CONFIG_NET_RX_BUSY_POLL
-	unsigned int		sk_napi_id;
 	unsigned int		sk_ll_usec;
+	/* ===== mostly read cache line ===== */
+	unsigned int		sk_napi_id;
 #endif
-	atomic_t		sk_drops;
 	int			sk_rcvbuf;
 
 	struct sk_filter __rcu	*sk_filter;
@@ -379,11 +381,30 @@ struct sock {
 #endif
 	struct dst_entry	*sk_rx_dst;
 	struct dst_entry __rcu	*sk_dst_cache;
-	/* Note: 32bit hole on 64bit arches */
-	atomic_t		sk_wmem_alloc;
 	atomic_t		sk_omem_alloc;
 	int			sk_sndbuf;
+
+	/* ===== cache line for TX ===== */
+	int			sk_wmem_queued;
+	atomic_t		sk_wmem_alloc;
+	unsigned long		sk_tsq_flags;
+	struct sk_buff		*sk_send_head;
 	struct sk_buff_head	sk_write_queue;
+	__s32			sk_peek_off;
+	int			sk_write_pending;
+	long			sk_sndtimeo;
+	struct timer_list	sk_timer;
+	__u32			sk_priority;
+	__u32			sk_mark;
+	u32			sk_pacing_rate; /* bytes per second */
+	u32			sk_max_pacing_rate;
+	struct page_frag	sk_frag;
+	netdev_features_t	sk_route_caps;
+	netdev_features_t	sk_route_nocaps;
+	int			sk_gso_type;
+	unsigned int		sk_gso_max_size;
+	gfp_t			sk_allocation;
+	__u32			sk_txhash;
 
 	/*
 	 * Because of non atomicity rules, all
@@ -399,41 +420,23 @@ struct sock {
 #define SK_PROTOCOL_MAX U8_MAX
 	kmemcheck_bitfield_end(flags);
 
-	int			sk_wmem_queued;
-	gfp_t			sk_allocation;
-	u32			sk_pacing_rate; /* bytes per second */
-	u32			sk_max_pacing_rate;
-	netdev_features_t	sk_route_caps;
-	netdev_features_t	sk_route_nocaps;
-	int			sk_gso_type;
-	unsigned int		sk_gso_max_size;
 	u16			sk_gso_max_segs;
-	int			sk_rcvlowat;
 	unsigned long		sk_lingertime;
-	struct sk_buff_head	sk_error_queue;
 	struct proto		*sk_prot_creator;
 	rwlock_t		sk_callback_lock;
 	int			sk_err,
 				sk_err_soft;
 	u32			sk_ack_backlog;
 	u32			sk_max_ack_backlog;
-	__u32			sk_priority;
-	__u32			sk_mark;
 	struct pid		*sk_peer_pid;
 	const struct cred	*sk_peer_cred;
 	long			sk_rcvtimeo;
-	long			sk_sndtimeo;
-	struct timer_list	sk_timer;
 	ktime_t			sk_stamp;
 	u16			sk_tsflags;
 	u8			sk_shutdown;
 	u32			sk_tskey;
 	struct socket		*sk_socket;
 	void			*sk_user_data;
-	struct page_frag	sk_frag;
-	struct sk_buff		*sk_send_head;
-	__s32			sk_peek_off;
-	int			sk_write_pending;
 #ifdef CONFIG_SECURITY
 	void			*sk_security;
 #endif
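
As context for the "cache line for TX" grouping above: the transmit path
touches most of those fields back to back, and per the commit message the
following patch makes tcp_wfree() use sk_tsq_flags right next to the
sk_wmem_alloc it already updates. A simplified, hypothetical sketch of
that access pattern (not actual kernel code; offsets taken from the
pahole dump above):

	#include <linux/skbuff.h>
	#include <net/sock.h>

	static void example_queue_for_tx(struct sock *sk, struct sk_buff *skb)
	{
		/* all four accesses land on the cache line at 0x140 */
		sk->sk_wmem_queued += skb->truesize;		/* 0x140 */
		atomic_add(skb->truesize, &sk->sk_wmem_alloc);	/* 0x144 */
		if (!sk->sk_send_head)				/* 0x150 */
			sk->sk_send_head = skb;
		__skb_queue_tail(&sk->sk_write_queue, skb);	/* 0x158 */
	}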