077-03-bgmac-implement-scatter-gather-support.patch

From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 23 Mar 2015 02:42:26 +0100
Subject: [PATCH] bgmac: implement scatter/gather support

Always use software checksumming, since the hardware does not have any
checksum offload support.

This significantly improves local TCP tx performance.
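
In short, the new tx path amounts to the following (a simplified sketch
of the hunks below, not the literal driver code; variable names are taken
from the driver, and descriptor flags, slot bookkeeping and error handling
are omitted): resolve any pending checksum in software, then map the
linear head and each page fragment into its own DMA descriptor.

    if (skb->ip_summed == CHECKSUM_PARTIAL)
            skb_checksum_help(skb);         /* no checksum engine in hw */

    /* one descriptor for the linear head (SOF), then one per fragment;
     * EOF/IOC go on the last descriptor of the packet
     */
    dma_map_single(dma_dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);

    for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
            struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];

            skb_frag_dma_map(dma_dev, frag, 0, skb_frag_size(frag),
                             DMA_TO_DEVICE);
    }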

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -115,53 +115,91 @@ static void bgmac_dma_tx_enable(struct b
 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
 }
 
+static void
+bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
+		     int i, int len, u32 ctl0)
+{
+	struct bgmac_slot_info *slot;
+	struct bgmac_dma_desc *dma_desc;
+	u32 ctl1;
+
+	if (i == ring->num_slots - 1)
+		ctl0 |= BGMAC_DESC_CTL0_EOT;
+
+	ctl1 = len & BGMAC_DESC_CTL1_LEN;
+
+	slot = &ring->slots[i];
+	dma_desc = &ring->cpu_base[i];
+	dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
+	dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
+	dma_desc->ctl0 = cpu_to_le32(ctl0);
+	dma_desc->ctl1 = cpu_to_le32(ctl1);
+}
+
 static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 				    struct bgmac_dma_ring *ring,
 				    struct sk_buff *skb)
 {
 	struct device *dma_dev = bgmac->core->dma_dev;
 	struct net_device *net_dev = bgmac->net_dev;
-	struct bgmac_dma_desc *dma_desc;
-	struct bgmac_slot_info *slot;
-	u32 ctl0, ctl1;
+	struct bgmac_slot_info *slot = &ring->slots[ring->end];
 	int free_slots;
+	int nr_frags;
+	u32 flags;
+	int index = ring->end;
+	int i;
 
 	if (skb->len > BGMAC_DESC_CTL1_LEN) {
 		bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
-		goto err_stop_drop;
+		goto err_drop;
 	}
 
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		skb_checksum_help(skb);
+
+	nr_frags = skb_shinfo(skb)->nr_frags;
+
 	if (ring->start <= ring->end)
 		free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
 	else
 		free_slots = ring->start - ring->end;
-	if (free_slots == 1) {
+
+	if (free_slots <= nr_frags + 1) {
 		bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
 		netif_stop_queue(net_dev);
 		return NETDEV_TX_BUSY;
 	}
 
-	slot = &ring->slots[ring->end];
-	slot->skb = skb;
-	slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
+	slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
 					DMA_TO_DEVICE);
-	if (dma_mapping_error(dma_dev, slot->dma_addr)) {
-		bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
-			  ring->mmio_base);
-		goto err_stop_drop;
-	}
+	if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
+		goto err_dma_head;
 
-	ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
-	if (ring->end == ring->num_slots - 1)
-		ctl0 |= BGMAC_DESC_CTL0_EOT;
-	ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
+	flags = BGMAC_DESC_CTL0_SOF;
+	if (!nr_frags)
+		flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
+
+	bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
+	flags = 0;
+
+	for (i = 0; i < nr_frags; i++) {
+		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+		int len = skb_frag_size(frag);
+
+		index = (index + 1) % BGMAC_TX_RING_SLOTS;
+		slot = &ring->slots[index];
+		slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
+						  len, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
+			goto err_dma;
 
-	dma_desc = ring->cpu_base;
-	dma_desc += ring->end;
-	dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
-	dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
-	dma_desc->ctl0 = cpu_to_le32(ctl0);
-	dma_desc->ctl1 = cpu_to_le32(ctl1);
+		if (i == nr_frags - 1)
+			flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
+
+		bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
+	}
+
+	slot->skb = skb;
 
 	netdev_sent_queue(net_dev, skb->len);
 
@@ -170,20 +208,35 @@ static netdev_tx_t bgmac_dma_tx_add(stru
 	/* Increase ring->end to point empty slot. We tell hardware the first
 	 * slot it should *not* read.
 	 */
-	if (++ring->end >= BGMAC_TX_RING_SLOTS)
-		ring->end = 0;
+	ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
 		    ring->index_base +
 		    ring->end * sizeof(struct bgmac_dma_desc));
 
-	/* Always keep one slot free to allow detecting bugged calls. */
-	if (--free_slots == 1)
+	free_slots -= nr_frags + 1;
+	if (free_slots < 8)
 		netif_stop_queue(net_dev);
 
 	return NETDEV_TX_OK;
 
-err_stop_drop:
-	netif_stop_queue(net_dev);
+err_dma:
+	dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
+			 DMA_TO_DEVICE);
+
+	while (i > 0) {
+		int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
+		struct bgmac_slot_info *slot = &ring->slots[index];
+		u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
+		int len = ctl1 & BGMAC_DESC_CTL1_LEN;
+
+		dma_unmap_page(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE);
+	}
+
+err_dma_head:
+	bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
+		  ring->mmio_base);
+
+err_drop:
 	dev_kfree_skb(skb);
 	return NETDEV_TX_OK;
 }
@@ -205,32 +258,45 @@ static void bgmac_dma_tx_free(struct bgm
 	while (ring->start != empty_slot) {
 		struct bgmac_slot_info *slot = &ring->slots[ring->start];
+		u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
+		int len = ctl1 & BGMAC_DESC_CTL1_LEN;
 
-		if (slot->skb) {
+		if (!slot->dma_addr) {
+			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+				  ring->start, ring->end);
+			goto next;
+		}
+
+		if (ctl1 & BGMAC_DESC_CTL0_SOF)
 			/* Unmap no longer used buffer */
-			dma_unmap_single(dma_dev, slot->dma_addr,
-					 slot->skb->len, DMA_TO_DEVICE);
-			slot->dma_addr = 0;
+			dma_unmap_single(dma_dev, slot->dma_addr, len,
+					 DMA_TO_DEVICE);
+		else
+			dma_unmap_page(dma_dev, slot->dma_addr, len,
+				       DMA_TO_DEVICE);
 
+		if (slot->skb) {
 			bytes_compl += slot->skb->len;
 			pkts_compl++;
 
 			/* Free memory! :) */
 			dev_kfree_skb(slot->skb);
 			slot->skb = NULL;
-		} else {
-			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
-				  ring->start, ring->end);
 		}
 
+next:
+		slot->dma_addr = 0;
 		if (++ring->start >= BGMAC_TX_RING_SLOTS)
 			ring->start = 0;
 		freed = true;
 	}
 
+	if (!pkts_compl)
+		return;
+
 	netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);
 
-	if (freed && netif_queue_stopped(bgmac->net_dev))
+	if (netif_queue_stopped(bgmac->net_dev))
 		netif_wake_queue(bgmac->net_dev);
 }
@@ -439,17 +505,25 @@ static void bgmac_dma_tx_ring_free(struc
 				   struct bgmac_dma_ring *ring)
 {
 	struct device *dma_dev = bgmac->core->dma_dev;
+	struct bgmac_dma_desc *dma_desc = ring->cpu_base;
 	struct bgmac_slot_info *slot;
 	int i;
 
 	for (i = 0; i < ring->num_slots; i++) {
+		int len = dma_desc[i].ctl1 & BGMAC_DESC_CTL1_LEN;
+
 		slot = &ring->slots[i];
-		if (slot->skb) {
-			if (slot->dma_addr)
-				dma_unmap_single(dma_dev, slot->dma_addr,
-						 slot->skb->len, DMA_TO_DEVICE);
-			dev_kfree_skb(slot->skb);
-		}
+		dev_kfree_skb(slot->skb);
+
+		if (!slot->dma_addr)
+			continue;
+
+		if (slot->skb)
+			dma_unmap_single(dma_dev, slot->dma_addr,
+					 len, DMA_TO_DEVICE);
+		else
+			dma_unmap_page(dma_dev, slot->dma_addr,
+				       len, DMA_TO_DEVICE);
 	}
 }
 
@@ -1588,6 +1662,10 @@ static int bgmac_probe(struct bcma_devic
 		goto err_dma_free;
 	}
 
+	net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	net_dev->hw_features = net_dev->features;
+	net_dev->vlan_features = net_dev->features;
+
 	err = register_netdev(bgmac->net_dev);
 	if (err) {
 		bgmac_err(bgmac, "Cannot register net device\n");