123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267 |
- From: Felix Fietkau <nbd@nbd.name>
- Date: Mon, 23 Mar 2015 02:42:26 +0100
- Subject: [PATCH] bgmac: implement scatter/gather support
- Always use software checksumming, since the hardware does not have any
- checksum offload support.
- Enabling scatter/gather significantly improves local TCP tx performance.
- Signed-off-by: Felix Fietkau <nbd@nbd.name>
- ---
- --- a/drivers/net/ethernet/broadcom/bgmac.c
- +++ b/drivers/net/ethernet/broadcom/bgmac.c
- @@ -115,53 +115,91 @@ static void bgmac_dma_tx_enable(struct b
- bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
- }
-
- +static void
- +bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
- + int i, int len, u32 ctl0)
- +{
- + struct bgmac_slot_info *slot;
- + struct bgmac_dma_desc *dma_desc;
- + u32 ctl1;
- +
- + if (i == ring->num_slots - 1)
- + ctl0 |= BGMAC_DESC_CTL0_EOT;
- +
- + ctl1 = len & BGMAC_DESC_CTL1_LEN;
- +
- + slot = &ring->slots[i];
- + dma_desc = &ring->cpu_base[i];
- + dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
- + dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
- + dma_desc->ctl0 = cpu_to_le32(ctl0);
- + dma_desc->ctl1 = cpu_to_le32(ctl1);
- +}
- +
- static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
- struct bgmac_dma_ring *ring,
- struct sk_buff *skb)
- {
- struct device *dma_dev = bgmac->core->dma_dev;
- struct net_device *net_dev = bgmac->net_dev;
- - struct bgmac_dma_desc *dma_desc;
- - struct bgmac_slot_info *slot;
- - u32 ctl0, ctl1;
- + struct bgmac_slot_info *slot = &ring->slots[ring->end];
- int free_slots;
- + int nr_frags;
- + u32 flags;
- + int index = ring->end;
- + int i;
-
- if (skb->len > BGMAC_DESC_CTL1_LEN) {
- bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
- - goto err_stop_drop;
- + goto err_drop;
- }
-
- + if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
- + goto err_drop; /* checksum could not be completed in software */
- +
- + nr_frags = skb_shinfo(skb)->nr_frags;
- +
- if (ring->start <= ring->end)
- free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
- else
- free_slots = ring->start - ring->end;
- - if (free_slots == 1) {
- +
- + if (free_slots <= nr_frags + 1) {
- bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
- netif_stop_queue(net_dev);
- return NETDEV_TX_BUSY;
- }
-
- - slot = &ring->slots[ring->end];
- - slot->skb = skb;
- - slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
- + slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
- DMA_TO_DEVICE);
- - if (dma_mapping_error(dma_dev, slot->dma_addr)) {
- - bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
- - ring->mmio_base);
- - goto err_stop_drop;
- - }
- + if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
- + goto err_dma_head;
-
- - ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
- - if (ring->end == ring->num_slots - 1)
- - ctl0 |= BGMAC_DESC_CTL0_EOT;
- - ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
- + flags = BGMAC_DESC_CTL0_SOF;
- + if (!nr_frags)
- + flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
- +
- + bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
- + flags = 0;
- +
- + for (i = 0; i < nr_frags; i++) {
- + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
- + int len = skb_frag_size(frag);
- +
- + index = (index + 1) % BGMAC_TX_RING_SLOTS;
- + slot = &ring->slots[index];
- + slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
- + len, DMA_TO_DEVICE);
- + if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
- + goto err_dma;
-
- - dma_desc = ring->cpu_base;
- - dma_desc += ring->end;
- - dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
- - dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
- - dma_desc->ctl0 = cpu_to_le32(ctl0);
- - dma_desc->ctl1 = cpu_to_le32(ctl1);
- + if (i == nr_frags - 1)
- + flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
- +
- + bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
- + }
- +
- + slot->skb = skb;
-
- netdev_sent_queue(net_dev, skb->len);
-
- @@ -170,20 +208,35 @@ static netdev_tx_t bgmac_dma_tx_add(stru
- /* Increase ring->end to point empty slot. We tell hardware the first
- * slot it should *not* read.
- */
- - if (++ring->end >= BGMAC_TX_RING_SLOTS)
- - ring->end = 0;
- + ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
- bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
- ring->index_base +
- ring->end * sizeof(struct bgmac_dma_desc));
-
- - /* Always keep one slot free to allow detecting bugged calls. */
- - if (--free_slots == 1)
- + free_slots -= nr_frags + 1;
- + if (free_slots < 8)
- netif_stop_queue(net_dev);
-
- return NETDEV_TX_OK;
-
- -err_stop_drop:
- - netif_stop_queue(net_dev);
- +err_dma: /* frag i failed to map: undo the head and frags 0..i-1 */
- + dma_unmap_single(dma_dev, ring->slots[ring->end].dma_addr,
- + skb_headlen(skb), DMA_TO_DEVICE);
- +
- + for (; i > 0; i--) {
- + int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
- + struct bgmac_slot_info *slot = &ring->slots[index];
- + u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
- + int len = ctl1 & BGMAC_DESC_CTL1_LEN;
- +
- + dma_unmap_page(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE);
- + }
- +
- +err_dma_head:
- + bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
- + ring->mmio_base);
- +
- +err_drop:
- dev_kfree_skb(skb);
- return NETDEV_TX_OK;
- }
- @@ -205,32 +258,45 @@ static void bgmac_dma_tx_free(struct bgm
-
- while (ring->start != empty_slot) {
- struct bgmac_slot_info *slot = &ring->slots[ring->start];
- + struct bgmac_dma_desc *desc = &ring->cpu_base[ring->start];
- + int len = le32_to_cpu(desc->ctl1) & BGMAC_DESC_CTL1_LEN;
-
- - if (slot->skb) {
- + if (!slot->dma_addr) {
- + bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
- + ring->start, ring->end);
- + goto next;
- + }
- +
- + if (le32_to_cpu(desc->ctl0) & BGMAC_DESC_CTL0_SOF)
- /* Unmap no longer used buffer */
- - dma_unmap_single(dma_dev, slot->dma_addr,
- - slot->skb->len, DMA_TO_DEVICE);
- - slot->dma_addr = 0;
- + dma_unmap_single(dma_dev, slot->dma_addr, len,
- + DMA_TO_DEVICE);
- + else
- + dma_unmap_page(dma_dev, slot->dma_addr, len,
- + DMA_TO_DEVICE);
-
- + if (slot->skb) {
- bytes_compl += slot->skb->len;
- pkts_compl++;
-
- /* Free memory! :) */
- dev_kfree_skb(slot->skb);
- slot->skb = NULL;
- - } else {
- - bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
- - ring->start, ring->end);
- }
-
- +next:
- + slot->dma_addr = 0;
- if (++ring->start >= BGMAC_TX_RING_SLOTS)
- ring->start = 0;
- freed = true;
- }
-
- + if (!pkts_compl)
- + return;
- +
- netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);
-
- - if (freed && netif_queue_stopped(bgmac->net_dev))
- + if (netif_queue_stopped(bgmac->net_dev))
- netif_wake_queue(bgmac->net_dev);
- }
-
- @@ -439,17 +505,25 @@ static void bgmac_dma_tx_ring_free(struc
- struct bgmac_dma_ring *ring)
- {
- struct device *dma_dev = bgmac->core->dma_dev;
- + struct bgmac_dma_desc *dma_desc = ring->cpu_base;
- struct bgmac_slot_info *slot;
- int i;
-
- for (i = 0; i < ring->num_slots; i++) {
- + int len = le32_to_cpu(dma_desc[i].ctl1) & BGMAC_DESC_CTL1_LEN;
- +
- slot = &ring->slots[i];
- - if (slot->skb) {
- - if (slot->dma_addr)
- - dma_unmap_single(dma_dev, slot->dma_addr,
- - slot->skb->len, DMA_TO_DEVICE);
- - dev_kfree_skb(slot->skb);
- - }
- + dev_kfree_skb(slot->skb);
- +
- + if (!slot->dma_addr)
- + continue;
- +
- + if (le32_to_cpu(dma_desc[i].ctl0) & BGMAC_DESC_CTL0_SOF)
- + dma_unmap_single(dma_dev, slot->dma_addr,
- + len, DMA_TO_DEVICE);
- + else
- + dma_unmap_page(dma_dev, slot->dma_addr,
- + len, DMA_TO_DEVICE);
- }
- }
-
- @@ -1588,6 +1662,10 @@ static int bgmac_probe(struct bcma_devic
- goto err_dma_free;
- }
-
- + net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
- + net_dev->hw_features = net_dev->features;
- + net_dev->vlan_features = net_dev->features;
- +
- err = register_netdev(bgmac->net_dev);
- if (err) {
- bgmac_err(bgmac, "Cannot register net device\n");
|