mlxbf_gige_tx.c (8376B)
// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause

/* Packet transmit logic for Mellanox Gigabit Ethernet driver
 *
 * Copyright (C) 2020-2021 NVIDIA CORPORATION & AFFILIATES
 */

#include <linux/skbuff.h>

#include "mlxbf_gige.h"
#include "mlxbf_gige_regs.h"

/* Transmit Initialization
 * 1) Allocates TX WQE array using coherent DMA mapping
 * 2) Allocates TX completion counter using coherent DMA mapping
 */
int mlxbf_gige_tx_init(struct mlxbf_gige *priv)
{
	size_t size;

	size = MLXBF_GIGE_TX_WQE_SZ * priv->tx_q_entries;
	priv->tx_wqe_base = dma_alloc_coherent(priv->dev, size,
					       &priv->tx_wqe_base_dma,
					       GFP_KERNEL);
	if (!priv->tx_wqe_base)
		return -ENOMEM;

	priv->tx_wqe_next = priv->tx_wqe_base;

	/* Write TX WQE base address into MMIO reg */
	writeq(priv->tx_wqe_base_dma, priv->base + MLXBF_GIGE_TX_WQ_BASE);

	/* Allocate address for TX completion count */
	priv->tx_cc = dma_alloc_coherent(priv->dev, MLXBF_GIGE_TX_CC_SZ,
					 &priv->tx_cc_dma, GFP_KERNEL);
	if (!priv->tx_cc) {
		dma_free_coherent(priv->dev, size,
				  priv->tx_wqe_base, priv->tx_wqe_base_dma);
		return -ENOMEM;
	}

	/* Write TX CC base address into MMIO reg */
	writeq(priv->tx_cc_dma, priv->base + MLXBF_GIGE_TX_CI_UPDATE_ADDRESS);

	writeq(ilog2(priv->tx_q_entries),
	       priv->base + MLXBF_GIGE_TX_WQ_SIZE_LOG2);

	priv->prev_tx_ci = 0;
	priv->tx_pi = 0;

	return 0;
}

/* Transmit Deinitialization
 * This routine will free allocations done by mlxbf_gige_tx_init(),
 * namely the TX WQE array and the TX completion counter
 */
void mlxbf_gige_tx_deinit(struct mlxbf_gige *priv)
{
	u64 *tx_wqe_addr;
	size_t size;
	int i;

	tx_wqe_addr = priv->tx_wqe_base;

	for (i = 0; i < priv->tx_q_entries; i++) {
		if (priv->tx_skb[i]) {
			dma_unmap_single(priv->dev, *tx_wqe_addr,
					 priv->tx_skb[i]->len, DMA_TO_DEVICE);
			dev_kfree_skb(priv->tx_skb[i]);
			priv->tx_skb[i] = NULL;
		}
		tx_wqe_addr += 2;
	}

	size = MLXBF_GIGE_TX_WQE_SZ * priv->tx_q_entries;
	dma_free_coherent(priv->dev, size,
			  priv->tx_wqe_base, priv->tx_wqe_base_dma);

	dma_free_coherent(priv->dev, MLXBF_GIGE_TX_CC_SZ,
			  priv->tx_cc, priv->tx_cc_dma);

	priv->tx_wqe_base = NULL;
	priv->tx_wqe_base_dma = 0;
	priv->tx_cc = NULL;
	priv->tx_cc_dma = 0;
	priv->tx_wqe_next = NULL;
	writeq(0, priv->base + MLXBF_GIGE_TX_WQ_BASE);
	writeq(0, priv->base + MLXBF_GIGE_TX_CI_UPDATE_ADDRESS);
}
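
/* Sizing sketch (illustrative values only, not driver code): assuming a
 * TX queue depth of 128 entries -- the real tx_q_entries is configured
 * elsewhere in the driver -- mlxbf_gige_tx_init() above allocates
 *
 *	MLXBF_GIGE_TX_WQE_SZ * tx_q_entries = 16 * 128 = 2048 bytes
 *
 * of coherent memory for the WQE array and programs
 * MLXBF_GIGE_TX_WQ_SIZE_LOG2 with ilog2(128) = 7.  Each 16-byte WQE
 * holds two 64-bit words: word 0 carries the DMA address of the packet
 * buffer and word 1 carries per-packet metadata such as the length
 * (see mlxbf_gige_handle_tx_complete() and mlxbf_gige_start_xmit()
 * below), which is why mlxbf_gige_tx_deinit() steps through the array
 * two qwords at a time.
 */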
/* Function that returns status of TX ring:
 *          0: TX ring is full, i.e. there are no
 *             available un-used entries in TX ring.
 *   non-zero: TX ring is not full, i.e. there are
 *             some available entries in TX ring.
 *             The non-zero value is a measure of
 *             how many TX entries are available, but
 *             it is not the exact number of available
 *             entries (see below).
 *
 * The algorithm makes the assumption that if
 * (prev_tx_ci == tx_pi) then the TX ring is empty.
 * An empty ring actually has (tx_q_entries-1)
 * usable entries, which allows the algorithm to
 * differentiate the case of an empty ring vs. a
 * full ring.
 */
static u16 mlxbf_gige_tx_buffs_avail(struct mlxbf_gige *priv)
{
	unsigned long flags;
	u16 avail;

	spin_lock_irqsave(&priv->lock, flags);

	if (priv->prev_tx_ci == priv->tx_pi)
		avail = priv->tx_q_entries - 1;
	else
		avail = ((priv->tx_q_entries + priv->prev_tx_ci - priv->tx_pi)
			  % priv->tx_q_entries) - 1;

	spin_unlock_irqrestore(&priv->lock, flags);

	return avail;
}
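
/* Worked example (illustrative values only, not driver code): with
 * tx_q_entries = 128, prev_tx_ci = 5 and tx_pi = 10, five WQEs are
 * outstanding, so
 *
 *	avail = ((128 + 5 - 10) % 128) - 1 = 122
 *
 * i.e. one less than the 123 physically unused entries, because one
 * slot is always held back to tell a full ring apart from an empty one.
 * If tx_pi advances to 132 (127 outstanding), the same formula yields
 * ((128 + 5 - 132) % 128) - 1 = 0 and the ring is reported full.
 */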
bool mlxbf_gige_handle_tx_complete(struct mlxbf_gige *priv)
{
	struct net_device_stats *stats;
	u16 tx_wqe_index;
	u64 *tx_wqe_addr;
	u64 tx_status;
	u16 tx_ci;

	tx_status = readq(priv->base + MLXBF_GIGE_TX_STATUS);
	if (tx_status & MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL)
		priv->stats.tx_fifo_full++;
	tx_ci = readq(priv->base + MLXBF_GIGE_TX_CONSUMER_INDEX);
	stats = &priv->netdev->stats;

	/* Transmit completion logic needs to loop until the completion
	 * index (in SW) equals TX consumer index (from HW).  These
	 * parameters are unsigned 16-bit values and the wrap case needs
	 * to be supported, that is TX consumer index wrapped from 0xFFFF
	 * to 0 while TX completion index is still < 0xFFFF.
	 */
	for (; priv->prev_tx_ci != tx_ci; priv->prev_tx_ci++) {
		tx_wqe_index = priv->prev_tx_ci % priv->tx_q_entries;
		/* Each TX WQE is 16 bytes. The 8 MSB store the 2KB TX
		 * buffer address and the 8 LSB contain information
		 * about the TX WQE.
		 */
		tx_wqe_addr = priv->tx_wqe_base +
			      (tx_wqe_index * MLXBF_GIGE_TX_WQE_SZ_QWORDS);

		stats->tx_packets++;
		stats->tx_bytes += MLXBF_GIGE_TX_WQE_PKT_LEN(tx_wqe_addr);

		dma_unmap_single(priv->dev, *tx_wqe_addr,
				 priv->tx_skb[tx_wqe_index]->len,
				 DMA_TO_DEVICE);
		dev_consume_skb_any(priv->tx_skb[tx_wqe_index]);
		priv->tx_skb[tx_wqe_index] = NULL;

		/* Ensure completion of updates across all cores */
		mb();
	}

	/* Since the TX ring was likely just drained, check if TX queue
	 * had previously been stopped and now that there are TX buffers
	 * available the TX queue can be awakened.
	 */
	if (netif_queue_stopped(priv->netdev) &&
	    mlxbf_gige_tx_buffs_avail(priv))
		netif_wake_queue(priv->netdev);

	return true;
}

/* Function to advance the tx_wqe_next pointer to next TX WQE */
void mlxbf_gige_update_tx_wqe_next(struct mlxbf_gige *priv)
{
	/* Advance tx_wqe_next pointer */
	priv->tx_wqe_next += MLXBF_GIGE_TX_WQE_SZ_QWORDS;

	/* Check if 'next' pointer is beyond end of TX ring */
	/* If so, set 'next' back to 'base' pointer of ring */
	if (priv->tx_wqe_next == (priv->tx_wqe_base +
				  (priv->tx_q_entries * MLXBF_GIGE_TX_WQE_SZ_QWORDS)))
		priv->tx_wqe_next = priv->tx_wqe_base;
}

netdev_tx_t mlxbf_gige_start_xmit(struct sk_buff *skb,
				  struct net_device *netdev)
{
	struct mlxbf_gige *priv = netdev_priv(netdev);
	long buff_addr, start_dma_page, end_dma_page;
	struct sk_buff *tx_skb;
	dma_addr_t tx_buf_dma;
	unsigned long flags;
	u64 *tx_wqe_addr;
	u64 word2;

	/* If needed, linearize TX SKB as hardware DMA expects this */
	if (skb->len > MLXBF_GIGE_DEFAULT_BUF_SZ || skb_linearize(skb)) {
		dev_kfree_skb(skb);
		netdev->stats.tx_dropped++;
		return NETDEV_TX_OK;
	}

	buff_addr = (long)skb->data;
	start_dma_page = buff_addr >> MLXBF_GIGE_DMA_PAGE_SHIFT;
	end_dma_page = (buff_addr + skb->len - 1) >> MLXBF_GIGE_DMA_PAGE_SHIFT;

	/* Verify that the payload pointer and data length of the SKB to
	 * be transmitted do not violate the hardware DMA limitation.
	 */
	if (start_dma_page != end_dma_page) {
		/* DMA operation would fail as-is, alloc new aligned SKB */
		tx_skb = mlxbf_gige_alloc_skb(priv, skb->len,
					      &tx_buf_dma, DMA_TO_DEVICE);
		if (!tx_skb) {
			/* Free original skb, could not alloc new aligned SKB */
			dev_kfree_skb(skb);
			netdev->stats.tx_dropped++;
			return NETDEV_TX_OK;
		}

		skb_put_data(tx_skb, skb->data, skb->len);

		/* Free the original SKB */
		dev_kfree_skb(skb);
	} else {
		tx_skb = skb;
		tx_buf_dma = dma_map_single(priv->dev, skb->data,
					    skb->len, DMA_TO_DEVICE);
		if (dma_mapping_error(priv->dev, tx_buf_dma)) {
			dev_kfree_skb(skb);
			netdev->stats.tx_dropped++;
			return NETDEV_TX_OK;
		}
	}

	/* Get address of TX WQE */
	tx_wqe_addr = priv->tx_wqe_next;

	mlxbf_gige_update_tx_wqe_next(priv);

	/* Put PA of buffer address into first 64-bit word of TX WQE */
	*tx_wqe_addr = tx_buf_dma;

	/* Set TX WQE pkt_len appropriately
	 * NOTE: GigE silicon will automatically pad up to
	 *       minimum packet length if needed.
	 */
	word2 = tx_skb->len & MLXBF_GIGE_TX_WQE_PKT_LEN_MASK;

	/* Write entire 2nd word of TX WQE */
	*(tx_wqe_addr + 1) = word2;

	spin_lock_irqsave(&priv->lock, flags);
	priv->tx_skb[priv->tx_pi % priv->tx_q_entries] = tx_skb;
	priv->tx_pi++;
	spin_unlock_irqrestore(&priv->lock, flags);

	if (!netdev_xmit_more()) {
		/* Create memory barrier before write to TX PI */
		wmb();
		writeq(priv->tx_pi, priv->base + MLXBF_GIGE_TX_PRODUCER_INDEX);
	}

	/* Check if the last TX entry was just used */
	if (!mlxbf_gige_tx_buffs_avail(priv)) {
		/* TX ring is full, inform stack */
		netif_stop_queue(netdev);

		/* Since there is no separate "TX complete" interrupt, need
		 * to explicitly schedule NAPI poll.  This will trigger logic
		 * which processes TX completions, and will hopefully drain
		 * the TX ring allowing the TX queue to be awakened.
		 */
		napi_schedule(&priv->napi);
	}

	return NETDEV_TX_OK;
}
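
/* Illustrative sketch only, not used by the driver: the DMA-window
 * crossing test from mlxbf_gige_start_xmit() factored out for clarity.
 * The helper name is hypothetical; it relies only on
 * MLXBF_GIGE_DMA_PAGE_SHIFT from mlxbf_gige.h.  A TX buffer that
 * straddles a DMA window boundary cannot be handed to the hardware
 * directly, so mlxbf_gige_start_xmit() bounce-copies it into a freshly
 * allocated, aligned SKB before mapping it for DMA.
 */
static inline bool mlxbf_gige_example_crosses_dma_page(long buff_addr,
							unsigned int len)
{
	long start_dma_page = buff_addr >> MLXBF_GIGE_DMA_PAGE_SHIFT;
	long end_dma_page = (buff_addr + len - 1) >> MLXBF_GIGE_DMA_PAGE_SHIFT;

	/* true means the bounce-copy path must be taken */
	return start_dma_page != end_dma_page;
}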