xsk.c
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2018 Netronome Systems, Inc */
/* Copyright (C) 2021 Corigine, Inc */

#include <linux/bpf_trace.h>
#include <linux/netdevice.h>

#include "../nfp_app.h"
#include "../nfp_net.h"
#include "../nfp_net_dp.h"
#include "../nfp_net_xsk.h"
#include "nfd3.h"

static bool
nfp_nfd3_xsk_tx_xdp(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		    struct nfp_net_rx_ring *rx_ring,
		    struct nfp_net_tx_ring *tx_ring,
		    struct nfp_net_xsk_rx_buf *xrxbuf, unsigned int pkt_len,
		    int pkt_off)
{
	struct xsk_buff_pool *pool = r_vec->xsk_pool;
	struct nfp_nfd3_tx_buf *txbuf;
	struct nfp_nfd3_tx_desc *txd;
	unsigned int wr_idx;

	if (nfp_net_tx_space(tx_ring) < 1)
		return false;

	xsk_buff_raw_dma_sync_for_device(pool, xrxbuf->dma_addr + pkt_off,
					 pkt_len);

	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

	txbuf = &tx_ring->txbufs[wr_idx];
	txbuf->xdp = xrxbuf->xdp;
	txbuf->real_len = pkt_len;
	txbuf->is_xsk_tx = true;

	/* Build TX descriptor */
	txd = &tx_ring->txds[wr_idx];
	txd->offset_eop = NFD3_DESC_TX_EOP;
	txd->dma_len = cpu_to_le16(pkt_len);
	nfp_desc_set_dma_addr(txd, xrxbuf->dma_addr + pkt_off);
	txd->data_len = cpu_to_le16(pkt_len);

	txd->flags = 0;
	txd->mss = 0;
	txd->lso_hdrlen = 0;

	tx_ring->wr_ptr_add++;
	tx_ring->wr_p++;

	return true;
}

static void nfp_nfd3_xsk_rx_skb(struct nfp_net_rx_ring *rx_ring,
				const struct nfp_net_rx_desc *rxd,
				struct nfp_net_xsk_rx_buf *xrxbuf,
				const struct nfp_meta_parsed *meta,
				unsigned int pkt_len,
				bool meta_xdp,
				unsigned int *skbs_polled)
{
	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct net_device *netdev;
	struct sk_buff *skb;

	if (likely(!meta->portid)) {
		netdev = dp->netdev;
	} else {
		struct nfp_net *nn = netdev_priv(dp->netdev);

		netdev = nfp_app_dev_get(nn->app, meta->portid, NULL);
		if (unlikely(!netdev)) {
			nfp_net_xsk_rx_drop(r_vec, xrxbuf);
			return;
		}
		nfp_repr_inc_rx_stats(netdev, pkt_len);
	}

	skb = napi_alloc_skb(&r_vec->napi, pkt_len);
	if (!skb) {
		nfp_net_xsk_rx_drop(r_vec, xrxbuf);
		return;
	}
	memcpy(skb_put(skb, pkt_len), xrxbuf->xdp->data, pkt_len);

	skb->mark = meta->mark;
	skb_set_hash(skb, meta->hash, meta->hash_type);

	skb_record_rx_queue(skb, rx_ring->idx);
	skb->protocol = eth_type_trans(skb, netdev);

	nfp_nfd3_rx_csum(dp, r_vec, rxd, meta, skb);

	if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
				       le16_to_cpu(rxd->rxd.vlan));
	if (meta_xdp)
		skb_metadata_set(skb,
				 xrxbuf->xdp->data - xrxbuf->xdp->data_meta);

	napi_gro_receive(&rx_ring->r_vec->napi, skb);

	nfp_net_xsk_rx_free(xrxbuf);

	(*skbs_polled)++;
}

static unsigned int
nfp_nfd3_xsk_rx(struct nfp_net_rx_ring *rx_ring, int budget,
		unsigned int *skbs_polled)
{
	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct nfp_net_tx_ring *tx_ring;
	struct bpf_prog *xdp_prog;
	bool xdp_redir = false;
	int pkts_polled = 0;

	xdp_prog = READ_ONCE(dp->xdp_prog);
	tx_ring = r_vec->xdp_ring;

	while (pkts_polled < budget) {
		unsigned int meta_len, data_len, pkt_len, pkt_off;
		struct nfp_net_xsk_rx_buf *xrxbuf;
		struct nfp_net_rx_desc *rxd;
		struct nfp_meta_parsed meta;
		int idx, act;
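
		/* Stop at the first descriptor the device has not completed
		 * yet (DD bit still clear).
		 */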
		idx = D_IDX(rx_ring, rx_ring->rd_p);

		rxd = &rx_ring->rxds[idx];
		if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
			break;

		rx_ring->rd_p++;
		pkts_polled++;

		xrxbuf = &rx_ring->xsk_rxbufs[idx];

		/* If starved of buffers "drop" it and scream. */
		if (rx_ring->rd_p >= rx_ring->wr_p) {
			nn_dp_warn(dp, "Starved of RX buffers\n");
			nfp_net_xsk_rx_drop(r_vec, xrxbuf);
			break;
		}

		/* Memory barrier to ensure that we won't do other reads
		 * before the DD bit.
		 */
		dma_rmb();

		memset(&meta, 0, sizeof(meta));

		/* Only supporting AF_XDP with dynamic metadata so buffer layout
		 * is always:
		 *
		 *  ---------------------------------------------------------
		 * |  off | metadata  |             packet           | XXXX  |
		 *  ---------------------------------------------------------
		 */
		meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
		data_len = le16_to_cpu(rxd->rxd.data_len);
		pkt_len = data_len - meta_len;

		if (unlikely(meta_len > NFP_NET_MAX_PREPEND)) {
			nn_dp_warn(dp, "Oversized RX packet metadata %u\n",
				   meta_len);
			nfp_net_xsk_rx_drop(r_vec, xrxbuf);
			continue;
		}

		/* Stats update. */
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->rx_pkts++;
		r_vec->rx_bytes += pkt_len;
		u64_stats_update_end(&r_vec->rx_sync);

		xrxbuf->xdp->data += meta_len;
		xrxbuf->xdp->data_end = xrxbuf->xdp->data + pkt_len;
		xdp_set_data_meta_invalid(xrxbuf->xdp);
		xsk_buff_dma_sync_for_cpu(xrxbuf->xdp, r_vec->xsk_pool);
		net_prefetch(xrxbuf->xdp->data);

		if (meta_len) {
			if (unlikely(nfp_nfd3_parse_meta(dp->netdev, &meta,
							 xrxbuf->xdp->data -
							 meta_len,
							 xrxbuf->xdp->data,
							 pkt_len, meta_len))) {
				nn_dp_warn(dp, "Invalid RX packet metadata\n");
				nfp_net_xsk_rx_drop(r_vec, xrxbuf);
				continue;
			}

			if (unlikely(meta.portid)) {
				struct nfp_net *nn = netdev_priv(dp->netdev);

				if (meta.portid != NFP_META_PORT_ID_CTRL) {
					nfp_nfd3_xsk_rx_skb(rx_ring, rxd,
							    xrxbuf, &meta,
							    pkt_len, false,
							    skbs_polled);
					continue;
				}

				nfp_app_ctrl_rx_raw(nn->app, xrxbuf->xdp->data,
						    pkt_len);
				nfp_net_xsk_rx_free(xrxbuf);
				continue;
			}
		}

		act = bpf_prog_run_xdp(xdp_prog, xrxbuf->xdp);

		pkt_len = xrxbuf->xdp->data_end - xrxbuf->xdp->data;
		pkt_off = xrxbuf->xdp->data - xrxbuf->xdp->data_hard_start;

		switch (act) {
		case XDP_PASS:
			nfp_nfd3_xsk_rx_skb(rx_ring, rxd, xrxbuf, &meta, pkt_len,
					    true, skbs_polled);
			break;
		case XDP_TX:
			if (!nfp_nfd3_xsk_tx_xdp(dp, r_vec, rx_ring, tx_ring,
						 xrxbuf, pkt_len, pkt_off))
				nfp_net_xsk_rx_drop(r_vec, xrxbuf);
			else
				nfp_net_xsk_rx_unstash(xrxbuf);
			break;
		case XDP_REDIRECT:
			if (xdp_do_redirect(dp->netdev, xrxbuf->xdp, xdp_prog)) {
				nfp_net_xsk_rx_drop(r_vec, xrxbuf);
			} else {
				nfp_net_xsk_rx_unstash(xrxbuf);
				xdp_redir = true;
			}
			break;
		default:
			bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act);
			fallthrough;
		case XDP_ABORTED:
			trace_xdp_exception(dp->netdev, xdp_prog, act);
			fallthrough;
		case XDP_DROP:
			nfp_net_xsk_rx_drop(r_vec, xrxbuf);
			break;
		}
	}

	nfp_net_xsk_rx_ring_fill_freelist(r_vec->rx_ring);

	if (xdp_redir)
		xdp_do_flush_map();

	if (tx_ring->wr_ptr_add)
		nfp_net_tx_xmit_more_flush(tx_ring);

	return pkts_polled;
}

void nfp_nfd3_xsk_tx_free(struct nfp_nfd3_tx_buf *txbuf)
{
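	/* Return the frame to the AF_XDP buffer pool and clear the
	 * software TX bookkeeping for this slot.
	 */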
	xsk_buff_free(txbuf->xdp);

	txbuf->dma_addr = 0;
	txbuf->xdp = NULL;
}

static bool nfp_nfd3_xsk_complete(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	u32 done_pkts = 0, done_bytes = 0, reused = 0;
	bool done_all;
	int idx, todo;
	u32 qcp_rd_p;

	if (tx_ring->wr_p == tx_ring->rd_p)
		return true;

	/* Work out how many descriptors have been transmitted. */
	qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);

	if (qcp_rd_p == tx_ring->qcp_rd_p)
		return true;

	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);

	done_all = todo <= NFP_NET_XDP_MAX_COMPLETE;
	todo = min(todo, NFP_NET_XDP_MAX_COMPLETE);

	tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo);

	done_pkts = todo;
	while (todo--) {
		struct nfp_nfd3_tx_buf *txbuf;

		idx = D_IDX(tx_ring, tx_ring->rd_p);
		tx_ring->rd_p++;

		txbuf = &tx_ring->txbufs[idx];
		if (unlikely(!txbuf->real_len))
			continue;

		done_bytes += txbuf->real_len;
		txbuf->real_len = 0;

		if (txbuf->is_xsk_tx) {
			nfp_nfd3_xsk_tx_free(txbuf);
			reused++;
		}
	}

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_bytes += done_bytes;
	r_vec->tx_pkts += done_pkts;
	u64_stats_update_end(&r_vec->tx_sync);

	xsk_tx_completed(r_vec->xsk_pool, done_pkts - reused);

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);

	return done_all;
}

static void nfp_nfd3_xsk_tx(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct xdp_desc desc[NFP_NET_XSK_TX_BATCH];
	struct xsk_buff_pool *xsk_pool;
	struct nfp_nfd3_tx_desc *txd;
	u32 pkts = 0, wr_idx;
	u32 i, got;

	xsk_pool = r_vec->xsk_pool;

	while (nfp_net_tx_space(tx_ring) >= NFP_NET_XSK_TX_BATCH) {
		for (i = 0; i < NFP_NET_XSK_TX_BATCH; i++)
			if (!xsk_tx_peek_desc(xsk_pool, &desc[i]))
				break;
		got = i;
		if (!got)
			break;

		wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i);
		prefetchw(&tx_ring->txds[wr_idx]);

		for (i = 0; i < got; i++)
			xsk_buff_raw_dma_sync_for_device(xsk_pool, desc[i].addr,
							 desc[i].len);

		for (i = 0; i < got; i++) {
			wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i);

			tx_ring->txbufs[wr_idx].real_len = desc[i].len;
			tx_ring->txbufs[wr_idx].is_xsk_tx = false;

			/* Build TX descriptor. */
			txd = &tx_ring->txds[wr_idx];
			nfp_desc_set_dma_addr(txd,
					      xsk_buff_raw_get_dma(xsk_pool,
								   desc[i].addr));
			txd->offset_eop = NFD3_DESC_TX_EOP;
			txd->dma_len = cpu_to_le16(desc[i].len);
			txd->data_len = cpu_to_le16(desc[i].len);
		}

		tx_ring->wr_p += got;
		pkts += got;
	}

	if (!pkts)
		return;

	xsk_tx_release(xsk_pool);
	/* Ensure all records are visible before incrementing write counter. */
	wmb();
	nfp_qcp_wr_ptr_add(tx_ring->qcp_q, pkts);
}

int nfp_nfd3_xsk_poll(struct napi_struct *napi, int budget)
{
	struct nfp_net_r_vector *r_vec =
		container_of(napi, struct nfp_net_r_vector, napi);
	unsigned int pkts_polled, skbs = 0;

	pkts_polled = nfp_nfd3_xsk_rx(r_vec->rx_ring, budget, &skbs);

	if (pkts_polled < budget) {
		if (r_vec->tx_ring)
			nfp_nfd3_tx_complete(r_vec->tx_ring, budget);

		if (!nfp_nfd3_xsk_complete(r_vec->xdp_ring))
			pkts_polled = budget;

		nfp_nfd3_xsk_tx(r_vec->xdp_ring);

		if (pkts_polled < budget && napi_complete_done(napi, skbs))
			nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
	}

	return pkts_polled;
}