On Fri, Jul 25, 2025 at 6:52 PM Larysa Zaremba <larysa.zaremba@xxxxxxxxx> wrote: > > On Sun, Jul 20, 2025 at 05:11:22PM +0800, Jason Xing wrote: > > From: Jason Xing <kernelxing@xxxxxxxxxxx> > > > > Like what i40e driver initially did in commit 3106c580fb7cf > > ("i40e: Use batched xsk Tx interfaces to increase performance"), use > > the batched xsk feature to transmit packets. > > > > Signed-off-by: Jason Xing <kernelxing@xxxxxxxxxxx> > > --- > > drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 106 +++++++++++++------ > > 1 file changed, 72 insertions(+), 34 deletions(-) > > > > diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c > > index f3d3f5c1cdc7..9fe2c4bf8bc5 100644 > > --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c > > +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c > > @@ -2,12 +2,15 @@ > > /* Copyright(c) 2018 Intel Corporation. */ > > > > #include <linux/bpf_trace.h> > > +#include <linux/unroll.h> > > #include <net/xdp_sock_drv.h> > > #include <net/xdp.h> > > > > #include "ixgbe.h" > > #include "ixgbe_txrx_common.h" > > > > +#define PKTS_PER_BATCH 4 > > + > > struct xsk_buff_pool *ixgbe_xsk_pool(struct ixgbe_adapter *adapter, > > struct ixgbe_ring *ring) > > { > > @@ -388,58 +391,93 @@ void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring) > > } > > } > > > > -static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) > > +static void ixgbe_set_rs_bit(struct ixgbe_ring *xdp_ring) > > +{ > > + u16 ntu = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1; > > + union ixgbe_adv_tx_desc *tx_desc; > > + > > + tx_desc = IXGBE_TX_DESC(xdp_ring, ntu); > > + tx_desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD_RS); > > +} > > + > > +static void ixgbe_xmit_pkt(struct ixgbe_ring *xdp_ring, struct xdp_desc *desc, > > + int i) > > + > > `i` parameter seems redundant here, why not just pass desc + i as a parameter? Let me resolve this :) Thanks, Jason > > > { > > struct xsk_buff_pool *pool = xdp_ring->xsk_pool; > > union ixgbe_adv_tx_desc *tx_desc = NULL; > > struct ixgbe_tx_buffer *tx_bi; > > - struct xdp_desc desc; > > dma_addr_t dma; > > u32 cmd_type; > > > > - if (!budget) > > - return true; > > + dma = xsk_buff_raw_get_dma(pool, desc[i].addr); > > + xsk_buff_raw_dma_sync_for_device(pool, dma, desc[i].len); > > > > - while (likely(budget)) { > > - if (!netif_carrier_ok(xdp_ring->netdev)) > > - break; > > + tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use]; > > + tx_bi->bytecount = desc[i].len; > > + tx_bi->xdpf = NULL; > > + tx_bi->gso_segs = 1; > > > > - if (!xsk_tx_peek_desc(pool, &desc)) > > - break; > > + tx_desc = IXGBE_TX_DESC(xdp_ring, xdp_ring->next_to_use); > > + tx_desc->read.buffer_addr = cpu_to_le64(dma); > > > > - dma = xsk_buff_raw_get_dma(pool, desc.addr); > > - xsk_buff_raw_dma_sync_for_device(pool, dma, desc.len); > > + cmd_type = IXGBE_ADVTXD_DTYP_DATA | > > + IXGBE_ADVTXD_DCMD_DEXT | > > + IXGBE_ADVTXD_DCMD_IFCS; > > + cmd_type |= desc[i].len | IXGBE_TXD_CMD_EOP; > > + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); > > + tx_desc->read.olinfo_status = > > + cpu_to_le32(desc[i].len << IXGBE_ADVTXD_PAYLEN_SHIFT); > > > > - tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use]; > > - tx_bi->bytecount = desc.len; > > - tx_bi->xdpf = NULL; > > - tx_bi->gso_segs = 1; > > + xdp_ring->next_to_use++; > > +} > > > > - tx_desc = IXGBE_TX_DESC(xdp_ring, xdp_ring->next_to_use); > > - tx_desc->read.buffer_addr = cpu_to_le64(dma); > > +static void ixgbe_xmit_pkt_batch(struct ixgbe_ring *xdp_ring, struct xdp_desc *desc) > > +{ > > + u32 i; > > > > - /* put descriptor type bits */ > > - cmd_type = IXGBE_ADVTXD_DTYP_DATA | > > - IXGBE_ADVTXD_DCMD_DEXT | > > - IXGBE_ADVTXD_DCMD_IFCS; > > - cmd_type |= desc.len | IXGBE_TXD_CMD; > > - tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); > > - tx_desc->read.olinfo_status = > > - cpu_to_le32(desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT); > > + unrolled_count(PKTS_PER_BATCH) > > + for (i = 0; i < PKTS_PER_BATCH; i++) > > + ixgbe_xmit_pkt(xdp_ring, desc, i); > > +} > > > > - xdp_ring->next_to_use++; > > - if (xdp_ring->next_to_use == xdp_ring->count) > > - xdp_ring->next_to_use = 0; > > +static void ixgbe_fill_tx_hw_ring(struct ixgbe_ring *xdp_ring, > > + struct xdp_desc *descs, u32 nb_pkts) > > +{ > > + u32 batched, leftover, i; > > + > > + batched = nb_pkts & ~(PKTS_PER_BATCH - 1); > > + leftover = nb_pkts & (PKTS_PER_BATCH - 1); > > + for (i = 0; i < batched; i += PKTS_PER_BATCH) > > + ixgbe_xmit_pkt_batch(xdp_ring, &descs[i]); > > + for (i = batched; i < batched + leftover; i++) > > + ixgbe_xmit_pkt(xdp_ring, &descs[i], 0); > > +} > > > > - budget--; > > - } > > +static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) > > +{ > > + struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; > > + u32 nb_pkts, nb_processed = 0; > > > > - if (tx_desc) { > > - ixgbe_xdp_ring_update_tail(xdp_ring); > > - xsk_tx_release(pool); > > + if (!netif_carrier_ok(xdp_ring->netdev)) > > + return true; > > + > > + nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); > > + if (!nb_pkts) > > + return true; > > + > > + if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { > > + nb_processed = xdp_ring->count - xdp_ring->next_to_use; > > + ixgbe_fill_tx_hw_ring(xdp_ring, descs, nb_processed); > > + xdp_ring->next_to_use = 0; > > } > > > > - return !!budget; > > + ixgbe_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed); > > + > > + ixgbe_set_rs_bit(xdp_ring); > > + ixgbe_xdp_ring_update_tail(xdp_ring); > > + > > + return nb_pkts < budget; > > } > > > > static void ixgbe_clean_xdp_tx_buffer(struct ixgbe_ring *tx_ring, > > -- > > 2.41.3 > > > >