From: Jason Xing <kernelxing@xxxxxxxxxxx> For some applications, it is useful to let users tune the maximum TX budget that xsk uses when sending packets, for example to speed up transmission. Exposing such a knob also helps automated/AI tuning in the long run. The patch unifies the two existing definitions (TX_BATCH_SIZE and MAX_PER_SOCKET_BUDGET) into a single per-netns sysctl knob, xsk_max_tx_budget, which defaults to 32. Signed-off-by: Jason Xing <kernelxing@xxxxxxxxxxx> --- v2 Link: https://lore.kernel.org/all/20250617002236.30557-1-kerneljasonxing@xxxxxxxxx/ 1. use a per-netns sysctl knob 2. use sysctl_xsk_max_tx_budget to unify both definitions. --- include/net/netns/core.h | 1 + include/net/xdp_sock.h | 2 +- net/core/net_namespace.c | 1 + net/core/sysctl_net_core.c | 8 ++++++++ net/xdp/xsk.c | 12 ++++++------ 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 9b36f0ff0c20..f1ff15fd0032 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -14,6 +14,7 @@ struct netns_core { int sysctl_somaxconn; int sysctl_optmem_max; + int sysctl_xsk_max_tx_budget; u8 sysctl_txrehash; u8 sysctl_tstamp_allow_data; diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index e8bd6ddb7b12..57b26ad12aa1 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -65,7 +65,7 @@ struct xdp_sock { struct xsk_queue *tx ____cacheline_aligned_in_smp; struct list_head tx_list; /* record the number of tx descriptors sent by this xsk and - * when it exceeds MAX_PER_SOCKET_BUDGET, an opportunity needs + * when it exceeds sysctl_xsk_max_tx_budget, an opportunity needs * to be given to other xsks for sending tx descriptors, thereby * preventing other XSKs from being starved. 
*/ diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ae54f26709ca..890f8dc28690 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -396,6 +396,7 @@ static __net_init void preinit_net_sysctl(struct net *net) net->core.sysctl_optmem_max = 128 * 1024; net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED; net->core.sysctl_tstamp_allow_data = 1; + net->core.sysctl_xsk_max_tx_budget = 32; } /* init code that must occur even if setup_net() is not called. */ diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 5dbb2c6f371d..a51d9c7246ee 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -667,6 +667,14 @@ static struct ctl_table netns_core_table[] = { .extra1 = SYSCTL_ZERO, .proc_handler = proc_dointvec_minmax }, + { + .procname = "xsk_max_tx_budget", + .data = &init_net.core.sysctl_xsk_max_tx_budget, + .maxlen = sizeof(int), + .mode = 0644, + .extra1 = SYSCTL_ONE, + .proc_handler = proc_dointvec_minmax + }, { .procname = "txrehash", .data = &init_net.core.sysctl_txrehash, diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 72c000c0ae5f..15df133b50d7 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -33,9 +33,6 @@ #include "xdp_umem.h" #include "xsk.h" -#define TX_BATCH_SIZE 32 -#define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE) - void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool) { if (pool->cached_need_wakeup & XDP_WAKEUP_RX) @@ -424,7 +421,10 @@ bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) rcu_read_lock(); again: list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) { - if (xs->tx_budget_spent >= MAX_PER_SOCKET_BUDGET) { + struct sock *sk = (struct sock *)xs; + int max_budget = READ_ONCE(sock_net(sk)->core.sysctl_xsk_max_tx_budget); + + if (xs->tx_budget_spent >= max_budget) { budget_exhausted = true; continue; } @@ -778,8 +778,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, static int __xsk_generic_xmit(struct sock *sk) { + u32 max_budget = 
READ_ONCE(sock_net(sk)->core.sysctl_xsk_max_tx_budget); struct xdp_sock *xs = xdp_sk(sk); - u32 max_batch = TX_BATCH_SIZE; bool sent_frame = false; struct xdp_desc desc; struct sk_buff *skb; @@ -797,7 +797,7 @@ static int __xsk_generic_xmit(struct sock *sk) goto out; while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) { - if (max_batch-- == 0) { + if (max_budget-- == 0) { err = -EAGAIN; goto out; } -- 2.43.5