Hi Florian, On Fri, Aug 22, 2025 at 10:15:37AM +0200, Florian Westphal wrote: [...] > diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c > index 266d0c637225..cc302543c2e4 100644 > --- a/net/netfilter/nft_set_hash.c > +++ b/net/netfilter/nft_set_hash.c > @@ -30,6 +30,7 @@ struct nft_rhash { > struct nft_rhash_elem { > struct nft_elem_priv priv; > struct rhash_head node; > + struct llist_node walk_node; > u32 wq_gc_seq; > struct nft_set_ext ext; > }; > @@ -144,6 +145,7 @@ nft_rhash_update(struct nft_set *set, const u32 *key, > goto err1; > > he = nft_elem_priv_cast(elem_priv); > + init_llist_node(&he->walk_node); > prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, > nft_rhash_params); > if (IS_ERR(prev)) > @@ -180,6 +182,7 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set, > }; > struct nft_rhash_elem *prev; > > + init_llist_node(&he->walk_node); > prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, > nft_rhash_params); > if (IS_ERR(prev)) > @@ -261,12 +264,12 @@ static bool nft_rhash_delete(const struct nft_set *set, > return true; > } > > -static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, > - struct nft_set_iter *iter) > +static void nft_rhash_walk_ro(const struct nft_ctx *ctx, struct nft_set *set, > + struct nft_set_iter *iter) > { > struct nft_rhash *priv = nft_set_priv(set); > - struct nft_rhash_elem *he; > struct rhashtable_iter hti; > + struct nft_rhash_elem *he; > > rhashtable_walk_enter(&priv->ht, &hti); > rhashtable_walk_start(&hti); > @@ -295,6 +298,99 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, > rhashtable_walk_exit(&hti); > } > > +static void nft_rhash_walk_update(const struct nft_ctx *ctx, > + struct nft_set *set, > + struct nft_set_iter *iter) > +{ > + struct nft_rhash *priv = nft_set_priv(set); > + struct nft_rhash_elem *he, *tmp; > + struct llist_node *first_node; > + struct rhashtable_iter hti; > + LLIST_HEAD(walk_list); > + > + lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex); > + > + if (set->in_update_walk) { > + /* This can happen with bogus rulesets during ruleset validation > + * when a verdict map causes a jump back to the same map. > + * > + * Without this extra check the walk_next loop below will see > + * elems on the callers walk_list and skip (not validate) them. > + */ > + iter->err = -EMLINK; > + return; > + } > + > + /* walk happens under RCU. > + * > + * We create a snapshot list so ->iter callback can sleep. > + * commit_mutex is held, elements can ... > + * .. be added in parallel from dataplane (dynset) > + * .. be marked as dead in parallel from dataplane (dynset). > + * .. be queued for removal in parallel (gc timeout). > + * .. not be freed: transaction mutex is held. > + */ > + rhashtable_walk_enter(&priv->ht, &hti); > + rhashtable_walk_start(&hti); > + > + while ((he = rhashtable_walk_next(&hti))) { > + if (IS_ERR(he)) { > + if (PTR_ERR(he) != -EAGAIN) { > + iter->err = PTR_ERR(he); > + break; > + } > + > + continue; > + } > + > + /* rhashtable resized during walk, skip */ > + if (llist_on_list(&he->walk_node)) > + continue; > + > + if (iter->count < iter->skip) { > + iter->count++; > + continue; > + } Not causing any harm, but is iter->count useful for this NFT_ITER_UPDATE variant? I think iter->count is only used for netlink dumps, to resume from the last netlink message. > + llist_add(&he->walk_node, &walk_list); > + } > + rhashtable_walk_stop(&hti); > + rhashtable_walk_exit(&hti); > + > + first_node = __llist_del_all(&walk_list); > + set->in_update_walk = true; > + llist_for_each_entry_safe(he, tmp, first_node, walk_node) { > + if (iter->err == 0) { > + iter->err = iter->fn(ctx, set, iter, &he->priv); > + if (iter->err == 0) > + iter->count++; > + } > + > + /* all entries must be cleared again, else next ->walk iteration > + * will skip entries. > + */ > + init_llist_node(&he->walk_node); > + } > + set->in_update_walk = false; > +} > + > +static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, > + struct nft_set_iter *iter) > +{ > + switch (iter->type) { > + case NFT_ITER_UPDATE: > + nft_rhash_walk_update(ctx, set, iter); > + break; > + case NFT_ITER_READ: > + nft_rhash_walk_ro(ctx, set, iter); > + break; > + default: > + iter->err = -EINVAL; > + WARN_ON_ONCE(1); > + break; > + } > +} > + > static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set, > struct nft_set_ext *ext) > {