On Fri, 15 Aug 2025 16:36:58 +0200 Florian Westphal <fw@xxxxxxxxx> wrote: > Always prefer the avx2 implementation if its available. > This greatly improves insertion performance (each insertion > checks if the new element would overlap with an existing one): > > time nft -f - <<EOF > table ip pipapo { > set s { > typeof ip saddr . tcp dport > flags interval > size 800000 > elements = { 10.1.1.1 - 10.1.1.4 . 3996, > [.. 800k entries elided .. ] > > before: > real 1m55.993s > user 0m2.505s > sys 1m53.296s > > after: > real 0m42.586s > user 0m2.554s > sys 0m39.811s > > Signed-off-by: Florian Westphal <fw@xxxxxxxxx> > --- > net/netfilter/nft_set_pipapo.c | 47 ++++++++++++++++++++++++++--- > net/netfilter/nft_set_pipapo_avx2.c | 6 ++-- > net/netfilter/nft_set_pipapo_avx2.h | 4 +++ > 3 files changed, 49 insertions(+), 8 deletions(-) > > diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c > index 9a10251228fd..affa473b13a6 100644 > --- a/net/netfilter/nft_set_pipapo.c > +++ b/net/netfilter/nft_set_pipapo.c > @@ -397,7 +397,7 @@ int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules, > } > > /** > - * pipapo_get() - Get matching element reference given key data > + * pipapo_get_slow() - Get matching element reference given key data > * @m: storage containing the set elements > * @data: Key data to be matched against existing elements > * @genmask: If set, check that element is active in given genmask > @@ -414,9 +414,9 @@ int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules, > * > * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise. > */ > -static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, > - const u8 *data, u8 genmask, > - u64 tstamp) > +static struct nft_pipapo_elem *pipapo_get_slow(const struct nft_pipapo_match *m, > + const u8 *data, u8 genmask, > + u64 tstamp) > { > struct nft_pipapo_scratch *scratch; > unsigned long *res_map, *fill_map; > @@ -502,6 +502,43 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, > return NULL; > } > > +/** > + * pipapo_get() - Get matching element reference given key data > + * @m: storage containing the set elements > + * @data: Key data to be matched against existing elements > + * @genmask: If set, check that element is active in given genmask > + * @tstamp: timestamp to check for expired elements > + * > + * This is a dispatcher function, either calling out the generic C > + * implementation or, if available, the avx2 one. > + * This helper is only called from the control plane, with either rcu Nit: AVX2, RCU > + * read lock or transaction mutex held. > + * > + * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise. > + */ > +static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m, > + const u8 *data, u8 genmask, > + u64 tstamp) > +{ > + struct nft_pipapo_elem *e; > + > + local_bh_disable(); > + > +#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) > + if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX) && I don't have any straightforward idea on how to avoid introducing AVX2 stuff (even if compiled out) in the generic function, which we had managed to avoid so far. I don't think it's a big deal, though. > + irq_fpu_usable()) { > + kernel_fpu_begin_mask(0); > + e = pipapo_get_avx2(m, data, genmask, tstamp); > + kernel_fpu_end(); > + local_bh_enable(); > + return e; > + } > +#endif > + e = pipapo_get_slow(m, data, genmask, tstamp); > + local_bh_enable(); > + return e; > +} > + > /** > * nft_pipapo_lookup() - Dataplane fronted for main lookup function > * @net: Network namespace > @@ -523,7 +560,7 @@ nft_pipapo_lookup(const struct net *net, const struct nft_set *set, > const struct nft_pipapo_elem *e; > > m = rcu_dereference(priv->match); > - e = pipapo_get(m, (const u8 *)key, genmask, get_jiffies_64()); > + e = pipapo_get_slow(m, (const u8 *)key, genmask, get_jiffies_64()); > > return e ? &e->ext : NULL; > } > diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c > index d512877cc211..7a9900f8ce2f 100644 > --- a/net/netfilter/nft_set_pipapo_avx2.c > +++ b/net/netfilter/nft_set_pipapo_avx2.c > @@ -1149,9 +1149,9 @@ static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, uns > * > * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise. > */ > -static struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m, > - const u8 *data, u8 genmask, > - u64 tstamp) > +struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m, > + const u8 *data, u8 genmask, > + u64 tstamp) > { > struct nft_pipapo_scratch *scratch; > const struct nft_pipapo_field *f; > diff --git a/net/netfilter/nft_set_pipapo_avx2.h b/net/netfilter/nft_set_pipapo_avx2.h > index dbb6aaca8a7a..c2999b63da3f 100644 > --- a/net/netfilter/nft_set_pipapo_avx2.h > +++ b/net/netfilter/nft_set_pipapo_avx2.h > @@ -5,8 +5,12 @@ > #include <asm/fpu/xstate.h> > #define NFT_PIPAPO_ALIGN (XSAVE_YMM_SIZE / BITS_PER_BYTE) > > +struct nft_pipapo_match; > bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features, > struct nft_set_estimate *est); > +struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m, > + const u8 *data, u8 genmask, > + u64 tstamp); > #endif /* defined(CONFIG_X86_64) && !defined(CONFIG_UML) */ > > #endif /* _NFT_SET_PIPAPO_AVX2_H */ -- Stefano