On Thu, Aug 21, 2025 at 9:08 AM Leon Hwang <leon.hwang@xxxxxxxxx> wrote:
>
> Introduce BPF_F_ALL_CPUS flag support for percpu_hash and lru_percpu_hash
> maps to allow updating values for all CPUs with a single value.
>
> Introduce BPF_F_CPU flag support for percpu_hash and lru_percpu_hash
> maps to allow updating value for specified CPU.
>
> This enhancement enables:
>
> * Efficient update values across all CPUs with a single value when
>   BPF_F_ALL_CPUS is set for update_elem and update_batch APIs.
> * Targeted update or lookup for a specified CPU when BPF_F_CPU is set.
>
> The BPF_F_CPU flag is passed via:
>
> * map_flags of lookup_elem and update_elem APIs along with embedded cpu
>   field.
> * elem_flags of lookup_batch and update_batch APIs along with embedded
>   cpu field.
>
> Signed-off-by: Leon Hwang <leon.hwang@xxxxxxxxx>
> ---
>  include/linux/bpf.h   |  54 +++++++++++++++++++-
>  kernel/bpf/arraymap.c |  29 ++++-------
>  kernel/bpf/hashtab.c  | 111 +++++++++++++++++++++++++++++-------------
>  kernel/bpf/syscall.c  |  30 +++---------
>  4 files changed, 147 insertions(+), 77 deletions(-)
>

[...]

> @@ -397,22 +395,14 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
>                              u64 map_flags)
>  {
>         struct bpf_array *array = container_of(map, struct bpf_array, map);
> -       const u64 cpu_flags = BPF_F_CPU | BPF_F_ALL_CPUS;
>         u32 index = *(u32 *)key;
>         void __percpu *pptr;
> +       int off = 0, err;
>         u32 size, cpu;
> -       int off = 0;
> -
> -       if (unlikely((u32)map_flags > BPF_F_ALL_CPUS))
> -               /* unknown flags */
> -               return -EINVAL;
> -       if (unlikely((map_flags & cpu_flags) == cpu_flags))
> -               return -EINVAL;
>
> -       cpu = map_flags >> 32;
> -       if (unlikely((map_flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
> -               /* invalid cpu */
> -               return -ERANGE;
> +       err = bpf_map_check_cpu_flags(map_flags, true);
> +       if (unlikely(err))
> +               return err;

again, unnecessary churn, why not add this function in the previous
patch when you add the cpu flags?

>
>         if (unlikely(index >= array->map.max_entries))
>                 /* all elements were pre-allocated, cannot insert a new one */
> @@ -432,6 +422,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
>         rcu_read_lock();
>         pptr = array->pptrs[index & array->index_mask];
>         if (map_flags & BPF_F_CPU) {
> +               cpu = map_flags >> 32;
>                 copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value);
>                 bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
>         } else {
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index 71f9931ac64cd..34a35cdade425 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -937,24 +937,39 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
>  }
>
>  static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
> -                           void *value, bool onallcpus)
> +                           void *value, bool onallcpus, u64 map_flags)
>  {
> +       int cpu = map_flags & BPF_F_CPU ? map_flags >> 32 : 0;
> +       int current_cpu = raw_smp_processor_id();
> +
>         if (!onallcpus) {
>                 /* copy true value_size bytes */
> -               copy_map_value(&htab->map, this_cpu_ptr(pptr), value);
> +               copy_map_value(&htab->map, (map_flags & BPF_F_CPU) && cpu != current_cpu ?
> +                              per_cpu_ptr(pptr, cpu) : this_cpu_ptr(pptr), value);

is there any benefit to this cpu == current_cpu special casing?
Wouldn't per_cpu_ptr() do the right thing even if cpu == current_cpu?

>         } else {
>                 u32 size = round_up(htab->map.value_size, 8);
> -               int off = 0, cpu;
> +               int off = 0;
> +
> +               if (map_flags & BPF_F_CPU) {
> +                       copy_map_value_long(&htab->map, cpu != current_cpu ?
> +                               per_cpu_ptr(pptr, cpu) : this_cpu_ptr(pptr), value);
> +                       return;
> +               }

[...]

> @@ -1806,10 +1834,17 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
>                         void __percpu *pptr;
>
>                         pptr = htab_elem_get_ptr(l, map->key_size);
> -                       for_each_possible_cpu(cpu) {
> -                               copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu));
> -                               check_and_init_map_value(&htab->map, dst_val + off);
> -                               off += size;
> +                       if (!do_delete && (elem_map_flags & BPF_F_CPU)) {

if do_delete is true, we can't have BPF_F_CPU set, right? We checked
that above, so why all these complications?

> +                               cpu = elem_map_flags >> 32;
> +                               copy_map_value_long(&htab->map, dst_val, per_cpu_ptr(pptr, cpu));
> +                               check_and_init_map_value(&htab->map, dst_val);
> +                       } else {
> +                               for_each_possible_cpu(cpu) {
> +                                       copy_map_value_long(&htab->map, dst_val + off,
> +                                                           per_cpu_ptr(pptr, cpu));
> +                                       check_and_init_map_value(&htab->map, dst_val + off);
> +                                       off += size;
> +                               }
>                         }
>                 } else {
>                         value = htab_elem_value(l, key_size);
> @@ -2365,14 +2400,18 @@ static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *k
>         return NULL;
>  }
>
> -int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
> +int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 map_flags)
>  {
> +       int ret, cpu, off = 0;
>         struct htab_elem *l;
>         void __percpu *pptr;
> -       int ret = -ENOENT;
> -       int cpu, off = 0;
>         u32 size;
>
> +       ret = bpf_map_check_cpu_flags(map_flags, false);
> +       if (unlikely(ret))
> +               return ret;
> +       ret = -ENOENT;
> +
>         /* per_cpu areas are zero-filled and bpf programs can only
>          * access 'value_size' of them, so copying rounded areas
>          * will not leak any kernel data
> @@ -2386,10 +2425,16 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
>          * eviction heuristics when user space does a map walk.
>          */
>         pptr = htab_elem_get_ptr(l, map->key_size);
> -       for_each_possible_cpu(cpu) {
> -               copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
> -               check_and_init_map_value(map, value + off);
> -               off += size;
> +       if (map_flags & BPF_F_CPU) {
> +               cpu = map_flags >> 32;
> +               copy_map_value_long(map, value, per_cpu_ptr(pptr, cpu));
> +               check_and_init_map_value(map, value);
> +       } else {
> +               for_each_possible_cpu(cpu) {
> +                       copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
> +                       check_and_init_map_value(map, value + off);
> +                       off += size;
> +               }
>         }

it feels like this whole logic of copying per-cpu value to/from user
should be generic between all per-cpu maps, once we get that
`void __percpu *` pointer, no? See if you can extract it as a reusable
helper (but, you know, without all the per-map type special casing,
though it doesn't seem like you should need it, though I might be
missing details, of course). One for bpf_percpu_copy_to_user() and
another for bpf_percpu_copy_from_user(), which would take into account
all these cpu flags?

[...]
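Something roughly like this, maybe (completely untested sketch, just to
illustrate the shape I have in mind; the names are the ones suggested
above, but the exact signatures are a guess, and note there is no
current-cpu special casing, per_cpu_ptr() is used for whichever CPU was
requested):

static void bpf_percpu_copy_to_user(struct bpf_map *map, void __percpu *pptr,
                                    void *value, u64 map_flags)
{
        u32 size = round_up(map->value_size, 8);
        int cpu, off = 0;

        if (map_flags & BPF_F_CPU) {
                /* copy out only the requested CPU's slot */
                cpu = map_flags >> 32;
                copy_map_value_long(map, value, per_cpu_ptr(pptr, cpu));
                check_and_init_map_value(map, value);
                return;
        }

        for_each_possible_cpu(cpu) {
                copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
                check_and_init_map_value(map, value + off);
                off += size;
        }
}

static void bpf_percpu_copy_from_user(struct bpf_map *map, void __percpu *pptr,
                                      void *value, u64 map_flags)
{
        u32 size = round_up(map->value_size, 8);
        int cpu, off = 0;

        if (map_flags & BPF_F_CPU) {
                /* update only the requested CPU's slot */
                cpu = map_flags >> 32;
                copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value);
                bpf_obj_free_fields(map->record, per_cpu_ptr(pptr, cpu));
                return;
        }

        for_each_possible_cpu(cpu) {
                copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
                bpf_obj_free_fields(map->record, per_cpu_ptr(pptr, cpu));
                /* with BPF_F_ALL_CPUS the same user-supplied value is
                 * reused for every CPU, otherwise advance to the next
                 * per-CPU slot in the user buffer
                 */
                if (!(map_flags & BPF_F_ALL_CPUS))
                        off += size;
        }
}

Then bpf_percpu_hash_copy(), bpf_percpu_array_copy(), the batch lookup
path, etc. would just resolve their own pptr and call these, and the
onallcpus branch of pcpu_copy_value() could probably reuse the
_from_user one too. But maybe I'm missing some map-specific detail.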