On Tue, Jun 24, 2025 at 9:55 AM Leon Hwang <leon.hwang@xxxxxxxxx> wrote:
>
> This patch adds libbpf support for the BPF_F_CPU flag in percpu_array maps,
> introducing the following APIs:
>
> 1. bpf_map_update_elem_opts(): update with struct bpf_map_update_elem_opts
> 2. bpf_map_lookup_elem_opts(): lookup with struct bpf_map_lookup_elem_opts
> 3. bpf_map__update_elem_opts(): high-level wrapper with input validation
> 4. bpf_map__lookup_elem_opts(): high-level wrapper with input validation
>
> Behavior:
>
> * If opts->cpu == 0xFFFFFFFF, the update is applied to all CPUs.
> * Otherwise, it applies only to the specified CPU.
> * Lookup APIs retrieve values from the target CPU when BPF_F_CPU is used.
>
> Signed-off-by: Leon Hwang <leon.hwang@xxxxxxxxx>
> ---
>  tools/lib/bpf/bpf.c           | 37 +++++++++++++++++++++++
>  tools/lib/bpf/bpf.h           | 35 +++++++++++++++++++++-
>  tools/lib/bpf/libbpf.c        | 56 +++++++++++++++++++++++++++++++++++
>  tools/lib/bpf/libbpf.h        | 45 ++++++++++++++++++++++++++++
>  tools/lib/bpf/libbpf.map      |  4 +++
>  tools/lib/bpf/libbpf_common.h | 12 ++++++++
>  6 files changed, 188 insertions(+), 1 deletion(-)
>
> diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
> index 6eb421ccf91b..80f7ea041187 100644
> --- a/tools/lib/bpf/bpf.c
> +++ b/tools/lib/bpf/bpf.c
> @@ -402,6 +402,24 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
>  	return libbpf_err_errno(ret);
>  }
>
> +int bpf_map_update_elem_opts(int fd, const void *key, const void *value,
> +			     const struct bpf_map_update_elem_opts *opts)
> +{
> +	const size_t attr_sz = offsetofend(union bpf_attr, cpu);
> +	union bpf_attr attr;
> +	int ret;
> +
> +	memset(&attr, 0, attr_sz);
> +	attr.map_fd = fd;
> +	attr.key = ptr_to_u64(key);
> +	attr.value = ptr_to_u64(value);
> +	attr.flags = OPTS_GET(opts, flags, 0);
> +	attr.cpu = OPTS_GET(opts, cpu, BPF_ALL_CPU);
> +
> +	ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz);
> +	return libbpf_err_errno(ret);
> +}
> +
>  int bpf_map_lookup_elem(int fd, const void *key, void *value)
>  {
>  	const size_t attr_sz = offsetofend(union bpf_attr, flags);
> @@ -433,6 +451,24 @@ int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
>  	return libbpf_err_errno(ret);
>  }
>
> +int bpf_map_lookup_elem_opts(int fd, const void *key, void *value,
> +			     const struct bpf_map_lookup_elem_opts *opts)
> +{
> +	const size_t attr_sz = offsetofend(union bpf_attr, cpu);
> +	union bpf_attr attr;
> +	int ret;
> +
> +	memset(&attr, 0, attr_sz);
> +	attr.map_fd = fd;
> +	attr.key = ptr_to_u64(key);
> +	attr.value = ptr_to_u64(value);
> +	attr.flags = OPTS_GET(opts, flags, 0);
> +	attr.cpu = OPTS_GET(opts, cpu, BPF_ALL_CPU);

can't do that, setting cpu field to 0xffffffff on old kernels will cause
-EINVAL, immediate backwards compat breakage

just default it to zero, this field should remain zero and not be used
unless flags have BPF_F_CPU
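E.g., something along these lines (untested sketch; attr is already zeroed
by the memset() above, so old kernels never see a non-zero cpu field):

	attr.flags = OPTS_GET(opts, flags, 0);
	/* keep attr.cpu at zero unless the caller explicitly opted in */
	if (attr.flags & BPF_F_CPU)
		attr.cpu = OPTS_GET(opts, cpu, 0);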
> +
> +	ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz);
> +	return libbpf_err_errno(ret);
> +}
> +
>  int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
>  {
>  	const size_t attr_sz = offsetofend(union bpf_attr, flags);
> @@ -542,6 +578,7 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
>  	attr.batch.count = *count;
>  	attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0);
>  	attr.batch.flags = OPTS_GET(opts, flags, 0);
> +	attr.batch.cpu = OPTS_GET(opts, cpu, BPF_ALL_CPU);

ditto

>
>  	ret = sys_bpf(cmd, &attr, attr_sz);
>  	*count = attr.batch.count;
> diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
> index 1342564214c8..7c6a0a3693c9 100644
> --- a/tools/lib/bpf/bpf.h
> +++ b/tools/lib/bpf/bpf.h
> @@ -163,12 +163,41 @@ LIBBPF_API int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags);
>  LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
>  LIBBPF_API int bpf_map_freeze(int fd);
>
> +/**
> + * @brief **bpf_map_update_elem_opts** allows for updating percpu map with value
> + * on specified CPU or on all CPUs.

IMO, a bit too specific a description. xxx_opts APIs are extended versions
of the original non-opts APIs, allowing extra (optional) arguments to be
passed. Keep it generic. The cpu field is currently the only "extra", but
this might grow over time.

> + *
> + * @param fd BPF map file descriptor
> + * @param key pointer to key
> + * @param value pointer to value
> + * @param opts options for configuring the way to update percpu map

again, too specific

> + * @return 0, on success; negative error code, otherwise (errno is also set to
> + * the error code)
> + */
> +LIBBPF_API int bpf_map_update_elem_opts(int fd, const void *key, const void *value,
> +					const struct bpf_map_update_elem_opts *opts);
> +
> +/**
> + * @brief **bpf_map_lookup_elem_opts** allows for looking up the value from
> + * percpu map on specified CPU.
> + *
> + * @param fd BPF map file descriptor
> + * @param key pointer to key
> + * @param value pointer to value
> + * @param opts options for configuring the way to lookup percpu map
> + * @return 0, on success; negative error code, otherwise (errno is also set to
> + * the error code)
> + */
> +LIBBPF_API int bpf_map_lookup_elem_opts(int fd, const void *key, void *value,
> +					const struct bpf_map_lookup_elem_opts *opts);
> +
>  struct bpf_map_batch_opts {
>  	size_t sz; /* size of this struct for forward/backward compatibility */
>  	__u64 elem_flags;
>  	__u64 flags;
> +	__u32 cpu;

add size_t :0 to avoid having non-zeroed padding at the end (see other opts
structs)

>  };
> -#define bpf_map_batch_opts__last_field flags
> +#define bpf_map_batch_opts__last_field cpu
>
>
>  /**
> @@ -286,6 +315,10 @@ LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,
>   *		Update spin_lock-ed map elements. This must be
>   *		specified if the map value contains a spinlock.
>   *
> + *	**BPF_F_CPU**
> + *		As for percpu map, update value on all CPUs if **opts->cpu** is
> + *		0xFFFFFFFF, or on specified CPU otherwise.
> + *
>   * @param fd BPF map file descriptor
>   * @param keys pointer to an array of *count* keys
>   * @param values pointer to an array of *count* values
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 6445165a24f2..30400bdc20d9 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -10636,6 +10636,34 @@ int bpf_map__lookup_elem(const struct bpf_map *map,
>  	return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
>  }
>
> +int bpf_map__lookup_elem_opts(const struct bpf_map *map, const void *key,
> +			      size_t key_sz, void *value, size_t value_sz,
> +			      const struct bpf_map_lookup_elem_opts *opts)
> +{
> +	int nr_cpus = libbpf_num_possible_cpus();
> +	__u32 cpu = OPTS_GET(opts, cpu, nr_cpus);
> +	__u64 flags = OPTS_GET(opts, flags, 0);
> +	int err;
> +
> +	if (flags & BPF_F_CPU) {
> +		if (map->def.type != BPF_MAP_TYPE_PERCPU_ARRAY)
> +			return -EINVAL;
> +		if (cpu >= nr_cpus)
> +			return -E2BIG;
> +		if (map->def.value_size != value_sz) {
> +			pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
> +				map->name, value_sz, map->def.value_size);
> +			return -EINVAL;
> +		}

shouldn't this go into validate_map_op?..
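Something like this, maybe (rough and untested, just to show the shape;
the existing fd/key_sz and per-map-type checks stay as they are):

	static int validate_map_op(const struct bpf_map *map, size_t key_sz,
				   size_t value_sz, bool check_value_sz,
				   __u64 flags, __u32 cpu)
	{
		/* ... existing fd and key_sz checks unchanged ... */

		if (!check_value_sz)
			return 0;

		if (flags & BPF_F_CPU) {
			/* BPF_F_CPU form: plain value_size, percpu_array only;
			 * update additionally accepts cpu == BPF_ALL_CPU
			 */
			if (map->def.type != BPF_MAP_TYPE_PERCPU_ARRAY)
				return -EINVAL;
			if (cpu != BPF_ALL_CPU && cpu >= libbpf_num_possible_cpus())
				return -E2BIG;
			if (map->def.value_size != value_sz) {
				pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
					map->name, value_sz, map->def.value_size);
				return -EINVAL;
			}
			return 0;
		}

		/* ... existing per-map-type value_sz checks unchanged ... */
		return 0;
	}

with both _opts wrappers then reduced to a validate_map_op() call plus the
low-level call.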
> +	} else {
> +		err = validate_map_op(map, key_sz, value_sz, true);
> +		if (err)
> +			return libbpf_err(err);
> +	}
> +
> +	return bpf_map_lookup_elem_opts(map->fd, key, value, opts);
> +}
> +
>  int bpf_map__update_elem(const struct bpf_map *map,
>  			 const void *key, size_t key_sz,
>  			 const void *value, size_t value_sz, __u64 flags)
> @@ -10649,6 +10677,34 @@ int bpf_map__update_elem(const struct bpf_map *map,
>  	return bpf_map_update_elem(map->fd, key, value, flags);
>  }
>
> +int bpf_map__update_elem_opts(const struct bpf_map *map, const void *key,
> +			      size_t key_sz, const void *value, size_t value_sz,
> +			      const struct bpf_map_update_elem_opts *opts)
> +{
> +	int nr_cpus = libbpf_num_possible_cpus();
> +	__u32 cpu = OPTS_GET(opts, cpu, nr_cpus);
> +	__u64 flags = OPTS_GET(opts, flags, 0);
> +	int err;
> +
> +	if (flags & BPF_F_CPU) {
> +		if (map->def.type != BPF_MAP_TYPE_PERCPU_ARRAY)
> +			return -EINVAL;
> +		if (cpu != BPF_ALL_CPU && cpu >= nr_cpus)
> +			return -E2BIG;
> +		if (map->def.value_size != value_sz) {
> +			pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
> +				map->name, value_sz, map->def.value_size);
> +			return -EINVAL;
> +		}

same, move into validate_map_op

> +	} else {
> +		err = validate_map_op(map, key_sz, value_sz, true);
> +		if (err)
> +			return libbpf_err(err);
> +	}
> +
> +	return bpf_map_update_elem_opts(map->fd, key, value, opts);
> +}
> +
>  int bpf_map__delete_elem(const struct bpf_map *map,
>  			 const void *key, size_t key_sz, __u64 flags)
>  {
> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> index d1cf813a057b..ba0d15028c72 100644
> --- a/tools/lib/bpf/libbpf.h
> +++ b/tools/lib/bpf/libbpf.h
> @@ -1185,6 +1185,28 @@ LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map,
>  				    const void *key, size_t key_sz,
>  				    void *value, size_t value_sz, __u64 flags);
>
> +/**
> + * @brief **bpf_map__lookup_elem_opts()** allows to lookup BPF map value
> + * corresponding to provided key with options to lookup percpu map.
> + * @param map BPF map to lookup element in
> + * @param key pointer to memory containing bytes of the key used for lookup
> + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
> + * @param value pointer to memory in which looked up value will be stored
> + * @param value_sz size in byte of value data memory; it has to match BPF map
> + * definition's **value_size**. For per-CPU BPF maps value size can be
> + * definition's **value_size** if **BPF_F_CPU** is specified in **opts->flags**,
> + * or the size described in **bpf_map__lookup_elem()**.

let's describe all this sizing in one place (either __lookup_elem or
__lookup_elem_opts) and then refer to that succinctly from the other one
(without the BPF_F_CPU exception spread across two API descriptions)
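To illustrate what we'd be documenting, from the caller's side the two
sizing modes would look something like this (hypothetical snippet, assuming
a valid percpu_array `map` with 8-byte values, so the 8-byte-rounded per-CPU
slot size equals value_size):

	int nr_cpus = libbpf_num_possible_cpus();
	__u64 *values = calloc(nr_cpus, sizeof(__u64));
	__u64 one_value;
	__u32 key = 0;
	int err;

	/* classic per-CPU sizing: nr_cpus * roundup(value_size, 8) bytes */
	err = bpf_map__lookup_elem(map, &key, sizeof(key),
				   values, nr_cpus * sizeof(__u64), 0);

	/* BPF_F_CPU sizing: just value_size bytes, for one chosen CPU */
	LIBBPF_OPTS(bpf_map_lookup_elem_opts, opts,
		.flags = BPF_F_CPU,
		.cpu = 1,
	);
	err = bpf_map__lookup_elem_opts(map, &key, sizeof(key),
					&one_value, sizeof(one_value), &opts);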
> + * @opts extra options passed to kernel for this operation
> + * @return 0, on success; negative error, otherwise
> + *
> + * **bpf_map__lookup_elem_opts()** is high-level equivalent of
> + * **bpf_map_lookup_elem_opts()** API with added check for key and value size.
> + */
> +LIBBPF_API int bpf_map__lookup_elem_opts(const struct bpf_map *map,
> +					 const void *key, size_t key_sz,
> +					 void *value, size_t value_sz,
> +					 const struct bpf_map_lookup_elem_opts *opts);
> +
>  /**
>   * @brief **bpf_map__update_elem()** allows to insert or update value in BPF
>   * map that corresponds to provided key.
> @@ -1209,6 +1231,29 @@ LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map,
>  				    const void *key, size_t key_sz,
>  				    const void *value, size_t value_sz, __u64 flags);
>
> +/**
> + * @brief **bpf_map__update_elem_opts()** allows to insert or update value in BPF
> + * map that corresponds to provided key with options for percpu maps.
> + * @param map BPF map to insert to or update element in
> + * @param key pointer to memory containing bytes of the key
> + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
> + * @param value pointer to memory containing bytes of the value
> + * @param value_sz size in byte of value data memory; it has to match BPF map
> + * definition's **value_size**. For per-CPU BPF maps value size can be
> + * definition's **value_size** if **BPF_F_CPU** is specified in **opts->flags**,
> + * or the size described in **bpf_map__update_elem()**.
> + * @opts extra options passed to kernel for this operation
> + * @flags extra flags passed to kernel for this operation
> + * @return 0, on success; negative error, otherwise
> + *
> + * **bpf_map__update_elem_opts()** is high-level equivalent of
> + * **bpf_map_update_elem_opts()** API with added check for key and value size.
> + */
> +LIBBPF_API int bpf_map__update_elem_opts(const struct bpf_map *map,
> +					 const void *key, size_t key_sz,
> +					 const void *value, size_t value_sz,
> +					 const struct bpf_map_update_elem_opts *opts);
> +
>  /**
>   * @brief **bpf_map__delete_elem()** allows to delete element in BPF map that
>   * corresponds to provided key.
> diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
> index c7fc0bde5648..c39814adeae9 100644
> --- a/tools/lib/bpf/libbpf.map
> +++ b/tools/lib/bpf/libbpf.map
> @@ -436,6 +436,10 @@ LIBBPF_1.6.0 {
>  		bpf_linker__add_buf;
>  		bpf_linker__add_fd;
>  		bpf_linker__new_fd;
> +		bpf_map__lookup_elem_opts;
> +		bpf_map__update_elem_opts;
> +		bpf_map_lookup_elem_opts;
> +		bpf_map_update_elem_opts;
>  		bpf_object__prepare;
>  		bpf_program__attach_cgroup_opts;
>  		bpf_program__func_info;
> diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h
> index 8fe248e14eb6..ef29caf91f9c 100644
> --- a/tools/lib/bpf/libbpf_common.h
> +++ b/tools/lib/bpf/libbpf_common.h
> @@ -89,4 +89,16 @@
>  		memcpy(&NAME, &___##NAME, sizeof(NAME));		\
>  	} while (0)
>
> +struct bpf_map_update_elem_opts {
> +	size_t sz; /* size of this struct for forward/backward compatibility */
> +	__u64 flags;
> +	__u32 cpu;

size_t :0

> +};
> +
> +struct bpf_map_lookup_elem_opts {
> +	size_t sz; /* size of this struct for forward/backward compatibility */
> +	__u64 flags;
> +	__u32 cpu;

size_t :0

> +};
> +
>  #endif /* __LIBBPF_LIBBPF_COMMON_H */
> --
> 2.49.0
>
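i.e., a sketch of one of the opts structs with the trailing-padding fix
(same for the lookup variant and bpf_map_batch_opts):

	struct bpf_map_update_elem_opts {
		size_t sz; /* size of this struct for forward/backward compatibility */
		__u64 flags;
		__u32 cpu;
		size_t :0; /* prevent non-zeroed trailing padding */
	};

Caller-side usage would then follow the usual opts pattern, something like
(hypothetical example, assuming the cpu-defaults-to-zero fix above):

	LIBBPF_OPTS(bpf_map_update_elem_opts, opts,
		.flags = BPF_F_CPU,
		.cpu = 2, /* update only CPU 2's copy of the value */
	);

	err = bpf_map__update_elem_opts(map, &key, sizeof(key),
					&value, sizeof(value), &opts);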