On 2025/7/2 04:22, Andrii Nakryiko wrote:
> On Tue, Jun 24, 2025 at 9:55 AM Leon Hwang <leon.hwang@xxxxxxxxx> wrote:
>>
>> This patch adds libbpf support for the BPF_F_CPU flag in percpu_array maps,
>> introducing the following APIs:
>>
>> 1. bpf_map_update_elem_opts(): update with struct bpf_map_update_elem_opts
>> 2. bpf_map_lookup_elem_opts(): lookup with struct bpf_map_lookup_elem_opts
>> 3. bpf_map__update_elem_opts(): high-level wrapper with input validation
>> 4. bpf_map__lookup_elem_opts(): high-level wrapper with input validation
>>
>> Behavior:
>>
>> * If opts->cpu == 0xFFFFFFFF, the update is applied to all CPUs.
>> * Otherwise, it applies only to the specified CPU.
>> * Lookup APIs retrieve values from the target CPU when BPF_F_CPU is used.
>>
>> Signed-off-by: Leon Hwang <leon.hwang@xxxxxxxxx>
>> ---
>>  tools/lib/bpf/bpf.c           | 37 +++++++++++++++++++++++
>>  tools/lib/bpf/bpf.h           | 35 +++++++++++++++++++++-
>>  tools/lib/bpf/libbpf.c        | 56 +++++++++++++++++++++++++++++++++++
>>  tools/lib/bpf/libbpf.h        | 45 ++++++++++++++++++++++++++++
>>  tools/lib/bpf/libbpf.map      |  4 +++
>>  tools/lib/bpf/libbpf_common.h | 12 ++++++++
>>  6 files changed, 188 insertions(+), 1 deletion(-)
>>
>> diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
>> index 6eb421ccf91b..80f7ea041187 100644
>> --- a/tools/lib/bpf/bpf.c
>> +++ b/tools/lib/bpf/bpf.c
>> @@ -402,6 +402,24 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
>>          return libbpf_err_errno(ret);
>>  }
>>
>> +int bpf_map_update_elem_opts(int fd, const void *key, const void *value,
>> +                             const struct bpf_map_update_elem_opts *opts)
>> +{
>> +        const size_t attr_sz = offsetofend(union bpf_attr, cpu);
>> +        union bpf_attr attr;
>> +        int ret;
>> +
>> +        memset(&attr, 0, attr_sz);
>> +        attr.map_fd = fd;
>> +        attr.key = ptr_to_u64(key);
>> +        attr.value = ptr_to_u64(value);
>> +        attr.flags = OPTS_GET(opts, flags, 0);
>> +        attr.cpu = OPTS_GET(opts, cpu, BPF_ALL_CPU);
>> +
>> +        ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz);
>> +        return libbpf_err_errno(ret);
>> +}
>> +
>>  int bpf_map_lookup_elem(int fd, const void *key, void *value)
>>  {
>>          const size_t attr_sz = offsetofend(union bpf_attr, flags);
>> @@ -433,6 +451,24 @@ int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
>>          return libbpf_err_errno(ret);
>>  }
>>
>> +int bpf_map_lookup_elem_opts(int fd, const void *key, void *value,
>> +                             const struct bpf_map_lookup_elem_opts *opts)
>> +{
>> +        const size_t attr_sz = offsetofend(union bpf_attr, cpu);
>> +        union bpf_attr attr;
>> +        int ret;
>> +
>> +        memset(&attr, 0, attr_sz);
>> +        attr.map_fd = fd;
>> +        attr.key = ptr_to_u64(key);
>> +        attr.value = ptr_to_u64(value);
>> +        attr.flags = OPTS_GET(opts, flags, 0);
>> +        attr.cpu = OPTS_GET(opts, cpu, BPF_ALL_CPU);
>
> can't do that, setting cpu field to 0xffffffff on old kernels will
> cause -EINVAL, immediate backwards compat breakage
>
> just default it to zero, this field should remain zero and not be used
> unless flags have BPF_F_CPU
>

Ack.
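Will default it to zero in the next revision, something along these
lines (untested):

        attr.flags = OPTS_GET(opts, flags, 0);
        /* default to 0, not BPF_ALL_CPU: the kernel only consumes this
         * field when BPF_F_CPU is set in flags, and old kernels reject
         * a non-zero cpu with -EINVAL
         */
        attr.cpu = OPTS_GET(opts, cpu, 0);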
>> +
>> +        ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz);
>> +        return libbpf_err_errno(ret);
>> +}
>> +
>>  int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
>>  {
>>          const size_t attr_sz = offsetofend(union bpf_attr, flags);
>> @@ -542,6 +578,7 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
>>          attr.batch.count = *count;
>>          attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0);
>>          attr.batch.flags = OPTS_GET(opts, flags, 0);
>> +        attr.batch.cpu = OPTS_GET(opts, cpu, BPF_ALL_CPU);
>
> ditto
>

Ack.

>>
>>          ret = sys_bpf(cmd, &attr, attr_sz);
>>          *count = attr.batch.count;
>> diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
>> index 1342564214c8..7c6a0a3693c9 100644
>> --- a/tools/lib/bpf/bpf.h
>> +++ b/tools/lib/bpf/bpf.h
>> @@ -163,12 +163,41 @@ LIBBPF_API int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags);
>>  LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
>>  LIBBPF_API int bpf_map_freeze(int fd);
>>
>> +/**
>> + * @brief **bpf_map_update_elem_opts** allows for updating percpu map with value
>> + * on specified CPU or on all CPUs.
>
> IMO, a bit too specific a description. xxx_ops APIs are extended
> versions of original non-opts APIs allowing to pass extra (optional)
> arguments. Keep it generic. cpu field is currently the only "extra",
> but this might grow over time
>

I'll update it.

>> + *
>> + * @param fd BPF map file descriptor
>> + * @param key pointer to key
>> + * @param value pointer to value
>> + * @param opts options for configuring the way to update percpu map
>
> again, too specific
>

Ack.

>> + * @return 0, on success; negative error code, otherwise (errno is also set to
>> + * the error code)
>> + */
>> +LIBBPF_API int bpf_map_update_elem_opts(int fd, const void *key, const void *value,
>> +                                        const struct bpf_map_update_elem_opts *opts);
>> +
>> +/**
>> + * @brief **bpf_map_lookup_elem_opts** allows for looking up the value from
>> + * percpu map on specified CPU.
>> + *
>> + * @param fd BPF map file descriptor
>> + * @param key pointer to key
>> + * @param value pointer to value
>> + * @param opts options for configuring the way to lookup percpu map
>> + * @return 0, on success; negative error code, otherwise (errno is also set to
>> + * the error code)
>> + */
>> +LIBBPF_API int bpf_map_lookup_elem_opts(int fd, const void *key, void *value,
>> +                                        const struct bpf_map_lookup_elem_opts *opts);
>> +
>>  struct bpf_map_batch_opts {
>>          size_t sz; /* size of this struct for forward/backward compatibility */
>>          __u64 elem_flags;
>>          __u64 flags;
>> +        __u32 cpu;
>
> add size_t: 0 to avoid having non-zeroed padding at the end (see other
> opts structs)
>

Ack.
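So the struct will become something like this, with the same trailing
zero-width bitfield as the other opts structs:

        struct bpf_map_batch_opts {
                size_t sz; /* size of this struct for forward/backward compatibility */
                __u64 elem_flags;
                __u64 flags;
                __u32 cpu;
                size_t :0; /* avoid non-zeroed padding at the end */
        };
        #define bpf_map_batch_opts__last_field cpu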
>>  };
>> -#define bpf_map_batch_opts__last_field flags
>> +#define bpf_map_batch_opts__last_field cpu
>>
>>
>>  /**
>> @@ -286,6 +315,10 @@ LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,
>>   *    Update spin_lock-ed map elements. This must be
>>   *    specified if the map value contains a spinlock.
>>   *
>> + * **BPF_F_CPU**
>> + *    As for percpu map, update value on all CPUs if **opts->cpu** is
>> + *    0xFFFFFFFF, or on specified CPU otherwise.
>> + *
>>   * @param fd BPF map file descriptor
>>   * @param keys pointer to an array of *count* keys
>>   * @param values pointer to an array of *count* values
>> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
>> index 6445165a24f2..30400bdc20d9 100644
>> --- a/tools/lib/bpf/libbpf.c
>> +++ b/tools/lib/bpf/libbpf.c
>> @@ -10636,6 +10636,34 @@ int bpf_map__lookup_elem(const struct bpf_map *map,
>>          return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
>>  }
>>
>> +int bpf_map__lookup_elem_opts(const struct bpf_map *map, const void *key,
>> +                              size_t key_sz, void *value, size_t value_sz,
>> +                              const struct bpf_map_lookup_elem_opts *opts)
>> +{
>> +        int nr_cpus = libbpf_num_possible_cpus();
>> +        __u32 cpu = OPTS_GET(opts, cpu, nr_cpus);
>> +        __u64 flags = OPTS_GET(opts, flags, 0);
>> +        int err;
>> +
>> +        if (flags & BPF_F_CPU) {
>> +                if (map->def.type != BPF_MAP_TYPE_PERCPU_ARRAY)
>> +                        return -EINVAL;
>> +                if (cpu >= nr_cpus)
>> +                        return -E2BIG;
>> +                if (map->def.value_size != value_sz) {
>> +                        pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
>> +                                map->name, value_sz, map->def.value_size);
>> +                        return -EINVAL;
>> +                }
>
> shouldn't this go into validate_map_op?..
>

It should. However, to avoid making validate_map_op really complicated,
I'd like to add validate_map_cpu_op to wrap checking cpu and
validate_map_op.
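Roughly like this (name and exact signature are tentative):

        static int validate_map_cpu_op(const struct bpf_map *map, size_t key_sz,
                                       size_t value_sz, __u64 flags, __u32 cpu,
                                       bool allow_all_cpus)
        {
                if (!(flags & BPF_F_CPU))
                        return validate_map_op(map, key_sz, value_sz, true);

                /* BPF_F_CPU path: checks lifted from the two wrappers */
                if (map->def.type != BPF_MAP_TYPE_PERCPU_ARRAY)
                        return -EINVAL;
                /* update allows cpu == BPF_ALL_CPU, lookup does not */
                if (!(allow_all_cpus && cpu == BPF_ALL_CPU) &&
                    cpu >= libbpf_num_possible_cpus())
                        return -E2BIG;
                /* with BPF_F_CPU, value_sz is a single CPU's value_size */
                if (map->def.value_size != value_sz) {
                        pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
                                map->name, value_sz, map->def.value_size);
                        return -EINVAL;
                }
                return 0;
        }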
>> +        } else {
>> +                err = validate_map_op(map, key_sz, value_sz, true);
>> +                if (err)
>> +                        return libbpf_err(err);
>> +        }
>> +
>> +        return bpf_map_lookup_elem_opts(map->fd, key, value, opts);
>> +}
>> +
>>  int bpf_map__update_elem(const struct bpf_map *map,
>>                           const void *key, size_t key_sz,
>>                           const void *value, size_t value_sz, __u64 flags)
>> @@ -10649,6 +10677,34 @@ int bpf_map__update_elem(const struct bpf_map *map,
>>          return bpf_map_update_elem(map->fd, key, value, flags);
>>  }
>>
>> +int bpf_map__update_elem_opts(const struct bpf_map *map, const void *key,
>> +                              size_t key_sz, const void *value, size_t value_sz,
>> +                              const struct bpf_map_update_elem_opts *opts)
>> +{
>> +        int nr_cpus = libbpf_num_possible_cpus();
>> +        __u32 cpu = OPTS_GET(opts, cpu, nr_cpus);
>> +        __u64 flags = OPTS_GET(opts, flags, 0);
>> +        int err;
>> +
>> +        if (flags & BPF_F_CPU) {
>> +                if (map->def.type != BPF_MAP_TYPE_PERCPU_ARRAY)
>> +                        return -EINVAL;
>> +                if (cpu != BPF_ALL_CPU && cpu >= nr_cpus)
>> +                        return -E2BIG;
>> +                if (map->def.value_size != value_sz) {
>> +                        pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
>> +                                map->name, value_sz, map->def.value_size);
>> +                        return -EINVAL;
>> +                }
>
> same, move into validate_map_op
>

Ack.

>> +        } else {
>> +                err = validate_map_op(map, key_sz, value_sz, true);
>> +                if (err)
>> +                        return libbpf_err(err);
>> +        }
>> +
>> +        return bpf_map_update_elem_opts(map->fd, key, value, opts);
>> +}
>> +
>>  int bpf_map__delete_elem(const struct bpf_map *map,
>>                           const void *key, size_t key_sz, __u64 flags)
>>  {
>> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
>> index d1cf813a057b..ba0d15028c72 100644
>> --- a/tools/lib/bpf/libbpf.h
>> +++ b/tools/lib/bpf/libbpf.h
>> @@ -1185,6 +1185,28 @@ LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map,
>>                                      const void *key, size_t key_sz,
>>                                      void *value, size_t value_sz, __u64 flags);
>>
>> +/**
>> + * @brief **bpf_map__lookup_elem_opts()** allows to lookup BPF map value
>> + * corresponding to provided key with options to lookup percpu map.
>> + * @param map BPF map to lookup element in
>> + * @param key pointer to memory containing bytes of the key used for lookup
>> + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
>> + * @param value pointer to memory in which looked up value will be stored
>> + * @param value_sz size in byte of value data memory; it has to match BPF map
>> + * definition's **value_size**. For per-CPU BPF maps value size can be
>> + * definition's **value_size** if **BPF_F_CPU** is specified in **opts->flags**,
>> + * or the size described in **bpf_map__lookup_elem()**.
>
> let's describe all this sizing in one place (either __lookup_elem or
> __lookup_elem_opts) and then refer to that succinctly from another one
> (without BPF_F_CPU exception spread out across two API descriptions)
>

Let's describe it in __lookup_elem_opts and then refer to that in
__lookup_elem.
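The key point to document there is that with BPF_F_CPU the buffer is
sized for a single CPU's value, e.g. (illustrative only, assuming a
__u64 per-CPU value):

        LIBBPF_OPTS(bpf_map_lookup_elem_opts, opts,
                .flags = BPF_F_CPU,
                .cpu = 1,
        );
        __u32 key = 0;
        __u64 value; /* value_size bytes, not value_size * nr_cpus */
        int err;

        /* reads CPU 1's copy of the element into 'value' */
        err = bpf_map__lookup_elem_opts(map, &key, sizeof(key),
                                        &value, sizeof(value), &opts);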
>> + * @opts extra options passed to kernel for this operation
>> + * @return 0, on success; negative error, otherwise
>> + *
>> + * **bpf_map__lookup_elem_opts()** is high-level equivalent of
>> + * **bpf_map_lookup_elem_opts()** API with added check for key and value size.
>> + */
>> +LIBBPF_API int bpf_map__lookup_elem_opts(const struct bpf_map *map,
>> +                                         const void *key, size_t key_sz,
>> +                                         void *value, size_t value_sz,
>> +                                         const struct bpf_map_lookup_elem_opts *opts);
>> +
>>  /**
>>   * @brief **bpf_map__update_elem()** allows to insert or update value in BPF
>>   * map that corresponds to provided key.
>> @@ -1209,6 +1231,29 @@ LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map,
>>                                      const void *key, size_t key_sz,
>>                                      const void *value, size_t value_sz, __u64 flags);
>>
>> +/**
>> + * @brief **bpf_map__update_elem_opts()** allows to insert or update value in BPF
>> + * map that corresponds to provided key with options for percpu maps.
>> + * @param map BPF map to insert to or update element in
>> + * @param key pointer to memory containing bytes of the key
>> + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
>> + * @param value pointer to memory containing bytes of the value
>> + * @param value_sz size in byte of value data memory; it has to match BPF map
>> + * definition's **value_size**. For per-CPU BPF maps value size can be
>> + * definition's **value_size** if **BPF_F_CPU** is specified in **opts->flags**,
>> + * or the size described in **bpf_map__update_elem()**.
>> + * @opts extra options passed to kernel for this operation
>> + * @flags extra flags passed to kernel for this operation
>> + * @return 0, on success; negative error, otherwise
>> + *
>> + * **bpf_map__update_elem_opts()** is high-level equivalent of
>> + * **bpf_map_update_elem_opts()** API with added check for key and value size.
>> + */
>> +LIBBPF_API int bpf_map__update_elem_opts(const struct bpf_map *map,
>> +                                         const void *key, size_t key_sz,
>> +                                         const void *value, size_t value_sz,
>> +                                         const struct bpf_map_update_elem_opts *opts);
>> +
>>  /**
>>   * @brief **bpf_map__delete_elem()** allows to delete element in BPF map that
>>   * corresponds to provided key.
>> diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
>> index c7fc0bde5648..c39814adeae9 100644
>> --- a/tools/lib/bpf/libbpf.map
>> +++ b/tools/lib/bpf/libbpf.map
>> @@ -436,6 +436,10 @@ LIBBPF_1.6.0 {
>>                  bpf_linker__add_buf;
>>                  bpf_linker__add_fd;
>>                  bpf_linker__new_fd;
>> +                bpf_map__lookup_elem_opts;
>> +                bpf_map__update_elem_opts;
>> +                bpf_map_lookup_elem_opts;
>> +                bpf_map_update_elem_opts;
>>                  bpf_object__prepare;
>>                  bpf_program__attach_cgroup_opts;
>>                  bpf_program__func_info;
>> diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h
>> index 8fe248e14eb6..ef29caf91f9c 100644
>> --- a/tools/lib/bpf/libbpf_common.h
>> +++ b/tools/lib/bpf/libbpf_common.h
>> @@ -89,4 +89,16 @@
>>                  memcpy(&NAME, &___##NAME, sizeof(NAME));                     \
>>          } while (0)
>>
>> +struct bpf_map_update_elem_opts {
>> +        size_t sz; /* size of this struct for forward/backward compatibility */
>> +        __u64 flags;
>> +        __u32 cpu;
>
> size_t: 0
>

Ack.

>> +};
>> +
>> +struct bpf_map_lookup_elem_opts {
>> +        size_t sz; /* size of this struct for forward/backward compatibility */
>> +        __u64 flags;
>> +        __u32 cpu;
>
> size_t: 0
>

Ack.

Thanks,
Leon