Re: [RFC PATCH bpf-next 2/3] bpf, libbpf: Support BPF_F_CPU for percpu_array map

Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx> · Tue, 1 Jul 2025 13:22:05 -0700

On Tue, Jun 24, 2025 at 9:55 AM Leon Hwang <leon.hwang@xxxxxxxxx> wrote:
>
> This patch adds libbpf support for the BPF_F_CPU flag in percpu_array maps,
> introducing the following APIs:
>
> 1. bpf_map_update_elem_opts(): update with struct bpf_map_update_elem_opts
> 2. bpf_map_lookup_elem_opts(): lookup with struct bpf_map_lookup_elem_opts
> 3. bpf_map__update_elem_opts(): high-level wrapper with input validation
> 4. bpf_map__lookup_elem_opts(): high-level wrapper with input validation
>
> Behavior:
>
> * If opts->cpu == 0xFFFFFFFF, the update is applied to all CPUs.
> * Otherwise, it applies only to the specified CPU.
> * Lookup APIs retrieve values from the target CPU when BPF_F_CPU is used.
>
> Signed-off-by: Leon Hwang <leon.hwang@xxxxxxxxx>
> ---
>  tools/lib/bpf/bpf.c           | 37 +++++++++++++++++++++++
>  tools/lib/bpf/bpf.h           | 35 +++++++++++++++++++++-
>  tools/lib/bpf/libbpf.c        | 56 +++++++++++++++++++++++++++++++++++
>  tools/lib/bpf/libbpf.h        | 45 ++++++++++++++++++++++++++++
>  tools/lib/bpf/libbpf.map      |  4 +++
>  tools/lib/bpf/libbpf_common.h | 12 ++++++++
>  6 files changed, 188 insertions(+), 1 deletion(-)
>
> diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
> index 6eb421ccf91b..80f7ea041187 100644
> --- a/tools/lib/bpf/bpf.c
> +++ b/tools/lib/bpf/bpf.c
> @@ -402,6 +402,24 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
>         return libbpf_err_errno(ret);
>  }
>
> +int bpf_map_update_elem_opts(int fd, const void *key, const void *value,
> +                            const struct bpf_map_update_elem_opts *opts)
> +{
> +       const size_t attr_sz = offsetofend(union bpf_attr, cpu);
> +       union bpf_attr attr;
> +       int ret;
> +
> +       memset(&attr, 0, attr_sz);
> +       attr.map_fd = fd;
> +       attr.key = ptr_to_u64(key);
> +       attr.value = ptr_to_u64(value);
> +       attr.flags = OPTS_GET(opts, flags, 0);
> +       attr.cpu = OPTS_GET(opts, cpu, BPF_ALL_CPU);
> +
> +       ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz);
> +       return libbpf_err_errno(ret);
> +}
> +
>  int bpf_map_lookup_elem(int fd, const void *key, void *value)
>  {
>         const size_t attr_sz = offsetofend(union bpf_attr, flags);
> @@ -433,6 +451,24 @@ int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
>         return libbpf_err_errno(ret);
>  }
>
> +int bpf_map_lookup_elem_opts(int fd, const void *key, void *value,
> +                            const struct bpf_map_lookup_elem_opts *opts)
> +{
> +       const size_t attr_sz = offsetofend(union bpf_attr, cpu);
> +       union bpf_attr attr;
> +       int ret;
> +
> +       memset(&attr, 0, attr_sz);
> +       attr.map_fd = fd;
> +       attr.key = ptr_to_u64(key);
> +       attr.value = ptr_to_u64(value);
> +       attr.flags = OPTS_GET(opts, flags, 0);
> +       attr.cpu = OPTS_GET(opts, cpu, BPF_ALL_CPU);

can't do that, setting cpu field to 0xffffffff on old kernels will
cause -EINVAL, immediate backwards compat breakage

just default it to zero, this field should remain zero and not be used
unless flags have BPF_F_CPU

> +
> +       ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz);
> +       return libbpf_err_errno(ret);
> +}
> +
>  int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
>  {
>         const size_t attr_sz = offsetofend(union bpf_attr, flags);
> @@ -542,6 +578,7 @@ static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
>         attr.batch.count = *count;
>         attr.batch.elem_flags  = OPTS_GET(opts, elem_flags, 0);
>         attr.batch.flags = OPTS_GET(opts, flags, 0);
> +       attr.batch.cpu = OPTS_GET(opts, cpu, BPF_ALL_CPU);

ditto

>
>         ret = sys_bpf(cmd, &attr, attr_sz);
>         *count = attr.batch.count;
> diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
> index 1342564214c8..7c6a0a3693c9 100644
> --- a/tools/lib/bpf/bpf.h
> +++ b/tools/lib/bpf/bpf.h
> @@ -163,12 +163,41 @@ LIBBPF_API int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags);
>  LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
>  LIBBPF_API int bpf_map_freeze(int fd);
>
> +/**
> + * @brief **bpf_map_update_elem_opts** allows for updating percpu map with value
> + * on specified CPU or on all CPUs.

IMO, a bit too specific a description. xxx_ops APIs are extended
versions of original non-opts APIs allowing to pass extra (optional)
arguments. Keep it generic. cpu field is currently the only "extra",
but this might grow over time

> + *
> + * @param fd BPF map file descriptor
> + * @param key pointer to key
> + * @param value pointer to value
> + * @param opts options for configuring the way to update percpu map

again, too specific

> + * @return 0, on success; negative error code, otherwise (errno is also set to
> + * the error code)
> + */
> +LIBBPF_API int bpf_map_update_elem_opts(int fd, const void *key, const void *value,
> +                                       const struct bpf_map_update_elem_opts *opts);
> +
> +/**
> + * @brief **bpf_map_lookup_elem_opts** allows for looking up the value from
> + * percpu map on specified CPU.
> + *
> + * @param fd BPF map file descriptor
> + * @param key pointer to key
> + * @param value pointer to value
> + * @param opts options for configuring the way to lookup percpu map
> + * @return 0, on success; negative error code, otherwise (errno is also set to
> + * the error code)
> + */
> +LIBBPF_API int bpf_map_lookup_elem_opts(int fd, const void *key, void *value,
> +                                       const struct bpf_map_lookup_elem_opts *opts);
> +
>  struct bpf_map_batch_opts {
>         size_t sz; /* size of this struct for forward/backward compatibility */
>         __u64 elem_flags;
>         __u64 flags;
> +       __u32 cpu;

add size_t: 0 to avoid having non-zeroed padding at the end (see other
opts structs)

>  };
> -#define bpf_map_batch_opts__last_field flags
> +#define bpf_map_batch_opts__last_field cpu
>
>
>  /**
> @@ -286,6 +315,10 @@ LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,
>   *    Update spin_lock-ed map elements. This must be
>   *    specified if the map value contains a spinlock.
>   *
> + * **BPF_F_CPU**
> + *    As for percpu map, update value on all CPUs if **opts->cpu** is
> + *    0xFFFFFFFF, or on specified CPU otherwise.
> + *
>   * @param fd BPF map file descriptor
>   * @param keys pointer to an array of *count* keys
>   * @param values pointer to an array of *count* values
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 6445165a24f2..30400bdc20d9 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -10636,6 +10636,34 @@ int bpf_map__lookup_elem(const struct bpf_map *map,
>         return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
>  }
>
> +int bpf_map__lookup_elem_opts(const struct bpf_map *map, const void *key,
> +                             size_t key_sz, void *value, size_t value_sz,
> +                             const struct bpf_map_lookup_elem_opts *opts)
> +{
> +       int nr_cpus = libbpf_num_possible_cpus();
> +       __u32 cpu = OPTS_GET(opts, cpu, nr_cpus);
> +       __u64 flags = OPTS_GET(opts, flags, 0);
> +       int err;
> +
> +       if (flags & BPF_F_CPU) {
> +               if (map->def.type != BPF_MAP_TYPE_PERCPU_ARRAY)
> +                       return -EINVAL;
> +               if (cpu >= nr_cpus)
> +                       return -E2BIG;
> +               if (map->def.value_size != value_sz) {
> +                       pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
> +                               map->name, value_sz, map->def.value_size);
> +                       return -EINVAL;
> +               }

shouldn't this go into validate_map_op?..

> +       } else {
> +               err = validate_map_op(map, key_sz, value_sz, true);
> +               if (err)
> +                       return libbpf_err(err);
> +       }
> +
> +       return bpf_map_lookup_elem_opts(map->fd, key, value, opts);
> +}
> +
>  int bpf_map__update_elem(const struct bpf_map *map,
>                          const void *key, size_t key_sz,
>                          const void *value, size_t value_sz, __u64 flags)
> @@ -10649,6 +10677,34 @@ int bpf_map__update_elem(const struct bpf_map *map,
>         return bpf_map_update_elem(map->fd, key, value, flags);
>  }
>
> +int bpf_map__update_elem_opts(const struct bpf_map *map, const void *key,
> +                             size_t key_sz, const void *value, size_t value_sz,
> +                             const struct bpf_map_update_elem_opts *opts)
> +{
> +       int nr_cpus = libbpf_num_possible_cpus();
> +       __u32 cpu = OPTS_GET(opts, cpu, nr_cpus);
> +       __u64 flags = OPTS_GET(opts, flags, 0);
> +       int err;
> +
> +       if (flags & BPF_F_CPU) {
> +               if (map->def.type != BPF_MAP_TYPE_PERCPU_ARRAY)
> +                       return -EINVAL;
> +               if (cpu != BPF_ALL_CPU && cpu >= nr_cpus)
> +                       return -E2BIG;
> +               if (map->def.value_size != value_sz) {
> +                       pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
> +                               map->name, value_sz, map->def.value_size);
> +                       return -EINVAL;
> +               }

same, move into validate_map_op

> +       } else {
> +               err = validate_map_op(map, key_sz, value_sz, true);
> +               if (err)
> +                       return libbpf_err(err);
> +       }
> +
> +       return bpf_map_update_elem_opts(map->fd, key, value, opts);
> +}
> +
>  int bpf_map__delete_elem(const struct bpf_map *map,
>                          const void *key, size_t key_sz, __u64 flags)
>  {
> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> index d1cf813a057b..ba0d15028c72 100644
> --- a/tools/lib/bpf/libbpf.h
> +++ b/tools/lib/bpf/libbpf.h
> @@ -1185,6 +1185,28 @@ LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map,
>                                     const void *key, size_t key_sz,
>                                     void *value, size_t value_sz, __u64 flags);
>
> +/**
> + * @brief **bpf_map__lookup_elem_opts()** allows to lookup BPF map value
> + * corresponding to provided key with options to lookup percpu map.
> + * @param map BPF map to lookup element in
> + * @param key pointer to memory containing bytes of the key used for lookup
> + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
> + * @param value pointer to memory in which looked up value will be stored
> + * @param value_sz size in byte of value data memory; it has to match BPF map
> + * definition's **value_size**. For per-CPU BPF maps value size can be
> + * definition's **value_size** if **BPF_F_CPU** is specified in **opts->flags**,
> + * or the size described in **bpf_map__lookup_elem()**.

let's describe all this sizing in one place (either __lookup_elem or
__lookup_elem_opts) and then refer to that succinctly from another one
(without BPF_F_CPU exception spread out across two API descriptions)

> + * @opts extra options passed to kernel for this operation
> + * @return 0, on success; negative error, otherwise
> + *
> + * **bpf_map__lookup_elem_opts()** is high-level equivalent of
> + * **bpf_map_lookup_elem_opts()** API with added check for key and value size.
> + */
> +LIBBPF_API int bpf_map__lookup_elem_opts(const struct bpf_map *map,
> +                                        const void *key, size_t key_sz,
> +                                        void *value, size_t value_sz,
> +                                        const struct bpf_map_lookup_elem_opts *opts);
> +
>  /**
>   * @brief **bpf_map__update_elem()** allows to insert or update value in BPF
>   * map that corresponds to provided key.
> @@ -1209,6 +1231,29 @@ LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map,
>                                     const void *key, size_t key_sz,
>                                     const void *value, size_t value_sz, __u64 flags);
>
> +/**
> + * @brief **bpf_map__update_elem_opts()** allows to insert or update value in BPF
> + * map that corresponds to provided key with options for percpu maps.
> + * @param map BPF map to insert to or update element in
> + * @param key pointer to memory containing bytes of the key
> + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
> + * @param value pointer to memory containing bytes of the value
> + * @param value_sz size in byte of value data memory; it has to match BPF map
> + * definition's **value_size**. For per-CPU BPF maps value size can be
> + * definition's **value_size** if **BPF_F_CPU** is specified in **opts->flags**,
> + * or the size described in **bpf_map__update_elem()**.
> + * @opts extra options passed to kernel for this operation
> + * @flags extra flags passed to kernel for this operation
> + * @return 0, on success; negative error, otherwise
> + *
> + * **bpf_map__update_elem_opts()** is high-level equivalent of
> + * **bpf_map_update_elem_opts()** API with added check for key and value size.
> + */
> +LIBBPF_API int bpf_map__update_elem_opts(const struct bpf_map *map,
> +                                        const void *key, size_t key_sz,
> +                                        const void *value, size_t value_sz,
> +                                        const struct bpf_map_update_elem_opts *opts);
> +
>  /**
>   * @brief **bpf_map__delete_elem()** allows to delete element in BPF map that
>   * corresponds to provided key.
> diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
> index c7fc0bde5648..c39814adeae9 100644
> --- a/tools/lib/bpf/libbpf.map
> +++ b/tools/lib/bpf/libbpf.map
> @@ -436,6 +436,10 @@ LIBBPF_1.6.0 {
>                 bpf_linker__add_buf;
>                 bpf_linker__add_fd;
>                 bpf_linker__new_fd;
> +               bpf_map__lookup_elem_opts;
> +               bpf_map__update_elem_opts;
> +               bpf_map_lookup_elem_opts;
> +               bpf_map_update_elem_opts;
>                 bpf_object__prepare;
>                 bpf_program__attach_cgroup_opts;
>                 bpf_program__func_info;
> diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h
> index 8fe248e14eb6..ef29caf91f9c 100644
> --- a/tools/lib/bpf/libbpf_common.h
> +++ b/tools/lib/bpf/libbpf_common.h
> @@ -89,4 +89,16 @@
>                 memcpy(&NAME, &___##NAME, sizeof(NAME));                    \
>         } while (0)
>
> +struct bpf_map_update_elem_opts {
> +       size_t sz; /* size of this struct for forward/backward compatibility */
> +       __u64 flags;
> +       __u32 cpu;

size_t: 0

> +};
> +
> +struct bpf_map_lookup_elem_opts {
> +       size_t sz; /* size of this struct for forward/backward compatibility */
> +       __u64 flags;
> +       __u32 cpu;

size_t: 0

> +};
> +
>  #endif /* __LIBBPF_LIBBPF_COMMON_H */
> --
> 2.49.0
>