Re: [PATCH bpf-next 1/3] bpf: Introduce BPF_F_CPU flag for percpu_array map

On Thu, Jul 17, 2025 at 12:38 PM Leon Hwang <leon.hwang@xxxxxxxxx> wrote:
>
> This patch introduces support for the BPF_F_CPU flag in percpu_array maps
> to allow updating or looking up values for specified CPUs or for all CPUs
> with a single value.
>
> This enhancement enables:
>
> * Efficient update of all CPUs using a single value when cpu == (u32)~0.
> * Targeted update or lookup for a specified CPU otherwise.
>
> The flag is passed via:
>
> * map_flags in bpf_percpu_array_update() along with embedded cpu field.
> * elem_flags in generic_map_update_batch() along with separated cpu field.
>
> Signed-off-by: Leon Hwang <leon.hwang@xxxxxxxxx>
> ---
>  include/linux/bpf.h            |  3 +-
>  include/uapi/linux/bpf.h       |  7 +++++
>  kernel/bpf/arraymap.c          | 54 ++++++++++++++++++++++++++--------
>  kernel/bpf/syscall.c           | 52 ++++++++++++++++++++------------
>  tools/include/uapi/linux/bpf.h |  7 +++++
>  5 files changed, 90 insertions(+), 33 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index f9cd2164ed23..faee5710e913 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -2671,7 +2671,8 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
>                                    struct bpf_func_state *callee);
>
>  int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
> -int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
> +int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value,
> +                         u64 flags);
>  int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
>                            u64 flags);
>  int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 233de8677382..4cad3de6899d 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1372,6 +1372,12 @@ enum {
>         BPF_NOEXIST     = 1, /* create new element if it didn't exist */
>         BPF_EXIST       = 2, /* update existing element */
>         BPF_F_LOCK      = 4, /* spin_lock-ed map_lookup/map_update */
> +       BPF_F_CPU       = 8, /* map_update for percpu_array */
> +};
> +
> +enum {
> +       /* indicate updating value across all CPUs for percpu maps. */
> +       BPF_ALL_CPUS    = (__u32)~0,
>  };
>
>  /* flags for BPF_MAP_CREATE command */
> @@ -1549,6 +1555,7 @@ union bpf_attr {
>                 __u32           map_fd;
>                 __u64           elem_flags;
>                 __u64           flags;
> +               __u32           cpu;
>         } batch;

So you use flags to pass the cpu for singular lookup/update operations,
but a separate cpu field for the batch ones. We need to be consistent
here. I think I initially suggested a separate cpu field, but given how
much churn it's causing in the API and its usage, I'm leaning towards
just passing it through flags.

But if someone else has strong preferences, I can be convinced otherwise.

Either way, it has to be consistent between batched and non-batched API.
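To make the tradeoff concrete, here's a rough user-space sketch of the
flags-encoded variant (purely illustrative; it assumes the cpu index is
packed into the upper 32 bits of flags, matching the kernel-side decode
in this patch, and the helper names are made up):

  #include <bpf/bpf.h>    /* libbpf: bpf_map_update_elem() and friends */
  #include <linux/bpf.h>  /* BPF_F_CPU, BPF_ALL_CPUS from this series */

  /* Look up @key's value on a single @cpu; @value points to one
   * value_size buffer instead of the usual num_possible_cpus() array.
   */
  static int lookup_one_cpu(int map_fd, __u32 key, void *value, __u32 cpu)
  {
          __u64 flags = BPF_F_CPU | ((__u64)cpu << 32);

          return bpf_map_lookup_elem_flags(map_fd, &key, value, flags);
  }

  /* Replicate a single @value across all CPUs. */
  static int update_all_cpus(int map_fd, __u32 key, const void *value)
  {
          __u64 flags = BPF_F_CPU | ((__u64)BPF_ALL_CPUS << 32);

          return bpf_map_update_elem(map_fd, &key, value, flags);
  }

The upside is that no new attr fields are needed; the downside is that
flags stops being a pure bitmask above bit 31.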

Other than that and the minor formatting nits below, LGTM.

pw-bot: cr

>
>         struct { /* anonymous struct used by BPF_PROG_LOAD command */
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index 3d080916faf9..d333663cbe71 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -295,17 +295,24 @@ static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key,
>         return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
>  }
>
> -int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
> +int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value, u64 flags)
>  {
>         struct bpf_array *array = container_of(map, struct bpf_array, map);
>         u32 index = *(u32 *)key;
>         void __percpu *pptr;
> -       int cpu, off = 0;
> -       u32 size;
> +       u32 size, cpu;
> +       int off = 0;
>
>         if (unlikely(index >= array->map.max_entries))
>                 return -ENOENT;
>
> +       cpu = (u32)(flags >> 32);
> +       flags &= (u32)~0;
> +       if (unlikely(flags > BPF_F_CPU))
> +               return -EINVAL;
> +       if (unlikely((flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
> +               return -E2BIG;

nit: I'd probably do -ERANGE for this one
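i.e., the same check with a different errno:

  if (unlikely((flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
          return -ERANGE;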

> +
>         /* per_cpu areas are zero-filled and bpf programs can only
>          * access 'value_size' of them, so copying rounded areas
>          * will not leak any kernel data

[...]

> @@ -1941,19 +1945,25 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
>  {
>         void __user *values = u64_to_user_ptr(attr->batch.values);
>         void __user *keys = u64_to_user_ptr(attr->batch.keys);
> -       u32 value_size, cp, max_count;
> +       u32 value_size, cp, max_count, cpu = attr->batch.cpu;
> +       u64 elem_flags = attr->batch.elem_flags;
>         void *key, *value;
>         int err = 0;
>
> -       if (attr->batch.elem_flags & ~BPF_F_LOCK)
> +       if (elem_flags & ~(BPF_F_LOCK | BPF_F_CPU))
>                 return -EINVAL;
>
> -       if ((attr->batch.elem_flags & BPF_F_LOCK) &&
> +       if ((elem_flags & BPF_F_LOCK) &&
>             !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
>                 return -EINVAL;
>         }
>
> -       value_size = bpf_map_value_size(map);
> +       if ((elem_flags & BPF_F_CPU) &&
> +               map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)

nit: keep on the single line
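i.e.:

  if ((elem_flags & BPF_F_CPU) && map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)
          return -EINVAL;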

> +               return -EINVAL;
> +
> +       value_size = bpf_map_value_size(map, elem_flags);
> +       elem_flags |= ((u64)cpu) << 32;
>
>         max_count = attr->batch.count;
>         if (!max_count)

[...]

> -       if ((attr->batch.elem_flags & BPF_F_LOCK) &&
> +       if ((elem_flags & BPF_F_LOCK) &&
>             !btf_record_has_field(map->record, BPF_SPIN_LOCK))
>                 return -EINVAL;
>
> -       value_size = bpf_map_value_size(map);
> +       if ((elem_flags & BPF_F_CPU) &&
> +               map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)
> +               return -EINVAL;

same here: formatting is off, but best to keep it on a single line, as above

> +
> +       value_size = bpf_map_value_size(map, elem_flags);
> +       elem_flags |= ((u64)cpu) << 32;
>
>         max_count = attr->batch.count;
>         if (!max_count)

[...]




