On Tue, Sep 9, 2025 at 7:14 AM Leon Hwang <leon.hwang@xxxxxxxxx> wrote:
>
> Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags and check them for the
> following APIs:
>
> * 'map_lookup_elem()'
> * 'map_update_elem()'
> * 'generic_map_lookup_batch()'
> * 'generic_map_update_batch()'
>
> And get the correct value size for these APIs.
>
> Signed-off-by: Leon Hwang <leon.hwang@xxxxxxxxx>
> ---
>  include/linux/bpf.h            | 22 ++++++++++++++++++
>  include/uapi/linux/bpf.h       |  2 ++
>  kernel/bpf/syscall.c           | 42 ++++++++++++++++++++++------------
>  tools/include/uapi/linux/bpf.h |  2 ++
>  4 files changed, 54 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 8f6e87f0f3a89..60c235836987d 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -3709,4 +3709,26 @@ int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char *
>                              const char **linep, int *nump);
>  struct bpf_prog *bpf_prog_find_from_stack(void);
>
> +static inline int bpf_map_check_cpu_flags(u64 flags, bool check_all_cpus_flag)
> +{

This function is not used in this patch. Don't add it without users.

Also, I really don't like 'bool' arguments. They make call sites hard
to read. Instead of a bool, use

bpf_map_check_flags(u64 flags, u64 allowed_flags)

so the call sites will look like:

bpf_map_check_flags(flags, BPF_F_CPU);

and

bpf_map_check_flags(flags, BPF_F_CPU | BPF_F_ALL_CPUS);

Also, two functions that do very similar things look redundant.
This bpf_map_check_flags() vs bpf_map_check_op_flags()...
I think one should do it. See the sketch at the bottom of this mail.

pw-bot: cr

> +        const u64 cpu_flags = BPF_F_CPU | BPF_F_ALL_CPUS;
> +        u32 cpu;
> +
> +        if (check_all_cpus_flag) {
> +                if (unlikely((u32)flags > BPF_F_ALL_CPUS))
> +                        return -EINVAL;
> +                if (unlikely((flags & cpu_flags) == cpu_flags))
> +                        return -EINVAL;
> +        } else {
> +                if (unlikely((u32)flags & ~BPF_F_CPU))
> +                        return -EINVAL;
> +        }
> +
> +        cpu = flags >> 32;
> +        if (unlikely((flags & BPF_F_CPU) && cpu >= num_possible_cpus()))
> +                return -ERANGE;
> +
> +        return 0;
> +}
> +
>  #endif /* _LINUX_BPF_H */
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 233de8677382e..be1fdc5042744 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1372,6 +1372,8 @@ enum {
>          BPF_NOEXIST = 1, /* create new element if it didn't exist */
>          BPF_EXIST = 2, /* update existing element */
>          BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */
> +        BPF_F_CPU = 8, /* cpu flag for percpu maps, upper 32 bits of flags carry a cpu number */
> +        BPF_F_ALL_CPUS = 16, /* update value across all CPUs for percpu maps */
>  };
>
>  /* flags for BPF_MAP_CREATE command */
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index f5448e00a2e8f..db841b38f0c22 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -131,23 +131,36 @@ bool bpf_map_write_active(const struct bpf_map *map)
>          return atomic64_read(&map->writecnt) != 0;
>  }
>
> +static bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type)
> +{
> +        return false;
> +}
> +
>  static int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 allowed_flags)
>  {
> -        if (flags & ~allowed_flags)
> +        if ((u32)flags & ~allowed_flags)
>                  return -EINVAL;
>
>          if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
>                  return -EINVAL;
>
> +        if (!(flags & BPF_F_CPU) && flags >> 32)
> +                return -EINVAL;
> +
> +        if ((flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) && !bpf_map_supports_cpu_flags(map->map_type))
> +                return -EINVAL;
> +
>          return 0;
>  }
>
> -static u32 bpf_map_value_size(const struct bpf_map *map)
> +static u32 bpf_map_value_size(const struct bpf_map *map, u64 flags)
>  {
> -        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
> -            map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
> -            map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
> -            map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
> +        if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS))
> +                return round_up(map->value_size, 8);
> +        else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
> +                 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
> +                 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
> +                 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
>                  return round_up(map->value_size, 8) * num_possible_cpus();
>          else if (IS_FD_MAP(map))
>                  return sizeof(u32);
> @@ -1687,7 +1700,7 @@ static int map_lookup_elem(union bpf_attr *attr)
>          if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ))
>                  return -EPERM;
>
> -        err = bpf_map_check_op_flags(map, attr->flags, BPF_F_LOCK);
> +        err = bpf_map_check_op_flags(map, attr->flags, BPF_F_LOCK | BPF_F_CPU);
>          if (err)
>                  return err;
>
>          key = __bpf_copy_key(ukey, map->key_size);
>          if (IS_ERR(key))
>                  return PTR_ERR(key);
>
> -        value_size = bpf_map_value_size(map);
> +        value_size = bpf_map_value_size(map, attr->flags);
>
>          err = -ENOMEM;
>          value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
> @@ -1762,7 +1775,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
>                  goto err_put;
>          }
>
> -        value_size = bpf_map_value_size(map);
> +        value_size = bpf_map_value_size(map, attr->flags);
>          value = kvmemdup_bpfptr(uvalue, value_size);
>          if (IS_ERR(value)) {
>                  err = PTR_ERR(value);
> @@ -1958,11 +1971,12 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
>          void *key, *value;
>          int err = 0;
>
> -        err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK);
> +        err = bpf_map_check_op_flags(map, attr->batch.elem_flags,
> +                                     BPF_F_LOCK | BPF_F_CPU | BPF_F_ALL_CPUS);
>          if (err)
>                  return err;
>
> -        value_size = bpf_map_value_size(map);
> +        value_size = bpf_map_value_size(map, attr->batch.elem_flags);
>
>          max_count = attr->batch.count;
>          if (!max_count)
> @@ -2017,11 +2031,11 @@ int generic_map_lookup_batch(struct bpf_map *map,
>          u32 value_size, cp, max_count;
>          int err;
>
> -        err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK);
> +        err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK | BPF_F_CPU);
>          if (err)
>                  return err;
>
> -        value_size = bpf_map_value_size(map);
> +        value_size = bpf_map_value_size(map, attr->batch.elem_flags);
>
>          max_count = attr->batch.count;
>          if (!max_count)
> @@ -2143,7 +2157,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
>                  goto err_put;
>          }
>
> -        value_size = bpf_map_value_size(map);
> +        value_size = bpf_map_value_size(map, 0);
>
>          err = -ENOMEM;
>          value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 233de8677382e..be1fdc5042744 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -1372,6 +1372,8 @@ enum {
>          BPF_NOEXIST = 1, /* create new element if it didn't exist */
>          BPF_EXIST = 2, /* update existing element */
>          BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */
> +        BPF_F_CPU = 8, /* cpu flag for percpu maps, upper 32 bits of flags carry a cpu number */
> +        BPF_F_ALL_CPUS = 16, /* update value across all CPUs for percpu maps */
>  };
>
>  /* flags for BPF_MAP_CREATE command */
> --
> 2.50.1
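
To illustrate the shape I'm suggesting, here is an untested sketch on
top of this patch (names and placement are just for illustration): the
cpu-flag checks fold into the existing bpf_map_check_op_flags() instead
of adding a second helper, and each caller picks the allowed flags:

static int bpf_map_check_op_flags(struct bpf_map *map, u64 flags,
                                  u64 allowed_flags)
{
        u32 cpu;

        /* rejects BPF_F_CPU/BPF_F_ALL_CPUS wherever the caller
         * didn't allow them
         */
        if ((u32)flags & ~allowed_flags)
                return -EINVAL;

        if ((flags & BPF_F_LOCK) &&
            !btf_record_has_field(map->record, BPF_SPIN_LOCK))
                return -EINVAL;

        /* BPF_F_CPU and BPF_F_ALL_CPUS are mutually exclusive */
        if ((flags & BPF_F_CPU) && (flags & BPF_F_ALL_CPUS))
                return -EINVAL;

        /* upper 32 bits carry a cpu number only with BPF_F_CPU */
        if (!(flags & BPF_F_CPU) && flags >> 32)
                return -EINVAL;

        if ((flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) &&
            !bpf_map_supports_cpu_flags(map->map_type))
                return -EINVAL;

        cpu = flags >> 32;
        if ((flags & BPF_F_CPU) && cpu >= num_possible_cpus())
                return -ERANGE;

        return 0;
}

Then map_lookup_elem() keeps passing BPF_F_LOCK | BPF_F_CPU,
generic_map_update_batch() passes BPF_F_LOCK | BPF_F_CPU |
BPF_F_ALL_CPUS, and bpf_map_check_cpu_flags() goes away entirely.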