Re: [PATCH bpf-next v3 1/4] bpf: Introduce global percpu data

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, May 26, 2025 at 9:22 AM Leon Hwang <leon.hwang@xxxxxxxxx> wrote:
>
> This patch introduces global percpu data, inspired by commit
> 6316f78306c1 ("Merge branch 'support-global-data'"). It enables the
> definition of global percpu variables in BPF, similar to the
> DEFINE_PER_CPU() macro in the kernel[0].
>
> For example, in BPF, it is possible to define a global percpu variable like:
>
> int data SEC(".data..percpu");
>
> With this patch, tools like retsnoop[1] and bpfsnoop[2] can simplify their
> BPF code for handling LBRs. The code can be updated from
>
> static struct perf_branch_entry lbrs[1][MAX_LBR_ENTRIES] SEC(".data.lbrs");
>
> to
>
> static struct perf_branch_entry lbrs[MAX_LBR_ENTRIES] SEC(".data..percpu.lbrs");
>
> This eliminates the need to retrieve the CPU ID using the
> bpf_get_smp_processor_id() helper.
>
> Additionally, by reusing the global percpu data map, sharing information
> between tail callers and callees, or between freplace callers and
> callees, becomes simpler compared to reusing percpu_array maps.
>
> Links:
> [0] https://github.com/torvalds/linux/blob/fbfd64d25c7af3b8695201ebc85efe90be28c5a3/include/linux/percpu-defs.h#L114
> [1] https://github.com/anakryiko/retsnoop
> [2] https://github.com/bpfsnoop/bpfsnoop
>
> Signed-off-by: Leon Hwang <leon.hwang@xxxxxxxxx>
> ---
>  kernel/bpf/arraymap.c | 41 +++++++++++++++++++++++++++++++++++++--
>  kernel/bpf/verifier.c | 45 +++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 84 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index eb28c0f219ee4..91d06f0165a6e 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -249,6 +249,40 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
>         return this_cpu_ptr(array->pptrs[index & array->index_mask]);
>  }
>
> +static int percpu_array_map_direct_value_addr(const struct bpf_map *map,
> +                                             u64 *imm, u32 off)
> +{
> +       struct bpf_array *array = container_of(map, struct bpf_array, map);
> +
> +       if (map->max_entries != 1)
> +               return -EOPNOTSUPP;
> +       if (off >= map->value_size)
> +               return -EINVAL;
> +       if (!bpf_jit_supports_percpu_insn())
> +               return -EOPNOTSUPP;
> +
> +       *imm = (u64) array->pptrs[0];
> +       return 0;
> +}
> +
> +static int percpu_array_map_direct_value_meta(const struct bpf_map *map,
> +                                             u64 imm, u32 *off)
> +{
> +       struct bpf_array *array = container_of(map, struct bpf_array, map);
> +       u64 base = (u64) array->pptrs[0];
> +       u64 range = array->elem_size;
> +
> +       if (map->max_entries != 1)
> +               return -EOPNOTSUPP;
> +       if (imm < base || imm >= base + range)
> +               return -ENOENT;
> +       if (!bpf_jit_supports_percpu_insn())
> +               return -EOPNOTSUPP;
> +
> +       *off = imm - base;
> +       return 0;
> +}
> +
>  /* emit BPF instructions equivalent to C code of percpu_array_map_lookup_elem() */
>  static int percpu_array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
>  {
> @@ -532,9 +566,10 @@ static int array_map_check_btf(const struct bpf_map *map,
>  {
>         u32 int_data;
>
> -       /* One exception for keyless BTF: .bss/.data/.rodata map */
> +       /* One exception for keyless BTF: .bss/.data/.rodata/.data..percpu map */
>         if (btf_type_is_void(key_type)) {
> -               if (map->map_type != BPF_MAP_TYPE_ARRAY ||
> +               if ((map->map_type != BPF_MAP_TYPE_ARRAY &&
> +                    map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) ||
>                     map->max_entries != 1)
>                         return -EINVAL;
>
> @@ -815,6 +850,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
>         .map_get_next_key = array_map_get_next_key,
>         .map_lookup_elem = percpu_array_map_lookup_elem,
>         .map_gen_lookup = percpu_array_map_gen_lookup,
> +       .map_direct_value_addr = percpu_array_map_direct_value_addr,
> +       .map_direct_value_meta = percpu_array_map_direct_value_meta,
>         .map_update_elem = array_map_update_elem,
>         .map_delete_elem = array_map_delete_elem,
>         .map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index d5807d2efc922..9203354208732 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -6939,6 +6939,8 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
>         u64 addr;
>         int err;
>
> +       if (map->map_type != BPF_MAP_TYPE_ARRAY)
> +               return -EINVAL;
>         err = map->ops->map_direct_value_addr(map, &addr, off);
>         if (err)
>                 return err;
> @@ -7451,6 +7453,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
>                         /* if map is read-only, track its contents as scalars */
>                         if (tnum_is_const(reg->var_off) &&
>                             bpf_map_is_rdonly(map) &&
> +                           map->map_type == BPF_MAP_TYPE_ARRAY &&
>                             map->ops->map_direct_value_addr) {
>                                 int map_off = off + reg->var_off.value;
>                                 u64 val = 0;
> @@ -9414,6 +9417,11 @@ static int check_reg_const_str(struct bpf_verifier_env *env,
>                 return -EACCES;
>         }
>
> +       if (map->map_type != BPF_MAP_TYPE_ARRAY) {
> +               verbose(env, "only array map supports direct string value access\n");
> +               return -EINVAL;
> +       }
> +
>         err = check_map_access(env, regno, reg->off,
>                                map->value_size - reg->off, false,
>                                ACCESS_HELPER);
> @@ -11101,6 +11109,11 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
>                 return -EINVAL;
>         num_args = data_len_reg->var_off.value / 8;
>
> +       if (fmt_map->map_type != BPF_MAP_TYPE_ARRAY) {
> +               verbose(env, "only array map supports snprintf\n");
> +               return -EINVAL;
> +       }
> +
>         /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
>          * and map_direct_value_addr is set.
>          */
> @@ -21906,6 +21919,38 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
>                         goto next_insn;
>                 }
>
> +#ifdef CONFIG_SMP

Instead of CONFIG_SMP, I think it's more appropriate to check for
bpf_jit_supports_percpu_insn(). We check CONFIG_SMP for
BPF_FUNC_get_smp_processor_id inlining because of `cpu_number` per-CPU
variable, not because BPF_MOV64_PERCPU_REG() doesn't work on single
CPU systems (IIUC).

pw-bot: cr


> +               if (insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
> +                   (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
> +                    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE)) {
> +                       struct bpf_map *map;
> +
> +                       aux = &env->insn_aux_data[i + delta];
> +                       map = env->used_maps[aux->map_index];
> +                       if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)
> +                               goto next_insn;
> +
> +                       /* Reuse the original ld_imm64 insn. And add one
> +                        * mov64_percpu_reg insn.
> +                        */
> +
> +                       insn_buf[0] = insn[1];
> +                       insn_buf[1] = BPF_MOV64_PERCPU_REG(insn->dst_reg, insn->dst_reg);
> +                       cnt = 2;
> +
> +                       i++;
> +                       new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
> +                       if (!new_prog)
> +                               return -ENOMEM;
> +
> +                       delta    += cnt - 1;
> +                       env->prog = prog = new_prog;
> +                       insn      = new_prog->insnsi + i + delta;
> +
> +                       goto next_insn;
> +               }
> +#endif
> +
>                 if (insn->code != (BPF_JMP | BPF_CALL))
>                         goto next_insn;
>                 if (insn->src_reg == BPF_PSEUDO_CALL)
> --
> 2.49.0
>





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux