On Mon, May 26, 2025 at 9:22 AM Leon Hwang <leon.hwang@xxxxxxxxx> wrote:
>
> This patch introduces global percpu data, inspired by commit
> 6316f78306c1 ("Merge branch 'support-global-data'"). It enables the
> definition of global percpu variables in BPF, similar to the
> DEFINE_PER_CPU() macro in the kernel[0].
>
> For example, in BPF, it is possible to define a global percpu variable
> like:
>
> int data SEC(".data..percpu");
>
> With this patch, tools like retsnoop[1] and bpfsnoop[2] can simplify their
> BPF code for handling LBRs. The code can be updated from
>
> static struct perf_branch_entry lbrs[1][MAX_LBR_ENTRIES] SEC(".data.lbrs");
>
> to
>
> static struct perf_branch_entry lbrs[MAX_LBR_ENTRIES] SEC(".data..percpu.lbrs");
>
> This eliminates the need to retrieve the CPU ID using the
> bpf_get_smp_processor_id() helper.
>
> Additionally, by reusing the global percpu data map, sharing information
> between tail callers and callees, or between freplace callers and
> callees, becomes simpler compared to reusing percpu_array maps.
>
> Links:
> [0] https://github.com/torvalds/linux/blob/fbfd64d25c7af3b8695201ebc85efe90be28c5a3/include/linux/percpu-defs.h#L114
> [1] https://github.com/anakryiko/retsnoop
> [2] https://github.com/bpfsnoop/bpfsnoop
>
> Signed-off-by: Leon Hwang <leon.hwang@xxxxxxxxx>
> ---
>  kernel/bpf/arraymap.c | 41 +++++++++++++++++++++++++++++++++++++--
>  kernel/bpf/verifier.c | 45 +++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 84 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index eb28c0f219ee4..91d06f0165a6e 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -249,6 +249,40 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
>  	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
>  }
>
> +static int percpu_array_map_direct_value_addr(const struct bpf_map *map,
> +					      u64 *imm, u32 off)
> +{
> +	struct bpf_array *array = container_of(map, struct bpf_array, map);
> +
> +	if (map->max_entries != 1)
> +		return -EOPNOTSUPP;
> +	if (off >= map->value_size)
> +		return -EINVAL;
> +	if (!bpf_jit_supports_percpu_insn())
> +		return -EOPNOTSUPP;
> +
> +	*imm = (u64) array->pptrs[0];
> +	return 0;
> +}
> +
> +static int percpu_array_map_direct_value_meta(const struct bpf_map *map,
> +					      u64 imm, u32 *off)
> +{
> +	struct bpf_array *array = container_of(map, struct bpf_array, map);
> +	u64 base = (u64) array->pptrs[0];
> +	u64 range = array->elem_size;
> +
> +	if (map->max_entries != 1)
> +		return -EOPNOTSUPP;
> +	if (imm < base || imm >= base + range)
> +		return -ENOENT;
> +	if (!bpf_jit_supports_percpu_insn())
> +		return -EOPNOTSUPP;
> +
> +	*off = imm - base;
> +	return 0;
> +}
> +
>  /* emit BPF instructions equivalent to C code of percpu_array_map_lookup_elem() */
>  static int percpu_array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
>  {
> @@ -532,9 +566,10 @@ static int array_map_check_btf(const struct bpf_map *map,
>  {
>  	u32 int_data;
>
> -	/* One exception for keyless BTF: .bss/.data/.rodata map */
> +	/* One exception for keyless BTF: .bss/.data/.rodata/.data..percpu map */
>  	if (btf_type_is_void(key_type)) {
> -		if (map->map_type != BPF_MAP_TYPE_ARRAY ||
> +		if ((map->map_type != BPF_MAP_TYPE_ARRAY &&
> +		     map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) ||
>  		    map->max_entries != 1)
>  			return -EINVAL;
>
> @@ -815,6 +850,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
>  	.map_get_next_key = array_map_get_next_key,
>  	.map_lookup_elem = percpu_array_map_lookup_elem,
>  	.map_gen_lookup = percpu_array_map_gen_lookup,
> +	.map_direct_value_addr = percpu_array_map_direct_value_addr,
> +	.map_direct_value_meta = percpu_array_map_direct_value_meta,
>  	.map_update_elem = array_map_update_elem,
>  	.map_delete_elem = array_map_delete_elem,
>  	.map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index d5807d2efc922..9203354208732 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -6939,6 +6939,8 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
>  	u64 addr;
>  	int err;
>
> +	if (map->map_type != BPF_MAP_TYPE_ARRAY)
> +		return -EINVAL;
>  	err = map->ops->map_direct_value_addr(map, &addr, off);
>  	if (err)
>  		return err;
> @@ -7451,6 +7453,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
>  		/* if map is read-only, track its contents as scalars */
>  		if (tnum_is_const(reg->var_off) &&
>  		    bpf_map_is_rdonly(map) &&
> +		    map->map_type == BPF_MAP_TYPE_ARRAY &&
>  		    map->ops->map_direct_value_addr) {
>  			int map_off = off + reg->var_off.value;
>  			u64 val = 0;
> @@ -9414,6 +9417,11 @@ static int check_reg_const_str(struct bpf_verifier_env *env,
>  		return -EACCES;
>  	}
>
> +	if (map->map_type != BPF_MAP_TYPE_ARRAY) {
> +		verbose(env, "only array map supports direct string value access\n");
> +		return -EINVAL;
> +	}
> +
>  	err = check_map_access(env, regno, reg->off,
>  			       map->value_size - reg->off, false,
>  			       ACCESS_HELPER);
> @@ -11101,6 +11109,11 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
>  		return -EINVAL;
>  	num_args = data_len_reg->var_off.value / 8;
>
> +	if (fmt_map->map_type != BPF_MAP_TYPE_ARRAY) {
> +		verbose(env, "only array map supports snprintf\n");
> +		return -EINVAL;
> +	}
> +
>  	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
>  	 * and map_direct_value_addr is set.
>  	 */
> @@ -21906,6 +21919,38 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
>  			goto next_insn;
>  		}
>
> +#ifdef CONFIG_SMP

Instead of CONFIG_SMP, I think it's more appropriate to check for
bpf_jit_supports_percpu_insn(). We check CONFIG_SMP for
BPF_FUNC_get_smp_processor_id inlining because of the `cpu_number`
per-CPU variable, not because BPF_MOV64_PERCPU_REG() doesn't work on
single-CPU systems (IIUC).

pw-bot: cr

> +		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
> +		    (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
> +		     insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE)) {
> +			struct bpf_map *map;
> +
> +			aux = &env->insn_aux_data[i + delta];
> +			map = env->used_maps[aux->map_index];
> +			if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)
> +				goto next_insn;
> +
> +			/* Reuse the original ld_imm64 insn. And add one
> +			 * mov64_percpu_reg insn.
> +			 */
> +
> +			insn_buf[0] = insn[1];
> +			insn_buf[1] = BPF_MOV64_PERCPU_REG(insn->dst_reg, insn->dst_reg);
> +			cnt = 2;
> +
> +			i++;
> +			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
> +			if (!new_prog)
> +				return -ENOMEM;
> +
> +			delta += cnt - 1;
> +			env->prog = prog = new_prog;
> +			insn = new_prog->insnsi + i + delta;
> +
> +			goto next_insn;
> +		}
> +#endif
> +
>  		if (insn->code != (BPF_JMP | BPF_CALL))
>  			goto next_insn;
>  		if (insn->src_reg == BPF_PSEUDO_CALL)
> --
> 2.49.0
>
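
Concretely, something like this untested sketch, reusing the locals from
the quoted hunk and dropping the #ifdef CONFIG_SMP/#endif pair entirely:

		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
		    (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
		     insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE)) {
			struct bpf_map *map;

			aux = &env->insn_aux_data[i + delta];
			map = env->used_maps[aux->map_index];
			if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)
				goto next_insn;
			/* BPF_MOV64_PERCPU_REG() can only be emitted when
			 * the JIT supports per-CPU instructions, so gate
			 * the rewrite at runtime instead of on CONFIG_SMP.
			 */
			if (!bpf_jit_supports_percpu_insn())
				goto next_insn;

			/* ... rest of the block as in the patch ... */
		}

That also keeps the behavior consistent with the
percpu_array_map_direct_value_addr()/_meta() callbacks above, which
already use bpf_jit_supports_percpu_insn() as the gate.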