Re: [PATCH bpf-next v3 2/4] bpf, libbpf: Support global percpu data

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, May 26, 2025 at 9:22 AM Leon Hwang <leon.hwang@xxxxxxxxx> wrote:
>
> This patch introduces support for global percpu data in libbpf by adding a
> new ".data..percpu" section, similar to ".data". It enables efficient
> handling of percpu global variables in bpf programs.
>
> This enhancement improves performance for workloads that benefit from
> percpu storage.
>
> Meanwhile, add bpf_map__is_internal_percpu() API to check whether the map
> is an internal map used for global percpu variables.

I'm not a big fan of this super specific API. We do have
bpf_map__is_internal() to let consumers know that a map is special in
some way, but I'd like to avoid making this fine a distinction between
a per-CPU internal map and a non-per-CPU one (and then why stop there —
why not have a kconfig-specific API, a ksym-specific check, etc.)?

All this is mostly useful just for bpftool for skeleton codegen, and
bpftool already has to know about the .percpu prefix, so it doesn't need
this API to make all these decisions. Let's try to drop this
bpf_map__is_internal_percpu() API?

>
> Signed-off-by: Leon Hwang <leon.hwang@xxxxxxxxx>
> ---
>  tools/lib/bpf/libbpf.c   | 102 +++++++++++++++++++++++++++++++--------
>  tools/lib/bpf/libbpf.h   |   9 ++++
>  tools/lib/bpf/libbpf.map |   1 +
>  3 files changed, 91 insertions(+), 21 deletions(-)
>
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index e9c641a2fb203..65f0df09ac6d8 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -518,6 +518,7 @@ struct bpf_struct_ops {
>  };
>
>  #define DATA_SEC ".data"
> +#define PERCPU_DATA_SEC ".data..percpu"
>  #define BSS_SEC ".bss"
>  #define RODATA_SEC ".rodata"
>  #define KCONFIG_SEC ".kconfig"
> @@ -532,6 +533,7 @@ enum libbpf_map_type {
>         LIBBPF_MAP_BSS,
>         LIBBPF_MAP_RODATA,
>         LIBBPF_MAP_KCONFIG,
> +       LIBBPF_MAP_PERCPU_DATA,
>  };
>
>  struct bpf_map_def {
> @@ -642,6 +644,7 @@ enum sec_type {
>         SEC_DATA,
>         SEC_RODATA,
>         SEC_ST_OPS,
> +       SEC_PERCPU_DATA,
>  };
>
>  struct elf_sec_desc {
> @@ -1902,7 +1905,7 @@ static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
>         struct btf_var_secinfo *vsi;
>         int i, n;
>
> -       if (!map->btf_value_type_id)
> +       if (!map->btf_value_type_id || map->libbpf_type == LIBBPF_MAP_PERCPU_DATA)

Not sure this is correct. We should have btf_value_type_id for PERCPU
global data array, no?

>                 return false;
>
>         t = btf__type_by_id(obj->btf, map->btf_value_type_id);
> @@ -1926,6 +1929,7 @@ static int
>  bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
>                               const char *real_name, int sec_idx, void *data, size_t data_sz)
>  {
> +       bool is_percpu = type == LIBBPF_MAP_PERCPU_DATA;
>         struct bpf_map_def *def;
>         struct bpf_map *map;
>         size_t mmap_sz;
> @@ -1947,9 +1951,9 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
>         }
>
>         def = &map->def;
> -       def->type = BPF_MAP_TYPE_ARRAY;
> +       def->type = is_percpu ? BPF_MAP_TYPE_PERCPU_ARRAY : BPF_MAP_TYPE_ARRAY;
>         def->key_size = sizeof(int);
> -       def->value_size = data_sz;
> +       def->value_size = is_percpu ? roundup(data_sz, 8) : data_sz;
>         def->max_entries = 1;
>         def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
>                 ? BPF_F_RDONLY_PROG : 0;
> @@ -1960,10 +1964,11 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
>         if (map_is_mmapable(obj, map))
>                 def->map_flags |= BPF_F_MMAPABLE;
>
> -       pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
> -                map->name, map->sec_idx, map->sec_offset, def->map_flags);
> +       pr_debug("map '%s' (global %sdata): at sec_idx %d, offset %zu, flags %x.\n",
> +                map->name, is_percpu ? "percpu " : "", map->sec_idx,
> +                map->sec_offset, def->map_flags);
>
> -       mmap_sz = bpf_map_mmap_sz(map);
> +       mmap_sz = is_percpu ? def->value_size : bpf_map_mmap_sz(map);
>         map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
>                            MAP_SHARED | MAP_ANONYMOUS, -1, 0);
>         if (map->mmaped == MAP_FAILED) {
> @@ -1999,6 +2004,13 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
>                         continue;
>
>                 switch (sec_desc->sec_type) {
> +               case SEC_PERCPU_DATA:
> +                       sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
> +                       err = bpf_object__init_internal_map(obj, LIBBPF_MAP_PERCPU_DATA,
> +                                                           sec_name, sec_idx,
> +                                                           sec_desc->data->d_buf,
> +                                                           sec_desc->data->d_size);
> +                       break;
>                 case SEC_DATA:
>                         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
>                         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
> @@ -3363,6 +3375,10 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
>                 fixup_offsets = true;
>         }
>
> +       /* .data..percpu DATASEC must have __aligned(8) size. */

Please remind me why? Similarly for the def->value_size special casing.
What will happen if we don't explicitly roundup() on the libbpf side?
(The kernel always does roundup(8) for ARRAY value_size anyway, which is
why I am asking.)

> +       if (strcmp(sec_name, PERCPU_DATA_SEC) == 0 || str_has_pfx(sec_name, PERCPU_DATA_SEC))
> +               t->size = roundup(t->size, 8);
> +
>         for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
>                 const struct btf_type *t_var;
>                 struct btf_var *var;
> @@ -3923,6 +3939,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
>                                 err = bpf_object__add_programs(obj, data, name, idx);
>                                 if (err)
>                                         return err;
> +                       } else if (strcmp(name, PERCPU_DATA_SEC) == 0 ||
> +                                  str_has_pfx(name, PERCPU_DATA_SEC)) {
> +                               sec_desc->sec_type = SEC_PERCPU_DATA;
> +                               sec_desc->shdr = sh;
> +                               sec_desc->data = data;
>                         } else if (strcmp(name, DATA_SEC) == 0 ||
>                                    str_has_pfx(name, DATA_SEC ".")) {
>                                 sec_desc->sec_type = SEC_DATA;
> @@ -4452,6 +4473,7 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
>         case SEC_BSS:
>         case SEC_DATA:
>         case SEC_RODATA:
> +       case SEC_PERCPU_DATA:
>                 return true;
>         default:
>                 return false;
> @@ -4477,6 +4499,8 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
>                 return LIBBPF_MAP_DATA;
>         case SEC_RODATA:
>                 return LIBBPF_MAP_RODATA;
> +       case SEC_PERCPU_DATA:
> +               return LIBBPF_MAP_PERCPU_DATA;
>         default:
>                 return LIBBPF_MAP_UNSPEC;
>         }
> @@ -4794,7 +4818,7 @@ static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
>
>         /*
>          * LLVM annotates global data differently in BTF, that is,
> -        * only as '.data', '.bss' or '.rodata'.
> +        * only as '.data', '.bss', '.rodata' or '.data..percpu'.
>          */
>         if (!bpf_map__is_internal(map))
>                 return -ENOENT;
> @@ -5129,23 +5153,47 @@ static int
>  bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
>  {
>         enum libbpf_map_type map_type = map->libbpf_type;
> -       int err, zero = 0;
> -       size_t mmap_sz;
> +       bool is_percpu = map_type == LIBBPF_MAP_PERCPU_DATA;
> +       int err = 0, zero = 0, num_cpus, i;
> +       size_t data_sz, elem_sz, mmap_sz;
> +       void *data = NULL;
> +
> +       data_sz = map->def.value_size;
> +       if (is_percpu) {
> +               num_cpus = libbpf_num_possible_cpus();
> +               if (num_cpus < 0) {
> +                       err = num_cpus;
> +                       return err;

hm... why not `return num_cpus;`?

> +               }
> +
> +               data_sz = data_sz * num_cpus;
> +               data = malloc(data_sz);
> +               if (!data) {
> +                       err = -ENOMEM;
> +                       return err;
> +               }

[...]

>  /**
>   * @brief **bpf_map__set_pin_path()** sets the path attribute that tells where the
>   * BPF map should be pinned. This does not actually create the 'pin'.
> diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
> index 1205f9a4fe048..1c239ac88c699 100644
> --- a/tools/lib/bpf/libbpf.map
> +++ b/tools/lib/bpf/libbpf.map
> @@ -443,4 +443,5 @@ LIBBPF_1.6.0 {
>                 bpf_program__line_info_cnt;
>                 btf__add_decl_attr;
>                 btf__add_type_attr;
> +               bpf_map__is_internal_percpu;

This list is kept alphabetically sorted — please move the new entry to its sorted position.

>  } LIBBPF_1.5.0;




> --
> 2.49.0
>





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux