On Mon, Jul 28, 2025 at 07:03:08PM -0700, Ihor Solodrai wrote: > btf_encoder collects function ELF symbols into a table, which is later > used for processing DWARF data and determining whether a function can > be added to BTF. > > So far the ELF symbol name was used as a key for search in this table, > and a search by prefix match was attempted in cases when ELF symbol > name has a compiler-generated suffix. > > This implementation has bugs [1][2], causing some functions to be > inappropriately excluded from (or included into) BTF. > > Rework the implementation of the ELF functions table. Use a name of a > function without any suffix - symbol name before the first occurrence > of '.' - as a key. This way btf_encoder__find_function() always > returns a valid elf_function object (or NULL). > > Collect an array of symbol name + address pairs from GElf_Sym for each > elf_function when building the elf_functions table. > > Introduce ambiguous_addr flag to the btf_encoder_func_state. It is set > when the function is saved by examining the array of ELF symbols in > elf_function__has_ambiguous_address(). It tests whether there is only > one unique address for this function name, taking into account that > some addresses associated with it are not relevant: > * ".cold" suffix indicates a piece of hot/cold split > * ".part" suffix indicates a piece of partial inline > > When inspecting symbol name we have to search for any occurrence of > the target suffix, as opposed to testing the entire suffix, or the end > of a string. This is because suffixes may be combined by the compiler, > for example producing ".isra0.cold", and the conclusion will be > incorrect. > > In saved_functions_combine() check ambiguous_addr when deciding > whether a function should be included in BTF. > > Successful CI run: https://github.com/acmel/dwarves/pull/68/checks > > I manually spot checked some of the ~200 functions from vmlinux (BPF > CI-like kconfig) that are now excluded: all of those that I checked > had multiple addresses, and some where static functions from different > files with the same name. in my config the change removed 464, did the same check and all of them had more different addresses couple nits below, but feel free to ignore Acked-by: Jiri Olsa <jolsa@xxxxxxxxxx> thanks, jirka > > [1] https://lore.kernel.org/bpf/2f8c792e-9675-4385-b1cb-10266c72bd45@xxxxxxxxx/ > [2] https://lore.kernel.org/dwarves/6b4fda90fbf8f6aeeb2732bbfb6e81ba5669e2f3@xxxxxxxxx/ > > v1: https://lore.kernel.org/dwarves/98f41eaf6dd364745013650d58c5f254a592221c@xxxxxxxxx/ > Signed-off-by: Ihor Solodrai <isolodrai@xxxxxxxx> > --- > btf_encoder.c | 250 ++++++++++++++++++++++++++++++++------------------ > 1 file changed, 162 insertions(+), 88 deletions(-) > > diff --git a/btf_encoder.c b/btf_encoder.c > index 0bc2334..0aa94ae 100644 > --- a/btf_encoder.c > +++ b/btf_encoder.c > @@ -87,16 +87,22 @@ struct btf_encoder_func_state { > uint8_t optimized_parms:1; > uint8_t unexpected_reg:1; > uint8_t inconsistent_proto:1; > + uint8_t ambiguous_addr:1; > int ret_type_id; > struct btf_encoder_func_parm *parms; > struct btf_encoder_func_annot *annots; > }; > > +struct elf_function_sym { > + const char *name; > + uint64_t addr; > +}; > + > struct elf_function { > - const char *name; > - char *alias; > - size_t prefixlen; > - bool kfunc; > + char *name; > + struct elf_function_sym *syms; > + uint8_t sym_cnt; should we make this bigger, or at least check the overflow 256 dups are probably not possible, but with all those different suffixes we might get close soon ;-) > + uint8_t kfunc:1; > uint32_t kfunc_flags; > }; > > @@ -115,7 +121,6 @@ struct elf_functions { > struct elf_symtab *symtab; > struct elf_function *entries; > int cnt; > - int suffix_cnt; /* number of .isra, .part etc */ > }; > > /* > @@ -161,10 +166,18 @@ struct btf_kfunc_set_range { > uint64_t end; > }; > > +static inline void elf_function__free_content(struct elf_function *func) { elf_function__clear ? > + free(func->name); > + if (func->sym_cnt) > + free(func->syms); > + memset(func, 0, sizeof(*func)); > +} > + > static inline void elf_functions__delete(struct elf_functions *funcs) > { > - for (int i = 0; i < funcs->cnt; i++) > - free(funcs->entries[i].alias); > + for (int i = 0; i < funcs->cnt; i++) { > + elf_function__free_content(&funcs->entries[i]); > + } > free(funcs->entries); > elf_symtab__delete(funcs->symtab); > list_del(&funcs->node); > @@ -981,8 +994,7 @@ static void btf_encoder__log_func_skip(struct btf_encoder *encoder, struct elf_f > > if (!encoder->verbose) > return; > - printf("%s (%s): skipping BTF encoding of function due to ", > - func->alias ?: func->name, func->name); > + printf("%s : skipping BTF encoding of function due to ", func->name); > va_start(ap, fmt); > vprintf(fmt, ap); > va_end(ap); > @@ -1176,6 +1188,48 @@ static struct btf_encoder_func_state *btf_encoder__alloc_func_state(struct btf_e > return state; > } > > +/* some "." suffixes do not correspond to real functions; > + * - .part for partial inline > + * - .cold for rarely-used codepath extracted for better code locality > + */ > +static bool str_contains_non_fn_suffix(const char *str) { > + static const char *skip[] = { > + ".cold", > + ".part" > + }; > + char *suffix = strchr(str, '.'); > + int i; > + > + if (!suffix) > + return false; > + for (i = 0; i < ARRAY_SIZE(skip); i++) { > + if (strstr(suffix, skip[i])) > + return true; > + } > + return false; > +} > + > +static bool elf_function__has_ambiguous_address(struct elf_function *func) { > + struct elf_function_sym *sym; > + uint64_t addr; > + > + if (func->sym_cnt <= 1) > + return false; > + > + addr = 0; > + for (int i = 0; i < func->sym_cnt; i++) { > + sym = &func->syms[i]; > + if (!str_contains_non_fn_suffix(sym->name)) { > + if (addr && addr != sym->addr) > + return true; > + else > + addr = sym->addr; nit extra space ' ' ^^^ > + } > + } > + > + return false; > +} > + SNIP