On 02/05/2025 00:52, Andrii Nakryiko wrote: > BTF dedup has a strong assumption that compiler with deduplicate identical > types within any given compilation unit (i.e., .c file). This property > is used when establishing equilvalence of two subgraphs of types. > > Unfortunately, this property doesn't always holds in practice. We've > seen cases of having truly identical structs, unions, array definitions, > and, most recently, even pointers to the same type being duplicated > within CU. > > Previously, we mitigated this on a case-by-case basis, adding a few > simple heuristics for validating that two BTF types (having two > different type IDs) are structurally the same. But this approach scales > poorly, and we can have more weird cases come up in the future. > > So let's take a half-step back, and implement a bit more generic > structural equivalence check, recursively. We still limit it to > reasonable depth to avoid long reference loops. Depth-wise limiting of > potentially cyclical graph isn't great, but as I mentioned below doesn't > seem to be detrimental performance-wise. We can always improve this in > the future with per-type visited markers, if necessary. > > Performance-wise this doesn't seem too affect vmlinux BTF dedup, which > makes sense because this logic kicks in not so frequently and only if we > already established a canonical candidate type match, but suddenly find > a different (but probably identical) type. > > On the other hand, this seems to help to reduce duplication across many > kernel modules. In my local test, I had 639 kernel module built. Overall > .BTF sections size goes down from 41MB bytes down to 5MB (!), which is > pretty impressive for such a straightforward piece of logic added. But > it would be nice to validate independently just in case my bash and > Python-fu is broken. > > Signed-off-by: Andrii Nakryiko <andrii@xxxxxxxxxx> Looks great! Reviewed-by: Alan Maguire <alan.maguire@xxxxxxxxxx> Should have some numbers on the module size differences with this change by Monday, had to dash before my build completed. > --- > tools/lib/bpf/btf.c | 137 ++++++++++++++++++++++++++++---------------- > 1 file changed, 89 insertions(+), 48 deletions(-) > > diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c > index b7513d4cce55..f18d7e6a453c 100644 > --- a/tools/lib/bpf/btf.c > +++ b/tools/lib/bpf/btf.c > @@ -4356,59 +4356,109 @@ static inline __u16 btf_fwd_kind(struct btf_type *t) > return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT; > } > > -/* Check if given two types are identical ARRAY definitions */ > -static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) > +static bool btf_dedup_identical_types(struct btf_dedup *d, __u32 id1, __u32 id2, int depth) > { > struct btf_type *t1, *t2; > + int k1, k2; > +recur: > + if (depth <= 0) > + return false; > > t1 = btf_type_by_id(d->btf, id1); > t2 = btf_type_by_id(d->btf, id2); > - if (!btf_is_array(t1) || !btf_is_array(t2)) > + > + k1 = btf_kind(t1); > + k2 = btf_kind(t2); > + if (k1 != k2) > return false; > > - return btf_equal_array(t1, t2); > -} > + switch (k1) { > + case BTF_KIND_UNKN: /* VOID */ > + return true; > + case BTF_KIND_INT: > + return btf_equal_int_tag(t1, t2); > + case BTF_KIND_ENUM: > + case BTF_KIND_ENUM64: > + return btf_compat_enum(t1, t2); > + case BTF_KIND_FWD: > + case BTF_KIND_FLOAT: > + return btf_equal_common(t1, t2); > + case BTF_KIND_CONST: > + case BTF_KIND_VOLATILE: > + case BTF_KIND_RESTRICT: > + case BTF_KIND_PTR: > + case BTF_KIND_TYPEDEF: > + case BTF_KIND_FUNC: > + case BTF_KIND_TYPE_TAG: > + if (t1->info != t2->info || t1->name_off != t2->name_off) > + return false; > + id1 = t1->type;> + id2 = t2->type; > + goto recur; > + case BTF_KIND_ARRAY: { > + struct btf_array *a1, *a2; > > -/* Check if given two types are identical STRUCT/UNION definitions */ > -static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2) > -{ > - const struct btf_member *m1, *m2; > - struct btf_type *t1, *t2; > - int n, i; > + if (!btf_compat_array(t1, t2)) > + return false; > > - t1 = btf_type_by_id(d->btf, id1); > - t2 = btf_type_by_id(d->btf, id2); > + a1 = btf_array(t1); > + a2 = btf_array(t1); > > - if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2)) > - return false; > + if (a1->index_type != a2->index_type && > + !btf_dedup_identical_types(d, a1->index_type, a2->index_type, depth - 1)) > + return false; > > - if (!btf_shallow_equal_struct(t1, t2)) > - return false; > + if (a1->type != a2->type && > + !btf_dedup_identical_types(d, a1->type, a2->type, depth - 1)) > + return false; > > - m1 = btf_members(t1); > - m2 = btf_members(t2); > - for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { > - if (m1->type != m2->type && > - !btf_dedup_identical_arrays(d, m1->type, m2->type) && > - !btf_dedup_identical_structs(d, m1->type, m2->type)) > + return true; > + } > + case BTF_KIND_STRUCT: > + case BTF_KIND_UNION: { > + const struct btf_member *m1, *m2; > + int i, n; > + > + if (!btf_shallow_equal_struct(t1, t2)) > return false; > + > + m1 = btf_members(t1); > + m2 = btf_members(t2); > + for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { > + if (m1->type == m2->type) > + continue; > + if (!btf_dedup_identical_types(d, m1->type, m2->type, depth - 1)) > + return false; > + } > + return true; > } > - return true; > -} > + case BTF_KIND_FUNC_PROTO: { > + const struct btf_param *p1, *p2; > + int i, n; > > -static bool btf_dedup_identical_ptrs(struct btf_dedup *d, __u32 id1, __u32 id2) > -{ > - struct btf_type *t1, *t2; > + if (!btf_compat_fnproto(t1, t2)) > + return false; > > - t1 = btf_type_by_id(d->btf, id1); > - t2 = btf_type_by_id(d->btf, id2); > + if (t1->type != t2->type && > + !btf_dedup_identical_types(d, t1->type, t2->type, depth - 1)) > + return false; > > - if (!btf_is_ptr(t1) || !btf_is_ptr(t2)) > + p1 = btf_params(t1); > + p2 = btf_params(t2); > + for (i = 0, n = btf_vlen(t1); i < n; i++, p1++, p2++) { > + if (p1->type == p2->type) > + continue; > + if (!btf_dedup_identical_types(d, p1->type, p2->type, depth - 1)) > + return false; > + } > + return true; > + } > + default: > return false; > - > - return t1->type == t2->type; > + } > } > > + > /* > * Check equivalence of BTF type graph formed by candidate struct/union (we'll > * call it "candidate graph" in this description for brevity) to a type graph > @@ -4527,22 +4577,13 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, > * different fields within the *same* struct. This breaks type > * equivalence check, which makes an assumption that candidate > * types sub-graph has a consistent and deduped-by-compiler > - * types within a single CU. So work around that by explicitly > - * allowing identical array types here. > + * types within a single CU. And similar situation can happen > + * with struct/union sometimes, and event with pointers. > + * So accommodate cases like this doing a structural > + * comparison recursively, but avoiding being stuck in endless > + * loops by limiting the depth up to which we check. > */ > - if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id)) > - return 1; > - /* It turns out that similar situation can happen with > - * struct/union sometimes, sigh... Handle the case where > - * structs/unions are exactly the same, down to the referenced > - * type IDs. Anything more complicated (e.g., if referenced > - * types are different, but equivalent) is *way more* > - * complicated and requires a many-to-many equivalence mapping. > - */ > - if (btf_dedup_identical_structs(d, hypot_type_id, cand_id)) > - return 1; > - /* A similar case is again observed for PTRs. */ > - if (btf_dedup_identical_ptrs(d, hypot_type_id, cand_id)) > + if (btf_dedup_identical_types(d, hypot_type_id, cand_id, 16)) > return 1; > return 0; > }