On Wed, Jun 18, 2025 at 1:39 PM Mykyta Yatsenko
<mykyta.yatsenko5@xxxxxxxxx> wrote:
>
> From: Mykyta Yatsenko <yatsenko@xxxxxxxx>
>
> Implement support for presetting values for array elements in veristat.
> For example:
> ```
> sudo ./veristat set_global_vars.bpf.o -G "arr[3] = 1"
> ```
> Arrays of structures and structure of arrays work, but each individual
> scalar value has to be set separately: `foo[1].bar[2] = value`.
>
> Signed-off-by: Mykyta Yatsenko <yatsenko@xxxxxxxx>
> ---
>  tools/testing/selftests/bpf/veristat.c | 226 ++++++++++++++++++++-----
>  1 file changed, 180 insertions(+), 46 deletions(-)
> [...]
> +static int resolve_rvalue(struct btf *btf, const struct rvalue *rvalue, long long *result)
> +{
> +	int err = 0;
> +
> +	switch (rvalue->type) {
> +	case INTEGRAL:
> +		*result = rvalue->ivalue;

return 0;

> +		break;
> +	case ENUMERATOR:
> +		err = find_enum_value(btf, rvalue->svalue, result);
> +		if (err)
> +			fprintf(stderr, "Can't resolve enum value %s\n", rvalue->svalue);

if (err) {
	fprintf(...);
	return err;
}

return 0;

?

> +		break;

default:
	fprintf("unknown blah");
	return -EOPNOTSUPP;

I think I had a similar argument with Eduard before, so I'll explain my
logic here again. Whenever you have some branching in your code and you
know that branch's processing is effectively done and the only thing
left is to return a success/failure signal, *do return early* and
explicitly, ASAP (unless there is non-trivial clean up on the error
path, in which case not duplicating and spreading clean up logic
outweighs the simplicity of early-return code). Otherwise it takes
*unnecessary* extra mental effort to trace through the rest of the code
to make sure there is no extra common post-processing logic after that
branch/switch/for loop.

So if we know the INTEGRAL case is a success, then have `return 0;`
right there, don't make anyone read through the rest of the function
just to make sure we don't do anything extra.

> +	}
> +	return err;
> +}
> +
> +/* Returns number of consumed atoms from preset, negative error if failed */
> +static int adjust_var_secinfo_array(struct btf *btf, int tid, struct var_preset *preset,
> +				    int atom_idx, struct btf_var_secinfo *sinfo)
> +{
> +	struct btf_array *barr;
> +	int i = atom_idx, err;
> +	const struct btf_type *t;
> +	long long off = 0, idx;
> +
> +	if (atom_idx < 1) /* Array index can't be the first atom */

can atom_idx be -1 or negative? If not, then do `if (atom_idx == 0)`.
It's another small mental overhead that we can easily avoid, and so we
should.

> +		return -EINVAL;
> +
> +	tid = btf__resolve_type(btf, tid);
> +	t = btf__type_by_id(btf, tid);
> +	if (!btf_is_array(t)) {
> +		fprintf(stderr, "Array index is not expected for %s\n",
> +			preset->atoms[atom_idx - 1].name);
> +		return -EINVAL;
> +	}

[...]
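To pull the resolve_rvalue() suggestions above together in one piece,
this is roughly the shape I mean (untested, just rearranging the code
quoted from the patch; the default-case error message is a
placeholder):

static int resolve_rvalue(struct btf *btf, const struct rvalue *rvalue, long long *result)
{
	int err;

	switch (rvalue->type) {
	case INTEGRAL:
		/* nothing to resolve, we are done */
		*result = rvalue->ivalue;
		return 0;
	case ENUMERATOR:
		err = find_enum_value(btf, rvalue->svalue, result);
		if (err) {
			fprintf(stderr, "Can't resolve enum value %s\n", rvalue->svalue);
			return err;
		}
		return 0;
	default:
		fprintf(stderr, "Unexpected rvalue type\n");
		return -EOPNOTSUPP;
	}
}

Each case is fully handled where it ends, so nobody has to scan the
rest of the function for hidden post-processing.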
> @@ -1815,26 +1938,29 @@ const int btf_find_member(const struct btf *btf,
>  static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t,
>  			      struct btf_var_secinfo *sinfo, struct var_preset *preset)
>  {
> -	const struct btf_type *base_type, *member_type;
> -	int err, member_tid, i;
> -	__u32 member_offset = 0;
> -
> -	base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type));
> -
> -	for (i = 1; i < preset->atom_count; ++i) {
> -		err = btf_find_member(btf, base_type, 0, preset->atoms[i].name,
> -				      &member_tid, &member_offset);
> -		if (err) {
> -			fprintf(stderr, "Could not find member %s for variable %s\n",
> -				preset->atoms[i].name, preset->atoms[i - 1].name);
> -			return err;
> +	const struct btf_type *base_type;
> +	int err, i = 1, n;
> +	int tid;
> +
> +	tid = btf__resolve_type(btf, t->type);
> +	base_type = btf__type_by_id(btf, tid);
> +
> +	while (i < preset->atom_count) {
> +		if (preset->atoms[i].type == ARRAY_INDEX) {
> +			n = adjust_var_secinfo_array(btf, tid, preset, i, sinfo);
> +			if (n < 0)
> +				return n;
> +			i += n;
> +		} else {
> +			err = btf_find_member(btf, base_type, 0, preset->atoms[i].name, sinfo);
> +			if (err)
> +				return err;
> +			i++;
>  		}
> -		member_type = btf__type_by_id(btf, member_tid);
> -		sinfo->offset += member_offset / 8;
> -		sinfo->size = member_type->size;
> -		sinfo->type = member_tid;
> -		base_type = member_type;
> +		base_type = btf__type_by_id(btf, sinfo->type);
> +		tid = sinfo->type;
>  	}
> +
>  	return 0;
>  }

Is there a good reason to have adjust_var_secinfo() separate from
adjust_var_secinfo_array()? I don't know if I'm missing anything
non-obvious, but in my mind this whole adjust_var_secinfo() should
look roughly like this:

cur_type = /* resolve from original var */
cur_off = 0;

for (i = 0; i < preset->atom_count; i++) {
	if (preset->atoms[i].type == ARRAY_INDEX) {
		/* a) error checking: cur_type should be array */
		/* b) resolve index (if it's enum) */
		/* c) error checking: index should be within bounds of cur_type (which is ARRAY) */
		/* d) adjust cur_off += cur_type's elem_size * index_value */
		/* e) cur_type = btf__resolve_type(cur_type->type) */
	} else {
		/* a) error checking: cur_type should be struct/union */
		/* b) find field by name with btf_find_member */
		/* c) cur_off += member_offset */
		/* d) cur_type = btf__resolve_type(field->type) */
	}
}

It seems inelegant that we have an outer loop over FIELD references
(one at a time), while for ARRAY_INDEX we skip over N atoms at once.
Why? We have a set of "instructions", so just execute them one at a
time, keeping track of the current type and current offset we are at.
Is there anything I am missing that would prevent this simple and more
uniform approach?

[...]
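To make that concrete, here is a rough and completely untested sketch
of the single-loop version I have in mind. I'm reusing the pre-patch
btf_find_member() signature (the one returning member tid and bit
offset), guessing that an ARRAY_INDEX atom carries its index as an
rvalue field (called `index` below), and skipping error checking of
btf__resolve_size(), so adjust for the actual struct layout:

static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t,
			      struct btf_var_secinfo *sinfo, struct var_preset *preset)
{
	const struct btf_type *cur_type;
	long long off = 0, idx;
	int i, err, cur_tid;

	cur_tid = btf__resolve_type(btf, t->type);
	cur_type = btf__type_by_id(btf, cur_tid);

	/* atom 0 is the variable itself, the rest are field/index accesses */
	for (i = 1; i < preset->atom_count; i++) {
		if (preset->atoms[i].type == ARRAY_INDEX) {
			const struct btf_array *arr;

			/* a) current type has to be an array */
			if (!btf_is_array(cur_type)) {
				fprintf(stderr, "%s is not an array\n",
					preset->atoms[i - 1].name);
				return -EINVAL;
			}
			arr = btf_array(cur_type);
			/* b) resolve the index, either an integer or an enumerator */
			err = resolve_rvalue(btf, &preset->atoms[i].index, &idx);
			if (err)
				return err;
			/* c) index has to be within array bounds */
			if (idx < 0 || idx >= arr->nelems) {
				fprintf(stderr, "Array index %lld is out of bounds\n", idx);
				return -EINVAL;
			}
			/* d) advance by elem_size * index */
			off += btf__resolve_size(btf, arr->type) * idx;
			/* e) descend into the element type */
			cur_tid = btf__resolve_type(btf, arr->type);
		} else {
			__u32 member_off = 0;
			int member_tid;

			/* a) current type has to be a struct or union */
			if (!btf_is_composite(cur_type)) {
				fprintf(stderr, "%s is not a struct/union\n",
					preset->atoms[i - 1].name);
				return -EINVAL;
			}
			/* b) find the field by name (pre-patch btf_find_member()) */
			err = btf_find_member(btf, cur_type, 0, preset->atoms[i].name,
					      &member_tid, &member_off);
			if (err)
				return err;
			/* c) member offset is in bits, advance by bytes */
			off += member_off / 8;
			/* d) descend into the field type */
			cur_tid = btf__resolve_type(btf, member_tid);
		}
		cur_type = btf__type_by_id(btf, cur_tid);
	}

	sinfo->offset += off;
	sinfo->size = btf__resolve_size(btf, cur_tid);
	sinfo->type = cur_tid;
	return 0;
}

With that, sinfo is only touched once at the end, and the array and
field cases are handled symmetrically, one atom per iteration.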