On Fri, 2025-08-15 at 20:21 +0100, Mykyta Yatsenko wrote: > From: Mykyta Yatsenko <yatsenko@xxxxxxxx> > > This patch adds necessary plumbing in verifier, syscall and maps to > support handling new kfunc bpf_task_work_schedule and kernel structure > bpf_task_work. The idea is similar to how we already handle bpf_wq and > bpf_timer. > verifier changes validate calls to bpf_task_work_schedule to make sure > it is safe and expected invariants hold. > btf part is required to detect bpf_task_work structure inside map value > and store its offset, which will be used in the next patch to calculate > key and value addresses. > arraymap and hashtab changes are needed to handle freeing of the > bpf_task_work: run code needed to deinitialize it, for example cancel > task_work callback if possible. > The use of bpf_task_work and proper implementation for kfuncs are > introduced in the next patch. > > Signed-off-by: Mykyta Yatsenko <yatsenko@xxxxxxxx> > --- Amount of copy-paste necessary for dealing with objects btf is saddening. This patch follows current approach and seem to do it correctly. [...] > --- a/kernel/bpf/arraymap.c > +++ b/kernel/bpf/arraymap.c > @@ -431,7 +431,7 @@ static void *array_map_vmalloc_addr(struct bpf_array *array) > return (void *)round_down((unsigned long)array, PAGE_SIZE); > } > > -static void array_map_free_timers_wq(struct bpf_map *map) > +static void array_map_free_internal_structs(struct bpf_map *map) > { > struct bpf_array *array = container_of(map, struct bpf_array, map); > int i; > @@ -439,12 +439,14 @@ static void array_map_free_timers_wq(struct bpf_map *map) > /* We don't reset or free fields other than timer and workqueue > * on uref dropping to zero. > */ > - if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE)) { > + if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) { Is there a way to share this code between array map and hash map? > for (i = 0; i < array->map.max_entries; i++) { > if (btf_record_has_field(map->record, BPF_TIMER)) > bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i)); > if (btf_record_has_field(map->record, BPF_WORKQUEUE)) > bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i)); > + if (btf_record_has_field(map->record, BPF_TASK_WORK)) > + bpf_obj_free_task_work(map->record, array_map_elem_ptr(array, i)); > } > } > } [...] > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c > index 64739308902f..378f260235dd 100644 > --- a/kernel/bpf/btf.c > +++ b/kernel/bpf/btf.c > @@ -3527,6 +3527,15 @@ static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_ > goto end; > } > } > + if (field_mask & BPF_TASK_WORK) { > + if (!strcmp(name, "bpf_task_work")) { > + if (*seen_mask & BPF_TASK_WORK) > + return -E2BIG; > + *seen_mask |= BPF_TASK_WORK; > + type = BPF_TASK_WORK; > + goto end; > + } > + } Nit: extract this and ifs before it as a loop over array of name/flag pairs? > field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head"); > field_mask_test_name(BPF_LIST_NODE, "bpf_list_node"); > field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root"); [...] > diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c > index 71f9931ac64c..207ad4823b5b 100644 > --- a/kernel/bpf/hashtab.c > +++ b/kernel/bpf/hashtab.c [...] > @@ -233,6 +233,9 @@ static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab) > if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE)) > bpf_obj_free_workqueue(htab->map.record, > htab_elem_value(elem, htab->map.key_size)); > + if (btf_record_has_field(htab->map.record, BPF_TASK_WORK)) > + bpf_obj_free_task_work(htab->map.record, > + htab_elem_value(elem, htab->map.key_size)); If there is no generic way to share this code with array maps, please, at-least within the hashmap.c extract these "if (btf_record_has_field(...)) {...}" groups so that there is no duplication between htab_free_{malloced,preallocated}_internal_structs(htab). > cond_resched(); > } > } [...] > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index 0fbfa8532c39..108d86f7eeaf 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c [...] > @@ -1309,6 +1322,14 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, > goto free_map_tab; > } > break; > + case BPF_TASK_WORK: This can be added to the group with BPF_TIMER and BPF_WORKQUEUE just above. > + if (map->map_type != BPF_MAP_TYPE_HASH && > + map->map_type != BPF_MAP_TYPE_LRU_HASH && > + map->map_type != BPF_MAP_TYPE_ARRAY) { > + ret = -EOPNOTSUPP; > + goto free_map_tab; > + } > + break; > default: > /* Fail if map_type checks are missing for a field type */ > ret = -EOPNOTSUPP; > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index a61d57996692..be7a744c7917 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c [...] This function repeats process_timer_func() almost verbatim. > +{ > + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; > + struct bpf_map *map = reg->map_ptr; > + bool is_const = tnum_is_const(reg->var_off); > + u64 val = reg->var_off.value; > + > + if (!map->btf) { > + verbose(env, "map '%s' has to have BTF in order to use bpf_task_work\n", > + map->name); > + return -EINVAL; > + } > + if (!btf_record_has_field(map->record, BPF_TASK_WORK)) { > + verbose(env, "map '%s' has no valid bpf_task_work\n", map->name); > + return -EINVAL; > + } > + if (!is_const) { > + verbose(env, > + "bpf_task_work has to be at the constant offset\n"); > + return -EINVAL; > + } > + if (map->record->task_work_off != val + reg->off) { > + verbose(env, > + "off %lld doesn't point to 'struct bpf_task_work' that is at %d\n", > + val + reg->off, map->record->task_work_off); > + return -EINVAL; > + } > + if (meta->map.ptr) { > + verifier_bug(env, "Two map pointers in a bpf_task_work kfunc"); > + return -EFAULT; > + } > + > + meta->map.uid = reg->map_uid; > + meta->map.ptr = map; > + return 0; > +} > + > static int process_kptr_func(struct bpf_verifier_env *env, int regno, > struct bpf_call_arg_meta *meta) > { [...]