Re: [PATCH bpf-next 1/4] bpf: bpf task work plumbing

On Wed, 6 Aug 2025 at 16:46, Mykyta Yatsenko <mykyta.yatsenko5@xxxxxxxxx> wrote:
>
> From: Mykyta Yatsenko <yatsenko@xxxxxxxx>
>
> This patch adds the necessary plumbing in the verifier, syscall and map
> code to support the new bpf_task_work_schedule kfuncs and the
> bpf_task_work kernel structure. The idea is similar to how bpf_wq and
> bpf_timer are already handled.
> The verifier changes validate calls to bpf_task_work_schedule to make
> sure they are safe and the expected invariants hold.
> The BTF changes detect the bpf_task_work structure inside a map value
> and store its offset, which the next patch uses to calculate the key
> and value addresses.
> The arraymap and hashtab changes handle freeing of bpf_task_work: they
> run the code needed to deinitialize it, for example cancelling the
> task_work callback if possible.
> The actual use of bpf_task_work and the full kfunc implementations are
> introduced in the next patch.
>
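
Just to make sure I'm reading the intended BPF-side usage correctly:
something roughly like the sketch below, i.e. embed struct bpf_task_work
in a map value and hand both the value pointer and the owning map to the
schedule kfunc? (Untested sketch; assumes a vmlinux.h generated from a
kernel with this series applied, and the map/section/callback names are
made up.)

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

/* Map value embeds the opaque struct bpf_task_work, same pattern as
 * bpf_timer/bpf_wq.
 */
struct elem {
        struct bpf_task_work tw;
        __u64 payload;
};

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(max_entries, 128);
        __type(key, __u32);
        __type(value, struct elem);
} tw_map SEC(".maps");

/* Corresponds to the bpf_task_work_callback_t typedef from this patch:
 * callback(map, key, value), runs in the context of the target task
 * once the task_work fires.
 */
static void task_work_cb(void *map, void *key, void *value)
{
}

extern int bpf_task_work_schedule_signal(struct task_struct *task,
                                         struct bpf_task_work *tw,
                                         void *map,
                                         void (*callback)(void *map, void *key, void *value),
                                         void *aux__prog) __ksym;

SEC("tp/syscalls/sys_enter_nanosleep")
int schedule_tw(void *ctx)
{
        struct task_struct *task = bpf_get_current_task_btf();
        struct elem *val;
        __u32 key = 0;

        val = bpf_map_lookup_elem(&tw_map, &key);
        if (!val)
                return 0;

        /* Schedule task_work_cb() to run in the context of @task;
         * aux__prog is NULL per the kfunc doc comment.
         */
        bpf_task_work_schedule_signal(task, &val->tw, &tw_map, task_work_cb, NULL);
        return 0;
}

char LICENSE[] SEC("license") = "GPL";
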
> Signed-off-by: Mykyta Yatsenko <yatsenko@xxxxxxxx>
> ---
>  include/linux/bpf.h            |  11 +++
>  include/uapi/linux/bpf.h       |   4 +
>  kernel/bpf/arraymap.c          |   8 +-
>  kernel/bpf/btf.c               |  15 ++++
>  kernel/bpf/hashtab.c           |  22 ++++--
>  kernel/bpf/helpers.c           |  45 +++++++++++
>  kernel/bpf/syscall.c           |  23 +++++-
>  kernel/bpf/verifier.c          | 131 ++++++++++++++++++++++++++++++++-
>  tools/include/uapi/linux/bpf.h |   4 +
>  9 files changed, 247 insertions(+), 16 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index f9cd2164ed23..cb83ba0eaed5 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -206,6 +206,7 @@ enum btf_field_type {
>         BPF_WORKQUEUE  = (1 << 10),
>         BPF_UPTR       = (1 << 11),
>         BPF_RES_SPIN_LOCK = (1 << 12),
> +       BPF_TASK_WORK  = (1 << 13),
>  };
>
>  typedef void (*btf_dtor_kfunc_t)(void *);
> @@ -245,6 +246,7 @@ struct btf_record {
>         int timer_off;
>         int wq_off;
>         int refcount_off;
> +       int task_work_off;
>         struct btf_field fields[];
>  };
>
> @@ -340,6 +342,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
>                 return "bpf_rb_node";
>         case BPF_REFCOUNT:
>                 return "bpf_refcount";
> +       case BPF_TASK_WORK:
> +               return "bpf_task_work";
>         default:
>                 WARN_ON_ONCE(1);
>                 return "unknown";
> @@ -378,6 +382,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
>                 return sizeof(struct bpf_rb_node);
>         case BPF_REFCOUNT:
>                 return sizeof(struct bpf_refcount);
> +       case BPF_TASK_WORK:
> +               return sizeof(struct bpf_task_work);
>         default:
>                 WARN_ON_ONCE(1);
>                 return 0;
> @@ -410,6 +416,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
>                 return __alignof__(struct bpf_rb_node);
>         case BPF_REFCOUNT:
>                 return __alignof__(struct bpf_refcount);
> +       case BPF_TASK_WORK:
> +               return __alignof__(struct bpf_task_work);
>         default:
>                 WARN_ON_ONCE(1);
>                 return 0;
> @@ -441,6 +449,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
>         case BPF_KPTR_REF:
>         case BPF_KPTR_PERCPU:
>         case BPF_UPTR:
> +       case BPF_TASK_WORK:
>                 break;
>         default:
>                 WARN_ON_ONCE(1);
> @@ -577,6 +586,7 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
>                            bool lock_src);
>  void bpf_timer_cancel_and_free(void *timer);
>  void bpf_wq_cancel_and_free(void *timer);
> +void bpf_task_work_cancel_and_free(void *timer);
>  void bpf_list_head_free(const struct btf_field *field, void *list_head,
>                         struct bpf_spin_lock *spin_lock);
>  void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
> @@ -2391,6 +2401,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec);
>  bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
>  void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
>  void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj);
> +void bpf_obj_free_task_work(const struct btf_record *rec, void *obj);
>  void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
>  void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 233de8677382..e444d9f67829 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -7418,6 +7418,10 @@ struct bpf_timer {
>         __u64 __opaque[2];
>  } __attribute__((aligned(8)));
>
> +struct bpf_task_work {
> +       __u64 __opaque[16];
> +} __attribute__((aligned(8)));
> +
>  struct bpf_wq {
>         __u64 __opaque[2];
>  } __attribute__((aligned(8)));
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index 3d080916faf9..4130d8e76dff 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -431,7 +431,7 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
>         return (void *)round_down((unsigned long)array, PAGE_SIZE);
>  }
>
> -static void array_map_free_timers_wq(struct bpf_map *map)
> +static void array_map_free_internal_structs(struct bpf_map *map)
>  {
>         struct bpf_array *array = container_of(map, struct bpf_array, map);
>         int i;
> @@ -439,12 +439,14 @@ static void array_map_free_timers_wq(struct bpf_map *map)
>         /* We don't reset or free fields other than timer and workqueue
>          * on uref dropping to zero.
>          */
> -       if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE)) {
> +       if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
>                 for (i = 0; i < array->map.max_entries; i++) {
>                         if (btf_record_has_field(map->record, BPF_TIMER))
>                                 bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
>                         if (btf_record_has_field(map->record, BPF_WORKQUEUE))
>                                 bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i));
> +                       if (btf_record_has_field(map->record, BPF_TASK_WORK))
> +                               bpf_obj_free_task_work(map->record, array_map_elem_ptr(array, i));
>                 }
>         }
>  }
> @@ -783,7 +785,7 @@ const struct bpf_map_ops array_map_ops = {
>         .map_alloc = array_map_alloc,
>         .map_free = array_map_free,
>         .map_get_next_key = array_map_get_next_key,
> -       .map_release_uref = array_map_free_timers_wq,
> +       .map_release_uref = array_map_free_internal_structs,
>         .map_lookup_elem = array_map_lookup_elem,
>         .map_update_elem = array_map_update_elem,
>         .map_delete_elem = array_map_delete_elem,
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index 0aff814cb53a..c66f9c6dfc48 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -3527,6 +3527,15 @@ static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_
>                         goto end;
>                 }
>         }
> +       if (field_mask & BPF_TASK_WORK) {
> +               if (!strcmp(name, "bpf_task_work")) {
> +                       if (*seen_mask & BPF_TASK_WORK)
> +                               return -E2BIG;
> +                       *seen_mask |= BPF_TASK_WORK;
> +                       type = BPF_TASK_WORK;
> +                       goto end;
> +               }
> +       }
>         field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
>         field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
>         field_mask_test_name(BPF_RB_ROOT,   "bpf_rb_root");
> @@ -3693,6 +3702,7 @@ static int btf_find_field_one(const struct btf *btf,
>         case BPF_LIST_NODE:
>         case BPF_RB_NODE:
>         case BPF_REFCOUNT:
> +       case BPF_TASK_WORK:
>                 ret = btf_find_struct(btf, var_type, off, sz, field_type,
>                                       info_cnt ? &info[0] : &tmp);
>                 if (ret < 0)
> @@ -3985,6 +3995,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
>         rec->timer_off = -EINVAL;
>         rec->wq_off = -EINVAL;
>         rec->refcount_off = -EINVAL;
> +       rec->task_work_off = -EINVAL;
>         for (i = 0; i < cnt; i++) {
>                 field_type_size = btf_field_type_size(info_arr[i].type);
>                 if (info_arr[i].off + field_type_size > value_size) {
> @@ -4050,6 +4061,10 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
>                 case BPF_LIST_NODE:
>                 case BPF_RB_NODE:
>                         break;
> +               case BPF_TASK_WORK:
> +                       WARN_ON_ONCE(rec->task_work_off >= 0);
> +                       rec->task_work_off = rec->fields[i].offset;
> +                       break;
>                 default:
>                         ret = -EFAULT;
>                         goto end;
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index 71f9931ac64c..207ad4823b5b 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -215,7 +215,7 @@ static bool htab_has_extra_elems(struct bpf_htab *htab)
>         return !htab_is_percpu(htab) && !htab_is_lru(htab) && !is_fd_htab(htab);
>  }
>
> -static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
> +static void htab_free_prealloced_internal_structs(struct bpf_htab *htab)
>  {
>         u32 num_entries = htab->map.max_entries;
>         int i;
> @@ -233,6 +233,9 @@ static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
>                 if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
>                         bpf_obj_free_workqueue(htab->map.record,
>                                                htab_elem_value(elem, htab->map.key_size));
> +               if (btf_record_has_field(htab->map.record, BPF_TASK_WORK))
> +                       bpf_obj_free_task_work(htab->map.record,
> +                                              htab_elem_value(elem, htab->map.key_size));
>                 cond_resched();
>         }
>  }
> @@ -1490,7 +1493,7 @@ static void delete_all_elements(struct bpf_htab *htab)
>         }
>  }
>
> -static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
> +static void htab_free_malloced_internal_structs(struct bpf_htab *htab)
>  {
>         int i;
>
> @@ -1508,22 +1511,25 @@ static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
>                         if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
>                                 bpf_obj_free_workqueue(htab->map.record,
>                                                        htab_elem_value(l, htab->map.key_size));
> +                       if (btf_record_has_field(htab->map.record, BPF_TASK_WORK))
> +                               bpf_obj_free_task_work(htab->map.record,
> +                                                      htab_elem_value(l, htab->map.key_size));
>                 }
>                 cond_resched_rcu();
>         }
>         rcu_read_unlock();
>  }
>
> -static void htab_map_free_timers_and_wq(struct bpf_map *map)
> +static void htab_map_free_internal_structs(struct bpf_map *map)
>  {
>         struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
>
>         /* We only free timer and workqueue on uref dropping to zero */
> -       if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE)) {
> +       if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
>                 if (!htab_is_prealloc(htab))
> -                       htab_free_malloced_timers_and_wq(htab);
> +                       htab_free_malloced_internal_structs(htab);
>                 else
> -                       htab_free_prealloced_timers_and_wq(htab);
> +                       htab_free_prealloced_internal_structs(htab);
>         }
>  }
>
> @@ -2255,7 +2261,7 @@ const struct bpf_map_ops htab_map_ops = {
>         .map_alloc = htab_map_alloc,
>         .map_free = htab_map_free,
>         .map_get_next_key = htab_map_get_next_key,
> -       .map_release_uref = htab_map_free_timers_and_wq,
> +       .map_release_uref = htab_map_free_internal_structs,
>         .map_lookup_elem = htab_map_lookup_elem,
>         .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
>         .map_update_elem = htab_map_update_elem,
> @@ -2276,7 +2282,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
>         .map_alloc = htab_map_alloc,
>         .map_free = htab_map_free,
>         .map_get_next_key = htab_map_get_next_key,
> -       .map_release_uref = htab_map_free_timers_and_wq,
> +       .map_release_uref = htab_map_free_internal_structs,
>         .map_lookup_elem = htab_lru_map_lookup_elem,
>         .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
>         .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 6b4877e85a68..322ffcaedc38 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -3703,8 +3703,53 @@ __bpf_kfunc int bpf_strstr(const char *s1__ign, const char *s2__ign)
>         return bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX);
>  }
>
> +typedef void (*bpf_task_work_callback_t)(struct bpf_map *, void *, void *);
> +
> +/**
> + * bpf_task_work_schedule_signal - Schedule BPF callback using task_work_add with TWA_SIGNAL mode
> + * @task: Task struct for which callback should be scheduled
> + * @tw: Pointer to the bpf_task_work struct, to use by kernel internally for bookkeeping
> + * @map__map: bpf_map which contains bpf_task_work in one of the values
> + * @callback: pointer to BPF subprogram to call
> + * @aux__prog: user should pass NULL
> + *
> + * Return: 0 if task work has been scheduled successfully, negative error code otherwise
> + */
> +__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task,
> +                                             struct bpf_task_work *tw,
> +                                             struct bpf_map *map__map,
> +                                             bpf_task_work_callback_t callback,
> +                                             void *aux__prog)
> +{
> +       return 0;
> +}
> +
> +/**
> + * bpf_task_work_schedule_resume - Schedule BPF callback using task_work_add with TWA_RESUME or
> + * TWA_NMI_CURRENT mode if scheduling for the current task in the NMI
> + * @task: Task struct for which callback should be scheduled
> + * @tw: Pointer to the bpf_task_work struct, to use by kernel internally for bookkeeping
> + * @map__map: bpf_map which contains bpf_task_work in one of the values
> + * @callback: pointer to BPF subprogram to call
> + * @aux__prog: user should pass NULL
> + *
> + * Return: 0 if task work has been scheduled successfully, negative error code otherwise
> + */
> +__bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task,
> +                                             struct bpf_task_work *tw,
> +                                             struct bpf_map *map__map,
> +                                             bpf_task_work_callback_t callback,
> +                                             void *aux__prog)
> +{
> +       return 0;
> +}

Is there a reason we need separate kfuncs? Why can't we have one with
flags for different TWA modes?
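
If two entry points do end up being necessary, they could presumably at
least share a single implementation parameterized by the notify mode,
roughly (bpf_task_work_schedule_mode() is a made-up name; the real
scheduling logic only lands in the next patch):

static int bpf_task_work_schedule_mode(struct task_struct *task,
                                       struct bpf_task_work *tw,
                                       struct bpf_map *map,
                                       bpf_task_work_callback_t callback,
                                       void *aux,
                                       enum task_work_notify_mode mode)
{
        /* common bookkeeping + task_work_add(task, ..., mode) goes here */
        return 0;
}

__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task,
                                              struct bpf_task_work *tw,
                                              struct bpf_map *map__map,
                                              bpf_task_work_callback_t callback,
                                              void *aux__prog)
{
        return bpf_task_work_schedule_mode(task, tw, map__map, callback,
                                           aux__prog, TWA_SIGNAL);
}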

> +
>  __bpf_kfunc_end_defs();
>
> +void bpf_task_work_cancel_and_free(void *val)
> +{
> +}
> +
>  BTF_KFUNCS_START(generic_btf_ids)
>  #ifdef CONFIG_CRASH_DUMP
>  BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index e63039817af3..73f801751280 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -670,6 +670,7 @@ void btf_record_free(struct btf_record *rec)
>                 case BPF_TIMER:
>                 case BPF_REFCOUNT:
>                 case BPF_WORKQUEUE:
> +               case BPF_TASK_WORK:
>                         /* Nothing to release */
>                         break;
>                 default:
> @@ -723,6 +724,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
>                 case BPF_TIMER:
>                 case BPF_REFCOUNT:
>                 case BPF_WORKQUEUE:
> +               case BPF_TASK_WORK:
>                         /* Nothing to acquire */
>                         break;
>                 default:
> @@ -781,6 +783,13 @@ void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj)
>         bpf_wq_cancel_and_free(obj + rec->wq_off);
>  }
>
> +void bpf_obj_free_task_work(const struct btf_record *rec, void *obj)
> +{
> +       if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TASK_WORK)))
> +               return;
> +       bpf_task_work_cancel_and_free(obj + rec->task_work_off);
> +}
> +
>  void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
>  {
>         const struct btf_field *fields;
> @@ -838,6 +847,9 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
>                                 continue;
>                         bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off);
>                         break;
> +               case BPF_TASK_WORK:
> +                       bpf_task_work_cancel_and_free(field_ptr);
> +                       break;
>                 case BPF_LIST_NODE:
>                 case BPF_RB_NODE:
>                 case BPF_REFCOUNT:
> @@ -1234,7 +1246,8 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
>
>         map->record = btf_parse_fields(btf, value_type,
>                                        BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
> -                                      BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR,
> +                                      BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR |
> +                                      BPF_TASK_WORK,
>                                        map->value_size);
>         if (!IS_ERR_OR_NULL(map->record)) {
>                 int i;
> @@ -1306,6 +1319,14 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
>                                         goto free_map_tab;
>                                 }
>                                 break;
> +                       case BPF_TASK_WORK:
> +                               if (map->map_type != BPF_MAP_TYPE_HASH &&
> +                                   map->map_type != BPF_MAP_TYPE_LRU_HASH &&
> +                                   map->map_type != BPF_MAP_TYPE_ARRAY) {
> +                                       ret = -EOPNOTSUPP;
> +                                       goto free_map_tab;
> +                               }
> +                               break;
>                         default:
>                                 /* Fail if map_type checks are missing for a field type */
>                                 ret = -EOPNOTSUPP;
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 399f03e62508..905dc0c5a73d 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -524,9 +524,11 @@ static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
>                func_id == BPF_FUNC_user_ringbuf_drain;
>  }
>
> -static bool is_async_callback_calling_function(enum bpf_func_id func_id)
> +static bool is_task_work_add_kfunc(u32 func_id);
> +
> +static bool is_async_callback_calling_function(u32 func_id)
>  {
> -       return func_id == BPF_FUNC_timer_set_callback;
> +       return func_id == BPF_FUNC_timer_set_callback || is_task_work_add_kfunc(func_id);

Hmm, isn't this one for helpers? It takes an enum bpf_func_id, so the
kfunc case belongs in is_async_callback_calling_kfunc(), which this
patch already updates further down.
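
i.e. keep the helper variant as it was:

        static bool is_async_callback_calling_function(enum bpf_func_id func_id)
        {
                return func_id == BPF_FUNC_timer_set_callback;
        }

and let the is_async_callback_calling_kfunc() hunk below cover the new
kfuncs.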

>  }
>
>  static bool is_callback_calling_function(enum bpf_func_id func_id)
> @@ -2236,6 +2238,8 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
>                                 reg->map_uid = reg->id;
>                         if (btf_record_has_field(map->inner_map_meta->record, BPF_WORKQUEUE))
>                                 reg->map_uid = reg->id;
> +                       if (btf_record_has_field(map->inner_map_meta->record, BPF_TASK_WORK))
> +                               reg->map_uid = reg->id;
>                 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
>                         reg->type = PTR_TO_XDP_SOCK;
>                 } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
> @@ -8569,6 +8573,44 @@ static int process_wq_func(struct bpf_verifier_env *env, int regno,
>         return 0;
>  }
>
> +static int process_task_work_func(struct bpf_verifier_env *env, int regno,
> +                                 struct bpf_kfunc_call_arg_meta *meta)
> +{
> +       struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
> +       struct bpf_map *map = reg->map_ptr;
> +       bool is_const = tnum_is_const(reg->var_off);
> +       u64 val = reg->var_off.value;
> +
> +       if (!map->btf) {
> +               verbose(env, "map '%s' has to have BTF in order to use bpf_task_work\n",
> +                       map->name);
> +               return -EINVAL;
> +       }
> +       if (!btf_record_has_field(map->record, BPF_TASK_WORK)) {
> +               verbose(env, "map '%s' has no valid bpf_task_work\n", map->name);
> +               return -EINVAL;
> +       }
> +       if (map->record->task_work_off != val + reg->off) {
> +               verbose(env,
> +                       "off %lld doesn't point to 'struct bpf_task_work' that is at %d\n",
> +                       val + reg->off, map->record->task_work_off);
> +               return -EINVAL;
> +       }
> +       if (!is_const) {
> +               verbose(env,
> +                       "bpf_task_work has to be at the constant offset\n");
> +               return -EINVAL;
> +       }

It would make more sense to me to check is_const before the
val + reg->off comparison, since val is only meaningful when var_off is
constant.
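
I.e. the same two checks from the patch, just swapped:

        if (!is_const) {
                verbose(env,
                        "bpf_task_work has to be at the constant offset\n");
                return -EINVAL;
        }
        if (map->record->task_work_off != val + reg->off) {
                verbose(env,
                        "off %lld doesn't point to 'struct bpf_task_work' that is at %d\n",
                        val + reg->off, map->record->task_work_off);
                return -EINVAL;
        }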

> +       if (meta->map.ptr) {
> +               verifier_bug(env, "Two map pointers in a bpf_task_work kfunc");
> +               return -EFAULT;
> +       }
> +
> +       meta->map.uid = reg->map_uid;
> +       meta->map.ptr = map;
> +       return 0;
> +}
> +
>  static int process_kptr_func(struct bpf_verifier_env *env, int regno,
>                              struct bpf_call_arg_meta *meta)
>  {
> @@ -10616,7 +10658,8 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins
>                 env->subprog_info[subprog].is_async_cb = true;
>                 async_cb = push_async_cb(env, env->subprog_info[subprog].start,
>                                          insn_idx, subprog,
> -                                        is_bpf_wq_set_callback_impl_kfunc(insn->imm));
> +                                        is_bpf_wq_set_callback_impl_kfunc(insn->imm) ||
> +                                        is_task_work_add_kfunc(insn->imm));
>                 if (!async_cb)
>                         return -EFAULT;
>                 callee = async_cb->frame[0];
> @@ -10929,6 +10972,35 @@ static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
>         return 0;
>  }
>
> +static int set_task_work_schedule_callback_state(struct bpf_verifier_env *env,
> +                                                struct bpf_func_state *caller,
> +                                                struct bpf_func_state *callee,
> +                                                int insn_idx)
> +{
> +       struct bpf_map *map_ptr = caller->regs[BPF_REG_3].map_ptr;
> +
> +       /*
> +        * callback_fn(struct bpf_map *map, void *key, void *value);
> +        */
> +       callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
> +       __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
> +       callee->regs[BPF_REG_1].map_ptr = map_ptr;
> +
> +       callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
> +       __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
> +       callee->regs[BPF_REG_2].map_ptr = map_ptr;
> +
> +       callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
> +       __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
> +       callee->regs[BPF_REG_3].map_ptr = map_ptr;
> +
> +       /* unused */
> +       __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
> +       __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
> +       callee->in_callback_fn = true;
> +       return 0;
> +}
> +
>  static bool is_rbtree_lock_required_kfunc(u32 btf_id);
>
>  /* Are we currently verifying the callback for a rbtree helper that must
> @@ -12059,6 +12131,7 @@ enum {
>         KF_ARG_RB_NODE_ID,
>         KF_ARG_WORKQUEUE_ID,
>         KF_ARG_RES_SPIN_LOCK_ID,
> +       KF_ARG_TASK_WORK_ID,
>  };
>
>  BTF_ID_LIST(kf_arg_btf_ids)
> @@ -12069,6 +12142,7 @@ BTF_ID(struct, bpf_rb_root)
>  BTF_ID(struct, bpf_rb_node)
>  BTF_ID(struct, bpf_wq)
>  BTF_ID(struct, bpf_res_spin_lock)
> +BTF_ID(struct, bpf_task_work)
>
>  static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
>                                     const struct btf_param *arg, int type)
> @@ -12117,6 +12191,11 @@ static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg)
>         return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID);
>  }
>
> +static bool is_kfunc_arg_task_work(const struct btf *btf, const struct btf_param *arg)
> +{
> +       return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TASK_WORK_ID);
> +}
> +
>  static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_param *arg)
>  {
>         return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID);
> @@ -12204,6 +12283,7 @@ enum kfunc_ptr_arg_type {
>         KF_ARG_PTR_TO_WORKQUEUE,
>         KF_ARG_PTR_TO_IRQ_FLAG,
>         KF_ARG_PTR_TO_RES_SPIN_LOCK,
> +       KF_ARG_PTR_TO_TASK_WORK,
>  };
>
>  enum special_kfunc_type {
> @@ -12252,6 +12332,8 @@ enum special_kfunc_type {
>         KF_bpf_res_spin_lock_irqsave,
>         KF_bpf_res_spin_unlock_irqrestore,
>         KF___bpf_trap,
> +       KF_bpf_task_work_schedule_signal,
> +       KF_bpf_task_work_schedule_resume,
>  };
>
>  BTF_ID_LIST(special_kfunc_list)
> @@ -12318,6 +12400,14 @@ BTF_ID(func, bpf_res_spin_unlock)
>  BTF_ID(func, bpf_res_spin_lock_irqsave)
>  BTF_ID(func, bpf_res_spin_unlock_irqrestore)
>  BTF_ID(func, __bpf_trap)
> +BTF_ID(func, bpf_task_work_schedule_signal)
> +BTF_ID(func, bpf_task_work_schedule_resume)
> +
> +static bool is_task_work_add_kfunc(u32 func_id)
> +{
> +       return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] ||
> +              func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume];
> +}
>
>  static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
>  {
> @@ -12408,6 +12498,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
>         if (is_kfunc_arg_wq(meta->btf, &args[argno]))
>                 return KF_ARG_PTR_TO_WORKQUEUE;
>
> +       if (is_kfunc_arg_task_work(meta->btf, &args[argno]))
> +               return KF_ARG_PTR_TO_TASK_WORK;
> +
>         if (is_kfunc_arg_irq_flag(meta->btf, &args[argno]))
>                 return KF_ARG_PTR_TO_IRQ_FLAG;
>
> @@ -12751,7 +12844,8 @@ static bool is_sync_callback_calling_kfunc(u32 btf_id)
>
>  static bool is_async_callback_calling_kfunc(u32 btf_id)
>  {
> -       return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl];
> +       return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl] ||
> +              is_task_work_add_kfunc(btf_id);
>  }
>
>  static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
> @@ -13153,6 +13247,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
>                                         return -EINVAL;
>                                 }
>                         }
> +                       if (meta->map.ptr && reg->map_ptr->record->task_work_off >= 0) {
> +                               if (meta->map.ptr != reg->map_ptr ||
> +                                   meta->map.uid != reg->map_uid) {
> +                                       verbose(env,
> +                                               "bpf_task_work pointer in R2 map_uid=%d doesn't match map pointer in R3 map_uid=%d\n",
> +                                               meta->map.uid, reg->map_uid);
> +                                       return -EINVAL;
> +                               }
> +                       }
>                         meta->map.ptr = reg->map_ptr;
>                         meta->map.uid = reg->map_uid;
>                         fallthrough;
> @@ -13185,6 +13288,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
>                 case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
>                 case KF_ARG_PTR_TO_CONST_STR:
>                 case KF_ARG_PTR_TO_WORKQUEUE:
> +               case KF_ARG_PTR_TO_TASK_WORK:
>                 case KF_ARG_PTR_TO_IRQ_FLAG:
>                 case KF_ARG_PTR_TO_RES_SPIN_LOCK:
>                         break;
> @@ -13476,6 +13580,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
>                         if (ret < 0)
>                                 return ret;
>                         break;
> +               case KF_ARG_PTR_TO_TASK_WORK:
> +                       if (reg->type != PTR_TO_MAP_VALUE) {
> +                               verbose(env, "arg#%d doesn't point to a map value\n", i);
> +                               return -EINVAL;
> +                       }
> +                       ret = process_task_work_func(env, regno, meta);
> +                       if (ret < 0)
> +                               return ret;
> +                       break;
>                 case KF_ARG_PTR_TO_IRQ_FLAG:
>                         if (reg->type != PTR_TO_STACK) {
>                                 verbose(env, "arg#%d doesn't point to an irq flag on stack\n", i);
> @@ -13842,6 +13955,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>                 }
>         }
>
> +       if (is_task_work_add_kfunc(meta.func_id)) {
> +               err = push_callback_call(env, insn, insn_idx, meta.subprogno,
> +                                        set_task_work_schedule_callback_state);
> +               if (err) {
> +                       verbose(env, "kfunc %s#%d failed callback verification\n",
> +                               func_name, meta.func_id);
> +                       return err;
> +               }
> +       }
> +
>         rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
>         rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
>
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 233de8677382..e444d9f67829 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -7418,6 +7418,10 @@ struct bpf_timer {
>         __u64 __opaque[2];
>  } __attribute__((aligned(8)));
>
> +struct bpf_task_work {
> +       __u64 __opaque[16];
> +} __attribute__((aligned(8)));
> +
>  struct bpf_wq {
>         __u64 __opaque[2];
>  } __attribute__((aligned(8)));
> --
> 2.50.1
>
>



