Support a cookie when attaching a struct_ops map using a link. The
cookie is associated with the link and can be retrieved in a bpf
struct_ops program using bpf_get_attach_cookie().

Implementation-wise, trampoline and ksyms preparation are deferred
from map_update to link_create for struct_ops maps with BPF_F_LINK.
Since the bpf cookie is hardcoded into the trampoline in
arch_prepare_bpf_trampoline(), the trampoline can only be prepared
once the cookie is known (i.e., at link_create). The trampoline and
ksyms are freed once a link is detached, as the struct_ops map is no
longer associated with the link.

TODO: After this patch, a struct_ops map with BPF_F_LINK should be
prevented from being used to create more than one link, since a
struct_ops map currently only supports a single set of trampolines.
This may be done by reusing the states of non-BPF_F_LINK struct_ops
maps: set the state from READY to INUSE in link_create, and check the
state in link_create and link_update.

Signed-off-by: Amery Hung <ameryhung@xxxxxxxxx>
---
 include/uapi/linux/bpf.h       |  3 ++
 kernel/bpf/bpf_struct_ops.c    | 59 +++++++++++++++++++++++++---------
 kernel/bpf/helpers.c           | 18 +++++++++++
 tools/include/uapi/linux/bpf.h |  3 ++
 4 files changed, 68 insertions(+), 15 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0670e15a6100..4708d0783130 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1818,6 +1818,9 @@ union bpf_attr {
 			};
 			__u64		expected_revision;
 		} cgroup;
+		struct {
+			__u64		cookie;
+		} struct_ops;
 	};
 } link_create;
 
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 4d150e99a86c..3ad0697a3c00 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -59,6 +59,7 @@ struct bpf_struct_ops_link {
 	struct bpf_link link;
 	struct bpf_map __rcu *map;
 	wait_queue_head_t wait_hup;
+	u64 cookie;
 };
 
 static DEFINE_MUTEX(update_mutex);
@@ -673,7 +674,7 @@ static void bpf_struct_ops_map_free_ksyms(struct bpf_struct_ops_map *st_map)
 	}
 }
 
-static int bpf_struct_ops_prepare_attach(struct bpf_struct_ops_map *st_map)
+static int bpf_struct_ops_prepare_attach(struct bpf_struct_ops_map *st_map, u64 cookie)
 {
 	const struct bpf_struct_ops *st_ops = st_map->st_ops_desc->st_ops;
 	const struct btf_type *t = st_map->st_ops_desc->type;
@@ -714,6 +715,7 @@ static int bpf_struct_ops_prepare_attach(struct bpf_struct_ops_map *st_map)
 
 		mname = btf_name_by_offset(st_map->btf, member->name_off);
 		link = container_of(*plink++, struct bpf_tramp_link, link);
+		link->cookie = cookie;
 
 		ksym = kzalloc(sizeof(*ksym), GFP_USER);
 		if (!ksym) {
@@ -892,10 +894,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		*(unsigned long *)(udata + moff) = prog->aux->id;
 	}
 
-	err = bpf_struct_ops_prepare_attach(st_map);
-	if (err)
-		goto reset_unlock;
-
 	if (st_map->map.map_flags & BPF_F_LINK) {
 		err = 0;
 		/* Let bpf_link handle registration & unregistration.
@@ -906,6 +904,10 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		goto unlock;
 	}
 
+	err = bpf_struct_ops_prepare_attach(st_map, 0);
+	if (err)
+		goto reset_unlock;
+
 	err = st_ops->reg(kdata, NULL);
 	if (likely(!err)) {
 		/* This refcnt increment on the map here after
@@ -915,6 +917,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		 * or transition it to TOBEFREE concurrently.
 		 */
 		bpf_map_inc(map);
+		bpf_struct_ops_map_add_ksyms(st_map);
 		/* Pair with smp_load_acquire() during lookup_elem().
 		 * It ensures the above udata updates (e.g. prog->aux->id)
 		 * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
@@ -937,8 +940,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		memset(kvalue, 0, map->value_size);
 unlock:
 	mutex_unlock(&st_map->lock);
-	if (!err)
-		bpf_struct_ops_map_add_ksyms(st_map);
 	return err;
 }
 
@@ -1247,7 +1248,11 @@ static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
 	rcu_read_lock();
 	map = rcu_dereference(st_link->map);
 	if (map)
-		seq_printf(seq, "map_id:\t%d\n", map->id);
+		seq_printf(seq,
+			   "map_id:\t%d\n"
+			   "cookie:\t%llu\n",
+			   map->id,
+			   st_link->cookie);
 	rcu_read_unlock();
 }
 
@@ -1302,14 +1307,28 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
 		goto err_out;
 	}
 
+	err = bpf_struct_ops_prepare_attach(st_map, st_link->cookie);
+	if (err)
+		goto free_image;
+
 	err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data, link);
 	if (err)
-		goto err_out;
+		goto free_image;
 
 	bpf_map_inc(new_map);
 	rcu_assign_pointer(st_link->map, new_map);
+	bpf_struct_ops_map_add_ksyms(st_map);
+	bpf_struct_ops_map_del_ksyms(old_st_map);
+	bpf_struct_ops_map_free_ksyms(old_st_map);
+	bpf_struct_ops_map_free_image(old_st_map);
 	bpf_map_put(old_map);
+	mutex_unlock(&update_mutex);
+
+	return 0;
 
+free_image:
+	bpf_struct_ops_map_free_ksyms(st_map);
+	bpf_struct_ops_map_free_image(st_map);
 err_out:
 	mutex_unlock(&update_mutex);
 
@@ -1395,24 +1414,34 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
 	if (err)
 		goto err_out;
 
+	link->cookie = attr->link_create.struct_ops.cookie;
+
 	init_waitqueue_head(&link->wait_hup);
 
 	/* Hold the update_mutex such that the subsystem cannot
 	 * do link->ops->detach() before the link is fully initialized.
 	 */
 	mutex_lock(&update_mutex);
+	err = bpf_struct_ops_prepare_attach(st_map, link->cookie);
+	if (err)
+		goto free_image;
+
 	err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
-	if (err) {
-		mutex_unlock(&update_mutex);
-		bpf_link_cleanup(&link_primer);
-		link = NULL;
-		goto err_out;
-	}
+	if (err)
+		goto free_image;
+
 	RCU_INIT_POINTER(link->map, map);
+	bpf_struct_ops_map_add_ksyms(st_map);
 	mutex_unlock(&update_mutex);
 
 	return bpf_link_settle(&link_primer);
 
+free_image:
+	bpf_struct_ops_map_free_ksyms(st_map);
+	bpf_struct_ops_map_free_image(st_map);
+	mutex_unlock(&update_mutex);
+	bpf_link_cleanup(&link_primer);
+	link = NULL;
 err_out:
 	bpf_map_put(map);
 	kfree(link);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 3d33181d5e67..4075fdd1533f 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1894,6 +1894,21 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = {
 	.arg3_type	= ARG_CONST_ALLOC_SIZE_OR_ZERO,
 };
 
+BPF_CALL_1(bpf_get_attach_cookie_struct_ops, void *, ctx)
+{
+	struct bpf_tramp_run_ctx *run_ctx;
+
+	run_ctx = container_of(current->bpf_ctx, struct bpf_tramp_run_ctx, run_ctx);
+	return run_ctx->bpf_cookie;
+}
+
+static const struct bpf_func_proto bpf_get_attach_cookie_proto_struct_ops = {
+	.func		= bpf_get_attach_cookie_struct_ops,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
@@ -1962,6 +1977,9 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_ns_current_pid_tgid_proto;
 	case BPF_FUNC_get_current_uid_gid:
 		return &bpf_get_current_uid_gid_proto;
+	case BPF_FUNC_get_attach_cookie:
+		return prog->type == BPF_PROG_TYPE_STRUCT_OPS ?
+			&bpf_get_attach_cookie_proto_struct_ops : NULL;
 	default:
 		break;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0670e15a6100..4708d0783130 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1818,6 +1818,9 @@ union bpf_attr {
 			};
 			__u64		expected_revision;
 		} cgroup;
+		struct {
+			__u64		cookie;
+		} struct_ops;
 	};
 } link_create;
 
-- 
2.47.1
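
[Editor's note: a minimal usage sketch of the new cookie plumbing,
for readers following along. It is illustrative only: the
"bpf_testmod_ops" struct_ops type, its "test_1" op, and the userspace
helper name below are hypothetical placeholders; only
bpf_get_attach_cookie(), BPF_LINK_CREATE, and the new
link_create.struct_ops.cookie field come from this patch.]

On the BPF side, a struct_ops program reads the cookie that was baked
into its trampoline at link_create time:

/* SPDX-License-Identifier: GPL-2.0 */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

__u64 observed_cookie;

/* "bpf_testmod_ops" and "test_1" stand in for whatever struct_ops
 * type the map actually implements.
 */
SEC("struct_ops/test_1")
int BPF_PROG(test_1)
{
	/* Returns link_create.struct_ops.cookie for BPF_F_LINK
	 * attachments; 0 when no cookie was supplied.
	 */
	observed_cookie = bpf_get_attach_cookie(ctx);
	return 0;
}

SEC(".struct_ops.link")
struct bpf_testmod_ops testmod_ops = {
	.test_1 = (void *)test_1,
};

char _license[] SEC("license") = "GPL";

On the user side, until libbpf exposes an option for the new field,
the cookie can be passed by issuing BPF_LINK_CREATE directly against
a struct_ops map created with BPF_F_LINK:

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* map_fd refers to a loaded struct_ops map created with BPF_F_LINK.
 * Returns a link fd on success or -1 with errno set on failure.
 */
static int struct_ops_link_create_with_cookie(int map_fd, __u64 cookie)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.link_create.map_fd = map_fd;
	attr.link_create.attach_type = BPF_STRUCT_OPS;
	attr.link_create.struct_ops.cookie = cookie;

	return syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));
}

Detaching the resulting link frees the trampolines and ksyms, which
is why a second link_create on the same map needs the INUSE guard
mentioned in the TODO above.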