[RFC bpf-next v1 2/4] bpf: Support cookie for link-based struct_ops attachment

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Support a cookie when attaching a struct_ops map using a link. The cookie
is associated with the link and can be retrieved in a bpf struct_ops
program using bpf_get_attach_cookie().

Implementation-wise, trampoline and ksyms preparation is deferred from
map_update to link_create for struct_ops maps with BPF_F_LINK. Since the
bpf cookie is hardcoded into the trampoline in
arch_prepare_bpf_trampoline(), the preparation must be done when the
cookie is known (i.e., at link_create). The trampoline and ksyms are
freed once a link is detached, as the struct_ops map is no longer
associated with the link.

TODO:
A struct_ops map with BPF_F_LINK should be prevented from being used to
create another link after this patch, since a struct_ops map currently
only supports a single set of trampolines. This may be done by reusing
the state from non-BPF_F_LINK struct_ops maps: set the state from READY
to INUSE in link_create, and check the state in link_create and
link_update.

Signed-off-by: Amery Hung <ameryhung@xxxxxxxxx>
---
 include/uapi/linux/bpf.h       |  3 ++
 kernel/bpf/bpf_struct_ops.c    | 59 +++++++++++++++++++++++++---------
 kernel/bpf/helpers.c           | 18 +++++++++++
 tools/include/uapi/linux/bpf.h |  3 ++
 4 files changed, 68 insertions(+), 15 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0670e15a6100..4708d0783130 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1818,6 +1818,9 @@ union bpf_attr {
 				};
 				__u64		expected_revision;
 			} cgroup;
+			struct {
+				__u64		cookie;
+			} struct_ops;
 		};
 	} link_create;
 
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 4d150e99a86c..3ad0697a3c00 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -59,6 +59,7 @@ struct bpf_struct_ops_link {
 	struct bpf_link link;
 	struct bpf_map __rcu *map;
 	wait_queue_head_t wait_hup;
+	u64 cookie;
 };
 
 static DEFINE_MUTEX(update_mutex);
@@ -673,7 +674,7 @@ static void bpf_struct_ops_map_free_ksyms(struct bpf_struct_ops_map *st_map)
 	}
 }
 
-static int bpf_struct_ops_prepare_attach(struct bpf_struct_ops_map *st_map)
+static int bpf_struct_ops_prepare_attach(struct bpf_struct_ops_map *st_map, u64 cookie)
 {
 	const struct bpf_struct_ops *st_ops = st_map->st_ops_desc->st_ops;
 	const struct btf_type *t = st_map->st_ops_desc->type;
@@ -714,6 +715,7 @@ static int bpf_struct_ops_prepare_attach(struct bpf_struct_ops_map *st_map)
 
 		mname = btf_name_by_offset(st_map->btf, member->name_off);
 		link = container_of(*plink++, struct bpf_tramp_link, link);
+		link->cookie = cookie;
 
 		ksym = kzalloc(sizeof(*ksym), GFP_USER);
 		if (!ksym) {
@@ -892,10 +894,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		*(unsigned long *)(udata + moff) = prog->aux->id;
 	}
 
-	err = bpf_struct_ops_prepare_attach(st_map);
-	if (err)
-		goto reset_unlock;
-
 	if (st_map->map.map_flags & BPF_F_LINK) {
 		err = 0;
 		/* Let bpf_link handle registration & unregistration.
@@ -906,6 +904,10 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		goto unlock;
 	}
 
+	err = bpf_struct_ops_prepare_attach(st_map, 0);
+	if (err)
+		goto reset_unlock;
+
 	err = st_ops->reg(kdata, NULL);
 	if (likely(!err)) {
 		/* This refcnt increment on the map here after
@@ -915,6 +917,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		 * or transition it to TOBEFREE concurrently.
 		 */
 		bpf_map_inc(map);
+		bpf_struct_ops_map_add_ksyms(st_map);
 		/* Pair with smp_load_acquire() during lookup_elem().
 		 * It ensures the above udata updates (e.g. prog->aux->id)
 		 * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
@@ -937,8 +940,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 	memset(kvalue, 0, map->value_size);
 unlock:
 	mutex_unlock(&st_map->lock);
-	if (!err)
-		bpf_struct_ops_map_add_ksyms(st_map);
 	return err;
 }
 
@@ -1247,7 +1248,11 @@ static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
 	rcu_read_lock();
 	map = rcu_dereference(st_link->map);
 	if (map)
-		seq_printf(seq, "map_id:\t%d\n", map->id);
+		seq_printf(seq,
+			   "map_id:\t%d\n"
+			   "cookie:\t%llu\n",
+			   map->id,
+			   st_link->cookie);
 	rcu_read_unlock();
 }
 
@@ -1302,14 +1307,28 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
 		goto err_out;
 	}
 
+	err = bpf_struct_ops_prepare_attach(st_map, st_link->cookie);
+	if (err)
+		goto free_image;
+
 	err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data, link);
 	if (err)
-		goto err_out;
+		goto free_image;
 
 	bpf_map_inc(new_map);
 	rcu_assign_pointer(st_link->map, new_map);
+	bpf_struct_ops_map_add_ksyms(st_map);
+	bpf_struct_ops_map_del_ksyms(old_st_map);
+	bpf_struct_ops_map_free_ksyms(old_st_map);
+	bpf_struct_ops_map_free_image(old_st_map);
 	bpf_map_put(old_map);
+	mutex_unlock(&update_mutex);
+
+	return 0;
 
+free_image:
+	bpf_struct_ops_map_free_ksyms(st_map);
+	bpf_struct_ops_map_free_image(st_map);
 err_out:
 	mutex_unlock(&update_mutex);
 
@@ -1395,24 +1414,34 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
 	if (err)
 		goto err_out;
 
+	link->cookie = attr->link_create.struct_ops.cookie;
+
 	init_waitqueue_head(&link->wait_hup);
 
 	/* Hold the update_mutex such that the subsystem cannot
 	 * do link->ops->detach() before the link is fully initialized.
 	 */
 	mutex_lock(&update_mutex);
+	err = bpf_struct_ops_prepare_attach(st_map, link->cookie);
+	if (err)
+		goto free_image;
+
 	err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link);
-	if (err) {
-		mutex_unlock(&update_mutex);
-		bpf_link_cleanup(&link_primer);
-		link = NULL;
-		goto err_out;
-	}
+	if (err)
+		goto free_image;
+
 	RCU_INIT_POINTER(link->map, map);
+	bpf_struct_ops_map_add_ksyms(st_map);
 	mutex_unlock(&update_mutex);
 
 	return bpf_link_settle(&link_primer);
 
+free_image:
+	bpf_struct_ops_map_free_ksyms(st_map);
+	bpf_struct_ops_map_free_image(st_map);
+	mutex_unlock(&update_mutex);
+	bpf_link_cleanup(&link_primer);
+	link = NULL;
 err_out:
 	bpf_map_put(map);
 	kfree(link);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 3d33181d5e67..4075fdd1533f 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1894,6 +1894,21 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = {
 	.arg3_type	= ARG_CONST_ALLOC_SIZE_OR_ZERO,
 };
 
+BPF_CALL_1(bpf_get_attach_cookie_struct_ops, void *, ctx)
+{
+	struct bpf_tramp_run_ctx *run_ctx;
+
+	run_ctx = container_of(current->bpf_ctx, struct bpf_tramp_run_ctx, run_ctx);
+	return run_ctx->bpf_cookie;
+}
+
+static const struct bpf_func_proto bpf_get_attach_cookie_proto_struct_ops = {
+	.func		= bpf_get_attach_cookie_struct_ops,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
@@ -1962,6 +1977,9 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_ns_current_pid_tgid_proto;
 	case BPF_FUNC_get_current_uid_gid:
 		return &bpf_get_current_uid_gid_proto;
+	case BPF_FUNC_get_attach_cookie:
+		return prog->type == BPF_PROG_TYPE_STRUCT_OPS ?
+		       &bpf_get_attach_cookie_proto_struct_ops : NULL;
 	default:
 		break;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0670e15a6100..4708d0783130 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1818,6 +1818,9 @@ union bpf_attr {
 				};
 				__u64		expected_revision;
 			} cgroup;
+			struct {
+				__u64		cookie;
+			} struct_ops;
 		};
 	} link_create;
 
-- 
2.47.1





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux