Re: [PATCH bpf-next v2 1/4] bpf: bpf task work plumbing

On 8/19/25 02:34, Eduard Zingerman wrote:
On Fri, 2025-08-15 at 20:21 +0100, Mykyta Yatsenko wrote:
From: Mykyta Yatsenko <yatsenko@xxxxxxxx>

This patch adds the necessary plumbing in the verifier, syscall and maps code to
support handling of the new kfunc bpf_task_work_schedule and the kernel structure
bpf_task_work. The idea is similar to how we already handle bpf_wq and
bpf_timer.
The verifier changes validate calls to bpf_task_work_schedule to make sure
they are safe and the expected invariants hold.
The BTF part is required to detect the bpf_task_work structure inside a map value
and store its offset, which will be used in the next patch to calculate
key and value addresses.
The arraymap and hashtab changes are needed to handle freeing of the
bpf_task_work: run the code needed to deinitialize it, for example cancel the
task_work callback if possible.
The use of bpf_task_work and the proper implementation of the kfuncs are
introduced in the next patch.

Signed-off-by: Mykyta Yatsenko <yatsenko@xxxxxxxx>
---
The amount of copy-paste necessary for dealing with object BTF is saddening.
This patch follows the current approach and seems to do it correctly.

[...]

--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -431,7 +431,7 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
  	return (void *)round_down((unsigned long)array, PAGE_SIZE);
  }
-static void array_map_free_timers_wq(struct bpf_map *map)
+static void array_map_free_internal_structs(struct bpf_map *map)
  {
  	struct bpf_array *array = container_of(map, struct bpf_array, map);
  	int i;
@@ -439,12 +439,14 @@ static void array_map_free_timers_wq(struct bpf_map *map)
  	/* We don't reset or free fields other than timer and workqueue
  	 * on uref dropping to zero.
  	 */
-	if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE)) {
+	if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
Is there a way to share this code between array map and hash map?
I don't see any common library used by both arraymap and hashtab (it's likely I'm missing something). Although this code looks similar, there are some differences, for example the use of array_map_elem_ptr()/htab_elem_value(), which currently runs only when the value has a special field. If we were to extract the code below into a separate function, we'd have to run it unconditionally, which would add some small cost. All of this combined makes reusing that code not very appealing. I agree that having a more centralized place to deal with these special structs would make things better.
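The most that seems shareable right now is probably a small per-element helper along
these lines (untested sketch, the helper name is made up; both maps would keep their
own element iteration and btf_record_has_field() gating):

	/* Free all special per-element objects tracked in the btf_record. */
	void bpf_obj_free_internal_structs(const struct btf_record *rec, void *obj)
	{
		if (btf_record_has_field(rec, BPF_TIMER))
			bpf_obj_free_timer(rec, obj);
		if (btf_record_has_field(rec, BPF_WORKQUEUE))
			bpf_obj_free_workqueue(rec, obj);
		if (btf_record_has_field(rec, BPF_TASK_WORK))
			bpf_obj_free_task_work(rec, obj);
	}

array_map_free_internal_structs() would then call it with array_map_elem_ptr(array, i),
and hashtab with htab_elem_value(elem, htab->map.key_size).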

  		for (i = 0; i < array->map.max_entries; i++) {
  			if (btf_record_has_field(map->record, BPF_TIMER))
  				bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
  			if (btf_record_has_field(map->record, BPF_WORKQUEUE))
  				bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i));
+			if (btf_record_has_field(map->record, BPF_TASK_WORK))
+				bpf_obj_free_task_work(map->record, array_map_elem_ptr(array, i));
  		}
  	}
  }
[...]

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 64739308902f..378f260235dd 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3527,6 +3527,15 @@ static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_
  			goto end;
  		}
  	}
+	if (field_mask & BPF_TASK_WORK) {
+		if (!strcmp(name, "bpf_task_work")) {
+			if (*seen_mask & BPF_TASK_WORK)
+				return -E2BIG;
+			*seen_mask |= BPF_TASK_WORK;
+			type = BPF_TASK_WORK;
+			goto end;
+		}
+	}
Nit: extract this and ifs before it as a loop over array
      of name/flag pairs?
Makes sense, though I guess it'll require a separate refactoring patch.
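Something like this, I suppose (untested sketch; the timer and workqueue struct
names are assumed from the existing checks):

	static const struct {
		const char *name;
		u32 field;
	} btf_named_fields[] = {
		{ "bpf_timer",     BPF_TIMER },
		{ "bpf_wq",        BPF_WORKQUEUE },
		{ "bpf_task_work", BPF_TASK_WORK },
	};

	/* Replaces the repeated "if (field_mask & X) { if (!strcmp(...)) ... }" blocks. */
	for (i = 0; i < ARRAY_SIZE(btf_named_fields); i++) {
		if (!(field_mask & btf_named_fields[i].field) ||
		    strcmp(name, btf_named_fields[i].name))
			continue;
		if (*seen_mask & btf_named_fields[i].field)
			return -E2BIG;
		*seen_mask |= btf_named_fields[i].field;
		type = btf_named_fields[i].field;
		goto end;
	}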

  	field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
  	field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
  	field_mask_test_name(BPF_RB_ROOT,   "bpf_rb_root");
[...]

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 71f9931ac64c..207ad4823b5b 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
[...]

@@ -233,6 +233,9 @@ static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
  		if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
  			bpf_obj_free_workqueue(htab->map.record,
  					       htab_elem_value(elem, htab->map.key_size));
+		if (btf_record_has_field(htab->map.record, BPF_TASK_WORK))
+			bpf_obj_free_task_work(htab->map.record,
+					       htab_elem_value(elem, htab->map.key_size));
If there is no generic way to share this code with array maps,
please, at least within hashtab.c, extract these "if (btf_record_has_field(...)) {...}"
groups so that there is no duplication between
htab_free_{malloced,prealloced}_internal_structs(htab).
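With a per-element helper like the one sketched earlier in this reply, both of
those functions could reduce to roughly the following per element:

	bpf_obj_free_internal_structs(htab->map.record,
				      htab_elem_value(elem, htab->map.key_size));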

  		cond_resched();
  	}
  }
[...]

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0fbfa8532c39..108d86f7eeaf 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
[...]

@@ -1309,6 +1322,14 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
  					goto free_map_tab;
  				}
  				break;
+			case BPF_TASK_WORK:
This can be added to the group with BPF_TIMER and BPF_WORKQUEUE just above.
Ack.
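I.e. roughly (assuming the existing BPF_TIMER/BPF_WORKQUEUE group performs the
same map-type check):

	case BPF_TIMER:
	case BPF_WORKQUEUE:
	case BPF_TASK_WORK:
		if (map->map_type != BPF_MAP_TYPE_HASH &&
		    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
		    map->map_type != BPF_MAP_TYPE_ARRAY) {
			ret = -EOPNOTSUPP;
			goto free_map_tab;
		}
		break;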

+				if (map->map_type != BPF_MAP_TYPE_HASH &&
+				    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+				    map->map_type != BPF_MAP_TYPE_ARRAY) {
+					ret = -EOPNOTSUPP;
+					goto free_map_tab;
+				}
+				break;
  			default:
  				/* Fail if map_type checks are missing for a field type */
  				ret = -EOPNOTSUPP;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a61d57996692..be7a744c7917 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
[...]

This function repeats process_timer_func() almost verbatim.
Right, I'll extract it into a generic function.
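Something along these lines, perhaps (untested sketch, helper name made up;
callers would pass the expected field type/offset and then fill in their own
meta, since the timer helper path and the kfunc path use different meta structs):

	static int check_map_field_ptr(struct bpf_verifier_env *env, int regno,
				       enum btf_field_type field_type, int field_off,
				       const char *struct_name)
	{
		struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
		struct bpf_map *map = reg->map_ptr;
		u64 val = reg->var_off.value;

		if (!map->btf) {
			verbose(env, "map '%s' has to have BTF in order to use %s\n",
				map->name, struct_name);
			return -EINVAL;
		}
		if (!btf_record_has_field(map->record, field_type)) {
			verbose(env, "map '%s' has no valid %s\n", map->name, struct_name);
			return -EINVAL;
		}
		if (!tnum_is_const(reg->var_off)) {
			verbose(env, "%s has to be at a constant offset\n", struct_name);
			return -EINVAL;
		}
		if (field_off != val + reg->off) {
			verbose(env, "off %lld doesn't point to 'struct %s' that is at %d\n",
				val + reg->off, struct_name, field_off);
			return -EINVAL;
		}
		return 0;
	}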

+{
+	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+	struct bpf_map *map = reg->map_ptr;
+	bool is_const = tnum_is_const(reg->var_off);
+	u64 val = reg->var_off.value;
+
+	if (!map->btf) {
+		verbose(env, "map '%s' has to have BTF in order to use bpf_task_work\n",
+			map->name);
+		return -EINVAL;
+	}
+	if (!btf_record_has_field(map->record, BPF_TASK_WORK)) {
+		verbose(env, "map '%s' has no valid bpf_task_work\n", map->name);
+		return -EINVAL;
+	}
+	if (!is_const) {
+		verbose(env,
+			"bpf_task_work has to be at the constant offset\n");
+		return -EINVAL;
+	}
+	if (map->record->task_work_off != val + reg->off) {
+		verbose(env,
+			"off %lld doesn't point to 'struct bpf_task_work' that is at %d\n",
+			val + reg->off, map->record->task_work_off);
+		return -EINVAL;
+	}
+	if (meta->map.ptr) {
+		verifier_bug(env, "Two map pointers in a bpf_task_work kfunc");
+		return -EFAULT;
+	}
+
+	meta->map.uid = reg->map_uid;
+	meta->map.ptr = map;
+	return 0;
+}
+
  static int process_kptr_func(struct bpf_verifier_env *env, int regno,
  			     struct bpf_call_arg_meta *meta)
  {
[...]




