The vma->vm_mm might be NULL and it can be accessed outside of RCU. Thus, we can mark it as trusted_or_null. With this change, BPF helpers can safely access vma->vm_mm to retrieve the associated mm_struct from the VMA. Then we can make policy decision from the VMA. The lsm selftest must be modified because it directly accesses vma->vm_mm without a NULL pointer check; otherwise it will break due to this change. For the VMA based THP policy, the use case is as follows, @mm = @vma->vm_mm; // vm_area_struct::vm_mm is trusted or null if (!@mm) return; bpf_rcu_read_lock(); // rcu lock must be held to dereference the owner @owner = @mm->owner; // mm_struct::owner is rcu trusted or null if (!@owner) goto out; @cgroup1 = bpf_task_get_cgroup1(@owner, MEMCG_HIERARCHY_ID); /* make the decision based on the @cgroup1 attribute */ bpf_cgroup_release(@cgroup1); // release the associated cgroup out: bpf_rcu_read_unlock(); PSI memory information can be obtained from the associated cgroup to inform policy decisions. Since upstream PSI support is currently limited to cgroup v2, the following example demonstrates cgroup v2 implementation: @owner = @mm->owner; if (@owner) { // @ancestor_cgid is user-configured @ancestor = bpf_cgroup_from_id(@ancestor_cgid); if (bpf_task_under_cgroup(@owner, @ancestor)) { @psi_group = @ancestor->psi; /* Extract PSI metrics from @psi_group and * implement policy logic based on the values */ } } Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx> Cc: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx> --- kernel/bpf/verifier.c | 5 +++++ tools/testing/selftests/bpf/progs/lsm.c | 8 +++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d400e18ee31e..b708b98f796c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7165,6 +7165,10 @@ BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) { struct sock *sk; }; +BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct) { + struct mm_struct *vm_mm; +}; + static bool type_is_rcu(struct bpf_verifier_env *env, struct bpf_reg_state *reg, const char *field_name, u32 btf_id) @@ -7206,6 +7210,7 @@ static bool type_is_trusted_or_null(struct bpf_verifier_env *env, { BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct)); return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted_or_null"); diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index 0c13b7409947..7de173daf27b 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -89,14 +89,16 @@ SEC("lsm/file_mprotect") int BPF_PROG(test_int_hook, struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot, int ret) { - if (ret != 0) + struct mm_struct *mm = vma->vm_mm; + + if (ret != 0 || !mm) return ret; __s32 pid = bpf_get_current_pid_tgid() >> 32; int is_stack = 0; - is_stack = (vma->vm_start <= vma->vm_mm->start_stack && - vma->vm_end >= vma->vm_mm->start_stack); + is_stack = (vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack); if (is_stack && monitored_pid == pid) { mprotect_count++; -- 2.47.3