Re: [PATCH bpf-next v2 2/3] bpf: implement dynptr copy kfuncs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, May 2, 2025 at 12:06 PM Mykyta Yatsenko
<mykyta.yatsenko5@xxxxxxxxx> wrote:
>
> From: Mykyta Yatsenko <yatsenko@xxxxxxxx>
>
> This patch introduces a new set of kfuncs for working with dynptrs in
> BPF programs, enabling reading variable-length user or kernel data
> into a dynptr directly. To preserve memory safety, the verifier allows
> only constant-sized reads via the existing bpf_probe_read_{user|kernel}
> etc. kfuncs; the dynptr-based kfuncs allow dynamically-sized reads
> without these memory-safety shortcomings.
>
> The following kfuncs are introduced:
> * `bpf_probe_read_kernel_dynptr()`: probes kernel-space data into a dynptr
> * `bpf_probe_read_user_dynptr()`: probes user-space data into a dynptr
> * `bpf_probe_read_kernel_str_dynptr()`: probes kernel-space string into
> a dynptr
> * `bpf_probe_read_user_str_dynptr()`: probes user-space string into a
> dynptr
> * `bpf_copy_from_user_dynptr()`: sleepable, copies user-space data into
> a dynptr for the current task
> * `bpf_copy_from_user_str_dynptr()`: sleepable, copies user-space string
> into a dynptr for the current task
> * `bpf_copy_from_user_task_dynptr()`: sleepable, copies user-space data
> of the task into a dynptr
> * `bpf_copy_from_user_task_str_dynptr()`: sleepable, copies user-space
> string of the task into a dynptr
>
> The implementation is built on two generic functions:
>  * __bpf_dynptr_copy
>  * __bpf_dynptr_copy_str
> These functions take function pointers as arguments, enabling the
> copying of data from various sources, including both kernel and user
> space. Notably, these indirect calls are typically inlined.
>
> Reviewed-by: Andrii Nakryiko <andrii@xxxxxxxxxx>
> Signed-off-by: Mykyta Yatsenko <yatsenko@xxxxxxxx>
> ---
>  kernel/bpf/helpers.c     |   8 ++
>  kernel/trace/bpf_trace.c | 199 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 207 insertions(+)
>
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 2aad7c57425b..7d72d3e87324 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -3294,6 +3294,14 @@ BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLE
>  BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
>  BTF_ID_FLAGS(func, bpf_local_irq_save)
>  BTF_ID_FLAGS(func, bpf_local_irq_restore)
> +BTF_ID_FLAGS(func, bpf_probe_read_user_dynptr)
> +BTF_ID_FLAGS(func, bpf_probe_read_kernel_dynptr)
> +BTF_ID_FLAGS(func, bpf_probe_read_user_str_dynptr)
> +BTF_ID_FLAGS(func, bpf_probe_read_kernel_str_dynptr)
> +BTF_ID_FLAGS(func, bpf_copy_from_user_dynptr, KF_SLEEPABLE)
> +BTF_ID_FLAGS(func, bpf_copy_from_user_str_dynptr, KF_SLEEPABLE)
> +BTF_ID_FLAGS(func, bpf_copy_from_user_task_dynptr, KF_SLEEPABLE)
> +BTF_ID_FLAGS(func, bpf_copy_from_user_task_str_dynptr, KF_SLEEPABLE)

They need to have KF_TRUSTED_ARGS; otherwise a legacy (untrusted)
ptr_to_btf_id can be passed in.

>  BTF_KFUNCS_END(common_btf_ids)
>
>  static const struct btf_kfunc_id_set common_kfunc_set = {
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 52c432a44aeb..52926d572006 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -3499,6 +3499,147 @@ static int __init bpf_kprobe_multi_kfuncs_init(void)
>
>  late_initcall(bpf_kprobe_multi_kfuncs_init);
>
> +typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk);
> +
> +static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 doff, u32 size,

Why __always_inline?

Patch 1 already adds overhead in a non-LTO build,
since the small helper bpf_dynptr_check_off_len() will not be inlined.
This __always_inline looks odd and is probably wrong
from an optimization point of view.

All the other __always_inline annotations look wrong too.


> +                                                const void __user *unsafe_src,
> +                                                copy_fn_t str_copy_fn,
> +                                                struct task_struct *tsk)
> +{
> +       struct bpf_dynptr_kern *dst;
> +       u32 chunk_sz, off;
> +       void *dst_slice;
> +       int cnt, err;
> +       char buf[256];
> +
> +       dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
> +       if (likely(dst_slice))
> +               return str_copy_fn(dst_slice, unsafe_src, size, tsk);
> +
> +       dst = (struct bpf_dynptr_kern *)dptr;
> +       if (bpf_dynptr_check_off_len(dst, doff, size))
> +               return -E2BIG;
> +
> +       for (off = 0; off < size; off += chunk_sz - 1) {
> +               chunk_sz = min_t(u32, sizeof(buf), size - off);
> +               /* Expect str_copy_fn to return count of copied bytes, including
> +                * zero terminator. Next iteration increment off by chunk_sz - 1 to
> +                * overwrite NUL.
> +                */
> +               cnt = str_copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
> +               if (cnt < 0)
> +                       return cnt;
> +               err = __bpf_dynptr_write(dst, doff + off, buf, cnt, 0);
> +               if (err)
> +                       return err;
> +               if (cnt < chunk_sz || chunk_sz == 1) /* we are done */
> +                       return off + cnt;
> +       }
> +       return off;
> +}
> +
> +static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32 doff,
> +                                            u32 size, const void __user *unsafe_src,
> +                                            copy_fn_t copy_fn, struct task_struct *tsk)
> +{
> +       struct bpf_dynptr_kern *dst;
> +       void *dst_slice;
> +       char buf[256];
> +       u32 off, chunk_sz;
> +       int err;
> +
> +       dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
> +       if (likely(dst_slice))
> +               return copy_fn(dst_slice, unsafe_src, size, tsk);
> +
> +       dst = (struct bpf_dynptr_kern *)dptr;
> +       if (bpf_dynptr_check_off_len(dst, doff, size))
> +               return -E2BIG;
> +
> +       for (off = 0; off < size; off += chunk_sz) {
> +               chunk_sz = min_t(u32, sizeof(buf), size - off);
> +               err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
> +               if (err)
> +                       return err;
> +               err = __bpf_dynptr_write(dst, doff + off, buf, chunk_sz, 0);
> +               if (err)
> +                       return err;
> +       }
> +       return 0;
> +}
> +
> +static __always_inline int copy_user_data_nofault(void *dst, const void __user *unsafe_src,
> +                                                 u32 size, struct task_struct *tsk)
> +{
> +       if (WARN_ON_ONCE(tsk))
> +               return -EFAULT;
> +
> +       return copy_from_user_nofault(dst, unsafe_src, size);
> +}
> +
> +static __always_inline int copy_user_data_sleepable(void *dst, const void __user *unsafe_src,
> +                                                   u32 size, struct task_struct *tsk)
> +{
> +       int ret;
> +
> +       if (!tsk) /* Read from the current task */
> +               return copy_from_user(dst, unsafe_src, size);
> +
> +       ret = access_process_vm(tsk, (unsigned long)unsafe_src, dst, size, 0);
> +       if (ret != size)
> +               return -EFAULT;
> +       return 0;
> +}
> +
> +static __always_inline int copy_kernel_data_nofault(void *dst, const void *unsafe_src,
> +                                                   u32 size, struct task_struct *tsk)
> +{
> +       if (WARN_ON_ONCE(tsk))
> +               return -EFAULT;

Why? Let's not do defensive programming.
The caller clearly passes NULL for tsk.
Just drop this check.

> +
> +       return copy_from_kernel_nofault(dst, unsafe_src, size);
> +}
> +
> +static __always_inline int copy_user_str_nofault(void *dst, const void __user *unsafe_src,
> +                                                u32 size, struct task_struct *tsk)
> +{
> +       if (WARN_ON_ONCE(tsk))
> +               return -EFAULT;

Same applies here and to the remaining helpers further down.

pw-bot: cr





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux