On Sunday, 08.06.2025 at 17:53 +0200, Bert Karwatzki wrote:
> On Sunday, 08.06.2025 at 10:45 +0200, Bert Karwatzki wrote:
> > On Thursday, 05.06.2025 at 14:51 +0200, Sebastian Andrzej Siewior wrote:
> > > On 2025-06-05 08:48:38 [-0400], Steven Rostedt wrote:
> > > > On Thu, 5 Jun 2025 11:19:03 +0200
> > > > Bert Karwatzki <spasswolf@xxxxxx> wrote:
> > > >
> > > > > This patch seems to create so much output that the original error message and
> > > > > backtrace often get lost, so I needed several runs to get a meaningful message
> > > > > when running
> > > >
> > > > Are you familiar with preempt count tracing?
> > >
> > > I have an initial set of patches to tackle this problem, I'm going to
> > > send them after the merge window.
> > >
> > > Sebastian
> >
> > I've found the reason for the "mysterious" increase of preempt_count:
> >
> > [ 70.821750] [ T2746] bpf_link_settle calling fd_install() preemt_count = 0
> > [ 70.821751] [ T2746] preempt_count_add 5898: preempt_count = 0x0 counter = 0x1b232c
> > [ 70.821752] [ T2746] preempt_count_add 5900: preempt_count = 0x1 counter = 0x1b232d
> > [ 70.821754] [ T2746] preempt_count_sub 5966: preempt_count = 0x1 counter = 0x1b232e
> > [ 70.821755] [ T2746] preempt_count_sub 5968: preempt_count = 0x0 counter = 0x1b232f
> > [ 70.821761] [ T2746] __bpf_trace_sys_enter 18: preempt_count = 0x0
> > [ 70.821762] [ T2746] __bpf_trace_sys_enter 18: preempt_count = 0x1
> > [ 70.821764] [ T2746] __bpf_trace_run: preempt_count = 1
> > [ 70.821765] [ T2746] bpf_prog_run: preempt_count = 1
> > [ 70.821766] [ T2746] __bpf_prog_run: preempt_count = 1
> >
> > It's caused by this macro from include/trace/bpf_probe.h (shown with my pr_err() calls added):
> >
> > #define __BPF_DECLARE_TRACE_SYSCALL(call, proto, args)			\
> > static notrace void							\
> > __bpf_trace_##call(void *__data, proto)					\
> > {									\
> > 	might_fault();							\
> > 	if (!strcmp(get_current()->comm, "test_progs"))			\
> > 		pr_err("%s %d: preempt_count = 0x%x", __func__, __LINE__, preempt_count()); \
> > 	preempt_disable_notrace();					\
> > 	if (!strcmp(get_current()->comm, "test_progs"))			\
> > 		pr_err("%s %d: preempt_count = 0x%x", __func__, __LINE__, preempt_count()); \
> > 	CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(__data, CAST_TO_U64(args)); \
> > 	preempt_enable_notrace();					\
> > }
> >
> > The preempt_{dis,en}able_notrace() calls were introduced in
> > commit 4aadde89d81f ("tracing/bpf: disable preemption in syscall probe").
> > This commit is present in v6.14 and v6.15, but the bug already appears in
> > v6.12, so in that case preemption must be disabled somewhere else.
> >
> > Bert Karwatzki
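The jump from 0x0 to 0x1 in the quoted log is simply preempt_disable_notrace()
doing its job; the actual problem is that a sleepable probe then runs with that
count held. The resulting check can be triggered in isolation with a throwaway
out-of-tree module, something like the sketch below (only an illustration,
assuming CONFIG_DEBUG_ATOMIC_SLEEP=y; the module and function names are
invented):

#include <linux/module.h>
#include <linux/preempt.h>
#include <linux/kernel.h>

static int __init preempt_demo_init(void)
{
	pr_info("preempt_count before: 0x%x\n", preempt_count());
	preempt_disable_notrace();	/* preempt_count goes 0x0 -> 0x1, invisible to tracing */
	pr_info("preempt_count inside: 0x%x\n", preempt_count());
	might_sleep();			/* splats: "BUG: sleeping function called
					 * from invalid context" */
	preempt_enable_notrace();
	return 0;
}

static void __exit preempt_demo_exit(void)
{
}

module_init(preempt_demo_init);
module_exit(preempt_demo_exit);
MODULE_LICENSE("GPL");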
>
> After reading this thread
> https://lore.kernel.org/bpf/CAADnVQJf535hwud5XtQKStOge9=pYVYWSiq_8Q2YAvN5rba==A@xxxxxxxxxxxxxx/
> I tried using migrate_{dis,en}able() instead, like this (in v6.15):
>
> diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
> index 183fa2aa2935..49257cb90209 100644
> --- a/include/trace/bpf_probe.h
> +++ b/include/trace/bpf_probe.h
> @@ -58,9 +58,9 @@ static notrace void					\
>  __bpf_trace_##call(void *__data, proto)				\
>  {									\
>  	might_fault();							\
> -	preempt_disable_notrace();					\
> +	migrate_disable();						\
>  	CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(__data, CAST_TO_U64(args)); \
> -	preempt_enable_notrace();					\
> +	migrate_enable();						\
>  }
>
>  #undef DECLARE_EVENT_SYSCALL_CLASS
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 187dc37d61d4..ec0326405fc3 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -2350,7 +2350,7 @@ void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
>  	struct bpf_run_ctx *old_run_ctx;
>  	struct bpf_trace_run_ctx run_ctx;
>
> -	cant_sleep();
> +	cant_migrate();
>  	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
>  		bpf_prog_inc_misses_counter(prog);
>  		goto out;
> diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
> index e1fba28e4a86..7cfb9473a526 100644
> --- a/tools/testing/selftests/bpf/progs/dynptr_success.c
> +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
> @@ -7,6 +7,7 @@
>  #include <bpf/bpf_helpers.h>
>  #include <bpf/bpf_tracing.h>
>  #include "bpf_misc.h"
> +#include "bpf_kfuncs.h"
>  #include "errno.h"
>
>  char _license[] SEC("license") = "GPL";
>
> This fixes the warnings when running the bpf cgroup examples:
>
> ./test_progs -a "cgrp_local_storage/cgrp1*"
>
> but I still get a warning from another example (I don't know which one yet):
>
> Bert Karwatzki
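If I understand the reasoning in the linked thread correctly, migrate_disable()
is sufficient here because __bpf_trace_run() only needs the task to stay on one
CPU for its per-CPU recursion counter (prog->active); unlike
preempt_disable_notrace() it leaves the section preemptible, so a sleepable
probe may fault. Roughly this pattern (only a sketch; run_probe and prog_active
are names I made up, not the actual kernel symbols):

#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(int, prog_active);	/* per-CPU recursion guard, as in __bpf_trace_run() */

static void run_probe(void (*probe)(void))
{
	migrate_disable();			/* pin task to this CPU, but stay preemptible */
	if (this_cpu_inc_return(prog_active) != 1)
		goto out;			/* recursion on this CPU: skip the probe */
	probe();				/* may fault and sleep; after wakeup we are
						 * guaranteed to run on the same CPU */
out:
	this_cpu_dec(prog_active);
	migrate_enable();
}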
Another of the bpf selftests gives a warning with CONFIG_PREEMPT_RT=y (for
taking a sleeping spinlock with preemption disabled):

$ ./test_progs -a wq

This produces the following warning:

[ T3576] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
[ T3576] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 3576, name: test_progs
[ T3576] preempt_count: 1, expected: 0
[ T3576] RCU nest depth: 3, expected: 3
[ T3576] 6 locks held by test_progs/3576:
[ T3576]  #0: ffffffffa1131300 (rcu_read_lock){....}-{1:3}, at: bpf_test_timer_enter+0x1e/0xc0
[ T3576]  #1: ffffffffa109acc0 (local_bh){.+.+}-{1:3}, at: __local_bh_disable_ip+0x29/0x1c0
[ T3576]  #2: ffff997b0e7d78b8 ((softirq_ctrl.lock)){+.+.}-{3:3}, at: __local_bh_disable_ip+0xc8/0x1c0
[ T3576]  #3: ffffffffa1131300 (rcu_read_lock){....}-{1:3}, at: rt_spin_lock+0xf0/0x190
[ T3576]  #4: ffffffffa1131300 (rcu_read_lock){....}-{1:3}, at: __local_bh_disable_ip+0x29/0x1c0
[ T3576]  #5: ffff997b0e7f4588 ((&c->lock)){+.+.}-{3:3}, at: ___slab_alloc+0x68/0xde0
[ T3576] irq event stamp: 247437
[ T3576] hardirqs last enabled at (247435): [<ffffffffa05b5fa7>] _raw_spin_unlock_irqrestore+0x57/0x80
[ T3576] hardirqs last disabled at (247437): [<ffffffff9fbbc57b>] __bpf_async_init+0xdb/0x310
[ T3576] softirqs last enabled at (241464): [<ffffffff9f98a2e1>] __local_bh_enable_ip+0x111/0x180
[ T3576] softirqs last disabled at (247436): [<ffffffffa036688c>] bpf_test_run+0x10c/0x350
[ T3576] CPU: 7 UID: 0 PID: 3576 Comm: test_progs Tainted: G O 6.15.0-bpf-00003-g5197b534e6ad #4 PREEMPT_{RT,(full)}
[ T3576] Tainted: [O]=OOT_MODULE
[ T3576] Hardware name: Micro-Star International Co., Ltd. Alpha 15 B5EEK/MS-158L, BIOS E158LAMS.10F 11/11/2024
[ T3576] Call Trace:
[ T3576]  <TASK>
[ T3576]  dump_stack_lvl+0x6d/0xb0
[ T3576]  __might_resched.cold+0xe1/0xf3
[ T3576]  rt_spin_lock+0x5f/0x190
[ T3576]  ? ___slab_alloc+0x68/0xde0
[ T3576]  ? srso_alias_return_thunk+0x5/0xfbef5
[ T3576]  ? __lock_acquire+0x45f/0x2a70
[ T3576]  ___slab_alloc+0x68/0xde0
[ T3576]  ? bpf_map_kmalloc_node+0x72/0x220
[ T3576]  ? srso_alias_return_thunk+0x5/0xfbef5
[ T3576]  ? lock_acquire+0xbe/0x2e0
[ T3576]  ? bpf_map_get_memcg.isra.0+0x182/0x310
[ T3576]  ? srso_alias_return_thunk+0x5/0xfbef5
[ T3576]  ? find_held_lock+0x2b/0x80
[ T3576]  ? bpf_map_get_memcg.isra.0+0x8d/0x310
[ T3576]  ? bpf_map_kmalloc_node+0x72/0x220
[ T3576]  __kmalloc_node_noprof+0xee/0x490
[ T3576]  bpf_map_kmalloc_node+0x72/0x220
[ T3576]  __bpf_async_init+0x107/0x310
[ T3576]  bpf_prog_aa38f9274c0318a2_test_call_array_sleepable+0xb3/0x10e
[ T3576]  bpf_test_run+0x1ef/0x350
[ T3576]  ? bpf_test_run+0x10c/0x350
[ T3576]  ? migrate_enable+0x115/0x160
[ T3576]  ? kmem_cache_alloc_noprof+0x210/0x2b0
[ T3576]  bpf_prog_test_run_skb+0x37b/0x7c0
[ T3576]  ? fput+0x3f/0x90
[ T3576]  __sys_bpf+0xd33/0x26d0
[ T3576]  ? srso_alias_return_thunk+0x5/0xfbef5
[ T3576]  __x64_sys_bpf+0x21/0x30
[ T3576]  do_syscall_64+0x72/0xfa0
[ T3576]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ T3576] RIP: 0033:0x7f1c8e2a6779
[ T3576] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 4f 86 0d 00 f7 d8 64 89 01 48
[ T3576] RSP: 002b:00007fff8ef7b4d8 EFLAGS: 00000202 ORIG_RAX: 0000000000000141
[ T3576] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f1c8e2a6779
[ T3576] RDX: 0000000000000050 RSI: 00007fff8ef7b510 RDI: 000000000000000a
[ T3576] RBP: 00007fff8ef7b4f0 R08: 00000000ffffffff R09: 00007fff8ef7b510
[ T3576] R10: 0000000000000064 R11: 0000000000000202 R12: 0000000000000000
[ T3576] R13: 00007fff8ef7c038 R14: 00007f1c8e8db000 R15: 000055d507eb3890
[ T3576]  </TASK>

Here the problem is in __bpf_spin_lock(), which calls arch_spin_lock() with
preemption disabled (again shown with my debug pr_err() added):

static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
{
	arch_spinlock_t *l = (void *)lock;
	union {
		__u32 val;
		arch_spinlock_t lock;
	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };

	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
	if (!strcmp(get_current()->comm, "test_progs"))
		pr_err("%s: calling preempt_disable()\n", __func__);
	preempt_disable();
	arch_spin_lock(l);
}

The call to preempt_disable() here was introduced in
commit 5861d1e8dbc4 ("bpf: Allow bpf_spin_{lock,unlock} in sleepable progs").

Bert Karwatzki
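P.S.: The rule this trips over is that on PREEMPT_RT even a GFP_ATOMIC slab
allocation may take a sleeping (rt_spin_lock-based) lock internally, so
allocating anything with preemption disabled is invalid there. That matches the
trace above: ___slab_alloc() takes a sleeping lock while __bpf_spin_lock() has
already done preempt_disable(). The same class of splat should be reproducible
with a minimal demo module like this (only a sketch, not a proposed fix; the
names are mine):

#include <linux/module.h>
#include <linux/preempt.h>
#include <linux/slab.h>

static int __init rt_alloc_demo_init(void)
{
	void *p;

	preempt_disable();		/* same state __bpf_spin_lock() leaves us in */
	p = kmalloc(16, GFP_ATOMIC);	/* on PREEMPT_RT the slab allocator may take a
					 * sleeping lock here -> "BUG: sleeping function
					 * called from invalid context" */
	preempt_enable();
	kfree(p);
	return 0;
}

static void __exit rt_alloc_demo_exit(void)
{
}

module_init(rt_alloc_demo_init);
module_exit(rt_alloc_demo_exit);
MODULE_LICENSE("GPL");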