Am Mittwoch, dem 14.05.2025 um 12:23 +0200 schrieb Johannes Berg: > + linux-wireless > > On Wed, 2025-05-14 at 09:32 +0000, Bert Karwatzki wrote: > > > Then I reapplied commit 76a853f86c97 hunk by hunk and found the one hunk that > > causes the problem: > > > > diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c > > index 3e751dd3ae7b..63df21228029 100644 > > --- a/net/mac80211/tx.c > > +++ b/net/mac80211/tx.c > > @@ -4648,8 +4648,7 @@ static void ieee80211_8023_xmit(struct > > ieee80211_sub_if_data *sdata, > > memcpy(IEEE80211_SKB_CB(seg), info, sizeof(*info)); > > } > > > > - if (unlikely(skb->sk && > > - skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) { > > + if (unlikely(skb->sk && sock_flag(skb->sk, SOCK_WIFI_STATUS))) { > > info->status_data = ieee80211_store_ack_skb(local, skb, > > &info->flags, NULL); > > if (info->status_data) > > I think it crashed later on the status, but this inserts the skb into > the IDR so the status can pick it up to return the status and afaict > _that's_ where it crashed. > > Still I don't really know what could go wrong? The (copied) skb should > still have been keeping the socket alive. > > > This is enough to cause a kernel panic when compiled with clang (clang-19.1.7 > > from debian sid). Compiling the same kernel with gcc (gcc-14.2.0 from debian > > sid) shows no problem. > > Right, even stranger. 
But I can't even say you should look at this place > (which inserts) or the other (which takes it out again and crashed) to > compare the code :-/ > > > johannes I've split off the problematic piece of code into a noinline function to simplify the disassembly: diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 20de6e6b0929..075e012d9992 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4582,7 +4582,19 @@ static bool ieee80211_tx_8023(struct ieee80211_sub_if_data *sdata, return ret; } -static noinline void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, +static noinline void ieee80211_8023_xmit_clang_debug_helper(struct sk_buff *skb, + struct ieee80211_local *local, + struct ieee80211_tx_info *info) +{ + if (unlikely(skb->sk && sock_flag(skb->sk, SOCK_WIFI_STATUS))) { + info->status_data = ieee80211_store_ack_skb(local, skb, + &info->flags, NULL); + if (info->status_data) + info->status_data_idr = 1; + } +} + +static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, struct net_device *dev, struct sta_info *sta, struct ieee80211_key *key, struct sk_buff *skb) { @@ -4648,12 +4660,7 @@ static noinline void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, memcpy(IEEE80211_SKB_CB(seg), info, sizeof(*info)); } - if (unlikely(skb->sk && sock_flag(skb->sk, SOCK_WIFI_STATUS))) { - info->status_data = ieee80211_store_ack_skb(local, skb, - &info->flags, NULL); - if (info->status_data) - info->status_data_idr = 1; - } + ieee80211_8023_xmit_clang_debug_helper(skb, local, info); dev_sw_netstats_tx_add(dev, skbs, len); sta->deflink.tx_stats.packets[queue] += skbs; This shows the same behaviour as the old code, i.e. kernel panic when compiled with clang(-19.1.7), no problem when compiled with gcc(-14.2.0). 
When compiled with clang the disassembly of the function is (from objdump -d) 000000000000a260 <ieee80211_8023_xmit_clang_debug_helper>: a260: 48 8b 47 18 mov 0x18(%rdi),%rax a264: 48 85 c0 test %rax,%rax a267: 74 0c je a275 <ieee80211_8023_xmit_clang_debug_helper+0x15> a269: 53 push %rbx a26a: 48 f7 40 60 00 00 08 testq $0x80000,0x60(%rax) a271: 00 a272: 75 07 jne a27b <ieee80211_8023_xmit_clang_debug_helper+0x1b> a274: 5b pop %rbx a275: 2e e9 00 00 00 00 cs jmp a27b <ieee80211_8023_xmit_clang_debug_helper+0x1b> a27b: 48 89 f8 mov %rdi,%rax a27e: 48 89 f7 mov %rsi,%rdi a281: 48 89 c6 mov %rax,%rsi a284: 48 89 d3 mov %rdx,%rbx a287: 31 c9 xor %ecx,%ecx a289: e8 02 ff ff ff call a190 <ieee80211_store_ack_skb> a28e: 25 ff 1f 00 00 and $0x1fff,%eax a293: 89 c2 mov %eax,%edx a295: b9 0f 00 fe ff mov $0xfffe000f,%ecx a29a: 23 4b 04 and 0x4(%rbx),%ecx a29d: c1 e2 04 shl $0x4,%edx a2a0: 09 d1 or %edx,%ecx a2a2: 89 4b 04 mov %ecx,0x4(%rbx) a2a5: 85 c0 test %eax,%eax a2a7: 74 cb je a274 <ieee80211_8023_xmit_clang_debug_helper+0x14> a2a9: 83 c9 08 or $0x8,%ecx a2ac: 89 4b 04 mov %ecx,0x4(%rbx) a2af: eb c3 jmp a274 <ieee80211_8023_xmit_clang_debug_helper+0x14> a2b1: 66 66 66 66 66 66 2e data16 data16 data16 data16 data16 cs nopw 0x0(%rax,%rax,1) a2b8: 0f 1f 84 00 00 00 00 a2bf: 00 When compiled with gcc the disassembly is 00000000000010e0 <ieee80211_8023_xmit_clang_debug_helper>: 10e0: 48 8b 4f 18 mov 0x18(%rdi),%rcx 10e4: 48 89 f8 mov %rdi,%rax 10e7: 48 85 c9 test %rcx,%rcx 10ea: 75 05 jne 10f1 <ieee80211_8023_xmit_clang_debug_helper+0x11> 10ec: e9 00 00 00 00 jmp 10f1 <ieee80211_8023_xmit_clang_debug_helper+0x11> 10f1: 48 8b 49 60 mov 0x60(%rcx),%rcx 10f5: f7 c1 00 00 08 00 test $0x80000,%ecx 10fb: 74 ef je 10ec <ieee80211_8023_xmit_clang_debug_helper+0xc> 10fd: 48 83 ec 08 sub $0x8,%rsp 1101: 48 89 f7 mov %rsi,%rdi 1104: 31 c9 xor %ecx,%ecx 1106: 48 89 c6 mov %rax,%rsi 1109: 48 89 14 24 mov %rdx,(%rsp) 110d: e8 ce f8 ff ff call 9e0 <ieee80211_store_ack_skb> 1112: 48 8b 14 
24 mov (%rsp),%rdx 1116: 89 c1 mov %eax,%ecx 1118: 8b 42 04 mov 0x4(%rdx),%eax 111b: 81 e1 ff 1f 00 00 and $0x1fff,%ecx 1121: c1 e1 04 shl $0x4,%ecx 1124: 25 0f 00 fe ff and $0xfffe000f,%eax 1129: 09 c8 or %ecx,%eax 112b: 89 42 04 mov %eax,0x4(%rdx) 112e: a9 f0 ff 01 00 test $0x1fff0,%eax 1133: 74 04 je 1139 <ieee80211_8023_xmit_clang_debug_helper+0x59> 1135: 80 4a 04 08 orb $0x8,0x4(%rdx) 1139: 48 83 c4 08 add $0x8,%rsp 113d: e9 00 00 00 00 jmp 1142 <ieee80211_8023_xmit_clang_debug_helper+0x62> 1142: 66 66 2e 0f 1f 84 00 data16 cs nopw 0x0(%rax,%rax,1) 1149: 00 00 00 00 114d: 0f 1f 00 nopl (%rax) 1150: 90 nop 1151: 90 nop 1152: 90 nop 1153: 90 nop 1154: 90 nop 1155: 90 nop 1156: 90 nop 1157: 90 nop 1158: 90 nop 1159: 90 nop 115a: 90 nop 115b: 90 nop 115c: 90 nop 115d: 90 nop 115e: 90 nop 115f: 90 nop I've not yet taken a closer look, but perhaps the error is obvious to someone else. Bert Karwatzki