Am Samstag, dem 17.05.2025 um 13:34 +0200 schrieb Bert Karwatzki: > Am Freitag, dem 16.05.2025 um 20:19 +0200 schrieb Bert Karwatzki: > > I've added a debugging statement: > > > > diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c > > index 3bd5ee0995fe..853493eca4f5 100644 > > --- a/net/mac80211/tx.c > > +++ b/net/mac80211/tx.c > > @@ -4586,7 +4586,11 @@ static noinline void ieee80211_8023_xmit_clang_debug_helper(struct sk_buff *skb, > > struct ieee80211_local *local, > > struct ieee80211_tx_info *info) > > { > > - if (unlikely(skb->sk && sock_flag(skb->sk, SOCK_WIFI_STATUS))) { > > + if (unlikely(skb->sk && ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) || > > + sock_flag(skb->sk, SOCK_WIFI_STATUS)))) { > > + if ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ^ sock_flag(skb->sk, SOCK_WIFI_STATUS)) > > + printk(KERN_INFO "%s: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = %u sock_flag(skb->sk, > > SOCK_WIFI_STATUS) = %u\n", > > + __func__, (skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS), sock_flag(skb->sk, > > SOCK_WIFI_STATUS)); > > info->status_data = ieee80211_store_ack_skb(local, skb, > > &info->flags, NULL); > > if (info->status_data) > > > > This gives the following logoutput (and a lockup), indicating that sock_flag(skb->sk, SOCK_WIFI_STATUS) and > > (skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) are actually NOT equivalent (when compiled with clang and > > PREEMPT_RT=y) > > I've added more debugging output: > > diff --git a/include/net/sock.h b/include/net/sock.h > index e223102337c7..e13560b5b7a8 100644 > --- a/include/net/sock.h > +++ b/include/net/sock.h > @@ -2735,8 +2735,10 @@ static inline void _sock_tx_timestamp(struct sock *sk, > *tskey = atomic_inc_return(&sk->sk_tskey) - 1; > } > } > - if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) > + if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) { > + printk(KERN_INFO "%s: setting SKBTX_WIFI_STATUS for sk = %px\n", __func__, sk); > *tx_flags |= SKBTX_WIFI_STATUS; > + } > } > > static inline void sock_tx_timestamp(struct sock *sk, > diff --git a/net/core/sock.c b/net/core/sock.c > index e02a78538e3e..f6589ad5ba36 100644 > --- a/net/core/sock.c > +++ b/net/core/sock.c > @@ -1548,6 +1548,7 @@ int sk_setsockopt(struct sock *sk, int level, int optname, > break; > > case SO_WIFI_STATUS: > + printk(KERN_INFO "%s: setting SOCK_WIFI_STATUS to %u for sk = %px\n", __func__, valbool, sk); > sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); > break; > > diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c > index 853493eca4f5..eee2f80949c6 100644 > --- a/net/mac80211/tx.c > +++ b/net/mac80211/tx.c > @@ -4588,9 +4588,12 @@ static noinline void ieee80211_8023_xmit_clang_debug_helper(struct sk_buff *skb, > { > if (unlikely(skb->sk && ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) || > sock_flag(skb->sk, SOCK_WIFI_STATUS)))) { > - if ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ^ sock_flag(skb->sk, SOCK_WIFI_STATUS)) > + if ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ^ sock_flag(skb->sk, SOCK_WIFI_STATUS)) { > printk(KERN_INFO "%s: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = %u sock_flag(skb->sk, SOCK_WIFI_STATUS) = %u\n", > __func__, (skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS), sock_flag(skb->sk, SOCK_WIFI_STATUS)); > + printk(KERN_INFO "%s: skb->sk = %px skb->sk->sk_flags = 0x%lx\n", __func__, skb->sk, skb->sk->sk_flags); > + return; // This should make this case non-fatal. > + } > info->status_data = ieee80211_store_ack_skb(local, skb, > &info->flags, NULL); > if (info->status_data) > > > > This gives after ~15min uptime > > [ 189.337797] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1 > [ 189.337803] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1b798c4e00 skb->sk->sk_flags = 0xffffffffb4efe640 > [ 191.325256] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1 > [ 191.325259] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1b798c5a00 skb->sk->sk_flags = 0xffffffffb4efe640 > [ 257.591831] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1 > [ 257.591844] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1baf3bca00 skb->sk->sk_flags = 0xffffffffb4efe640 > [ 301.786963] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1 > [ 301.786967] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1c1bc40100 skb->sk->sk_flags = 0xffffffffb4efe640 > [ 302.780881] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1 > [ 302.780884] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1a44cf6000 skb->sk->sk_flags = 0xffffffffb4efe640 > [ 482.792298] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1 > [ 482.792304] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1da0f4de00 skb->sk->sk_flags = 0xffffffffb4efe640 > [ 482.806144] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1 > [ 482.806148] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1da0f4c500 skb->sk->sk_flags = 0xffffffffb4efe640 > [ 482.817280] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1 > [ 482.817284] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1da0f4df00 skb->sk->sk_flags = 0xffffffffb4efe640 > [ 552.327291] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1 > [ 552.327295] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1da0f4de00 skb->sk->sk_flags = 0xffffffffb4efe640 > [ 916.971599] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1 > [ 916.971607] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1a62834000 skb->sk->sk_flags = 0xffffffffb4efe640 > > The printk()s in sk_set_sockopt() and _sock_tx_timestamp() are not called at all so the flag > SOCK_WIFI_STATUS is actually nevers set! What is printed when printing skb->sk->sk_flags looks > suspiciously like a pointer, and as sk_flags is actually a member of a union in struct sock_common > it seems clang is using sk_flags for one of the other union members here > > struct sock_common { > [...] > union { > unsigned long skc_flags; > struct sock *skc_listener; /* request_sock */ > struct inet_timewait_death_row *skc_tw_dr; /* inet_timewait_sock */ > }; > [...] > } > > Bert Karwatzki I added even more debugging output and found out why commit 76a853f86c97 (" wifi: free SKBTX_WIFI_STATUS skb tx_flags flag") does not work. diff --git a/include/net/sock.h b/include/net/sock.h index e13560b5b7a8..6e1291d2e5a1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2738,6 +2738,8 @@ static inline void _sock_tx_timestamp(struct sock *sk, if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) { printk(KERN_INFO "%s: setting SKBTX_WIFI_STATUS for sk = %px\n", __func__, sk); *tx_flags |= SKBTX_WIFI_STATUS; + } else { + printk(KERN_INFO "%s: NOT setting SKBTX_WIFI_STATUS for sk = %px\n", __func__, sk); } } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 20915895bdaa..4913b09c0617 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -912,6 +912,7 @@ reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener, return NULL; } req->rsk_listener = sk_listener; + printk(KERN_INFO "%s: sk_listener = %px\n", __func__, sk_listener); } req->rsk_ops = ops; req_to_sk(req)->sk_prot = sk_listener->sk_prot; @@ -986,6 +987,7 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req, nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu; nreq->rsk_listener = sk; + printk(KERN_INFO "%s: rsk_listener =%px\n", __func__, sk); /* We need not acquire fastopenq->lock * because the child socket is locked in inet_csk_listen_stop(). diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 67efe9501581..1a3108ec7503 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -190,6 +190,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const struct inet_sock *inet = inet_sk(sk); tw->tw_dr = dr; + printk(KERN_INFO "%s: sk = %px tw_dr = %px\n", __func__, sk, dr); /* Give us an identity. */ tw->tw_daddr = inet->inet_daddr; tw->tw_rcv_saddr = inet->inet_rcv_saddr; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index eee2f80949c6..227b86427e06 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4586,6 +4586,8 @@ static noinline void ieee80211_8023_xmit_clang_debug_helper(struct sk_buff *skb, struct ieee80211_local *local, struct ieee80211_tx_info *info) { + if (skb->sk) + printk(KERN_INFO "%s: skb->sk = %px skb->sk->sk_flags = 0x%lx\n", __func__, skb->sk, skb->sk->sk_flags); if (unlikely(skb->sk && ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) || sock_flag(skb->sk, SOCK_WIFI_STATUS)))) { if ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ^ sock_flag(skb->sk, SOCK_WIFI_STATUS)) { This monitor the value of skb->sk->sk_flags not only in the error case but in all cases, and also monitors the places where the other members of the sk_flags union are set. The error occurs when at the start of ieee80211_8023_xmit_clang_debug_helper() sk_flags is not actually the skc_flags member of the union but insted is skc_tw_dr which is only interpreted is flags. So why does it work with gcc but fail with clang? sock_flag(skb->sk, SOCK_WIFI_STATUS) test bit 19 of skb->sk->sk_flags Here are the important snippets of debug output: clang: [ T575] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8f1bebba4300 skb->sk->sk_flags = 0xffffffffa16fe640 Here test_bit(0xffffffffa16fe640, SOCK_WIFI_STATUS) is 1. gcc: [ T600] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8d3506bec700 skb->sk->sk_flags = 0xffffffff93d40100 Here test_bit(0xffffffff93d40100, SOCK_WIFI_STATUS) is 0. So that this works with gcc just seems like luck. I've not yet test why it works with clang when PREEMPT_RT is not enabled but my guess is that in that case we have a tw_dr pointer which fails the test_bit(). Bert Karwatzki