On Mon, Jun 9, 2025 at 9:05 AM Jeremy Harris <jgh@xxxxxxxx> wrote: > > Set the request_sock flag for fastopen earlier, making it available > to the af_ops SYN-handler function. > > In that function copy data from the listen socket write queue into an > sk_buff, allocating if needed and adding to the write queue of the > newly-created child socket. > Set sequence number values depending on the fastopen status. > > Signed-off-by: Jeremy Harris <jgh@xxxxxxxx> > --- > net/ipv4/tcp_fastopen.c | 3 ++- > net/ipv4/tcp_ipv4.c | 4 +-- > net/ipv4/tcp_minisocks.c | 58 ++++++++++++++++++++++++++++++++++++---- > 3 files changed, 57 insertions(+), 8 deletions(-) > > diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c > index 9b83d639b5ac..03a86d0b87ba 100644 > --- a/net/ipv4/tcp_fastopen.c > +++ b/net/ipv4/tcp_fastopen.c > @@ -245,6 +245,8 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, > struct sock *child; > bool own_req; > > + tcp_rsk(req)->tfo_listener = true; > + > child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, > NULL, &own_req); > if (!child) > @@ -261,7 +263,6 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, > tp = tcp_sk(child); > > rcu_assign_pointer(tp->fastopen_rsk, req); > - tcp_rsk(req)->tfo_listener = true; > > /* RFC1323: The window in SYN & SYN/ACK segments is never > * scaled. So correct it appropriately. > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c > index 6a14f9e6fef6..e488effdbdb2 100644 > --- a/net/ipv4/tcp_ipv4.c > +++ b/net/ipv4/tcp_ipv4.c > @@ -1747,8 +1747,8 @@ EXPORT_IPV6_MOD(tcp_v4_conn_request); > > > /* > - * The three way handshake has completed - we got a valid synack - > - * now create the new socket. > + * The three way handshake has completed - we got a valid synack > + * (or a FASTOPEN syn) - now create the new socket. 
> */ > struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, > struct request_sock *req, > diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c > index 43d7852ce07e..d471531b4a78 100644 > --- a/net/ipv4/tcp_minisocks.c > +++ b/net/ipv4/tcp_minisocks.c > @@ -529,7 +529,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, > struct inet_connection_sock *newicsk; > const struct tcp_sock *oldtp; > struct tcp_sock *newtp; > - u32 seq; > + u32 seq, a_seq, n_seq; > > if (!newsk) > return NULL; > @@ -550,9 +550,55 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, > newtp->segs_in = 1; > > seq = treq->snt_isn + 1; > - newtp->snd_sml = newtp->snd_una = seq; > - WRITE_ONCE(newtp->snd_nxt, seq); > - newtp->snd_up = seq; > + n_seq = seq; > + a_seq = seq; > + newtp->write_seq = seq; > + newtp->snd_una = seq; > + > + /* If there is write-data sitting on the listen socket, copy it to > + * the accept socket. If FASTOPEN we will send it on the synack, > + * otherwise it sits there until 3rd-ack arrives. > + */ > + > + if (unlikely(!skb_queue_empty(&sk->sk_write_queue))) { > + struct sk_buff *l_skb = tcp_send_head(sk), > + *a_skb = tcp_write_queue_tail(newsk); > + ssize_t copy = 0; > + > + if (a_skb) > + copy = l_skb->len - a_skb->len; > + > + if (copy <= 0 || !tcp_skb_can_collapse_to(a_skb)) { > + bool first_skb = tcp_rtx_and_write_queues_empty(newsk); > + > + a_skb = tcp_stream_alloc_skb(newsk, > + newsk->sk_allocation, > + first_skb); > + if (!a_skb) { > + /* is this the correct free? */ > + bh_unlock_sock(newsk); > + sk_free(newsk); > + return NULL; > + } > + > + tcp_skb_entail(newsk, a_skb); > + } > + copy = min_t(int, l_skb->len, skb_tailroom(a_skb)); > + skb_put_data(a_skb, l_skb->data, copy); TCP stack no longer puts payload in skb->head. This caused many issues, trust me. Please do not try to bring this back. Also I see no locking there, this is racy with another thread trying to sendmsg() data to the listener. 
Honestly, I do not like this series at all: it adds cost to the TCP fast path for a very narrow use-case.