In the TCP sendmsg handler, permit a write in LISTENING state if the MSG_PRELOAD flag is used. Copy from iovec to a linear sk_buff for placement on the socket write queue. Signed-off-by: Jeremy Harris <jgh@xxxxxxxx> --- include/linux/socket.h | 1 + net/ipv4/tcp.c | 17 +++++++++++++---- tools/perf/trace/beauty/include/linux/socket.h | 1 + tools/perf/trace/beauty/msg_flags.c | 3 +++ 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index 3b262487ec06..b41f4cd4dc97 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -330,6 +330,7 @@ struct ucred { #define MSG_SOCK_DEVMEM 0x2000000 /* Receive devmem skbs as cmsg */ #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */ +#define MSG_PRELOAD 0x10000000 /* Preload tx data while listening */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ #define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file descriptor received through diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b7b6ab41b496..72b5d7cad351 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1136,12 +1136,13 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) tcp_rate_check_app_limited(sk); /* is sending application-limited? */ - /* Wait for a connection to finish. One exception is TCP Fast Open + /* Wait for a connection to finish. Exceptions are TCP Fast Open * (passive side) where data is allowed to be sent before a connection - * is fully established. + * is fully established, and a message marked as preload, which is + * allowed to be placed in the send queue of a listening socket. 
*/ if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && - !tcp_passive_fastopen(sk)) { + !tcp_passive_fastopen(sk) && !(flags & MSG_PRELOAD)) { err = sk_stream_wait_connect(sk, &timeo); if (err != 0) goto do_error; @@ -1226,7 +1227,13 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) if (copy > msg_data_left(msg)) copy = msg_data_left(msg); - if (zc == 0) { + if (unlikely(flags & MSG_PRELOAD)) { + copy = min_t(int, copy, skb_tailroom(skb)); + err = skb_add_data_nocache(sk, skb, &msg->msg_iter, + copy); + if (err) + goto do_error; + } else if (zc == 0) { bool merge = true; int i = skb_shinfo(skb)->nr_frags; struct page_frag *pfrag = sk_page_frag(sk); @@ -1330,6 +1337,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) if (!msg_data_left(msg)) { if (unlikely(flags & MSG_EOR)) TCP_SKB_CB(skb)->eor = 1; + if (unlikely(flags & MSG_PRELOAD)) + goto out_nopush; goto out; } diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index c3322eb3d686..e9ea498169f3 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -330,6 +330,7 @@ struct ucred { #define MSG_SOCK_DEVMEM 0x2000000 /* Receive devmem skbs as cmsg */ #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */ +#define MSG_PRELOAD 0x10000000 /* Preload tx data while listening */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ #define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file descriptor received through diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c index 2da581ff0c80..27e40da9b02d 100644 --- a/tools/perf/trace/beauty/msg_flags.c +++ b/tools/perf/trace/beauty/msg_flags.c @@ -20,6 +20,9 @@ #ifndef MSG_SPLICE_PAGES #define MSG_SPLICE_PAGES 0x8000000 #endif +#ifndef MSG_PRELOAD 
+#define MSG_PRELOAD 0x10000000 +#endif #ifndef MSG_FASTOPEN #define MSG_FASTOPEN 0x20000000 #endif -- 2.49.0