[PATCH net-next] tcp: account for memory pressure signaled by cgroup

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Currently, we have two memory pressure counters for TCP sockets [1],
which we manipulate only when the memory pressure is signaled through
the proto struct [2].

However, the memory pressure can also be signaled through the cgroup
memory subsystem, which we do not reflect in the netstat counters.

This patch adds a new counter to account for memory pressure signaled by
the memory cgroup.

Link: https://elixir.bootlin.com/linux/v6.15.4/source/include/uapi/linux/snmp.h#L231-L232 [1]
Link: https://elixir.bootlin.com/linux/v6.15.4/source/include/net/sock.h#L1300-L1301 [2]
Co-developed-by: Matyas Hurtik <matyas.hurtik@xxxxxxxxx>
Signed-off-by: Matyas Hurtik <matyas.hurtik@xxxxxxxxx>
Signed-off-by: Daniel Sedlak <daniel.sedlak@xxxxxxxxx>
---
This patch is a result of our long-standing debug sessions, where it
all started as "networking is slow". TL;DR: our cgroup memory controller
was "pressuring" our socket communication (even though we had enough
free memory and all configurables were set to unlimited), which resulted
in very slow networking speeds (1.5 Mbps instead of 1 Gbps). We needed
to use bpftrace to debug this; had it been visible in the netstat
counters, we would have noticed it much earlier.

 Documentation/networking/net_cachelines/snmp.rst |  1 +
 include/net/tcp.h                                | 14 ++++++++------
 include/uapi/linux/snmp.h                        |  1 +
 net/ipv4/proc.c                                  |  1 +
 4 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/Documentation/networking/net_cachelines/snmp.rst b/Documentation/networking/net_cachelines/snmp.rst
index bd44b3eebbef..ed17ff84e39c 100644
--- a/Documentation/networking/net_cachelines/snmp.rst
+++ b/Documentation/networking/net_cachelines/snmp.rst
@@ -76,6 +76,7 @@ unsigned_long  LINUX_MIB_TCPABORTONLINGER
 unsigned_long  LINUX_MIB_TCPABORTFAILED
 unsigned_long  LINUX_MIB_TCPMEMORYPRESSURES
 unsigned_long  LINUX_MIB_TCPMEMORYPRESSURESCHRONO
+unsigned_long  LINUX_MIB_TCPCGROUPSOCKETPRESSURE
 unsigned_long  LINUX_MIB_TCPSACKDISCARD
 unsigned_long  LINUX_MIB_TCPDSACKIGNOREDOLD
 unsigned_long  LINUX_MIB_TCPDSACKIGNOREDNOUNDO
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 761c4a0ad386..aae3efe24282 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -267,6 +267,11 @@ extern long sysctl_tcp_mem[3];
 #define TCP_RACK_STATIC_REO_WND  0x2 /* Use static RACK reo wnd */
 #define TCP_RACK_NO_DUPTHRESH    0x4 /* Do not use DUPACK threshold in RACK */
 
+#define TCP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.tcp_statistics, field)
+#define __TCP_INC_STATS(net, field)	__SNMP_INC_STATS((net)->mib.tcp_statistics, field)
+#define TCP_DEC_STATS(net, field)	SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
+#define TCP_ADD_STATS(net, field, val)	SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
+
 extern atomic_long_t tcp_memory_allocated;
 DECLARE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc);
 
@@ -277,8 +282,10 @@ extern unsigned long tcp_memory_pressure;
 static inline bool tcp_under_memory_pressure(const struct sock *sk)
 {
 	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
-	    mem_cgroup_under_socket_pressure(sk->sk_memcg))
+	    mem_cgroup_under_socket_pressure(sk->sk_memcg)) {
+		TCP_INC_STATS(sock_net(sk), LINUX_MIB_TCPCGROUPSOCKETPRESSURE);
 		return true;
+	}
 
 	return READ_ONCE(tcp_memory_pressure);
 }
@@ -316,11 +323,6 @@ bool tcp_check_oom(const struct sock *sk, int shift);
 
 extern struct proto tcp_prot;
 
-#define TCP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.tcp_statistics, field)
-#define __TCP_INC_STATS(net, field)	__SNMP_INC_STATS((net)->mib.tcp_statistics, field)
-#define TCP_DEC_STATS(net, field)	SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
-#define TCP_ADD_STATS(net, field, val)	SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
-
 void tcp_tsq_work_init(void);
 
 int tcp_v4_err(struct sk_buff *skb, u32);
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 1d234d7e1892..9e8d1a5e56a9 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -231,6 +231,7 @@ enum
 	LINUX_MIB_TCPABORTFAILED,		/* TCPAbortFailed */
 	LINUX_MIB_TCPMEMORYPRESSURES,		/* TCPMemoryPressures */
 	LINUX_MIB_TCPMEMORYPRESSURESCHRONO,	/* TCPMemoryPressuresChrono */
+	LINUX_MIB_TCPCGROUPSOCKETPRESSURE,      /* TCPCgroupSocketPressure */
 	LINUX_MIB_TCPSACKDISCARD,		/* TCPSACKDiscard */
 	LINUX_MIB_TCPDSACKIGNOREDOLD,		/* TCPSACKIgnoredOld */
 	LINUX_MIB_TCPDSACKIGNOREDNOUNDO,	/* TCPSACKIgnoredNoUndo */
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ea2f01584379..0bcec9a51fb0 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -235,6 +235,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPAbortFailed", LINUX_MIB_TCPABORTFAILED),
 	SNMP_MIB_ITEM("TCPMemoryPressures", LINUX_MIB_TCPMEMORYPRESSURES),
 	SNMP_MIB_ITEM("TCPMemoryPressuresChrono", LINUX_MIB_TCPMEMORYPRESSURESCHRONO),
+	SNMP_MIB_ITEM("TCPCgroupSocketPressure", LINUX_MIB_TCPCGROUPSOCKETPRESSURE),
 	SNMP_MIB_ITEM("TCPSACKDiscard", LINUX_MIB_TCPSACKDISCARD),
 	SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD),
 	SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO),

base-commit: e96ee511c906c59b7c4e6efd9d9b33917730e000
-- 
2.39.5





[Index of Archives]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux