On Mon, Jul 28, 2025 at 09:43:45AM +0000, Mahe Tardy wrote: > This test opens a server and client, attach a cgroup_skb program on > egress and calls the icmp_send_unreach function from the client egress > so that an ICMP unreach control message is sent back to the client. > It then fetches the message from the error queue to confirm the correct > ICMP unreach code has been sent. > > Note that the BPF program returns SK_PASS to let the connection being > established to finish the test cases quicker. Otherwise, you have to > wait for the TCP three-way handshake to timeout in the kernel and > retrieve the errno translated from the unreach code set by the ICMP > control message. > > Signed-off-by: Mahe Tardy <mahe.tardy@xxxxxxxxx> > --- > .../bpf/prog_tests/icmp_send_unreach_kfunc.c | 99 +++++++++++++++++++ > .../selftests/bpf/progs/icmp_send_unreach.c | 36 +++++++ > 2 files changed, 135 insertions(+) > create mode 100644 tools/testing/selftests/bpf/prog_tests/icmp_send_unreach_kfunc.c > create mode 100644 tools/testing/selftests/bpf/progs/icmp_send_unreach.c > > diff --git a/tools/testing/selftests/bpf/prog_tests/icmp_send_unreach_kfunc.c b/tools/testing/selftests/bpf/prog_tests/icmp_send_unreach_kfunc.c > new file mode 100644 > index 000000000000..414c1ed8ced3 > --- /dev/null > +++ b/tools/testing/selftests/bpf/prog_tests/icmp_send_unreach_kfunc.c > @@ -0,0 +1,99 @@ > +// SPDX-License-Identifier: GPL-2.0 > +#include <test_progs.h> > +#include <network_helpers.h> > +#include <linux/errqueue.h> > +#include "icmp_send_unreach.skel.h" > + > +#define TIMEOUT_MS 1000 > +#define SRV_PORT 54321 > + > +#define ICMP_DEST_UNREACH 3 > + > +#define ICMP_FRAG_NEEDED 4 > +#define NR_ICMP_UNREACH 15 small nit: Any reason why ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, and NR_ICMP_UNREACH are redefined here? I think you should just be able to #include <linux/icmp.h> at the top to avoid redefining these. 
> + > +static void read_icmp_errqueue(int sockfd, int expected_code) > +{ > + ssize_t n; > + struct sock_extended_err *sock_err; > + struct cmsghdr *cm; > + char ctrl_buf[512]; > + struct msghdr msg = { > + .msg_control = ctrl_buf, > + .msg_controllen = sizeof(ctrl_buf), > + }; > + > + n = recvmsg(sockfd, &msg, MSG_ERRQUEUE); > + if (!ASSERT_GE(n, 0, "recvmsg_errqueue")) > + return; > + > + for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { > + if (!ASSERT_EQ(cm->cmsg_level, IPPROTO_IP, "cmsg_type") || > + !ASSERT_EQ(cm->cmsg_type, IP_RECVERR, "cmsg_level")) > + continue; > + > + sock_err = (struct sock_extended_err *)CMSG_DATA(cm); > + > + if (!ASSERT_EQ(sock_err->ee_origin, SO_EE_ORIGIN_ICMP, > + "sock_err_origin_icmp")) > + return; > + if (!ASSERT_EQ(sock_err->ee_type, ICMP_DEST_UNREACH, > + "sock_err_type_dest_unreach")) > + return; > + ASSERT_EQ(sock_err->ee_code, expected_code, "sock_err_code"); > + } > +} > + > +void test_icmp_send_unreach_kfunc(void) > +{ > + struct icmp_send_unreach *skel; > + int cgroup_fd = -1, client_fd = 1, srv_fd = -1; > + int *code; > + > + skel = icmp_send_unreach__open_and_load(); > + if (!ASSERT_OK_PTR(skel, "skel_open")) > + goto cleanup; > + > + cgroup_fd = test__join_cgroup("/icmp_send_unreach_cgroup"); > + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) > + goto cleanup; > + > + skel->links.egress = > + bpf_program__attach_cgroup(skel->progs.egress, cgroup_fd); > + if (!ASSERT_OK_PTR(skel->links.egress, "prog_attach_cgroup")) > + goto cleanup; > + > + code = &skel->bss->unreach_code; > + > + for (*code = 0; *code <= NR_ICMP_UNREACH; (*code)++) { > + // The TCP stack reacts differently when asking for > + // fragmentation, let's ignore it for now > + if (*code == ICMP_FRAG_NEEDED) > + continue; > + > + skel->bss->kfunc_ret = -1; > + > + srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", > + SRV_PORT, TIMEOUT_MS); > + if (!ASSERT_GE(srv_fd, 0, "start_server")) > + goto for_cleanup; > + > + client_fd = 
socket(AF_INET, SOCK_STREAM, 0); > + ASSERT_GE(client_fd, 0, "client_socket"); > + > + client_fd = connect_to_fd(srv_fd, 0); > + if (!ASSERT_GE(client_fd, 0, "client_connect")) > + goto for_cleanup; > + > + read_icmp_errqueue(client_fd, *code); > + > + ASSERT_EQ(skel->bss->kfunc_ret, SK_DROP, "kfunc_ret"); It might be worth testing that the kfunc returns -EINVAL when the code is outside the accepted range as well for completeness. > +for_cleanup: > + close(client_fd); > + close(srv_fd); > + } > + > +cleanup: > + icmp_send_unreach__destroy(skel); > + close(cgroup_fd); > +} > diff --git a/tools/testing/selftests/bpf/progs/icmp_send_unreach.c b/tools/testing/selftests/bpf/progs/icmp_send_unreach.c > new file mode 100644 > index 000000000000..15783e5d1d65 > --- /dev/null > +++ b/tools/testing/selftests/bpf/progs/icmp_send_unreach.c > @@ -0,0 +1,36 @@ > +// SPDX-License-Identifier: GPL-2.0 > +#include "vmlinux.h" > +#include <bpf/bpf_helpers.h> > +#include <bpf/bpf_endian.h> > + > +char LICENSE[] SEC("license") = "Dual BSD/GPL"; > + > +int unreach_code = 0; > +int kfunc_ret = 0; > + > +#define SERVER_PORT 54321 > +#define SERVER_IP 0x7F000001 > + > +SEC("cgroup_skb/egress") > +int egress(struct __sk_buff *skb) > +{ > + void *data = (void *)(long)skb->data; > + void *data_end = (void *)(long)skb->data_end; > + struct iphdr *iph; > + struct tcphdr *tcph; > + > + iph = data; > + if ((void *)(iph + 1) > data_end || iph->version != 4 || > + iph->protocol != IPPROTO_TCP || iph->daddr != bpf_htonl(SERVER_IP)) > + return SK_PASS; > + > + tcph = (void *)iph + iph->ihl * 4; > + if ((void *)(tcph + 1) > data_end || > + tcph->dest != bpf_htons(SERVER_PORT)) > + return SK_PASS; > + > + kfunc_ret = bpf_icmp_send_unreach(skb, unreach_code); > + > + /* returns SK_PASS to execute the test case quicker */ > + return SK_PASS; > +} > -- > 2.34.1 > Jordan