From: Wang Yaxin <wang.yaxin@xxxxxxxxxx> support showing whole delay of system by reading PSI, just like the first few lines of information output by the top command. the output of delaytop includes both system-wide delay and delay of individual tasks, providing a more comprehensive reflection of system latency status. Use case ======== bash# ./delaytop System Pressure Information: (avg10/avg60/avg300/total) CPU: full: 0.0%/ 0.0%/ 0.0%/0 some: 0.1%/ 0.0%/ 0.0%/14216596 Memory: full: 0.0%/ 0.0%/ 0.0%/34010659 some: 0.0%/ 0.0%/ 0.0%/35406492 IO: full: 0.1%/ 0.0%/ 0.0%/51029453 some: 0.1%/ 0.0%/ 0.0%/55330465 IRQ: full: 0.0%/ 0.0%/ 0.0%/0 Top 20 processes (sorted by CPU delay): PID TGID COMMAND CPU(ms) IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms) --------------------------------------------------------------------------------------------- 32 32 kworker/2:0H-sy 23.65 0.00 0.00 0.00 0.00 0.00 0.00 0.00 497 497 kworker/R-scsi_ 1.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 495 495 kworker/R-scsi_ 1.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 494 494 scsi_eh_0 1.12 0.00 0.00 0.00 0.00 0.00 0.00 0.00 485 485 kworker/R-ata_s 0.90 0.00 0.00 0.00 0.00 0.00 0.00 0.00 574 574 kworker/R-kdmfl 0.36 0.00 0.00 0.00 0.00 0.00 0.00 0.00 34 34 idle_inject/3 0.33 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1123 1123 nde-netfilter 0.28 0.00 0.00 0.00 0.00 0.00 0.00 0.00 60 60 ksoftirqd/7 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.00 114 114 kworker/0:2-cgr 0.25 0.00 0.00 0.00 0.00 0.00 0.00 0.00 496 496 scsi_eh_1 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 51 51 cpuhp/6 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1667 1667 atd 0.24 0.00 0.00 0.00 0.00 0.00 0.00 0.00 45 45 cpuhp/5 0.23 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1102 1102 nde-backupservi 0.22 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1098 1098 systemsettings 0.21 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1100 1100 audit-monitor 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 53 53 migration/6 0.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1482 1482 sshd 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 39 39 cpuhp/4 0.19 0.00 0.00 0.00 0.00 0.00 0.00 0.00 Co-developed-by: Fan Yu <fan.yu9@xxxxxxxxxx> Signed-off-by: Fan Yu <fan.yu9@xxxxxxxxxx> Signed-off-by: Wang Yaxin <wang.yaxin@xxxxxxxxxx> Signed-off-by: Jiang Kun <jiang.kun2@xxxxxxxxxx> --- tools/accounting/delaytop.c | 163 ++++++++++++++++++++++++++++++++---- 1 file changed, 149 insertions(+), 14 deletions(-) diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c index 23e38f39e97d..cd848af9a856 100644 --- a/tools/accounting/delaytop.c +++ b/tools/accounting/delaytop.c @@ -10,9 +10,9 @@ * individual tasks (PIDs). * * Key features: - * - Collects per-task delay accounting statistics via taskstats. - * - Supports sorting, filtering. - * - Supports both interactive (screen refresh). + * - Collects per-task delay accounting statistics via taskstats. + * - Supports sorting, filtering. + * - Supports both interactive (screen refresh). * * Copyright (C) Fan Yu, ZTE Corp. 2025 * Copyright (C) Wang Yaxin, ZTE Corp. 2025 @@ -43,6 +43,14 @@ #include <linux/cgroupstats.h> #include <ncurses.h> +#define PSI_CPU_SOME "/proc/pressure/cpu" +#define PSI_CPU_FULL "/proc/pressure/cpu" +#define PSI_MEMORY_SOME "/proc/pressure/memory" +#define PSI_MEMORY_FULL "/proc/pressure/memory" +#define PSI_IO_SOME "/proc/pressure/io" +#define PSI_IO_FULL "/proc/pressure/io" +#define PSI_IRQ_FULL "/proc/pressure/irq" + #define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len))) #define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) #define NLA_PAYLOAD(len) (len - NLA_HDRLEN) @@ -66,6 +74,24 @@ struct config { char *container_path; /* Path to container cgroup */ }; +/* PSI statistics structure */ +struct psi_stats { + double cpu_some_avg10, cpu_some_avg60, cpu_some_avg300; + unsigned long long cpu_some_total; + double cpu_full_avg10, cpu_full_avg60, cpu_full_avg300; + unsigned long long cpu_full_total; + double memory_some_avg10, memory_some_avg60, memory_some_avg300; + unsigned long long memory_some_total; + double memory_full_avg10, memory_full_avg60, memory_full_avg300; + unsigned long long memory_full_total; + double io_some_avg10, io_some_avg60, io_some_avg300; + unsigned long long io_some_total; + double io_full_avg10, io_full_avg60, io_full_avg300; + unsigned long long io_full_total; + double irq_full_avg10, irq_full_avg60, irq_full_avg300; + unsigned long long irq_full_total; +}; + /* Task delay information structure */ struct task_info { int pid; @@ -100,6 +126,7 @@ struct container_stats { /* Global variables */ static struct config cfg; +static struct psi_stats psi; static struct task_info tasks[MAX_TASKS]; static int task_count; static int running = 1; @@ -130,13 +157,13 @@ static void usage(void) { printf("Usage: delaytop [Options]\n" "Options:\n" - " -h, --help Show this help message and exit\n" - " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n" - " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n" - " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n" - " -o, --once Display once and exit\n" - " -p, --pid=PID Monitor only the specified PID\n" - " -C, --container=PATH Monitor the container at specified cgroup path\n"); + " -h, --help Show this help message and exit\n" + " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n" + " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n" + " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n" + " -o, --once Display once and exit\n" + " -p, --pid=PID Monitor only the specified PID\n" + " -C, --container=PATH Monitor the container at specified cgroup path\n"); exit(0); } @@ -276,7 +303,7 @@ static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, memset(&nladdr, 0, sizeof(nladdr)); nladdr.nl_family = AF_NETLINK; while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr, - sizeof(nladdr))) < buflen) { + sizeof(nladdr))) < buflen) { if (r > 0) { buf += r; buflen -= r; @@ -320,6 +347,89 @@ static int get_family_id(int sd) return id; } +static void read_psi_stats(void) +{ + FILE *fp; + char line[256]; + int ret = 0; + /* Zero all fields */ + memset(&psi, 0, sizeof(psi)); + /* CPU pressure */ + fp = fopen(PSI_CPU_SOME, "r"); + if (fp) { + while (fgets(line, sizeof(line), fp)) { + if (strncmp(line, "some", 4) == 0) { + ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.cpu_some_avg10, &psi.cpu_some_avg60, + &psi.cpu_some_avg300, &psi.cpu_some_total); + if (ret != 4) + fprintf(stderr, "Failed to parse CPU some PSI data\n"); + } else if (strncmp(line, "full", 4) == 0) { + ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.cpu_full_avg10, &psi.cpu_full_avg60, + &psi.cpu_full_avg300, &psi.cpu_full_total); + if (ret != 4) + fprintf(stderr, "Failed to parse CPU full PSI data\n"); + } + } + fclose(fp); + } + /* Memory pressure */ + fp = fopen(PSI_MEMORY_SOME, "r"); + if (fp) { + while (fgets(line, sizeof(line), fp)) { + if (strncmp(line, "some", 4) == 0) { + ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.memory_some_avg10, &psi.memory_some_avg60, + &psi.memory_some_avg300, &psi.memory_some_total); + if (ret != 4) + fprintf(stderr, "Failed to parse Memory some PSI data\n"); + } else if (strncmp(line, "full", 4) == 0) { + ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.memory_full_avg10, &psi.memory_full_avg60, + &psi.memory_full_avg300, &psi.memory_full_total); + } + if (ret != 4) + fprintf(stderr, "Failed to parse Memory full PSI data\n"); + } + fclose(fp); + } + /* IO pressure */ + fp = fopen(PSI_IO_SOME, "r"); + if (fp) { + while (fgets(line, sizeof(line), fp)) { + if (strncmp(line, "some", 4) == 0) { + ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.io_some_avg10, &psi.io_some_avg60, + &psi.io_some_avg300, &psi.io_some_total); + if (ret != 4) + fprintf(stderr, "Failed to parse IO some PSI data\n"); + } else if (strncmp(line, "full", 4) == 0) { + ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.io_full_avg10, &psi.io_full_avg60, + &psi.io_full_avg300, &psi.io_full_total); + if (ret != 4) + fprintf(stderr, "Failed to parse IO full PSI data\n"); + } + } + fclose(fp); + } + /* IRQ pressure (only full) */ + fp = fopen(PSI_IRQ_FULL, "r"); + if (fp) { + while (fgets(line, sizeof(line), fp)) { + if (strncmp(line, "full", 4) == 0) { + ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", + &psi.irq_full_avg10, &psi.irq_full_avg60, + &psi.irq_full_avg300, &psi.irq_full_total); + if (ret != 4) + fprintf(stderr, "Failed to parse IRQ full PSI data\n"); + } + } + fclose(fp); + } +} + static int read_comm(int pid, char *comm_buf, size_t buf_size) { char path[64]; @@ -549,7 +659,29 @@ static void display_results(void) FILE *out = stdout; fprintf(out, "\033[H\033[J"); - + /* PSI output (one-line, no cat style) */ + fprintf(out, "System Pressure Information: "); + fprintf(out, "(avg10/avg60/avg300/total)\n"); + fprintf(out, "CPU:"); + fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.cpu_full_avg10, + psi.cpu_full_avg60, psi.cpu_full_avg300, psi.cpu_full_total); + fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.cpu_some_avg10, + psi.cpu_some_avg60, psi.cpu_some_avg300, psi.cpu_some_total); + + fprintf(out, "Memory:"); + fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.memory_full_avg10, + psi.memory_full_avg60, psi.memory_full_avg300, psi.memory_full_total); + fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.memory_some_avg10, + psi.memory_some_avg60, psi.memory_some_avg300, psi.memory_some_total); + + fprintf(out, "IO:"); + fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.io_full_avg10, + psi.io_full_avg60, psi.io_full_avg300, psi.io_full_total); + fprintf(out, " some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.io_some_avg10, + psi.io_some_avg60, psi.io_some_avg300, psi.io_some_total); + fprintf(out, "IRQ:"); + fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n\n", psi.irq_full_avg10, + psi.irq_full_avg60, psi.irq_full_avg300, psi.irq_full_total); if (cfg.container_path) { fprintf(out, "Container Information (%s):\n", cfg.container_path); fprintf(out, "Processes: running=%d, sleeping=%d, ", @@ -559,8 +691,8 @@ static void display_results(void) container_stats.nr_io_wait); } fprintf(out, "Top %d processes (sorted by CPU delay):\n\n", - cfg.max_processes); - fprintf(out, " PID TGID COMMAND CPU(ms) IO(ms) "); + cfg.max_processes); + fprintf(out, " PID TGID COMMAND CPU(ms) IO(ms) "); fprintf(out, "SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) IRQ(ms)\n"); fprintf(out, "-----------------------------------------------"); fprintf(out, "----------------------------------------------\n"); @@ -616,6 +748,9 @@ int main(int argc, char **argv) /* Main loop */ while (running) { + /* Read PSI statistics */ + read_psi_stats(); + /* Get container stats if container path provided */ if (cfg.container_path) get_container_stats(); -- 2.25.1