This commit adds a new field mem_peak / "Peak memory" field to a set of gathered statistics. The field is intended as an estimate for peak verifier memory consumption for processing of a given program. Mechanically stat is collected as follows: - At the beginning of handle_verif_mode() a new cgroup namespace is created and cgroup fs is mounted in this namespace, memory controller is enabled for the root cgroup. - At each program load: - bpf_object__load() is split into bpf_object__prepare() and bpf_object__load() to avoid accounting for memory allocated for maps; - before bpf_object__load() a new cgroup is created and veristat process enters this cgroup, "memory.peak" of the new cgroup is stashed; - after bpf_object__load() the difference between current "memory.peak" and stashed "memory.peak" is used as a metric, veristat exits the cgroup and cgroup is discarded. If any of the above steps fails veristat would proceed w/o collecting mem_peak information for a program. The change has impact on veristat running time, e.g. for all test_progs object files there is an increase from 82s to 102s. I take a correlation between "Peak states" and "Peak memory" fields as a sanity check for gathered statistics, e.g. here is a sample of data for sched_ext programs: File Program Peak states Peak memory (KiB) --------- -------------------- ----------- ----------------- bpf.bpf.o lavd_select_cpu 1311 26256 bpf.bpf.o lavd_enqueue 1140 22720 bpf.bpf.o layered_enqueue 777 11504 bpf.bpf.o layered_dispatch 578 7976 bpf.bpf.o lavd_dispatch 634 6204 bpf.bpf.o rusty_init 343 5352 bpf.bpf.o lavd_init 361 5092 ... bpf.bpf.o rusty_exit_task 36 256 bpf.bpf.o rusty_running 19 256 bpf.bpf.o bpfland_dispatch 3 0 bpf.bpf.o bpfland_enable 1 0 Signed-off-by: Eduard Zingerman <eddyz87@xxxxxxxxx> --- tools/testing/selftests/bpf/veristat.c | 249 ++++++++++++++++++++++++- 1 file changed, 242 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index b2bb20b00952..e68f5dda5278 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -12,6 +12,7 @@ #include <signal.h> #include <fcntl.h> #include <unistd.h> +#include <sys/mount.h> #include <sys/time.h> #include <sys/sysinfo.h> #include <sys/stat.h> @@ -49,6 +50,7 @@ enum stat_id { STACK, PROG_TYPE, ATTACH_TYPE, + MEMORY_PEAK, FILE_NAME, PROG_NAME, @@ -208,6 +210,9 @@ static struct env { int top_src_lines; struct var_preset *presets; int npresets; + char cgroup_fs_mount[PATH_MAX + 1]; + char stat_cgroup[PATH_MAX + 1]; + int memory_peak_fd; } env; static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) @@ -219,6 +224,22 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va return vfprintf(stderr, format, args); } +#define log_errno(fmt, ...) log_errno_aux(__FILE__, __LINE__, fmt, ##__VA_ARGS__) + +__printf(3, 4) +static int log_errno_aux(const char *file, int line, const char *fmt, ...) +{ + int err = -errno; + va_list ap; + + va_start(ap, fmt); + fprintf(stderr, "%s:%d: ", file, line); + vfprintf(stderr, fmt, ap); + fprintf(stderr, " failed with error '%s'\n", strerror(errno)); + va_end(ap); + return err; +} + #ifndef VERISTAT_VERSION #define VERISTAT_VERSION "<kernel>" #endif @@ -734,13 +755,13 @@ static int append_file_from_file(const char *path) } static const struct stat_specs default_csv_output_spec = { - .spec_cnt = 14, + .spec_cnt = 15, .ids = { FILE_NAME, PROG_NAME, VERDICT, DURATION, TOTAL_INSNS, TOTAL_STATES, PEAK_STATES, MAX_STATES_PER_INSN, MARK_READ_MAX_LEN, SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE, - STACK, + STACK, MEMORY_PEAK, }, }; @@ -781,6 +802,7 @@ static struct stat_def { [STACK] = {"Stack depth", {"stack_depth", "stack"}, }, [PROG_TYPE] = { "Program type", {"prog_type"}, }, [ATTACH_TYPE] = { "Attach type", {"attach_type", }, }, + [MEMORY_PEAK] = { "Peak memory (KiB)", {"mem_peak", }, }, }; static bool parse_stat_id_var(const char *name, size_t len, int *id, @@ -1278,16 +1300,213 @@ static int max_verifier_log_size(void) return log_size; } +__printf(2, 3) +static int write_one_line(const char *file, const char *fmt, ...) +{ + int err, saved_errno; + va_list ap; + FILE *f; + + f = fopen(file, "w"); + if (!f) + return -1; + + va_start(ap, fmt); + errno = 0; + err = vfprintf(f, fmt, ap); + saved_errno = errno; + va_end(ap); + fclose(f); + errno = saved_errno; + return err < 0 ? -1 : 0; +} + +/* + * This works around GCC warning about snprintf truncating strings like: + * + * char a[PATH_MAX], b[PATH_MAX]; + * snprintf(a, "%s/foo", b); // triggers -Wformat-truncation + */ +__printf(3, 4) +static int snprintf_trunc(char *str, volatile size_t size, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = vsnprintf(str, size, fmt, ap); + va_end(ap); + return ret; +} + +static void destroy_stat_cgroup(void); +static void umount_cgroupfs(void); + +/* + * Enters new cgroup namespace and mounts cgroupfs at /tmp/veristat-cgroup-mount-XXXXXX, + * enables "memory" controller for the root cgroup. + */ +static int mount_cgroupfs(void) +{ + char buf[PATH_MAX + 1]; + int err; + + env.memory_peak_fd = -1; + + err = unshare(CLONE_NEWCGROUP); + if (err < 0) { + err = log_errno("unshare(CLONE_NEWCGROUP)"); + goto err_out; + } + + snprintf_trunc(buf, sizeof(buf), "%s/veristat-cgroup-mount-XXXXXX", P_tmpdir); + if (mkdtemp(buf) == NULL) { + err = log_errno("mkdtemp(%s)", buf); + goto err_out; + } + strcpy(env.cgroup_fs_mount, buf); + + err = mount("none", env.cgroup_fs_mount, "cgroup2", 0, NULL); + if (err < 0) { + err = log_errno("mount none %s -t cgroup2", env.cgroup_fs_mount); + goto err_out; + } + + snprintf_trunc(buf, sizeof(buf), "%s/cgroup.subtree_control", env.cgroup_fs_mount); + err = write_one_line(buf, "+memory\n"); + if (err < 0) { + err = log_errno("echo '+memory' > %s", buf); + goto err_out; + } + + return 0; + +err_out: + umount_cgroupfs(); + return err; +} + +static void umount_cgroupfs(void) +{ + int err; + + if (!env.cgroup_fs_mount[0]) + return; + + err = umount(env.cgroup_fs_mount); + if (err < 0) + log_errno("umount %s", env.cgroup_fs_mount); + + err = rmdir(env.cgroup_fs_mount); + if (err < 0) + log_errno("rmdir %s", env.cgroup_fs_mount); + + env.cgroup_fs_mount[0] = 0; +} + +/* + * Creates a cgroup at /tmp/veristat-cgroup-mount-XXXXXX/accounting-<pid>, + * moves current process to this cgroup. + */ +static int create_stat_cgroup(void) +{ + char buf[PATH_MAX + 1]; + int err; + + if (!env.cgroup_fs_mount[0]) + return -1; + + env.memory_peak_fd = -1; + + snprintf_trunc(buf, sizeof(buf), "%s/accounting-%d", env.cgroup_fs_mount, getpid()); + err = mkdir(buf, 0777); + if (err < 0) { + err = log_errno("mkdir(%s)", buf); + goto err_out; + } + strcpy(env.stat_cgroup, buf); + + snprintf_trunc(buf, sizeof(buf), "%s/cgroup.procs", env.stat_cgroup); + err = write_one_line(buf, "%d\n", getpid()); + if (err < 0) { + err = log_errno("echo %d > %s", getpid(), buf); + goto err_out; + } + + snprintf_trunc(buf, sizeof(buf), "%s/memory.peak", env.stat_cgroup); + env.memory_peak_fd = open(buf, O_RDWR | O_APPEND); + if (env.memory_peak_fd < 0) { + err = log_errno("open(%s)", buf); + goto err_out; + } + + return 0; + +err_out: + destroy_stat_cgroup(); + return err; +} + +static void destroy_stat_cgroup(void) +{ + char buf[PATH_MAX]; + int err; + + close(env.memory_peak_fd); + + if (env.cgroup_fs_mount[0]) { + snprintf_trunc(buf, sizeof(buf), "%s/cgroup.procs", env.cgroup_fs_mount); + err = write_one_line(buf, "%d\n", getpid()); + if (err < 0) + log_errno("echo %d > %s", getpid(), buf); + } + + if (env.stat_cgroup[0]) { + err = rmdir(env.stat_cgroup); + if (err < 0) + log_errno("rmdir %s", env.stat_cgroup); + } + + env.stat_cgroup[0] = 0; +} + +/* Current value of /tmp/veristat-cgroup-mount-XXXXXX/accounting-<pid>/memory.peak */ +static long cgroup_memory_peak(void) +{ + long err, memory_peak; + char buf[32]; + + if (env.memory_peak_fd < 0) + return -1; + + err = pread(env.memory_peak_fd, buf, sizeof(buf) - 1, 0); + if (err <= 0) { + log_errno("read(%s/memory.peak)", env.stat_cgroup); + return -1; + } + + buf[err] = 0; + errno = 0; + memory_peak = strtoll(buf, NULL, 10); + if (errno) { + log_errno("unrecognized %s/memory.peak format: %s", env.stat_cgroup, buf); + return -1; + } + + return memory_peak; +} + static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) { const char *base_filename = basename(strdupa(filename)); const char *prog_name = bpf_program__name(prog); + long mem_peak_a, mem_peak_b, mem_peak = -1; char *buf; int buf_sz, log_level; struct verif_stats *stats; struct bpf_prog_info info; __u32 info_len = sizeof(info); - int err = 0; + int err = 0, cgroup_err; void *tmp; int fd; @@ -1332,7 +1551,16 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf if (env.force_reg_invariants) bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS); - err = bpf_object__load(obj); + err = bpf_object__prepare(obj); + if (!err) { + cgroup_err = create_stat_cgroup(); + mem_peak_a = cgroup_memory_peak(); + err = bpf_object__load(obj); + mem_peak_b = cgroup_memory_peak(); + destroy_stat_cgroup(); + if (!cgroup_err && mem_peak_a >= 0 && mem_peak_b >= 0) + mem_peak = mem_peak_b - mem_peak_a; + } env.progs_processed++; stats->file_name = strdup(base_filename); @@ -1341,6 +1569,7 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf stats->stats[SIZE] = bpf_program__insn_cnt(prog); stats->stats[PROG_TYPE] = bpf_program__type(prog); stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog); + stats->stats[MEMORY_PEAK] = mem_peak < 0 ? -1 : mem_peak / 1024; memset(&info, 0, info_len); fd = bpf_program__fd(prog); @@ -1824,6 +2053,7 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, case TOTAL_STATES: case PEAK_STATES: case MAX_STATES_PER_INSN: + case MEMORY_PEAK: case MARK_READ_MAX_LEN: { long v1 = s1->stats[id]; long v2 = s2->stats[id]; @@ -2053,6 +2283,7 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id, case STACK: case SIZE: case JITED_SIZE: + case MEMORY_PEAK: *val = s ? s->stats[id] : 0; break; default: @@ -2139,6 +2370,7 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats case MARK_READ_MAX_LEN: case SIZE: case JITED_SIZE: + case MEMORY_PEAK: case STACK: { long val; int err, n; @@ -2776,7 +3008,7 @@ static void output_prog_stats(void) static int handle_verif_mode(void) { - int i, err; + int i, err = 0; if (env.filename_cnt == 0) { fprintf(stderr, "Please provide path to BPF object file!\n\n"); @@ -2784,11 +3016,12 @@ static int handle_verif_mode(void) return -EINVAL; } + mount_cgroupfs(); for (i = 0; i < env.filename_cnt; i++) { err = process_obj(env.filenames[i]); if (err) { fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err); - return err; + goto out; } } @@ -2796,7 +3029,9 @@ static int handle_verif_mode(void) output_prog_stats(); - return 0; +out: + umount_cgroupfs(); + return err; } static int handle_replay_mode(void) -- 2.48.1