Creates an "rcuscale" folder in debugfs and creates a "writer_durations" file in the folder. This file is in CSV format. Each line represents one duration record, with columns defined as: writer_id,duration Added an option "writer_no_print" to skip printing writer durations on cleanup. This allows external tools to read structured data and also drastically improves cleanup performance on large core count machines. On a 256C 512T machines running nreaders=1 nwriters=511: Before: $ time modprobe -r rcuscale; modprobe -r torture real 3m17.349s user 0m0.000s sys 3m15.288s After: $ time cat /sys/kernel/debug/rcuscale/writer_durations > durations.csv real 0m0.005s user 0m0.000s sys 0m0.005s $ time modprobe -r rcuscale; modprobe -r torture real 0m0.388s user 0m0.000s sys 0m0.335s Signed-off-by: Yuzhuo Jing <yuzhuo@xxxxxxxxxx> --- .../admin-guide/kernel-parameters.txt | 5 + kernel/rcu/rcuscale.c | 142 +++++++++++++++++- 2 files changed, 139 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f1f2c0874da9..7b62a84a19d4 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5583,6 +5583,11 @@ periods, but in jiffies. The default of zero says no holdoff. + rcuscale.writer_no_print= [KNL] + Do not print writer durations to kernel ring buffer. + Instead, users can read them from the + rcuscale/writer_durations file in debugfs. + rcutorture.fqs_duration= [KNL] Set duration of force_quiescent_state bursts in microseconds. diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index b521d0455992..ad10b42be6fc 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -40,6 +40,8 @@ #include <linux/vmalloc.h> #include <linux/rcupdate_trace.h> #include <linux/sched/debug.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> #include "rcu.h" @@ -97,6 +99,7 @@ torture_param(bool, shutdown, RCUSCALE_SHUTDOWN, torture_param(int, verbose, 1, "Enable verbose debugging printk()s"); torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable"); torture_param(int, writer_holdoff_jiffies, 0, "Holdoff (jiffies) between GPs, zero to disable"); +torture_param(bool, writer_no_print, false, "Do not print writer durations to ring buffer"); torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() scale test?"); torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate."); torture_param(int, kfree_by_call_rcu, 0, "Use call_rcu() to emulate kfree_rcu()?"); @@ -138,6 +141,9 @@ static u64 t_rcu_scale_writer_finished; static unsigned long b_rcu_gp_test_started; static unsigned long b_rcu_gp_test_finished; +static struct dentry *debugfs_dir; +static struct dentry *debugfs_writer_durations; + #define MAX_MEAS 10000 #define MIN_MEAS 100 @@ -607,6 +613,7 @@ rcu_scale_writer(void *arg) t = ktime_get_mono_fast_ns(); *wdp = t - *wdp; i_max = i; + writer_n_durations[me] = i_max + 1; if (!started && atomic_read(&n_rcu_scale_writer_started) >= nrealwriters) started = true; @@ -620,6 +627,7 @@ rcu_scale_writer(void *arg) nrealwriters) { schedule_timeout_interruptible(10); rcu_ftrace_dump(DUMP_ALL); + WRITE_ONCE(test_complete, true); SCALEOUT_STRING("Test complete"); t_rcu_scale_writer_finished = t; if (gp_exp) { @@ -666,7 +674,6 @@ rcu_scale_writer(void *arg) rcu_scale_free(wmbp); cur_ops->gp_barrier(); } - writer_n_durations[me] = i_max + 1; torture_kthread_stopping("rcu_scale_writer"); return 0; } @@ -941,6 +948,117 @@ 
 	return firsterr;
 }
 
+/*
+ * A seq_file for writer_durations. Content is only visible when all writers
+ * finish. Element i of the sequence is writer_durations + i.
+ */
+static void *writer_durations_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t writer_id = *pos;
+
+	if (!test_complete || writer_id < 0 || writer_id >= nrealwriters)
+		return NULL;
+
+	return writer_durations + writer_id;
+}
+
+static void *writer_durations_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	(*pos)++;
+	return writer_durations_start(m, pos);
+}
+
+static void writer_durations_stop(struct seq_file *m, void *v)
+{
+}
+
+/*
+ * Each element in the seq_file is an array of one writer's durations.
+ * Each element prints writer_n_durations[writer_id] lines, and each line
+ * contains one duration record, in CSV format:
+ *   writer_id,duration
+ */
+static int writer_durations_show(struct seq_file *m, void *v)
+{
+	u64 **durations = v;
+	loff_t writer_id = durations - writer_durations;
+
+	for (int i = 0; i < writer_n_durations[writer_id]; ++i)
+		seq_printf(m, "%lld,%llu\n", writer_id, durations[0][i]);
+
+	return 0;
+}
+
+static const struct seq_operations writer_durations_op = {
+	.start = writer_durations_start,
+	.next = writer_durations_next,
+	.stop = writer_durations_stop,
+	.show = writer_durations_show
+};
+
+static int writer_durations_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &writer_durations_op);
+}
+
+static const struct file_operations writer_durations_fops = {
+	.owner = THIS_MODULE,
+	.open = writer_durations_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+/*
+ * Create an rcuscale directory exposing run states and results.
+ */
+static int register_debugfs(void)
+{
+#define try_create_file(variable, name, mode, parent, data, fops) \
+({ \
+	variable = debugfs_create_file((name), (mode), (parent), (data), (fops)); \
+	err = PTR_ERR_OR_ZERO(variable); \
+	err; \
+})
+
+	int err;
+
+	debugfs_dir = debugfs_create_dir("rcuscale", NULL);
+	err = PTR_ERR_OR_ZERO(debugfs_dir);
+	if (err)
+		goto fail;
+
+	if (try_create_file(debugfs_writer_durations, "writer_durations", 0444,
+			    debugfs_dir, NULL, &writer_durations_fops))
+		goto fail;
+
+	return 0;
fail:
+	pr_err("rcu-scale: Failed to create debugfs file.\n");
+	/* unregister_debugfs() is called by rcu_scale_cleanup(), so
+	 * avoid calling it twice here.
+	 */
+	return err;
+#undef try_create_file
+}
+
+static void unregister_debugfs(void)
+{
+#define try_remove(variable) \
+do { \
+	if (!IS_ERR_OR_NULL(variable)) \
+		debugfs_remove(variable); \
+	variable = NULL; \
+} while (0)
+
+	try_remove(debugfs_writer_durations);
+
+	/* Remove the directory after its files. */
+	try_remove(debugfs_dir);
+
+#undef try_remove
+}
+
 static void
 rcu_scale_cleanup(void)
 {
@@ -961,6 +1079,8 @@ rcu_scale_cleanup(void)
 	if (gp_exp && gp_async)
 		SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
 
+	unregister_debugfs();
+
 	// If built-in, just report all of the GP kthread's CPU time.
 	if (IS_BUILTIN(CONFIG_RCU_SCALE_TEST) && !kthread_tp && cur_ops->rso_gp_kthread)
 		kthread_tp = cur_ops->rso_gp_kthread();
@@ -1020,13 +1140,15 @@ rcu_scale_cleanup(void)
 		wdpp = writer_durations[i];
 		if (!wdpp)
 			continue;
-		for (j = 0; j < writer_n_durations[i]; j++) {
-			wdp = &wdpp[j];
-			pr_alert("%s%s %4d writer-duration: %5d %llu\n",
-				 scale_type, SCALE_FLAG,
-				 i, j, *wdp);
-			if (j % 100 == 0)
-				schedule_timeout_uninterruptible(1);
+		if (!writer_no_print) {
+			for (j = 0; j < writer_n_durations[i]; j++) {
+				wdp = &wdpp[j];
+				pr_alert("%s%s %4d writer-duration: %5d %llu\n",
+					 scale_type, SCALE_FLAG,
+					 i, j, *wdp);
+				if (j % 100 == 0)
+					schedule_timeout_uninterruptible(1);
+			}
 		}
 		kfree(writer_durations[i]);
 		if (writer_freelists) {
@@ -1202,6 +1324,10 @@ rcu_scale_init(void)
 		if (torture_init_error(firsterr))
 			goto unwind;
 	}
+
+	if (register_debugfs())
+		goto unwind;
+
 	torture_init_end();
 	return 0;
 
--
2.50.1.552.g942d659e1b-goog
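
For reference, below is a minimal user-space consumer of the CSV file, a
sketch for illustration only (not part of the patch). It assumes the
debugfs path shown in the changelog and a hypothetical MAX_WRITERS bound;
it reads "writer_id,duration" records and prints per-writer record counts
and mean durations (durations are in nanoseconds, as produced by
ktime_get_mono_fast_ns()).

/*
 * Example consumer of /sys/kernel/debug/rcuscale/writer_durations:
 * summarize record count and mean duration per writer.
 */
#include <stdio.h>
#include <stdlib.h>

#define MAX_WRITERS 1024	/* assumed upper bound, adjust as needed */

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] :
			   "/sys/kernel/debug/rcuscale/writer_durations";
	static unsigned long long sum[MAX_WRITERS], cnt[MAX_WRITERS];
	unsigned long long id, dur;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return EXIT_FAILURE;
	}
	/* Each record is "writer_id,duration"; %llu skips the newline. */
	while (fscanf(f, "%llu,%llu", &id, &dur) == 2) {
		if (id >= MAX_WRITERS)
			continue;
		sum[id] += dur;
		cnt[id]++;
	}
	fclose(f);
	for (id = 0; id < MAX_WRITERS; id++)
		if (cnt[id])
			printf("writer %llu: %llu records, mean %llu ns\n",
			       id, cnt[id], sum[id] / cnt[id]);
	return EXIT_SUCCESS;
}

Built with, e.g., "gcc -O2 -o wd-report wd-report.c" (file name is
hypothetical) and run after the writers finish, it prints one summary
line per writer that produced at least one record.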