On Fri, Jul 11, 2025 at 3:35 PM Namhyung Kim <namhyung@xxxxxxxxxx> wrote:
>
> In addition to the function latency, it can measure event latencies.
> Some kernel tracepoints are paired and it's meaningful to measure how
> long it takes between the two events. The latency is tracked for the
> same thread.
>
> Currently it only uses BPF to do the work but it can be lifted later.
> Instead of having a separate BPF program for each tracepoint, it only
> uses generic 'event_begin' and 'event_end' programs to attach to any
> (raw) tracepoints.
>
>   $ sudo perf ftrace latency -a -b --hide-empty \
>       -e i915_request_wait_begin,i915_request_wait_end -- sleep 1
>   #  DURATION    |      COUNT | GRAPH                |
>      256 - 512 us |          4 | ######               |
>        2 -   4 ms |          2 | ###                  |
>        4 -   8 ms |         12 | ###################  |
>        8 -  16 ms |         10 | ################     |
>
>   # statistics (in usec)
>     total time:               194915
>       avg time:                 6961
>       max time:                12855
>       min time:                  373
>          count:                   28

Nice!

> Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
> ---
>  tools/perf/Documentation/perf-ftrace.txt    |   6 +
>  tools/perf/builtin-ftrace.c                 |  50 ++++++-
>  tools/perf/util/bpf_ftrace.c                |  69 ++++++---
>  tools/perf/util/bpf_skel/func_latency.bpf.c | 148 +++++++++++++-------
>  tools/perf/util/ftrace.h                    |   1 +
>  5 files changed, 199 insertions(+), 75 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
> index b77f58c4d2fdcff9..914457853bcf53ac 100644
> --- a/tools/perf/Documentation/perf-ftrace.txt
> +++ b/tools/perf/Documentation/perf-ftrace.txt
> @@ -139,6 +139,12 @@ OPTIONS for 'perf ftrace latency'
>         Set the function name to get the histogram. Unlike perf ftrace trace,
>         it only allows single function to calculate the histogram.
>
> +-e::
> +--events=::
> +       Set the pair of events to get the histogram. The histogram is calculated
> +       by the time difference between the two events from the same thread. This
> +       requires -b/--use-bpf option.
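
If I'm reading parse_filter_event() below correctly, the argument is a single
comma-separated pair in begin,end order, and because the events are attached
as raw tracepoints the names are the bare tracepoint names (no "subsys:"
prefix), as in the example from the commit message:

  $ sudo perf ftrace latency -a -b --hide-empty \
      -e i915_request_wait_begin,i915_request_wait_end -- sleep 1

Might be worth spelling that out in this paragraph.
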
> +
>  -b::
>  --use-bpf::
>         Use BPF to measure function latency instead of using the ftrace (it
> diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
> index 3a253a1b9f4526b9..e1f2f3fb1b0850a3 100644
> --- a/tools/perf/builtin-ftrace.c
> +++ b/tools/perf/builtin-ftrace.c
> @@ -1549,6 +1549,33 @@ static void delete_filter_func(struct list_head *head)
>         }
>  }
>
> +static int parse_filter_event(const struct option *opt, const char *str,
> +                             int unset __maybe_unused)
> +{
> +       struct list_head *head = opt->value;
> +       struct filter_entry *entry;
> +       char *s, *p;
> +       int ret = -ENOMEM;
> +
> +       s = strdup(str);
> +       if (s == NULL)
> +               return -ENOMEM;
> +
> +       while ((p = strsep(&s, ",")) != NULL) {
> +               entry = malloc(sizeof(*entry) + strlen(p) + 1);
> +               if (entry == NULL)
> +                       goto out;
> +
> +               strcpy(entry->name, p);
> +               list_add_tail(&entry->list, head);
> +       }
> +       ret = 0;
> +
> +out:
> +       free(s);
> +       return ret;
> +}
> +
>  static int parse_buffer_size(const struct option *opt,
>                              const char *str, int unset)
>  {
> @@ -1711,6 +1738,8 @@ int cmd_ftrace(int argc, const char **argv)
>         const struct option latency_options[] = {
>         OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
>                      "Show latency of given function", parse_filter_func),
> +       OPT_CALLBACK('e', "events", &ftrace.event_pair, "event1,event2",
> +                    "Show latency between the two events", parse_filter_event),
>  #ifdef HAVE_BPF_SKEL
>         OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
>                     "Use BPF to measure function latency"),
> @@ -1763,6 +1792,7 @@ int cmd_ftrace(int argc, const char **argv)
>         INIT_LIST_HEAD(&ftrace.notrace);
>         INIT_LIST_HEAD(&ftrace.graph_funcs);
>         INIT_LIST_HEAD(&ftrace.nograph_funcs);
> +       INIT_LIST_HEAD(&ftrace.event_pair);
>
>         signal(SIGINT, sig_handler);
>         signal(SIGUSR1, sig_handler);
> @@ -1817,9 +1847,24 @@ int cmd_ftrace(int argc, const char **argv)
>                 cmd_func = __cmd_ftrace;
>                 break;
>         case PERF_FTRACE_LATENCY:
> -               if (list_empty(&ftrace.filters)) {
> -                       pr_err("Should provide a function to measure\n");
> +               if (list_empty(&ftrace.filters) && list_empty(&ftrace.event_pair)) {
> +                       pr_err("Should provide a function or events to measure\n");
>                         parse_options_usage(ftrace_usage, options, "T", 1);
> +                       parse_options_usage(NULL, options, "e", 1);
> +                       ret = -EINVAL;
> +                       goto out_delete_filters;
> +               }
> +               if (!list_empty(&ftrace.filters) && !list_empty(&ftrace.event_pair)) {
> +                       pr_err("Please specify either of function or events\n");
> +                       parse_options_usage(ftrace_usage, options, "T", 1);
> +                       parse_options_usage(NULL, options, "e", 1);
> +                       ret = -EINVAL;
> +                       goto out_delete_filters;
> +               }
> +               if (!list_empty(&ftrace.event_pair) && !ftrace.target.use_bpf) {
> +                       pr_err("Event processing needs BPF\n");
> +                       parse_options_usage(ftrace_usage, options, "b", 1);
> +                       parse_options_usage(NULL, options, "e", 1);
>                         ret = -EINVAL;
>                         goto out_delete_filters;
>                 }
> @@ -1910,6 +1955,7 @@ int cmd_ftrace(int argc, const char **argv)
>         delete_filter_func(&ftrace.notrace);
>         delete_filter_func(&ftrace.graph_funcs);
>         delete_filter_func(&ftrace.nograph_funcs);
> +       delete_filter_func(&ftrace.event_pair);
>
>         return ret;
>  }
> diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
> index 7324668cc83e747e..34ac0adf841c27ee 100644
> --- a/tools/perf/util/bpf_ftrace.c
> +++ b/tools/perf/util/bpf_ftrace.c
> @@ -21,16 +21,21 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
>  {
>         int fd, err;
>         int i, ncpus = 1, ntasks = 1;
> -       struct filter_entry *func;
> +       struct filter_entry *func = NULL;
>
> -       if (!list_is_singular(&ftrace->filters)) {
> -               pr_err("ERROR: %s target function(s).\n",
> -                      list_empty(&ftrace->filters) ? "No" : "Too many");
> -               return -1;
> +       if (!list_empty(&ftrace->filters)) {
> +               if (!list_is_singular(&ftrace->filters)) {
> +                       pr_err("ERROR: Too many target functions.\n");
> +                       return -1;
> +               }
> +               func = list_first_entry(&ftrace->filters, struct filter_entry, list);
> +       } else {
> +               if (list_is_singular(&ftrace->event_pair)) {

Should there be a list length function and then check the length == 2
here? An empty list or list of length >2 will otherwise pass.

Thanks,
Ian

> +                       pr_err("ERROR: Needs two target events.\n");
> +                       return -1;
> +               }
>         }
>
> -       func = list_first_entry(&ftrace->filters, struct filter_entry, list);
> -
>         skel = func_latency_bpf__open();
>         if (!skel) {
>                 pr_err("Failed to open func latency skeleton\n");
> @@ -93,20 +98,44 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
>
>         skel->bss->min = INT64_MAX;
>
> -       skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
> -                                                           false, func->name);
> -       if (IS_ERR(skel->links.func_begin)) {
> -               pr_err("Failed to attach fentry program\n");
> -               err = PTR_ERR(skel->links.func_begin);
> -               goto out;
> -       }
> +       if (func) {
> +               skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
> +                                                                   false, func->name);
> +               if (IS_ERR(skel->links.func_begin)) {
> +                       pr_err("Failed to attach fentry program\n");
> +                       err = PTR_ERR(skel->links.func_begin);
> +                       goto out;
> +               }
>
> -       skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
> -                                                         true, func->name);
> -       if (IS_ERR(skel->links.func_end)) {
> -               pr_err("Failed to attach fexit program\n");
> -               err = PTR_ERR(skel->links.func_end);
> -               goto out;
> +               skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
> +                                                                 true, func->name);
> +               if (IS_ERR(skel->links.func_end)) {
> +                       pr_err("Failed to attach fexit program\n");
> +                       err = PTR_ERR(skel->links.func_end);
> +                       goto out;
> +               }
> +       } else {
> +               struct filter_entry *event;
> +
> +               event = list_first_entry(&ftrace->event_pair, struct filter_entry, list);
> +
> +               skel->links.event_begin = bpf_program__attach_raw_tracepoint(skel->progs.event_begin,
> +                                                                            event->name);
> +               if (IS_ERR(skel->links.event_begin)) {
> +                       pr_err("Failed to attach first tracepoint program\n");
> +                       err = PTR_ERR(skel->links.event_begin);
> +                       goto out;
> +               }
> +
> +               event = list_next_entry(event, list);
> +
> +               skel->links.event_end = bpf_program__attach_raw_tracepoint(skel->progs.event_end,
> +                                                                          event->name);
> +               if (IS_ERR(skel->links.event_end)) {
> +                       pr_err("Failed to attach second tracepoint program\n");
> +                       err = PTR_ERR(skel->links.event_end);
> +                       goto out;
> +               }
>         }
>
>         /* XXX: we don't actually use this fd - just for poll() */
> diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
> index e731a79a753a4d2d..621e2022c8bc9648 100644
> --- a/tools/perf/util/bpf_skel/func_latency.bpf.c
> +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
> @@ -52,34 +52,89 @@ const volatile unsigned int min_latency;
>  const volatile unsigned int max_latency;
>  const volatile unsigned int bucket_num = NUM_BUCKET;
>
> -SEC("kprobe/func")
> -int BPF_PROG(func_begin)
> +static bool can_record(void)
>  {
> -       __u64 key, now;
> -
> -       if (!enabled)
> -               return 0;
> -
> -       key = bpf_get_current_pid_tgid();
> -
>         if (has_cpu) {
>                 __u32 cpu = bpf_get_smp_processor_id();
>                 __u8 *ok;
>
>                 ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
>                 if (!ok)
> -                       return 0;
> +                       return false;
>         }
>
>         if (has_task) {
> -               __u32 pid = key & 0xffffffff;
> +               __u32 pid = bpf_get_current_pid_tgid();
>                 __u8 *ok;
>
>                 ok = bpf_map_lookup_elem(&task_filter, &pid);
>                 if (!ok)
> -                       return 0;
> +                       return false;
>         }
> +       return true;
> +}
> +
> +static void update_latency(__s64 delta)
> +{
> +       __u64 val = delta;
> +       __u32 key = 0;
> +       __u64 *hist;
> +       __u64 cmp_base = use_nsec ? 1 : 1000;
> +
> +       if (delta < 0)
> +               return;
>
> +       if (bucket_range != 0) {
> +               val = delta / cmp_base;
> +
> +               if (min_latency > 0) {
> +                       if (val > min_latency)
> +                               val -= min_latency;
> +                       else
> +                               goto do_lookup;
> +               }
> +
> +               // Less than 1 unit (ms or ns), or, in the future,
> +               // than the min latency desired.
> +               if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
> +                       key = val / bucket_range + 1;
> +                       if (key >= bucket_num)
> +                               key = bucket_num - 1;
> +               }
> +
> +               goto do_lookup;
> +       }
> +       // calculate index using delta
> +       for (key = 0; key < (bucket_num - 1); key++) {
> +               if (delta < (cmp_base << key))
> +                       break;
> +       }
> +
> +do_lookup:
> +       hist = bpf_map_lookup_elem(&latency, &key);
> +       if (!hist)
> +               return;
> +
> +       __sync_fetch_and_add(hist, 1);
> +
> +       __sync_fetch_and_add(&total, delta); // always in nsec
> +       __sync_fetch_and_add(&count, 1);
> +
> +       if (delta > max)
> +               max = delta;
> +       if (delta < min)
> +               min = delta;
> +}
> +
> +SEC("kprobe/func")
> +int BPF_PROG(func_begin)
> +{
> +       __u64 key, now;
> +
> +       if (!enabled || !can_record())
> +               return 0;
> +
> +       key = bpf_get_current_pid_tgid();
>         now = bpf_ktime_get_ns();
>
>         // overwrite timestamp for nested functions
> @@ -92,7 +147,6 @@ int BPF_PROG(func_end)
>  {
>         __u64 tid;
>         __u64 *start;
> -       __u64 cmp_base = use_nsec ? 1 : 1000;
>
>         if (!enabled)
>                 return 0;
> @@ -101,56 +155,44 @@ int BPF_PROG(func_end)
>
>         start = bpf_map_lookup_elem(&functime, &tid);
>         if (start) {
> -               __s64 delta = bpf_ktime_get_ns() - *start;
> -               __u64 val = delta;
> -               __u32 key = 0;
> -               __u64 *hist;
> -
> +               update_latency(bpf_ktime_get_ns() - *start);
>                 bpf_map_delete_elem(&functime, &tid);
> +       }
>
> -               if (delta < 0)
> -                       return 0;
> +       return 0;
> +}
>
> -               if (bucket_range != 0) {
> -                       val = delta / cmp_base;
> +SEC("raw_tp")
> +int BPF_PROG(event_begin)
> +{
> +       __u64 key, now;
>
> -                       if (min_latency > 0) {
> -                               if (val > min_latency)
> -                                       val -= min_latency;
> -                               else
> -                                       goto do_lookup;
> -                       }
> +       if (!enabled || !can_record())
> +               return 0;
>
> -                       // Less than 1 unit (ms or ns), or, in the future,
> -                       // than the min latency desired.
> -                       if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
> -                               key = val / bucket_range + 1;
> -                               if (key >= bucket_num)
> -                                       key = bucket_num - 1;
> -                       }
> +       key = bpf_get_current_pid_tgid();
> +       now = bpf_ktime_get_ns();
>
> -                       goto do_lookup;
> -               }
> -               // calculate index using delta
> -               for (key = 0; key < (bucket_num - 1); key++) {
> -                       if (delta < (cmp_base << key))
> -                               break;
> -               }
> +       // overwrite timestamp for nested events
> +       bpf_map_update_elem(&functime, &key, &now, BPF_ANY);
> +       return 0;
> +}
>
> -do_lookup:
> -       hist = bpf_map_lookup_elem(&latency, &key);
> -       if (!hist)
> -               return 0;
> +SEC("raw_tp")
> +int BPF_PROG(event_end)
> +{
> +       __u64 tid;
> +       __u64 *start;
>
> -       __sync_fetch_and_add(hist, 1);
> +       if (!enabled)
> +               return 0;
>
> -       __sync_fetch_and_add(&total, delta); // always in nsec
> -       __sync_fetch_and_add(&count, 1);
> +       tid = bpf_get_current_pid_tgid();
>
> -       if (delta > max)
> -               max = delta;
> -       if (delta < min)
> -               min = delta;
> +       start = bpf_map_lookup_elem(&functime, &tid);
> +       if (start) {
> +               update_latency(bpf_ktime_get_ns() - *start);
> +               bpf_map_delete_elem(&functime, &tid);
>         }
>
>         return 0;
> diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
> index a9bc47da83a56cd6..3f5094ac59080310 100644
> --- a/tools/perf/util/ftrace.h
> +++ b/tools/perf/util/ftrace.h
> @@ -17,6 +17,7 @@ struct perf_ftrace {
>         struct list_head notrace;
>         struct list_head graph_funcs;
>         struct list_head nograph_funcs;
> +       struct list_head event_pair;
>         struct hashmap *profile_hash;
>         unsigned long percpu_buffer_size;
>         bool inherit;
> --
> 2.50.0.727.gbf7dc18ff4-goog
>
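
On the list length question above: tools/include has list_for_each_entry(),
but I'm not sure list_count_nodes() has been copied over, so it may need a
tiny local helper. A rough, untested sketch just to show the shape of the
check (the helper name is made up):

  static int count_filter_entries(struct list_head *head)
  {
          struct filter_entry *entry;
          int n = 0;

          /* walk the ->list links of each filter_entry on the list */
          list_for_each_entry(entry, head, list)
                  n++;
          return n;
  }

and then in perf_ftrace__latency_prepare_bpf():

          } else {
                  /* require exactly two events: rejects empty and >2 alike */
                  if (count_filter_entries(&ftrace->event_pair) != 2) {
                          pr_err("ERROR: Needs two target events.\n");
                          return -1;
                  }
          }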