Re: [PATCH v1 10/14] bpf: selftests: bpf OOM handler test

Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx> · Wed, 20 Aug 2025 11:33:42 +0200

On Mon, 18 Aug 2025 at 19:02, Roman Gushchin <roman.gushchin@xxxxxxxxx> wrote:
>
> Implement a pseudo-realistic test for the OOM handling
> functionality.
>
> The OOM handling policy implemented in bpf is to kill all
> tasks belonging to the biggest leaf cgroup that doesn't
> contain any unkillable tasks (tasks with oom_score_adj set
> to -1000). Pagecache size is excluded from the accounting.
>
> The test creates a hierarchy of memory cgroups, causes an
> OOM at the top level, verifies that the expected process is
> killed, and checks the memcg's OOM statistics.
>
> Signed-off-by: Roman Gushchin <roman.gushchin@xxxxxxxxx>
> ---
>  [...]
> +
> +/*
> + * Find the largest leaf cgroup (ignoring page cache) without unkillable tasks
> + * and kill all tasks belonging to it.
> + */
> +SEC("struct_ops.s/handle_out_of_memory")
> +int BPF_PROG(test_out_of_memory, struct oom_control *oc)
> +{
> +       struct task_struct *task;
> +       struct mem_cgroup *root_memcg = oc->memcg;
> +       struct mem_cgroup *memcg, *victim = NULL;
> +       struct cgroup_subsys_state *css_pos;
> +       unsigned long usage, max_usage = 0;
> +       unsigned long pagecache = 0;
> +       int ret = 0;
> +
> +       if (root_memcg)
> +               root_memcg = bpf_get_mem_cgroup(&root_memcg->css);
> +       else
> +               root_memcg = bpf_get_root_mem_cgroup();
> +
> +       if (!root_memcg)
> +               return 0;
> +
> +       bpf_rcu_read_lock();
> +       bpf_for_each(css, css_pos, &root_memcg->css, BPF_CGROUP_ITER_DESCENDANTS_POST) {
> +               if (css_pos->cgroup->nr_descendants + css_pos->cgroup->nr_dying_descendants)
> +                       continue;
> +
> +               memcg = bpf_get_mem_cgroup(css_pos);
> +               if (!memcg)
> +                       continue;
> +
> +               usage = bpf_mem_cgroup_usage(memcg);
> +               pagecache = bpf_mem_cgroup_page_state(memcg, NR_FILE_PAGES);
> +
> +               if (usage > pagecache)
> +                       usage -= pagecache;
> +               else
> +                       usage = 0;
> +
> +               if ((usage > max_usage) && mem_cgroup_killable(memcg)) {
> +                       max_usage = usage;
> +                       if (victim)
> +                               bpf_put_mem_cgroup(victim);
> +                       victim = bpf_get_mem_cgroup(&memcg->css);
> +               }
> +
> +               bpf_put_mem_cgroup(memcg);
> +       }
> +       bpf_rcu_read_unlock();
> +
> +       if (!victim)
> +               goto exit;
> +
> +       bpf_for_each(css_task, task, &victim->css, CSS_TASK_ITER_PROCS) {
> +               struct task_struct *t = bpf_task_acquire(task);
> +
> +               if (t) {
> +                       if (!bpf_task_is_oom_victim(task))
> +                               bpf_oom_kill_process(oc, task, "bpf oom test");

Is there a scenario where we would want to invoke bpf_oom_kill_process()
on a task that is already an OOM victim?
If not, would it be better to subsume this check into the kfunc itself?

> +                       bpf_task_release(t);
> +                       ret = 1;
> +               }
> +       }
> +
> +       bpf_put_mem_cgroup(victim);
> +exit:
> +       bpf_put_mem_cgroup(root_memcg);
> +
> +       return ret;
> +}
> +
> +SEC(".struct_ops.link")
> +struct bpf_oom_ops test_bpf_oom = {
> +       .name = "bpf_test_policy",
> +       .handle_out_of_memory = (void *)test_out_of_memory,
> +};
> --
> 2.50.1
>