On 2025-07-11 19:19:26 [-0700], Alexei Starovoitov wrote:
> > If there is no parent check then we could do "normal lock" on both
> > sides.
> 
> How would ___slab_alloc() know whether there was a parent check or not?
> 
> imo keeping local_lock_irqsave() as-is is cleaner,
> since if there is no parent check lockdep will rightfully complain.

what about this:

diff --git a/mm/slub.c b/mm/slub.c
index 7e2ffe1d46c6c..3520d1c25c205 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3693,6 +3693,34 @@ static inline void *freeze_slab(struct kmem_cache *s, struct slab *slab)
 	return freelist;
 }
 
+static void local_lock_cpu_slab(struct kmem_cache *s, const gfp_t gfp_flags,
+				unsigned long *flags)
+{
+	bool allow_spin = gfpflags_allow_spinning(gfp_flags);
+
+	/*
+	 * ___slab_alloc()'s caller is supposed to check if kmem_cache::kmem_cache_cpu::lock
+	 * can be acquired without a deadlock before invoking the function.
+	 *
+	 * On PREEMPT_RT an invocation is not possible from IRQ-off or
+	 * preempt-disabled context. The lock will always be acquired and, if
+	 * needed, it will block and sleep until the lock is available.
+	 *
+	 * On !PREEMPT_RT allocations from any context but NMI are safe. The lock
+	 * is always acquired with disabled interrupts, meaning it is always
+	 * possible to acquire it.
+	 * In NMI context it is necessary to check whether the lock is already
+	 * held. If it is not, it is safe to acquire it. The trylock semantics
+	 * are used to tell lockdep that we don't spin. The BUG_ON() will not
+	 * trigger if it is safe to acquire the lock.
+	 *
+	 */
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && !allow_spin)
+		BUG_ON(!local_trylock_irqsave(&s->cpu_slab->lock, *flags));
+	else
+		local_lock_irqsave(&s->cpu_slab->lock, *flags);
+}
+
 /*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
@@ -3765,7 +3793,8 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 		goto deactivate_slab;
 
 	/* must check again c->slab in case we got preempted and it changed */
-	local_lock_irqsave(&s->cpu_slab->lock, flags);
+	local_lock_cpu_slab(s, gfpflags, &flags);
+
 	if (unlikely(slab != c->slab)) {
 		local_unlock_irqrestore(&s->cpu_slab->lock, flags);
 		goto reread_slab;
@@ -3803,7 +3832,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 
 deactivate_slab:
 
-	local_lock_irqsave(&s->cpu_slab->lock, flags);
+	local_lock_cpu_slab(s, gfpflags, &flags);
 	if (slab != c->slab) {
 		local_unlock_irqrestore(&s->cpu_slab->lock, flags);
 		goto reread_slab;
@@ -3819,7 +3848,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 
 #ifdef CONFIG_SLUB_CPU_PARTIAL
 	while (slub_percpu_partial(c)) {
-		local_lock_irqsave(&s->cpu_slab->lock, flags);
+		local_lock_cpu_slab(s, gfpflags, &flags);
 		if (unlikely(c->slab)) {
 			local_unlock_irqrestore(&s->cpu_slab->lock, flags);
 			goto reread_slab;
@@ -3947,7 +3976,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 
 retry_load_slab:
 
-	local_lock_irqsave(&s->cpu_slab->lock, flags);
+	local_lock_cpu_slab(s, gfpflags, &flags);
 	if (unlikely(c->slab)) {
 		void *flush_freelist = c->freelist;
 		struct slab *flush_slab = c->slab;
@@ -4003,12 +4032,8 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 			p = ERR_PTR(-EBUSY);
 			goto out;
 		}
-		local_lock_lockdep_start(&s->cpu_slab->lock);
-		p = ___slab_alloc(s, gfpflags, node, addr, c, orig_size);
-		local_lock_lockdep_end(&s->cpu_slab->lock);
-	} else {
-		p = ___slab_alloc(s, gfpflags, node, addr, c, orig_size);
 	}
+	p = ___slab_alloc(s, gfpflags, node, addr, c, orig_size);
 out:
#ifdef CONFIG_PREEMPT_COUNT
 	slub_put_cpu_ptr(s->cpu_slab);
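
For completeness, the caller-side contract the BUG_ON() relies on would look
roughly like the existing early-out in __slab_alloc() above. This is only a
sketch; the local_lock_is_locked() helper name is assumed from the earlier
patches in this series:

	if (unlikely(!gfpflags_allow_spinning(gfpflags))) {
		/*
		 * Parent check: if this CPU already holds cpu_slab->lock
		 * (e.g. an NMI interrupted ___slab_alloc()), bail out
		 * instead of entering the slow path, so that the
		 * local_trylock_irqsave() in local_lock_cpu_slab() cannot
		 * fail and trip the BUG_ON().
		 */
		if (local_lock_is_locked(&s->cpu_slab->lock)) {
			p = ERR_PTR(-EBUSY);
			goto out;
		}
	}
	p = ___slab_alloc(s, gfpflags, node, addr, c, orig_size);

Sebastian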