The local copy of smp_cond_load_acquire_timewait() (from [1]) is only
usable for rqspinlock timeout and deadlock checking in a degenerate
fashion, by overloading the evaluation of the condvar.

Switch to the updated smp_cond_load_acquire_timewait() interface, and
move the timeout and deadlock handling (partially stubbed here) to the
wait policy handler.

[1] https://lore.kernel.org/lkml/20250203214911.898276-1-ankur.a.arora@xxxxxxxxxx

Cc: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx>
Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Signed-off-by: Ankur Arora <ankur.a.arora@xxxxxxxxxx>
---

Note: This patch is missing all the important bits. Just wanted to
check if the interface is workable before plugging in the deadlock
checking etc. A sketch of how I'm reading the wait-policy calling
convention follows the patch.

 arch/arm64/include/asm/rqspinlock.h | 96 ++++++------------------------
 1 file changed, 19 insertions(+), 77 deletions(-)

diff --git a/arch/arm64/include/asm/rqspinlock.h b/arch/arm64/include/asm/rqspinlock.h
index 9ea0a74e5892..27138b591e31 100644
--- a/arch/arm64/include/asm/rqspinlock.h
+++ b/arch/arm64/include/asm/rqspinlock.h
@@ -4,89 +4,31 @@
 
 #include <asm/barrier.h>
 
-/*
- * Hardcode res_smp_cond_load_acquire implementations for arm64 to a custom
- * version based on [0]. In rqspinlock code, our conditional expression involves
- * checking the value _and_ additionally a timeout. However, on arm64, the
- * WFE-based implementation may never spin again if no stores occur to the
- * locked byte in the lock word. As such, we may be stuck forever if
- * event-stream based unblocking is not available on the platform for WFE spin
- * loops (arch_timer_evtstrm_available).
- *
- * Once support for smp_cond_load_acquire_timewait [0] lands, we can drop this
- * copy-paste.
- *
- * While we rely on the implementation to amortize the cost of sampling
- * cond_expr for us, it will not happen when event stream support is
- * unavailable, time_expr check is amortized. This is not the common case, and
- * it would be difficult to fit our logic in the time_expr_ns >= time_limit_ns
- * comparison, hence just let it be. In case of event-stream, the loop is woken
- * up at microsecond granularity.
- *
- * [0]: https://lore.kernel.org/lkml/20250203214911.898276-1-ankur.a.arora@xxxxxxxxxx
- */
+#define RES_DEF_SPIN_COUNT	(32 * 1024)
 
-#ifndef smp_cond_load_acquire_timewait
-
-#define smp_cond_time_check_count	200
-
-#define __smp_cond_load_relaxed_spinwait(ptr, cond_expr, time_expr_ns,	\
-					 time_limit_ns) ({		\
-	typeof(ptr) __PTR = (ptr);					\
-	__unqual_scalar_typeof(*ptr) VAL;				\
-	unsigned int __count = 0;					\
-	for (;;) {							\
-		VAL = READ_ONCE(*__PTR);				\
-		if (cond_expr)						\
-			break;						\
-		cpu_relax();						\
-		if (__count++ < smp_cond_time_check_count)		\
-			continue;					\
-		if ((time_expr_ns) >= (time_limit_ns))			\
-			break;						\
-		__count = 0;						\
-	}								\
-	(typeof(*ptr))VAL;						\
-})
-
-#define __smp_cond_load_acquire_timewait(ptr, cond_expr,		\
-					 time_expr_ns, time_limit_ns)	\
-({									\
-	typeof(ptr) __PTR = (ptr);					\
-	__unqual_scalar_typeof(*ptr) VAL;				\
-	for (;;) {							\
-		VAL = smp_load_acquire(__PTR);				\
-		if (cond_expr)						\
-			break;						\
-		__cmpwait_relaxed(__PTR, VAL);				\
-		if ((time_expr_ns) >= (time_limit_ns))			\
-			break;						\
-	}								\
-	(typeof(*ptr))VAL;						\
-})
-
-#define smp_cond_load_acquire_timewait(ptr, cond_expr,			\
-				       time_expr_ns, time_limit_ns)	\
-({									\
-	__unqual_scalar_typeof(*ptr) _val;				\
-	int __wfe = arch_timer_evtstrm_available();			\
+#define rqspinlock_cond_timewait(now, prev, end, spin, wait) ({	\
+	bool __ev = arch_timer_evtstrm_available();			\
+	bool __wfet = alternative_has_cap_unlikely(ARM64_HAS_WFXT);	\
+	u64 __ret;							\
 									\
-	if (likely(__wfe)) {						\
-		_val = __smp_cond_load_acquire_timewait(ptr, cond_expr,	\
-							time_expr_ns,	\
-							time_limit_ns);	\
+	*wait = false;							\
+	/* TODO: Handle deadlock check. */				\
+	if (now >= end) {						\
+		__ret = 0;						\
 	} else {							\
-		_val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr,	\
-							time_expr_ns,	\
-							time_limit_ns);	\
-		smp_acquire__after_ctrl_dep();				\
+		if (__ev || __wfet)					\
+			*wait = true;					\
+		else							\
+			*spin = RES_DEF_SPIN_COUNT;			\
+		__ret = now;						\
 	}								\
-	(typeof(*ptr))_val;						\
+									\
+	__ret;								\
 })
 
-#endif
-
-#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)
+#define res_smp_cond_load_acquire(v, c)					\
+	smp_cond_load_acquire_timewait(v, c, rqspinlock_cond_timewait,	\
+				       ktime_get_mono_fast_ns(), (u64)RES_DEF_TIMEOUT)
 
 #include <asm-generic/rqspinlock.h>
-- 
2.43.5
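
P.S. For reviewers, a rough sketch of how I'm reading the caller side of
the wait-policy contract from [1]. The __sketch_* macro below is made up
and the real generic loop in [1] may sample time and rearm WFE
differently; the point is the convention the patch above assumes: the
policy returns 0 once the deadline has passed (terminate), otherwise it
returns the current timestamp (the next 'prev') and selects waiting vs.
spinning via *wait/*spin.

#define __sketch_cond_load_acquire_timewait(ptr, cond_expr, policy,	\
					    start, timeout)		\
({									\
	typeof(ptr) __PTR = (ptr);					\
	__unqual_scalar_typeof(*ptr) VAL;				\
	u64 __prev = (start), __end = (start) + (timeout);		\
	u32 __spin = 0;							\
	bool __wait = false;						\
									\
	for (;;) {							\
		VAL = smp_load_acquire(__PTR);				\
		if (cond_expr)						\
			break;						\
		/* Ask the policy what to do next. */			\
		__prev = policy(ktime_get_mono_fast_ns(), __prev,	\
				__end, &__spin, &__wait);		\
		if (!__prev)	/* deadline passed: give up */		\
			break;						\
		if (__wait)						\
			/* WFE until a store, evstrm tick or WFxT */	\
			__cmpwait_relaxed(__PTR, VAL);			\
		else							\
			/* no wakeup source: bounded spin */		\
			while (__spin-- && READ_ONCE(*__PTR) == VAL)	\
				cpu_relax();				\
	}								\
	(typeof(*ptr))VAL;						\
})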