I'm not sure we should optimize for shadow stack yet. Unless it's easy to think about... (below) On Wed, 2025-08-20 at 14:30 +0200, Peter Zijlstra wrote: > --- a/arch/x86/include/asm/shstk.h > +++ b/arch/x86/include/asm/shstk.h > @@ -23,6 +23,8 @@ int setup_signal_shadow_stack(struct ksi > int restore_signal_shadow_stack(void); > int shstk_update_last_frame(unsigned long val); > bool shstk_is_enabled(void); > +int shstk_pop(u64 *val); > +int shstk_push(u64 val); > #else > static inline long shstk_prctl(struct task_struct *task, int option, > unsigned long arg2) { return -EINVAL; } > @@ -35,6 +37,8 @@ static inline int setup_signal_shadow_st > static inline int restore_signal_shadow_stack(void) { return 0; } > static inline int shstk_update_last_frame(unsigned long val) { return 0; } > static inline bool shstk_is_enabled(void) { return false; } > +static inline int shstk_pop(u64 *val) { return -ENOTSUPP; } > +static inline int shstk_push(u64 val) { return -ENOTSUPP; } > #endif /* CONFIG_X86_USER_SHADOW_STACK */ > > #endif /* __ASSEMBLER__ */ > --- a/arch/x86/kernel/shstk.c > +++ b/arch/x86/kernel/shstk.c > @@ -246,6 +246,46 @@ static unsigned long get_user_shstk_addr > return ssp; > } > > +int shstk_pop(u64 *val) > +{ > + int ret = 0; > + u64 ssp; > + > + if (!features_enabled(ARCH_SHSTK_SHSTK)) > + return -ENOTSUPP; > + > + fpregs_lock_and_load(); > + > + rdmsrq(MSR_IA32_PL3_SSP, ssp); > + if (val && get_user(*val, (__user u64 *)ssp)) > + ret = -EFAULT; > + ssp += SS_FRAME_SIZE; > + wrmsrq(MSR_IA32_PL3_SSP, ssp); > + > + fpregs_unlock(); > + > + return ret; > +} > + > +int shstk_push(u64 val) > +{ > + u64 ssp; > + int ret; > + > + if (!features_enabled(ARCH_SHSTK_SHSTK)) > + return -ENOTSUPP; > + > + fpregs_lock_and_load(); > + > + rdmsrq(MSR_IA32_PL3_SSP, ssp); > + ssp -= SS_FRAME_SIZE; > + wrmsrq(MSR_IA32_PL3_SSP, ssp); > + ret = write_user_shstk_64((__user void *)ssp, val); Should we role back ssp if there is a fault? > + fpregs_unlock(); > + > + return ret; > +} > + > #define SHSTK_DATA_BIT BIT(63) > > static int put_shstk_data(u64 __user *addr, u64 data) > --- a/arch/x86/kernel/uprobes.c > +++ b/arch/x86/kernel/uprobes.c > @@ -804,7 +804,7 @@ SYSCALL_DEFINE0(uprobe) > { > struct pt_regs *regs = task_pt_regs(current); > struct uprobe_syscall_args args; > - unsigned long ip, sp; > + unsigned long ip, sp, sret; > int err; > > /* Allow execution only from uprobe trampolines. */ > @@ -831,6 +831,9 @@ SYSCALL_DEFINE0(uprobe) > > sp = regs->sp; > > + if (shstk_pop(&sret) == 0 && sret != args.retaddr) > + goto sigill; > + > handle_syscall_uprobe(regs, regs->ip); > > /* > @@ -855,6 +858,9 @@ SYSCALL_DEFINE0(uprobe) > if (args.retaddr - 5 != regs->ip) > args.retaddr = regs->ip; > > + if (shstk_push(args.retaddr) == -EFAULT) > + goto sigill; > + Are we effectively allowing arbitrary shadow stack push here? I see we need to be in in_uprobe_trampoline(), but there is no mmap lock taken, so it's a racy check. I'm questioning if the security posture tweak is worth thinking about for whatever the level of intersection of uprobes usage and shadow stack is today. > regs->ip = ip; > > err = copy_to_user((void __user *)regs->sp, &args, sizeof(args)); > @@ -1124,14 +1130,6 @@ void arch_uprobe_optimize(struct arch_up > struct mm_struct *mm = current->mm; > uprobe_opcode_t insn[5]; > > - /* > - * Do not optimize if shadow stack is enabled, the return address hijack > - * code in arch_uretprobe_hijack_return_addr updates wrong frame when > - * the entry uprobe is optimized and the shadow stack crashes the app. > - */ > - if (shstk_is_enabled()) > - return; > - > if (!should_optimize(auprobe)) > return; >