On Tue, 1 Jul 2025 12:58:31 +0300
"Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx> wrote:

> Extract memcpy and memset functions from copy_user_generic() and
> __clear_user().
>
> They can be used as inline memcpy and memset instead of the GCC builtins
> whenever necessary. LASS requires them to handle text_poke.

Except they contain the fault handlers, so they aren't generic calls.

>
> Originally-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Link: https://lore.kernel.org/all/20241029184840.GJ14555@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx/
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> ---
>  arch/x86/include/asm/string.h     | 46 +++++++++++++++++++++++++++++++
>  arch/x86/include/asm/uaccess_64.h | 38 +++++++------------------
>  arch/x86/lib/clear_page_64.S      | 13 +++++++--
>  3 files changed, 67 insertions(+), 30 deletions(-)
>
> diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h
> index c3c2c1914d65..17f6b5bfa8c1 100644
> --- a/arch/x86/include/asm/string.h
> +++ b/arch/x86/include/asm/string.h
> @@ -1,6 +1,52 @@
>  /* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_X86_STRING_H
> +#define _ASM_X86_STRING_H
> +
> +#include <asm/asm.h>
> +#include <asm/alternative.h>
> +#include <asm/cpufeatures.h>
> +
>  #ifdef CONFIG_X86_32
>  # include <asm/string_32.h>
>  #else
>  # include <asm/string_64.h>
>  #endif
> +
> +#ifdef CONFIG_X86_64
> +#define ALT_64(orig, alt, feat) ALTERNATIVE(orig, alt, feat)
> +#else
> +#define ALT_64(orig, alt, feat) orig "\n"
> +#endif
> +
> +static __always_inline void *__inline_memcpy(void *to, const void *from, size_t len)
> +{
> +	void *ret = to;
> +
> +	asm volatile("1:\n\t"
> +		     ALT_64("rep movsb",
> +			    "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM))
> +		     "2:\n\t"
> +		     _ASM_EXTABLE_UA(1b, 2b)
> +		     : "+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
> +		     : : "memory", _ASM_AX);
> +
> +	return ret + len;
> +}
> +
> +static __always_inline void *__inline_memset(void *addr, int v, size_t len)
> +{
> +	void *ret = addr;
> +
> +	asm volatile("1:\n\t"
> +		     ALT_64("rep stosb",
> +			    "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRM))
> +		     "2:\n\t"
> +		     _ASM_EXTABLE_UA(1b, 2b)
> +		     : "+c" (len), "+D" (addr), ASM_CALL_CONSTRAINT
> +		     : "a" ((uint8_t)v)

You shouldn't need the (uint8_t) cast (should that be (u8) anyway?).
At best it doesn't matter, at worst it will add code to mask the value
with 0xff.
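To illustrate the point, a minimal userspace-only sketch (sketch_memset()
is an invented name, not anything in the patch): "rep stosb" only reads
%al and ignores the upper bits of %rax, so the plain int can be fed
straight into the "a" constraint; a narrowing cast can only ever make
the generated code worse.

#include <stddef.h>

/*
 * Userspace sketch, build with gcc/clang on x86-64.  The value is passed
 * uncast; "rep stosb" stores %al only, so no masking is needed.  Casting
 * to u8/uint8_t first merely invites the compiler to emit an extra
 * movzbl/and to mask with 0xff.
 */
static inline void *sketch_memset(void *addr, int v, size_t len)
{
	void *ret = addr;

	asm volatile("rep stosb"
		     : "+c" (len), "+D" (addr)
		     : "a" (v)			/* no (u8)/(uint8_t) cast */
		     : "memory");

	return ret;
}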
> +		     : "memory", _ASM_SI, _ASM_DX);
> +
> +	return ret + len;
> +}
> +
> +#endif /* _ASM_X86_STRING_H */
> diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
> index c8a5ae35c871..eb531e13e659 100644
> --- a/arch/x86/include/asm/uaccess_64.h
> +++ b/arch/x86/include/asm/uaccess_64.h
> @@ -13,6 +13,7 @@
>  #include <asm/page.h>
>  #include <asm/percpu.h>
>  #include <asm/runtime-const.h>
> +#include <asm/string.h>
>
>  /*
>   * Virtual variable: there's no actual backing store for this,
> @@ -118,21 +119,12 @@ rep_movs_alternative(void *to, const void *from, unsigned len);
>  static __always_inline __must_check unsigned long
>  copy_user_generic(void *to, const void *from, unsigned long len)
>  {
> +	void *ret;
> +
>  	stac();
> -	/*
> -	 * If CPU has FSRM feature, use 'rep movs'.
> -	 * Otherwise, use rep_movs_alternative.
> -	 */
> -	asm volatile(
> -		"1:\n\t"
> -		ALTERNATIVE("rep movsb",
> -			    "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM))
> -		"2:\n"
> -		_ASM_EXTABLE_UA(1b, 2b)
> -		:"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
> -		: : "memory", "rax");
> +	ret = __inline_memcpy(to, from, len);
>  	clac();
> -	return len;
> +	return ret - to;
>  }
>
>  static __always_inline __must_check unsigned long
> @@ -178,25 +170,15 @@ rep_stos_alternative(void __user *addr, unsigned long len);
>
>  static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size)
>  {
> +	void *ptr = (__force void *)addr;
> +	void *ret;
> +
>  	might_fault();
>  	stac();
> -
> -	/*
> -	 * No memory constraint because it doesn't change any memory gcc
> -	 * knows about.
> -	 */
> -	asm volatile(
> -		"1:\n\t"
> -		ALTERNATIVE("rep stosb",
> -			    "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRS))
> -		"2:\n"
> -		_ASM_EXTABLE_UA(1b, 2b)
> -		: "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT
> -		: "a" (0));
> -
> +	ret = __inline_memset(ptr, 0, size);
>  	clac();
>
> -	return size;
> +	return ret - ptr;
>  }
>
>  static __always_inline unsigned long clear_user(void __user *to, unsigned long n)
> diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
> index a508e4a8c66a..47b613690f84 100644
> --- a/arch/x86/lib/clear_page_64.S
> +++ b/arch/x86/lib/clear_page_64.S
> @@ -55,17 +55,26 @@ SYM_FUNC_END(clear_page_erms)
>  EXPORT_SYMBOL_GPL(clear_page_erms)
>
>  /*
> - * Default clear user-space.
> + * Default memset.
>   * Input:
>   * rdi destination
> + * rsi scratch
>   * rcx count
> - * rax is zero
> + * al is value
>   *
>   * Output:
>   * rcx: uncleared bytes or 0 if successful.
> + * rdx: clobbered
>   */
>  SYM_FUNC_START(rep_stos_alternative)
>  	ANNOTATE_NOENDBR
> +
> +	movzbq %al, %rsi
> +	movabs $0x0101010101010101, %rax
> +
> +	/* RDX:RAX = RAX * RSI */
> +	mulq %rsi

NAK - you can't do that here.
Neither %rsi nor %rdx can be trashed.
The function has a very explicit calling convention.

It is also almost certainly a waste of time.
Pretty much all the calls will be for a constant 0x00.
Rename it all memzero() ...
(a rough sketch of that shape is at the bottom of this mail)

	David

> +
>  	cmpq $64,%rcx
>  	jae .Lunrolled
>
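Coming back to the memzero() suggestion: a rough sketch of what that could
look like, purely illustrative. The __inline_memzero name is invented here
(nothing in the patch defines it), it reuses the patch's ALT_64()/extable
plumbing, and it assumes rep_stos_alternative keeps its current calling
convention: with the fill value fixed at zero, %rax already holds the full
64-bit pattern, so there is no mulq and no need to scratch %rsi/%rdx.

static __always_inline void *__inline_memzero(void *addr, size_t len)
{
	void *ret = addr;

	asm volatile("1:\n\t"
		     ALT_64("rep stosb",
			    "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRS))
		     "2:\n\t"
		     _ASM_EXTABLE_UA(1b, 2b)
		     : "+c" (len), "+D" (addr), ASM_CALL_CONSTRAINT
		     : "a" (0UL)	/* already the full 64-bit fill pattern */
		     : "memory");

	return ret + len;
}

__clear_user() would then call this helper directly and rep_stos_alternative
could stay exactly as it is today.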