On Thu, Jul 03, 2025 at 09:44:17AM +0100, David Laight wrote:
> On Tue, 1 Jul 2025 12:58:31 +0300
> "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx> wrote:
> 
> > Extract memcpy and memset functions from copy_user_generic() and
> > __clear_user().
> > 
> > They can be used as inline memcpy and memset instead of the GCC builtins
> > whenever necessary. LASS requires them to handle text_poke.
> 
> Except they contain the fault handlers so aren't generic calls.

That's true. I will add a comment to clarify it.

> > Originally-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> > Link: https://lore.kernel.org/all/20241029184840.GJ14555@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx/
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> > ---
> >  arch/x86/include/asm/string.h     | 46 +++++++++++++++++++++++++++++++
> >  arch/x86/include/asm/uaccess_64.h | 38 +++++++------------------
> >  arch/x86/lib/clear_page_64.S      | 13 +++++++--
> >  3 files changed, 67 insertions(+), 30 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h
> > index c3c2c1914d65..17f6b5bfa8c1 100644
> > --- a/arch/x86/include/asm/string.h
> > +++ b/arch/x86/include/asm/string.h
> > @@ -1,6 +1,52 @@
> >  /* SPDX-License-Identifier: GPL-2.0 */
> > +#ifndef _ASM_X86_STRING_H
> > +#define _ASM_X86_STRING_H
> > +
> > +#include <asm/asm.h>
> > +#include <asm/alternative.h>
> > +#include <asm/cpufeatures.h>
> > +
> >  #ifdef CONFIG_X86_32
> >  # include <asm/string_32.h>
> >  #else
> >  # include <asm/string_64.h>
> >  #endif
> > +
> > +#ifdef CONFIG_X86_64
> > +#define ALT_64(orig, alt, feat) ALTERNATIVE(orig, alt, feat)
> > +#else
> > +#define ALT_64(orig, alt, feat) orig "\n"
> > +#endif
> > +
> > +static __always_inline void *__inline_memcpy(void *to, const void *from, size_t len)
> > +{
> > +	void *ret = to;
> > +
> > +	asm volatile("1:\n\t"
> > +		     ALT_64("rep movsb",
> > +			    "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM))
> > +		     "2:\n\t"
> > +		     _ASM_EXTABLE_UA(1b, 2b)
> > +		     : "+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
> > +		     : : "memory", _ASM_AX);
> > +
> > +	return ret + len;
> > +}
> > +
> > +static __always_inline void *__inline_memset(void *addr, int v, size_t len)
> > +{
> > +	void *ret = addr;
> > +
> > +	asm volatile("1:\n\t"
> > +		     ALT_64("rep stosb",
> > +			    "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRM))
> > +		     "2:\n\t"
> > +		     _ASM_EXTABLE_UA(1b, 2b)
> > +		     : "+c" (len), "+D" (addr), ASM_CALL_CONSTRAINT
> > +		     : "a" ((uint8_t)v)
> 
> You shouldn't need the (uint8_t) cast (should that be (u8) anyway).
> At best it doesn't matter, at worst it will add code to mask with 0xff.

Right, will drop.

> > +		     : "memory", _ASM_SI, _ASM_DX);
> > +
> > +	return ret + len;
> > +}
> > +
> > +#endif /* _ASM_X86_STRING_H */
> > diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
> > index c8a5ae35c871..eb531e13e659 100644
> > --- a/arch/x86/include/asm/uaccess_64.h
> > +++ b/arch/x86/include/asm/uaccess_64.h
> > @@ -13,6 +13,7 @@
> >  #include <asm/page.h>
> >  #include <asm/percpu.h>
> >  #include <asm/runtime-const.h>
> > +#include <asm/string.h>
> >  
> >  /*
> >   * Virtual variable: there's no actual backing store for this,
> > @@ -118,21 +119,12 @@ rep_movs_alternative(void *to, const void *from, unsigned len);
> >  static __always_inline __must_check unsigned long
> >  copy_user_generic(void *to, const void *from, unsigned long len)
> >  {
> > +	void *ret;
> > +
> >  	stac();
> > -	/*
> > -	 * If CPU has FSRM feature, use 'rep movs'.
> > -	 * Otherwise, use rep_movs_alternative.
> > -	 */
> > -	asm volatile(
> > -		"1:\n\t"
> > -		ALTERNATIVE("rep movsb",
> > -			    "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM))
> > -		"2:\n"
> > -		_ASM_EXTABLE_UA(1b, 2b)
> > -		:"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
> > -		: : "memory", "rax");
> > +	ret = __inline_memcpy(to, from, len);
> >  	clac();
> > -	return len;
> > +	return ret - to;
> >  }
> >  
> >  static __always_inline __must_check unsigned long
> > @@ -178,25 +170,15 @@ rep_stos_alternative(void __user *addr, unsigned long len);
> >  
> >  static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size)
> >  {
> > +	void *ptr = (__force void *)addr;
> > +	void *ret;
> > +
> >  	might_fault();
> >  	stac();
> > -
> > -	/*
> > -	 * No memory constraint because it doesn't change any memory gcc
> > -	 * knows about.
> > -	 */
> > -	asm volatile(
> > -		"1:\n\t"
> > -		ALTERNATIVE("rep stosb",
> > -			    "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRS))
> > -		"2:\n"
> > -		_ASM_EXTABLE_UA(1b, 2b)
> > -		: "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT
> > -		: "a" (0));
> > -
> > +	ret = __inline_memset(ptr, 0, size);
> >  	clac();
> >  
> > -	return size;
> > +	return ret - ptr;
> >  }
> >  
> >  static __always_inline unsigned long clear_user(void __user *to, unsigned long n)
> > diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
> > index a508e4a8c66a..47b613690f84 100644
> > --- a/arch/x86/lib/clear_page_64.S
> > +++ b/arch/x86/lib/clear_page_64.S
> > @@ -55,17 +55,26 @@ SYM_FUNC_END(clear_page_erms)
> >  EXPORT_SYMBOL_GPL(clear_page_erms)
> >  
> >  /*
> > - * Default clear user-space.
> > + * Default memset.
> >   * Input:
> >   * rdi destination
> > + * rsi scratch
> >   * rcx count
> > - * rax is zero
> > + * al is value
> >   *
> >   * Output:
> >   * rcx: uncleared bytes or 0 if successful.
> > + * rdx: clobbered
> >   */
> >  SYM_FUNC_START(rep_stos_alternative)
> >  	ANNOTATE_NOENDBR
> > +
> > +	movzbq %al, %rsi
> > +	movabs $0x0101010101010101, %rax
> > +
> > +	/* RDX:RAX = RAX * RSI */
> > +	mulq %rsi
> 
> NAK - you can't do that here.
> Neither %rsi nor %rdx can be trashed.
> The function has a very explicit calling convention.

What calling convention? We change the only caller to conform to this.

> It is also almost certainly a waste of time.
> Pretty much all the calls will be for a constant 0x00.
> Rename it all memzero() ...

text_poke_memset() is not limited to zeroing.

-- 
  Kiryl Shutsemau / Kirill A. Shutemov
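
For reference, a minimal user-space C sketch of the byte-broadcast trick the
mulq above relies on: zero-extending the byte and multiplying it by
0x0101010101010101 replicates it into every byte of a 64-bit word, so the
word can then be stored word-at-a-time. The word_memset() helper below is
hypothetical illustration code, not the kernel implementation.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustration only: replicate a byte across a word, then store words. */
static void *word_memset(void *dst, int v, size_t len)
{
	uint64_t pattern = (uint8_t)v * 0x0101010101010101ULL;
	unsigned char *p = dst;

	while (len >= sizeof(pattern)) {	/* whole words, like rep stosq */
		memcpy(p, &pattern, sizeof(pattern));
		p += sizeof(pattern);
		len -= sizeof(pattern);
	}
	while (len--)				/* byte tail, like rep stosb */
		*p++ = (unsigned char)v;

	return dst;
}

int main(void)
{
	unsigned char buf[19];

	word_memset(buf, 0xab, sizeof(buf));
	printf("%02x %02x\n", buf[0], buf[18]);	/* prints: ab ab */
	return 0;
}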