David Matlack <dmatlack@xxxxxxxxxx> writes: > Import iosubmit_cmds512() from arch/x86/include/asm/io.h into tools/ so > it can be used by VFIO selftests to interact with Intel DSA devices. > minor: perhaps move this patch to be near the one that adds the DSA driver? (in case there's a next revision) Anyway, Acked-by: Vinicius Costa Gomes <vinicius.gomes@xxxxxxxxx> > Also pull in movdir64b() from arch/x86/include/asm/special_insns.h into > tools/, which is the underlying instruction used by iosubmit_cmds512(). > > Changes made when importing: None > > Signed-off-by: David Matlack <dmatlack@xxxxxxxxxx> > --- > tools/arch/x86/include/asm/io.h | 26 +++++++++++++++++++++ > tools/arch/x86/include/asm/special_insns.h | 27 ++++++++++++++++++++++ > 2 files changed, 53 insertions(+) > create mode 100644 tools/arch/x86/include/asm/special_insns.h > > diff --git a/tools/arch/x86/include/asm/io.h b/tools/arch/x86/include/asm/io.h > index 4c787a2363de..ecad61a3ea52 100644 > --- a/tools/arch/x86/include/asm/io.h > +++ b/tools/arch/x86/include/asm/io.h > @@ -4,6 +4,7 @@ > > #include <linux/compiler.h> > #include <linux/types.h> > +#include "special_insns.h" > > #define build_mmio_read(name, size, type, reg, barrier) \ > static inline type name(const volatile void __iomem *addr) \ > @@ -72,4 +73,29 @@ build_mmio_write(__writeq, "q", u64, "r", ) > > #include <asm-generic/io.h> > > +/** > + * iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units > + * @dst: destination, in MMIO space (must be 512-bit aligned) > + * @src: source > + * @count: number of 512 bits quantities to submit > + * > + * Submit data from kernel space to MMIO space, in units of 512 bits at a > + * time. Order of access is not guaranteed, nor is a memory barrier > + * performed afterwards. > + * > + * Warning: Do not use this helper unless your driver has checked that the CPU > + * instruction is supported on the platform. > + */ > +static inline void iosubmit_cmds512(void __iomem *dst, const void *src, > + size_t count) > +{ > + const u8 *from = src; > + const u8 *end = from + count * 64; > + > + while (from < end) { > + movdir64b(dst, from); > + from += 64; > + } > +} > + > #endif /* _TOOLS_ASM_X86_IO_H */ > diff --git a/tools/arch/x86/include/asm/special_insns.h b/tools/arch/x86/include/asm/special_insns.h > new file mode 100644 > index 000000000000..04af42a99c38 > --- /dev/null > +++ b/tools/arch/x86/include/asm/special_insns.h > @@ -0,0 +1,27 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef _TOOLS_ASM_X86_SPECIAL_INSNS_H > +#define _TOOLS_ASM_X86_SPECIAL_INSNS_H > + > +/* The dst parameter must be 64-bytes aligned */ > +static inline void movdir64b(void *dst, const void *src) > +{ > + const struct { char _[64]; } *__src = src; > + struct { char _[64]; } *__dst = dst; > + > + /* > + * MOVDIR64B %(rdx), rax. > + * > + * Both __src and __dst must be memory constraints in order to tell the > + * compiler that no other memory accesses should be reordered around > + * this one. > + * > + * Also, both must be supplied as lvalues because this tells > + * the compiler what the object is (its size) the instruction accesses. > + * I.e., not the pointers but what they point to, thus the deref'ing '*'. > + */ > + asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" > + : "+m" (*__dst) > + : "m" (*__src), "a" (__dst), "d" (__src)); > +} > + > +#endif /* _TOOLS_ASM_X86_SPECIAL_INSNS_H */ > -- > 2.50.0.rc2.701.gf1e915cc24-goog > -- Vinicius