As I did for sha512_blocks(), reorganize x86's sha256_blocks() to be just a static_call. To achieve that, for each assembly function add a C wrapper function that handles the kernel-mode FPU section and the fallback to sha256_blocks_generic(). While this increases total code size slightly, the amount of code actually executed on a given system does not increase, and it is slightly more efficient since it eliminates the extra static_key. It also means the assembly functions are now called with standard direct calls instead of static calls, eliminating the need for ANNOTATE_NOENDBR. Signed-off-by: Eric Biggers <ebiggers@xxxxxxxxxx> --- lib/crypto/x86/sha256-avx-asm.S | 3 -- lib/crypto/x86/sha256-avx2-asm.S | 3 -- lib/crypto/x86/sha256-ni-asm.S | 2 -- lib/crypto/x86/sha256-ssse3-asm.S | 2 -- lib/crypto/x86/sha256.h | 48 ++++++++++++++++--------------- 5 files changed, 25 insertions(+), 33 deletions(-) diff --git a/lib/crypto/x86/sha256-avx-asm.S b/lib/crypto/x86/sha256-avx-asm.S index 73bcff2b548f4..798a7f07fa013 100644 --- a/lib/crypto/x86/sha256-avx-asm.S +++ b/lib/crypto/x86/sha256-avx-asm.S @@ -46,11 +46,10 @@ ######################################################################## # This code schedules 1 block at a time, with 4 lanes per block ######################################################################## #include <linux/linkage.h> -#include <linux/objtool.h> ## assume buffers not aligned #define VMOVDQ vmovdqu ################################ Define Macros @@ -344,12 +343,10 @@ a = TMP_ ## void sha256_transform_avx(struct sha256_block_state *state, ## const u8 *data, size_t nblocks); ######################################################################## .text SYM_FUNC_START(sha256_transform_avx) - ANNOTATE_NOENDBR # since this is called only via static_call - pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 diff --git a/lib/crypto/x86/sha256-avx2-asm.S b/lib/crypto/x86/sha256-avx2-asm.S index 45787570387f2..62a46993359e6 100644 --- a/lib/crypto/x86/sha256-avx2-asm.S +++
b/lib/crypto/x86/sha256-avx2-asm.S @@ -47,11 +47,10 @@ ######################################################################## # This code schedules 2 blocks at a time, with 4 lanes per block ######################################################################## #include <linux/linkage.h> -#include <linux/objtool.h> ## assume buffers not aligned #define VMOVDQ vmovdqu ################################ Define Macros @@ -521,12 +520,10 @@ STACK_SIZE = _CTX + _CTX_SIZE ## void sha256_transform_rorx(struct sha256_block_state *state, ## const u8 *data, size_t nblocks); ######################################################################## .text SYM_FUNC_START(sha256_transform_rorx) - ANNOTATE_NOENDBR # since this is called only via static_call - pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 diff --git a/lib/crypto/x86/sha256-ni-asm.S b/lib/crypto/x86/sha256-ni-asm.S index 4af7d22e29e47..9ebbacbb9c13b 100644 --- a/lib/crypto/x86/sha256-ni-asm.S +++ b/lib/crypto/x86/sha256-ni-asm.S @@ -52,11 +52,10 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* */ #include <linux/linkage.h> -#include <linux/objtool.h> #define STATE_PTR %rdi /* 1st arg */ #define DATA_PTR %rsi /* 2nd arg */ #define NUM_BLKS %rdx /* 3rd arg */ @@ -109,11 +108,10 @@ * void sha256_ni_transform(struct sha256_block_state *state, * const u8 *data, size_t nblocks); */ .text SYM_FUNC_START(sha256_ni_transform) - ANNOTATE_NOENDBR # since this is called only via static_call shl $6, NUM_BLKS /* convert to bytes */ jz .Ldone_hash add DATA_PTR, NUM_BLKS /* pointer to end of data */ diff --git a/lib/crypto/x86/sha256-ssse3-asm.S b/lib/crypto/x86/sha256-ssse3-asm.S index 407b30adcd37f..3b602b7d43fad 100644 --- a/lib/crypto/x86/sha256-ssse3-asm.S +++ b/lib/crypto/x86/sha256-ssse3-asm.S @@ -351,12 +351,10 @@ a = TMP_ ## void sha256_transform_ssse3(struct sha256_block_state *state, ## const u8 *data, size_t nblocks); ######################################################################## .text SYM_FUNC_START(sha256_transform_ssse3) - ANNOTATE_NOENDBR # since this is called only via static_call - pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 diff --git a/lib/crypto/x86/sha256.h b/lib/crypto/x86/sha256.h index 3b5456c222ba6..669bc06538b67 100644 --- a/lib/crypto/x86/sha256.h +++ b/lib/crypto/x86/sha256.h @@ -6,50 +6,52 @@ */ #include <asm/fpu/api.h> #include <crypto/internal/simd.h> #include <linux/static_call.h> -asmlinkage void sha256_transform_ssse3(struct sha256_block_state *state, - const u8 *data, size_t nblocks); -asmlinkage void sha256_transform_avx(struct sha256_block_state *state, - const u8 *data, size_t nblocks); -asmlinkage void sha256_transform_rorx(struct sha256_block_state *state, - const u8 *data, size_t nblocks); -asmlinkage void sha256_ni_transform(struct sha256_block_state *state, - const u8 *data, size_t nblocks); +DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_blocks_generic); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha256_x86); +#define DEFINE_X86_SHA256_FN(c_fn, asm_fn) \ + asmlinkage void asm_fn(struct 
sha256_block_state *state, \ + const u8 *data, size_t nblocks); \ + static void c_fn(struct sha256_block_state *state, const u8 *data, \ + size_t nblocks) \ + { \ + if (likely(crypto_simd_usable())) { \ + kernel_fpu_begin(); \ + asm_fn(state, data, nblocks); \ + kernel_fpu_end(); \ + } else { \ + sha256_blocks_generic(state, data, nblocks); \ + } \ + } -DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_transform_ssse3); +DEFINE_X86_SHA256_FN(sha256_blocks_ssse3, sha256_transform_ssse3); +DEFINE_X86_SHA256_FN(sha256_blocks_avx, sha256_transform_avx); +DEFINE_X86_SHA256_FN(sha256_blocks_avx2, sha256_transform_rorx); +DEFINE_X86_SHA256_FN(sha256_blocks_ni, sha256_ni_transform); static void sha256_blocks(struct sha256_block_state *state, const u8 *data, size_t nblocks) { - if (static_branch_likely(&have_sha256_x86) && crypto_simd_usable()) { - kernel_fpu_begin(); - static_call(sha256_blocks_x86)(state, data, nblocks); - kernel_fpu_end(); - } else { - sha256_blocks_generic(state, data, nblocks); - } + static_call(sha256_blocks_x86)(state, data, nblocks); } #define sha256_mod_init_arch sha256_mod_init_arch static inline void sha256_mod_init_arch(void) { if (boot_cpu_has(X86_FEATURE_SHA_NI)) { - static_call_update(sha256_blocks_x86, sha256_ni_transform); + static_call_update(sha256_blocks_x86, sha256_blocks_ni); } else if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL) && boot_cpu_has(X86_FEATURE_AVX)) { if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI2)) static_call_update(sha256_blocks_x86, - sha256_transform_rorx); + sha256_blocks_avx2); else static_call_update(sha256_blocks_x86, - sha256_transform_avx); - } else if (!boot_cpu_has(X86_FEATURE_SSSE3)) { - return; + sha256_blocks_avx); + } else if (boot_cpu_has(X86_FEATURE_SSSE3)) { + static_call_update(sha256_blocks_x86, sha256_blocks_ssse3); } - static_branch_enable(&have_sha256_x86); } -- 2.50.0