Reorganize the crc-pclmul static_call initialization to place more of the logic in the *_mod_init_arch() functions instead of in the INIT_CRC_PCLMUL macro. This provides the flexibility to do more than a single static_call update for each CPU feature check. Right away, optimize crc64_mod_init_arch() to check the CPU features just once instead of twice, doing both the crc64_lsb and crc64_msb static_call updates together. A later commit will also use this to initialize an additional static_key when crc32_lsb_vpclmul_avx512() is enabled. Signed-off-by: Eric Biggers <ebiggers@xxxxxxxxxx> --- lib/crc/x86/crc-pclmul-template.h | 31 ++++++++++++++----------------- lib/crc/x86/crc-t10dif.h | 9 ++++++++- lib/crc/x86/crc32.h | 10 +++++++++- lib/crc/x86/crc64.h | 15 +++++++++++++-- 4 files changed, 44 insertions(+), 21 deletions(-) diff --git a/lib/crc/x86/crc-pclmul-template.h b/lib/crc/x86/crc-pclmul-template.h index 51cba520a7dbd..35c950d7010c2 100644 --- a/lib/crc/x86/crc-pclmul-template.h +++ b/lib/crc/x86/crc-pclmul-template.h @@ -23,27 +23,24 @@ crc_t prefix##_vpclmul_avx2(crc_t crc, const u8 *p, size_t len, \ const void *consts_ptr); \ crc_t prefix##_vpclmul_avx512(crc_t crc, const u8 *p, size_t len, \ const void *consts_ptr); \ DEFINE_STATIC_CALL(prefix##_pclmul, prefix##_pclmul_sse) -#define INIT_CRC_PCLMUL(prefix) \ -do { \ - if (boot_cpu_has(X86_FEATURE_VPCLMULQDQ) && \ - boot_cpu_has(X86_FEATURE_AVX2) && \ - cpu_has_xfeatures(XFEATURE_MASK_YMM, NULL)) { \ - if (boot_cpu_has(X86_FEATURE_AVX512BW) && \ - boot_cpu_has(X86_FEATURE_AVX512VL) && \ - !boot_cpu_has(X86_FEATURE_PREFER_YMM) && \ - cpu_has_xfeatures(XFEATURE_MASK_AVX512, NULL)) { \ - static_call_update(prefix##_pclmul, \ - prefix##_vpclmul_avx512); \ - } else { \ - static_call_update(prefix##_pclmul, \ - prefix##_vpclmul_avx2); \ - } \ - } \ -} while (0) +static inline bool have_vpclmul(void) +{ + return boot_cpu_has(X86_FEATURE_VPCLMULQDQ) && + boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_YMM, NULL); +} + +static inline bool have_avx512(void) +{ + return boot_cpu_has(X86_FEATURE_AVX512BW) && + boot_cpu_has(X86_FEATURE_AVX512VL) && + !boot_cpu_has(X86_FEATURE_PREFER_YMM) && + cpu_has_xfeatures(XFEATURE_MASK_AVX512, NULL); +} /* * Call a [V]PCLMULQDQ optimized CRC function if the data length is at least 16 * bytes, the CPU has PCLMULQDQ support, and the current context may use SIMD. * diff --git a/lib/crc/x86/crc-t10dif.h b/lib/crc/x86/crc-t10dif.h index eb1f23db4daa1..2a02a3026f3f8 100644 --- a/lib/crc/x86/crc-t10dif.h +++ b/lib/crc/x86/crc-t10dif.h @@ -21,8 +21,15 @@ static inline u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len) #define crc_t10dif_mod_init_arch crc_t10dif_mod_init_arch static inline void crc_t10dif_mod_init_arch(void) { if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { static_branch_enable(&have_pclmulqdq); - INIT_CRC_PCLMUL(crc16_msb); + if (have_vpclmul()) { + if (have_avx512()) + static_call_update(crc16_msb_pclmul, + crc16_msb_vpclmul_avx512); + else + static_call_update(crc16_msb_pclmul, + crc16_msb_vpclmul_avx2); + } } } diff --git a/lib/crc/x86/crc32.h b/lib/crc/x86/crc32.h index 28451d5769c3a..ba4dacf23340c 100644 --- a/lib/crc/x86/crc32.h +++ b/lib/crc/x86/crc32.h @@ -75,11 +75,19 @@ static inline void crc32_mod_init_arch(void) { if (boot_cpu_has(X86_FEATURE_XMM4_2)) static_branch_enable(&have_crc32); if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { static_branch_enable(&have_pclmulqdq); - INIT_CRC_PCLMUL(crc32_lsb); + if (have_vpclmul()) { + if (have_avx512()) { + static_call_update(crc32_lsb_pclmul, + crc32_lsb_vpclmul_avx512); + } else { + static_call_update(crc32_lsb_pclmul, + crc32_lsb_vpclmul_avx2); + } + } } } static inline u32 crc32_optimizations_arch(void) { diff --git a/lib/crc/x86/crc64.h b/lib/crc/x86/crc64.h index 54aca3a9475c9..aea64409e4193 100644 --- a/lib/crc/x86/crc64.h +++ b/lib/crc/x86/crc64.h @@ -29,9 +29,20 @@ static inline u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len) #define crc64_mod_init_arch crc64_mod_init_arch static inline void crc64_mod_init_arch(void) { if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { static_branch_enable(&have_pclmulqdq); - INIT_CRC_PCLMUL(crc64_msb); - INIT_CRC_PCLMUL(crc64_lsb); + if (have_vpclmul()) { + if (have_avx512()) { + static_call_update(crc64_lsb_pclmul, + crc64_lsb_vpclmul_avx512); + static_call_update(crc64_msb_pclmul, + crc64_msb_vpclmul_avx512); + } else { + static_call_update(crc64_lsb_pclmul, + crc64_lsb_vpclmul_avx2); + static_call_update(crc64_msb_pclmul, + crc64_msb_vpclmul_avx2); + } + } } } base-commit: 9b0236f4efb889869f7d4f3f084f508cc0433ec9 -- 2.50.1