On Fri, 16 May 2025 at 10:15, Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> wrote: > > Both Intel and AMD CPUs support 5-level paging, which is expected to > become more widely adopted in the future. > > Remove CONFIG_X86_5LEVEL and ifdeffery for it to make it more readable. > > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> > Suggested-by: Borislav Petkov <bp@xxxxxxxxx> This will conflict with my changes, but I agree with them in principle so Reviewed-by: Ard Biesheuvel <ardb@xxxxxxxxxx> > --- > Documentation/arch/x86/cpuinfo.rst | 8 +++---- > .../arch/x86/x86_64/5level-paging.rst | 9 -------- > arch/x86/Kconfig | 22 +------------------ > arch/x86/Kconfig.cpufeatures | 4 ---- > arch/x86/boot/compressed/pgtable_64.c | 11 ++-------- > arch/x86/boot/header.S | 4 ---- > arch/x86/boot/startup/map_kernel.c | 5 +---- > arch/x86/include/asm/page_64.h | 2 -- > arch/x86/include/asm/page_64_types.h | 7 ------ > arch/x86/include/asm/pgtable_64_types.h | 18 --------------- > arch/x86/kernel/alternative.c | 2 +- > arch/x86/kernel/head64.c | 2 -- > arch/x86/kernel/head_64.S | 2 -- > arch/x86/mm/init.c | 4 ---- > arch/x86/mm/pgtable.c | 2 +- > drivers/firmware/efi/libstub/x86-5lvl.c | 2 +- > 16 files changed, 10 insertions(+), 94 deletions(-) > > diff --git a/Documentation/arch/x86/cpuinfo.rst b/Documentation/arch/x86/cpuinfo.rst > index f80e2a558d2a..dd8b7806944e 100644 > --- a/Documentation/arch/x86/cpuinfo.rst > +++ b/Documentation/arch/x86/cpuinfo.rst > @@ -173,10 +173,10 @@ For example, when an old kernel is running on new hardware. > The kernel disabled support for it at compile-time > -------------------------------------------------- > > -For example, if 5-level-paging is not enabled when building (i.e., > -CONFIG_X86_5LEVEL is not selected) the flag "la57" will not show up [#f1]_. > +For example, if Linear Address Masking (LAM) is not enabled when building (i.e., > +CONFIG_ADDRESS_MASKING is not selected) the flag "lam" will not show up. > Even though the feature will still be detected via CPUID, the kernel disables > -it by clearing via setup_clear_cpu_cap(X86_FEATURE_LA57). > +it by clearing via setup_clear_cpu_cap(X86_FEATURE_LAM). > > The feature is disabled at boot-time > ------------------------------------ > @@ -200,5 +200,3 @@ missing at runtime. For example, AVX flags will not show up if XSAVE feature > is disabled since they depend on XSAVE feature. Another example would be broken > CPUs and them missing microcode patches. Due to that, the kernel decides not to > enable a feature. > - > -.. [#f1] 5-level paging uses linear address of 57 bits. > diff --git a/Documentation/arch/x86/x86_64/5level-paging.rst b/Documentation/arch/x86/x86_64/5level-paging.rst > index 71f882f4a173..ad7ddc13f79d 100644 > --- a/Documentation/arch/x86/x86_64/5level-paging.rst > +++ b/Documentation/arch/x86/x86_64/5level-paging.rst > @@ -22,15 +22,6 @@ QEMU 2.9 and later support 5-level paging. > Virtual memory layout for 5-level paging is described in > Documentation/arch/x86/x86_64/mm.rst > > - > -Enabling 5-level paging > -======================= > -CONFIG_X86_5LEVEL=y enables the feature. > - > -Kernel with CONFIG_X86_5LEVEL=y still able to boot on 4-level hardware. > -In this case additional page table level -- p4d -- will be folded at > -runtime. > - > User-space and large virtual address space > ========================================== > On x86, 5-level paging enables 56-bit userspace virtual address space. > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig > index 45b36a019b5e..7aed3fa0e780 100644 > --- a/arch/x86/Kconfig > +++ b/arch/x86/Kconfig > @@ -427,8 +427,7 @@ config DYNAMIC_PHYSICAL_MASK > > config PGTABLE_LEVELS > int > - default 5 if X86_5LEVEL > - default 4 if X86_64 > + default 5 if X86_64 > default 3 if X86_PAE > default 2 > > @@ -1464,25 +1463,6 @@ config X86_PAE > has the cost of more pagetable lookup overhead, and also > consumes more pagetable space per process. > > -config X86_5LEVEL > - bool "Enable 5-level page tables support" > - default y > - depends on X86_64 > - help > - 5-level paging enables access to larger address space: > - up to 128 PiB of virtual address space and 4 PiB of > - physical address space. > - > - It will be supported by future Intel CPUs. > - > - A kernel with the option enabled can be booted on machines that > - support 4- or 5-level paging. > - > - See Documentation/arch/x86/x86_64/5level-paging.rst for more > - information. > - > - Say N if unsure. > - > config X86_DIRECT_GBPAGES > def_bool y > depends on X86_64 > diff --git a/arch/x86/Kconfig.cpufeatures b/arch/x86/Kconfig.cpufeatures > index e12d5b7e39a2..250c10627ab3 100644 > --- a/arch/x86/Kconfig.cpufeatures > +++ b/arch/x86/Kconfig.cpufeatures > @@ -132,10 +132,6 @@ config X86_DISABLED_FEATURE_OSPKE > def_bool y > depends on !X86_INTEL_MEMORY_PROTECTION_KEYS > > -config X86_DISABLED_FEATURE_LA57 > - def_bool y > - depends on !X86_5LEVEL > - > config X86_DISABLED_FEATURE_PTI > def_bool y > depends on !MITIGATION_PAGE_TABLE_ISOLATION > diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c > index 5a6c7a190e5b..bdd26050dff7 100644 > --- a/arch/x86/boot/compressed/pgtable_64.c > +++ b/arch/x86/boot/compressed/pgtable_64.c > @@ -10,12 +10,10 @@ > #define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */ > #define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */ > > -#ifdef CONFIG_X86_5LEVEL > /* __pgtable_l5_enabled needs to be in .data to avoid being cleared along with .bss */ > unsigned int __section(".data") __pgtable_l5_enabled; > unsigned int __section(".data") pgdir_shift = 39; > unsigned int __section(".data") ptrs_per_p4d = 1; > -#endif > > /* Buffer to preserve trampoline memory */ > static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; > @@ -114,18 +112,13 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) > * Check if LA57 is desired and supported. > * > * There are several parts to the check: > - * - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y > * - if user asked to disable 5-level paging: no5lvl in cmdline > * - if the machine supports 5-level paging: > * + CPUID leaf 7 is supported > * + the leaf has the feature bit set > - * > - * That's substitute for boot_cpu_has() in early boot code. > */ > - if (IS_ENABLED(CONFIG_X86_5LEVEL) && > - !cmdline_find_option_bool("no5lvl") && > - native_cpuid_eax(0) >= 7 && > - (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) { > + if (!cmdline_find_option_bool("no5lvl") && > + native_cpuid_eax(0) >= 7 && (native_cpuid_ecx(7) & BIT(16))) { > l5_required = true; > > /* Initialize variables for 5-level paging */ > diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S > index 9cb91421b4e4..e30649e44d8f 100644 > --- a/arch/x86/boot/header.S > +++ b/arch/x86/boot/header.S > @@ -361,12 +361,8 @@ xloadflags: > #endif > > #ifdef CONFIG_X86_64 > -#ifdef CONFIG_X86_5LEVEL > #define XLF56 (XLF_5LEVEL|XLF_5LEVEL_ENABLED) > #else > -#define XLF56 XLF_5LEVEL > -#endif > -#else > #define XLF56 0 > #endif > > diff --git a/arch/x86/boot/startup/map_kernel.c b/arch/x86/boot/startup/map_kernel.c > index 905e8734b5a3..332dbe6688c4 100644 > --- a/arch/x86/boot/startup/map_kernel.c > +++ b/arch/x86/boot/startup/map_kernel.c > @@ -16,9 +16,6 @@ extern unsigned int next_early_pgt; > > static inline bool check_la57_support(void) > { > - if (!IS_ENABLED(CONFIG_X86_5LEVEL)) > - return false; > - > /* > * 5-level paging is detected and enabled at kernel decompression > * stage. Only check if it has been enabled there. > @@ -129,7 +126,7 @@ unsigned long __head __startup_64(unsigned long p2v_offset, > pgd = rip_rel_ptr(early_top_pgt); > pgd[pgd_index(__START_KERNEL_map)] += load_delta; > > - if (IS_ENABLED(CONFIG_X86_5LEVEL) && la57) { > + if (la57) { > p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt); > p4d[MAX_PTRS_PER_P4D - 1] += load_delta; > > diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h > index d3aab6f4e59a..015d23f3e01f 100644 > --- a/arch/x86/include/asm/page_64.h > +++ b/arch/x86/include/asm/page_64.h > @@ -62,7 +62,6 @@ static inline void clear_page(void *page) > void copy_page(void *to, void *from); > KCFI_REFERENCE(copy_page); > > -#ifdef CONFIG_X86_5LEVEL > /* > * User space process size. This is the first address outside the user range. > * There are a few constraints that determine this: > @@ -93,7 +92,6 @@ static __always_inline unsigned long task_size_max(void) > > return ret; > } > -#endif /* CONFIG_X86_5LEVEL */ > > #endif /* !__ASSEMBLER__ */ > > diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h > index 6b8c8169c71d..7400dab373fe 100644 > --- a/arch/x86/include/asm/page_64_types.h > +++ b/arch/x86/include/asm/page_64_types.h > @@ -48,14 +48,7 @@ > /* See Documentation/arch/x86/x86_64/mm.rst for a description of the memory map. */ > > #define __PHYSICAL_MASK_SHIFT 52 > - > -#ifdef CONFIG_X86_5LEVEL > #define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled() ? 56 : 47) > -/* See task_size_max() in <asm/page_64.h> */ > -#else > -#define __VIRTUAL_MASK_SHIFT 47 > -#define task_size_max() ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE) > -#endif > > #define TASK_SIZE_MAX task_size_max() > #define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE) > diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h > index eee06f77b245..4604f924d8b8 100644 > --- a/arch/x86/include/asm/pgtable_64_types.h > +++ b/arch/x86/include/asm/pgtable_64_types.h > @@ -23,7 +23,6 @@ typedef struct { pmdval_t pmd; } pmd_t; > > extern unsigned int __pgtable_l5_enabled; > > -#ifdef CONFIG_X86_5LEVEL > #ifdef USE_EARLY_PGTABLE_L5 > /* > * cpu_feature_enabled() is not available in early boot code. > @@ -37,17 +36,11 @@ static inline bool pgtable_l5_enabled(void) > #define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57) > #endif /* USE_EARLY_PGTABLE_L5 */ > > -#else > -#define pgtable_l5_enabled() 0 > -#endif /* CONFIG_X86_5LEVEL */ > - > extern unsigned int pgdir_shift; > extern unsigned int ptrs_per_p4d; > > #endif /* !__ASSEMBLER__ */ > > -#ifdef CONFIG_X86_5LEVEL > - > /* > * PGDIR_SHIFT determines what a top-level page table entry can map > */ > @@ -65,17 +58,6 @@ extern unsigned int ptrs_per_p4d; > > #define MAX_POSSIBLE_PHYSMEM_BITS 52 > > -#else /* CONFIG_X86_5LEVEL */ > - > -/* > - * PGDIR_SHIFT determines what a top-level page table entry can map > - */ > -#define PGDIR_SHIFT 39 > -#define PTRS_PER_PGD 512 > -#define MAX_PTRS_PER_P4D 1 > - > -#endif /* CONFIG_X86_5LEVEL */ > - > /* > * 3rd level page > */ > diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c > index 29572927f9c5..ecfe7b497cad 100644 > --- a/arch/x86/kernel/alternative.c > +++ b/arch/x86/kernel/alternative.c > @@ -596,7 +596,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, > DPRINTK(ALT, "alt table %px, -> %px", start, end); > > /* > - * In the case CONFIG_X86_5LEVEL=y, KASAN_SHADOW_START is defined using > + * KASAN_SHADOW_START is defined using > * cpu_feature_enabled(X86_FEATURE_LA57) and is therefore patched here. > * During the process, KASAN becomes confused seeing partial LA57 > * conversion and triggers a false-positive out-of-bound report. > diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c > index 9f617be64fa9..533fcf5636fc 100644 > --- a/arch/x86/kernel/head64.c > +++ b/arch/x86/kernel/head64.c > @@ -51,13 +51,11 @@ unsigned int __initdata next_early_pgt; > SYM_PIC_ALIAS(next_early_pgt); > pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); > > -#ifdef CONFIG_X86_5LEVEL > unsigned int __pgtable_l5_enabled __ro_after_init; > unsigned int pgdir_shift __ro_after_init = 39; > EXPORT_SYMBOL(pgdir_shift); > unsigned int ptrs_per_p4d __ro_after_init = 1; > EXPORT_SYMBOL(ptrs_per_p4d); > -#endif > > unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4; > EXPORT_SYMBOL(page_offset_base); > diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S > index 069420853304..3e9b3a3bd039 100644 > --- a/arch/x86/kernel/head_64.S > +++ b/arch/x86/kernel/head_64.S > @@ -649,13 +649,11 @@ SYM_DATA_START_PTI_ALIGNED(init_top_pgt) > SYM_DATA_END(init_top_pgt) > #endif > > -#ifdef CONFIG_X86_5LEVEL > SYM_DATA_START_PAGE_ALIGNED(level4_kernel_pgt) > .fill 511,8,0 > .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC > SYM_DATA_END(level4_kernel_pgt) > SYM_PIC_ALIAS(level4_kernel_pgt) > -#endif > > SYM_DATA_START_PAGE_ALIGNED(level3_kernel_pgt) > .fill L3_START_KERNEL,8,0 > diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c > index aa56d9ac0b8f..7456df985d96 100644 > --- a/arch/x86/mm/init.c > +++ b/arch/x86/mm/init.c > @@ -174,11 +174,7 @@ __ref void *alloc_low_pages(unsigned int num) > * randomization is enabled. > */ > > -#ifndef CONFIG_X86_5LEVEL > -#define INIT_PGD_PAGE_TABLES 3 > -#else > #define INIT_PGD_PAGE_TABLES 4 > -#endif > > #ifndef CONFIG_RANDOMIZE_MEMORY > #define INIT_PGD_PAGE_COUNT (2 * INIT_PGD_PAGE_TABLES) > diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c > index 59c42dec7076..62777ba4de1a 100644 > --- a/arch/x86/mm/pgtable.c > +++ b/arch/x86/mm/pgtable.c > @@ -592,7 +592,7 @@ void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, > } > > #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP > -#ifdef CONFIG_X86_5LEVEL > +#if CONFIG_PGTABLE_LEVELS > 4 > /** > * p4d_set_huge - Set up kernel P4D mapping > * @p4d: Pointer to the P4D entry > diff --git a/drivers/firmware/efi/libstub/x86-5lvl.c b/drivers/firmware/efi/libstub/x86-5lvl.c > index 77359e802181..f1c5fb45d5f7 100644 > --- a/drivers/firmware/efi/libstub/x86-5lvl.c > +++ b/drivers/firmware/efi/libstub/x86-5lvl.c > @@ -62,7 +62,7 @@ efi_status_t efi_setup_5level_paging(void) > > void efi_5level_switch(void) > { > - bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl; > + bool want_la57 = !efi_no5lvl; > bool have_la57 = native_read_cr4() & X86_CR4_LA57; > bool need_toggle = want_la57 ^ have_la57; > u64 *pgt = (void *)la57_toggle + PAGE_SIZE; > -- > 2.47.2 >