Add functions to register a GHES entry with HEE, allowing the OS to receive hardware error notifications from firmware through standardized ACPI interfaces. Signed-off-by: Ruidong Tian <tianruidong@xxxxxxxxxxxxxxxxx> --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/fixmap.h | 6 ++++ drivers/acpi/apei/Kconfig | 12 +++++++ drivers/acpi/apei/ghes.c | 58 +++++++++++++++++++++++++++++++++ include/acpi/ghes.h | 6 ++++ 5 files changed, 83 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a4b233a0659e..b085e172b355 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -23,6 +23,7 @@ config RISCV select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE + select HAVE_ACPI_APEI if (ACPI && EFI) select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if MMU diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index 0a55099bb734..07421edc9daa 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -38,6 +38,12 @@ enum fixed_addresses { FIX_TEXT_POKE0, FIX_EARLYCON_MEM_BASE, +#ifdef CONFIG_ACPI_APEI_HEE + /* Used for GHES mapping from assorted contexts */ + FIX_APEI_GHES_IRQ, + FIX_APEI_GHES_HEE, +#endif /* CONFIG_ACPI_APEI_HEE */ + __end_of_permanent_fixed_addresses, /* * Temporary boot-time mappings, used by early_ioremap(), diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 070c07d68dfb..d54a295cfc8d 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -46,6 +46,18 @@ config ACPI_APEI_SEA depends on ARM64 && ACPI_APEI_GHES default y +config ACPI_APEI_HEE + bool "APEI Hardware Error Exception support" + depends on RISCV && ACPI_APEI_GHES + default y + help + Enable support for RISC-V Hardware Error Exception (HEE) notification + in the ACPI Platform Error Interface (APEI). 
This allows firmware + to report hardware errors through the RISC-V exception mechanism. + + Say Y if you want to support firmware-first error handling + on RISC-V platforms with ACPI. + config ACPI_APEI_MEMORY_FAILURE bool "APEI memory error recovering support" depends on ACPI_APEI && MEMORY_FAILURE diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index a0d54993edb3..1011e28091dc 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -97,6 +97,11 @@ #define FIX_APEI_GHES_SDEI_CRITICAL __end_of_fixed_addresses #endif +#if !defined(CONFIG_X86) && !defined(CONFIG_ARM64) +#define FIX_APEI_GHES_NMI __end_of_fixed_addresses +#define FIX_APEI_GHES_SEA __end_of_fixed_addresses +#endif + static ATOMIC_NOTIFIER_HEAD(ghes_report_chain); static inline bool is_hest_type_generic_v2(struct ghes *ghes) @@ -1415,6 +1420,45 @@ static inline void ghes_sea_add(struct ghes *ghes) { } static inline void ghes_sea_remove(struct ghes *ghes) { } #endif /* CONFIG_ACPI_APEI_SEA */ +#ifdef CONFIG_ACPI_APEI_HEE +static LIST_HEAD(ghes_hee); + +/* + * Return 0 only if one of the HEE error sources successfully reported an error + * record sent from the firmware. 
+ */ +int ghes_notify_hee(void) +{ + static DEFINE_RAW_SPINLOCK(ghes_notify_lock_hee); + int rv; + + raw_spin_lock(&ghes_notify_lock_hee); + rv = ghes_in_nmi_spool_from_list(&ghes_hee, FIX_APEI_GHES_HEE); + raw_spin_unlock(&ghes_notify_lock_hee); + + return rv; +} +EXPORT_SYMBOL_GPL(ghes_notify_hee); + +static void ghes_hee_add(struct ghes *ghes) +{ + mutex_lock(&ghes_list_mutex); + list_add_rcu(&ghes->list, &ghes_hee); + mutex_unlock(&ghes_list_mutex); +} + +static void ghes_hee_remove(struct ghes *ghes) +{ + mutex_lock(&ghes_list_mutex); + list_del_rcu(&ghes->list); + mutex_unlock(&ghes_list_mutex); + synchronize_rcu(); +} +#else /* CONFIG_ACPI_APEI_HEE */ +static inline void ghes_hee_add(struct ghes *ghes) { } +static inline void ghes_hee_remove(struct ghes *ghes) { } +#endif /* CONFIG_ACPI_APEI_HEE */ + #ifdef CONFIG_HAVE_ACPI_APEI_NMI /* * NMI may be triggered on any CPU, so ghes_in_nmi is used for @@ -1558,6 +1602,14 @@ static int ghes_probe(struct platform_device *ghes_dev) goto err; } break; + case ACPI_HEST_NOTIFY_HEE: + if (!IS_ENABLED(CONFIG_ACPI_APEI_HEE)) { + pr_warn(GHES_PFX "Generic hardware error source: %d notified via HEE is not supported\n", + generic->header.source_id); + rc = -ENOTSUPP; + goto err; + } + break; case ACPI_HEST_NOTIFY_NMI: if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) { pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n", @@ -1631,6 +1683,9 @@ static int ghes_probe(struct platform_device *ghes_dev) case ACPI_HEST_NOTIFY_SEA: ghes_sea_add(ghes); break; + case ACPI_HEST_NOTIFY_HEE: + ghes_hee_add(ghes); + break; case ACPI_HEST_NOTIFY_NMI: ghes_nmi_add(ghes); break; @@ -1698,6 +1753,9 @@ static void ghes_remove(struct platform_device *ghes_dev) case ACPI_HEST_NOTIFY_SEA: ghes_sea_remove(ghes); break; + case ACPI_HEST_NOTIFY_HEE: + ghes_hee_remove(ghes); + break; case ACPI_HEST_NOTIFY_NMI: ghes_nmi_remove(ghes); break; diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 
ebd21b05fe6e..8046e1b30c21 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -127,6 +127,12 @@ int ghes_notify_sea(void); static inline int ghes_notify_sea(void) { return -ENOENT; } #endif +#ifdef CONFIG_ACPI_APEI_HEE +int ghes_notify_hee(void); +#else +static inline int ghes_notify_hee(void) { return -ENOENT; } +#endif + struct notifier_block; extern void ghes_register_report_chain(struct notifier_block *nb); extern void ghes_unregister_report_chain(struct notifier_block *nb); -- 2.43.7