If an error is triggered before or during system suspend, any bus level power state transition will result in unpredictable behaviour by the device with failed recovery. Avoid suspending such a device and leave it in its existing power state. This only covers the devices that rely on PCI core PM for their power state transition. Signed-off-by: Raag Jadav <raag.jadav@xxxxxxxxx> --- v2: Synchronize AER handling with PCI PM (Rafael) More discussion on [1]. [1] https://lore.kernel.org/all/CAJZ5v0g-aJXfVH+Uc=9eRPuW08t-6PwzdyMXsC6FZRKYJtY03Q@xxxxxxxxxxxxxx/ drivers/pci/pci-driver.c | 3 ++- drivers/pci/pcie/aer.c | 11 +++++++++++ include/linux/aer.h | 2 ++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index f57ea36d125d..289a1fa7cb2d 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -884,7 +884,8 @@ static int pci_pm_suspend_noirq(struct device *dev) } } - if (!pci_dev->state_saved) { + /* Avoid suspending the device with errors */ + if (!pci_aer_in_progress(pci_dev) && !pci_dev->state_saved) { pci_save_state(pci_dev); /* diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 508474e17183..b70f5011d4f5 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -233,6 +233,17 @@ int pcie_aer_is_native(struct pci_dev *dev) } EXPORT_SYMBOL_NS_GPL(pcie_aer_is_native, "CXL"); +bool pci_aer_in_progress(struct pci_dev *dev) +{ + u16 reg16; + + if (!pcie_aer_is_native(dev)) + return false; + + pcie_capability_read_word(dev, PCI_EXP_DEVSTA, ®16); + return !!(reg16 & PCI_EXP_AER_FLAGS); +} + static int pci_enable_pcie_error_reporting(struct pci_dev *dev) { int rc; diff --git a/include/linux/aer.h b/include/linux/aer.h index 947b63091902..68ae5378256e 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -48,12 +48,14 @@ struct aer_capability_regs { #if defined(CONFIG_PCIEAER) int pci_aer_clear_nonfatal_status(struct pci_dev *dev); int pcie_aer_is_native(struct pci_dev *dev); +bool pci_aer_in_progress(struct pci_dev *dev); #else static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { return -EINVAL; } static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } +static inline bool pci_aer_in_progress(struct pci_dev *dev) { return false; } #endif void pci_print_aer(struct pci_dev *dev, int aer_severity, -- 2.34.1