On Thu, 11 Sep 2025 11:32:58 -0700 Farhan Ali <alifm@xxxxxxxxxxxxx> wrote: > The current reset process saves the device's config space state before > reset and restores it afterward. However, when a device is in an error > state before reset, config space reads may return error values instead of > valid data. This results in saving corrupted values that get written back > to the device during state restoration. > > Avoid saving the state of the config space when the device is in error. > While restoring we only restorei the state that can be restored through s/restorei/restore/ > kernel data such as BARs or doesn't depend on the saved state. > > Signed-off-by: Farhan Ali <alifm@xxxxxxxxxxxxx> > --- > drivers/pci/pci.c | 29 ++++++++++++++++++++++++++--- > drivers/pci/pcie/aer.c | 5 +++++ > drivers/pci/pcie/dpc.c | 5 +++++ > drivers/pci/pcie/ptm.c | 5 +++++ > drivers/pci/tph.c | 5 +++++ > drivers/pci/vc.c | 5 +++++ > 6 files changed, 51 insertions(+), 3 deletions(-) > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c > index b0f4d98036cd..4b67d22faf0a 100644 > --- a/drivers/pci/pci.c > +++ b/drivers/pci/pci.c > @@ -1720,6 +1720,11 @@ static void pci_restore_pcie_state(struct pci_dev *dev) > struct pci_cap_saved_state *save_state; > u16 *cap; > > + if (!dev->state_saved) { > + pci_warn(dev, "Not restoring pcie state, no saved state"); > + return; > + } > + > /* > * Restore max latencies (in the LTR capability) before enabling > * LTR itself in PCI_EXP_DEVCTL2. 
> @@ -1775,6 +1780,11 @@ static void pci_restore_pcix_state(struct pci_dev *dev) > struct pci_cap_saved_state *save_state; > u16 *cap; > > + if (!dev->state_saved) { > + pci_warn(dev, "Not restoring pcix state, no saved state"); > + return; > + } > + > save_state = pci_find_saved_cap(dev, PCI_CAP_ID_PCIX); > pos = pci_find_capability(dev, PCI_CAP_ID_PCIX); > if (!save_state || !pos) > @@ -1792,6 +1802,14 @@ static void pci_restore_pcix_state(struct pci_dev *dev) > int pci_save_state(struct pci_dev *dev) > { > int i; > + u16 val; > + > + pci_read_config_word(dev, PCI_DEVICE_ID, &val); > + if (PCI_POSSIBLE_ERROR(val)) { > + pci_warn(dev, "Device in error, not saving config space state\n"); > + return -EIO; > + } > + I don't think this works with standard VFs; per the spec, the Device ID register of a VF returns 0xFFFF. We likely need to look for a CRS or error status across both the Vendor ID and Device ID registers. We could also be a little more formal and specific when describing the skipped states, e.g. "PCIe capability", "PCI-X capability", "PCI AER capability", etc. Thanks, Alex > /* XXX: 100% dword access ok here? */ > for (i = 0; i < 16; i++) { > pci_read_config_dword(dev, i * 4, &dev->saved_config_space[i]); > @@ -1854,6 +1872,14 @@ static void pci_restore_config_space_range(struct pci_dev *pdev, > > static void pci_restore_config_space(struct pci_dev *pdev) > { > + if (!pdev->state_saved) { > + pci_warn(pdev, "No saved config space, restoring BARs\n"); > + pci_restore_bars(pdev); > + pci_write_config_word(pdev, PCI_COMMAND, > + PCI_COMMAND_MEMORY | PCI_COMMAND_IO); > + return; > + } > + > if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL) { > pci_restore_config_space_range(pdev, 10, 15, 0, false); > /* Restore BARs before the command register. 
*/ > @@ -1906,9 +1932,6 @@ static void pci_restore_rebar_state(struct pci_dev *pdev) > */ > void pci_restore_state(struct pci_dev *dev) > { > - if (!dev->state_saved) > - return; > - > pci_restore_pcie_state(dev); > pci_restore_pasid_state(dev); > pci_restore_pri_state(dev); > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c > index e286c197d716..dca3502ef669 100644 > --- a/drivers/pci/pcie/aer.c > +++ b/drivers/pci/pcie/aer.c > @@ -361,6 +361,11 @@ void pci_restore_aer_state(struct pci_dev *dev) > if (!aer) > return; > > + if (!dev->state_saved) { > + pci_warn(dev, "Not restoring aer state, no saved state"); > + return; > + } > + > save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR); > if (!save_state) > return; > diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c > index fc18349614d7..62c520af71a7 100644 > --- a/drivers/pci/pcie/dpc.c > +++ b/drivers/pci/pcie/dpc.c > @@ -67,6 +67,11 @@ void pci_restore_dpc_state(struct pci_dev *dev) > if (!pci_is_pcie(dev)) > return; > > + if (!dev->state_saved) { > + pci_warn(dev, "Not restoring dpc state, no saved state"); > + return; > + } > + > save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC); > if (!save_state) > return; > diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c > index 65e4b008be00..7b5bcc23000d 100644 > --- a/drivers/pci/pcie/ptm.c > +++ b/drivers/pci/pcie/ptm.c > @@ -112,6 +112,11 @@ void pci_restore_ptm_state(struct pci_dev *dev) > if (!ptm) > return; > > + if (!dev->state_saved) { > + pci_warn(dev, "Not restoring ptm state, no saved state"); > + return; > + } > + > save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_PTM); > if (!save_state) > return; > diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c > index cc64f93709a4..f0f1bae46736 100644 > --- a/drivers/pci/tph.c > +++ b/drivers/pci/tph.c > @@ -435,6 +435,11 @@ void pci_restore_tph_state(struct pci_dev *pdev) > if (!pdev->tph_enabled) > return; > > + if (!pdev->state_saved) { > + pci_warn(pdev, 
"Not restoring tph state, no saved state"); > + return; > + } > + > save_state = pci_find_saved_ext_cap(pdev, PCI_EXT_CAP_ID_TPH); > if (!save_state) > return; > diff --git a/drivers/pci/vc.c b/drivers/pci/vc.c > index a4ff7f5f66dd..fda435cd49c1 100644 > --- a/drivers/pci/vc.c > +++ b/drivers/pci/vc.c > @@ -391,6 +391,11 @@ void pci_restore_vc_state(struct pci_dev *dev) > { > int i; > > + if (!dev->state_saved) { > + pci_warn(dev, "Not restoring vc state, no saved state"); > + return; > + } > + > for (i = 0; i < ARRAY_SIZE(vc_caps); i++) { > int pos; > struct pci_cap_saved_state *save_state;