On Tue, Jun 24, 2025 at 08:58:57AM +0800, Hui Wang wrote:
> Sorry for late response, I was OOO the past week.
>
> This is the log after applied your patch:
> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2111521/comments/61
>
> Looks like the "retry" makes the nvme work.

Thank you!  It seems like we get 0xffffffff (probably PCIe error) for
a long time after we think the device should be able to respond with
RRS.

I always thought the spec required that after the delays, a device
should respond with RRS if it's not ready, but now I guess I'm not
100% sure.  Maybe it's allowed to just do nothing, which would lead to
the Root Port timing out and logging an Unsupported Request error.

Can I trouble you to try the patch below?  I think we might have to
start explicitly checking for that error.  That probably would require
some setup to enable the error, check for it, and clear it.  I hacked
in some of that here, but ultimately some of it should go elsewhere.

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index e9448d55113b..c276d0a2b522 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1264,10 +1264,13 @@ void pci_resume_bus(struct pci_bus *bus)
 
 static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
 {
-	int delay = 1;
+	int delay = 10;
 	bool retrain = false;
 	struct pci_dev *root, *bridge;
+	u16 devctl, devsta;
 
+	pci_info(dev, "%s: VF%c %s timeout %d\n", __func__,
+		 dev->is_virtfn ? '+' : '-', reset_type, timeout);
 	root = pcie_find_root_port(dev);
 
 	if (pci_is_pcie(dev)) {
@@ -1276,6 +1279,19 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
 			retrain = true;
 	}
 
+	if (root) {
+		pcie_capability_read_word(root, PCI_EXP_DEVCTL, &devctl);
+		if (!(devctl & PCI_EXP_DEVCTL_URRE))
+			pcie_capability_write_word(root, PCI_EXP_DEVCTL,
+					    devctl | PCI_EXP_DEVCTL_URRE);
+		pcie_capability_read_word(root, PCI_EXP_DEVSTA, &devsta);
+		if (devsta & PCI_EXP_DEVSTA_URD)
+			pcie_capability_write_word(root, PCI_EXP_DEVSTA,
+						   PCI_EXP_DEVSTA_URD);
+		pci_info(root, "%s: DEVCTL %#06x DEVSTA %#06x\n", __func__,
+			 devctl, devsta);
+	}
+
 	/*
 	 * The caller has already waited long enough after a reset that the
 	 * device should respond to config requests, but it may respond
@@ -1305,14 +1321,33 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
 
 		if (root && root->config_rrs_sv) {
 			pci_read_config_dword(dev, PCI_VENDOR_ID, &id);
-			if (!pci_bus_rrs_vendor_id(id))
-				break;
+
+			if (pci_bus_rrs_vendor_id(id)) {
+				pci_info(dev, "%s: read %#06x (RRS)\n",
+					 __func__, id);
+				goto retry;
+			}
+
+			if (PCI_POSSIBLE_ERROR(id)) {
+				pcie_capability_read_word(root, PCI_EXP_DEVSTA,
+							  &devsta);
+				if (devsta & PCI_EXP_DEVSTA_URD)
+					pcie_capability_write_word(root,
+							    PCI_EXP_DEVSTA,
+							    PCI_EXP_DEVSTA_URD);
+				pci_info(root, "%s: read %#06x DEVSTA %#06x\n",
+					 __func__, id, devsta);
+				goto retry;
+			}
+
+			break;
 		} else {
 			pci_read_config_dword(dev, PCI_COMMAND, &id);
 			if (!PCI_POSSIBLE_ERROR(id))
 				break;
 		}
 
+retry:
 		if (delay > timeout) {
 			pci_warn(dev, "not ready %dms after %s; giving up\n",
 				 delay - 1, reset_type);
@@ -1332,7 +1367,6 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
 		}
 
 		msleep(delay);
-		delay *= 2;
 	}
 
 	if (delay > PCI_RESET_WAIT)
@@ -4670,8 +4704,10 @@ static int pcie_wait_for_link_status(struct pci_dev *pdev,
 	end_jiffies = jiffies + msecs_to_jiffies(PCIE_LINK_RETRAIN_TIMEOUT_MS);
 	do {
 		pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta);
-		if ((lnksta & lnksta_mask) == lnksta_match)
+		if ((lnksta & lnksta_mask) == lnksta_match) {
+			pci_info(pdev, "%s: LNKSTA %#06x\n", __func__, lnksta);
 			return 0;
+		}
 		msleep(1);
 	} while (time_before(jiffies, end_jiffies));
 
@@ -4760,6 +4796,8 @@ static bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active,
 	 * Some controllers might not implement link active reporting. In this
 	 * case, we wait for 1000 ms + any delay requested by the caller.
 	 */
+	pci_info(pdev, "%s: active %d delay %d link_active_reporting %d\n",
+		 __func__, active, delay, pdev->link_active_reporting);
 	if (!pdev->link_active_reporting) {
 		msleep(PCIE_LINK_RETRAIN_TIMEOUT_MS + delay);
 		return true;
@@ -4784,6 +4822,7 @@ static bool pcie_wait_for_link_delay(struct pci_dev *pdev, bool active,
 		return false;
 
 	msleep(delay);
+	pci_info(pdev, "%s: waited %dms\n", __func__, delay);
 	return true;
 }
 
@@ -4960,6 +4999,7 @@ void pci_reset_secondary_bus(struct pci_dev *dev)
 
 	ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
 	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+	pci_info(dev, "%s: PCI_BRIDGE_CTL_BUS_RESET deasserted\n", __func__);
 }
 
 void __weak pcibios_reset_secondary_bus(struct pci_dev *dev)