PCIe permits a device to ignore ATS invalidation TLPs while it is processing a reset. This creates a problem visible to the OS, where an ATS invalidation command will time out: e.g. an SVA domain has no coordination with a reset event and can racily issue ATS invalidations to a resetting device.

The IMPLEMENTATION NOTE in sec 10.3.1 of the PCIe spec recommends disabling and blocking ATS before initiating a Function Level Reset. It also mentions that other reset methods could have the same vulnerability.

The iommu_dev_reset_prepare/done() helpers were introduced for this purpose. Use them in all the existing reset functions, which will attach the device to an IOMMU_DOMAIN_BLOCKED during a reset, so as to allow the IOMMU driver to:
 - invoke pci_disable_ats() and pci_enable_ats(), respectively
 - wait for all ATS invalidations to complete
 - stop issuing new ATS invalidations
 - fence any incoming ATS queries

Add a warning if ATS isn't disabled, in which case the IOMMU driver should be fixed to disable ATS following the design in iommu_dev_reset_prepare().

Signed-off-by: Nicolin Chen <nicolinc@xxxxxxxxxx> --- drivers/pci/pci-acpi.c | 21 ++++++++++- drivers/pci/pci.c | 84 +++++++++++++++++++++++++++++++++++++++--- drivers/pci/quirks.c | 27 +++++++++++++- 3 files changed, 124 insertions(+), 8 deletions(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index b78e0e417324..727957f193ca 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -9,6 +9,7 @@ #include <linux/delay.h> #include <linux/init.h> +#include <linux/iommu.h> #include <linux/irqdomain.h> #include <linux/pci.h> #include <linux/msi.h> @@ -974,6 +975,7 @@ void pci_set_acpi_fwnode(struct pci_dev *dev) int pci_dev_acpi_reset(struct pci_dev *dev, bool probe) { acpi_handle handle = ACPI_HANDLE(&dev->dev); + int ret = 0; if (!handle || !acpi_has_method(handle, "_RST")) return -ENOTTY; @@ -981,12 +983,27 @@ int pci_dev_acpi_reset(struct pci_dev *dev, bool probe) if (probe) return 0; + /* + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS + * before initiating a reset. Notify the iommu driver that enabled ATS. + */ + ret = iommu_dev_reset_prepare(&dev->dev); + if (ret) { + pci_err(dev, "failed to stop IOMMU\n"); + return ret; + } + + /* Something wrong with the iommu driver that failed to disable ATS */ + if (dev->ats_enabled) + pci_err(dev, "failed to stop ATS. 
ATS invalidation may time out\n"); + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_RST", NULL, NULL))) { pci_warn(dev, "ACPI _RST failed\n"); - return -ENOTTY; + ret = -ENOTTY; } - return 0; + iommu_dev_reset_done(&dev->dev); + return ret; } bool acpi_pci_power_manageable(struct pci_dev *dev) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e9448d55113b..ddb7a10ef500 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -13,6 +13,7 @@ #include <linux/delay.h> #include <linux/dmi.h> #include <linux/init.h> +#include <linux/iommu.h> #include <linux/msi.h> #include <linux/of.h> #include <linux/pci.h> @@ -4518,13 +4519,30 @@ EXPORT_SYMBOL(pci_wait_for_pending_transaction); */ int pcie_flr(struct pci_dev *dev) { + int ret = 0; + if (!pci_wait_for_pending_transaction(dev)) pci_err(dev, "timed out waiting for pending transaction; performing function level reset anyway\n"); + /* + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS + * before initiating a reset. Notify the iommu driver that enabled ATS. + * Have to call it after waiting for pending DMA transaction. + */ + ret = iommu_dev_reset_prepare(&dev->dev); + if (ret) { + pci_err(dev, "failed to stop IOMMU\n"); + return ret; + } + + /* Something wrong with the iommu driver that failed to disable ATS */ + if (dev->ats_enabled) + pci_err(dev, "failed to stop ATS. 
ATS invalidation may time out\n"); + pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR); if (dev->imm_ready) - return 0; + goto done; /* * Per PCIe r4.0, sec 6.6.2, a device must complete an FLR within @@ -4533,7 +4551,11 @@ int pcie_flr(struct pci_dev *dev) */ msleep(100); - return pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS); + ret = pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS); + +done: + iommu_dev_reset_done(&dev->dev); + return ret; } EXPORT_SYMBOL_GPL(pcie_flr); @@ -4561,6 +4583,7 @@ EXPORT_SYMBOL_GPL(pcie_reset_flr); static int pci_af_flr(struct pci_dev *dev, bool probe) { + int ret = 0; int pos; u8 cap; @@ -4587,10 +4610,25 @@ static int pci_af_flr(struct pci_dev *dev, bool probe) PCI_AF_STATUS_TP << 8)) pci_err(dev, "timed out waiting for pending transaction; performing AF function level reset anyway\n"); + /* + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS + * before initiating a reset. Notify the iommu driver that enabled ATS. + * Have to call it after waiting for pending DMA transaction. + */ + ret = iommu_dev_reset_prepare(&dev->dev); + if (ret) { + pci_err(dev, "failed to stop IOMMU\n"); + return ret; + } + + /* Something wrong with the iommu driver that failed to disable ATS */ + if (dev->ats_enabled) + pci_err(dev, "failed to stop ATS. 
ATS invalidation may time out\n"); + pci_write_config_byte(dev, pos + PCI_AF_CTRL, PCI_AF_CTRL_FLR); if (dev->imm_ready) - return 0; + goto done; /* * Per Advanced Capabilities for Conventional PCI ECN, 13 April 2006, @@ -4600,7 +4638,11 @@ static int pci_af_flr(struct pci_dev *dev, bool probe) */ msleep(100); - return pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS); + ret = pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS); + +done: + iommu_dev_reset_done(&dev->dev); + return ret; } /** @@ -4621,6 +4663,7 @@ static int pci_af_flr(struct pci_dev *dev, bool probe) static int pci_pm_reset(struct pci_dev *dev, bool probe) { u16 csr; + int ret; if (!dev->pm_cap || dev->dev_flags & PCI_DEV_FLAGS_NO_PM_RESET) return -ENOTTY; @@ -4635,6 +4678,20 @@ static int pci_pm_reset(struct pci_dev *dev, bool probe) if (dev->current_state != PCI_D0) return -EINVAL; + /* + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS + * before initiating a reset. Notify the iommu driver that enabled ATS. + */ + ret = iommu_dev_reset_prepare(&dev->dev); + if (ret) { + pci_err(dev, "failed to stop IOMMU\n"); + return ret; + } + + /* Something wrong with the iommu driver that failed to disable ATS */ + if (dev->ats_enabled) + pci_err(dev, "failed to stop ATS. 
ATS invalidation may time out\n"); + csr &= ~PCI_PM_CTRL_STATE_MASK; csr |= PCI_D3hot; pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr); @@ -4645,7 +4702,9 @@ static int pci_pm_reset(struct pci_dev *dev, bool probe) pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr); pci_dev_d3_sleep(dev); - return pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS); + ret = pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS); + iommu_dev_reset_done(&dev->dev); + return ret; } /** @@ -5100,6 +5159,20 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe) if (rc) return -ENOTTY; + /* + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS + * before initiating a reset. Notify the iommu driver that enabled ATS. + */ + rc = iommu_dev_reset_prepare(&dev->dev); + if (rc) { + pci_err(dev, "failed to stop IOMMU\n"); + return rc; + } + + /* Something wrong with the iommu driver that failed to disable ATS */ + if (dev->ats_enabled) + pci_err(dev, "failed to stop ATS. 
ATS invalidation may time out\n"); + if (reg & PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR) { val = reg; } else { @@ -5114,6 +5187,7 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe) pci_write_config_word(bridge, dvsec + PCI_DVSEC_CXL_PORT_CTL, reg); + iommu_dev_reset_done(&dev->dev); return rc; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index d7f4ee634263..7a66c01392d9 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -21,6 +21,7 @@ #include <linux/pci.h> #include <linux/isa-dma.h> /* isa_dma_bridge_buggy */ #include <linux/init.h> +#include <linux/iommu.h> #include <linux/delay.h> #include <linux/acpi.h> #include <linux/dmi.h> @@ -4223,6 +4224,30 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = { { 0 } }; +static int __pci_dev_specific_reset(struct pci_dev *dev, bool probe, + const struct pci_dev_reset_methods *i) +{ + int ret; + + /* + * Per PCIe r6.3, sec 10.3.1 IMPLEMENTATION NOTE, software disables ATS + * before initiating a reset. Notify the iommu driver that enabled ATS. + */ + ret = iommu_dev_reset_prepare(&dev->dev); + if (ret) { + pci_err(dev, "failed to stop IOMMU\n"); + return ret; + } + + /* Something wrong with the iommu driver that failed to disable ATS */ + if (dev->ats_enabled) + pci_err(dev, "failed to stop ATS. ATS invalidation may time out\n"); + + ret = i->reset(dev, probe); + iommu_dev_reset_done(&dev->dev); + return ret; +} + /* * These device-specific reset methods are here rather than in a driver * because when a host assigns a device to a guest VM, the host may need @@ -4237,7 +4262,7 @@ int pci_dev_specific_reset(struct pci_dev *dev, bool probe) i->vendor == (u16)PCI_ANY_ID) && (i->device == dev->device || i->device == (u16)PCI_ANY_ID)) - return i->reset(dev, probe); + return __pci_dev_specific_reset(dev, probe, i); } return -ENOTTY; -- 2.43.0