On 2025-07-28 17:11, Jason Gunthorpe wrote:
>> If the dma mapping for P2P memory doesn't need to create an iommu
>> mapping then that's fine. But it should be the dma-iommu layer to decide
>> that.
>
> So above, we can't use dma-iommu.c, it might not be compiled into the
> kernel but the dma_map_phys() path is still valid.

This is an easily solved problem. I did a very rough sketch below to say
it's really not that hard. (Note it has some rough edges that could be
cleaned up and I based it off Leon's git repo which appears to not be
the same as what was posted, but the core concept is sound).

Logan


diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 1853a969e197..da1a6003620a 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1806,6 +1806,22 @@ bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state,
 }
 EXPORT_SYMBOL_GPL(dma_iova_try_alloc);
 
+void dma_iova_try_alloc_p2p(struct p2pdma_provider *provider, struct device *dev,
+		struct dma_iova_state *state, phys_addr_t phys, size_t size)
+{
+	switch (pci_p2pdma_map_type(provider, dev)) {
+	case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+		dma_iova_try_alloc(dev, state, phys, size);
+		return;
+	case PCI_P2PDMA_MAP_BUS_ADDR:
+		state->bus_addr = true;
+		return;
+	default:
+		return;
+	}
+}
+EXPORT_SYMBOL_GPL(dma_iova_try_alloc_p2p);
+
 /**
  * dma_iova_free - Free an IOVA space
  * @dev: Device to free the IOVA space for
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index 455541d21538..5749be3a9b58 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -30,25 +30,12 @@ static int vfio_pci_dma_buf_attach(struct dma_buf *dmabuf,
 	if (priv->revoked)
 		return -ENODEV;
 
-	switch (pci_p2pdma_map_type(priv->vdev->provider, attachment->dev)) {
-	case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
-		break;
-	case PCI_P2PDMA_MAP_BUS_ADDR:
-		/*
-		 * There is no need in IOVA at all for this flow.
-		 * We rely on attachment->priv == NULL as a marker
-		 * for this mode.
-		 */
-		return 0;
-	default:
-		return -EINVAL;
-	}
-
 	attachment->priv = kzalloc(sizeof(struct dma_iova_state), GFP_KERNEL);
 	if (!attachment->priv)
 		return -ENOMEM;
 
-	dma_iova_try_alloc(attachment->dev, attachment->priv, 0, priv->size);
+	dma_iova_try_alloc_p2p(priv->vdev->provider, attachment->dev,
+			attachment->priv, 0, priv->size);
 	return 0;
 }
 
@@ -98,26 +85,11 @@ vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
 	sgl = sgt->sgl;
 
 	for (i = 0; i < priv->nr_ranges; i++) {
-		if (!state) {
-			addr = pci_p2pdma_bus_addr_map(provider,
-						       phys_vec[i].paddr);
-		} else if (dma_use_iova(state)) {
-			ret = dma_iova_link(attachment->dev, state,
-					    phys_vec[i].paddr, 0,
-					    phys_vec[i].len, dir, attrs);
-			if (ret)
-				goto err_unmap_dma;
-
-			mapped_len += phys_vec[i].len;
-		} else {
-			addr = dma_map_phys(attachment->dev, phys_vec[i].paddr,
-					    phys_vec[i].len, dir, attrs);
-			ret = dma_mapping_error(attachment->dev, addr);
-			if (ret)
-				goto err_unmap_dma;
-		}
+		addr = dma_map_phys_prealloc(attachment->dev, phys_vec[i].paddr,
+					     phys_vec[i].len, dir, attrs, state,
+					     provider);
 
-		if (!state || !dma_use_iova(state)) {
+		if (addr != DMA_MAPPING_USE_IOVA) {
 			/*
 			 * In IOVA case, there is only one SG entry which spans
 			 * for whole IOVA address space. So there is no need
@@ -128,7 +100,7 @@ vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
 		}
 	}
 
-	if (state && dma_use_iova(state)) {
+	if (addr == DMA_MAPPING_USE_IOVA) {
 		WARN_ON_ONCE(mapped_len != priv->size);
 		ret = dma_iova_sync(attachment->dev, state, 0, mapped_len);
 		if (ret)
@@ -139,7 +111,7 @@ vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
 	return sgt;
 
 err_unmap_dma:
-	if (!i || !state)
+	if (!i || state->bus_addr)
 		; /* Do nothing */
 	else if (dma_use_iova(state))
 		dma_iova_destroy(attachment->dev, state, mapped_len, dir,
@@ -164,7 +136,7 @@ static void vfio_pci_dma_buf_unmap(struct dma_buf_attachment *attachment,
 	struct scatterlist *sgl;
 	int i;
 
-	if (!state)
+	if (state->bus_addr)
 		; /* Do nothing */
 	else if (dma_use_iova(state))
 		dma_iova_destroy(attachment->dev, state, priv->size, dir,
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index ba54bbeca861..675e5ac13265 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -70,11 +70,14 @@
  */
 #define DMA_MAPPING_ERROR		(~(dma_addr_t)0)
 
+#define DMA_MAPPING_USE_IOVA		((dma_addr_t)-2)
+
 #define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
 
 struct dma_iova_state {
 	dma_addr_t addr;
 	u64 __size;
+	bool bus_addr;
 };
 
 /*
@@ -120,6 +123,12 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size,
 		enum dma_data_direction dir, unsigned long attrs);
 dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
 		enum dma_data_direction dir, unsigned long attrs);
+
+struct p2pdma_provider;
+dma_addr_t dma_map_phys_prealloc(struct device *dev, phys_addr_t phys, size_t size,
+		enum dma_data_direction dir, unsigned long attrs,
+		struct dma_iova_state *state, struct p2pdma_provider *provider);
+
 void dma_unmap_phys(struct device *dev, dma_addr_t addr, size_t size,
 		enum dma_data_direction dir, unsigned long attrs);
 unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
@@ -321,6 +330,8 @@ static inline bool dma_use_iova(struct dma_iova_state *state)
 
 bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state,
 		phys_addr_t phys, size_t size);
+void dma_iova_try_alloc_p2p(struct p2pdma_provider *provider, struct device *dev,
+		struct dma_iova_state *state, phys_addr_t phys, size_t size);
 void dma_iova_free(struct device *dev, struct dma_iova_state *state);
 void dma_iova_destroy(struct device *dev, struct dma_iova_state *state,
 		size_t mapped_len, enum dma_data_direction dir,
@@ -343,6 +354,11 @@ static inline bool dma_iova_try_alloc(struct device *dev,
 {
 	return false;
 }
+static inline void dma_iova_try_alloc_p2p(struct p2pdma_provider *provider,
+		struct device *dev, struct dma_iova_state *state, phys_addr_t phys,
+		size_t size)
+{
+}
 static inline void dma_iova_free(struct device *dev,
 		struct dma_iova_state *state)
 {
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index e1586eb52ab3..b2110098a29b 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -13,6 +13,7 @@
 #include <linux/iommu-dma.h>
 #include <linux/kmsan.h>
 #include <linux/of_device.h>
+#include <linux/pci-p2pdma.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include "debug.h"
@@ -202,6 +203,27 @@ dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
 }
 EXPORT_SYMBOL_GPL(dma_map_phys);
 
+dma_addr_t dma_map_phys_prealloc(struct device *dev, phys_addr_t phys, size_t size,
+		enum dma_data_direction dir, unsigned long attrs,
+		struct dma_iova_state *state, struct p2pdma_provider *provider)
+{
+	int ret;
+
+	if (state->bus_addr)
+		return pci_p2pdma_bus_addr_map(provider, phys);
+
+	if (dma_use_iova(state)) {
+		ret = dma_iova_link(dev, state, phys, 0, size, dir, attrs);
+		if (ret)
+			return DMA_MAPPING_ERROR;
+
+		return DMA_MAPPING_USE_IOVA;
+	}
+
+	return dma_map_phys(dev, phys, size, dir, attrs);
+}
+EXPORT_SYMBOL_GPL(dma_map_phys_prealloc);
+
 dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page,
 		size_t offset, size_t size, enum dma_data_direction dir,
 		unsigned long attrs)