From: Li Zhe <lizhe.67@xxxxxxxxxxxxx>

This patch is based on patch 'vfio/type1: optimize vfio_pin_pages_remote()
for large folios'[1].

When vfio_unpin_pages_remote() is called with a range of addresses that
includes large folios, the function currently performs individual
put_pfn() operations for each page. This can lead to significant
performance overhead, especially when dealing with large ranges of pages.

This patch optimizes this process by batching the put_pfn() operations.

The performance test results for completing an 8G VFIO IOMMU DMA
unmapping, based on v6.15 and obtained through trace-cmd, are as follows.
In these tests, the 8G virtual address space is backed either by small
folios or by physical memory allocated from hugetlbfs with pagesize=2M.
For large folios, we achieve an approximate 66% performance improvement,
while for small folios there is an approximate 11% performance
degradation.

Before this patch:

hugetlbfs with pagesize=2M:
funcgraph_entry:      # 94413.092 us  |  vfio_unmap_unpin();

small folio:
funcgraph_entry:      # 118273.331 us |  vfio_unmap_unpin();

After this patch:

hugetlbfs with pagesize=2M:
funcgraph_entry:      # 31260.124 us  |  vfio_unmap_unpin();

small folio:
funcgraph_entry:      # 131945.796 us |  vfio_unmap_unpin();

[1]: https://lore.kernel.org/all/20250529064947.38433-1-lizhe.67@xxxxxxxxxxxxx/

Signed-off-by: Li Zhe <lizhe.67@xxxxxxxxxxxxx>
---
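Note (kept below the '---' so it stays out of the commit message): the
program below is a minimal userspace sketch of the batching walk that
get_step() and put_pfns() implement. model_get_step() and FOLIO_PAGES are
hypothetical stand-ins; the kernel derives the step from folio_nr_pages()
and folio_page_idx() and can see folios of mixed sizes, whereas this
model assumes every folio is 512 naturally aligned pages (a 2M folio of
4K pages).

/*
 * Illustration only: walk a 1500-page range. Each loop iteration
 * corresponds to one batched put_pfns() call, instead of one
 * put_pfn() call per page.
 */
#include <stdio.h>

#define FOLIO_PAGES 512UL	/* assumed fixed folio size, see note */

/* Stand-in for get_step(): pages left in pfn's folio, clamped to the
 * pages remaining in the range. */
static unsigned long model_get_step(unsigned long pfn, unsigned long npage)
{
	unsigned long in_folio = FOLIO_PAGES - (pfn % FOLIO_PAGES);

	return npage < in_folio ? npage : in_folio;
}

int main(void)
{
	unsigned long pfn = 0x100003;	/* deliberately starts mid-folio */
	unsigned long npage = 1500;
	unsigned long calls = 0;

	while (npage) {
		unsigned long step = model_get_step(pfn, npage);

		/* one put_pfns(pfn, prot, step) would land here */
		calls++;
		pfn += step;
		npage -= step;
	}
	printf("%lu batched calls instead of 1500\n", calls);
	return 0;
}

For the mid-folio start above this prints 3 batched calls (509 + 512 +
479 pages), which is where the large-folio improvement in the numbers
above comes from. A range of small folios still takes one call per page,
now with the extra per-page get_step() check, which matches the
small-folio degradation.
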
 drivers/vfio/vfio_iommu_type1.c | 58 ++++++++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 11 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 28ee4b8d39ae..9d3ee0f1b298 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -469,17 +469,24 @@ static bool is_invalid_reserved_pfn(unsigned long pfn)
 	return true;
 }
 
-static int put_pfn(unsigned long pfn, int prot)
+/*
+ * The caller must ensure that these npages PFNs belong to the same folio.
+ */
+static int put_pfns(unsigned long pfn, int prot, int npages)
 {
 	if (!is_invalid_reserved_pfn(pfn)) {
-		struct page *page = pfn_to_page(pfn);
-
-		unpin_user_pages_dirty_lock(&page, 1, prot & IOMMU_WRITE);
-		return 1;
+		unpin_user_page_range_dirty_lock(pfn_to_page(pfn),
+				npages, prot & IOMMU_WRITE);
+		return npages;
 	}
 	return 0;
 }
 
+static int put_pfn(unsigned long pfn, int prot)
+{
+	return put_pfns(pfn, prot, 1);
+}
+
 #define VFIO_BATCH_MAX_CAPACITY (PAGE_SIZE / sizeof(struct page *))
 
 static void __vfio_batch_init(struct vfio_batch *batch, bool single)
@@ -801,19 +808,48 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 	return pinned;
 }
 
+static long get_step(unsigned long pfn, unsigned long npage)
+{
+	struct folio *folio;
+	struct page *page;
+
+	if (is_invalid_reserved_pfn(pfn))
+		return 1;
+
+	page = pfn_to_page(pfn);
+	folio = page_folio(page);
+
+	if (!folio_test_large(folio))
+		return 1;
+
+	/*
+	 * The precondition for doing this here is that the pfns are contiguous
+	 */
+	return min_t(long, npage,
+		     folio_nr_pages(folio) - folio_page_idx(folio, page));
+}
+
 static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
 				    unsigned long pfn, unsigned long npage,
 				    bool do_accounting)
 {
 	long unlocked = 0, locked = 0;
-	long i;
 
-	for (i = 0; i < npage; i++, iova += PAGE_SIZE) {
-		if (put_pfn(pfn++, dma->prot)) {
-			unlocked++;
-			if (vfio_find_vpfn(dma, iova))
-				locked++;
+	while (npage) {
+		long step = get_step(pfn, npage);
+
+		/*
+		 * Although the third parameter of put_pfns() is of type int,
+		 * the value of step here will not exceed the range that int
+		 * can represent. Therefore, it is safe to pass step.
+		 */
+		if (put_pfns(pfn, dma->prot, step)) {
+			unlocked += step;
+			locked += vpfn_pages(dma, iova, step);
 		}
+		pfn += step;
+		iova += PAGE_SIZE * step;
+		npage -= step;
 	}
 
 	if (do_accounting)
-- 
2.20.1