Hi,

On 4/23/25 11:13, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@xxxxxxxxxx>
>
> Introduce new sticky flag (HMM_PFN_DMA_MAPPED), which isn't overwritten
> by HMM range fault. Such flag allows users to tag specific PFNs with
> information if this specific PFN was already DMA mapped.
>
> Tested-by: Jens Axboe <axboe@xxxxxxxxx>
> Reviewed-by: Christoph Hellwig <hch@xxxxxx>
> Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxx>
> ---
>  include/linux/hmm.h | 17 +++++++++++++++
>  mm/hmm.c            | 51 ++++++++++++++++++++++++++++-----------------
>  2 files changed, 49 insertions(+), 19 deletions(-)
>
> diff --git a/include/linux/hmm.h b/include/linux/hmm.h
> index 126a36571667..a1ddbedc19c0 100644
> --- a/include/linux/hmm.h
> +++ b/include/linux/hmm.h
> @@ -23,6 +23,8 @@ struct mmu_interval_notifier;
>   * HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID)
>   * HMM_PFN_ERROR - accessing the pfn is impossible and the device should
>   *                 fail. ie poisoned memory, special pages, no vma, etc
> + * HMM_PFN_DMA_MAPPED - Flag preserved on input-to-output transformation
> + *                      to mark that page is already DMA mapped
>   *
>   * On input:
>   * 0                 - Return the current state of the page, do not fault it.
> @@ -36,6 +38,13 @@ enum hmm_pfn_flags {
>          HMM_PFN_VALID = 1UL << (BITS_PER_LONG - 1),
>          HMM_PFN_WRITE = 1UL << (BITS_PER_LONG - 2),
>          HMM_PFN_ERROR = 1UL << (BITS_PER_LONG - 3),
> +
> +        /*
> +         * Sticky flags, carried from input to output,
> +         * don't forget to update HMM_PFN_INOUT_FLAGS
> +         */
> +        HMM_PFN_DMA_MAPPED = 1UL << (BITS_PER_LONG - 7),
> +

How is this playing together with the mapped order usage?

>          HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 8),
>
>          /* Input flags */
> @@ -57,6 +66,14 @@ static inline struct page *hmm_pfn_to_page(unsigned long hmm_pfn)
>          return pfn_to_page(hmm_pfn & ~HMM_PFN_FLAGS);
>  }
>
> +/*
> + * hmm_pfn_to_phys() - return physical address pointed to by a device entry
> + */
> +static inline phys_addr_t hmm_pfn_to_phys(unsigned long hmm_pfn)
> +{
> +        return __pfn_to_phys(hmm_pfn & ~HMM_PFN_FLAGS);
> +}
> +
>  /*
>   * hmm_pfn_to_map_order() - return the CPU mapping size order
>   *
> diff --git a/mm/hmm.c b/mm/hmm.c
> index 082f7b7c0b9e..51fe8b011cc7 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -39,13 +39,20 @@ enum {
>          HMM_NEED_ALL_BITS = HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT,
>  };
>
> +enum {
> +        /* These flags are carried from input-to-output */
> +        HMM_PFN_INOUT_FLAGS = HMM_PFN_DMA_MAPPED,
> +};
> +
>  static int hmm_pfns_fill(unsigned long addr, unsigned long end,
>                           struct hmm_range *range, unsigned long cpu_flags)
>  {
>          unsigned long i = (addr - range->start) >> PAGE_SHIFT;
>
> -        for (; addr < end; addr += PAGE_SIZE, i++)
> -                range->hmm_pfns[i] = cpu_flags;
> +        for (; addr < end; addr += PAGE_SIZE, i++) {
> +                range->hmm_pfns[i] &= HMM_PFN_INOUT_FLAGS;
> +                range->hmm_pfns[i] |= cpu_flags;
> +        }
>          return 0;
>  }
>
> @@ -202,8 +209,10 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
>                  return hmm_vma_fault(addr, end, required_fault, walk);
>
>          pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
> -        for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
> -                hmm_pfns[i] = pfn | cpu_flags;
> +        for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++) {
> +                hmm_pfns[i] &= HMM_PFN_INOUT_FLAGS;
> +                hmm_pfns[i] |= pfn | cpu_flags;
> +        }
>          return 0;
>  }
>  #else /* CONFIG_TRANSPARENT_HUGEPAGE */
> @@ -230,14 +239,14 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
>          unsigned long cpu_flags;
>          pte_t pte = ptep_get(ptep);
>          uint64_t pfn_req_flags = *hmm_pfn;
> +        uint64_t new_pfn_flags = 0;
>
>          if (pte_none_mostly(pte)) {
>                  required_fault =
>                          hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
>                  if (required_fault)
>                          goto fault;
> -                *hmm_pfn = 0;
> -                return 0;
> +                goto out;
>          }
>
>          if (!pte_present(pte)) {
> @@ -253,16 +262,14 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
>                          cpu_flags = HMM_PFN_VALID;
>                          if (is_writable_device_private_entry(entry))
>                                  cpu_flags |= HMM_PFN_WRITE;
> -                        *hmm_pfn = swp_offset_pfn(entry) | cpu_flags;
> -                        return 0;
> +                        new_pfn_flags = swp_offset_pfn(entry) | cpu_flags;
> +                        goto out;
>                  }
>
>                  required_fault =
>                          hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
> -                if (!required_fault) {
> -                        *hmm_pfn = 0;
> -                        return 0;
> -                }
> +                if (!required_fault)
> +                        goto out;
>
>                  if (!non_swap_entry(entry))
>                          goto fault;
> @@ -304,11 +311,13 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
>                          pte_unmap(ptep);
>                          return -EFAULT;
>                  }
> -                *hmm_pfn = HMM_PFN_ERROR;
> -                return 0;
> +                new_pfn_flags = HMM_PFN_ERROR;
> +                goto out;
>          }
>
> -        *hmm_pfn = pte_pfn(pte) | cpu_flags;
> +        new_pfn_flags = pte_pfn(pte) | cpu_flags;
> +out:
> +        *hmm_pfn = (*hmm_pfn & HMM_PFN_INOUT_FLAGS) | new_pfn_flags;
>          return 0;
>
>  fault:
> @@ -448,8 +457,10 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
>                  }
>
>                  pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
> -                for (i = 0; i < npages; ++i, ++pfn)
> -                        hmm_pfns[i] = pfn | cpu_flags;
> +                for (i = 0; i < npages; ++i, ++pfn) {
> +                        hmm_pfns[i] &= HMM_PFN_INOUT_FLAGS;
> +                        hmm_pfns[i] |= pfn | cpu_flags;
> +                }
>                  goto out_unlock;
>          }
>
> @@ -507,8 +518,10 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
>          }
>
>          pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT);
> -        for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
> -                range->hmm_pfns[i] = pfn | cpu_flags;
> +        for (; addr < end; addr += PAGE_SIZE, i++, pfn++) {
> +                range->hmm_pfns[i] &= HMM_PFN_INOUT_FLAGS;
> +                range->hmm_pfns[i] |= pfn | cpu_flags;
> +        }
>
>          spin_unlock(ptl);
>          return 0;
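
To make the question above concrete: hmm_pfn_to_map_order() reads five order bits starting at HMM_PFN_ORDER_SHIFT, and with HMM_PFN_ORDER_SHIFT left at (BITS_PER_LONG - 8) the new HMM_PFN_DMA_MAPPED bit at (BITS_PER_LONG - 7) looks like it lands inside that order field. Below is a tiny userspace sketch of what I mean; the shift and flag values are copied from the hunks above and the 5-bit mask mirrors hmm_pfn_to_map_order(), so take it as an illustration of the concern, not something tested against this series:

#include <stdio.h>

#define BITS_PER_LONG        64
#define HMM_PFN_ORDER_SHIFT  (BITS_PER_LONG - 8)           /* unchanged by this patch */
#define HMM_PFN_DMA_MAPPED   (1UL << (BITS_PER_LONG - 7))  /* new sticky flag */

/* Mirrors hmm_pfn_to_map_order(): the order lives in 5 bits above the shift. */
static unsigned int map_order(unsigned long hmm_pfn)
{
        return (hmm_pfn >> HMM_PFN_ORDER_SHIFT) & 0x1F;
}

int main(void)
{
        unsigned long pfn = 0x1234;     /* arbitrary order-0 entry */

        /* Tagging the entry as DMA-mapped ... */
        unsigned long tagged = pfn | HMM_PFN_DMA_MAPPED;

        /* ... makes the order read back as 2 instead of 0 on 64-bit. */
        printf("order without flag: %u\n", map_order(pfn));
        printf("order with DMA_MAPPED: %u\n", map_order(tagged));
        return 0;
}

If that reading is right, either the order bits or the new flag would need to move so the two don't alias once a mapping with a non-zero order is tagged.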