On Wed, 10 Sept 2025 at 16:51, David Hildenbrand <david@xxxxxxxxxx> wrote: > > On 10.09.25 10:25, Chunyan Zhang wrote: > > Hi David, > > > > On Tue, 9 Sept 2025 at 19:42, David Hildenbrand <david@xxxxxxxxxx> wrote: > >> > >> On 09.09.25 11:56, Chunyan Zhang wrote: > >>> Some platforms can customize the PTE soft dirty bit and make it unavailable > >>> even if the architecture allows providing the PTE resource. > >>> > >>> Add an API which architectures can define their specific implementations > >>> to detect if the PTE soft-dirty bit is available, on which the kernel > >>> is running. > >>> > >>> Signed-off-by: Chunyan Zhang <zhangchunyan@xxxxxxxxxxx> > >>> --- > >>> fs/proc/task_mmu.c | 17 ++++++++++++++++- > >>> include/linux/pgtable.h | 10 ++++++++++ > >>> mm/debug_vm_pgtable.c | 9 +++++---- > >>> mm/huge_memory.c | 10 ++++++---- > >>> mm/internal.h | 2 +- > >>> mm/mremap.c | 10 ++++++---- > >>> mm/userfaultfd.c | 6 ++++-- > >>> 7 files changed, 48 insertions(+), 16 deletions(-) > >>> > >>> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c > >>> index 29cca0e6d0ff..20a609ec1ba6 100644 > >>> --- a/fs/proc/task_mmu.c > >>> +++ b/fs/proc/task_mmu.c > >>> @@ -1058,7 +1058,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) > >>> * -Werror=unterminated-string-initialization warning > >>> * with GCC 15 > >>> */ > >>> - static const char mnemonics[BITS_PER_LONG][3] = { > >>> + static char mnemonics[BITS_PER_LONG][3] = { > >>> /* > >>> * In case if we meet a flag we don't know about. > >>> */ > >>> @@ -1129,6 +1129,16 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) > >>> [ilog2(VM_SEALED)] = "sl", > >>> #endif > >>> }; > >>> +/* > >>> + * We should remove the VM_SOFTDIRTY flag if the PTE soft-dirty bit is > >>> + * unavailable on which the kernel is running, even if the architecture > >>> + * allows providing the PTE resource and soft-dirty is compiled in. 
> >>> + */ > >>> +#ifdef CONFIG_MEM_SOFT_DIRTY > >>> + if (!pte_soft_dirty_available()) > >>> + mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0; > >>> +#endif > >>> + > >>> size_t i; > >>> > >>> seq_puts(m, "VmFlags: "); > >>> @@ -1531,6 +1541,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, > >>> static inline void clear_soft_dirty(struct vm_area_struct *vma, > >>> unsigned long addr, pte_t *pte) > >>> { > >>> + if (!pte_soft_dirty_available()) > >>> + return; > >>> /* > >>> * The soft-dirty tracker uses #PF-s to catch writes > >>> * to pages, so write-protect the pte as well. See the > >>> @@ -1566,6 +1578,9 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, > >>> { > >>> pmd_t old, pmd = *pmdp; > >>> > >>> + if (!pte_soft_dirty_available()) > >>> + return; > >>> + > >>> if (pmd_present(pmd)) { > >>> /* See comment in change_huge_pmd() */ > >>> old = pmdp_invalidate(vma, addr, pmdp); > >>> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > >>> index 4c035637eeb7..c0e2a6dc69f4 100644 > >>> --- a/include/linux/pgtable.h > >>> +++ b/include/linux/pgtable.h > >>> @@ -1538,6 +1538,15 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) > >>> #endif > >>> > >>> #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY > >>> + > >>> +/* > >>> + * Some platforms can customize the PTE soft dirty bit and make it unavailable > >>> + * even if the architecture allows providing the PTE resource. 
> >>> + */ > >>> +#ifndef pte_soft_dirty_available > >>> +#define pte_soft_dirty_available() (true) > >>> +#endif > >>> + > >>> #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION > >>> static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) > >>> { > >>> @@ -1555,6 +1564,7 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) > >>> } > >>> #endif > >>> #else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */ > >>> +#define pte_soft_dirty_available() (false) > >>> static inline int pte_soft_dirty(pte_t pte) > >>> { > >>> return 0; > >>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c > >>> index 830107b6dd08..98ed7e22ccec 100644 > >>> --- a/mm/debug_vm_pgtable.c > >>> +++ b/mm/debug_vm_pgtable.c > >>> @@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args) > >>> { > >>> pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot); > >>> > >>> - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) > >>> + if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || !pte_soft_dirty_available()) > >> > >> I suggest that you instead make pte_soft_dirty_available() be false without CONFIG_MEM_SOFT_DIRTY. > >> > >> e.g., for the default implementation > >> > >> #define pte_soft_dirty_available() IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) > >> > >> That way you can avoid some ifdefs and clean up these checks. 
> > > > Do you mean something like this: > > > > --- a/include/linux/pgtable.h > > +++ b/include/linux/pgtable.h > > @@ -1538,6 +1538,16 @@ static inline pgprot_t pgprot_modify(pgprot_t > > oldprot, pgprot_t newprot) > > #endif > > > > #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY > > +#ifndef arch_soft_dirty_available > > +#define arch_soft_dirty_available() (true) > > +#endif > > +#define pgtable_soft_dirty_supported() > > (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && arch_soft_dirty_available()) > > + > > #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION > > static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) > > { > > @@ -1555,6 +1565,7 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) > > } > > #endif > > #else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */ > > +#define pgtable_soft_dirty_supported() (false) > > Maybe we can simplify to > > #ifndef pgtable_soft_dirty_supported > #define pgtable_soft_dirty_supported() IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) > #endif > > And then just let the arch that overrides this function just make it > respect IS_ENABLED(CONFIG_MEM_SOFT_DIRTY). Ok, got you, I will address it. Thanks for your review, Chunyan > > -- > Cheers > > David / dhildenb >