Re: [PATCH v11 1/5] mm: softdirty: Add pgtable_soft_dirty_supported()

David Hildenbrand <david@xxxxxxxxxx> · Fri, 12 Sep 2025 10:41:10 +0200

[...]

+/*
+ * We should remove the VM_SOFTDIRTY flag if the soft-dirty bit is
+ * unavailable on which the kernel is running, even if the architecture
+ * provides the resource and soft-dirty is compiled in.
+ */
+#ifdef CONFIG_MEM_SOFT_DIRTY
+     if (!pgtable_soft_dirty_supported())
+             mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0;
+#endif

You can now drop the ifdef.

Ok, you mean define VM_SOFTDIRTY 0x08000000 no matter if
MEM_SOFT_DIRTY is compiled in, right?

Then I need memcpy() to set mnemonics[ilog2(VM_SOFTDIRTY)] here.

The whole hunk will not be required when we make sure VM_SOFTDIRTY never 
gets set, correct?



But, I wonder if could we instead just stop setting the flag. Then we don't
have to worry about any VM_SOFTDIRTY checks.

Something like the following

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 892fe5dbf9de0..8b8bf63a32ef7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -783,6 +783,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
   static inline void vm_flags_init(struct vm_area_struct *vma,
                                  vm_flags_t flags)
   {
+       VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY));
         ACCESS_PRIVATE(vma, __vm_flags) = flags;
   }

@@ -801,6 +802,7 @@ static inline void vm_flags_reset(struct vm_area_struct *vma,
   static inline void vm_flags_reset_once(struct vm_area_struct *vma,
                                        vm_flags_t flags)
   {
+       VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY));
         vma_assert_write_locked(vma);
         WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags);
   }
@@ -808,6 +810,7 @@ static inline void vm_flags_reset_once(struct vm_area_struct *vma,
   static inline void vm_flags_set(struct vm_area_struct *vma,
                                 vm_flags_t flags)
   {
+       VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY));
         vma_start_write(vma);
         ACCESS_PRIVATE(vma, __vm_flags) |= flags;
   }
diff --git a/mm/mmap.c b/mm/mmap.c
index 5fd3b80fda1d5..40cb3fbf9a247 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1451,8 +1451,10 @@ static struct vm_area_struct *__install_special_mapping(
                 return ERR_PTR(-ENOMEM);

         vma_set_range(vma, addr, addr + len, 0);
-       vm_flags_init(vma, (vm_flags | mm->def_flags |
-                     VM_DONTEXPAND | VM_SOFTDIRTY) & ~VM_LOCKED_MASK);
+       vm_flags |= mm->def_flags | VM_DONTEXPAND;

Why use '|=' rather than not directly setting vm_flags which is an
uninitialized variable?

vm_flags is passed in by the caller?

But just to clarify: this code was just a quick hack, adjust it as you need.

[...]


+     if (!pgtable_soft_dirty_supported())
+             return;
+
       if (pmd_present(pmd)) {
               /* See comment in change_huge_pmd() */
               old = pmdp_invalidate(vma, addr, pmdp);

That would all be handled with the above never-set-VM_SOFTDIRTY.

I meant that there is no need to add the pgtable_soft_dirty_supported() 
check.


Sorry I'm not sure I understand here, you mean no longer need #ifdef
CONFIG_MEM_SOFT_DIRTY for these function definitions, right?

Likely we could drop them. VM_SOFTDIRTY will never be set so the code 
will not be invoked.

And for architectures where VM_SOFTDIRTY is never even possible 
(!CONFIG_MEM_SOFT_DIRTY) we keep it as 0.

That way, the compiler can even optimize out all of that code because

"vma->vm_flags & VM_SOFTDIRTY" -> "vma->vm_flags & 0"

will never be true.



diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 4c035637eeb7..2a3578a4ae4c 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1537,6 +1537,18 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
   #define arch_start_context_switch(prev)     do {} while (0)
   #endif

+/*
+ * Some platforms can customize the PTE soft-dirty bit making it unavailable
+ * even if the architecture provides the resource.
+ * Adding this API allows architectures to add their own checks for the
+ * devices on which the kernel is running.
+ * Note: When overiding it, please make sure the CONFIG_MEM_SOFT_DIRTY
+ * is part of this macro.
+ */
+#ifndef pgtable_soft_dirty_supported
+#define pgtable_soft_dirty_supported()       IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)
+#endif
+
   #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
   #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
   static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 830107b6dd08..b32ce2b0b998 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
   {
       pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);

-     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+     if (!pgtable_soft_dirty_supported())
               return;

       pr_debug("Validating PTE soft dirty\n");
@@ -702,7 +702,7 @@ static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args)
   {
       pte_t pte;

-     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+     if (!pgtable_soft_dirty_supported())
               return;

       pr_debug("Validating PTE swap soft dirty\n");
@@ -718,7 +718,7 @@ static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args)
   {
       pmd_t pmd;

-     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+     if (!pgtable_soft_dirty_supported())
               return;

       if (!has_transparent_hugepage())
@@ -734,8 +734,8 @@ static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args)
   {
       pmd_t pmd;

-     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) ||
-             !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
+     if (!pgtable_soft_dirty_supported() ||
+         !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
               return;

       if (!has_transparent_hugepage())
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9c38a95e9f09..218d430a2ec6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2271,12 +2271,13 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,

   static pmd_t move_soft_dirty_pmd(pmd_t pmd)
   {
-#ifdef CONFIG_MEM_SOFT_DIRTY
-     if (unlikely(is_pmd_migration_entry(pmd)))
-             pmd = pmd_swp_mksoft_dirty(pmd);
-     else if (pmd_present(pmd))
-             pmd = pmd_mksoft_dirty(pmd);
-#endif
+     if (pgtable_soft_dirty_supported()) {
+             if (unlikely(is_pmd_migration_entry(pmd)))
+                     pmd = pmd_swp_mksoft_dirty(pmd);
+             else if (pmd_present(pmd))
+                     pmd = pmd_mksoft_dirty(pmd);
+     }
+

Wondering, should simply the arch take care of that and we can just clal
pmd_swp_mksoft_dirty / pmd_mksoft_dirty?


I think we have that already in include/linux/pgtable.h:

We have stubs that just don't do anything.

For riscv support you would handle runtime-enablement in these helpers.



       return pmd;
   }

diff --git a/mm/internal.h b/mm/internal.h
index 45b725c3dc03..c6ca62f8ecf3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1538,7 +1538,7 @@ static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
        * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY)
        * will be constantly true.
        */
-     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+     if (!pgtable_soft_dirty_supported())
               return false;


That should be handled with the above never-set-VM_SOFTDIRTY.

We don't need to check if (!pgtable_soft_dirty_supported()) if I
understand correctly.
Hm, let me think about that. No, I think this has to stay as the comment
says, so this case here is special.

--
Cheers

David / dhildenb