Re: [PATCH V8 2/3] riscv: mm: Add soft-dirty page tracking support

From: Palmer Dabbelt
Date: Mon Jun 23 2025 - 19:19:54 EST


On Wed, 18 Jun 2025 23:52:31 PDT (-0700), zhangchunyan@xxxxxxxxxxx wrote:
The Svrsw60t59b extension allows to free the PTE reserved bits 60 and 59
for software, this patch uses bit 59 for soft-dirty.

To add swap PTE soft-dirty tracking, we borrow bit 3 which is available
for swap PTEs on RISC-V systems.

Signed-off-by: Chunyan Zhang <zhangchunyan@xxxxxxxxxxx>
---
arch/riscv/Kconfig | 1 +
arch/riscv/include/asm/pgtable-bits.h | 19 +++++++
arch/riscv/include/asm/pgtable.h | 71 ++++++++++++++++++++++++++-
3 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 01e4c15bee12..5c787c09f4dc 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -141,6 +141,7 @@ config RISCV
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_SOFT_DIRTY if 64BIT && MMU && RISCV_ISA_SVRSW60T59B
select HAVE_ARCH_STACKLEAK
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h
index a8f5205cea54..a6fa871dc19e 100644
--- a/arch/riscv/include/asm/pgtable-bits.h
+++ b/arch/riscv/include/asm/pgtable-bits.h
@@ -20,6 +20,25 @@

#define _PAGE_SPECIAL (1 << 8) /* RSW: 0x1 */
#define _PAGE_DEVMAP (1 << 9) /* RSW, devmap */
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+
+/* ext_svrsw60t59b: bit 59 for software dirty tracking */
+#define _PAGE_SOFT_DIRTY \
+ ((riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)) ? \
+ (1UL << 59) : 0)

I haven't fully followed the code paths here, but it looks like we'll unconditionally enable VM_SOFTDIRTY and thus allow vma_soft_dirty_enabled() to return true even when we don't have support in hardware.

I'm not exactly sure what could go wrong there, but it seems like at least userspace could be told that soft dirty is around and thus get confused. It also looks like debug_vm_pgtable() will start failing tests on these systems, which will probably trigger some bug reports.

At a bare minimum someone who understands the core MM code should look here. Maybe the best way to do that is to convert those static CONFIG_MEM_SOFT_DIRTY checks to some sort of arch-implemented callback so this can be dynamic, and then see what people say?

+/*
+ * Bit 3 is always zero for swap entry computation, so we
+ * can borrow it for swap page soft-dirty tracking.
+ */
+#define _PAGE_SWP_SOFT_DIRTY \
+ ((riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)) ? \
+ _PAGE_EXEC : 0)
+#else
+#define _PAGE_SOFT_DIRTY 0
+#define _PAGE_SWP_SOFT_DIRTY 0
+#endif /* CONFIG_MEM_SOFT_DIRTY */
+
#define _PAGE_TABLE _PAGE_PRESENT

/*
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index a11816bbf9e7..efc2da97f124 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -434,7 +434,7 @@ static inline pte_t pte_mkwrite_novma(pte_t pte)

static inline pte_t pte_mkdirty(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_DIRTY);
+ return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
}

static inline pte_t pte_mkclean(pte_t pte)
@@ -467,6 +467,38 @@ static inline pte_t pte_mkhuge(pte_t pte)
return pte;
}

+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+static inline bool pte_soft_dirty(pte_t pte)
+{
+ return !!(pte_val(pte) & _PAGE_SOFT_DIRTY);
+}
+
+static inline pte_t pte_mksoft_dirty(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY);
+}
+
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~(_PAGE_SOFT_DIRTY));
+}
+
+static inline bool pte_swp_soft_dirty(pte_t pte)
+{
+ return !!(pte_val(pte) & _PAGE_SWP_SOFT_DIRTY);
+}
+
+static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
+}
+
+static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~(_PAGE_SWP_SOFT_DIRTY));
+}
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
#ifdef CONFIG_RISCV_ISA_SVNAPOT
#define pte_leaf_size(pte) (pte_napot(pte) ? \
napot_cont_size(napot_cont_order(pte)) :\
@@ -819,6 +851,40 @@ static inline pud_t pud_mkspecial(pud_t pud)
}
#endif

+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+static inline bool pmd_soft_dirty(pmd_t pmd)
+{
+ return pte_soft_dirty(pmd_pte(pmd));
+}
+
+static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
+{
+ return pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)));
+}
+
+static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
+{
+ return pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)));
+}
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+static inline bool pmd_swp_soft_dirty(pmd_t pmd)
+{
+ return pte_swp_soft_dirty(pmd_pte(pmd));
+}
+
+static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
+{
+ return pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd)));
+}
+
+static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
+{
+ return pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd)));
+}
+#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
@@ -1005,7 +1071,8 @@ static inline pud_t pud_modify(pud_t pud, pgprot_t newprot)
*
* Format of swap PTE:
* bit 0: _PAGE_PRESENT (zero)
- * bit 1 to 3: _PAGE_LEAF (zero)
+ * bit 1 to 2: (zero)
+ * bit 3: _PAGE_SWP_SOFT_DIRTY
* bit 5: _PAGE_PROT_NONE (zero)
* bit 6: exclusive marker
* bits 7 to 11: swap type