On Tue, Jun 24, 2025 at 05:56:07AM +0000, Raghavendra K T wrote:
Since we already have the list of mm_structs in the system, add a module
that walks the VMAs of each mm_struct and scans all the pages associated
with it.

In the scan path: check for recently accessed pages (folios) belonging
to slowtier nodes, and add all those folios to a list.
Signed-off-by: Raghavendra K T <raghavendra.kt@xxxxxxx>
---
Hi, just taking a quick look...
mm/kscand.c | 319 +++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 318 insertions(+), 1 deletion(-)
diff --git a/mm/kscand.c b/mm/kscand.c
index d5b0d3041b0f..0edec1b7730d 100644
--- a/mm/kscand.c
+++ b/mm/kscand.c
@@ -42,6 +55,8 @@ static struct kmem_cache *kscand_slot_cache __read_mostly;
@@ -84,11 +122,275 @@ static void kscand_wait_work(void)
scan_sleep_jiffies);
}
+static inline bool is_valid_folio(struct folio *folio)
+{
+ if (!folio || folio_test_unevictable(folio) || !folio_mapped(folio) ||
+ folio_is_zone_device(folio) || folio_maybe_mapped_shared(folio))
+ return false;
+
+ return true;
+}
What makes it undesirable to migrate shared folios?
+static bool folio_idle_clear_pte_refs_one(struct folio *folio,
+ struct vm_area_struct *vma,
+ unsigned long addr,
+ pte_t *ptep)
+{
+ bool referenced = false;
+ struct mm_struct *mm = vma->vm_mm;
+ pmd_t *pmd = pmd_off(mm, addr);
+
+ if (ptep) {
+ if (ptep_clear_young_notify(vma, addr, ptep))
+ referenced = true;
+ } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+ if (!pmd_present(*pmd))
+ WARN_ON_ONCE(1);
+ if (pmdp_clear_young_notify(vma, addr, pmd))
+ referenced = true;
+ } else {
+ WARN_ON_ONCE(1);
+ }
This does not look good.

I think PMD entries should be handled in an mm_walk_ops.pmd_entry
callback instead?
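Something like the below, perhaps (completely untested, just to sketch
the idea; the is_valid_folio() checks would still need to be applied to
the huge folio as well):

static int hot_vma_idle_pmd_entry(pmd_t *pmd, unsigned long addr,
				  unsigned long next, struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	spinlock_t *ptl;

	/* Returns the ptl only when the pmd maps a huge page */
	ptl = pmd_trans_huge_lock(pmd, vma);
	if (!ptl)
		return 0;	/* normal PTEs, let ->pte_entry handle them */

	if (pmd_present(*pmd)) {
		struct folio *folio = pmd_folio(*pmd);

		if (pmdp_clear_young_notify(vma, addr, pmd)) {
			folio_clear_idle(folio);
			folio_set_young(folio);
		}
	}
	spin_unlock(ptl);

	/* The huge folio is handled here, so skip the PTE-level walk */
	walk->action = ACTION_CONTINUE;
	return 0;
}

static const struct mm_walk_ops hot_vma_set_idle_ops = {
	.pmd_entry = hot_vma_idle_pmd_entry,
	.pte_entry = hot_vma_idle_pte_entry,
	.walk_lock = PGWALK_RDLOCK,
};

Then the WARN_ON_ONCE()s and the pmd_off() lookup in
folio_idle_clear_pte_refs_one() can go away.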
+
+ if (referenced) {
+ folio_clear_idle(folio);
+ folio_set_young(folio);
+ }
+
+ return true;
+}
+
+static void page_idle_clear_pte_refs(struct page *page, pte_t *pte, struct mm_walk *walk)
+{
+ bool need_lock;
+ struct folio *folio = page_folio(page);
+ unsigned long address;
+
+ if (!folio_mapped(folio) || !folio_raw_mapping(folio))
+ return;
+
+ need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
+ if (need_lock && !folio_trylock(folio))
+ return;
Why acquire the folio lock here?

And I'm not even sure it's safe to acquire it: the documented locking
order is folio_lock -> pte_lock, and the page walk has already acquired
the pte_lock before calling the ->pte_entry() callback.
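If the folio handling really needs the folio lock, one option is to not
touch the folio under the pte lock at all: in ->pte_entry only take a
reference and record the folio, then do the idle/young handling after
walk_page_vma() returns. Rough sketch (untested; kscand_folio_info and
the deferred list are made up here for illustration):

struct kscand_folio_info {
	struct list_head list;
	struct folio *folio;
	unsigned long address;
};

static int hot_vma_idle_pte_entry(pte_t *pte, unsigned long addr,
				  unsigned long next, struct mm_walk *walk)
{
	struct kscand_scanctrl *scanctrl = walk->private;
	pte_t ptent = ptep_get(pte);
	struct kscand_folio_info *info;
	struct folio *folio;

	if (!pte_present(ptent))
		return 0;

	folio = vm_normal_folio(walk->vma, addr, ptent);
	if (!folio || !folio_try_get(folio))
		return 0;

	/* pte lock is held here, so no sleeping allocation */
	info = kmalloc(sizeof(*info), GFP_ATOMIC);
	if (!info) {
		folio_put(folio);
		return 0;
	}

	info->folio = folio;
	info->address = addr;
	list_add_tail(&info->list, &scanctrl->deferred_list);
	return 0;
}

The folio lock (and the rmap-based young clearing) can then be taken
from the list processing path without the pte lock held, and the
ordering problem goes away.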
+ address = vma_address(walk->vma, page_pgoff(folio, page), compound_nr(page));
+ VM_BUG_ON_VMA(address == -EFAULT, walk->vma);
+ folio_idle_clear_pte_refs_one(folio, walk->vma, address, pte);
+
+ if (need_lock)
+ folio_unlock(folio);
+}
+
+static const struct mm_walk_ops hot_vma_set_idle_ops = {
+ .pte_entry = hot_vma_idle_pte_entry,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
+static void kscand_walk_page_vma(struct vm_area_struct *vma, struct kscand_scanctrl *scanctrl)
+{
+ if (!vma_migratable(vma) || !vma_policy_mof(vma) ||
+ is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_MIXEDMAP)) {
+ return;
+ }
+ if (!vma->vm_mm ||
+ (vma->vm_file && (vma->vm_flags & (VM_READ|VM_WRITE)) == (VM_READ)))
+ return;
Why are read-only file VMAs not walked?
+ if (!vma_is_accessible(vma))
+ return;
+
+ walk_page_vma(vma, &hot_vma_set_idle_ops, scanctrl);
+}