[PATCH v2] mm/madvise: add vmstat statistics for madvise_[cold|pageout]

From: Minchan Kim
Date: Tue Jan 24 2023 - 19:55:07 EST


madvise LRU manipulation APIs need to scan address ranges to find
present pages at page table and provides advice hints for them.

Likewise pg[scan/steal] count on vmstat, madvise_pg[scanned/hinted]
shows the proactive reclaim efficiency so this patch adds those
two statistics in vmstat.

madvise_pgscanned, madvise_pghinted

Since proactive reclaim using process_madvise(2) as userland
memory policy is popular(e.g,. Android ActivityManagerService),
those stats are helpful to know how efficiently the policy works
well.

Signed-off-by: Minchan Kim <minchan@xxxxxxxxxx>
---

* From v1 - https://lore.kernel.org/linux-mm/20230117231632.2734737-1-minchan@xxxxxxxxxx/
* not relying on the pageout for accounting - mhocko
* drop unnecessary changes - mhocko

include/linux/vm_event_item.h | 2 ++
mm/madvise.c | 8 ++++++++
mm/vmstat.c | 2 ++
3 files changed, 12 insertions(+)

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 7f5d1caf5890..3c117858946d 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -52,6 +52,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
PGSCAN_FILE,
PGSTEAL_ANON,
PGSTEAL_FILE,
+ MADVISE_PGSCANNED,
+ MADVISE_PGHINTED,
#ifdef CONFIG_NUMA
PGSCAN_ZONE_RECLAIM_FAILED,
#endif
diff --git a/mm/madvise.c b/mm/madvise.c
index 7db6622f8293..d2624e77f729 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -344,6 +344,8 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
spinlock_t *ptl;
struct folio *folio = NULL;
LIST_HEAD(folio_list);
+ unsigned int nr_scanned = 0;
+ unsigned int nr_hinted = 0;
bool pageout_anon_only_filter;

if (fatal_signal_pending(current))
@@ -357,6 +359,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
pmd_t orig_pmd;
unsigned long next = pmd_addr_end(addr, end);

+ nr_scanned += HPAGE_PMD_NR;
tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
ptl = pmd_trans_huge_lock(pmd, vma);
if (!ptl)
@@ -414,6 +417,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
}
} else
folio_deactivate(folio);
+ nr_hinted += HPAGE_PMD_NR;
huge_unlock:
spin_unlock(ptl);
if (pageout)
@@ -431,6 +435,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
arch_enter_lazy_mmu_mode();
for (; addr < end; pte++, addr += PAGE_SIZE) {
ptent = *pte;
+ nr_scanned++;

if (pte_none(ptent))
continue;
@@ -508,6 +513,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
}
} else
folio_deactivate(folio);
+ nr_hinted++;
}

arch_leave_lazy_mmu_mode();
@@ -515,6 +521,8 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
if (pageout)
reclaim_pages(&folio_list);
cond_resched();
+ count_vm_events(MADVISE_PGSCANNED, nr_scanned);
+ count_vm_events(MADVISE_PGHINTED, nr_hinted);

return 0;
}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 1ea6a5ce1c41..84acc90820e1 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1283,6 +1283,8 @@ const char * const vmstat_text[] = {
"pgscan_file",
"pgsteal_anon",
"pgsteal_file",
+ "madvise_pgscanned",
+ "madvise_pghinted",

#ifdef CONFIG_NUMA
"zone_reclaim_failed",
--
2.39.1.405.gd4c25cc71f-goog