[PATCH v4] Add /proc/PID/smaps support for DAX

From: Fan Du
Date: Thu Oct 26 2017 - 01:14:26 EST


Memory behind a device DAX instance is not managed by the normal
memory management system. When a user mmaps /dev/dax, the smaps
counters for that mapping currently show nothing, so there is no way
for the user to check how much device DAX memory is actually used in
practice.
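
A mapping like the one in the example reading below can be produced
with a minimal userspace sketch along these lines (not part of this
patch; the device path and the 64 MiB size mirror the example below
and are assumptions, and device DAX requires MAP_SHARED plus a start
and length aligned to the device's alignment):
----------------------------------------------------
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	size_t len = 64UL << 20;	/* 64 MiB, matching "Size:" below */
	void *p;
	int fd = open("/dev/dax12.0", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* device DAX only accepts shared mappings */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	memset(p, 0, len);	/* fault in the whole range */

	printf("mapped %zu bytes, see /proc/%d/smaps\n", len, (int)getpid());
	pause();		/* keep the mapping alive for inspection */
	return 0;
}
----------------------------------------------------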

Whether the vma is backed by normal pages, huge pages, or both at
the same time makes no difference to a device DAX user so far.

The existing smaps structure is enough to do the job, so this patch
reuses the existing RSS/PSS counters for the statistics. An example
reading looks like this:
----------------------------------------------------
7f30fe200000-7f3102200000 rw-s 00000000 00:06 19567 /dev/dax12.0
Size: 65536 kB
KernelPageSize: 4 kB
MMUPageSize: 4 kB
Rss: 65536 kB
Pss: 65536 kB
Shared_Clean: 0 kB
Shared_Dirty: 0 kB
Private_Clean: 0 kB
Private_Dirty: 65536 kB
Referenced: 65536 kB
Anonymous: 0 kB
LazyFree: 0 kB
AnonHugePages: 0 kB
ShmemPmdMapped: 0 kB
Shared_Hugetlb: 0 kB
Private_Hugetlb: 0 kB
Swap: 0 kB
SwapPss: 0 kB
Locked: 65536 kB
ProtectionKey: 0
VmFlags: rd wr sh mr mw me ms mm hg
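
The range above spans 0x4000000 bytes (64 MiB), matching Size, and
the device is mapped by this process alone, so every page is charged
fully to it: Pss equals Rss, and the faulted range is reported as
Private_Dirty rather than in the Shared_* counters.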

Signed-off-by: Fan Du <fan.du@xxxxxxxxx>
---
v4:
* Merge device DAX readings into the existing smaps counters
for simplicity.

v3:
* Elaborate more on the usage, as suggested by Michal Hocko

v2:
* Use pte_devmap() to check for a valid pfn page structure,
as pointed out by Dan. Thanks!
fs/proc/task_mmu.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 79 insertions(+), 2 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5589b4b..9b2d3e6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -507,6 +507,55 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
 	}
 }
 
+/*
+ * Pages behind huge device DAX mappings are not compound pages, so
+ * introduce a new helper to account for both PMD and PUD mappings.
+ */
+static void smaps_account_dax_huge(struct mem_size_stats *mss,
+		struct page *page, unsigned long size, bool young, bool dirty)
+{
+	int mapcount = page_mapcount(page);
+
+	if (PageAnon(page)) {
+		mss->anonymous += size;
+		if (!PageSwapBacked(page) && !dirty && !PageDirty(page))
+			mss->lazyfree += size;
+	}
+
+	mss->resident += size;
+	/* Accumulate the size in pages that have been accessed. */
+	if (young || page_is_young(page) || PageReferenced(page))
+		mss->referenced += size;
+
+	/*
+	 * page_count(page) == 1 guarantees the page is mapped exactly once.
+	 * If any subpage of the huge page is individually mapped with a PTE
+	 * it would elevate page_count().
+	 */
+	if (page_count(page) == 1) {
+		if (dirty || PageDirty(page))
+			mss->private_dirty += size;
+		else
+			mss->private_clean += size;
+		mss->pss += (u64)size << PSS_SHIFT;
+		return;
+	}
+
+	if (mapcount >= 2) {
+		if (dirty || PageDirty(page))
+			mss->shared_dirty += size;
+		else
+			mss->shared_clean += size;
+		mss->pss += ((u64)size << PSS_SHIFT) / mapcount;
+	} else {
+		if (dirty || PageDirty(page))
+			mss->private_dirty += size;
+		else
+			mss->private_clean += size;
+		mss->pss += (u64)size << PSS_SHIFT;
+	}
+}
+
 #ifdef CONFIG_SHMEM
 static int smaps_pte_hole(unsigned long addr, unsigned long end,
 		struct mm_walk *walk)
@@ -528,7 +577,18 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 	struct page *page = NULL;
 
 	if (pte_present(*pte)) {
-		page = vm_normal_page(vma, addr, *pte);
+		if (!vma_is_dax(vma))
+			page = vm_normal_page(vma, addr, *pte);
+		else if (pte_devmap(*pte)) {
+			struct dev_pagemap *pgmap;
+
+			/* only validate the pfn; PTL keeps the page mapped */
+			pgmap = get_dev_pagemap(pte_pfn(*pte), NULL);
+			if (!pgmap)
+				return;
+			page = pte_page(*pte);
+			put_dev_pagemap(pgmap);
+		}
 	} else if (is_swap_pte(*pte)) {
 		swp_entry_t swpent = pte_to_swp_entry(*pte);
 
@@ -579,7 +639,24 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 	struct page *page;
 
 	/* FOLL_DUMP will return -EFAULT on huge zero page */
-	page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+	if (!vma_is_dax(vma))
+		page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+	else if (pmd_devmap(*pmd)) {
+		struct dev_pagemap *pgmap;
+
+		/* only validate the pfn; PTL keeps the page mapped */
+		pgmap = get_dev_pagemap(pmd_pfn(*pmd), NULL);
+		if (!pgmap)
+			return;
+		page = pmd_page(*pmd);
+		put_dev_pagemap(pgmap);
+		smaps_account_dax_huge(mss, page, PMD_SIZE, pmd_young(*pmd),
+				       pmd_dirty(*pmd));
+		return;
+	} else {
+		/* dax vma without a devmap pmd: nothing to account */
+		return;
+	}
 	if (IS_ERR_OR_NULL(page))
 		return;
 	if (PageAnon(page))
--
1.8.3.1