[RFC][PATCH] /proc/<pid>/pmaps - memory maps in granularity of pages

From: Fengguang Wu
Date: Tue Aug 14 2007 - 04:52:44 EST


Hello,

Matt Mackall brings us many memory-footprint-optimization
opportunities with his pagemap/kpagemap patches. However I wonder if
the binary interface can be improved.

This seq_file based implementation iterates in the unit of vmas. So
super large vmas can be a problem. The next step is to do address
based iteration. That should not be hard.

Andrew, please stop me early if it is at all a wrong interface :)

Thank you,
Fengguang
===

Show a process's memory maps page-by-page in /proc/<pid>/pmaps.
It helps to analyze an application's memory footprint in a comprehensive way.

Pages that share the same state are grouped into a page range.
For each page range, the following fields are exported:
- first page index
- number of pages in the range
- well known page/pte flags
- number of mmap users

Only page flags not expected to disappear in the near future are exported:

Y:young R:referenced A:active U:uptodate P:ptedirty D:dirty W:writeback

Here is a sample output:

# cat /proc/$$/pmaps
08048000-080c9000 r-xp 08048000 00:00 0
32840 129 Y_A_P__ 1
080c9000-080f6000 rwxp 080c9000 00:00 0 [heap]
32969 45 Y_A_P__ 1
f7dba000-f7dc3000 r-xp 00000000 03:00 176633 /lib/libnss_files-2.3.6.so
0 1 Y_AU___ 1
1 1 YR_U___ 1
5 1 YR_U___ 1
8 1 Y_AU___ 1
f7dc3000-f7dc5000 rwxp 00008000 03:00 176633 /lib/libnss_files-2.3.6.so
8 2 Y_A_P__ 1
f7dc5000-f7dcd000 r-xp 00000000 03:00 176635 /lib/libnss_nis-2.3.6.so
0 1 Y_AU___ 1
1 1 YR_U___ 1
4 1 YR_U___ 1
7 1 Y_AU___ 1
f7dcd000-f7dcf000 rwxp 00007000 03:00 176635 /lib/libnss_nis-2.3.6.so
7 2 Y_A_P__ 1
f7dcf000-f7de1000 r-xp 00000000 03:00 176630 /lib/libnsl-2.3.6.so
0 1 Y_AU___ 1
1 3 YR_U___ 1
16 1 YR_U___ 1
f7de1000-f7de3000 rwxp 00011000 03:00 176630 /lib/libnsl-2.3.6.so
17 2 Y_A_P__ 1
[...]


Matt Mackall's pagemap/kpagemap and John Berthels's exmap can achieve the same goals,
and probably more. But this text-based pmaps interface should be easier to use.

Cc: Jeremy Fitzhardinge <jeremy@xxxxxxxx>
Cc: David Rientjes <rientjes@xxxxxxxxxx>
Cc: Matt Mackall <mpm@xxxxxxxxxxx>
Cc: John Berthels <jjberthels@xxxxxxxxx>
Cc: Nick Piggin <nickpiggin@xxxxxxxxxxxx>
Signed-off-by: Fengguang Wu <wfg@xxxxxxxxxxxxxxxx>
---
fs/proc/base.c | 5 +
fs/proc/internal.h | 1
fs/proc/task_mmu.c | 170 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 176 insertions(+)

--- linux-2.6.23-rc2.orig/fs/proc/task_mmu.c
+++ linux-2.6.23-rc2/fs/proc/task_mmu.c
@@ -258,6 +258,126 @@ static void smaps_pte_range(struct vm_ar
cond_resched();
}

+/*
+ * Walker state for /proc/<pid>/pmaps: the output stream plus the page
+ * range that is currently being accumulated and has not been printed yet.
+ * show_pmaps() zeroes it once per vma; pmaps_pte_range() updates it.
+ */
+struct pmaps_private {
+ struct vm_area_struct *vma; /* vma being shown (set by show_pmaps) */
+ struct seq_file *m; /* where the range lines are printed */
+ pte_t ptent; /* pte of the first page of the pending range */
+ struct page *page; /* first page of the pending range; NULL = no range yet */
+ unsigned long offset; /* page_index() of that first page */
+ unsigned long len; /* number of pages merged into the pending range */
+};
+
+/*
+ * OR of the masks of all non-faked entries in page_flags[]; filled in by
+ * task_mmu_init() and used to filter page->flags in pmaps_page_flags().
+ */
+static unsigned long page_mask;
+/*
+ * PG_YOUNG and PG_DIRTY are pseudo flags: pmaps_page_flags() sets them from
+ * the pte (pte_young/pte_dirty), not from page->flags. They borrow the bit
+ * numbers of page flags that are not exported here.
+ */
+#define PG_YOUNG PG_readahead /* reuse any non-relevant flag */
+#define PG_DIRTY PG_lru /* ditto */
+/* NOTE(review): NR_FAKED_FLAG is not referenced in the visible patch. */
+#define NR_FAKED_FLAG 2
+/* Number of entries in page_flags[]. */
+#define PG_COUNT (sizeof(page_flags)/sizeof(page_flags[0]))
+
+/*
+ * Page state names, prefixed by their abbreviations.
+ *
+ * One entry describes one column of the flags field printed by
+ * pmaps_show_range().
+ */
+struct pmaps_page_flag {
+ unsigned long mask; /* bit tested in the value from pmaps_page_flags() */
+ const char *name; /* "X:long-name"; name[0] is the letter that is printed */
+ bool faked; /* nonzero: pte-derived pseudo flag, left out of page_mask */
+};
+
+/*
+ * Exported flags, in the order their letters appear in the output.
+ * Entries marked .faked are derived from the pte rather than page->flags.
+ */
+static struct pmaps_page_flag page_flags[] = {
+	{ .mask = 1 << PG_YOUNG,      .name = "Y:young",      .faked = 1 },
+	{ .mask = 1 << PG_referenced, .name = "R:referenced", .faked = 0 },
+	{ .mask = 1 << PG_active,     .name = "A:active",     .faked = 0 },
+	{ .mask = 1 << PG_uptodate,   .name = "U:uptodate",   .faked = 0 },
+	{ .mask = 1 << PG_DIRTY,      .name = "P:ptedirty",   .faked = 1 },
+	{ .mask = 1 << PG_dirty,      .name = "D:dirty",      .faked = 0 },
+	{ .mask = 1 << PG_writeback,  .name = "W:writeback",  .faked = 0 },
+};
+
+/*
+ * Build the flag word reported for one mapped page: the exported bits of
+ * page->flags (filtered through page_mask) combined with the PG_YOUNG and
+ * PG_DIRTY pseudo bits taken from the pte that maps it.
+ */
+static unsigned long pmaps_page_flags(pte_t ptent, struct page *page)
+{
+	unsigned long state = page->flags & page_mask;
+
+	if (pte_young(ptent))
+		state |= (1 << PG_YOUNG);
+	if (pte_dirty(ptent))
+		state |= (1 << PG_DIRTY);
+
+	return state;
+}
+
+/*
+ * Decide whether (ptent, page) may be merged into the range that started
+ * with (ptent0, page0).  Returns 1 when both pages have the same mapcount
+ * and the same reported flags, 0 otherwise.  A NULL page0 means no range
+ * has been started yet and never matches.
+ */
+static int pmaps_pages_similiar(pte_t ptent0, struct page *page0,
+				pte_t ptent, struct page *page)
+{
+	if (!page0)
+		return 0;
+
+	return page_mapcount(page0) == page_mapcount(page) &&
+	       pmaps_page_flags(ptent0, page0) == pmaps_page_flags(ptent, page);
+}
+
+/*
+ * Print the pending page range as one line:
+ *   <first index> <page count> <flag letters> <mapcount>
+ * Each flag column shows the first character of its name when set and '_'
+ * when clear.  Does nothing if no range has been started (pp->page is NULL).
+ */
+static void pmaps_show_range(struct pmaps_private *pp)
+{
+	struct seq_file *out = pp->m;
+	unsigned long state;
+	int col;
+
+	if (!pp->page)
+		return;
+
+	state = pmaps_page_flags(pp->ptent, pp->page);
+
+	seq_printf(out, "%lu\t%lu\t", pp->offset, pp->len);
+	for (col = 0; col < PG_COUNT; col++) {
+		char letter = '_';
+
+		if (state & page_flags[col].mask)
+			letter = page_flags[col].name[0];
+		seq_putc(out, letter);
+	}
+	seq_printf(out, "\t%d\n", page_mapcount(pp->page));
+}
+
+/*
+ * Print whatever range is pending in @pp, then start a new one-page range
+ * at (@page, @ptent) whose first page index is @index.
+ */
+static void pmaps_begin_range(struct pmaps_private *pp, struct page *page,
+			      pte_t ptent, pgoff_t index)
+{
+	pmaps_show_range(pp);
+	pp->page = page;
+	pp->ptent = ptent;
+	pp->offset = index;
+	pp->len = 1;
+}
+
+/*
+ * walk_page_range() callback: scan the ptes covering [addr, end) under the
+ * pte lock and fold each present, normal page into the pending range in
+ * @private (a struct pmaps_private).  A page extends the range only if its
+ * index directly follows the range and pmaps_pages_similiar() accepts it;
+ * otherwise the pending range is printed and a new one begins.
+ */
+static void pmaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+			    unsigned long addr, unsigned long end,
+			    void *private)
+{
+	struct pmaps_private *pp = private;
+	spinlock_t *ptl;
+	pte_t *pte;
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	for (; addr != end; pte++, addr += PAGE_SIZE) {
+		pte_t entry = *pte;
+		struct page *pg;
+		pgoff_t index;
+
+		if (!pte_present(entry))
+			continue;
+
+		pg = vm_normal_page(vma, addr, entry);
+		if (!pg)
+			continue;
+
+		index = page_index(pg);
+		if (index != pp->offset + pp->len ||
+		    !pmaps_pages_similiar(pp->ptent, pp->page, entry, pg)) {
+			pmaps_begin_range(pp, pg, entry, index);
+			continue;
+		}
+		pp->len++;
+	}
+	/* pte was advanced one past the last entry examined */
+	pte_unmap_unlock(pte - 1, ptl);
+	cond_resched();
+}
+
static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
void *private)
@@ -359,6 +479,26 @@ static int show_smap(struct seq_file *m,
return show_map_internal(m, v, &mss);
}

+/*
+ * seq_file ->show() for /proc/<pid>/pmaps.
+ *
+ * @m: output seq_file.
+ * @v: the vma handed over by the iterator.
+ *
+ * Prints the regular maps line for the vma, followed by one line per run
+ * of similar pages.  Vmas without an mm and hugetlb vmas produce no output.
+ * Always returns 0.
+ */
+static int show_pmaps(struct seq_file *m, void *v)
+{
+	struct vm_area_struct *vma = v;
+	struct pmaps_private pp;
+
+	if (!vma->vm_mm || is_vm_hugetlb_page(vma))
+		return 0;
+
+	memset(&pp, 0, sizeof pp);
+	pp.m = m;
+	pp.vma = vma;
+
+	/*
+	 * Do not hold mm->page_table_lock across the walk.  pmaps_pte_range()
+	 * takes the pte lock itself through pte_offset_map_lock(), which is
+	 * this very spinlock when pte locks are not split, so holding it here
+	 * would self-deadlock.  The callback also calls cond_resched(), which
+	 * must not run under a spinlock.  show_smap() walks the same way
+	 * without the lock.
+	 * NOTE(review): the vma is presumably kept stable by the seq_file
+	 * iterator (m_start/m_stop) - confirm it holds mmap_sem.
+	 */
+	show_map_internal(m, v, NULL);
+	walk_page_range(vma, pmaps_pte_range, &pp);
+	/* print the final range left pending by the walk */
+	pmaps_show_range(&pp);
+	return 0;
+}
+
void clear_refs_smap(struct mm_struct *mm)
{
struct vm_area_struct *vma;
@@ -482,6 +622,13 @@ static struct seq_operations proc_pid_sm
.show = show_smap
};

+/*
+ * seq_file iterator for /proc/<pid>/pmaps.  Only ->show is specific to
+ * pmaps; m_start/m_next/m_stop are defined outside the visible hunks and
+ * hand each element to show_pmaps(), which treats it as a vma.
+ */
+static struct seq_operations proc_pid_pmaps_op = {
+ .start = m_start,
+ .next = m_next,
+ .stop = m_stop,
+ .show = show_pmaps
+};
+
static int do_maps_open(struct inode *inode, struct file *file,
struct seq_operations *ops)
{
@@ -558,3 +705,26 @@ const struct file_operations proc_smaps_
.llseek = seq_lseek,
.release = seq_release_private,
};
+
+/*
+ * ->open() for /proc/<pid>/pmaps: delegates to do_maps_open() with the
+ * pmaps seq_operations and returns its result.
+ */
+static int pmaps_open(struct inode *inode, struct file *file)
+{
+ return do_maps_open(inode, file, &proc_pid_pmaps_op);
+}
+
+/*
+ * File operations for /proc/<pid>/pmaps.  Apart from ->open, reading,
+ * seeking and release go through the generic seq_file helpers.
+ */
+const struct file_operations proc_pmaps_operations = {
+ .open = pmaps_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+/*
+ * Boot-time setup: compute page_mask as the union of the masks of every
+ * page_flags[] entry that is a genuine page flag (i.e. not faked).
+ * Always returns 0.
+ */
+static __init int task_mmu_init(void)
+{
+	int idx;
+
+	page_mask = 0;
+	for (idx = 0; idx < PG_COUNT; idx++) {
+		if (page_flags[idx].faked)
+			continue;
+		page_mask |= page_flags[idx].mask;
+	}
+
+	return 0;
+}
+
+module_init(task_mmu_init);
--- linux-2.6.23-rc2.orig/fs/proc/base.c
+++ linux-2.6.23-rc2/fs/proc/base.c
@@ -45,6 +45,9 @@
*
* Paul Mundt <paul.mundt@xxxxxxxxx>:
* Overall revision about smaps.
+ *
+ * Fengguang Wu <wfg@xxxxxxxxxxxxxxxx>:
+ * Initial version of pmaps.
*/

#include <asm/uaccess.h>
@@ -2070,6 +2073,7 @@ static const struct pid_entry tgid_base_
#ifdef CONFIG_MMU
REG("clear_refs", S_IWUSR, clear_refs),
REG("smaps", S_IRUGO, smaps),
+ REG("pmaps", S_IRUGO, pmaps),
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, attr_dir),
@@ -2356,6 +2360,7 @@ static const struct pid_entry tid_base_s
#ifdef CONFIG_MMU
REG("clear_refs", S_IWUSR, clear_refs),
REG("smaps", S_IRUGO, smaps),
+ REG("pmaps", S_IRUGO, pmaps),
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, attr_dir),
--- linux-2.6.23-rc2.orig/fs/proc/internal.h
+++ linux-2.6.23-rc2/fs/proc/internal.h
@@ -53,6 +53,7 @@ extern const struct file_operations proc
extern const struct file_operations proc_maps_operations;
extern const struct file_operations proc_numa_maps_operations;
extern const struct file_operations proc_smaps_operations;
+extern const struct file_operations proc_pmaps_operations;


void free_proc_entry(struct proc_dir_entry *de);

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/