[patch 3/3] smaps: add clear_refs file to clear reference
From: David Rientjes
Date:  Wed Feb 07 2007 - 00:28:08 EST
Adds an additional file to /proc/pid: clear_refs.  When any non-zero
number is written to this file, all the PG_referenced flags (meaning the
page has been accessed) are cleared within each VMA for the corresponding
task.
It is now possible to measure how much memory a task is using by clearing
the reference bits with
	echo 1 > /proc/pid/clear_refs
and checking the reference count for each VMA from the /proc/pid/smaps
output at a time interval later.
The /proc/pid/clear_refs file is only writable by the user who owns the
task.
Cc: Hugh Dickins <hugh@xxxxxxxxxxx>
Cc: Paul Mundt <lethal@xxxxxxxxxxxx>
Cc: Christoph Lameter <clameter@xxxxxxx>
Signed-off-by: David Rientjes <rientjes@xxxxxxxxxx>
---
 fs/proc/base.c          |   31 +++++++++++++++++++++++++++++++
 fs/proc/task_mmu.c      |   36 ++++++++++++++++++++++++++++++++++++
 include/linux/proc_fs.h |    1 +
 3 files changed, 68 insertions(+), 0 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1a979ea..b50315f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -715,6 +715,35 @@ static struct file_operations proc_oom_adjust_operations = {
 	.write		= oom_adjust_write,
 };
 
+static ssize_t clear_refs_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct task_struct *task;
+	char buffer[PROC_NUMBUF], *end;
+
+	memset(buffer, 0, sizeof(buffer));
+	if (count > sizeof(buffer) - 1)
+		count = sizeof(buffer) - 1;
+	if (copy_from_user(buffer, buf, count))
+		return -EFAULT;
+	if (!simple_strtol(buffer, &end, 0))
+		return -EINVAL;
+	if (*end == '\n')
+		end++;
+	task = get_proc_task(file->f_path.dentry->d_inode);
+	if (!task)
+		return -ESRCH;
+	clear_refs_smap(task->mm->mmap);
+	put_task_struct(task);
+	if (end - buffer == 0)
+		return -EIO;
+	return end - buffer;
+}
+
+static struct file_operations proc_clear_refs_operations = {
+	.write		= clear_refs_write,
+};
+
 #ifdef CONFIG_AUDITSYSCALL
 #define TMPBUFLEN 21
 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
@@ -1856,6 +1885,7 @@ static struct pid_entry tgid_base_stuff[] = {
 	REG("mounts",     S_IRUGO, mounts),
 	REG("mountstats", S_IRUSR, mountstats),
 #ifdef CONFIG_MMU
+	REG("clear_refs", S_IWUSR, clear_refs),
 	REG("smaps",      S_IRUGO, smaps),
 #endif
 #ifdef CONFIG_SECURITY
@@ -2137,6 +2167,7 @@ static struct pid_entry tid_base_stuff[] = {
 	LNK("exe",       exe),
 	REG("mounts",    S_IRUGO, mounts),
 #ifdef CONFIG_MMU
+	REG("clear_refs", S_IWUSR, clear_refs),
 	REG("smaps",     S_IRUGO, smaps),
 #endif
 #ifdef CONFIG_SECURITY
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index a1f5e87..10714c9 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -322,6 +322,26 @@ static void smaps_pte_func(struct pte_walker *walker, pte_t *pte,
 }
 
 /*
+ * Called for each PTE in the struct pte_walker address range.  For all normal,
+ * present pages, we clear their referenced bits.
+ */
+static void clear_refs_pte_func(struct pte_walker *walker, pte_t *pte,
+				unsigned long addr)
+{
+	struct page *page;
+	pte_t ptent;
+
+	ptent = *pte;
+	if (!pte_present(ptent))
+		return;
+
+	page = vm_normal_page(walker->vma, addr, ptent);
+	if (!page)
+		return;
+	ClearPageReferenced(page);
+}
+
+/*
  * Displays the smap for the process.  smaps_pte_func() is called for each PTE
  * in the range from vma->vm_start to vma->vm_end.
  */
@@ -343,6 +363,22 @@ static int show_smap(struct seq_file *m, void *v)
 	return show_map_internal(m, v, &mss);
 }
 
+void clear_refs_smap(struct vm_area_struct *vma)
+{
+	for (; vma; vma = vma->vm_next) {
+		struct pte_walker walker = {
+			.vma		= vma,
+			.start		= vma->vm_start,
+			.end		= vma->vm_end,
+			.private	= NULL,
+			.func		= clear_refs_pte_func,
+		};
+
+		if (vma->vm_mm && !is_vm_hugetlb_page(vma))
+			walk_pgds(&walker);
+	};
+}
+
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
 	struct proc_maps_private *priv = m->private;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 87dec8f..f3d426b 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -104,6 +104,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
 unsigned long task_vsize(struct mm_struct *);
 int task_statm(struct mm_struct *, int *, int *, int *, int *);
 char *task_mem(struct mm_struct *, char *);
+void clear_refs_smap(struct vm_area_struct *);
 
 extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 						struct proc_dir_entry *parent);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/