[RFC][PATCH 4/8] RSS controller accounting

From: Balbir Singh
Date: Thu Nov 09 2006 - 14:38:32 EST




Account the RSS usage of each task and of its associated container. The
definition of RSS was debated and discussed in the following thread:

http://lkml.org/lkml/2006/10/10/130


The code tracks all resident pages (including shared pages) as RSS. The patch
can easily be adapted to whatever definition of RSS is eventually agreed upon;
this implementation is a proof-of-concept RSS controller.
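
In a nutshell, the accounting is a pair of charge/uncharge helpers hooked into
the rmap paths: each page_add_*_rmap()/page_dup_rmap() call charges one page to
both the task's mm and its container, and page_remove_rmap() uncharges it.
Condensed from the patch below:

void memctlr_inc_rss(struct page *page)
{
	struct memctlr *res = get_memctlr_from_page(page);

	if (!res)			/* controller not initialized yet */
		return;
	atomic_long_inc(&current->mm->counter->rss);	/* per-mm RSS */
	atomic_long_inc(&res->counter.rss);		/* per-container RSS */
}

void memctlr_dec_rss(struct page *page)
{
	struct memctlr *res = get_memctlr_from_page(page);

	if (!res)
		return;
	atomic_long_dec(&res->counter.rss);
	if ((current->flags & PF_EXITING) && !current->mm)
		return;			/* mm already torn down at exit */
	atomic_long_dec(&current->mm->counter->rss);
}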

The accounting is inspired by Rohit Seth's container patches.

TODOs

1. Merge the existing file_rss and anon_rss tracking with the RSS tracking
added here, to maximize code reuse.
2. Add or remove RSS tracking hooks as the definition of RSS evolves.
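
With the controller loaded, the per-group count is reported through the
resource-group filesystem's stats interface (memctlr_show_stats() in the patch
below); the mount point and file name in this example are illustrative only:

	# cat /res_groups/<group>/stats		(illustrative path)
	RSS Pages 4321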


Signed-off-by: Balbir Singh <balbir@xxxxxxxxxx>
---

 include/linux/memctlr.h    |   26 +++++++++++
 include/linux/rmap.h       |    2
 include/linux/sched.h      |   11 +++++
 kernel/fork.c              |    6 ++
 kernel/res_group/memctlr.c |   99 +++++++++++++++++++++++++++++++++++++++++++--
 mm/rmap.c                  |    6 ++
 6 files changed, 145 insertions(+), 5 deletions(-)

diff -puN include/linux/sched.h~container-memctlr-acct include/linux/sched.h
--- linux-2.6.19-rc2/include/linux/sched.h~container-memctlr-acct 2006-11-09 21:46:22.000000000 +0530
+++ linux-2.6.19-rc2-balbir/include/linux/sched.h 2006-11-09 21:46:22.000000000 +0530
@@ -88,6 +88,10 @@ struct sched_param {
struct exec_domain;
struct futex_pi_state;

+struct memctlr;
+struct container;
+struct mem_counter;
+
/*
* List of flags we want to share for kernel threads,
* if only because they are not used by them anyway.
@@ -355,6 +359,13 @@ struct mm_struct {
/* aio bits */
rwlock_t ioctx_list_lock;
struct kioctx *ioctx_list;
+#ifdef CONFIG_RES_GROUPS_MEMORY
+ struct container *container;
+ /*
+ * TODO: merge this with the existing anon_rss/file_rss accounting
+ */
+ struct mem_counter *counter;
+#endif
};

struct sighand_struct {
diff -puN kernel/res_group/memctlr.c~container-memctlr-acct kernel/res_group/memctlr.c
--- linux-2.6.19-rc2/kernel/res_group/memctlr.c~container-memctlr-acct 2006-11-09 21:46:22.000000000 +0530
+++ linux-2.6.19-rc2-balbir/kernel/res_group/memctlr.c 2006-11-09 21:47:06.000000000 +0530
@@ -37,6 +37,8 @@ static struct resource_group *root_rgrou
static const char version[] = "0.01";
static struct memctlr *memctlr_root;

+#define MEMCTLR_MAGIC 0xdededede
+
struct mem_counter {
atomic_long_t rss;
};
@@ -49,6 +51,7 @@ struct memctlr {
/* Statistics */
int successes;
int failures;
+ int magic;
};

struct res_controller memctlr_rg;
@@ -66,12 +69,91 @@ static struct memctlr *get_memctlr(struc
&memctlr_rg));
}

+static void memctlr_init_mem_counter(struct mem_counter *counter)
+{
+ atomic_long_set(&counter->rss, 0);
+}
+
+int mm_init_mem_counter(struct mm_struct *mm)
+{
+ mm->counter = kmalloc(sizeof(struct mem_counter), GFP_KERNEL);
+ if (!mm->counter)
+ return -ENOMEM;
+ memctlr_init_mem_counter(mm->counter);
+ return 0;
+}
+
+void mm_free_mem_counter(struct mm_struct *mm)
+{
+ kfree(mm->counter);
+}
+
+void mm_assign_container(struct mm_struct *mm, struct task_struct *p)
+{
+ rcu_read_lock();
+ mm->container = rcu_dereference(p->container);
+ rcu_read_unlock();
+}
+
+static inline struct memctlr *get_memctlr_from_page(struct page *page)
+{
+ struct resource_group *rgroup;
+ struct memctlr *res;
+
+ /*
+ * Is the resource groups infrastructure initialized?
+ */
+ if (!memctlr_root)
+ return NULL;
+
+ rcu_read_lock();
+ rgroup = (struct resource_group *)rcu_dereference(current->container);
+ rcu_read_unlock();
+
+ res = get_memctlr(rgroup);
+ if (!res)
+ return NULL;
+
+ BUG_ON(res->magic != MEMCTLR_MAGIC);
+ return res;
+}
+
+
+void memctlr_inc_rss(struct page *page)
+{
+ struct memctlr *res;
+
+ res = get_memctlr_from_page(page);
+ if (!res)
+ return;
+
+ atomic_long_inc(&current->mm->counter->rss);
+ atomic_long_inc(&res->counter.rss);
+}
+
+void memctlr_dec_rss(struct page *page)
+{
+ struct memctlr *res;
+
+ res = get_memctlr_from_page(page);
+ if (!res)
+ return;
+
+ atomic_long_dec(&res->counter.rss);
+
+ if ((current->flags & PF_EXITING) && !current->mm)
+ return;
+ atomic_long_dec(&current->mm->counter->rss);
+}
+
static void memctlr_init_new(struct memctlr *res)
{
res->shares.min_shares = SHARE_DONT_CARE;
res->shares.max_shares = SHARE_DONT_CARE;
res->shares.child_shares_divisor = SHARE_DEFAULT_DIVISOR;
res->shares.unused_min_shares = SHARE_DEFAULT_DIVISOR;
+
+ memctlr_init_mem_counter(&res->counter);
}

static struct res_shares *memctlr_alloc_instance(struct resource_group *rgroup)
@@ -83,6 +165,7 @@ static struct res_shares *memctlr_alloc_
return NULL;
res->rgroup = rgroup;
memctlr_init_new(res);
+ res->magic = MEMCTLR_MAGIC;
if (is_res_group_root(rgroup)) {
root_rgroup = rgroup;
memctlr_root = res;
@@ -93,7 +176,7 @@ static struct res_shares *memctlr_alloc_

static void memctlr_free_instance(struct res_shares *shares)
{
- struct memctlr *res, *parres;
+ struct memctlr *res;

res = get_memctlr_from_shares(shares);
BUG_ON(!res);
@@ -108,12 +191,19 @@ static void memctlr_free_instance(struct
static ssize_t memctlr_show_stats(struct res_shares *shares, char *buf,
size_t len)
{
- int i = 0;
+ int i = 0, j = 0;
+ struct memctlr *res;
+
+ res = get_memctlr_from_shares(shares);
+ BUG_ON(!res);

- i += snprintf(buf, len, "Accounting will be added soon\n");
+ i = snprintf(buf, len, "RSS Pages %ld\n",
+ atomic_long_read(&res->counter.rss));
buf += i;
len -= i;
- return i;
+ j += i;
+
+ return j;
}

struct res_controller memctlr_rg = {
@@ -142,5 +232,6 @@ void __exit memctlr_exit(void)
BUG_ON(rc != 0);
}

+
module_init(memctlr_init);
module_exit(memctlr_exit);
diff -puN include/linux/memctlr.h~container-memctlr-acct include/linux/memctlr.h
--- linux-2.6.19-rc2/include/linux/memctlr.h~container-memctlr-acct 2006-11-09 21:46:22.000000000 +0530
+++ linux-2.6.19-rc2-balbir/include/linux/memctlr.h 2006-11-09 21:46:22.000000000 +0530
@@ -26,6 +26,32 @@

#ifdef CONFIG_RES_GROUPS_MEMORY
#include <linux/res_group_rc.h>
+
+extern int mm_init_mem_counter(struct mm_struct *mm);
+extern void mm_assign_container(struct mm_struct *mm, struct task_struct *p);
+extern void memctlr_inc_rss(struct page *page);
+extern void memctlr_dec_rss(struct page *page);
+extern void mm_free_mem_counter(struct mm_struct *mm);
+
+#else /* CONFIG_RES_GROUPS_MEMORY */
+
+static inline void memctlr_inc_rss(struct page *page)
+{}
+
+static inline void memctlr_dec_rss(struct page *page)
+{}
+
+static inline int mm_init_mem_counter(struct mm_struct *mm)
+{
+ return 0;
+}
+
+static inline void mm_assign_container(struct mm_struct *mm, struct task_struct *p)
+{}
+
+static inline void mm_free_mem_counter(struct mm_struct *mm)
+{}
+
#endif /* CONFIG_RES_GROUPS_MEMORY */

#endif /* _LINUX_MEMCTRL_H */
diff -puN kernel/fork.c~container-memctlr-acct kernel/fork.c
--- linux-2.6.19-rc2/kernel/fork.c~container-memctlr-acct 2006-11-09 21:46:22.000000000 +0530
+++ linux-2.6.19-rc2-balbir/kernel/fork.c 2006-11-09 21:46:22.000000000 +0530
@@ -49,6 +49,7 @@
#include <linux/taskstats_kern.h>
#include <linux/random.h>
#include <linux/numtasks.h>
+#include <linux/memctlr.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -340,11 +341,15 @@ static struct mm_struct * mm_init(struct
mm->ioctx_list = NULL;
mm->free_area_cache = TASK_UNMAPPED_BASE;
mm->cached_hole_size = ~0UL;
+ if (mm_init_mem_counter(mm) < 0)
+ goto mem_fail;

if (likely(!mm_alloc_pgd(mm))) {
mm->def_flags = 0;
return mm;
}
+ mm_free_mem_counter(mm); /* don't leak the counter if pgd allocation fails */
+mem_fail:
free_mm(mm);
return NULL;
}
@@ -372,6 +376,7 @@ struct mm_struct * mm_alloc(void)
void fastcall __mmdrop(struct mm_struct *mm)
{
BUG_ON(mm == &init_mm);
+ mm_free_mem_counter(mm);
mm_free_pgd(mm);
destroy_context(mm);
free_mm(mm);
@@ -544,6 +549,7 @@ static int copy_mm(unsigned long clone_f

good_mm:
tsk->mm = mm;
+ mm_assign_container(mm, tsk);
tsk->active_mm = mm;
return 0;

diff -puN mm/rmap.c~container-memctlr-acct mm/rmap.c
--- linux-2.6.19-rc2/mm/rmap.c~container-memctlr-acct 2006-11-09 21:46:22.000000000 +0530
+++ linux-2.6.19-rc2-balbir/mm/rmap.c 2006-11-09 21:46:22.000000000 +0530
@@ -537,6 +537,7 @@ void page_add_anon_rmap(struct page *pag
if (atomic_inc_and_test(&page->_mapcount))
__page_set_anon_rmap(page, vma, address);
/* else checking page index and mapping is racy */
+ memctlr_inc_rss(page);
}

/*
@@ -553,6 +554,7 @@ void page_add_new_anon_rmap(struct page
{
atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */
__page_set_anon_rmap(page, vma, address);
+ memctlr_inc_rss(page);
}

/**
@@ -565,6 +567,7 @@ void page_add_file_rmap(struct page *pag
{
if (atomic_inc_and_test(&page->_mapcount))
__inc_zone_page_state(page, NR_FILE_MAPPED);
+ memctlr_inc_rss(page);
}

/**
@@ -596,8 +599,9 @@ void page_remove_rmap(struct page *page)
if (page_test_and_clear_dirty(page))
set_page_dirty(page);
__dec_zone_page_state(page,
- PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
+ PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
}
+ memctlr_dec_rss(page);
}

/*
diff -puN include/linux/rmap.h~container-memctlr-acct include/linux/rmap.h
--- linux-2.6.19-rc2/include/linux/rmap.h~container-memctlr-acct 2006-11-09 21:46:22.000000000 +0530
+++ linux-2.6.19-rc2-balbir/include/linux/rmap.h 2006-11-09 21:46:22.000000000 +0530
@@ -8,6 +8,7 @@
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
+#include <linux/memctlr.h>

/*
* The anon_vma heads a list of private "related" vmas, to scan if
@@ -84,6 +85,7 @@ void page_remove_rmap(struct page *);
static inline void page_dup_rmap(struct page *page)
{
atomic_inc(&page->_mapcount);
+ memctlr_inc_rss(page);
}

/*
_

--

Balbir Singh,
Linux Technology Center,
IBM Software Labs