[PATCH 4/9] memcg: dirty page accounting support routines

From: Fengguang Wu
Date: Tue Feb 28 2012 - 09:56:20 EST


From: Greg Thelen <gthelen@xxxxxxxxxx>

Added memcg dirty page accounting support routines. These routines are
used by later changes to provide memcg aware writeback and dirty page
limiting. A mem_cgroup_dirty_info() tracepoint is also included to
allow for easier understanding of memcg writeback operation.
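
For context, here is a rough sketch (not part of this patch) of how a
writeback path could consult these routines to throttle a cgroup. The
memcg_over_dirty_limit() helper and the memcg_dirty_ratio parameter are
hypothetical; they only illustrate the intended use of
mem_cgroup_dirty_pages() and MEMCG_NR_DIRTYABLE_PAGES:

/*
 * Illustrative only: returns true when the cgroup's dirty, writeback
 * and unstable NFS pages exceed a percentage of its dirtyable pages.
 */
static bool memcg_over_dirty_limit(struct mem_cgroup *memcg,
                                   unsigned int memcg_dirty_ratio)
{
        unsigned long dirty = mem_cgroup_dirty_pages(memcg);
        unsigned long dirtyable =
                mem_cgroup_page_stat(memcg, MEMCG_NR_DIRTYABLE_PAGES);

        return dirty > dirtyable * memcg_dirty_ratio / 100;
}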

Signed-off-by: Greg Thelen <gthelen@xxxxxxxxxx>
Signed-off-by: Fengguang Wu <fengguang.wu@xxxxxxxxx>
---
Changelog since v8:
- Use 'memcg' rather than 'mem' for local variables and parameters.
This is consistent with other memory controller code.

include/linux/memcontrol.h | 5 +
mm/memcontrol.c | 112 +++++++++++++++++++++++++++++++++++
2 files changed, 117 insertions(+)

--- linux.orig/include/linux/memcontrol.h 2012-02-25 20:48:34.337580646 +0800
+++ linux/include/linux/memcontrol.h 2012-02-25 20:48:34.361580646 +0800
@@ -36,8 +36,13 @@ enum mem_cgroup_page_stat_item {
 	MEMCG_NR_FILE_DIRTY, /* # of dirty pages in page cache */
 	MEMCG_NR_FILE_WRITEBACK, /* # of pages under writeback */
 	MEMCG_NR_FILE_UNSTABLE_NFS, /* # of NFS unstable pages */
+	MEMCG_NR_DIRTYABLE_PAGES, /* # of pages that could be dirty */
 };

+unsigned long mem_cgroup_page_stat(struct mem_cgroup *memcg,
+                                   enum mem_cgroup_page_stat_item item);
+unsigned long mem_cgroup_dirty_pages(struct mem_cgroup *memcg);
+
 struct mem_cgroup_reclaim_cookie {
 	struct zone *zone;
 	int priority;
--- linux.orig/mm/memcontrol.c 2012-02-25 20:48:34.337580646 +0800
+++ linux/mm/memcontrol.c 2012-02-25 21:09:54.073554384 +0800
@@ -1255,6 +1255,118 @@ int mem_cgroup_swappiness(struct mem_cgr
 	return memcg->swappiness;
 }

+static inline bool mem_cgroup_can_swap(struct mem_cgroup *memcg)
+{
+	if (nr_swap_pages == 0)
+		return false;
+	if (!do_swap_account)
+		return true;
+	if (memcg->memsw_is_minimum)
+		return false;
+	if (res_counter_margin(&memcg->memsw) == 0)
+		return false;
+	return true;
+}
+
+static s64 mem_cgroup_local_page_stat(struct mem_cgroup *memcg,
+                                      enum mem_cgroup_page_stat_item item)
+{
+	s64 ret;
+
+	switch (item) {
+	case MEMCG_NR_FILE_DIRTY:
+		ret = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_FILE_DIRTY);
+		break;
+	case MEMCG_NR_FILE_WRITEBACK:
+		ret = mem_cgroup_read_stat(memcg,
+					   MEM_CGROUP_STAT_FILE_WRITEBACK);
+		break;
+	case MEMCG_NR_FILE_UNSTABLE_NFS:
+		ret = mem_cgroup_read_stat(memcg,
+					   MEM_CGROUP_STAT_FILE_UNSTABLE_NFS);
+		break;
+	case MEMCG_NR_DIRTYABLE_PAGES:
+		ret = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE)) +
+			mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE));
+		if (mem_cgroup_can_swap(memcg))
+			ret += mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON)) +
+				mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON));
+		break;
+	default:
+		BUG();
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Return the number of additional pages that the @memcg cgroup could allocate.
+ * If use_hierarchy is set, then this involves checking parent mem cgroups to
+ * find the cgroup with the smallest free space.
+ */
+static unsigned long
+mem_cgroup_hierarchical_free_pages(struct mem_cgroup *memcg)
+{
+	u64 free;
+	unsigned long min_free;
+
+	min_free = global_page_state(NR_FREE_PAGES);
+
+	while (memcg) {
+		free = mem_cgroup_margin(memcg);
+		min_free = min_t(u64, min_free, free);
+		memcg = parent_mem_cgroup(memcg);
+	}
+
+	return min_free;
+}
+
+/*
+ * mem_cgroup_page_stat() - get memory cgroup file cache statistics
+ * @memcg: memory cgroup to query
+ * @item: memory statistic item exported to the kernel
+ *
+ * Return the accounted statistic value.
+ */
+unsigned long mem_cgroup_page_stat(struct mem_cgroup *memcg,
+                                   enum mem_cgroup_page_stat_item item)
+{
+	struct mem_cgroup *iter;
+	s64 value;
+
+	/*
+	 * If we're looking for dirtyable pages we need to evaluate free pages
+	 * depending on the limit and usage of the parents first of all.
+	 */
+	if (item == MEMCG_NR_DIRTYABLE_PAGES)
+		value = mem_cgroup_hierarchical_free_pages(memcg);
+	else
+		value = 0;
+
+	/*
+	 * Recursively evaluate page statistics against all cgroups under
+	 * the hierarchy tree.
+	 */
+	for_each_mem_cgroup_tree(iter, memcg)
+		value += mem_cgroup_local_page_stat(iter, item);
+
+	/*
+	 * Summing of unlocked per-cpu counters is racy and may yield a slightly
+	 * negative value. Zero is the only sensible value in such cases.
+	 */
+	if (unlikely(value < 0))
+		value = 0;
+
+	return value;
+}
+
+unsigned long mem_cgroup_dirty_pages(struct mem_cgroup *memcg)
+{
+	return mem_cgroup_page_stat(memcg, MEMCG_NR_FILE_DIRTY) +
+		mem_cgroup_page_stat(memcg, MEMCG_NR_FILE_WRITEBACK) +
+		mem_cgroup_page_stat(memcg, MEMCG_NR_FILE_UNSTABLE_NFS);
+}
+
 static void mem_cgroup_start_move(struct mem_cgroup *memcg)
 {
 	int cpu;
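
To make the MEMCG_NR_DIRTYABLE_PAGES arithmetic above concrete, a worked
example with assumed numbers (not taken from this patch): if global
NR_FREE_PAGES is 5000 pages, the queried memcg has a 400-page res_counter
margin and its parent a 200-page margin, then
mem_cgroup_hierarchical_free_pages() returns min(5000, 400, 200) = 200.
If the subtree rooted at that memcg holds 500 file LRU pages and swap is
not usable, mem_cgroup_page_stat(memcg, MEMCG_NR_DIRTYABLE_PAGES) returns
200 + 500 = 700 dirtyable pages.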

