[PATCH -mm 5/8] memcg: rework non-slab kmem pages charge path

From: Vladimir Davydov
Date: Mon Jul 07 2014 - 08:01:43 EST


Currently we have two functions for that: memcg_kmem_newpage_charge and
memcg_kmem_commit_charge. The former is called before allocating a page
to charge it to the current cgroup, while the latter saves the memcg the
new page was charged to in its page_cgroup.

Actually, there's no need to use page_cgroups for kmem pages, because
such pages are allocated when the user actually wants to kmalloc, but
falls back to alloc_page because the allocation order is too large, so
the user won't use internal page struct fields, and we can safely use
one of them to save a pointer to the memcg holding the charge instead
of using page_cgroups, just like SL[AU]B does.

This will make the code cleaner and allow us to get rid of
memcg_kmem_commit_charge.

Signed-off-by: Vladimir Davydov <vdavydov@xxxxxxxxxxxxx>
---
include/linux/memcontrol.h | 79 +++++++++++++++++---------------------------
include/linux/mm_types.h | 6 ++++
mm/memcontrol.c | 70 ++++-----------------------------------
mm/page_alloc.c | 22 ++++++++----
4 files changed, 57 insertions(+), 120 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5b0fbba00b01..33077215b8d4 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -440,11 +440,8 @@ static inline bool memcg_kmem_enabled(void)
* conditions, but because they are pretty simple, they are expected to be
* fast.
*/
-bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg,
- int order);
-void __memcg_kmem_commit_charge(struct page *page,
- struct mem_cgroup *memcg, int order);
-void __memcg_kmem_uncharge_pages(struct page *page, int order);
+int __memcg_charge_kmem_pages(gfp_t gfp, int order, struct mem_cgroup **memcg);
+void __memcg_uncharge_kmem_pages(struct mem_cgroup *memcg, int order);

int memcg_cache_id(struct mem_cgroup *memcg);

@@ -464,22 +461,26 @@ void __memcg_uncharge_slab(struct kmem_cache *cachep, int order);
void __memcg_cleanup_cache_params(struct kmem_cache *s);

/**
- * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
+ * memcg_charge_kmem_pages: verify if a kmem page allocation is allowed.
* @gfp: the gfp allocation flags.
- * @memcg: a pointer to the memcg this was charged against.
* @order: allocation order.
+ * @memcg: a pointer to the memcg this was charged against.
*
- * returns true if the memcg where the current task belongs can hold this
- * allocation.
+ * The function tries to charge a kmem page allocation to the memory cgroup
+ * which the current task belongs to. It should be used for accounting non-slab
+ * kmem pages allocations (see alloc_kmem_pages). For slab allocations
+ * memcg_charge_slab is used.
*
- * We return true automatically if this allocation is not to be accounted to
- * any memcg.
+ * Returns 0 on success, -ENOMEM on failure. Note we skip charging and return 0
+ * if this allocation is not to be accounted to any memcg.
*/
-static inline bool
-memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
+static inline int
+memcg_charge_kmem_pages(gfp_t gfp, int order, struct mem_cgroup **memcg)
{
+ *memcg = NULL;
+
if (!memcg_kmem_enabled())
- return true;
+ return 0;

/*
* __GFP_NOFAIL allocations will move on even if charging is not
@@ -489,47 +490,30 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
* and won't be worth the trouble.
*/
if (gfp & __GFP_NOFAIL)
- return true;
+ return 0;
if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
- return true;
+ return 0;

/* If the test is dying, just let it go. */
if (unlikely(fatal_signal_pending(current)))
- return true;
+ return 0;

- return __memcg_kmem_newpage_charge(gfp, memcg, order);
-}
-
-/**
- * memcg_kmem_uncharge_pages: uncharge pages from memcg
- * @page: pointer to struct page being freed
- * @order: allocation order.
- *
- * there is no need to specify memcg here, since it is embedded in page_cgroup
- */
-static inline void
-memcg_kmem_uncharge_pages(struct page *page, int order)
-{
- if (memcg_kmem_enabled())
- __memcg_kmem_uncharge_pages(page, order);
+ return __memcg_charge_kmem_pages(gfp, order, memcg);
}

/**
- * memcg_kmem_commit_charge: embeds correct memcg in a page
- * @page: pointer to struct page recently allocated
- * @memcg: the memcg structure we charged against
+ * memcg_uncharge_kmem_pages: uncharge a kmem page allocation
+ * @memcg: the memcg the allocation is charged to.
* @order: allocation order.
*
- * Needs to be called after memcg_kmem_newpage_charge, regardless of success or
- * failure of the allocation. if @page is NULL, this function will revert the
- * charges. Otherwise, it will commit the memcg given by @memcg to the
- * corresponding page_cgroup.
+ * The function is used to uncharge kmem page allocations charged using
+ * memcg_charge_kmem_pages.
*/
static inline void
-memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+memcg_uncharge_kmem_pages(struct mem_cgroup *memcg, int order)
{
if (memcg_kmem_enabled() && memcg)
- __memcg_kmem_commit_charge(page, memcg, order);
+ __memcg_uncharge_kmem_pages(memcg, order);
}

/**
@@ -562,18 +546,15 @@ static inline bool memcg_kmem_enabled(void)
return false;
}

-static inline bool
-memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
-{
- return true;
-}
-
-static inline void memcg_kmem_uncharge_pages(struct page *page, int order)
+static inline int
+memcg_charge_kmem_pages(gfp_t gfp, int order, struct mem_cgroup **memcg)
{
+ *memcg = NULL;
+ return 0;
}

static inline void
-memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+memcg_uncharge_kmem_pages(struct mem_cgroup *memcg, int order)
{
}

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index a6236cff3c31..4656c02fcd1d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -23,6 +23,7 @@
#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))

struct address_space;
+struct mem_cgroup;

#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \
@@ -165,6 +166,11 @@ struct page {
#endif
#endif
struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */
+
+ /* for non-slab kmem pages (see alloc_kmem_pages):
+ * memcg which the page is charged to */
+ struct mem_cgroup *memcg;
+
struct page *first_page; /* Compound tail pages */
};

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4dedb67787c7..4b155ebf1973 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3304,28 +3304,11 @@ out:
return cachep;
}

-/*
- * We need to verify if the allocation against current->mm->owner's memcg is
- * possible for the given order. But the page is not allocated yet, so we'll
- * need a further commit step to do the final arrangements.
- *
- * It is possible for the task to switch cgroups in this mean time, so at
- * commit time, we can't rely on task conversion any longer. We'll then use
- * the handle argument to return to the caller which cgroup we should commit
- * against. We could also return the memcg directly and avoid the pointer
- * passing, but a boolean return value gives better semantics considering
- * the compiled-out case as well.
- *
- * Returning true means the allocation is possible.
- */
-bool
-__memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
+int __memcg_charge_kmem_pages(gfp_t gfp, int order, struct mem_cgroup **_memcg)
{
struct mem_cgroup *memcg;
int ret;

- *_memcg = NULL;
-
/*
* Disabling accounting is only relevant for some specific memcg
* internal allocations. Therefore we would initially not have such
@@ -3351,14 +3334,13 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
* allocations are extremely rare but can happen, for instance, for the
* cache arrays. We bring this test here.
*/
- if (!current->mm || current->memcg_kmem_skip_account)
- return true;
+ if (current->memcg_kmem_skip_account)
+ return 0;

memcg = get_mem_cgroup_from_mm(current->mm);
-
if (!memcg_can_account_kmem(memcg)) {
css_put(&memcg->css);
- return true;
+ return 0;
}

ret = memcg_charge_kmem(memcg, gfp, PAGE_SIZE << order);
@@ -3366,51 +3348,11 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
*_memcg = memcg;

css_put(&memcg->css);
- return (ret == 0);
-}
-
-void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
- int order)
-{
- struct page_cgroup *pc;
-
- VM_BUG_ON(mem_cgroup_is_root(memcg));
-
- /* The page allocation failed. Revert */
- if (!page) {
- memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
- return;
- }
- /*
- * The page is freshly allocated and not visible to any
- * outside callers yet. Set up pc non-atomically.
- */
- pc = lookup_page_cgroup(page);
- pc->mem_cgroup = memcg;
- pc->flags = PCG_USED;
+ return ret;
}

-void __memcg_kmem_uncharge_pages(struct page *page, int order)
+void __memcg_uncharge_kmem_pages(struct mem_cgroup *memcg, int order)
{
- struct mem_cgroup *memcg = NULL;
- struct page_cgroup *pc;
-
-
- pc = lookup_page_cgroup(page);
- if (!PageCgroupUsed(pc))
- return;
-
- memcg = pc->mem_cgroup;
- pc->flags = 0;
-
- /*
- * We trust that only if there is a memcg associated with the page, it
- * is a valid allocation
- */
- if (!memcg)
- return;
-
- VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
}
#else
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4351dd972803..f4090a582caf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2902,24 +2902,32 @@ EXPORT_SYMBOL(free_pages);
struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
{
struct page *page;
- struct mem_cgroup *memcg = NULL;
+ struct mem_cgroup *memcg;

- if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+ if (memcg_charge_kmem_pages(gfp_mask, order, &memcg) != 0)
return NULL;
page = alloc_pages(gfp_mask, order);
- memcg_kmem_commit_charge(page, memcg, order);
+ if (!page) {
+ memcg_uncharge_kmem_pages(memcg, order);
+ return NULL;
+ }
+ page->memcg = memcg;
return page;
}

struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
{
struct page *page;
- struct mem_cgroup *memcg = NULL;
+ struct mem_cgroup *memcg;

- if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+ if (memcg_charge_kmem_pages(gfp_mask, order, &memcg) != 0)
return NULL;
page = alloc_pages_node(nid, gfp_mask, order);
- memcg_kmem_commit_charge(page, memcg, order);
+ if (!page) {
+ memcg_uncharge_kmem_pages(memcg, order);
+ return NULL;
+ }
+ page->memcg = memcg;
return page;
}

@@ -2929,7 +2937,7 @@ struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
*/
void __free_kmem_pages(struct page *page, unsigned int order)
{
- memcg_kmem_uncharge_pages(page, order);
+ memcg_uncharge_kmem_pages(page->memcg, order);
__free_pages(page, order);
}

--
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/