[PATCH 3/6] memcg: remove PCG_MOVE_LOCK flag from page_cgroup.

From: KAMEZAWA Hiroyuki
Date: Wed Feb 01 2012 - 21:49:59 EST


PCG_MOVE_LOCK is used as a bit spinlock to avoid a race between overwriting
pc->mem_cgroup and per-memcg page statistics accounting.
This lock helps to avoid the race, but the race is very rare because moving
tasks between cgroups is not a common operation.
So, spending 1 bit per page on it seems too costly.

This patch replaces the lock with a per-memcg spinlock and removes PCG_MOVE_LOCK.

If a finer-grained lock is required, we'll be able to add some hashing, but
I'd like to start from this.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
---
include/linux/page_cgroup.h | 19 -------------------
mm/memcontrol.c | 34 ++++++++++++++++++++++++++++++++--
2 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 1060292..7a3af74 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -7,7 +7,6 @@ enum {
PCG_USED, /* this object is in use. */
PCG_MIGRATION, /* under page migration */
/* flags for mem_cgroup and file and I/O status */
- PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
PCG_FILE_MAPPED, /* page is accounted as "mapped" */
__NR_PCG_FLAGS,
};
@@ -89,24 +88,6 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc)
bit_spin_unlock(PCG_LOCK, &pc->flags);
}

-static inline void move_lock_page_cgroup(struct page_cgroup *pc,
- unsigned long *flags)
-{
- /*
- * We know updates to pc->flags of page cache's stats are from both of
- * usual context or IRQ context. Disable IRQ to avoid deadlock.
- */
- local_irq_save(*flags);
- bit_spin_lock(PCG_MOVE_LOCK, &pc->flags);
-}
-
-static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
- unsigned long *flags)
-{
- bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags);
- local_irq_restore(*flags);
-}
-
#else /* CONFIG_CGROUP_MEM_RES_CTLR */
struct page_cgroup;

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4ba0d76..083154d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -280,6 +280,8 @@ struct mem_cgroup {
* set > 0 if pages under this cgroup are moving to other cgroup.
*/
atomic_t moving_account;
+ /* taken only while moving_account > 0 */
+ spinlock_t move_lock;
/*
* percpu counter.
*/
@@ -1338,6 +1340,34 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
return false;
}

+/*
+ * Take pc->mem_cgroup's move_lock (IRQ state is saved in *flags) when
+ * - code modifies a page's memcg while it's USED.
+ * - code modifies page state accounting in a memcg.
+ * See mem_cgroup_stealed(), too.
+ */
+static void move_lock_page_cgroup(struct page_cgroup *pc, unsigned long *flags)
+{
+ struct mem_cgroup *memcg;
+
+again:
+ memcg = pc->mem_cgroup;
+ spin_lock_irqsave(&memcg->move_lock, *flags);
+ if (unlikely(pc->mem_cgroup != memcg)) { /* memcg changed; retry */
+ spin_unlock_irqrestore(&memcg->move_lock, *flags);
+ goto again;
+ }
+}
+
+static void move_unlock_page_cgroup(struct page_cgroup *pc,
+ unsigned long *flags)
+{
+ struct mem_cgroup *memcg;
+
+ memcg = pc->mem_cgroup; /* NOTE(review): must match the locked memcg */
+ spin_unlock_irqrestore(&memcg->move_lock, *flags);
+}
+
/**
* mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode.
* @memcg: The memory cgroup that went over limit
@@ -2435,8 +2465,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

-#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
- (1 << PCG_MIGRATION))
+#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MIGRATION))
/*
* Because tail pages are not marked as "used", set it. We're under
* zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -4923,6 +4952,7 @@ mem_cgroup_create(struct cgroup *cont)
atomic_set(&memcg->refcnt, 1);
memcg->move_charge_at_immigrate = 0;
mutex_init(&memcg->thresholds_lock);
+ spin_lock_init(&memcg->move_lock);
return &memcg->css;
free_out:
__mem_cgroup_free(memcg);
--
1.7.4.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/