[PATCH 07/11] memcg: redefine callback functions for page table walker

From: Naoya Horiguchi
Date: Wed Oct 30 2013 - 17:47:38 EST


Move code around pte loop in mem_cgroup_count_precharge_pte_range() into
mem_cgroup_count_precharge_pte() connected to pte_entry().

We don't change the callback mem_cgroup_move_charge_pte_range() for now,
because we can't do the same replacement easily due to 'goto retry'.

ChangeLog v2:
- rebase onto mmots

Signed-off-by: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
---
mm/memcontrol.c | 71 ++++++++++++++++++++++-----------------------------------
1 file changed, 27 insertions(+), 44 deletions(-)

diff --git v3.12-rc7-mmots-2013-10-29-16-24.orig/mm/memcontrol.c v3.12-rc7-mmots-2013-10-29-16-24/mm/memcontrol.c
index e375bcb..60c9826 100644
--- v3.12-rc7-mmots-2013-10-29-16-24.orig/mm/memcontrol.c
+++ v3.12-rc7-mmots-2013-10-29-16-24/mm/memcontrol.c
@@ -6601,30 +6601,29 @@ static inline enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
}
#endif

-static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
+static int mem_cgroup_count_precharge_pte(pte_t *pte,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
- struct vm_area_struct *vma = walk->private;
- pte_t *pte;
+ if (get_mctgt_type(walk->vma, addr, *pte, NULL))
+ mc.precharge++; /* increment precharge temporarily */
+ return 0;
+}
+
+static int mem_cgroup_count_precharge_pmd(pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct vm_area_struct *vma = walk->vma;
spinlock_t *ptl;

if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
mc.precharge += HPAGE_PMD_NR;
spin_unlock(ptl);
- return 0;
+ /* don't call mem_cgroup_count_precharge_pte() */
+ walk->skip = 1;
}
-
- if (pmd_trans_unstable(pmd))
- return 0;
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
- for (; addr != end; pte++, addr += PAGE_SIZE)
- if (get_mctgt_type(vma, addr, *pte, NULL))
- mc.precharge++; /* increment precharge temporarily */
- pte_unmap_unlock(pte - 1, ptl);
- cond_resched();
-
return 0;
}

@@ -6633,18 +6632,14 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
unsigned long precharge;
struct vm_area_struct *vma;

+ struct mm_walk mem_cgroup_count_precharge_walk = {
+ .pmd_entry = mem_cgroup_count_precharge_pmd,
+ .pte_entry = mem_cgroup_count_precharge_pte,
+ .mm = mm,
+ };
down_read(&mm->mmap_sem);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- struct mm_walk mem_cgroup_count_precharge_walk = {
- .pmd_entry = mem_cgroup_count_precharge_pte_range,
- .mm = mm,
- .private = vma,
- };
- if (is_vm_hugetlb_page(vma))
- continue;
- walk_page_range(vma->vm_start, vma->vm_end,
- &mem_cgroup_count_precharge_walk);
- }
+ for (vma = mm->mmap; vma; vma = vma->vm_next)
+ walk_page_vma(vma, &mem_cgroup_count_precharge_walk);
up_read(&mm->mmap_sem);

precharge = mc.precharge;
@@ -6783,7 +6778,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
struct mm_walk *walk)
{
int ret = 0;
- struct vm_area_struct *vma = walk->private;
+ struct vm_area_struct *vma = walk->vma;
pte_t *pte;
spinlock_t *ptl;
enum mc_target_type target_type;
@@ -6884,6 +6879,10 @@ put: /* get_mctgt_type() gets the page */
static void mem_cgroup_move_charge(struct mm_struct *mm)
{
struct vm_area_struct *vma;
+ struct mm_walk mem_cgroup_move_charge_walk = {
+ .pmd_entry = mem_cgroup_move_charge_pte_range,
+ .mm = mm,
+ };

lru_add_drain_all();
retry:
@@ -6899,24 +6898,8 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
cond_resched();
goto retry;
}
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- int ret;
- struct mm_walk mem_cgroup_move_charge_walk = {
- .pmd_entry = mem_cgroup_move_charge_pte_range,
- .mm = mm,
- .private = vma,
- };
- if (is_vm_hugetlb_page(vma))
- continue;
- ret = walk_page_range(vma->vm_start, vma->vm_end,
- &mem_cgroup_move_charge_walk);
- if (ret)
- /*
- * means we have consumed all precharges and failed in
- * doing additional charge. Just abandon here.
- */
- break;
- }
+ for (vma = mm->mmap; vma; vma = vma->vm_next)
+ walk_page_vma(vma, &mem_cgroup_move_charge_walk);
up_read(&mm->mmap_sem);
}

--
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/