Re: [PATCH v2] mm: do not update memcg stats for NR_{FILE/SHMEM}_PMDMAPPED

From: Yosry Ahmed
Date: Mon May 06 2024 - 16:18:21 EST


On Mon, May 06, 2024 at 09:50:10PM +0200, David Hildenbrand wrote:
> On 06.05.24 21:29, Yosry Ahmed wrote:
> > Previously, all NR_VM_EVENT_ITEMS stats were maintained per-memcg,
> > although some of those fields are not exposed anywhere. Commit
> > 14e0f6c957e39 ("memcg: reduce memory for the lruvec and memcg stats")
> > changed this such that we only maintain the stats we actually expose
> > per-memcg via a translation table.
> >
> > Additionally, commit 514462bbe927b ("memcg: warn for unexpected events
> > and stats") added a warning if a per-memcg stat update is attempted for
> > a stat that is not in the translation table. The warning started firing
> > for the NR_{FILE/SHMEM}_PMDMAPPED stat updates in the rmap code. These
> > stats are not maintained per-memcg, and hence are not in the translation
> > table.
> >
> > Do not use __lruvec_stat_mod_folio() when updating NR_FILE_PMDMAPPED and
> > NR_SHMEM_PMDMAPPED. Use __mod_node_page_state() instead, which updates
> > the global per-node stats only.
> >
> > Reported-by: syzbot+9319a4268a640e26b72b@xxxxxxxxxxxxxxxxxxxxxxxxx
> > Closes: https://lore.kernel.org/lkml/0000000000001b9d500617c8b23c@xxxxxxxxxx
> > Fixes: 514462bbe927 ("memcg: warn for unexpected events and stats")
> > Acked-by: Shakeel Butt <shakeel.butt@xxxxxxxxx>
> > Signed-off-by: Yosry Ahmed <yosryahmed@xxxxxxxxxx>
> > ---
> > mm/rmap.c | 15 +++++++++------
> > 1 file changed, 9 insertions(+), 6 deletions(-)
> >
> > diff --git a/mm/rmap.c b/mm/rmap.c
> > index 12be4241474ab..ed7f820369864 100644
> > --- a/mm/rmap.c
> > +++ b/mm/rmap.c
> > @@ -1435,13 +1435,14 @@ static __always_inline void __folio_add_file_rmap(struct folio *folio,
> >  		struct page *page, int nr_pages, struct vm_area_struct *vma,
> >  		enum rmap_level level)
> >  {
> > +	pg_data_t *pgdat = folio_pgdat(folio);
> >  	int nr, nr_pmdmapped = 0;
> >
> >  	VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
> >
> >  	nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
> >  	if (nr_pmdmapped)
> > -		__lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
> > +		__mod_node_page_state(pgdat, folio_test_swapbacked(folio) ?
> >  			NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
> >  	if (nr)
> >  		__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
> > @@ -1493,6 +1494,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
> >  		enum rmap_level level)
> >  {
> >  	atomic_t *mapped = &folio->_nr_pages_mapped;
> > +	pg_data_t *pgdat = folio_pgdat(folio);
> >  	int last, nr = 0, nr_pmdmapped = 0;
> >  	bool partially_mapped = false;
> >  	enum node_stat_item idx;
> > @@ -1540,13 +1542,14 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
> >  	}
> >
> >  	if (nr_pmdmapped) {
> > +		/* NR_{FILE/SHMEM}_PMDMAPPED are not maintained per-memcg */
> >  		if (folio_test_anon(folio))
> > -			idx = NR_ANON_THPS;
> > -		else if (folio_test_swapbacked(folio))
> > -			idx = NR_SHMEM_PMDMAPPED;
> > +			__lruvec_stat_mod_folio(folio, NR_ANON_THPS, -nr_pmdmapped);
> >  		else
> > -			idx = NR_FILE_PMDMAPPED;
> > -		__lruvec_stat_mod_folio(folio, idx, -nr_pmdmapped);
> > +			__mod_node_page_state(pgdat,
>
> folio_pgdat(folio) should fit here easily. :)
>
> But I would actually suggest something like the following in mm/rmap.c

I am not a big fan of this. Not because I don't like the abstraction,
but because I think it doesn't go all the way: it abstracts only one
specific case, updating nr_pmdmapped for file folios.

I think if we opt for abstracting the stats updates in mm/rmap.c, we
should go all the way with something like the following (probably split
as two patches: refactoring then bug fix). WDYT about the below?

diff --git a/mm/rmap.c b/mm/rmap.c
index 12be4241474ab..70d6f6309da01 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1269,6 +1269,28 @@ static void __page_check_anon_rmap(struct folio *folio, struct page *page,
 		       page);
 }

+static void __folio_mod_stat(struct folio *folio, int nr, int nr_pmdmapped)
+{
+	int idx;
+
+	if (nr) {
+		idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
+		__lruvec_stat_mod_folio(folio, idx, nr);
+	}
+	if (nr_pmdmapped) {
+		if (folio_test_anon(folio)) {
+			idx = NR_ANON_THPS;
+			__lruvec_stat_mod_folio(folio, idx, nr_pmdmapped);
+		} else {
+			/* NR_*_PMDMAPPED are not maintained per-memcg */
+			idx = folio_test_swapbacked(folio) ?
+				NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED;
+			__mod_node_page_state(folio_pgdat(folio), idx,
+					      nr_pmdmapped);
+		}
+	}
+}
+
 static __always_inline void __folio_add_anon_rmap(struct folio *folio,
 		struct page *page, int nr_pages, struct vm_area_struct *vma,
 		unsigned long address, rmap_t flags, enum rmap_level level)
@@ -1276,10 +1298,6 @@ static __always_inline void __folio_add_anon_rmap(struct folio *folio,
 	int i, nr, nr_pmdmapped = 0;

 	nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
-	if (nr_pmdmapped)
-		__lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr_pmdmapped);
-	if (nr)
-		__lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr);

 	if (unlikely(!folio_test_anon(folio))) {
 		VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio);
@@ -1297,6 +1315,8 @@ static __always_inline void __folio_add_anon_rmap(struct folio *folio,
 		__page_check_anon_rmap(folio, page, vma, address);
 	}

+	__folio_mod_stat(folio, nr, nr_pmdmapped);
+
 	if (flags & RMAP_EXCLUSIVE) {
 		switch (level) {
 		case RMAP_LEVEL_PTE:
@@ -1393,6 +1413,7 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
 		unsigned long address)
 {
 	int nr = folio_nr_pages(folio);
+	int nr_pmdmapped = 0;

 	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
 	VM_BUG_ON_VMA(address < vma->vm_start ||
@@ -1425,10 +1446,10 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
 		atomic_set(&folio->_large_mapcount, 0);
 		atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED);
 		SetPageAnonExclusive(&folio->page);
-		__lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr);
+		nr_pmdmapped = nr;
 	}

-	__lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr);
+	__folio_mod_stat(folio, nr, nr_pmdmapped);
 }

 static __always_inline void __folio_add_file_rmap(struct folio *folio,
@@ -1440,11 +1461,7 @@ static __always_inline void __folio_add_file_rmap(struct folio *folio,
 	VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);

 	nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
-	if (nr_pmdmapped)
-		__lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
-			NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
-	if (nr)
-		__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
+	__folio_mod_stat(folio, nr, nr_pmdmapped);

 	/* See comments in folio_add_anon_rmap_*() */
 	if (!folio_test_large(folio))
@@ -1539,19 +1556,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 		break;
 	}

-	if (nr_pmdmapped) {
-		if (folio_test_anon(folio))
-			idx = NR_ANON_THPS;
-		else if (folio_test_swapbacked(folio))
-			idx = NR_SHMEM_PMDMAPPED;
-		else
-			idx = NR_FILE_PMDMAPPED;
-		__lruvec_stat_mod_folio(folio, idx, -nr_pmdmapped);
-	}
 	if (nr) {
-		idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
-		__lruvec_stat_mod_folio(folio, idx, -nr);
-
 		/*
 		 * Queue anon large folio for deferred split if at least one
 		 * page of the folio is unmapped and at least one page
@@ -1563,6 +1568,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 			list_empty(&folio->_deferred_list))
 			deferred_split_folio(folio);
 	}
+	__folio_mod_stat(folio, nr, nr_pmdmapped);

 	/*
 	 * It would be tidy to reset folio_test_anon mapping when fully
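
To spell out why the bug fix half of this works: __lruvec_stat_mod_folio()
also feeds the per-memcg counters, and since 514462bbe927 the memcg side
warns and bails out for any index that is missing from the translation
table. Roughly, paraphrasing that check from memory (sketch only, exact
upstream details may differ):

/* Sketch of the check added by 514462bbe927; paraphrased, not verbatim. */
static void __mod_memcg_lruvec_state(struct lruvec *lruvec,
				     enum node_stat_item idx, int val)
{
	/* Maps a node_stat_item to its per-memcg slot, or -1 if untracked. */
	int i = memcg_stats_index(idx);

	if (WARN_ONCE(i < 0, "%s: missing stat item %d\n", __func__, idx))
		return;

	/* ... otherwise update the per-memcg counter at slot i ... */
}

__mod_node_page_state() only touches the global per-node vmstat counters,
so it never reaches that check. NR_ANON_THPS keeps going through
__lruvec_stat_mod_folio() because it is in the translation table.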

>
> static void __folio_mod_node_file_state(struct folio *folio, int nr_pages)
> {
> 	enum node_stat_item idx = NR_FILE_PMDMAPPED;
>
> 	if (folio_test_swapbacked(folio))
> 		idx = NR_SHMEM_PMDMAPPED;
>
> 	__mod_node_page_state(folio_pgdat(folio), idx, nr_pages);
> }
>
> And then simply calling here
>
> __folio_mod_node_file_state(folio, -nr_pmdmapped);
>
> And likewise in __folio_add_file_rmap()
>
>
> ... will be cleaner.
>
> In any case
>
> Acked-by: David Hildenbrand <david@xxxxxxxxxx>
>
> --
> Cheers,
>
> David / dhildenb
>