Re: [PATCH 02/12] vmscan: shrinker->nr updates race and go wrong

From: KOSAKI Motohiro
Date: Sun Jun 19 2011 - 20:47:02 EST


> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 48e3fbd..dce2767 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -251,17 +251,29 @@ unsigned long shrink_slab(struct shrink_control *shrink,
> unsigned long total_scan;
> unsigned long max_pass;
> int shrink_ret = 0;
> + long nr;
> + long new_nr;
>
> + /*
> + * copy the current shrinker scan count into a local variable
> + * and zero it so that other concurrent shrinker invocations
> + * don't also do this scanning work.
> + */
> + do {
> + nr = shrinker->nr;
> + } while (cmpxchg(&shrinker->nr, nr, 0) != nr);
> +
> + total_scan = nr;
> max_pass = do_shrinker_shrink(shrinker, shrink, 0);
> delta = (4 * nr_pages_scanned) / shrinker->seeks;
> delta *= max_pass;
> do_div(delta, lru_pages + 1);
> - shrinker->nr += delta;
> - if (shrinker->nr < 0) {
> + total_scan += delta;
> + if (total_scan < 0) {
> printk(KERN_ERR "shrink_slab: %pF negative objects to "
> "delete nr=%ld\n",
> - shrinker->shrink, shrinker->nr);
> - shrinker->nr = max_pass;
> + shrinker->shrink, total_scan);
> + total_scan = max_pass;
> }
>
> /*
> @@ -269,13 +281,11 @@ unsigned long shrink_slab(struct shrink_control *shrink,
> * never try to free more than twice the estimate number of
> * freeable entries.
> */
> - if (shrinker->nr > max_pass * 2)
> - shrinker->nr = max_pass * 2;
> + if (total_scan > max_pass * 2)
> + total_scan = max_pass * 2;
>
> - total_scan = shrinker->nr;
> - shrinker->nr = 0;
>
> - trace_mm_shrink_slab_start(shrinker, shrink, nr_pages_scanned,
> + trace_mm_shrink_slab_start(shrinker, shrink, nr, nr_pages_scanned,
> lru_pages, max_pass, delta, total_scan);
>
> while (total_scan >= SHRINK_BATCH) {
> @@ -295,8 +305,19 @@ unsigned long shrink_slab(struct shrink_control *shrink,
> cond_resched();
> }
>
> - shrinker->nr += total_scan;
> - trace_mm_shrink_slab_end(shrinker, shrink_ret, total_scan);
> + /*
> + * move the unused scan count back into the shrinker in a
> + * manner that handles concurrent updates. If we exhausted the
> + * scan, there is no need to do an update.
> + */
> + do {
> + nr = shrinker->nr;
> + new_nr = total_scan + nr;
> + if (total_scan <= 0)
> + break;
> + } while (cmpxchg(&shrinker->nr, nr, new_nr) != nr);
> +
> + trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
> }
> up_read(&shrinker_rwsem);
> out:

Looks great fix. Please remove tracepoint change from this patch and send it
to -stable. iow, I expect I'll ack your next spin.

thanks.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/