[PATCH 2/2] zram: switch to per-cpu streams

From: Sergey Senozhatsky
Date: Thu Mar 24 2016 - 21:50:02 EST


Use per-cpu compression streams.

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx>
---
drivers/block/zram/zcomp.c | 293 ++++++++++++------------------------------
drivers/block/zram/zcomp.h | 12 +-
drivers/block/zram/zram_drv.c | 32 ++++-
3 files changed, 110 insertions(+), 227 deletions(-)

diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 3ef42e5..d4159e4 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/sched.h>
+#include <linux/cpu.h>

#include "zcomp.h"
#include "zcomp_lzo.h"
@@ -20,29 +21,6 @@
#include "zcomp_lz4.h"
#endif

-/*
- * single zcomp_strm backend
- */
-struct zcomp_strm_single {
- struct mutex strm_lock;
- struct zcomp_strm *zstrm;
-};
-
-/*
- * multi zcomp_strm backend
- */
-struct zcomp_strm_multi {
- /* protect strm list */
- spinlock_t strm_lock;
- /* max possible number of zstrm streams */
- int max_strm;
- /* number of available zstrm streams */
- int avail_strm;
- /* list of available strms */
- struct list_head idle_strm;
- wait_queue_head_t strm_wait;
-};
-
static struct zcomp_backend *backends[] = {
&zcomp_lzo,
#ifdef CONFIG_ZRAM_LZ4_COMPRESS
@@ -93,188 +71,6 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp, gfp_t flags)
return zstrm;
}

-/*
- * get idle zcomp_strm or wait until other process release
- * (zcomp_strm_release()) one for us
- */
-static struct zcomp_strm *zcomp_strm_multi_find(struct zcomp *comp)
-{
- struct zcomp_strm_multi *zs = comp->stream;
- struct zcomp_strm *zstrm;
-
- while (1) {
- spin_lock(&zs->strm_lock);
- if (!list_empty(&zs->idle_strm)) {
- zstrm = list_entry(zs->idle_strm.next,
- struct zcomp_strm, list);
- list_del(&zstrm->list);
- spin_unlock(&zs->strm_lock);
- return zstrm;
- }
- /* zstrm streams limit reached, wait for idle stream */
- if (zs->avail_strm >= zs->max_strm) {
- spin_unlock(&zs->strm_lock);
- wait_event(zs->strm_wait, !list_empty(&zs->idle_strm));
- continue;
- }
- /* allocate new zstrm stream */
- zs->avail_strm++;
- spin_unlock(&zs->strm_lock);
- /*
- * This function can be called in swapout/fs write path
- * so we can't use GFP_FS|IO. And it assumes we already
- * have at least one stream in zram initialization so we
- * don't do best effort to allocate more stream in here.
- * A default stream will work well without further multiple
- * streams. That's why we use NORETRY | NOWARN.
- */
- zstrm = zcomp_strm_alloc(comp, GFP_NOIO | __GFP_NORETRY |
- __GFP_NOWARN);
- if (!zstrm) {
- spin_lock(&zs->strm_lock);
- zs->avail_strm--;
- spin_unlock(&zs->strm_lock);
- wait_event(zs->strm_wait, !list_empty(&zs->idle_strm));
- continue;
- }
- break;
- }
- return zstrm;
-}
-
-/* add stream back to idle list and wake up waiter or free the stream */
-static void zcomp_strm_multi_release(struct zcomp *comp, struct zcomp_strm *zstrm)
-{
- struct zcomp_strm_multi *zs = comp->stream;
-
- spin_lock(&zs->strm_lock);
- if (zs->avail_strm <= zs->max_strm) {
- list_add(&zstrm->list, &zs->idle_strm);
- spin_unlock(&zs->strm_lock);
- wake_up(&zs->strm_wait);
- return;
- }
-
- zs->avail_strm--;
- spin_unlock(&zs->strm_lock);
- zcomp_strm_free(comp, zstrm);
-}
-
-/* change max_strm limit */
-static bool zcomp_strm_multi_set_max_streams(struct zcomp *comp, int num_strm)
-{
- struct zcomp_strm_multi *zs = comp->stream;
- struct zcomp_strm *zstrm;
-
- spin_lock(&zs->strm_lock);
- zs->max_strm = num_strm;
- /*
- * if user has lowered the limit and there are idle streams,
- * immediately free as much streams (and memory) as we can.
- */
- while (zs->avail_strm > num_strm && !list_empty(&zs->idle_strm)) {
- zstrm = list_entry(zs->idle_strm.next,
- struct zcomp_strm, list);
- list_del(&zstrm->list);
- zcomp_strm_free(comp, zstrm);
- zs->avail_strm--;
- }
- spin_unlock(&zs->strm_lock);
- return true;
-}
-
-static void zcomp_strm_multi_destroy(struct zcomp *comp)
-{
- struct zcomp_strm_multi *zs = comp->stream;
- struct zcomp_strm *zstrm;
-
- while (!list_empty(&zs->idle_strm)) {
- zstrm = list_entry(zs->idle_strm.next,
- struct zcomp_strm, list);
- list_del(&zstrm->list);
- zcomp_strm_free(comp, zstrm);
- }
- kfree(zs);
-}
-
-static int zcomp_strm_multi_create(struct zcomp *comp, int max_strm)
-{
- struct zcomp_strm *zstrm;
- struct zcomp_strm_multi *zs;
-
- comp->destroy = zcomp_strm_multi_destroy;
- comp->strm_find = zcomp_strm_multi_find;
- comp->strm_release = zcomp_strm_multi_release;
- comp->set_max_streams = zcomp_strm_multi_set_max_streams;
- zs = kmalloc(sizeof(struct zcomp_strm_multi), GFP_KERNEL);
- if (!zs)
- return -ENOMEM;
-
- comp->stream = zs;
- spin_lock_init(&zs->strm_lock);
- INIT_LIST_HEAD(&zs->idle_strm);
- init_waitqueue_head(&zs->strm_wait);
- zs->max_strm = max_strm;
- zs->avail_strm = 1;
-
- zstrm = zcomp_strm_alloc(comp, GFP_KERNEL);
- if (!zstrm) {
- kfree(zs);
- return -ENOMEM;
- }
- list_add(&zstrm->list, &zs->idle_strm);
- return 0;
-}
-
-static struct zcomp_strm *zcomp_strm_single_find(struct zcomp *comp)
-{
- struct zcomp_strm_single *zs = comp->stream;
- mutex_lock(&zs->strm_lock);
- return zs->zstrm;
-}
-
-static void zcomp_strm_single_release(struct zcomp *comp,
- struct zcomp_strm *zstrm)
-{
- struct zcomp_strm_single *zs = comp->stream;
- mutex_unlock(&zs->strm_lock);
-}
-
-static bool zcomp_strm_single_set_max_streams(struct zcomp *comp, int num_strm)
-{
- /* zcomp_strm_single support only max_comp_streams == 1 */
- return false;
-}
-
-static void zcomp_strm_single_destroy(struct zcomp *comp)
-{
- struct zcomp_strm_single *zs = comp->stream;
- zcomp_strm_free(comp, zs->zstrm);
- kfree(zs);
-}
-
-static int zcomp_strm_single_create(struct zcomp *comp)
-{
- struct zcomp_strm_single *zs;
-
- comp->destroy = zcomp_strm_single_destroy;
- comp->strm_find = zcomp_strm_single_find;
- comp->strm_release = zcomp_strm_single_release;
- comp->set_max_streams = zcomp_strm_single_set_max_streams;
- zs = kmalloc(sizeof(struct zcomp_strm_single), GFP_KERNEL);
- if (!zs)
- return -ENOMEM;
-
- comp->stream = zs;
- mutex_init(&zs->strm_lock);
- zs->zstrm = zcomp_strm_alloc(comp, GFP_KERNEL);
- if (!zs->zstrm) {
- kfree(zs);
- return -ENOMEM;
- }
- return 0;
-}
-
/* show available compressors */
ssize_t zcomp_available_show(const char *comp, char *buf)
{
@@ -301,17 +97,17 @@ bool zcomp_available_algorithm(const char *comp)

bool zcomp_set_max_streams(struct zcomp *comp, int num_strm)
{
- return comp->set_max_streams(comp, num_strm);
+ return true;
}

struct zcomp_strm *zcomp_strm_find(struct zcomp *comp)
{
- return comp->strm_find(comp);
+ return *get_cpu_ptr(comp->stream);
}

void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm)
{
- comp->strm_release(comp, zstrm);
+ put_cpu_ptr(comp->stream);
}

int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm,
@@ -327,9 +123,83 @@ int zcomp_decompress(struct zcomp *comp, const unsigned char *src,
return comp->backend->decompress(src, src_len, dst);
}

+static int __zcomp_cpu_notifier(struct zcomp *comp,
+ unsigned long action, unsigned long cpu)
+{
+ struct zcomp_strm *zstrm;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ if (WARN_ON(*per_cpu_ptr(comp->stream, cpu)))
+ break;
+ zstrm = zcomp_strm_alloc(comp, GFP_KERNEL);
+ if (IS_ERR_OR_NULL(zstrm)) {
+ pr_err("Can't allocate a compression stream\n");
+ return NOTIFY_BAD;
+ }
+ *per_cpu_ptr(comp->stream, cpu) = zstrm;
+ break;
+ case CPU_DEAD:
+ case CPU_UP_CANCELED:
+ zstrm = *per_cpu_ptr(comp->stream, cpu);
+ if (!IS_ERR_OR_NULL(zstrm))
+ zcomp_strm_free(comp, zstrm);
+ *per_cpu_ptr(comp->stream, cpu) = NULL;
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static int zcomp_cpu_notifier(struct notifier_block *nb,
+ unsigned long action, void *pcpu)
+{
+ unsigned long cpu = (unsigned long)pcpu;
+ struct zcomp *comp = container_of(nb, typeof(*comp), notifier);
+
+ return __zcomp_cpu_notifier(comp, action, cpu);
+}
+
+static int zcomp_init(struct zcomp *comp)
+{
+ unsigned long cpu;
+ int ret;
+
+ comp->notifier.notifier_call = zcomp_cpu_notifier;
+
+ comp->stream = alloc_percpu(struct zcomp_strm *);
+ if (!comp->stream)
+ return -ENOMEM;
+
+ cpu_notifier_register_begin();
+ for_each_online_cpu(cpu) {
+ ret = __zcomp_cpu_notifier(comp, CPU_UP_PREPARE, cpu);
+ if (ret == NOTIFY_BAD)
+ goto cleanup;
+ }
+ __register_cpu_notifier(&comp->notifier);
+ cpu_notifier_register_done();
+ return 0;
+
+cleanup:
+ for_each_online_cpu(cpu)
+ __zcomp_cpu_notifier(comp, CPU_UP_CANCELED, cpu);
+ cpu_notifier_register_done();
+ return -ENOMEM;
+}
+
void zcomp_destroy(struct zcomp *comp)
{
- comp->destroy(comp);
+ unsigned long cpu;
+
+ cpu_notifier_register_begin();
+ for_each_online_cpu(cpu)
+ __zcomp_cpu_notifier(comp, CPU_UP_CANCELED, cpu);
+ __unregister_cpu_notifier(&comp->notifier);
+ cpu_notifier_register_done();
+
+ free_percpu(comp->stream);
kfree(comp);
}

@@ -356,10 +226,7 @@ struct zcomp *zcomp_create(const char *compress, int max_strm)
return ERR_PTR(-ENOMEM);

comp->backend = backend;
- if (max_strm > 1)
- error = zcomp_strm_multi_create(comp, max_strm);
- else
- error = zcomp_strm_single_create(comp);
+ error = zcomp_init(comp);
if (error) {
kfree(comp);
return ERR_PTR(error);
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
index b7d2a4b..aba8c21 100644
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -10,8 +10,6 @@
#ifndef _ZCOMP_H_
#define _ZCOMP_H_

-#include <linux/mutex.h>
-
struct zcomp_strm {
/* compression/decompression buffer */
void *buffer;
@@ -21,8 +19,6 @@ struct zcomp_strm {
* working memory)
*/
void *private;
- /* used in multi stream backend, protected by backend strm_lock */
- struct list_head list;
};

/* static compression backend */
@@ -41,13 +37,9 @@ struct zcomp_backend {

/* dynamic per-device compression frontend */
struct zcomp {
- void *stream;
+ struct zcomp_strm * __percpu *stream;
struct zcomp_backend *backend;
-
- struct zcomp_strm *(*strm_find)(struct zcomp *comp);
- void (*strm_release)(struct zcomp *comp, struct zcomp_strm *zstrm);
- bool (*set_max_streams)(struct zcomp *comp, int num_strm);
- void (*destroy)(struct zcomp *comp);
+ struct notifier_block notifier;
};

ssize_t zcomp_available_show(const char *comp, char *buf);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 9030992..c5e682c 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -650,7 +650,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
{
int ret = 0;
size_t clen;
- unsigned long handle;
+ unsigned long handle = 0;
struct page *page;
unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
struct zram_meta *meta = zram->meta;
@@ -673,9 +673,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
goto out;
}

- zstrm = zcomp_strm_find(zram->comp);
+compress_again:
user_mem = kmap_atomic(page);
-
if (is_partial_io(bvec)) {
memcpy(uncmem + offset, user_mem + bvec->bv_offset,
bvec->bv_len);
@@ -699,6 +698,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
goto out;
}

+ zstrm = zcomp_strm_find(zram->comp);
ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen);
if (!is_partial_io(bvec)) {
kunmap_atomic(user_mem);
@@ -710,6 +710,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
pr_err("Compression failed! err=%d\n", ret);
goto out;
}
+
src = zstrm->buffer;
if (unlikely(clen > max_zpage_size)) {
clen = PAGE_SIZE;
@@ -717,8 +718,31 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
src = uncmem;
}

- handle = zs_malloc(meta->mem_pool, clen, GFP_NOIO | __GFP_HIGHMEM);
+ /*
+ * handle allocation has 2 paths:
+ * a) fast path is executed with preemption disabled (for
+ * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
+ * since we can't sleep;
+ * b) slow path enables preemption and attempts to alocate
+ * the page with __GFP_DIRECT_RECLAIM bit set. we have to
+ * put per-cpu compression stream and, thus, to re-do
+ * the compression once handle is allocated.
+ *
+ * if we have a 'non-null' handle here then we are coming
+ * from the slow path and handle has already been allocated.
+ */
+ if (!handle)
+ handle = zs_malloc(meta->mem_pool, clen,
+ __GFP_KSWAPD_RECLAIM |
+ __GFP_NOWARN |
+ __GFP_HIGHMEM);
if (!handle) {
+ zcomp_strm_release(zram->comp, zstrm);
+ handle = zs_malloc(meta->mem_pool, clen,
+ GFP_NOIO | __GFP_HIGHMEM);
+ if (handle)
+ goto compress_again;
+
pr_err("Error allocating memory for compressed page: %u, size=%zu\n",
index, clen);
ret = -ENOMEM;
--
2.8.0.rc0.1.gd285ab0


--RnlQjJ0d97Da+TV1--