[PATCH v2] zram: bug fix: don't grab mutex in zram_slot_free_notify

From: Minchan Kim
Date: Mon Aug 05 2013 - 10:53:05 EST


[1] introduced down_write in zram_slot_free_notify to prevent a race
between zram_slot_free_notify and zram_bvec_[read|write]. The race
could happen if somebody with the right permission to open the swap
device reads it directly while it is being used for swap in parallel.

However, zram_slot_free_notify is called while the swap layer holds a
spinlock, so we must not hold the mutex there. Otherwise, lockdep
warns about it.
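
For illustration only (made-up names, not zram code), the problematic
shape is a sleeping lock taken inside a spinlocked region, which is
exactly what lockdep flags:

#include <linux/spinlock.h>
#include <linux/rwsem.h>

static DEFINE_SPINLOCK(swap_layer_lock);	/* stands in for the swap layer's spinlock */
static DECLARE_RWSEM(zram_like_lock);		/* stands in for zram->lock */

static void broken_free_notify(unsigned long index)
{
	spin_lock(&swap_layer_lock);	/* atomic context from here on */
	down_write(&zram_like_lock);	/* may sleep -> lockdep complains */
	/* ... free the slot for 'index' ... */
	up_write(&zram_like_lock);
	spin_unlock(&swap_layer_lock);
}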

This patch adds a list of pending free requests and a workqueue, so
zram_slot_free_notify only registers the index to be freed and queues
the work. When the queued work runs, it can take both the mutex and
the spinlock, so there is no race between them.
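
Stripped of zram specifics, the deferral pattern looks roughly like
this (a standalone sketch with hypothetical names; the real hooks are
in the patch below):

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/rwsem.h>
#include <linux/workqueue.h>

struct pending_free {
	unsigned long index;
	struct pending_free *next;
};

static DEFINE_SPINLOCK(pending_lock);
static struct pending_free *pending_head;
static DECLARE_RWSEM(data_lock);		/* stands in for zram->lock */

/* Must be called with data_lock held. */
static void drain_pending(void)
{
	struct pending_free *req;

	spin_lock(&pending_lock);
	while (pending_head) {
		req = pending_head;
		pending_head = req->next;
		/* free_slot(req->index); */
		kfree(req);
	}
	spin_unlock(&pending_lock);
}

static void pending_work_fn(struct work_struct *work)
{
	down_write(&data_lock);		/* process context: sleeping is fine */
	drain_pending();
	up_write(&data_lock);
}
static DECLARE_WORK(pending_work, pending_work_fn);

/* Called from atomic context: only record the index and kick the worker. */
static void defer_free(unsigned long index)
{
	struct pending_free *req = kmalloc(sizeof(*req), GFP_ATOMIC);

	if (!req)
		return;			/* tolerated, see NOTE below */
	req->index = index;
	spin_lock(&pending_lock);
	req->next = pending_head;
	pending_head = req;
	spin_unlock(&pending_lock);
	schedule_work(&pending_work);
}

The I/O path and the reset path then only need to drain the same list
(free_pending_rq() and flush_work() in the patch) before touching the
table.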

If any I/O is issued, zram drains the pending free requests registered
by zram_slot_free_notify right before handling the issued request,
because the workqueue might not have processed them yet.

Lastly, when zram is reset, flush_work handles all pending free
requests, so we don't leak memory.

NOTE: If zram_slot_free_notify's kmalloc with GFP_ATOMIC fails, the
slot will be freed when the next write I/O writes to that slot.
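
That fallback works because of the double check added to the write
path (the first hunk below): before storing new data at an index, any
stale object left behind by a lost free request is dropped. Shown here
with an explanatory comment:

	/*
	 * A deferred free for this index may have been lost (GFP_ATOMIC
	 * allocation failure), so drop any stale object before overwriting.
	 */
	if (unlikely(meta->table[index].handle ||
			zram_test_flag(meta, index, ZRAM_ZERO)))
		zram_free_page(zram, index);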

[1] [57ab0485, zram: use zram->lock to protect zram_free_page()
in swap free notify path]

* Changes from v1
 * complete redesign

Cc: Jiang Liu <jiang.liu@xxxxxxxxxx>
Cc: Nitin Gupta <ngupta@xxxxxxxxxx>
Signed-off-by: Minchan Kim <minchan@xxxxxxxxxx>
---
drivers/staging/zram/zram_drv.c | 60 ++++++++++++++++++++++++++++++++++++++---
drivers/staging/zram/zram_drv.h | 8 ++++++
2 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 7ebf91d..ec881e0 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -440,6 +440,14 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
goto out;
}

+ /*
+ * zram_slot_free_notify could have missed a free, so
+ * double check here.
+ */
+ if (unlikely(meta->table[index].handle ||
+ zram_test_flag(meta, index, ZRAM_ZERO)))
+ zram_free_page(zram, index);
+
ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen,
meta->compress_workmem);

@@ -505,6 +513,20 @@ out:
return ret;
}

+static void free_pending_rq(struct zram *zram)
+{
+ struct zram_free_rq *free_rq;
+
+ spin_lock(&zram->free_rq_lock);
+ while (zram->free_rq_head) {
+ free_rq = zram->free_rq_head;
+ zram->free_rq_head = free_rq->next;
+ zram_free_page(zram, free_rq->index);
+ kfree(free_rq);
+ }
+ spin_unlock(&zram->free_rq_lock);
+}
+
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
int offset, struct bio *bio, int rw)
{
@@ -512,10 +534,12 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,

if (rw == READ) {
down_read(&zram->lock);
+ free_pending_rq(zram);
ret = zram_bvec_read(zram, bvec, index, offset, bio);
up_read(&zram->lock);
} else {
down_write(&zram->lock);
+ free_pending_rq(zram);
ret = zram_bvec_write(zram, bvec, index, offset);
up_write(&zram->lock);
}
@@ -528,6 +552,8 @@ static void zram_reset_device(struct zram *zram)
size_t index;
struct zram_meta *meta;

+ flush_work(&zram->work);
+
down_write(&zram->init_lock);
if (!zram->init_done) {
up_write(&zram->init_lock);
@@ -721,16 +747,40 @@ error:
bio_io_error(bio);
}

+static void handle_pend_free_rq(struct work_struct *work)
+{
+ struct zram *zram;
+
+ zram = container_of(work, struct zram, work);
+ down_write(&zram->lock);
+ free_pending_rq(zram);
+ up_write(&zram->lock);
+}
+
+static void add_free_rq(struct zram *zram, struct zram_free_rq *free_rq)
+{
+ spin_lock(&zram->free_rq_lock);
+ free_rq->next = zram->free_rq_head;
+ zram->free_rq_head = free_rq;
+ spin_unlock(&zram->free_rq_lock);
+}
+
static void zram_slot_free_notify(struct block_device *bdev,
unsigned long index)
{
struct zram *zram;
+ struct zram_free_rq *free_rq;

zram = bdev->bd_disk->private_data;
- down_write(&zram->lock);
- zram_free_page(zram, index);
- up_write(&zram->lock);
atomic64_inc(&zram->stats.notify_free);
+
+ free_rq = kmalloc(sizeof(struct zram_free_rq), GFP_ATOMIC);
+ if (!free_rq)
+ return;
+
+ free_rq->index = index;
+ add_free_rq(zram, free_rq);
+ schedule_work(&zram->work);
}

static const struct block_device_operations zram_devops = {
@@ -777,6 +827,10 @@ static int create_device(struct zram *zram, int device_id)
init_rwsem(&zram->lock);
init_rwsem(&zram->init_lock);

+ INIT_WORK(&zram->work, handle_pend_free_rq);
+ spin_lock_init(&zram->free_rq_lock);
+ zram->free_rq_head = NULL;
+
zram->queue = blk_alloc_queue(GFP_KERNEL);
if (!zram->queue) {
pr_err("Error allocating disk queue for device %d\n",
diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h
index 9e57bfb..dec6241 100644
--- a/drivers/staging/zram/zram_drv.h
+++ b/drivers/staging/zram/zram_drv.h
@@ -94,12 +94,20 @@ struct zram_meta {
struct zs_pool *mem_pool;
};

+struct zram_free_rq {
+ unsigned long index;
+ struct zram_free_rq *next;
+};
+
struct zram {
struct zram_meta *meta;
struct rw_semaphore lock; /* protect compression buffers, table,
* 32bit stat counters against concurrent
* notifications, reads and writes */
+ struct work_struct work; /* handle pending free request */
+ void *free_rq_head; /* list head of free request */
struct request_queue *queue;
+ spinlock_t free_rq_lock;
struct gendisk *disk;
int init_done;
/* Prevent concurrent execution of device init, reset and R/W request */
--
1.8.3.2

--
Kind regards,
Minchan Kim