[PATCH v2 11/21] block, blksnap: minimum data storage unit of the original block device
From: Sergei Shtepa
Date: Fri Dec 09 2022 - 10:10:01 EST
The struct chunk describes the minimum data storage unit of the original
block device. Functions for working with these minimal blocks implement
algorithms for reading and writing blocks.
Signed-off-by: Sergei Shtepa <sergei.shtepa@xxxxxxxxx>
---
drivers/block/blksnap/chunk.c | 345 ++++++++++++++++++++++++++++++++++
drivers/block/blksnap/chunk.h | 139 ++++++++++++++
2 files changed, 484 insertions(+)
create mode 100644 drivers/block/blksnap/chunk.c
create mode 100644 drivers/block/blksnap/chunk.h
diff --git a/drivers/block/blksnap/chunk.c b/drivers/block/blksnap/chunk.c
new file mode 100644
index 000000000000..563a6b1612d3
--- /dev/null
+++ b/drivers/block/blksnap/chunk.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) KBUILD_MODNAME "-chunk: " fmt
+
+#include <linux/slab.h>
+#include <linux/dm-io.h>
+#include <linux/sched/mm.h>
+#include "params.h"
+#include "chunk.h"
+#include "diff_io.h"
+#include "diff_buffer.h"
+#include "diff_area.h"
+#include "diff_storage.h"
+
+void chunk_diff_buffer_release(struct chunk *chunk)
+{
+ if (unlikely(!chunk->diff_buffer))
+ return;
+
+ chunk_state_unset(chunk, CHUNK_ST_BUFFER_READY);
+ diff_buffer_release(chunk->diff_area, chunk->diff_buffer);
+ chunk->diff_buffer = NULL;
+}
+
+void chunk_store_failed(struct chunk *chunk, int error)
+{
+ struct diff_area *diff_area = chunk->diff_area;
+
+ chunk_state_set(chunk, CHUNK_ST_FAILED);
+ chunk_diff_buffer_release(chunk);
+ diff_storage_free_region(chunk->diff_region);
+ chunk->diff_region = NULL;
+
+ up(&chunk->lock);
+ if (error)
+ diff_area_set_corrupted(diff_area, error);
+};
+
+int chunk_schedule_storing(struct chunk *chunk, bool is_nowait)
+{
+ struct diff_area *diff_area = chunk->diff_area;
+
+ if (WARN(!list_is_first(&chunk->cache_link, &chunk->cache_link),
+ "The chunk already in the cache"))
+ return -EINVAL;
+
+ if (!chunk->diff_region) {
+ struct diff_region *diff_region;
+
+ diff_region = diff_storage_new_region(
+ diff_area->diff_storage,
+ diff_area_chunk_sectors(diff_area));
+ if (IS_ERR(diff_region)) {
+ pr_debug("Cannot get store for chunk #%ld\n",
+ chunk->number);
+ return PTR_ERR(diff_region);
+ }
+
+ chunk->diff_region = diff_region;
+ }
+
+ return chunk_async_store_diff(chunk, is_nowait);
+}
+
+void chunk_schedule_caching(struct chunk *chunk)
+{
+ int in_cache_count = 0;
+ struct diff_area *diff_area = chunk->diff_area;
+
+ might_sleep();
+
+ spin_lock(&diff_area->caches_lock);
+
+ /*
+ * The locked chunk cannot be in the cache.
+ * If the check reveals that the chunk is in the cache, then something
+ * is wrong in the algorithm.
+ */
+ if (WARN(!list_is_first(&chunk->cache_link, &chunk->cache_link),
+ "The chunk already in the cache")) {
+ spin_unlock(&diff_area->caches_lock);
+
+ chunk_store_failed(chunk, 0);
+ return;
+ }
+
+ if (chunk_state_check(chunk, CHUNK_ST_DIRTY)) {
+ list_add_tail(&chunk->cache_link,
+ &diff_area->write_cache_queue);
+ in_cache_count =
+ atomic_inc_return(&diff_area->write_cache_count);
+ } else {
+ list_add_tail(&chunk->cache_link, &diff_area->read_cache_queue);
+ in_cache_count =
+ atomic_inc_return(&diff_area->read_cache_count);
+ }
+ spin_unlock(&diff_area->caches_lock);
+
+ up(&chunk->lock);
+
+ /* Initiate the cache clearing process */
+ if ((in_cache_count > chunk_maximum_in_cache) &&
+ !diff_area_is_corrupted(diff_area))
+ queue_work(system_wq, &diff_area->cache_release_work);
+}
+
+static void chunk_notify_load(void *ctx)
+{
+ struct chunk *chunk = ctx;
+ int error = chunk->diff_io->error;
+
+ diff_io_free(chunk->diff_io);
+ chunk->diff_io = NULL;
+
+ might_sleep();
+
+ if (unlikely(error)) {
+ chunk_store_failed(chunk, error);
+ goto out;
+ }
+
+ if (unlikely(chunk_state_check(chunk, CHUNK_ST_FAILED))) {
+ pr_err("Chunk in a failed state\n");
+ up(&chunk->lock);
+ goto out;
+ }
+
+ if (chunk_state_check(chunk, CHUNK_ST_LOADING)) {
+ int ret;
+ unsigned int current_flag;
+
+ chunk_state_unset(chunk, CHUNK_ST_LOADING);
+ chunk_state_set(chunk, CHUNK_ST_BUFFER_READY);
+
+ current_flag = memalloc_noio_save();
+ ret = chunk_schedule_storing(chunk, false);
+ memalloc_noio_restore(current_flag);
+ if (ret)
+ chunk_store_failed(chunk, ret);
+ goto out;
+ }
+
+ pr_err("invalid chunk state 0x%x\n", atomic_read(&chunk->state));
+ up(&chunk->lock);
+out:
+ atomic_dec(&chunk->diff_area->pending_io_count);
+}
+
+static void chunk_notify_store(void *ctx)
+{
+ struct chunk *chunk = ctx;
+ int error = chunk->diff_io->error;
+
+ diff_io_free(chunk->diff_io);
+ chunk->diff_io = NULL;
+
+ might_sleep();
+
+ if (unlikely(error)) {
+ chunk_store_failed(chunk, error);
+ goto out;
+ }
+
+ if (unlikely(chunk_state_check(chunk, CHUNK_ST_FAILED))) {
+ pr_err("Chunk in a failed state\n");
+ chunk_store_failed(chunk, 0);
+ goto out;
+ }
+ if (chunk_state_check(chunk, CHUNK_ST_STORING)) {
+ chunk_state_unset(chunk, CHUNK_ST_STORING);
+ chunk_state_set(chunk, CHUNK_ST_STORE_READY);
+
+ if (chunk_state_check(chunk, CHUNK_ST_DIRTY)) {
+ /*
+ * The chunk marked "dirty" was stored in the difference
+ * storage. Now it is processed in the same way as any
+ * other stored chunks.
+ * Therefore, the "dirty" mark can be removed.
+ */
+ chunk_state_unset(chunk, CHUNK_ST_DIRTY);
+ chunk_diff_buffer_release(chunk);
+ } else {
+ unsigned int current_flag;
+
+ current_flag = memalloc_noio_save();
+ chunk_schedule_caching(chunk);
+ memalloc_noio_restore(current_flag);
+ goto out;
+ }
+ } else
+ pr_err("invalid chunk state 0x%x\n", atomic_read(&chunk->state));
+ up(&chunk->lock);
+out:
+ atomic_dec(&chunk->diff_area->pending_io_count);
+}
+
+struct chunk *chunk_alloc(struct diff_area *diff_area, unsigned long number)
+{
+ struct chunk *chunk;
+
+ chunk = kzalloc(sizeof(struct chunk), GFP_KERNEL);
+ if (!chunk)
+ return NULL;
+
+ INIT_LIST_HEAD(&chunk->cache_link);
+ sema_init(&chunk->lock, 1);
+ chunk->diff_area = diff_area;
+ chunk->number = number;
+ atomic_set(&chunk->state, 0);
+
+ return chunk;
+}
+
+void chunk_free(struct chunk *chunk)
+{
+ if (unlikely(!chunk))
+ return;
+
+ down(&chunk->lock);
+ chunk_diff_buffer_release(chunk);
+ diff_storage_free_region(chunk->diff_region);
+ chunk_state_set(chunk, CHUNK_ST_FAILED);
+ up(&chunk->lock);
+
+ kfree(chunk);
+}
+
+/*
+ * Starts asynchronous storing of a chunk to the difference storage.
+ */
+int chunk_async_store_diff(struct chunk *chunk, bool is_nowait)
+{
+ int ret;
+ struct diff_io *diff_io;
+ struct diff_region *region = chunk->diff_region;
+
+ if (WARN(!list_is_first(&chunk->cache_link, &chunk->cache_link),
+ "The chunk already in the cache"))
+ return -EINVAL;
+
+ diff_io = diff_io_new_async_write(chunk_notify_store, chunk, is_nowait);
+ if (unlikely(!diff_io)) {
+ if (is_nowait)
+ return -EAGAIN;
+ else
+ return -ENOMEM;
+ }
+
+ WARN_ON(chunk->diff_io);
+ chunk->diff_io = diff_io;
+ chunk_state_set(chunk, CHUNK_ST_STORING);
+ atomic_inc(&chunk->diff_area->pending_io_count);
+
+ ret = diff_io_do(chunk->diff_io, region, chunk->diff_buffer, is_nowait);
+ if (ret) {
+ atomic_dec(&chunk->diff_area->pending_io_count);
+ diff_io_free(chunk->diff_io);
+ chunk->diff_io = NULL;
+ }
+
+ return ret;
+}
+
+/*
+ * Starts asynchronous loading of a chunk from the original block device.
+ */
+int chunk_async_load_orig(struct chunk *chunk, const bool is_nowait)
+{
+ int ret;
+ struct diff_io *diff_io;
+ struct diff_region region = {
+ .bdev = chunk->diff_area->orig_bdev,
+ .sector = (sector_t)(chunk->number) *
+ diff_area_chunk_sectors(chunk->diff_area),
+ .count = chunk->sector_count,
+ };
+
+ diff_io = diff_io_new_async_read(chunk_notify_load, chunk, is_nowait);
+ if (unlikely(!diff_io)) {
+ if (is_nowait)
+ return -EAGAIN;
+ else
+ return -ENOMEM;
+ }
+
+ WARN_ON(chunk->diff_io);
+ chunk->diff_io = diff_io;
+ chunk_state_set(chunk, CHUNK_ST_LOADING);
+ atomic_inc(&chunk->diff_area->pending_io_count);
+
+ ret = diff_io_do(chunk->diff_io, ®ion, chunk->diff_buffer, is_nowait);
+ if (ret) {
+ atomic_dec(&chunk->diff_area->pending_io_count);
+ diff_io_free(chunk->diff_io);
+ chunk->diff_io = NULL;
+ }
+ return ret;
+}
+
+/*
+ * Performs synchronous loading of a chunk from the original block device.
+ */
+int chunk_load_orig(struct chunk *chunk)
+{
+ int ret;
+ struct diff_io *diff_io;
+ struct diff_region region = {
+ .bdev = chunk->diff_area->orig_bdev,
+ .sector = (sector_t)(chunk->number) *
+ diff_area_chunk_sectors(chunk->diff_area),
+ .count = chunk->sector_count,
+ };
+
+ diff_io = diff_io_new_sync_read();
+ if (unlikely(!diff_io))
+ return -ENOMEM;
+
+ ret = diff_io_do(diff_io, ®ion, chunk->diff_buffer, false);
+ if (!ret)
+ ret = diff_io->error;
+
+ diff_io_free(diff_io);
+ return ret;
+}
+
+/*
+ * Performs synchronous loading of a chunk from the difference storage.
+ */
+int chunk_load_diff(struct chunk *chunk)
+{
+ int ret;
+ struct diff_io *diff_io;
+ struct diff_region *region = chunk->diff_region;
+
+ diff_io = diff_io_new_sync_read();
+ if (unlikely(!diff_io))
+ return -ENOMEM;
+
+ ret = diff_io_do(diff_io, region, chunk->diff_buffer, false);
+ if (!ret)
+ ret = diff_io->error;
+
+ diff_io_free(diff_io);
+
+ return ret;
+}
diff --git a/drivers/block/blksnap/chunk.h b/drivers/block/blksnap/chunk.h
new file mode 100644
index 000000000000..6f2350930095
--- /dev/null
+++ b/drivers/block/blksnap/chunk.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BLK_SNAP_CHUNK_H
+#define __BLK_SNAP_CHUNK_H
+
+#include <linux/blk_types.h>
+#include <linux/blkdev.h>
+#include <linux/rwsem.h>
+#include <linux/atomic.h>
+
+struct diff_area;
+struct diff_region;
+struct diff_io;
+
+/**
+ * enum chunk_st - Possible states for a chunk.
+ *
+ * @CHUNK_ST_FAILED:
+ * An error occurred while processing the chunk data.
+ * @CHUNK_ST_DIRTY:
+ * The chunk is in the dirty state. The chunk is marked dirty in case
+ * there was a write operation to the snapshot image.
+ * The flag is removed when the data of the chunk is stored in the
+ * difference storage.
+ * @CHUNK_ST_BUFFER_READY:
+ * The data of the chunk is ready to be read from the RAM buffer.
+ * The flag is removed when a chunk is removed from the cache and its
+ * buffer is released.
+ * @CHUNK_ST_STORE_READY:
+ * The data of the chunk has been written to the difference storage.
+ * The flag cannot be removed.
+ * @CHUNK_ST_LOADING:
+ * The data is being read from the original block device.
+ * The flag is replaced with the CHUNK_ST_BUFFER_READY flag.
+ * @CHUNK_ST_STORING:
+ * The data is being saved to the difference storage.
+ * The flag is replaced with the CHUNK_ST_STORE_READY flag.
+ *
+ * Chunks life circle.
+ * Copy-on-write when writing to original:
+ * 0 -> LOADING -> BUFFER_READY -> BUFFER_READY | STORING ->
+ * BUFFER_READY | STORE_READY -> STORE_READY
+ * Write to snapshot image:
+ * 0 -> LOADING -> BUFFER_READY | DIRTY -> DIRTY | STORING ->
+ * BUFFER_READY | STORE_READY -> STORE_READY
+ */
+enum chunk_st {
+ CHUNK_ST_FAILED = (1 << 0),
+ CHUNK_ST_DIRTY = (1 << 1),
+ CHUNK_ST_BUFFER_READY = (1 << 2),
+ CHUNK_ST_STORE_READY = (1 << 3),
+ CHUNK_ST_LOADING = (1 << 4),
+ CHUNK_ST_STORING = (1 << 5),
+};
+
+/**
+ * struct chunk - Minimum data storage unit.
+ *
+ * @cache_link:
+ * The list header allows to create caches of chunks.
+ * @diff_area:
+ * Pointer to the difference area - the storage of changes for a specific device.
+ * @number:
+ * Sequential number of the chunk.
+ * @sector_count:
+ * Number of sectors in the current chunk. This is especially true
+ * for the last chunk.
+ * @lock:
+ * Binary semaphore. Syncs access to the chunks fields: state,
+ * diff_buffer, diff_region and diff_io.
+ * @state:
+ * Defines the state of a chunk. May contain CHUNK_ST_* bits.
+ * @diff_buffer:
+ * Pointer to &struct diff_buffer. Describes a buffer in the memory
+ * for storing the chunk data.
+ * @diff_region:
+ * Pointer to &struct diff_region. Describes a copy of the chunk data
+ * on the difference storage.
+ * @diff_io:
+ * Provides I/O operations for a chunk.
+ *
+ * This structure describes the block of data that the module operates
+ * with when executing the copy-on-write algorithm and when performing I/O
+ * to snapshot images.
+ *
+ * If the data of the chunk has been changed or has just been read, then
+ * the chunk gets into cache.
+ *
+ * The semaphore is blocked for writing if there is no actual data in the
+ * buffer, since a block of data is being read from the original device or
+ * from a diff storage. If data is being read from or written to the
+ * diff_buffer, the semaphore must be locked.
+ */
+struct chunk {
+ struct list_head cache_link;
+ struct diff_area *diff_area;
+
+ unsigned long number;
+ sector_t sector_count;
+
+ struct semaphore lock;
+
+ atomic_t state;
+ struct diff_buffer *diff_buffer;
+ struct diff_region *diff_region;
+ struct diff_io *diff_io;
+};
+
+static inline void chunk_state_set(struct chunk *chunk, int st)
+{
+ atomic_or(st, &chunk->state);
+};
+
+static inline void chunk_state_unset(struct chunk *chunk, int st)
+{
+ atomic_and(~st, &chunk->state);
+};
+
+static inline bool chunk_state_check(struct chunk *chunk, int st)
+{
+ return !!(atomic_read(&chunk->state) & st);
+};
+
+struct chunk *chunk_alloc(struct diff_area *diff_area, unsigned long number);
+void chunk_free(struct chunk *chunk);
+
+int chunk_schedule_storing(struct chunk *chunk, bool is_nowait);
+void chunk_diff_buffer_release(struct chunk *chunk);
+void chunk_store_failed(struct chunk *chunk, int error);
+
+void chunk_schedule_caching(struct chunk *chunk);
+
+/* Asynchronous operations are used to implement the COW algorithm. */
+int chunk_async_store_diff(struct chunk *chunk, bool is_nowait);
+int chunk_async_load_orig(struct chunk *chunk, const bool is_nowait);
+
+/* Synchronous operations are used to implement reading and writing to the snapshot image. */
+int chunk_load_orig(struct chunk *chunk);
+int chunk_load_diff(struct chunk *chunk);
+#endif /* __BLK_SNAP_CHUNK_H */
--
2.20.1