[PATCH 4.2.y-ckt 142/305] dm thin metadata: fix bug in dm_thin_remove_range()

From: Kamal Mostafa
Date: Fri Jan 15 2016 - 19:55:18 EST


4.2.8-ckt2 -stable review patch. If anyone has any objections, please let me know.

---8<------------------------------------------------------------

From: Joe Thornber <ejt@xxxxxxxxxx>

commit 993ceab91986e2e737ce9a3e23bebc8cce649240 upstream.

dm_btree_remove_leaves() only unmaps a contiguous region so we need a
loop, in __remove_range(), to handle ranges that contain multiple
regions.

A new btree function, dm_btree_lookup_next(), is introduced which is
more efficiently able to skip over regions of the thin device which
aren't mapped. __remove_range() uses dm_btree_lookup_next() for each
iteration of __remove_range()'s loop.

Also, improve description of dm_btree_remove_leaves().

Fixes: 6550f075 ("dm thin metadata: add dm_thin_remove_range()")
Signed-off-by: Joe Thornber <ejt@xxxxxxxxxx>
Signed-off-by: Mike Snitzer <snitzer@xxxxxxxxxx>
Signed-off-by: Kamal Mostafa <kamal@xxxxxxxxxxxxx>
---
drivers/md/dm-thin-metadata.c | 28 +++++++++---
drivers/md/persistent-data/dm-btree.c | 81 +++++++++++++++++++++++++++++++++++
drivers/md/persistent-data/dm-btree.h | 14 ++++--
3 files changed, 115 insertions(+), 8 deletions(-)

diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 6ba47cf..53a7d58 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1530,7 +1530,7 @@ static int __remove(struct dm_thin_device *td, dm_block_t block)
static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_t end)
{
int r;
- unsigned count;
+ unsigned count, total_count = 0;
struct dm_pool_metadata *pmd = td->pmd;
dm_block_t keys[1] = { td->id };
__le64 value;
@@ -1553,11 +1553,29 @@ static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_
if (r)
return r;

- r = dm_btree_remove_leaves(&pmd->bl_info, mapping_root, &begin, end, &mapping_root, &count);
- if (r)
- return r;
+ /*
+ * Remove leaves stops at the first unmapped entry, so we have to
+ * loop round finding mapped ranges.
+ */
+ while (begin < end) {
+ r = dm_btree_lookup_next(&pmd->bl_info, mapping_root, &begin, &begin, &value);
+ if (r == -ENODATA)
+ break;
+
+ if (r)
+ return r;
+
+ if (begin >= end)
+ break;
+
+ r = dm_btree_remove_leaves(&pmd->bl_info, mapping_root, &begin, end, &mapping_root, &count);
+ if (r)
+ return r;
+
+ total_count += count;
+ }

- td->mapped_blocks -= count;
+ td->mapped_blocks -= total_count;
td->changed = 1;

/*
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 7ba85e2..40bbd51 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -63,6 +63,11 @@ int lower_bound(struct btree_node *n, uint64_t key)
return bsearch(n, key, 0);
}

+static int upper_bound(struct btree_node *n, uint64_t key)
+{
+ return bsearch(n, key, 1);
+}
+
void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
struct dm_btree_value_type *vt)
{
@@ -390,6 +395,82 @@ int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
}
EXPORT_SYMBOL_GPL(dm_btree_lookup);

+static int dm_btree_lookup_next_single(struct dm_btree_info *info, dm_block_t root,
+ uint64_t key, uint64_t *rkey, void *value_le)
+{
+ int r, i;
+ uint32_t flags, nr_entries;
+ struct dm_block *node;
+ struct btree_node *n;
+
+ r = bn_read_lock(info, root, &node);
+ if (r)
+ return r;
+
+ n = dm_block_data(node);
+ flags = le32_to_cpu(n->header.flags);
+ nr_entries = le32_to_cpu(n->header.nr_entries);
+
+ if (flags & INTERNAL_NODE) {
+ i = lower_bound(n, key);
+ if (i < 0 || i >= nr_entries) {
+ r = -ENODATA;
+ goto out;
+ }
+
+ r = dm_btree_lookup_next_single(info, value64(n, i), key, rkey, value_le);
+ if (r == -ENODATA && i < (nr_entries - 1)) {
+ i++;
+ r = dm_btree_lookup_next_single(info, value64(n, i), key, rkey, value_le);
+ }
+
+ } else {
+ i = upper_bound(n, key);
+ if (i < 0 || i >= nr_entries) {
+ r = -ENODATA;
+ goto out;
+ }
+
+ *rkey = le64_to_cpu(n->keys[i]);
+ memcpy(value_le, value_ptr(n, i), info->value_type.size);
+ }
+out:
+ dm_tm_unlock(info->tm, node);
+ return r;
+}
+
+int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, uint64_t *rkey, void *value_le)
+{
+ unsigned level;
+ int r = -ENODATA;
+ __le64 internal_value_le;
+ struct ro_spine spine;
+
+ init_ro_spine(&spine, info);
+ for (level = 0; level < info->levels - 1u; level++) {
+ r = btree_lookup_raw(&spine, root, keys[level],
+ lower_bound, rkey,
+ &internal_value_le, sizeof(uint64_t));
+ if (r)
+ goto out;
+
+ if (*rkey != keys[level]) {
+ r = -ENODATA;
+ goto out;
+ }
+
+ root = le64_to_cpu(internal_value_le);
+ }
+
+ r = dm_btree_lookup_next_single(info, root, keys[level], rkey, value_le);
+out:
+ exit_ro_spine(&spine);
+ return r;
+}
+
+EXPORT_SYMBOL_GPL(dm_btree_lookup_next);
+
/*
* Splits a node by creating a sibling node and shifting half the nodes
* contents across. Assumes there is a parent node, and it has room for
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
index 11d8cf7..c74301f 100644
--- a/drivers/md/persistent-data/dm-btree.h
+++ b/drivers/md/persistent-data/dm-btree.h
@@ -110,6 +110,13 @@ int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
uint64_t *keys, void *value_le);

/*
+ * Tries to find the first key where the bottom level key is >= to that
+ * given. Useful for skipping empty sections of the btree.
+ */
+int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, uint64_t *rkey, void *value_le);
+
+/*
* Insertion (or overwrite an existing value). O(ln(n))
*/
int dm_btree_insert(struct dm_btree_info *info, dm_block_t root,
@@ -135,9 +142,10 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
uint64_t *keys, dm_block_t *new_root);

/*
- * Removes values between 'keys' and keys2, where keys2 is keys with the
- * final key replaced with 'end_key'. 'end_key' is the one-past-the-end
- * value. 'keys' may be altered.
+ * Removes a _contiguous_ run of values starting from 'keys' and not
+ * reaching keys2 (where keys2 is keys with the final key replaced with
+ * 'end_key'). 'end_key' is the one-past-the-end value. 'keys' may be
+ * altered.
*/
int dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root,
uint64_t *keys, uint64_t end_key,
--
1.9.1