[PATCH 3.16.y-ckt 61/94] rbd: prevent kernel stack blow up on rbd map

From: Luis Henriques
Date: Fri Nov 13 2015 - 05:33:53 EST


3.16.7-ckt20 -stable review patch. If anyone has any objections, please let me know.

------------------

From: Ilya Dryomov <idryomov@xxxxxxxxx>

commit 6d69bb536bac0d403d83db1ca841444981b280cd upstream.

Mapping an image with a long parent chain (e.g. image foo, whose parent
is bar, whose parent is baz, etc) currently leads to a kernel stack
overflow, due to the following recursion in the reply path:

rbd_osd_req_callback()
rbd_obj_request_complete()
rbd_img_obj_callback()
rbd_img_parent_read_callback()
rbd_obj_request_complete()
...

Limit the parent chain to 16 images, which is ~5K worth of stack. When
the above recursion is eliminated, this limit can be lifted.

Fixes: http://tracker.ceph.com/issues/12538

Signed-off-by: Ilya Dryomov <idryomov@xxxxxxxxx>
Reviewed-by: Josh Durgin <jdurgin@xxxxxxxxxx>
[idryomov@xxxxxxxxx: backport to 3.14: rbd_dev->opts, context]
Signed-off-by: Luis Henriques <luis.henriques@xxxxxxxxxxxxx>
---
drivers/block/rbd.c | 29 +++++++++++++++++++++--------
1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 5990cb7fb2fa..f667ad88a31f 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -95,6 +95,8 @@ static int atomic_dec_return_safe(atomic_t *v)
#define RBD_MINORS_PER_MAJOR 256
#define RBD_SINGLE_MAJOR_PART_SHIFT 4

+#define RBD_MAX_PARENT_CHAIN_LEN 16
+
#define RBD_SNAP_DEV_NAME_PREFIX "snap_"
#define RBD_MAX_SNAP_NAME_LEN \
(NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
@@ -415,7 +417,7 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf,
size_t count);
static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf,
size_t count);
-static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping);
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth);
static void rbd_spec_put(struct rbd_spec *spec);

static int rbd_dev_id_to_minor(int dev_id)
@@ -5026,7 +5028,12 @@ out_err:
return ret;
}

-static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
+/*
+ * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() ->
+ * rbd_dev_image_probe() recursion depth, which means it's also the
+ * length of the already discovered part of the parent chain.
+ */
+static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
{
struct rbd_device *parent = NULL;
int ret;
@@ -5034,6 +5041,12 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
if (!rbd_dev->parent_spec)
return 0;

+ if (++depth > RBD_MAX_PARENT_CHAIN_LEN) {
+ pr_info("parent chain is too long (%d)\n", depth);
+ ret = -EINVAL;
+ goto out_err;
+ }
+
parent = rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec);
if (!parent) {
ret = -ENOMEM;
@@ -5047,7 +5060,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
__rbd_get_client(rbd_dev->rbd_client);
rbd_spec_get(rbd_dev->parent_spec);

- ret = rbd_dev_image_probe(parent, false);
+ ret = rbd_dev_image_probe(parent, depth);
if (ret < 0)
goto out_err;

@@ -5186,7 +5199,7 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
* parent), initiate a watch on its header object before using that
* object to get detailed information about the rbd image.
*/
-static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
{
int ret;

@@ -5206,7 +5219,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
if (ret)
goto err_out_format;

- if (mapping) {
+ if (!depth) {
ret = rbd_dev_header_watch_sync(rbd_dev);
if (ret)
goto out_header_name;
@@ -5223,7 +5236,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
if (ret)
goto err_out_probe;

- ret = rbd_dev_probe_parent(rbd_dev);
+ ret = rbd_dev_probe_parent(rbd_dev, depth);
if (ret)
goto err_out_probe;

@@ -5234,7 +5247,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
err_out_probe:
rbd_dev_unprobe(rbd_dev);
err_out_watch:
- if (mapping)
+ if (!depth)
rbd_dev_header_unwatch_sync(rbd_dev);
out_header_name:
kfree(rbd_dev->header_name);
@@ -5299,7 +5312,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
rbdc = NULL; /* rbd_dev now owns this */
spec = NULL; /* rbd_dev now owns this */

- rc = rbd_dev_image_probe(rbd_dev, true);
+ rc = rbd_dev_image_probe(rbd_dev, 0);
if (rc < 0)
goto err_out_rbd_dev;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/