[RFC PATCH 3/7] Implement diskseq checks in blkback

From: Demi Marie Obenour
Date: Wed Jan 25 2023 - 22:34:33 EST


From: Demi Marie Obenour <demiobenour@xxxxxxxxx>

This allows specifying a disk sequence number in XenStore. If it does
not match the disk sequence number of the underlying device, the device
will not be exported and a warning will be logged. Userspace can use
this to eliminate race conditions due to major/minor number reuse.
Older kernels will ignore this, so it is safe for userspace to set it
unconditionally.

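This also makes physical-device parsing stricter: values containing
whitespace or bytes outside printable ASCII, trailing characters after
the numbers, or a major or minor number longer than 8 hex digits are now
rejected. I do not believe this will break any extant userspace tools.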

Signed-off-by: Demi Marie Obenour <demi@xxxxxxxxxxxxxxxxxxxxxx>
---
drivers/block/xen-blkback/xenbus.c | 137 +++++++++++++++++++++--------
1 file changed, 100 insertions(+), 37 deletions(-)

diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 4807af1d58059394d7a992335dabaf2bc3901721..2c43bfc7ab5ba6954f11d4b949a5668660dbd290 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -24,6 +24,7 @@ struct backend_info {
struct xenbus_watch backend_watch;
unsigned major;
unsigned minor;
+ unsigned long long diskseq;
char *mode;
};

@@ -479,7 +480,7 @@ static void xen_vbd_free(struct xen_vbd *vbd)

static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
unsigned major, unsigned minor, int readonly,
- int cdrom)
+ bool cdrom, u64 diskseq)
{
struct xen_vbd *vbd;
struct block_device *bdev;
@@ -507,6 +508,25 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
xen_vbd_free(vbd);
return -ENOENT;
}
+
+ if (diskseq) {
+ struct gendisk *disk = bdev->bd_disk;
+ if (unlikely(disk == NULL)) {
+ pr_err("xen_vbd_create: device %08x has no gendisk\n",
+ vbd->pdevice);
+ xen_vbd_free(vbd);
+ return -EFAULT;
+ }
+
+ if (unlikely(disk->diskseq != diskseq)) {
+ pr_warn("xen_vbd_create: device %08x has incorrect sequence "
+ "number 0x%llx (expected 0x%llx)\n",
+ vbd->pdevice, disk->diskseq, diskseq);
+ xen_vbd_free(vbd);
+ return -ENODEV;
+ }
+ }
+
vbd->size = vbd_sz(vbd);

if (cdrom || disk_to_cdi(vbd->bdev->bd_disk))
@@ -690,6 +710,55 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
return err;
}

+/*
+ * Read the "physical-device" node: "[<diskseq>@]<major>:<minor>" in hex, with
+ * no trailing junk.  *diskseq is 0 if not given.  Returns true on success.
+ */
+static bool read_physical_device(struct xenbus_device *dev, unsigned long long *diskseq,
+				 unsigned *major, unsigned *minor)
+{
+	char *physical_device, junk;
+	const char *problem;
+	unsigned int i, len;	/* xenbus_read() takes an unsigned int * length */
+
+	physical_device = xenbus_read(XBT_NIL, dev->nodename, "physical-device", &len);
+	if (IS_ERR(physical_device)) {
+		int err = PTR_ERR(physical_device);
+		/*
+		 * This watch fires once immediately after it is registered, so
+		 * "does not exist" errors are expected: wait for the hotplug scripts.
+		 */
+		if (unlikely(!XENBUS_EXIST_ERR(err)))
+			xenbus_dev_fatal(dev, err, "reading physical-device");
+		return false;
+	}
+
+	for (i = 0; i < len; ++i) {
+		if (unlikely(physical_device[i] <= 0x20 || physical_device[i] >= 0x7F)) {
+			problem = "bad byte in physical-device";
+			goto fail;
+		}
+	}
+
+	if (sscanf(physical_device, "%16llx@%8x:%8x%c", diskseq, major, minor, &junk) == 3) {
+		if (*diskseq == 0) {
+			problem = "diskseq 0 is invalid";
+			goto fail;
+		}
+	} else if (sscanf(physical_device, "%8x:%8x%c", major, minor, &junk) == 2) {
+		*diskseq = 0;
+	} else {
+		problem = "invalid physical-device";
+		goto fail;
+	}
+	kfree(physical_device);
+	return true;
+fail:
+	kfree(physical_device);
+	xenbus_dev_fatal(dev, -EINVAL, "%s", problem);
+	return false;
+}
+
/*
* Callback received when the hotplug scripts have placed the physical-device
* node. Read it and the mode node, and create a vbd. If the frontend is
@@ -707,28 +776,17 @@ static void backend_changed(struct xenbus_watch *watch,
int cdrom = 0;
unsigned long handle;
char *device_type;
+ unsigned long long diskseq;

pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
-
- err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
- &major, &minor);
- if (XENBUS_EXIST_ERR(err)) {
- /*
- * Since this watch will fire once immediately after it is
- * registered, we expect this. Ignore it, and wait for the
- * hotplug scripts.
- */
+ if (!read_physical_device(dev, &diskseq, &major, &minor))
return;
- }
- if (err != 2) {
- xenbus_dev_fatal(dev, err, "reading physical-device");
- return;
- }

- if (be->major | be->minor) {
- if (be->major != major || be->minor != minor)
- pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
- be->major, be->minor, major, minor);
+ if (be->major | be->minor | be->diskseq) {
+ if (be->major != major || be->minor != minor || be->diskseq != diskseq)
+ pr_warn("changing physical device (from %x:%x:%llx to %x:%x:%llx)"
+ " not supported.\n",
+ be->major, be->minor, be->diskseq, major, minor, diskseq);
return;
}

@@ -756,29 +814,34 @@ static void backend_changed(struct xenbus_watch *watch,

be->major = major;
be->minor = minor;
+ be->diskseq = diskseq;

err = xen_vbd_create(be->blkif, handle, major, minor,
- !strchr(be->mode, 'w'), cdrom);
-
- if (err)
- xenbus_dev_fatal(dev, err, "creating vbd structure");
- else {
- err = xenvbd_sysfs_addif(dev);
- if (err) {
- xen_vbd_free(&be->blkif->vbd);
- xenbus_dev_fatal(dev, err, "creating sysfs entries");
- }
- }
+ !strchr(be->mode, 'w'), cdrom, diskseq);

if (err) {
- kfree(be->mode);
- be->mode = NULL;
- be->major = 0;
- be->minor = 0;
- } else {
- /* We're potentially connected now */
- xen_update_blkif_status(be->blkif);
+ xenbus_dev_fatal(dev, err, "creating vbd structure");
+ goto fail;
}
+
+ err = xenvbd_sysfs_addif(dev);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "creating sysfs entries");
+ goto free_vbd;
+ }
+
+ /* We're potentially connected now */
+ xen_update_blkif_status(be->blkif);
+ return;
+
+free_vbd:
+ xen_vbd_free(&be->blkif->vbd);
+fail:
+ kfree(be->mode);
+ be->mode = NULL;
+ be->major = 0;
+ be->minor = 0;
+ be->diskseq = 0;
}

/*
--
Sincerely,
Demi Marie Obenour (she/her/hers)
Invisible Things Lab