[PATCH 003 of 7] md: Allow a maximum extent to be set for resyncing.

From: NeilBrown
Date: Fri Dec 14 2007 - 01:27:54 EST



This allows userspace to control resync/reshape progress and
synchronise it with other activities, such as shared access in a SAN,
or backing up critical sections during a tricky reshape.

Writing a number of sectors (which must be a multiple of the chunk
size if such is meaningful) causes a resync to pause when it
gets to that point.

Signed-off-by: Neil Brown <neilb@xxxxxxx>

### Diffstat output
./Documentation/md.txt | 10 +++++
./drivers/md/md.c | 75 ++++++++++++++++++++++++++++++++++++++------
./drivers/md/raid1.c | 2 +
./drivers/md/raid10.c | 3 +
./drivers/md/raid5.c | 25 ++++++++++++++
./include/linux/raid/md_k.h | 2 +
6 files changed, 107 insertions(+), 10 deletions(-)

diff .prev/Documentation/md.txt ./Documentation/md.txt
--- .prev/Documentation/md.txt 2007-12-14 16:07:50.000000000 +1100
+++ ./Documentation/md.txt 2007-12-14 16:08:57.000000000 +1100
@@ -416,6 +416,16 @@ also have
sectors in total that could need to be processed. The two
numbers are separated by a '/' thus effectively showing one
value, a fraction of the process that is complete.
+ A 'select' on this attribute will return when resync completes,
+ when it reaches the current sync_max (below) and possibly at
+ other times.
+
+ sync_max
+ This is a number of sectors at which point a resync/recovery
+ process will pause. When a resync is active, the value can
+ only ever be increased, never decreased. The value of 'max'
+ effectively disables the limit.
+

sync_speed
This shows the current actual speed, in K/sec, of the current

diff .prev/drivers/md/md.c ./drivers/md/md.c
--- .prev/drivers/md/md.c 2007-12-14 16:08:52.000000000 +1100
+++ ./drivers/md/md.c 2007-12-14 16:08:57.000000000 +1100
@@ -275,6 +275,7 @@ static mddev_t * mddev_find(dev_t unit)
spin_lock_init(&new->write_lock);
init_waitqueue_head(&new->sb_wait);
new->reshape_position = MaxSector;
+ new->resync_max = MaxSector;

new->queue = blk_alloc_queue(GFP_KERNEL);
if (!new->queue) {
@@ -2926,6 +2927,43 @@ sync_completed_show(mddev_t *mddev, char
static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);

static ssize_t
+max_sync_show(mddev_t *mddev, char *page)
+{
+ if (mddev->resync_max == MaxSector)
+ return sprintf(page, "max\n");
+ else
+ return sprintf(page, "%llu\n",
+ (unsigned long long)mddev->resync_max);
+}
+static ssize_t
+max_sync_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ if (strncmp(buf, "max", 3) == 0)
+ mddev->resync_max = MaxSector;
+ else {
+ char *ep;
+ unsigned long long max = simple_strtoull(buf, &ep, 10);
+ if (ep == buf || (*ep != 0 && *ep != '\n'))
+ return -EINVAL;
+ if (max < mddev->resync_max &&
+ test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+ return -EBUSY;
+
+ /* Must be a multiple of chunk_size */
+ if (mddev->chunk_size) {
+ if (max & (sector_t)((mddev->chunk_size>>9)-1))
+ return -EINVAL;
+ }
+ mddev->resync_max = max;
+ }
+ wake_up(&mddev->recovery_wait);
+ return len;
+}
+
+static struct md_sysfs_entry md_max_sync =
+__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
+
+static ssize_t
suspend_lo_show(mddev_t *mddev, char *page)
{
return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
@@ -3035,6 +3073,7 @@ static struct attribute *md_redundancy_a
&md_sync_max.attr,
&md_sync_speed.attr,
&md_sync_completed.attr,
+ &md_max_sync.attr,
&md_suspend_lo.attr,
&md_suspend_hi.attr,
&md_bitmap.attr,
@@ -3582,6 +3621,7 @@ static int do_md_stop(mddev_t * mddev, i
mddev->size = 0;
mddev->raid_disks = 0;
mddev->recovery_cp = 0;
+ mddev->resync_max = MaxSector;
mddev->reshape_position = MaxSector;
mddev->external = 0;

@@ -5445,8 +5485,16 @@ void md_do_sync(mddev_t *mddev)
sector_t sectors;

skipped = 0;
+ if (j >= mddev->resync_max) {
+ sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+ wait_event(mddev->recovery_wait,
+ mddev->resync_max > j
+ || kthread_should_stop());
+ }
+ if (kthread_should_stop())
+ goto interrupted;
sectors = mddev->pers->sync_request(mddev, j, &skipped,
- currspeed < speed_min(mddev));
+ currspeed < speed_min(mddev));
if (sectors == 0) {
set_bit(MD_RECOVERY_ERR, &mddev->recovery);
goto out;
@@ -5488,15 +5536,9 @@ void md_do_sync(mddev_t *mddev)
}


- if (kthread_should_stop()) {
- /*
- * got a signal, exit.
- */
- printk(KERN_INFO
- "md: md_do_sync() got signal ... exiting\n");
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- goto out;
- }
+ if (kthread_should_stop())
+ goto interrupted;
+

/*
* this loop exits only if either when we are slower than
@@ -5560,9 +5602,22 @@ void md_do_sync(mddev_t *mddev)

skip:
mddev->curr_resync = 0;
+ mddev->resync_max = MaxSector;
+ sysfs_notify(&mddev->kobj, NULL, "sync_completed");
wake_up(&resync_wait);
set_bit(MD_RECOVERY_DONE, &mddev->recovery);
md_wakeup_thread(mddev->thread);
+ return;
+
+ interrupted:
+ /*
+ * got a signal, exit.
+ */
+ printk(KERN_INFO
+ "md: md_do_sync() got signal ... exiting\n");
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+ goto out;
+
}
EXPORT_SYMBOL_GPL(md_do_sync);


diff .prev/drivers/md/raid10.c ./drivers/md/raid10.c
--- .prev/drivers/md/raid10.c 2007-12-14 16:07:50.000000000 +1100
+++ ./drivers/md/raid10.c 2007-12-14 16:08:57.000000000 +1100
@@ -1657,6 +1657,9 @@ static sector_t sync_request(mddev_t *md
return (max_sector - sector_nr) + sectors_skipped;
}

+ if (max_sector > mddev->resync_max)
+ max_sector = mddev->resync_max; /* Don't do IO beyond here */
+
/* make sure whole request will fit in a chunk - if chunks
* are meaningful
*/

diff .prev/drivers/md/raid1.c ./drivers/md/raid1.c
--- .prev/drivers/md/raid1.c 2007-12-14 16:07:50.000000000 +1100
+++ ./drivers/md/raid1.c 2007-12-14 16:08:57.000000000 +1100
@@ -1784,6 +1784,8 @@ static sector_t sync_request(mddev_t *md
return rv;
}

+ if (max_sector > mddev->resync_max)
+ max_sector = mddev->resync_max; /* Don't do IO beyond here */
nr_sectors = 0;
sync_blocks = 0;
do {

diff .prev/drivers/md/raid5.c ./drivers/md/raid5.c
--- .prev/drivers/md/raid5.c 2007-12-14 16:07:50.000000000 +1100
+++ ./drivers/md/raid5.c 2007-12-14 16:08:57.000000000 +1100
@@ -4277,6 +4277,25 @@ static sector_t reshape_request(mddev_t
release_queue(sq);
first_sector += STRIPE_SECTORS;
}
+ /* If this takes us to the resync_max point where we have to pause,
+ * then we need to write out the superblock.
+ */
+ sector_nr += conf->chunk_size>>9;
+ if (sector_nr >= mddev->resync_max) {
+ /* Cannot proceed until we've updated the superblock... */
+ wait_event(conf->wait_for_overlap,
+ atomic_read(&conf->reshape_stripes) == 0);
+ mddev->reshape_position = conf->expand_progress;
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ md_wakeup_thread(mddev->thread);
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_CHANGE_DEVS, &mddev->flags)
+ || kthread_should_stop());
+ spin_lock_irq(&conf->device_lock);
+ conf->expand_lo = mddev->reshape_position;
+ spin_unlock_irq(&conf->device_lock);
+ wake_up(&conf->wait_for_overlap);
+ }
return conf->chunk_size>>9;
}

@@ -4314,6 +4333,12 @@ static inline sector_t sync_request(mdde
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
return reshape_request(mddev, sector_nr, skipped);

+ /* No need to check resync_max as we never do more than one
+ * stripe, and as resync_max will always be on a chunk boundary,
+ * if the check in md_do_sync didn't fire, there is no chance
+ * of overstepping resync_max here
+ */
+
/* if there is too many failed drives and we are trying
* to resync, then assert that we are finished, because there is
* nothing we can do.

diff .prev/include/linux/raid/md_k.h ./include/linux/raid/md_k.h
--- .prev/include/linux/raid/md_k.h 2007-12-14 16:07:54.000000000 +1100
+++ ./include/linux/raid/md_k.h 2007-12-14 16:08:57.000000000 +1100
@@ -219,6 +219,8 @@ struct mddev_s
atomic_t recovery_active; /* blocks scheduled, but not written */
wait_queue_head_t recovery_wait;
sector_t recovery_cp;
+ sector_t resync_max; /* resync should pause
+ * when it gets here */

spinlock_t write_lock;
wait_queue_head_t sb_wait; /* for waiting on superblock updates */
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/