From 9092c02d943515b3c9ffd5d0003527f8cc1dd77b Mon Sep 17 00:00:00 2001
From: Jonathan Brassow
Date: Thu, 2 May 2013 14:19:24 -0500
Subject: DM RAID: Add ability to restore transiently failed devices on resume

DM RAID: Add ability to restore transiently failed devices on resume

This patch adds code to the resume function to check over the devices
in the RAID array. If any are found to be marked as failed and their
superblocks can be read, an attempt is made to reintegrate them into
the array. This allows the user to refresh the array with a simple
suspend and resume of the array - rather than having to load a
completely new table, allocate and initialize all the structures and
throw away the old instantiation.

Signed-off-by: Jonathan Brassow
Signed-off-by: NeilBrown
---
 drivers/md/dm-raid.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

(limited to 'drivers/md/dm-raid.c')

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 1d3fe1a40a9b..facaf9142d5a 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1574,12 +1574,54 @@ static void raid_postsuspend(struct dm_target *ti)
 
 static void raid_resume(struct dm_target *ti)
 {
+	int i;
+	uint64_t failed_devices, cleared_failed_devices = 0;
+	unsigned long flags;
+	struct dm_raid_superblock *sb;
 	struct raid_set *rs = ti->private;
+	struct md_rdev *r;
 
 	set_bit(MD_CHANGE_DEVS, &rs->md.flags);
 	if (!rs->bitmap_loaded) {
 		bitmap_load(&rs->md);
 		rs->bitmap_loaded = 1;
+	} else {
+		/*
+		 * A secondary resume while the device is active.
+		 * Take this opportunity to check whether any failed
+		 * devices are reachable again.
+		 */
+		for (i = 0; i < rs->md.raid_disks; i++) {
+			r = &rs->dev[i].rdev;
+			if (test_bit(Faulty, &r->flags) && r->sb_page &&
+			    sync_page_io(r, 0, r->sb_size,
+					 r->sb_page, READ, 1)) {
+				DMINFO("Faulty device #%d has readable super"
+				       "block. Attempting to revive it.", i);
+				r->raid_disk = i;
+				r->saved_raid_disk = i;
+				flags = r->flags;
+				clear_bit(Faulty, &r->flags);
+				clear_bit(WriteErrorSeen, &r->flags);
+				clear_bit(In_sync, &r->flags);
+				if (r->mddev->pers->hot_add_disk(r->mddev, r)) {
+					r->raid_disk = -1;
+					r->saved_raid_disk = -1;
+					r->flags = flags;
+				} else {
+					r->recovery_offset = 0;
+					cleared_failed_devices |= 1 << i;
+				}
+			}
+		}
+		if (cleared_failed_devices) {
+			rdev_for_each(r, &rs->md) {
+				sb = page_address(r->sb_page);
+				failed_devices = le64_to_cpu(sb->failed_devices);
+				failed_devices &= ~cleared_failed_devices;
+				sb->failed_devices = cpu_to_le64(failed_devices);
+			}
+		}
 	}
 
 	clear_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
@@ -1588,7 +1630,7 @@ static void raid_resume(struct dm_target *ti)
 
 static struct target_type raid_target = {
 	.name = "raid",
-	.version = {1, 5, 0},
+	.version = {1, 5, 1},
 	.module = THIS_MODULE,
 	.ctr = raid_ctr,
 	.dtr = raid_dtr,
--
cgit v1.2.3
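
The revival bookkeeping above records failed members in the superblock's 64-bit
failed_devices field, one bit per device index, and clears those bits again once
a member has been successfully re-added. As a stand-alone illustration of that
pattern (not dm-raid code; the helper names are made up for this sketch), in
plain user-space C:

#include <stdint.h>
#include <stdio.h>

static uint64_t failed_devices;          /* stands in for sb->failed_devices */

static void mark_failed(int i)
{
	failed_devices |= 1ULL << i;     /* 1ULL keeps the mask 64 bits wide */
}

static void mark_revived(uint64_t cleared)
{
	failed_devices &= ~cleared;      /* same clearing pattern as the resume path */
}

int main(void)
{
	mark_failed(2);
	mark_failed(40);
	printf("failed mask: 0x%llx\n", (unsigned long long)failed_devices);

	mark_revived(1ULL << 2);         /* device #2 became reachable again */
	printf("failed mask: 0x%llx\n", (unsigned long long)failed_devices);
	return 0;
}
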
From f381e71b042af910fbe5f8222792cc5092750993 Mon Sep 17 00:00:00 2001
From: Jonathan Brassow
Date: Wed, 8 May 2013 17:57:13 -0500
Subject: DM RAID: Break-up untidy function

DM RAID: Break-up untidy function

Clean-up excessive indentation by moving some code in raid_resume()
into its own function.

Signed-off-by: Jonathan Brassow
Signed-off-by: NeilBrown
---
 drivers/md/dm-raid.c | 72 ++++++++++++++++++++++++++++------------------------
 1 file changed, 39 insertions(+), 33 deletions(-)

(limited to 'drivers/md/dm-raid.c')

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index facaf9142d5a..59d15ec0ba81 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1572,15 +1572,51 @@ static void raid_postsuspend(struct dm_target *ti)
 	mddev_suspend(&rs->md);
 }
 
-static void raid_resume(struct dm_target *ti)
+static void attempt_restore_of_faulty_devices(struct raid_set *rs)
 {
 	int i;
 	uint64_t failed_devices, cleared_failed_devices = 0;
 	unsigned long flags;
 	struct dm_raid_superblock *sb;
-	struct raid_set *rs = ti->private;
 	struct md_rdev *r;
 
+	for (i = 0; i < rs->md.raid_disks; i++) {
+		r = &rs->dev[i].rdev;
+		if (test_bit(Faulty, &r->flags) && r->sb_page &&
+		    sync_page_io(r, 0, r->sb_size, r->sb_page, READ, 1)) {
+			DMINFO("Faulty %s device #%d has readable super block."
+			       " Attempting to revive it.",
+			       rs->raid_type->name, i);
+			r->raid_disk = i;
+			r->saved_raid_disk = i;
+			flags = r->flags;
+			clear_bit(Faulty, &r->flags);
+			clear_bit(WriteErrorSeen, &r->flags);
+			clear_bit(In_sync, &r->flags);
+			if (r->mddev->pers->hot_add_disk(r->mddev, r)) {
+				r->raid_disk = -1;
+				r->saved_raid_disk = -1;
+				r->flags = flags;
+			} else {
+				r->recovery_offset = 0;
+				cleared_failed_devices |= 1 << i;
+			}
+		}
+	}
+	if (cleared_failed_devices) {
+		rdev_for_each(r, &rs->md) {
+			sb = page_address(r->sb_page);
+			failed_devices = le64_to_cpu(sb->failed_devices);
+			failed_devices &= ~cleared_failed_devices;
+			sb->failed_devices = cpu_to_le64(failed_devices);
+		}
+	}
+}
+
+static void raid_resume(struct dm_target *ti)
+{
+	struct raid_set *rs = ti->private;
+
 	set_bit(MD_CHANGE_DEVS, &rs->md.flags);
 	if (!rs->bitmap_loaded) {
 		bitmap_load(&rs->md);
@@ -1591,37 +1627,7 @@ static void raid_resume(struct dm_target *ti)
 		 * Take this opportunity to check whether any failed
 		 * devices are reachable again.
 		 */
-		for (i = 0; i < rs->md.raid_disks; i++) {
-			r = &rs->dev[i].rdev;
-			if (test_bit(Faulty, &r->flags) && r->sb_page &&
-			    sync_page_io(r, 0, r->sb_size,
-					 r->sb_page, READ, 1)) {
-				DMINFO("Faulty device #%d has readable super"
-				       "block. Attempting to revive it.", i);
-				r->raid_disk = i;
-				r->saved_raid_disk = i;
-				flags = r->flags;
-				clear_bit(Faulty, &r->flags);
-				clear_bit(WriteErrorSeen, &r->flags);
-				clear_bit(In_sync, &r->flags);
-				if (r->mddev->pers->hot_add_disk(r->mddev, r)) {
-					r->raid_disk = -1;
-					r->saved_raid_disk = -1;
-					r->flags = flags;
-				} else {
-					r->recovery_offset = 0;
-					cleared_failed_devices |= 1 << i;
-				}
-			}
-		}
-		if (cleared_failed_devices) {
-			rdev_for_each(r, &rs->md) {
-				sb = page_address(r->sb_page);
-				failed_devices = le64_to_cpu(sb->failed_devices);
-				failed_devices &= ~cleared_failed_devices;
-				sb->failed_devices = cpu_to_le64(failed_devices);
-			}
-		}
+		attempt_restore_of_faulty_devices(rs);
 	}
 
 	clear_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
--
cgit v1.2.3
From a4dc163a55964d683f92742705c90c78c0f56c0c Mon Sep 17 00:00:00 2001
From: Jonathan Brassow
Date: Wed, 8 May 2013 18:00:54 -0500
Subject: DM RAID: Fix raid_resume not reviving failed devices in all cases

DM RAID: Fix raid_resume not reviving failed devices in all cases

When a device fails in a RAID array, it is marked as Faulty. Later,
md_check_recovery is called which (through the call chain) calls
'hot_remove_disk' in order to have the personalities remove the device
from use in the array.

Sometimes, it is possible for the array to be suspended before the
personalities get their chance to perform 'hot_remove_disk'. This is
normally not an issue. If the array is deactivated, then the failed
device will be noticed when the array is reinstantiated. If the array
is resumed and the disk is still missing, md_check_recovery will be
called upon resume and 'hot_remove_disk' will be called at that time.
However, (for dm-raid) if the device has been restored, a resume on
the array would cause it to attempt to revive the device by calling
'hot_add_disk'. If 'hot_remove_disk' had not been called, a situation
is then created where the device is thought to concurrently be the
replacement and the device to be replaced. Thus, the device is first
sync'ed with the rest of the array (because it is the replacement
device) and then marked Faulty and removed from the array (because it
is also the device being replaced).

The solution is to check and see if the device had properly been
removed before the array was suspended. This is done by seeing whether
the device's 'raid_disk' field is -1 - a condition that implies that
'md_check_recovery -> remove_and_add_spares (where raid_disk is set
to -1) -> hot_remove_disk' has been called. If 'raid_disk' is not -1,
then 'hot_remove_disk' must be called to complete the removal of the
previously faulty device before it can be revived via 'hot_add_disk'.

Signed-off-by: Jonathan Brassow
Signed-off-by: NeilBrown
---
 drivers/md/dm-raid.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'drivers/md/dm-raid.c')

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 59d15ec0ba81..49f0bd510fb9 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1587,6 +1587,21 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
 			DMINFO("Faulty %s device #%d has readable super block."
 			       " Attempting to revive it.",
 			       rs->raid_type->name, i);
+
+			/*
+			 * Faulty bit may be set, but sometimes the array can
+			 * be suspended before the personalities can respond
+			 * by removing the device from the array (i.e. calling
+			 * 'hot_remove_disk'). If they haven't yet removed
+			 * the failed device, its 'raid_disk' number will be
+			 * '>= 0' - meaning we must call this function
+			 * ourselves.
+			 */
+			if ((r->raid_disk >= 0) &&
+			    (r->mddev->pers->hot_remove_disk(r->mddev, r) != 0))
+				/* Failed to revive this device, try next */
+				continue;
+
 			r->raid_disk = i;
 			r->saved_raid_disk = i;
 			flags = r->flags;
--
cgit v1.2.3
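
The ordering requirement described in the commit message (a faulty member whose
'raid_disk' is still >= 0 must go through 'hot_remove_disk' before
'hot_add_disk' can revive it) can be reduced to a small sketch. The structure
and callbacks below are simplified stand-ins for illustration, not the kernel's
md interfaces:

#include <stdio.h>

struct fake_rdev {
	int raid_disk;                           /* -1 once the personality removed it */
	int (*hot_remove_disk)(struct fake_rdev *r);
	int (*hot_add_disk)(struct fake_rdev *r);
};

/* Revive a transiently failed member at slot i (illustration only). */
static int revive(struct fake_rdev *r, int i)
{
	/*
	 * If the array was suspended before the personality could run its
	 * hot_remove_disk step, raid_disk is still >= 0 and the removal
	 * has to be completed here before the device can be re-added.
	 */
	if (r->raid_disk >= 0 && r->hot_remove_disk(r) != 0)
		return -1;                       /* removal failed; skip this device */

	r->raid_disk = i;
	return r->hot_add_disk(r);               /* comes back as a member to be resynced */
}

static int stub_remove(struct fake_rdev *r) { r->raid_disk = -1; return 0; }
static int stub_add(struct fake_rdev *r) { (void)r; return 0; }

int main(void)
{
	struct fake_rdev r = { .raid_disk = 3,
			       .hot_remove_disk = stub_remove,
			       .hot_add_disk = stub_add };
	int ret = revive(&r, 3);

	printf("revive: %d, raid_disk now %d\n", ret, r.raid_disk);
	return 0;
}
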
From 3f6bbd3ffd7b733dd705e494663e5761aa2cb9c1 Mon Sep 17 00:00:00 2001
From: NeilBrown
Date: Thu, 9 May 2013 10:27:49 +1000
Subject: dm-raid: silence compiler warning on rebuilds_per_group.

This doesn't really need to be initialised, but it doesn't hurt,
silences the compiler, and as it is a counter it makes sense for it
to start at zero.

Signed-off-by: NeilBrown
---
 drivers/md/dm-raid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/md/dm-raid.c')

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 49f0bd510fb9..0f5a6fb5978f 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -380,7 +380,7 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
 static int validate_raid_redundancy(struct raid_set *rs)
 {
 	unsigned i, rebuild_cnt = 0;
-	unsigned rebuilds_per_group, copies, d;
+	unsigned rebuilds_per_group = 0, copies, d;
 	unsigned group_size, last_group_start;
 
 	for (i = 0; i < rs->md.raid_disks; i++)
--
cgit v1.2.3

From b29bebd66dbd492105668ec3515a5ffb0b25e4c1 Mon Sep 17 00:00:00 2001
From: Jingoo Han
Date: Sat, 1 Jun 2013 16:15:16 +0900
Subject: md: replace strict_strto*() with kstrto*()

The usage of strict_strtoul() is not preferred, because
strict_strtoul() is obsolete. Thus, kstrtoul() should be used.

Signed-off-by: Jingoo Han
Signed-off-by: NeilBrown
---
 drivers/md/dm-raid.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers/md/dm-raid.c')

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 0f5a6fb5978f..21e8e4660c59 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -504,7 +504,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 	 * First, parse the in-order required arguments
 	 * "chunk_size" is the only argument of this type.
 	 */
-	if ((strict_strtoul(argv[0], 10, &value) < 0)) {
+	if ((kstrtoul(argv[0], 10, &value) < 0)) {
 		rs->ti->error = "Bad chunk size";
 		return -EINVAL;
 	} else if (rs->raid_type->level == 1) {
@@ -585,7 +585,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 			continue;
 		}
 
-		if (strict_strtoul(argv[i], 10, &value) < 0) {
+		if (kstrtoul(argv[i], 10, &value) < 0) {
 			rs->ti->error = "Bad numerical argument given in raid params";
 			return -EINVAL;
 		}
@@ -1181,7 +1181,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	argv++;
 
 	/* number of RAID parameters */
-	if (strict_strtoul(argv[0], 10, &num_raid_params) < 0) {
+	if (kstrtoul(argv[0], 10, &num_raid_params) < 0) {
 		ti->error = "Cannot understand number of RAID parameters";
 		return -EINVAL;
 	}
@@ -1194,7 +1194,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		return -EINVAL;
 	}
 
-	if ((strict_strtoul(argv[num_raid_params], 10, &num_raid_devs) < 0) ||
+	if ((kstrtoul(argv[num_raid_params], 10, &num_raid_devs) < 0) ||
 	    (num_raid_devs >= INT_MAX)) {
 		ti->error = "Cannot understand number of raid devices";
 		return -EINVAL;
--
cgit v1.2.3
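
kstrtoul() keeps the calling convention of the old strict_strtoul(): 0 on
success, a negative errno on failure, and the parsed value written through the
result pointer, which is why the call sites above only change the function
name. A user-space analogue of that pattern (parse_ulong is a made-up helper
built on strtoul(), since kstrtoul() exists only in the kernel):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * parse_ulong() mirrors the kstrtoul() convention: return 0 on success,
 * a negative value on failure, and store the result only when the whole
 * string parsed cleanly.
 */
static int parse_ulong(const char *s, unsigned int base, unsigned long *res)
{
	char *end;
	unsigned long val;

	errno = 0;
	val = strtoul(s, &end, base);
	if (errno || end == s || *end != '\0')
		return -EINVAL;          /* reject empty, partial or out-of-range input */

	*res = val;
	return 0;
}

int main(void)
{
	unsigned long chunk_size;

	if (parse_ulong("512", 10, &chunk_size) < 0) {
		fprintf(stderr, "Bad chunk size\n");
		return 1;
	}
	printf("chunk_size = %lu\n", chunk_size);
	return 0;
}
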
From c4a39551451666229b4ea5e8aae8ca0131d00665 Mon Sep 17 00:00:00 2001
From: Jonathan Brassow
Date: Tue, 25 Jun 2013 01:23:59 -0500
Subject: MD: Remember the last sync operation that was performed

MD: Remember the last sync operation that was performed

This patch adds a field to the mddev structure to track the last sync
operation that was performed. This is especially useful when it comes
to what is recorded in mismatch_cnt in sysfs. If the last operation
was "data-check", then it reports the number of discrepancies found by
the user-initiated check. If it was a "repair" operation, then it is
reporting the number of discrepancies repaired, etc.

Signed-off-by: Jonathan Brassow
Signed-off-by: NeilBrown
---
 drivers/md/dm-raid.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/md/dm-raid.c')

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 21e8e4660c59..4880b69e2e9e 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1388,6 +1388,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
 		 * performing a "check" of the array.
 		 */
 		DMEMIT(" %llu",
+		       (strcmp(rs->md.last_sync_action, "check")) ? 0 :
 		       (unsigned long long)
 		       atomic64_read(&rs->md.resync_mismatches));
 		break;
@@ -1651,7 +1652,7 @@ static void raid_resume(struct dm_target *ti)
 
 static struct target_type raid_target = {
 	.name = "raid",
-	.version = {1, 5, 1},
+	.version = {1, 5, 2},
 	.module = THIS_MODULE,
 	.ctr = raid_ctr,
 	.dtr = raid_dtr,
--
cgit v1.2.3
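
With the change above, the dm-raid status line reports the mismatch counter
only when the last sync action was a user-initiated "check"; for any other
action it emits 0 rather than a stale count. A reduced sketch of that reporting
decision, using stand-in fields rather than the real mddev structure:

#include <stdio.h>
#include <string.h>

struct fake_md {
	const char *last_sync_action;            /* e.g. "check", "repair", "resync" */
	unsigned long long resync_mismatches;
};

/* Report mismatches only for a user-initiated "check", as raid_status() now does. */
static unsigned long long reported_mismatches(const struct fake_md *md)
{
	return strcmp(md->last_sync_action, "check") ? 0 : md->resync_mismatches;
}

int main(void)
{
	struct fake_md md = { .last_sync_action = "resync", .resync_mismatches = 128 };

	printf("%llu\n", reported_mismatches(&md));   /* 0: last action was not a check */
	md.last_sync_action = "check";
	printf("%llu\n", reported_mismatches(&md));   /* 128: count from the last check */
	return 0;
}
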