From 5e96ee65c8bd629ce093da67a066d3946468298a Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Sat, 28 Jun 2008 08:31:24 +1000 Subject: Allow setting start point for requested check/repair This makes it possible to just resync a small part of an array. e.g. if a drive reports that it has questionable sectors, a 'repair' of just the region covering those sectors will cause them to be read and, if there is an error, re-written with correct data. Signed-off-by: Neil Brown --- include/linux/raid/md_k.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/raid/md_k.h') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 3dea9f545c8f..780e0613e6d5 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -227,6 +227,8 @@ struct mddev_s atomic_t recovery_active; /* blocks scheduled, but not written */ wait_queue_head_t recovery_wait; sector_t recovery_cp; + sector_t resync_min; /* user requested sync + * starts here */ sector_t resync_max; /* resync should pause * when it gets here */ -- cgit v1.2.3 From 72a23c211e4587859d5bf61ac4962d76e593fb02 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Sat, 28 Jun 2008 08:31:41 +1000 Subject: Make sure all changes to md/sync_action are notified. When the 'resync' thread starts or stops, when we explicitly set sync_action, or when we determine that there is definitely nothing to do, we notify sync_action. To stop "sync_action" from occasionally showing the wrong value, we introduce a new flags - MD_RECOVERY_RECOVER - to say that a recovery is probably needed or happening, and we make sure that we set MD_RECOVERY_RUNNING before clearing MD_RECOVERY_NEEDED. Signed-off-by: Neil Brown --- include/linux/raid/md_k.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/raid/md_k.h') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 780e0613e6d5..62aa9c9a6ddc 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -188,6 +188,7 @@ struct mddev_s * NEEDED: we might need to start a resync/recover * RUNNING: a thread is running, or about to be started * SYNC: actually doing a resync, not a recovery + * RECOVER: doing recovery, or need to try it. * INTR: resync needs to be aborted for some reason * DONE: thread is done and is waiting to be reaped * REQUEST: user-space has requested a sync (used with SYNC) @@ -198,6 +199,7 @@ struct mddev_s */ #define MD_RECOVERY_RUNNING 0 #define MD_RECOVERY_SYNC 1 +#define MD_RECOVERY_RECOVER 2 #define MD_RECOVERY_INTR 3 #define MD_RECOVERY_DONE 4 #define MD_RECOVERY_NEEDED 5 -- cgit v1.2.3 From 526647320e696f434647f38421a6ecf65b859c43 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Sat, 28 Jun 2008 08:31:44 +1000 Subject: Make sure all changes to md/dev-XX/state are notified The important state change happens during an interrupt in md_error. So just set a flag there and call sysfs_notify later in process context. Signed-off-by: Neil Brown --- include/linux/raid/md_k.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/raid/md_k.h') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 62aa9c9a6ddc..df30c4395875 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -87,6 +87,9 @@ struct mdk_rdev_s #define Blocked 8 /* An error occured on an externally * managed array, don't allow writes * until it is cleared */ +#define StateChanged 9 /* Faulty or Blocked has changed during + * interrupt, so it needs to be + * notified by the thread */ wait_queue_head_t blocked_wait; int desc_nr; /* descriptor index in the superblock */ -- cgit v1.2.3 From 0f420358e3a2abc028320ace7783e2e38cae77bf Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Fri, 11 Jul 2008 22:02:23 +1000 Subject: md: Turn rdev->sb_offset into a sector-based quantity. Rename it to sb_start to make sure all users have been converted. Signed-off-by: Andre Noll Signed-off-by: Neil Brown --- include/linux/raid/md_k.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/raid/md_k.h') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index df30c4395875..e37aaa41efc6 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -59,7 +59,7 @@ struct mdk_rdev_s int sb_loaded; __u64 sb_events; sector_t data_offset; /* start of data in array */ - sector_t sb_offset; + sector_t sb_start; /* offset of the super block (in 512byte sectors) */ int sb_size; /* bytes in the superblock */ int preferred_minor; /* autorun support */ -- cgit v1.2.3 From f233ea5c9e0d8b95e4283bf6a3436b88f6fd3586 Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Mon, 21 Jul 2008 17:05:22 +1000 Subject: md: Make mddev->array_size sector-based. This patch renames the array_size field of struct mddev_s to array_sectors and converts all instances to use units of 512 byte sectors instead of 1k blocks. Signed-off-by: Andre Noll Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/raid/md_k.h') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index e37aaa41efc6..6f72b47ae41c 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -150,7 +150,7 @@ struct mddev_s int raid_disks; int max_disks; sector_t size; /* used size of component devices */ - sector_t array_size; /* exported array size */ + sector_t array_sectors; /* exported array size */ __u64 events; char uuid[16]; -- cgit v1.2.3 From f2ea68cf42aafdd93393b6b8b20fc3c2b5f4390c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 21 Jul 2008 17:05:25 +1000 Subject: md: only count actual openers as access which prevent a 'stop' Open isn't the only thing that increments ->active. e.g. reading /proc/mdstat will increment it briefly. So to avoid false positives in testing for concurrent access, introduce a new counter that counts just the number of times the md device it open. Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/raid/md_k.h') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 6f72b47ae41c..4bef4791d80d 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -215,7 +215,8 @@ struct mddev_s int in_sync; /* know to not need resync */ struct mutex reconfig_mutex; - atomic_t active; + atomic_t active; /* general refcount */ + atomic_t openers; /* number of active opens */ int changed; /* true if we might need to reread partition info */ int degraded; /* whether md should consider -- cgit v1.2.3 From 4b80991c6cb9efa607bc4fd6f3ecdf5511c31bb0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 21 Jul 2008 17:05:25 +1000 Subject: md: Protect access to mddev->disks list using RCU All modifications and most access to the mddev->disks list are made under the reconfig_mutex lock. However there are three places where the list is walked without any locking. If a reconfig happens at this time, havoc (and oops) can ensue. So use RCU to protect these accesses: - wrap them in rcu_read_{,un}lock() - use list_for_each_entry_rcu - add to the list with list_add_rcu - delete from the list with list_del_rcu - delay the 'free' with call_rcu rather than schedule_work Note that export_rdev did a list_del_init on this list. In almost all cases the entry was not in the list anymore so it was a no-op and so safe. It is no longer safe as after list_del_rcu we may not touch the list_head. An audit shows that export_rdev is called: - after unbind_rdev_from_array, in which case the delete has already been done, - after bind_rdev_to_array fails, in which case the delete isn't needed. - before the device has been put on a list at all (e.g. in add_new_disk where reading the superblock fails). - and in autorun devices after a failure when the device is on a different list. So remove the list_del_init call from export_rdev, and add it back immediately before the called to export_rdev for that last case. Note also that ->same_set is sometimes used for lists other than mddev->list (e.g. candidates). In these cases rcu is not needed. Signed-off-by: NeilBrown --- include/linux/raid/md_k.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/raid/md_k.h') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 4bef4791d80d..9f2549ac0e2d 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -339,6 +339,9 @@ static inline char * mdname (mddev_t * mddev) #define rdev_for_each(rdev, tmp, mddev) \ rdev_for_each_list(rdev, tmp, (mddev)->disks) +#define rdev_for_each_rcu(rdev, mddev) \ + list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set) + typedef struct mdk_thread_s { void (*run) (mddev_t *mddev); mddev_t *mddev; -- cgit v1.2.3 From d8e64406a037a64444175730294e449c9e21f5ec Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 23 Jul 2008 13:09:48 -0700 Subject: md: delay notification of 'active_idle' to the recovery thread sysfs_notify might sleep, so do not call it from md_safemode_timeout. Signed-off-by: Dan Williams --- include/linux/raid/md_k.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/raid/md_k.h') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 9f2549ac0e2d..c200b9a34aff 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -128,6 +128,7 @@ struct mddev_s #define MD_CHANGE_DEVS 0 /* Some device status has changed */ #define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */ #define MD_CHANGE_PENDING 2 /* superblock update in progress */ +#define MD_NOTIFY_ARRAY_STATE 3 /* atomic context wants to notify userspace */ int ro; -- cgit v1.2.3