From 5e96ee65c8bd629ce093da67a066d3946468298a Mon Sep 17 00:00:00 2001
From: Neil Brown <neilb@notabene.brown>
Date: Sat, 28 Jun 2008 08:31:24 +1000
Subject: Allow setting start point for requested check/repair

This makes it possible to just resync a small part of an array.
e.g. if a drive reports that it has questionable sectors,
a 'repair' of just the region covering those sectors will
cause them to be read and, if there is an error, re-written
with correct data.

Signed-off-by: Neil Brown <neilb@suse.de>
---
 include/linux/raid/md_k.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux/raid/md_k.h')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 3dea9f545c8f..780e0613e6d5 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -227,6 +227,8 @@ struct mddev_s
 	atomic_t			recovery_active; /* blocks scheduled, but not written */
 	wait_queue_head_t		recovery_wait;
 	sector_t			recovery_cp;
+	sector_t			resync_min;	/* user requested sync
+							 * starts here */
 	sector_t			resync_max;	/* resync should pause
 							 * when it gets here */
 
-- 
cgit v1.2.3


From 72a23c211e4587859d5bf61ac4962d76e593fb02 Mon Sep 17 00:00:00 2001
From: Neil Brown <neilb@notabene.brown>
Date: Sat, 28 Jun 2008 08:31:41 +1000
Subject: Make sure all changes to md/sync_action are notified.

When the 'resync' thread starts or stops, when we explicitly
set sync_action, or when we determine that there is definitely nothing
to do, we notify sync_action.

To stop "sync_action" from occasionally showing the wrong value,
we introduce a new flags - MD_RECOVERY_RECOVER - to say that a
recovery is probably needed or happening, and we make sure
that we set MD_RECOVERY_RUNNING before clearing MD_RECOVERY_NEEDED.

Signed-off-by: Neil Brown <neilb@suse.de>
---
 include/linux/raid/md_k.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux/raid/md_k.h')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 780e0613e6d5..62aa9c9a6ddc 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -188,6 +188,7 @@ struct mddev_s
 	 * NEEDED:   we might need to start a resync/recover
 	 * RUNNING:  a thread is running, or about to be started
 	 * SYNC:     actually doing a resync, not a recovery
+	 * RECOVER:  doing recovery, or need to try it.
 	 * INTR:     resync needs to be aborted for some reason
 	 * DONE:     thread is done and is waiting to be reaped
 	 * REQUEST:  user-space has requested a sync (used with SYNC)
@@ -198,6 +199,7 @@ struct mddev_s
 	 */
 #define	MD_RECOVERY_RUNNING	0
 #define	MD_RECOVERY_SYNC	1
+#define	MD_RECOVERY_RECOVER	2
 #define	MD_RECOVERY_INTR	3
 #define	MD_RECOVERY_DONE	4
 #define	MD_RECOVERY_NEEDED	5
-- 
cgit v1.2.3


From 526647320e696f434647f38421a6ecf65b859c43 Mon Sep 17 00:00:00 2001
From: Neil Brown <neilb@notabene.brown>
Date: Sat, 28 Jun 2008 08:31:44 +1000
Subject: Make sure all changes to md/dev-XX/state are notified

The important state change happens during an interrupt
in md_error.  So just set a flag there and call sysfs_notify
later in process context.

Signed-off-by: Neil Brown <neilb@suse.de>
---
 include/linux/raid/md_k.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux/raid/md_k.h')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 62aa9c9a6ddc..df30c4395875 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -87,6 +87,9 @@ struct mdk_rdev_s
 #define Blocked		8		/* An error occured on an externally
 					 * managed array, don't allow writes
 					 * until it is cleared */
+#define StateChanged	9		/* Faulty or Blocked has changed during
+					 * interrupt, so it needs to be
+					 * notified by the thread */
 	wait_queue_head_t blocked_wait;
 
 	int desc_nr;			/* descriptor index in the superblock */
-- 
cgit v1.2.3


From 0f420358e3a2abc028320ace7783e2e38cae77bf Mon Sep 17 00:00:00 2001
From: Andre Noll <maan@systemlinux.org>
Date: Fri, 11 Jul 2008 22:02:23 +1000
Subject: md: Turn rdev->sb_offset into a sector-based quantity.

Rename it to sb_start to make sure all users have been converted.

Signed-off-by: Andre Noll <maan@systemlinux.org>
Signed-off-by: Neil Brown <neilb@suse.de>
---
 include/linux/raid/md_k.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/raid/md_k.h')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index df30c4395875..e37aaa41efc6 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -59,7 +59,7 @@ struct mdk_rdev_s
 	int		sb_loaded;
 	__u64		sb_events;
 	sector_t	data_offset;	/* start of data in array */
-	sector_t	sb_offset;
+	sector_t 	sb_start;	/* offset of the super block (in 512byte sectors) */
 	int		sb_size;	/* bytes in the superblock */
 	int		preferred_minor;	/* autorun support */
 
-- 
cgit v1.2.3


From f233ea5c9e0d8b95e4283bf6a3436b88f6fd3586 Mon Sep 17 00:00:00 2001
From: Andre Noll <maan@systemlinux.org>
Date: Mon, 21 Jul 2008 17:05:22 +1000
Subject: md: Make mddev->array_size sector-based.

This patch renames the array_size field of struct mddev_s to array_sectors
and converts all instances to use units of 512 byte sectors instead of 1k
blocks.

Signed-off-by: Andre Noll <maan@systemlinux.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/md_k.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/raid/md_k.h')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index e37aaa41efc6..6f72b47ae41c 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -150,7 +150,7 @@ struct mddev_s
 	int				raid_disks;
 	int				max_disks;
 	sector_t			size; /* used size of component devices */
-	sector_t			array_size; /* exported array size */
+	sector_t			array_sectors; /* exported array size */
 	__u64				events;
 
 	char				uuid[16];
-- 
cgit v1.2.3


From f2ea68cf42aafdd93393b6b8b20fc3c2b5f4390c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 21 Jul 2008 17:05:25 +1000
Subject: md: only count actual openers as access which prevent a 'stop'

Open isn't the only thing that increments ->active.  e.g. reading
/proc/mdstat will increment it briefly.  So to avoid false positives
in testing for concurrent access, introduce a new counter that counts
just the number of times the md device it open.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/md_k.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux/raid/md_k.h')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 6f72b47ae41c..4bef4791d80d 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -215,7 +215,8 @@ struct mddev_s
 
 	int				in_sync;	/* know to not need resync */
 	struct mutex			reconfig_mutex;
-	atomic_t			active;
+	atomic_t			active;		/* general refcount */
+	atomic_t			openers;	/* number of active opens */
 
 	int				changed;	/* true if we might need to reread partition info */
 	int				degraded;	/* whether md should consider
-- 
cgit v1.2.3


From 4b80991c6cb9efa607bc4fd6f3ecdf5511c31bb0 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 21 Jul 2008 17:05:25 +1000
Subject: md: Protect access to mddev->disks list using RCU

All modifications and most access to the mddev->disks list are made
under the reconfig_mutex lock.  However there are three places where
the list is walked without any locking.  If a reconfig happens at this
time, havoc (and oops) can ensue.

So use RCU to protect these accesses:
  - wrap them in rcu_read_{,un}lock()
  - use list_for_each_entry_rcu
  - add to the list with list_add_rcu
  - delete from the list with list_del_rcu
  - delay the 'free' with call_rcu rather than schedule_work

Note that export_rdev did a list_del_init on this list.  In almost all
cases the entry was not in the list anymore so it was a no-op and so
safe.  It is no longer safe as after list_del_rcu we may not touch
the list_head.
An audit shows that export_rdev is called:
  - after unbind_rdev_from_array, in which case the delete has
     already been done,
  - after bind_rdev_to_array fails, in which case the delete isn't needed.
  - before the device has been put on a list at all (e.g. in
      add_new_disk where reading the superblock fails).
  - and in autorun devices after a failure when the device is on a
      different list.

So remove the list_del_init call from export_rdev, and add it back
immediately before the called to export_rdev for that last case.

Note also that ->same_set is sometimes used for lists other than
mddev->list (e.g. candidates).  In these cases rcu is not needed.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/md_k.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux/raid/md_k.h')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 4bef4791d80d..9f2549ac0e2d 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -339,6 +339,9 @@ static inline char * mdname (mddev_t * mddev)
 #define rdev_for_each(rdev, tmp, mddev)				\
 	rdev_for_each_list(rdev, tmp, (mddev)->disks)
 
+#define rdev_for_each_rcu(rdev, mddev)				\
+	list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
+
 typedef struct mdk_thread_s {
 	void			(*run) (mddev_t *mddev);
 	mddev_t			*mddev;
-- 
cgit v1.2.3


From d8e64406a037a64444175730294e449c9e21f5ec Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 23 Jul 2008 13:09:48 -0700
Subject: md: delay notification of 'active_idle' to the recovery thread

sysfs_notify might sleep, so do not call it from md_safemode_timeout.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/raid/md_k.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/raid/md_k.h')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 9f2549ac0e2d..c200b9a34aff 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -128,6 +128,7 @@ struct mddev_s
 #define MD_CHANGE_DEVS	0	/* Some device status has changed */
 #define MD_CHANGE_CLEAN 1	/* transition to or from 'clean' */
 #define MD_CHANGE_PENDING 2	/* superblock update in progress */
+#define MD_NOTIFY_ARRAY_STATE 3	/* atomic context wants to notify userspace */
 
 	int				ro;
 
-- 
cgit v1.2.3