15 years ago · 9341625307
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2087,6 +2087,7 @@ static void sync_sbs(mddev_t * mddev, int nospares)
 
				 	/* First make sure individual recovery_offsets are correct */
			
 
				 	list_for_each_entry(rdev, &mddev->disks, same_set) {
			
 
				 		if (rdev->raid_disk >= 0 &&
			
 
				+		    mddev->delta_disks >= 0 &&
			
 
				 		    !test_bit(In_sync, &rdev->flags) &&
			
 
				 		    mddev->curr_resync_completed > rdev->recovery_offset)
			
 
				 				rdev->recovery_offset = mddev->curr_resync_completed;
			
@@ -3001,6 +3002,9 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 
				 		return -EINVAL;
			
 
				 	}
			
 
				 
			
 
				+	list_for_each_entry(rdev, &mddev->disks, same_set)
			
 
				+		rdev->new_raid_disk = rdev->raid_disk;
			
 
				+
			
 
				 	/* ->takeover must set new_* and/or delta_disks
			
 
				 	 * if it succeeds, and may set them when it fails.
			
 
				 	 */
			
@@ -3051,13 +3055,35 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 
				 		mddev->safemode = 0;
			
 
				 	}
			
 
				 
			
 
				-	module_put(mddev->pers->owner);
			
 
				-	/* Invalidate devices that are now superfluous */
			
 
				-	list_for_each_entry(rdev, &mddev->disks, same_set)
			
 
				-		if (rdev->raid_disk >= mddev->raid_disks) {
			
 
				-			rdev->raid_disk = -1;
			
 
				+	list_for_each_entry(rdev, &mddev->disks, same_set) {
			
 
				+		char nm[20];
			
 
				+		if (rdev->raid_disk < 0)
			
 
				+			continue;
			
 
				+		if (rdev->new_raid_disk >= mddev->raid_disks)
			
 
				+			rdev->new_raid_disk = -1;
			
 
				+		if (rdev->new_raid_disk == rdev->raid_disk)
			
 
				+			continue;
			
 
				+		sprintf(nm, "rd%d", rdev->raid_disk);
			
 
				+		sysfs_remove_link(&mddev->kobj, nm);
			
 
				+	}
			
 
				+	list_for_each_entry(rdev, &mddev->disks, same_set) {
			
 
				+		if (rdev->raid_disk < 0)
			
 
				+			continue;
			
 
				+		if (rdev->new_raid_disk == rdev->raid_disk)
			
 
				+			continue;
			
 
				+		rdev->raid_disk = rdev->new_raid_disk;
			
 
				+		if (rdev->raid_disk < 0)
			
 
				 			clear_bit(In_sync, &rdev->flags);
			
 
				+		else {
			
 
				+			char nm[20];
			
 
				+			sprintf(nm, "rd%d", rdev->raid_disk);
			
 
				+			if(sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
			
 
				+				printk("md: cannot register %s for %s after level change\n",
			
 
				+				       nm, mdname(mddev));
			
 
				 		}
			
 
				+	}
			
 
				+
			
 
				+	module_put(mddev->pers->owner);
			
 
				 	mddev->pers = pers;
			
 
				 	mddev->private = priv;
			
 
				 	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
			
@@ -5895,6 +5921,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
 
				 	atomic_inc(&mddev->openers);
			
 
				 	mutex_unlock(&mddev->open_mutex);
			
 
				 
			
 
				+	check_disk_size_change(mddev->gendisk, bdev);
			
 
				  out:
			
 
				 	return err;
			
 
				 }
			
@@ -6846,6 +6873,7 @@ void md_do_sync(mddev_t *mddev)
 
				 			rcu_read_lock();
			
 
				 			list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
			
 
				 				if (rdev->raid_disk >= 0 &&
			
 
				+				    mddev->delta_disks >= 0 &&
			
 
				 				    !test_bit(Faulty, &rdev->flags) &&
			
 
				 				    !test_bit(In_sync, &rdev->flags) &&
			
 
				 				    rdev->recovery_offset < mddev->curr_resync)
			
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -78,6 +78,9 @@ struct mdk_rdev_s
 
				 
			
 
				 	int desc_nr;			/* descriptor index in the superblock */
			
 
				 	int raid_disk;			/* role of device in array */
			
 
				+	int new_raid_disk;		/* role that the device will have in
			
 
				+					 * the array after a level-change completes.
			
 
				+					 */
			
 
				 	int saved_raid_disk;		/* role that device used to have in the
			
 
				 					 * array and could again if we did a partial
			
 
				 					 * resync from the bitmap
			
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -173,9 +173,11 @@ static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf)
 
				 	list_for_each_entry(rdev1, &mddev->disks, same_set) {
			
 
				 		int j = rdev1->raid_disk;
			
 
				 
			
 
				-		if (mddev->level == 10)
			
 
				+		if (mddev->level == 10) {
			
 
				 			/* taking over a raid10-n2 array */
			
 
				 			j /= 2;
			
 
				+			rdev1->new_raid_disk = j;
			
 
				+		}
			
 
				 
			
 
				 		if (j < 0 || j >= mddev->raid_disks) {
			
 
				 			printk(KERN_ERR "md/raid0:%s: bad disk number %d - "
			
@@ -361,12 +363,6 @@ static int raid0_run(mddev_t *mddev)
 
				 		mddev->private = conf;
			
 
				 	}
			
 
				 	conf = mddev->private;
			
 
				-	if (conf->scale_raid_disks) {
			
 
				-		int i;
			
 
				-		for (i=0; i < conf->strip_zone[0].nb_dev; i++)
			
 
				-			conf->devlist[i]->raid_disk /= conf->scale_raid_disks;
			
 
				-		/* FIXME update sysfs rd links */
			
 
				-	}
			
 
				 
			
 
				 	/* calculate array device size */
			
 
				 	md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
			
@@ -573,7 +569,7 @@ static void raid0_status(struct seq_file *seq, mddev_t *mddev)
 
				 	return;
			
 
				 }
			
 
				 
			
 
				-static void *raid0_takeover_raid5(mddev_t *mddev)
			
 
				+static void *raid0_takeover_raid45(mddev_t *mddev)
			
 
				 {
			
 
				 	mdk_rdev_t *rdev;
			
 
				 	raid0_conf_t *priv_conf;
			
@@ -596,6 +592,7 @@ static void *raid0_takeover_raid5(mddev_t *mddev)
 
				 
			
 
				 	/* Set new parameters */
			
 
				 	mddev->new_level = 0;
			
 
				+	mddev->new_layout = 0;
			
 
				 	mddev->new_chunk_sectors = mddev->chunk_sectors;
			
 
				 	mddev->raid_disks--;
			
 
				 	mddev->delta_disks = -1;
			
@@ -635,6 +632,7 @@ static void *raid0_takeover_raid10(mddev_t *mddev)
 
				 
			
 
				 	/* Set new parameters */
			
 
				 	mddev->new_level = 0;
			
 
				+	mddev->new_layout = 0;
			
 
				 	mddev->new_chunk_sectors = mddev->chunk_sectors;
			
 
				 	mddev->delta_disks = - mddev->raid_disks / 2;
			
 
				 	mddev->raid_disks += mddev->delta_disks;
			
@@ -643,19 +641,22 @@ static void *raid0_takeover_raid10(mddev_t *mddev)
 
				 	mddev->recovery_cp = MaxSector;
			
 
				 
			
 
				 	create_strip_zones(mddev, &priv_conf);
			
 
				-	priv_conf->scale_raid_disks = 2;
			
 
				 	return priv_conf;
			
 
				 }
			
 
				 
			
 
				 static void *raid0_takeover(mddev_t *mddev)
			
 
				 {
			
 
				 	/* raid0 can take over:
			
 
				+	 *  raid4 - if all data disks are active.
			
 
				 	 *  raid5 - providing it is Raid4 layout and one disk is faulty
			
 
				 	 *  raid10 - assuming we have all necessary active disks
			
 
				 	 */
			
 
				+	if (mddev->level == 4)
			
 
				+		return raid0_takeover_raid45(mddev);
			
 
				+
			
 
				 	if (mddev->level == 5) {
			
 
				 		if (mddev->layout == ALGORITHM_PARITY_N)
			
 
				-			return raid0_takeover_raid5(mddev);
			
 
				+			return raid0_takeover_raid45(mddev);
			
 
				 
			
 
				 		printk(KERN_ERR "md/raid0:%s: Raid can only takeover Raid5 with layout: %d\n",
			
 
				 		       mdname(mddev), ALGORITHM_PARITY_N);
			
--- a/drivers/md/raid0.h
+++ b/drivers/md/raid0.h
@@ -13,9 +13,6 @@ struct raid0_private_data
 
				 	struct strip_zone *strip_zone;
			
 
				 	mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */
			
 
				 	int nr_strip_zones;
			
 
				-	int scale_raid_disks; /* divide rdev->raid_disks by this in run()
			
 
				-			       * to handle conversion from raid10
			
 
				-			       */
			
 
				 };
			
 
				 
			
 
				 typedef struct raid0_private_data raid0_conf_t;
			
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1482,14 +1482,14 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
 
				 	int sectors = r10_bio->sectors;
			
 
				 	mdk_rdev_t*rdev;
			
 
				 	int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
			
 
				+	int d = r10_bio->devs[r10_bio->read_slot].devnum;
			
 
				 
			
 
				 	rcu_read_lock();
			
 
				-	{
			
 
				-		int d = r10_bio->devs[r10_bio->read_slot].devnum;
			
 
				+	rdev = rcu_dereference(conf->mirrors[d].rdev);
			
 
				+	if (rdev) { /* If rdev is not NULL */
			
 
				 		char b[BDEVNAME_SIZE];
			
 
				 		int cur_read_error_count = 0;
			
 
				 
			
 
				-		rdev = rcu_dereference(conf->mirrors[d].rdev);
			
 
				 		bdevname(rdev->bdev, b);
			
 
				 
			
 
				 		if (test_bit(Faulty, &rdev->flags)) {
			
@@ -1530,7 +1530,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
 
				 
			
 
				 		rcu_read_lock();
			
 
				 		do {
			
 
				-			int d = r10_bio->devs[sl].devnum;
			
 
				+			d = r10_bio->devs[sl].devnum;
			
 
				 			rdev = rcu_dereference(conf->mirrors[d].rdev);
			
 
				 			if (rdev &&
			
 
				 			    test_bit(In_sync, &rdev->flags)) {
			
@@ -1564,7 +1564,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
 
				 		rcu_read_lock();
			
 
				 		while (sl != r10_bio->read_slot) {
			
 
				 			char b[BDEVNAME_SIZE];
			
 
				-			int d;
			
 
				+
			
 
				 			if (sl==0)
			
 
				 				sl = conf->copies;
			
 
				 			sl--;
			
@@ -1601,7 +1601,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
 
				 		}
			
 
				 		sl = start;
			
 
				 		while (sl != r10_bio->read_slot) {
			
 
				-			int d;
			
 
				+
			
 
				 			if (sl==0)
			
 
				 				sl = conf->copies;
			
 
				 			sl--;
			
@@ -2161,22 +2161,22 @@ static conf_t *setup_conf(mddev_t *mddev)
 
				 	sector_t stride, size;
			
 
				 	int err = -EINVAL;
			
 
				 
			
 
				-	if (mddev->chunk_sectors < (PAGE_SIZE >> 9) ||
			
 
				-	    !is_power_of_2(mddev->chunk_sectors)) {
			
 
				+	if (mddev->new_chunk_sectors < (PAGE_SIZE >> 9) ||
			
 
				+	    !is_power_of_2(mddev->new_chunk_sectors)) {
			
 
				 		printk(KERN_ERR "md/raid10:%s: chunk size must be "
			
 
				 		       "at least PAGE_SIZE(%ld) and be a power of 2.\n",
			
 
				 		       mdname(mddev), PAGE_SIZE);
			
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	nc = mddev->layout & 255;
			
 
				-	fc = (mddev->layout >> 8) & 255;
			
 
				-	fo = mddev->layout & (1<<16);
			
 
				+	nc = mddev->new_layout & 255;
			
 
				+	fc = (mddev->new_layout >> 8) & 255;
			
 
				+	fo = mddev->new_layout & (1<<16);
			
 
				 
			
 
				 	if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks ||
			
 
				-	    (mddev->layout >> 17)) {
			
 
				+	    (mddev->new_layout >> 17)) {
			
 
				 		printk(KERN_ERR "md/raid10:%s: unsupported raid10 layout: 0x%8x\n",
			
 
				-		       mdname(mddev), mddev->layout);
			
 
				+		       mdname(mddev), mddev->new_layout);
			
 
				 		goto out;
			
 
				 	}
			
 
				 
			
@@ -2241,7 +2241,6 @@ static conf_t *setup_conf(mddev_t *mddev)
 
				 	if (!conf->thread)
			
 
				 		goto out;
			
 
				 
			
 
				-	conf->scale_disks = 0;
			
 
				 	conf->mddev = mddev;
			
 
				 	return conf;
			
 
				 
			
@@ -2300,11 +2299,6 @@ static int run(mddev_t *mddev)
 
				 		if (disk_idx >= conf->raid_disks
			
 
				 		    || disk_idx < 0)
			
 
				 			continue;
			
 
				-		if (conf->scale_disks) {
			
 
				-			disk_idx *= conf->scale_disks;
			
 
				-			rdev->raid_disk = disk_idx;
			
 
				-			/* MOVE 'rd%d' link !! */
			
 
				-		}
			
 
				 		disk = conf->mirrors + disk_idx;
			
 
				 
			
 
				 		disk->rdev = rdev;
			
@@ -2435,26 +2429,22 @@ static void *raid10_takeover_raid0(mddev_t *mddev)
 
				 		return ERR_PTR(-EINVAL);
			
 
				 	}
			
 
				 
			
 
				-	/* Update slot numbers to obtain
			
 
				-	 * degraded raid10 with missing mirrors
			
 
				-	 */
			
 
				-	list_for_each_entry(rdev, &mddev->disks, same_set) {
			
 
				-		rdev->raid_disk *= 2;
			
 
				-	}
			
 
				-
			
 
				 	/* Set new parameters */
			
 
				 	mddev->new_level = 10;
			
 
				 	/* new layout: far_copies = 1, near_copies = 2 */
			
 
				 	mddev->new_layout = (1<<8) + 2;
			
 
				 	mddev->new_chunk_sectors = mddev->chunk_sectors;
			
 
				 	mddev->delta_disks = mddev->raid_disks;
			
 
				-	mddev->degraded = mddev->raid_disks;
			
 
				 	mddev->raid_disks *= 2;
			
 
				 	/* make sure it will be not marked as dirty */
			
 
				 	mddev->recovery_cp = MaxSector;
			
 
				 
			
 
				 	conf = setup_conf(mddev);
			
 
				-	conf->scale_disks = 2;
			
 
				+	if (!IS_ERR(conf))
			
 
				+		list_for_each_entry(rdev, &mddev->disks, same_set)
			
 
				+			if (rdev->raid_disk >= 0)
			
 
				+				rdev->new_raid_disk = rdev->raid_disk * 2;
			
 
				+		
			
 
				 	return conf;
			
 
				 }
			
 
				 
			
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -38,11 +38,6 @@ struct r10_private_data_s {
 
				 	int chunk_shift; /* shift from chunks to sectors */
			
 
				 	sector_t chunk_mask;
			
 
				 
			
 
				-	int			scale_disks;  /* When starting array, multiply
			
 
				-					       * each ->raid_disk by this.
			
 
				-					       * Need for raid0->raid10 migration
			
 
				-					       */
			
 
				-
			
 
				 	struct list_head	retry_list;
			
 
				 	/* queue pending writes and submit them on unplug */
			
 
				 	struct bio_list		pending_bio_list;
			
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -277,12 +277,13 @@ out:
 
				 	return sh;
			
 
				 }
			
 
				 
			
 
				-static void shrink_buffers(struct stripe_head *sh, int num)
			
 
				+static void shrink_buffers(struct stripe_head *sh)
			
 
				 {
			
 
				 	struct page *p;
			
 
				 	int i;
			
 
				+	int num = sh->raid_conf->pool_size;
			
 
				 
			
 
				-	for (i=0; i<num ; i++) {
			
 
				+	for (i = 0; i < num ; i++) {
			
 
				 		p = sh->dev[i].page;
			
 
				 		if (!p)
			
 
				 			continue;
			
@@ -291,11 +292,12 @@ static void shrink_buffers(struct stripe_head *sh, int num)
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static int grow_buffers(struct stripe_head *sh, int num)
			
 
				+static int grow_buffers(struct stripe_head *sh)
			
 
				 {
			
 
				 	int i;
			
 
				+	int num = sh->raid_conf->pool_size;
			
 
				 
			
 
				-	for (i=0; i<num; i++) {
			
 
				+	for (i = 0; i < num; i++) {
			
 
				 		struct page *page;
			
 
				 
			
 
				 		if (!(page = alloc_page(GFP_KERNEL))) {
			
@@ -364,6 +366,73 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector,
 
				 	return NULL;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Returns non-zero if the array has failed; checked when deciding whether to:
			
 
				+ *  - start an array
			
 
				+ *  - remove non-faulty devices
			
 
				+ *  - add a spare
			
 
				+ *  - allow a reshape
			
 
				+ * With no reshape in progress (reshape_position == MaxSector) the array has
			
 
				+ * failed when mddev->degraded exceeds conf->max_degraded.
			
 
				+ * During a reshape, the before and after sections are each counted and
			
 
				+ * compared with max_degraded separately.  This is because some failed
			
 
				+ * devices may only affect one of the two sections, and some non-in_sync
			
 
				+ * devices may be in-sync in the section most affected by failed devices.
			
 
				+ */
			
 
				+static int has_failed(raid5_conf_t *conf)
			
 
				+{
			
 
				+	int degraded;
			
 
				+	int i;
			
 
				+	if (conf->mddev->reshape_position == MaxSector)
			
 
				+		return conf->mddev->degraded > conf->max_degraded;
			
 
				+
			
 
				+	rcu_read_lock();
			
 
				+	degraded = 0;	/* count for the 'previous' (before) section */
			
 
				+	for (i = 0; i < conf->previous_raid_disks; i++) {
			
 
				+		mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
			
 
				+		if (!rdev || test_bit(Faulty, &rdev->flags))
			
 
				+			degraded++;
			
 
				+		else if (test_bit(In_sync, &rdev->flags))
			
 
				+			; /* in-sync: does not count as degraded */
			
 
				+		else
			
 
				+			/* neither in-sync nor faulty.
			
 
				+			 * If the reshape increases the number of devices,
			
 
				+			 * this is being recovered by the reshape, so
			
 
				+			 * this 'previous' section is not in_sync.
			
 
				+			 * If the number of devices is being reduced however,
			
 
				+			 * the device can only be part of the array if
			
 
				+			 * we are reverting a reshape, so this section will
			
 
				+			 * be in-sync.
			
 
				+			 */
			
 
				+			if (conf->raid_disks >= conf->previous_raid_disks)
			
 
				+				degraded++;
			
 
				+	}
			
 
				+	rcu_read_unlock();
			
 
				+	if (degraded > conf->max_degraded)
			
 
				+		return 1;
			
 
				+	rcu_read_lock();
			
 
				+	degraded = 0;	/* count again for the 'after' section */
			
 
				+	for (i = 0; i < conf->raid_disks; i++) {
			
 
				+		mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
			
 
				+		if (!rdev || test_bit(Faulty, &rdev->flags))
			
 
				+			degraded++;
			
 
				+		else if (test_bit(In_sync, &rdev->flags))
			
 
				+			; /* in-sync: does not count as degraded */
			
 
				+		else
			
 
				+			/* neither in-sync nor faulty.
			
 
				+			 * If reshape increases the number of devices, this
			
 
				+			 * section has already been recovered, else it
			
 
				+			 * almost certainly hasn't.
			
 
				+			 */
			
 
				+			if (conf->raid_disks <= conf->previous_raid_disks)
			
 
				+				degraded++;
			
 
				+	}
			
 
				+	rcu_read_unlock();
			
 
				+	if (degraded > conf->max_degraded)
			
 
				+		return 1;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 static void unplug_slaves(mddev_t *mddev);
			
 
				 static void raid5_unplug_device(struct request_queue *q);
			
 
				 
			
@@ -1240,19 +1309,18 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
 
				 static int grow_one_stripe(raid5_conf_t *conf)
			
 
				 {
			
 
				 	struct stripe_head *sh;
			
 
				-	int disks = max(conf->raid_disks, conf->previous_raid_disks);
			
 
				 	sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL);
			
 
				 	if (!sh)
			
 
				 		return 0;
			
 
				-	memset(sh, 0, sizeof(*sh) + (disks-1)*sizeof(struct r5dev));
			
 
				+	memset(sh, 0, sizeof(*sh) + (conf->pool_size-1)*sizeof(struct r5dev));
			
 
				 	sh->raid_conf = conf;
			
 
				 	spin_lock_init(&sh->lock);
			
 
				 	#ifdef CONFIG_MULTICORE_RAID456
			
 
				 	init_waitqueue_head(&sh->ops.wait_for_ops);
			
 
				 	#endif
			
 
				 
			
 
				-	if (grow_buffers(sh, disks)) {
			
 
				-		shrink_buffers(sh, disks);
			
 
				+	if (grow_buffers(sh)) {
			
 
				+		shrink_buffers(sh);
			
 
				 		kmem_cache_free(conf->slab_cache, sh);
			
 
				 		return 0;
			
 
				 	}
			
@@ -1468,7 +1536,7 @@ static int drop_one_stripe(raid5_conf_t *conf)
 
				 	if (!sh)
			
 
				 		return 0;
			
 
				 	BUG_ON(atomic_read(&sh->count));
			
 
				-	shrink_buffers(sh, conf->pool_size);
			
 
				+	shrink_buffers(sh);
			
 
				 	kmem_cache_free(conf->slab_cache, sh);
			
 
				 	atomic_dec(&conf->active_stripes);
			
 
				 	return 1;
			
@@ -2963,7 +3031,6 @@ static void handle_stripe5(struct stripe_head *sh)
 
				 		mdk_rdev_t *rdev;
			
 
				 
			
 
				 		dev = &sh->dev[i];
			
 
				-		clear_bit(R5_Insync, &dev->flags);
			
 
				 
			
 
				 		pr_debug("check %d: state 0x%lx toread %p read %p write %p "
			
 
				 			"written %p\n",	i, dev->flags, dev->toread, dev->read,
			
@@ -3000,17 +3067,27 @@ static void handle_stripe5(struct stripe_head *sh)
 
				 			blocked_rdev = rdev;
			
 
				 			atomic_inc(&rdev->nr_pending);
			
 
				 		}
			
 
				-		if (!rdev || !test_bit(In_sync, &rdev->flags)) {
			
 
				+		clear_bit(R5_Insync, &dev->flags);
			
 
				+		if (!rdev)
			
 
				+			/* Not in-sync */;
			
 
				+		else if (test_bit(In_sync, &rdev->flags))
			
 
				+			set_bit(R5_Insync, &dev->flags);
			
 
				+		else {
			
 
				+			/* could be in-sync depending on recovery/reshape status */
			
 
				+			if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
			
 
				+				set_bit(R5_Insync, &dev->flags);
			
 
				+		}
			
 
				+		if (!test_bit(R5_Insync, &dev->flags)) {
			
 
				 			/* The ReadError flag will just be confusing now */
			
 
				 			clear_bit(R5_ReadError, &dev->flags);
			
 
				 			clear_bit(R5_ReWrite, &dev->flags);
			
 
				 		}
			
 
				-		if (!rdev || !test_bit(In_sync, &rdev->flags)
			
 
				-		    || test_bit(R5_ReadError, &dev->flags)) {
			
 
				+		if (test_bit(R5_ReadError, &dev->flags))
			
 
				+			clear_bit(R5_Insync, &dev->flags);
			
 
				+		if (!test_bit(R5_Insync, &dev->flags)) {
			
 
				 			s.failed++;
			
 
				 			s.failed_num = i;
			
 
				-		} else
			
 
				-			set_bit(R5_Insync, &dev->flags);
			
 
				+		}
			
 
				 	}
			
 
				 	rcu_read_unlock();
			
 
				 
			
@@ -3244,7 +3321,6 @@ static void handle_stripe6(struct stripe_head *sh)
 
				 	for (i=disks; i--; ) {
			
 
				 		mdk_rdev_t *rdev;
			
 
				 		dev = &sh->dev[i];
			
 
				-		clear_bit(R5_Insync, &dev->flags);
			
 
				 
			
 
				 		pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
			
 
				 			i, dev->flags, dev->toread, dev->towrite, dev->written);
			
@@ -3282,18 +3358,28 @@ static void handle_stripe6(struct stripe_head *sh)
 
				 			blocked_rdev = rdev;
			
 
				 			atomic_inc(&rdev->nr_pending);
			
 
				 		}
			
 
				-		if (!rdev || !test_bit(In_sync, &rdev->flags)) {
			
 
				+		clear_bit(R5_Insync, &dev->flags);
			
 
				+		if (!rdev)
			
 
				+			/* Not in-sync */;
			
 
				+		else if (test_bit(In_sync, &rdev->flags))
			
 
				+			set_bit(R5_Insync, &dev->flags);
			
 
				+		else {
			
 
				+			/* in sync if before recovery_offset */
			
 
				+			if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
			
 
				+				set_bit(R5_Insync, &dev->flags);
			
 
				+		}
			
 
				+		if (!test_bit(R5_Insync, &dev->flags)) {
			
 
				 			/* The ReadError flag will just be confusing now */
			
 
				 			clear_bit(R5_ReadError, &dev->flags);
			
 
				 			clear_bit(R5_ReWrite, &dev->flags);
			
 
				 		}
			
 
				-		if (!rdev || !test_bit(In_sync, &rdev->flags)
			
 
				-		    || test_bit(R5_ReadError, &dev->flags)) {
			
 
				+		if (test_bit(R5_ReadError, &dev->flags))
			
 
				+			clear_bit(R5_Insync, &dev->flags);
			
 
				+		if (!test_bit(R5_Insync, &dev->flags)) {
			
 
				 			if (s.failed < 2)
			
 
				 				r6s.failed_num[s.failed] = i;
			
 
				 			s.failed++;
			
 
				-		} else
			
 
				-			set_bit(R5_Insync, &dev->flags);
			
 
				+		}
			
 
				 	}
			
 
				 	rcu_read_unlock();
			
 
				 
			
@@ -4971,8 +5057,10 @@ static int run(mddev_t *mddev)
 
				 	list_for_each_entry(rdev, &mddev->disks, same_set) {
			
 
				 		if (rdev->raid_disk < 0)
			
 
				 			continue;
			
 
				-		if (test_bit(In_sync, &rdev->flags))
			
 
				+		if (test_bit(In_sync, &rdev->flags)) {
			
 
				 			working_disks++;
			
 
				+			continue;
			
 
				+		}
			
 
				 		/* This disc is not fully in-sync.  However if it
			
 
				 		 * just stored parity (beyond the recovery_offset),
			
 
				 		 * when we don't need to be concerned about the
			
@@ -5005,7 +5093,7 @@ static int run(mddev_t *mddev)
 
				 	mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks)
			
 
				 			   - working_disks);
			
 
				 
			
 
				-	if (mddev->degraded > conf->max_degraded) {
			
 
				+	if (has_failed(conf)) {
			
 
				 		printk(KERN_ERR "md/raid:%s: not enough operational devices"
			
 
				 			" (%d/%d failed)\n",
			
 
				 			mdname(mddev), mddev->degraded, conf->raid_disks);
			
@@ -5207,6 +5295,7 @@ static int raid5_spare_active(mddev_t *mddev)
 
				 	for (i = 0; i < conf->raid_disks; i++) {
			
 
				 		tmp = conf->disks + i;
			
 
				 		if (tmp->rdev
			
 
				+		    && tmp->rdev->recovery_offset == MaxSector
			
 
				 		    && !test_bit(Faulty, &tmp->rdev->flags)
			
 
				 		    && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
			
 
				 			unsigned long flags;
			
@@ -5242,7 +5331,7 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
 
				 		 * isn't possible.
			
 
				 		 */
			
 
				 		if (!test_bit(Faulty, &rdev->flags) &&
			
 
				-		    mddev->degraded <= conf->max_degraded &&
			
 
				+		    !has_failed(conf) &&
			
 
				 		    number < conf->raid_disks) {
			
 
				 			err = -EBUSY;
			
 
				 			goto abort;
			
@@ -5270,7 +5359,7 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 
				 	int first = 0;
			
 
				 	int last = conf->raid_disks - 1;
			
 
				 
			
 
				-	if (mddev->degraded > conf->max_degraded)
			
 
				+	if (has_failed(conf))
			
 
				 		/* no point adding a device */
			
 
				 		return -EINVAL;
			
 
				 
			
@@ -5362,7 +5451,7 @@ static int check_reshape(mddev_t *mddev)
 
				 	if (mddev->bitmap)
			
 
				 		/* Cannot grow a bitmap yet */
			
 
				 		return -EBUSY;
			
 
				-	if (mddev->degraded > conf->max_degraded)
			
 
				+	if (has_failed(conf))
			
 
				 		return -EINVAL;
			
 
				 	if (mddev->delta_disks < 0) {
			
 
				 		/* We might be able to shrink, but the devices must
			
@@ -5437,8 +5526,13 @@ static int raid5_start_reshape(mddev_t *mddev)
 
				 
			
 
				 	/* Add some new drives, as many as will fit.
			
 
				 	 * We know there are enough to make the newly sized array work.
			
 
				+	 * Don't add devices if we are reducing the number of
			
 
				+	 * devices in the array.  This is because it is not possible
			
 
				+	 * to correctly record the "partially reconstructed" state of
			
 
				+	 * such devices during the reshape and confusion could result.
			
 
				 	 */
			
 
				-	list_for_each_entry(rdev, &mddev->disks, same_set)
			
 
				+	if (mddev->delta_disks >= 0)
			
 
				+	    list_for_each_entry(rdev, &mddev->disks, same_set)
			
 
				 		if (rdev->raid_disk < 0 &&
			
 
				 		    !test_bit(Faulty, &rdev->flags)) {
			
 
				 			if (raid5_add_disk(mddev, rdev) == 0) {
			
@@ -5460,7 +5554,7 @@ static int raid5_start_reshape(mddev_t *mddev)
 
				 		}
			
 
				 
			
 
				 	/* When a reshape changes the number of devices, ->degraded
			
 
				-	 * is measured against the large of the pre and post number of
			
 
				+	 * is measured against the larger of the pre and post number of
			
 
				 	 * devices.*/
			
 
				 	if (mddev->delta_disks > 0) {
			
 
				 		spin_lock_irqsave(&conf->device_lock, flags);