
Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  FIX: md: process hangs at wait_barrier after 0->10 takeover
  md_make_request: don't touch the bio after calling make_request
  md: Don't allow slot_store while resync/recovery is happening.
  md: don't clear curr_resync_completed at end of resync.
  md: Don't use remove_and_add_spares to remove failed devices from a read-only array
  Add raid1->raid0 takeover support
  md: Remove the AllReserved flag for component devices.
  md: don't abort checking spares as soon as one cannot be added.
  md: fix the test for finding spares in raid5_start_reshape.
  md: simplify some 'if' conditionals in raid5_start_reshape.
  md: revert change to raid_disks on failure.
Linus Torvalds, 14 years ago
commit ae8eed2d09
5 changed files with 106 additions and 49 deletions
  1. drivers/md/md.c (+ 32 - 15)
  2. drivers/md/md.h (+ 0 - 2)
  3. drivers/md/raid0.c (+ 40 - 0)
  4. drivers/md/raid10.c (+ 4 - 2)
  5. drivers/md/raid5.c (+ 30 - 30)

+ 32 - 15
drivers/md/md.c

@@ -287,6 +287,7 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
 	mddev_t *mddev = q->queuedata;
 	int rv;
 	int cpu;
+	unsigned int sectors;
 
 	if (mddev == NULL || mddev->pers == NULL
 	    || !mddev->ready) {
@@ -311,12 +312,16 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
 	atomic_inc(&mddev->active_io);
 	rcu_read_unlock();
 
+	/*
+	 * save the sectors now since our bio can
+	 * go away inside make_request
+	 */
+	sectors = bio_sectors(bio);
 	rv = mddev->pers->make_request(mddev, bio);
 
 	cpu = part_stat_lock();
 	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
-	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
-		      bio_sectors(bio));
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
 	part_stat_unlock();
 
 	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
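
The point of the two hunks above: once ->make_request has been called, md_make_request must not touch the bio again, since the personality may already have completed and freed it, so bio_sectors() is sampled first. A minimal user-space sketch of the same ownership rule (names illustrative, not kernel API):

#include <stdio.h>
#include <stdlib.h>

struct io { unsigned int sectors; };

/* The consumer may finish and free the request before returning,
 * just as ->make_request may end the bio's life. */
static void submit(struct io *io)
{
	free(io);               /* ownership transferred: io is gone */
}

int main(void)
{
	struct io *io = malloc(sizeof(*io));
	unsigned int sectors;

	io->sectors = 8;
	sectors = io->sectors;  /* snapshot before handing it off */
	submit(io);
	/* reading io->sectors here would be a use-after-free */
	printf("accounted %u sectors\n", sectors);
	return 0;
}
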
@@ -1947,8 +1952,6 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
 			__bdevname(dev, b));
 		return PTR_ERR(bdev);
 	}
-	if (!shared)
-		set_bit(AllReserved, &rdev->flags);
 	rdev->bdev = bdev;
 	return err;
 }
@@ -2465,6 +2468,9 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 		if (rdev->raid_disk != -1)
 			return -EBUSY;
 
+		if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
+			return -EBUSY;
+
 		if (rdev->mddev->pers->hot_add_disk == NULL)
 			return -EINVAL;
 
@@ -2610,12 +2616,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 
 			mddev_lock(mddev);
 			list_for_each_entry(rdev2, &mddev->disks, same_set)
-				if (test_bit(AllReserved, &rdev2->flags) ||
-				    (rdev->bdev == rdev2->bdev &&
-				     rdev != rdev2 &&
-				     overlaps(rdev->data_offset, rdev->sectors,
-					      rdev2->data_offset,
-					      rdev2->sectors))) {
+				if (rdev->bdev == rdev2->bdev &&
+				    rdev != rdev2 &&
+				    overlaps(rdev->data_offset, rdev->sectors,
+					     rdev2->data_offset,
+					     rdev2->sectors)) {
 					overlap = 1;
 					break;
 				}
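
With AllReserved gone, the only reason left for rdev_size_store to refuse a resize is a real extent collision between two rdevs on the same block device. Assuming overlaps() is the usual interval-intersection test, the check reduces to the sketch below:

#include <assert.h>

typedef unsigned long long sector_t;

/* Two extents [s, s + l) intersect iff each one starts before the
 * other ends (sketch; the kernel helper's exact form may differ). */
static int extents_overlap(sector_t s1, sector_t l1,
			   sector_t s2, sector_t l2)
{
	return s1 < s2 + l2 && s2 < s1 + l1;
}

int main(void)
{
	assert(extents_overlap(0, 100, 50, 100));    /* partial overlap */
	assert(!extents_overlap(0, 100, 100, 50));   /* merely adjacent */
	return 0;
}
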
@@ -5578,6 +5583,8 @@ static int update_raid_disks(mddev_t *mddev, int raid_disks)
 	mddev->delta_disks = raid_disks - mddev->raid_disks;
 
 	rv = mddev->pers->check_reshape(mddev);
+	if (rv < 0)
+		mddev->delta_disks = 0;
 	return rv;
 }
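
The shape of this fix is stage-then-revert: delta_disks is set speculatively so check_reshape() can evaluate it, and cleared again if the personality refuses, so a failed request leaves no pending reshape behind. A hypothetical distillation:

/* Hypothetical distillation of the revert-on-failure rule above. */
struct array { int delta_disks; };

static int check_reshape_stub(struct array *a)
{
	(void)a;
	return -22;                    /* pretend -EINVAL: refused */
}

static int update_disks(struct array *a, int delta)
{
	int rv;

	a->delta_disks = delta;        /* staged, not yet applied */
	rv = check_reshape_stub(a);
	if (rv < 0)
		a->delta_disks = 0;    /* refused: drop the staging */
	return rv;
}

int main(void)
{
	struct array a = { 0 };
	return update_disks(&a, 2) < 0 && a.delta_disks == 0 ? 0 : 1;
}
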
 
@@ -6985,9 +6992,6 @@ void md_do_sync(mddev_t *mddev)
 	} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
 		mddev->resync_min = mddev->curr_resync_completed;
 	mddev->curr_resync = 0;
-	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
-		mddev->curr_resync_completed = 0;
-	sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 	wake_up(&resync_wait);
 	set_bit(MD_RECOVERY_DONE, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
@@ -7028,7 +7032,7 @@ static int remove_and_add_spares(mddev_t *mddev)
 			}
 		}
 
-	if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) {
+	if (mddev->degraded && !mddev->recovery_disabled) {
 		list_for_each_entry(rdev, &mddev->disks, same_set) {
 			if (rdev->raid_disk >= 0 &&
 			    !test_bit(In_sync, &rdev->flags) &&
@@ -7151,7 +7155,20 @@ void md_check_recovery(mddev_t *mddev)
 			/* Only thing we do on a ro array is remove
 			 * failed devices.
 			 */
-			remove_and_add_spares(mddev);
+			mdk_rdev_t *rdev;
+			list_for_each_entry(rdev, &mddev->disks, same_set)
+				if (rdev->raid_disk >= 0 &&
+				    !test_bit(Blocked, &rdev->flags) &&
+				    test_bit(Faulty, &rdev->flags) &&
+				    atomic_read(&rdev->nr_pending)==0) {
+					if (mddev->pers->hot_remove_disk(
+						    mddev, rdev->raid_disk)==0) {
+						char nm[20];
+						sprintf(nm,"rd%d", rdev->raid_disk);
+						sysfs_remove_link(&mddev->kobj, nm);
+						rdev->raid_disk = -1;
+					}
+				}
 			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 			goto unlock;
 		}
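
On a read-only array the old remove_and_add_spares() call was too broad: besides dropping failed devices it could also try to hot-add spares, and recovery means writes. The open-coded loop above only detaches a device that held a slot, is Faulty, is not Blocked, and has no I/O in flight; a hypothetical distillation of that predicate:

#include <stdbool.h>

/* Hypothetical distillation of the read-only removal rule above. */
struct dev_state {
	int  raid_disk;          /* slot in the array, -1 for a spare */
	bool faulty, blocked;
	int  nr_pending;         /* outstanding I/O requests */
};

static bool can_remove_ro(const struct dev_state *d)
{
	return d->raid_disk >= 0 && d->faulty &&
	       !d->blocked && d->nr_pending == 0;
}

int main(void)
{
	struct dev_state failed = { .raid_disk = 2, .faulty = true };
	return can_remove_ro(&failed) ? 0 : 1;
}
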

+ 0 - 2
drivers/md/md.h

@@ -93,8 +93,6 @@ struct mdk_rdev_s
 #define	Faulty		1		/* device is known to have a fault */
 #define	In_sync		2		/* device is in_sync with rest of array */
 #define	WriteMostly	4		/* Avoid reading if at all possible */
-#define	AllReserved	6		/* If whole device is reserved for
-					 * one array */
 #define	AutoDetected	7		/* added by auto-detect */
 #define Blocked		8		/* An error occurred on an externally
 					 * managed array, don't allow writes

+ 40 - 0
drivers/md/raid0.c

@@ -179,6 +179,14 @@ static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf)
 			rdev1->new_raid_disk = j;
 		}
 
+		if (mddev->level == 1) {
+			/* taking over a raid1 array -
+			 * we have only one active disk
+			 */
+			j = 0;
+			rdev1->new_raid_disk = j;
+		}
+
 		if (j < 0 || j >= mddev->raid_disks) {
 			printk(KERN_ERR "md/raid0:%s: bad disk number %d - "
 			       "aborting!\n", mdname(mddev), j);
@@ -644,12 +652,38 @@ static void *raid0_takeover_raid10(mddev_t *mddev)
 	return priv_conf;
 }
 
+static void *raid0_takeover_raid1(mddev_t *mddev)
+{
+	raid0_conf_t *priv_conf;
+
+	/* Check layout:
+	 *  - (N - 1) mirror drives must be already faulty
+	 */
+	if ((mddev->raid_disks - 1) != mddev->degraded) {
+		printk(KERN_ERR "md/raid0:%s: (N - 1) mirror drives must be already faulty!\n",
+		       mdname(mddev));
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Set new parameters */
+	mddev->new_level = 0;
+	mddev->new_layout = 0;
+	mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */
+	mddev->delta_disks = 1 - mddev->raid_disks;
+	/* make sure it will be not marked as dirty */
+	mddev->recovery_cp = MaxSector;
+
+	create_strip_zones(mddev, &priv_conf);
+	return priv_conf;
+}
+
 static void *raid0_takeover(mddev_t *mddev)
 {
 	/* raid0 can take over:
 	 *  raid4 - if all data disks are active.
 	 *  raid5 - providing it is Raid4 layout and one disk is faulty
 	 *  raid10 - assuming we have all necessary active disks
+	 *  raid1 - with (N - 1) mirror drives faulty
 	 */
 	if (mddev->level == 4)
 		return raid0_takeover_raid45(mddev);
@@ -665,6 +699,12 @@ static void *raid0_takeover(mddev_t *mddev)
 	if (mddev->level == 10)
 		return raid0_takeover_raid10(mddev);
 
+	if (mddev->level == 1)
+		return raid0_takeover_raid1(mddev);
+
+	printk(KERN_ERR "Takeover from raid%i to raid0 not supported\n",
+		mddev->level);
+
 	return ERR_PTR(-EINVAL);
 }
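
The raid1 path only succeeds when a single mirror leg is still active; that leg becomes the lone raid0 member (hence delta_disks = 1 - N) with a default chunk of 128 sectors, i.e. 64 KiB. In practice the conversion is requested from user space through mdadm's grow mode. A sketch of the precondition and chunk arithmetic (illustrative names, not kernel API):

#include <stdio.h>

/* With N mirror legs, exactly N - 1 must already be Faulty so a
 * single data copy remains (sketch of the check made above). */
static int raid1_to_raid0_ok(int raid_disks, int degraded)
{
	return raid_disks - 1 == degraded;
}

int main(void)
{
	printf("2-way mirror, one failed leg: %s\n",
	       raid1_to_raid0_ok(2, 1) ? "takeover ok" : "refused");
	printf("default chunk: %u sectors = %u bytes\n",
	       128u, 128u * 512u);      /* 64 KiB */
	return 0;
}
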
 

+ 4 - 2
drivers/md/raid10.c

@@ -2463,11 +2463,13 @@ static void *raid10_takeover_raid0(mddev_t *mddev)
 	mddev->recovery_cp = MaxSector;
 
 	conf = setup_conf(mddev);
-	if (!IS_ERR(conf))
+	if (!IS_ERR(conf)) {
 		list_for_each_entry(rdev, &mddev->disks, same_set)
 			if (rdev->raid_disk >= 0)
 				rdev->new_raid_disk = rdev->raid_disk * 2;
-		
+		conf->barrier = 1;
+	}
+
 	return conf;
 }
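
conf->barrier = 1 is the fix for the wait_barrier hang named in the merge log: the takeover conf is built while the array is quiesced, and the resume that follows lowers the barrier once, so a conf starting at zero would be left unbalanced (a reading of the fix; the log itself only records the hang). A toy model of the pairing:

/* Toy model of the raise/lower pairing; the kernel versions also
 * gate regular I/O and sleep on a waitqueue. */
struct toy_conf { int barrier; };

static void lower_barrier(struct toy_conf *c) { c->barrier--; }

int main(void)
{
	struct toy_conf takeover = { .barrier = 1 };  /* the fix */

	lower_barrier(&takeover);  /* resume after the level change */
	return takeover.barrier;   /* 0: balanced, waiters can proceed */
}
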
 

+ 30 - 30
drivers/md/raid5.c

@@ -5517,7 +5517,6 @@ static int raid5_start_reshape(mddev_t *mddev)
 	raid5_conf_t *conf = mddev->private;
 	mdk_rdev_t *rdev;
 	int spares = 0;
-	int added_devices = 0;
 	unsigned long flags;
 
 	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
@@ -5527,8 +5526,8 @@ static int raid5_start_reshape(mddev_t *mddev)
 		return -ENOSPC;
 
 	list_for_each_entry(rdev, &mddev->disks, same_set)
-		if ((rdev->raid_disk < 0 || rdev->raid_disk >= conf->raid_disks)
-		     && !test_bit(Faulty, &rdev->flags))
+		if (!test_bit(In_sync, &rdev->flags)
+		    && !test_bit(Faulty, &rdev->flags))
 			spares++;
 
 	if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded)
@@ -5571,34 +5570,35 @@ static int raid5_start_reshape(mddev_t *mddev)
 	 * to correctly record the "partially reconstructed" state of
 	 * such devices during the reshape and confusion could result.
 	 */
-	if (mddev->delta_disks >= 0)
-	    list_for_each_entry(rdev, &mddev->disks, same_set)
-		if (rdev->raid_disk < 0 &&
-		    !test_bit(Faulty, &rdev->flags)) {
-			if (raid5_add_disk(mddev, rdev) == 0) {
-				char nm[20];
-				if (rdev->raid_disk >= conf->previous_raid_disks) {
-					set_bit(In_sync, &rdev->flags);
-					added_devices++;
-				} else
-					rdev->recovery_offset = 0;
-				sprintf(nm, "rd%d", rdev->raid_disk);
-				if (sysfs_create_link(&mddev->kobj,
-						      &rdev->kobj, nm))
-					/* Failure here is OK */;
-			} else
-				break;
-		} else if (rdev->raid_disk >= conf->previous_raid_disks
-			   && !test_bit(Faulty, &rdev->flags)) {
-			/* This is a spare that was manually added */
-			set_bit(In_sync, &rdev->flags);
-			added_devices++;
-		}
+	if (mddev->delta_disks >= 0) {
+		int added_devices = 0;
+		list_for_each_entry(rdev, &mddev->disks, same_set)
+			if (rdev->raid_disk < 0 &&
+			    !test_bit(Faulty, &rdev->flags)) {
+				if (raid5_add_disk(mddev, rdev) == 0) {
+					char nm[20];
+					if (rdev->raid_disk
+					    >= conf->previous_raid_disks) {
+						set_bit(In_sync, &rdev->flags);
+						added_devices++;
+					} else
+						rdev->recovery_offset = 0;
+					sprintf(nm, "rd%d", rdev->raid_disk);
+					if (sysfs_create_link(&mddev->kobj,
+							      &rdev->kobj, nm))
+						/* Failure here is OK */;
+				}
+			} else if (rdev->raid_disk >= conf->previous_raid_disks
+				   && !test_bit(Faulty, &rdev->flags)) {
+				/* This is a spare that was manually added */
+				set_bit(In_sync, &rdev->flags);
+				added_devices++;
+			}
 
-	/* When a reshape changes the number of devices, ->degraded
-	 * is measured against the larger of the pre and post number of
-	 * devices.*/
-	if (mddev->delta_disks > 0) {
+		/* When a reshape changes the number of devices,
+		 * ->degraded is measured against the larger of the
+		 * pre and post number of devices.
+		 */
 		spin_lock_irqsave(&conf->device_lock, flags);
 		mddev->degraded += (conf->raid_disks - conf->previous_raid_disks)
 			- added_devices;
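
The closing accounting reads: ->degraded grows by the number of newly created slots and shrinks by the devices actually activated into them. A worked example with assumed numbers:

#include <stdio.h>

/* Worked example of the ->degraded adjustment above (numbers assumed):
 * a healthy 4-device raid5 grows to 6, and spares were activated into
 * both new slots, so the array starts the reshape non-degraded. */
int main(void)
{
	int previous_raid_disks = 4, raid_disks = 6;
	int added_devices = 2, degraded = 0;

	degraded += (raid_disks - previous_raid_disks) - added_devices;
	printf("degraded at reshape start: %d\n", degraded);  /* 0 */
	return 0;
}
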