瀏覽代碼

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  md: Fix handling for devices from 2TB to 4TB in 0.90 metadata.
  md/raid1,10: Remove use-after-free bug in make_request.
  md/raid10: unify handling of write completion.
  Avoid dereferencing a 'request_queue' after last close.
Linus Torvalds 13 年之前
父節點
當前提交
290a1cc4f7
共有 4 個文件被更改,包括 48 次插入和 32 次刪除
  1. 10 2
      drivers/md/md.c
  2. 9 5
      drivers/md/raid1.c
  3. 24 23
      drivers/md/raid10.c
  4. 5 2
      fs/block_dev.c

+ 10 - 2
drivers/md/md.c

@@ -1138,8 +1138,11 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
 			ret = 0;
 	}
 	rdev->sectors = rdev->sb_start;
+	/* Limit to 4TB as metadata cannot record more than that */
+	if (rdev->sectors >= (2ULL << 32))
+		rdev->sectors = (2ULL << 32) - 2;
 
-	if (rdev->sectors < sb->size * 2 && sb->level > 1)
+	if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
 		/* "this cannot possibly happen" ... */
 		ret = -EINVAL;
 
@@ -1173,7 +1176,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->clevel[0] = 0;
 		mddev->layout = sb->layout;
 		mddev->raid_disks = sb->raid_disks;
-		mddev->dev_sectors = sb->size * 2;
+		mddev->dev_sectors = ((sector_t)sb->size) * 2;
 		mddev->events = ev1;
 		mddev->bitmap_info.offset = 0;
 		mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
@@ -1415,6 +1418,11 @@ super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
 	rdev->sb_start = calc_dev_sboffset(rdev);
 	if (!num_sectors || num_sectors > rdev->sb_start)
 		num_sectors = rdev->sb_start;
+	/* Limit to 4TB as metadata cannot record more than that.
+	 * 4TB == 2^32 KB, or 2*2^32 sectors.
+	 */
+	if (num_sectors >= (2ULL << 32))
+		num_sectors = (2ULL << 32) - 2;
 	md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
 		       rdev->sb_page);
 	md_super_wait(rdev->mddev);

+ 9 - 5
drivers/md/raid1.c

@@ -1099,12 +1099,11 @@ read_again:
 		bio_list_add(&conf->pending_bio_list, mbio);
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
-	r1_bio_write_done(r1_bio);
-
-	/* In case raid1d snuck in to freeze_array */
-	wake_up(&conf->wait_barrier);
-
+	/* Mustn't call r1_bio_write_done before this next test,
+	 * as it could result in the bio being freed.
+	 */
 	if (sectors_handled < (bio->bi_size >> 9)) {
+		r1_bio_write_done(r1_bio);
 		/* We need another r1_bio.  It has already been counted
 		 * in bio->bi_phys_segments
 		 */
@@ -1117,6 +1116,11 @@ read_again:
 		goto retry_write;
 	}
 
+	r1_bio_write_done(r1_bio);
+
+	/* In case raid1d snuck in to freeze_array */
+	wake_up(&conf->wait_barrier);
+
 	if (do_sync || !bitmap || !plugged)
 		md_wakeup_thread(mddev->thread);
 

+ 24 - 23
drivers/md/raid10.c

@@ -337,6 +337,21 @@ static void close_write(r10bio_t *r10_bio)
 	md_write_end(r10_bio->mddev);
 }
 
+static void one_write_done(r10bio_t *r10_bio)
+{
+	if (atomic_dec_and_test(&r10_bio->remaining)) {
+		if (test_bit(R10BIO_WriteError, &r10_bio->state))
+			reschedule_retry(r10_bio);
+		else {
+			close_write(r10_bio);
+			if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+				reschedule_retry(r10_bio);
+			else
+				raid_end_bio_io(r10_bio);
+		}
+	}
+}
+
 static void raid10_end_write_request(struct bio *bio, int error)
 {
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -387,17 +402,7 @@ static void raid10_end_write_request(struct bio *bio, int error)
 	 * Let's see if all mirrored write operations have finished
 	 * already.
 	 */
-	if (atomic_dec_and_test(&r10_bio->remaining)) {
-		if (test_bit(R10BIO_WriteError, &r10_bio->state))
-			reschedule_retry(r10_bio);
-		else {
-			close_write(r10_bio);
-			if (test_bit(R10BIO_MadeGood, &r10_bio->state))
-				reschedule_retry(r10_bio);
-			else
-				raid_end_bio_io(r10_bio);
-		}
-	}
+	one_write_done(r10_bio);
 	if (dec_rdev)
 		rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
 }
@@ -1127,20 +1132,12 @@ retry_write:
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 
-	if (atomic_dec_and_test(&r10_bio->remaining)) {
-		/* This matches the end of raid10_end_write_request() */
-		bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
-				r10_bio->sectors,
-				!test_bit(R10BIO_Degraded, &r10_bio->state),
-				0);
-		md_write_end(mddev);
-		raid_end_bio_io(r10_bio);
-	}
-
-	/* In case raid10d snuck in to freeze_array */
-	wake_up(&conf->wait_barrier);
+	/* Don't remove the bias on 'remaining' (one_write_done) until
+	 * after checking if we need to go around again.
+	 */
 
 	if (sectors_handled < (bio->bi_size >> 9)) {
+		one_write_done(r10_bio);
 		/* We need another r10_bio.  It has already been counted
 		 * in bio->bi_phys_segments.
 		 */
@@ -1154,6 +1151,10 @@ retry_write:
 		r10_bio->state = 0;
 		goto retry_write;
 	}
+	one_write_done(r10_bio);
+
+	/* In case raid10d snuck in to freeze_array */
+	wake_up(&conf->wait_barrier);
 
 	if (do_sync || !mddev->bitmap || !plugged)
 		md_wakeup_thread(mddev->thread);

+ 5 - 2
fs/block_dev.c

@@ -1429,6 +1429,11 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 		WARN_ON_ONCE(bdev->bd_holders);
 		sync_blockdev(bdev);
 		kill_bdev(bdev);
+		/* ->release can cause the old bdi to disappear,
+		 * so must switch it out first
+		 */
+		bdev_inode_switch_bdi(bdev->bd_inode,
+					&default_backing_dev_info);
 	}
 	if (bdev->bd_contains == bdev) {
 		if (disk->fops->release)
@@ -1442,8 +1447,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 		disk_put_part(bdev->bd_part);
 		bdev->bd_part = NULL;
 		bdev->bd_disk = NULL;
-		bdev_inode_switch_bdi(bdev->bd_inode,
-					&default_backing_dev_info);
 		if (bdev != bdev->bd_contains)
 			victim = bdev->bd_contains;
 		bdev->bd_contains = NULL;