Browse Source

[PATCH] md: fix possible problem in raid1/raid10 error overwriting

The code that overwrites and then re-reads blocks to correct read errors in
raid1/raid10 currently assumes that the re-read will not alter the buffer,
which may then be used to write to the next device.  This is not a safe
assumption to make.

So we split the loops into an overwrite loop and a separate re-read loop, so
that all writing is complete before any re-reading is attempted.

Cc: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
NeilBrown 19 years ago
parent
commit
097426f689
2 changed files with 48 additions and 12 deletions
  1. 30 8
      drivers/md/raid1.c
  2. 18 4
      drivers/md/raid10.c

+ 30 - 8
drivers/md/raid1.c

@@ -1253,6 +1253,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 			} while (!success && d != r1_bio->read_disk);
 			} while (!success && d != r1_bio->read_disk);
 
 
 			if (success) {
 			if (success) {
+				int start = d;
 				/* write it back and re-read */
 				/* write it back and re-read */
 				set_bit(R1BIO_Uptodate, &r1_bio->state);
 				set_bit(R1BIO_Uptodate, &r1_bio->state);
 				while (d != r1_bio->read_disk) {
 				while (d != r1_bio->read_disk) {
@@ -1266,14 +1267,23 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 							 sect + rdev->data_offset,
 							 sect + rdev->data_offset,
 							 s<<9,
 							 s<<9,
 							 bio->bi_io_vec[idx].bv_page,
 							 bio->bi_io_vec[idx].bv_page,
-							 WRITE) == 0 ||
-					    sync_page_io(rdev->bdev,
+							 WRITE) == 0)
+						md_error(mddev, rdev);
+				}
+				d = start;
+				while (d != r1_bio->read_disk) {
+					if (d == 0)
+						d = conf->raid_disks;
+					d--;
+					if (r1_bio->bios[d]->bi_end_io != end_sync_read)
+						continue;
+					rdev = conf->mirrors[d].rdev;
+					if (sync_page_io(rdev->bdev,
 							 sect + rdev->data_offset,
 							 sect + rdev->data_offset,
 							 s<<9,
 							 s<<9,
 							 bio->bi_io_vec[idx].bv_page,
 							 bio->bi_io_vec[idx].bv_page,
-							 READ) == 0) {
+							 READ) == 0)
 						md_error(mddev, rdev);
 						md_error(mddev, rdev);
-					}
 				}
 				}
 			} else {
 			} else {
 				char b[BDEVNAME_SIZE];
 				char b[BDEVNAME_SIZE];
@@ -1445,6 +1455,7 @@ static void raid1d(mddev_t *mddev)
 
 
 				if (success) {
 				if (success) {
 					/* write it back and re-read */
 					/* write it back and re-read */
+					int start = d;
 					while (d != r1_bio->read_disk) {
 					while (d != r1_bio->read_disk) {
 						if (d==0)
 						if (d==0)
 							d = conf->raid_disks;
 							d = conf->raid_disks;
@@ -1454,13 +1465,24 @@ static void raid1d(mddev_t *mddev)
 						    test_bit(In_sync, &rdev->flags)) {
 						    test_bit(In_sync, &rdev->flags)) {
 							if (sync_page_io(rdev->bdev,
 							if (sync_page_io(rdev->bdev,
 									 sect + rdev->data_offset,
 									 sect + rdev->data_offset,
-									 s<<9, conf->tmppage, WRITE) == 0 ||
-							    sync_page_io(rdev->bdev,
+									 s<<9, conf->tmppage, WRITE) == 0)
+								/* Well, this device is dead */
+								md_error(mddev, rdev);
+						}
+					}
+					d = start;
+					while (d != r1_bio->read_disk) {
+						if (d==0)
+							d = conf->raid_disks;
+						d--;
+						rdev = conf->mirrors[d].rdev;
+						if (rdev &&
+						    test_bit(In_sync, &rdev->flags)) {
+							if (sync_page_io(rdev->bdev,
 									 sect + rdev->data_offset,
 									 sect + rdev->data_offset,
-									 s<<9, conf->tmppage, READ) == 0) {
+									 s<<9, conf->tmppage, READ) == 0)
 								/* Well, this device is dead */
 								/* Well, this device is dead */
 								md_error(mddev, rdev);
 								md_error(mddev, rdev);
-							}
 						}
 						}
 					}
 					}
 				} else {
 				} else {

+ 18 - 4
drivers/md/raid10.c

@@ -1421,6 +1421,7 @@ static void raid10d(mddev_t *mddev)
 				} while (!success && sl != r10_bio->read_slot);
 				} while (!success && sl != r10_bio->read_slot);
 
 
 				if (success) {
 				if (success) {
+					int start = sl;
 					/* write it back and re-read */
 					/* write it back and re-read */
 					while (sl != r10_bio->read_slot) {
 					while (sl != r10_bio->read_slot) {
 						int d;
 						int d;
@@ -1434,14 +1435,27 @@ static void raid10d(mddev_t *mddev)
 							if (sync_page_io(rdev->bdev,
 							if (sync_page_io(rdev->bdev,
 									 r10_bio->devs[sl].addr +
 									 r10_bio->devs[sl].addr +
 									 sect + rdev->data_offset,
 									 sect + rdev->data_offset,
-									 s<<9, conf->tmppage, WRITE) == 0 ||
-							    sync_page_io(rdev->bdev,
+									 s<<9, conf->tmppage, WRITE) == 0)
+								/* Well, this device is dead */
+								md_error(mddev, rdev);
+						}
+					}
+					sl = start;
+					while (sl != r10_bio->read_slot) {
+						int d;
+						if (sl==0)
+							sl = conf->copies;
+						sl--;
+						d = r10_bio->devs[sl].devnum;
+						rdev = conf->mirrors[d].rdev;
+						if (rdev &&
+						    test_bit(In_sync, &rdev->flags)) {
+							if (sync_page_io(rdev->bdev,
 									 r10_bio->devs[sl].addr +
 									 r10_bio->devs[sl].addr +
 									 sect + rdev->data_offset,
 									 sect + rdev->data_offset,
-									 s<<9, conf->tmppage, READ) == 0) {
+									 s<<9, conf->tmppage, READ) == 0)
 								/* Well, this device is dead */
 								/* Well, this device is dead */
 								md_error(mddev, rdev);
 								md_error(mddev, rdev);
-							}
 						}
 						}
 					}
 					}
 				} else {
 				} else {