@@ -319,83 +319,74 @@ static void raid1_end_write_request(struct bio *bio, int error)
 		if (r1_bio->bios[mirror] == bio)
 			break;
 
-	if (error == -EOPNOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) {
-		set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags);
-		set_bit(R1BIO_BarrierRetry, &r1_bio->state);
-		r1_bio->mddev->barriers_work = 0;
-		/* Don't rdev_dec_pending in this branch - keep it for the retry */
-	} else {
+	/*
+	 * 'one mirror IO has finished' event handler:
+	 */
+	r1_bio->bios[mirror] = NULL;
+	to_put = bio;
+	if (!uptodate) {
+		md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
+		/* an I/O failed, we can't clear the bitmap */
+		set_bit(R1BIO_Degraded, &r1_bio->state);
+	} else
 		/*
-		 * this branch is our 'one mirror IO has finished' event handler:
+		 * Set R1BIO_Uptodate in our master bio, so that we
+		 * will return a good error code for to the higher
+		 * levels even if IO on some other mirrored buffer
+		 * fails.
+		 *
+		 * The 'master' represents the composite IO operation
+		 * to user-side. So if something waits for IO, then it
+		 * will wait for the 'master' bio.
 		 */
-		r1_bio->bios[mirror] = NULL;
-		to_put = bio;
-		if (!uptodate) {
-			md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
-			/* an I/O failed, we can't clear the bitmap */
-			set_bit(R1BIO_Degraded, &r1_bio->state);
-		} else
-			/*
-			 * Set R1BIO_Uptodate in our master bio, so that
-			 * we will return a good error code for to the higher
-			 * levels even if IO on some other mirrored buffer fails.
-			 *
-			 * The 'master' represents the composite IO operation to
-			 * user-side. So if something waits for IO, then it will
-			 * wait for the 'master' bio.
-			 */
-			set_bit(R1BIO_Uptodate, &r1_bio->state);
-
-		update_head_pos(mirror, r1_bio);
-
-		if (behind) {
-			if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
-				atomic_dec(&r1_bio->behind_remaining);
-
-			/* In behind mode, we ACK the master bio once the I/O has safely
-			 * reached all non-writemostly disks. Setting the Returned bit
-			 * ensures that this gets done only once -- we don't ever want to
-			 * return -EIO here, instead we'll wait */
-
-			if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
-			    test_bit(R1BIO_Uptodate, &r1_bio->state)) {
-				/* Maybe we can return now */
-				if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
-					struct bio *mbio = r1_bio->master_bio;
-					PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n",
-					       (unsigned long long) mbio->bi_sector,
-					       (unsigned long long) mbio->bi_sector +
-					       (mbio->bi_size >> 9) - 1);
-					bio_endio(mbio, 0);
-				}
+		set_bit(R1BIO_Uptodate, &r1_bio->state);
+
+	update_head_pos(mirror, r1_bio);
+
+	if (behind) {
+		if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
+			atomic_dec(&r1_bio->behind_remaining);
+
+		/*
+		 * In behind mode, we ACK the master bio once the I/O
+		 * has safely reached all non-writemostly
+		 * disks. Setting the Returned bit ensures that this
+		 * gets done only once -- we don't ever want to return
+		 * -EIO here, instead we'll wait
+		 */
+		if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
+		    test_bit(R1BIO_Uptodate, &r1_bio->state)) {
+			/* Maybe we can return now */
+			if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
+				struct bio *mbio = r1_bio->master_bio;
+				PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n",
+				       (unsigned long long) mbio->bi_sector,
+				       (unsigned long long) mbio->bi_sector +
+				       (mbio->bi_size >> 9) - 1);
+				bio_endio(mbio, 0);
 			}
 		}
-		rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
 	}
+	rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
+
 	/*
-	 *
 	 * Let's see if all mirrored write operations have finished
 	 * already.
 	 */
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
-		if (test_bit(R1BIO_BarrierRetry, &r1_bio->state))
-			reschedule_retry(r1_bio);
-		else {
-			/* it really is the end of this request */
-			if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
-				/* free extra copy of the data pages */
-				int i = bio->bi_vcnt;
-				while (i--)
-					safe_put_page(bio->bi_io_vec[i].bv_page);
-			}
-			/* clear the bitmap if all writes complete successfully */
-			bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
-					r1_bio->sectors,
-					!test_bit(R1BIO_Degraded, &r1_bio->state),
-					behind);
-			md_write_end(r1_bio->mddev);
-			raid_end_bio_io(r1_bio);
+		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+			/* free extra copy of the data pages */
+			int i = bio->bi_vcnt;
+			while (i--)
+				safe_put_page(bio->bi_io_vec[i].bv_page);
 		}
+		/* clear the bitmap if all writes complete successfully */
+		bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
+				r1_bio->sectors,
+				!test_bit(R1BIO_Degraded, &r1_bio->state),
+				behind);
+		md_write_end(r1_bio->mddev);
+		raid_end_bio_io(r1_bio);
 	}
 
 	if (to_put)
@@ -788,16 +779,13 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	struct page **behind_pages = NULL;
 	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
-	unsigned long do_barriers;
+	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
 	mdk_rdev_t *blocked_rdev;
 
 	/*
 	 * Register the new request and wait if the reconstruction
 	 * thread has put up a bar for new requests.
 	 * Continue immediately if no resync is active currently.
-	 * We test barriers_work *after* md_write_start as md_write_start
-	 * may cause the first superblock write, and that will check out
-	 * if barriers work.
 	 */
 
 	md_write_start(mddev, bio); /* wait on superblock update early */
@@ -821,13 +809,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 		}
 		finish_wait(&conf->wait_barrier, &w);
 	}
-	if (unlikely(!mddev->barriers_work &&
-		     (bio->bi_rw & REQ_HARDBARRIER))) {
-		if (rw == WRITE)
-			md_write_end(mddev);
-		bio_endio(bio, -EOPNOTSUPP);
-		return 0;
-	}
 
 	wait_barrier(conf);
 
@@ -959,10 +940,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	atomic_set(&r1_bio->remaining, 0);
 	atomic_set(&r1_bio->behind_remaining, 0);
 
-	do_barriers = bio->bi_rw & REQ_HARDBARRIER;
-	if (do_barriers)
-		set_bit(R1BIO_Barrier, &r1_bio->state);
-
 	bio_list_init(&bl);
 	for (i = 0; i < disks; i++) {
 		struct bio *mbio;
@@ -975,7 +952,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 		mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
 		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
 		mbio->bi_end_io = raid1_end_write_request;
-		mbio->bi_rw = WRITE | do_barriers | do_sync;
+		mbio->bi_rw = WRITE | do_flush_fua | do_sync;
 		mbio->bi_private = r1_bio;
 
 		if (behind_pages) {
@@ -1634,41 +1611,6 @@ static void raid1d(mddev_t *mddev)
 		if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
 			sync_request_write(mddev, r1_bio);
 			unplug = 1;
-		} else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
-			/* some requests in the r1bio were REQ_HARDBARRIER
-			 * requests which failed with -EOPNOTSUPP. Hohumm..
-			 * Better resubmit without the barrier.
-			 * We know which devices to resubmit for, because
-			 * all others have had their bios[] entry cleared.
-			 * We already have a nr_pending reference on these rdevs.
-			 */
-			int i;
-			const unsigned long do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC);
-			clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
-			clear_bit(R1BIO_Barrier, &r1_bio->state);
-			for (i=0; i < conf->raid_disks; i++)
-				if (r1_bio->bios[i])
-					atomic_inc(&r1_bio->remaining);
-			for (i=0; i < conf->raid_disks; i++)
-				if (r1_bio->bios[i]) {
-					struct bio_vec *bvec;
-					int j;
-
-					bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
-					/* copy pages from the failed bio, as
-					 * this might be a write-behind device */
-					__bio_for_each_segment(bvec, bio, j, 0)
-						bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page;
-					bio_put(r1_bio->bios[i]);
-					bio->bi_sector = r1_bio->sector +
-						conf->mirrors[i].rdev->data_offset;
-					bio->bi_bdev = conf->mirrors[i].rdev->bdev;
-					bio->bi_end_io = raid1_end_write_request;
-					bio->bi_rw = WRITE | do_sync;
-					bio->bi_private = r1_bio;
-					r1_bio->bios[i] = bio;
-					generic_make_request(bio);
-				}
 		} else {
 			int disk;
 