12 years ago · a2a3c74f24
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -831,7 +831,8 @@ enum drbd_flag {
 
				 				   once no more io in flight, start bitmap io */
			
 
				 	BITMAP_IO_QUEUED,       /* Started bitmap IO */
			
 
				 	GO_DISKLESS,		/* Disk is being detached, on io-error or admin request. */
			
 
				-	WAS_IO_ERROR,		/* Local disk failed returned IO error */
			
 
				+	WAS_IO_ERROR,		/* Local disk failed, returned IO error */
			
 
				+	WAS_READ_ERROR,		/* Local disk READ failed (set additionally to the above) */
			
 
				 	FORCE_DETACH,		/* Force-detach from local disk, aborting any pending local IO */
			
 
				 	RESYNC_AFTER_NEG,       /* Resync after online grow after the attach&negotiate finished. */
			
 
				 	NET_CONGESTED,		/* The data socket is congested */
			
@@ -1879,30 +1880,53 @@ static inline int drbd_request_state(struct drbd_conf *mdev,
 
				 }
			
 
				 
			
 
				 enum drbd_force_detach_flags {
			
 
				-	DRBD_IO_ERROR,
			
 
				+	DRBD_READ_ERROR,
			
 
				+	DRBD_WRITE_ERROR,
			
 
				 	DRBD_META_IO_ERROR,
			
 
				 	DRBD_FORCE_DETACH,
			
 
				 };
			
 
				 
			
 
				 #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
			
 
				 static inline void __drbd_chk_io_error_(struct drbd_conf *mdev,
			
 
				-		enum drbd_force_detach_flags forcedetach,
			
 
				+		enum drbd_force_detach_flags df,
			
 
				 		const char *where)
			
 
				 {
			
 
				 	switch (mdev->ldev->dc.on_io_error) {
			
 
				 	case EP_PASS_ON:
			
 
				-		if (forcedetach == DRBD_IO_ERROR) {
			
 
				+		if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) {
			
 
				 			if (__ratelimit(&drbd_ratelimit_state))
			
 
				 				dev_err(DEV, "Local IO failed in %s.\n", where);
			
 
				 			if (mdev->state.disk > D_INCONSISTENT)
			
 
				 				_drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL);
			
 
				 			break;
			
 
				 		}
			
 
				-		/* NOTE fall through to detach case if forcedetach set */
			
 
				+		/* NOTE fall through for DRBD_META_IO_ERROR or DRBD_FORCE_DETACH */
			
 
				 	case EP_DETACH:
			
 
				 	case EP_CALL_HELPER:
			
 
				+		/* Remember whether we saw a READ or WRITE error.
			
 
				+		 *
			
 
				+		 * Recovery of the affected area for WRITE failure is covered
			
 
				+		 * by the activity log.
			
 
				+		 * READ errors may fall outside that area though. Certain READ
			
 
				+		 * errors can be "healed" by writing good data to the affected
			
 
				+		 * blocks, which triggers block re-allocation in lower layers.
			
 
				+		 *
			
 
				+		 * If we cannot write the bitmap after a READ error,
			
 
				+		 * we may need to trigger a full sync (see w_go_diskless()).
			
 
				+		 *
			
 
				+		 * Force-detach is not really an IO error, but rather a
			
 
				+		 * desperate measure to try to deal with a completely
			
 
				+		 * unresponsive lower level IO stack.
			
 
				+		 * Still it should be treated as a WRITE error.
			
 
				+		 *
			
 
				+		 * A meta IO error is always a WRITE error:
			
 
				+		 * we read meta data only once during attach,
			
 
				+		 * which will fail in case of errors.
			
 
				+		 */
			
 
				 		drbd_set_flag(mdev, WAS_IO_ERROR);
			
 
				-		if (forcedetach == DRBD_FORCE_DETACH)
			
 
				+		if (df == DRBD_READ_ERROR)
			
 
				+			drbd_set_flag(mdev, WAS_READ_ERROR);
			
 
				+		if (df == DRBD_FORCE_DETACH)
			
 
				 			drbd_set_flag(mdev, FORCE_DETACH);
			
 
				 		if (mdev->state.disk > D_FAILED) {
			
 
				 			_drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
			
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1617,17 +1617,20 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 
				 	/* first half of local IO error, failure to attach,
			
 
				 	 * or administrative detach */
			
 
				 	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
			
 
				-		enum drbd_io_error_p eh = EP_PASS_ON;
			
 
				-		int was_io_error = 0;
			
 
				 		/* corresponding get_ldev was in __drbd_set_state, to serialize
			
 
				 		 * our cleanup here with the transition to D_DISKLESS.
			
 
				-		 * But is is still not save to dreference ldev here, since
			
 
				-		 * we might come from an failed Attach before ldev was set. */
			
 
				+		 * But it is still not safe to dereference ldev here: we may end
			
 
				+		 * up here from a failed attach, before ldev was even set.  */
			
 
				 		if (mdev->ldev) {
			
 
				-			eh = mdev->ldev->dc.on_io_error;
			
 
				-			was_io_error = drbd_test_and_clear_flag(mdev, WAS_IO_ERROR);
			
 
				-
			
 
				-			if (was_io_error && eh == EP_CALL_HELPER)
			
 
				+			enum drbd_io_error_p eh = mdev->ldev->dc.on_io_error;
			
 
				+
			
 
				+			/* In some setups, this handler triggers a suicide,
			
 
				+			 * basically mapping IO error to node failure, to
			
 
				+			 * reduce the number of different failure scenarios.
			
 
				+			 *
			
 
				+			 * This handler intentionally runs before we abort IO,
			
 
				+			 * notify the peer, or try to update our meta data. */
			
 
				+			if (eh == EP_CALL_HELPER && drbd_test_flag(mdev, WAS_IO_ERROR))
			
 
				 				drbd_khelper(mdev, "local-io-error");
			
 
				 
			
 
				 			/* Immediately allow completion of all application IO,
			
@@ -1643,7 +1646,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 
				 			 * So aborting local requests may cause crashes,
			
 
				 			 * or even worse, silent data corruption.
			
 
				 			 */
			
 
				-			if (drbd_test_and_clear_flag(mdev, FORCE_DETACH))
			
 
				+			if (drbd_test_flag(mdev, FORCE_DETACH))
			
 
				 				tl_abort_disk_io(mdev);
			
 
				 
			
 
				 			/* current state still has to be D_FAILED,
			
@@ -4220,6 +4223,26 @@ static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused
 
				 	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
			
 
				 	 * the protected members anymore, though, so once put_ldev reaches zero
			
 
				 	 * again, it will be safe to free them. */
			
 
				+
			
 
				+	/* Try to write changed bitmap pages, read errors may have just
			
 
				+	 * set some bits outside the area covered by the activity log.
			
 
				+	 *
			
 
				+	 * If we have an IO error during the bitmap writeout,
			
 
				+	 * we will want a full sync next time, just in case.
			
 
				+	 * (Do we want a specific meta data flag for this?)
			
 
				+	 *
			
 
				+	 * If that does not make it to stable storage either,
			
 
				+	 * we cannot do anything about that anymore.  */
			
 
				+	if (mdev->bitmap) {
			
 
				+		if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write,
			
 
				+					"detach", BM_LOCKED_MASK)) {
			
 
				+			if (drbd_test_flag(mdev, WAS_READ_ERROR)) {
			
 
				+				drbd_md_set_flag(mdev, MDF_FULL_SYNC);
			
 
				+				drbd_md_sync(mdev);
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	drbd_force_state(mdev, NS(disk, D_DISKLESS));
			
 
				 	return 1;
			
 
				 }
			
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -959,6 +959,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 
				 
			
 
				 	/* make sure there is no leftover from previous force-detach attempts */
			
 
				 	drbd_clear_flag(mdev, FORCE_DETACH);
			
 
				+	drbd_clear_flag(mdev, WAS_IO_ERROR);
			
 
				+	drbd_clear_flag(mdev, WAS_READ_ERROR);
			
 
				 
			
 
				 	/* and no leftover from previously aborted resync or verify, either */
			
 
				 	mdev->rs_total = 0;
			
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -455,7 +455,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 
				 		req->rq_state |= RQ_LOCAL_COMPLETED;
			
 
				 		req->rq_state &= ~RQ_LOCAL_PENDING;
			
 
				 
			
 
				-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
			
 
				+		__drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
			
 
				 		_req_may_be_done_not_susp(req, m);
			
 
				 		break;
			
 
				 
			
@@ -477,7 +477,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 
				 			break;
			
 
				 		}
			
 
				 
			
 
				-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
			
 
				+		__drbd_chk_io_error(mdev, DRBD_READ_ERROR);
			
 
				 
			
 
				 	goto_queue_for_net_read:
			
 
				 
			
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -111,7 +111,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
 
				 	if (list_empty(&mdev->read_ee))
			
 
				 		wake_up(&mdev->ee_wait);
			
 
				 	if (test_bit(__EE_WAS_ERROR, &e->flags))
			
 
				-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
			
 
				+		__drbd_chk_io_error(mdev, DRBD_READ_ERROR);
			
 
				 	spin_unlock_irqrestore(&mdev->req_lock, flags);
			
 
				 
			
 
				 	drbd_queue_work(&mdev->data.work, &e->w);
			
@@ -154,7 +154,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
 
				 		: list_empty(&mdev->active_ee);
			
 
				 
			
 
				 	if (test_bit(__EE_WAS_ERROR, &e->flags))
			
 
				-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
			
 
				+		__drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
			
 
				 	spin_unlock_irqrestore(&mdev->req_lock, flags);
			
 
				 
			
 
				 	if (is_syncer_req)