Browse Source

drbd: Fix a race condition that can lead to a BUG()

If the preconditions for a state change are modified after the wait_event(),
we might hit the BUG() statement in conn_set_state().

By holding the spin_lock while evaluating the condition AND until the
actual state change, we ensure that the preconditions cannot change anymore.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Philipp Reisner 13 years ago
parent
commit
c1fd29a11f
2 changed files with 33 additions and 8 deletions
  1. 27 0
      drivers/block/drbd/drbd_int.h
  2. 6 8
      drivers/block/drbd/drbd_state.c

+ 27 - 0
drivers/block/drbd/drbd_int.h

@@ -2301,3 +2301,30 @@ static inline void drbd_md_flush(struct drbd_conf *mdev)
 }
 
 #endif
+
+/* This is defined in drivers/md/md.h as well. Should go into wait.h */
+#define __wait_event_lock_irq(wq, condition, lock, cmd) 		\
+do {									\
+	wait_queue_t __wait;						\
+	init_waitqueue_entry(&__wait, current);				\
+									\
+	add_wait_queue(&wq, &__wait);					\
+	for (;;) {							\
+		set_current_state(TASK_UNINTERRUPTIBLE);		\
+		if (condition)						\
+			break;						\
+		spin_unlock_irq(&lock);					\
+		cmd;							\
+		schedule();						\
+		spin_lock_irq(&lock);					\
+	}								\
+	current->state = TASK_RUNNING;					\
+	remove_wait_queue(&wq, &__wait);				\
+} while (0)
+
+#define wait_event_lock_irq(wq, condition, lock, cmd) 			\
+do {									\
+	if (condition)	 						\
+		break;							\
+	__wait_event_lock_irq(wq, condition, lock, cmd);		\
+} while (0)

+ 6 - 8
drivers/block/drbd/drbd_state.c

@@ -1710,7 +1710,6 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state
 	if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags))
 		return SS_CW_FAILED_BY_PEER;
 
-	spin_lock_irq(&tconn->req_lock);
 	rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR;
 
 	if (rv == SS_UNKNOWN_ERROR)
@@ -1719,8 +1718,6 @@ _conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state
 	if (rv == SS_SUCCESS)
 		rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
 
-	spin_unlock_irq(&tconn->req_lock);
-
 	return rv;
 }
 
@@ -1736,21 +1733,22 @@ conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state v
 	set_bit(CONN_WD_ST_CHG_REQ, &tconn->flags);
 	if (conn_send_state_req(tconn, mask, val)) {
 		clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags);
-		rv = SS_CW_FAILED_BY_PEER;
 		/* if (f & CS_VERBOSE)
 		   print_st_err(mdev, os, ns, rv); */
-		goto abort;
+		mutex_unlock(&tconn->cstate_mutex);
+		spin_lock_irq(&tconn->req_lock);
+		return SS_CW_FAILED_BY_PEER;
 	}
 
 	if (val.conn == C_DISCONNECTING)
 		set_bit(DISCONNECT_SENT, &tconn->flags);
 
-	wait_event(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val)));
+	spin_lock_irq(&tconn->req_lock);
+
+	wait_event_lock_irq(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val)), tconn->req_lock,);
 	clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags);
 
-abort:
 	mutex_unlock(&tconn->cstate_mutex);
-	spin_lock_irq(&tconn->req_lock);
 
 	return rv;
 }