@@ -139,6 +139,8 @@ struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
 struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
 mempool_t *drbd_request_mempool;
 mempool_t *drbd_ee_mempool;
+mempool_t *drbd_md_io_page_pool;
+struct bio_set *drbd_md_io_bio_set;

 /* I do not use a standard mempool, because:
    1) I want to hand out the pre-allocated objects first.
@@ -159,7 +161,24 @@ static const struct block_device_operations drbd_ops = {
 	.release = drbd_release,
 };

-#define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0]))
+static void bio_destructor_drbd(struct bio *bio)
+{
+	bio_free(bio, drbd_md_io_bio_set);
+}
+
+struct bio *bio_alloc_drbd(gfp_t gfp_mask)
+{
+	struct bio *bio;
+
+	if (!drbd_md_io_bio_set)
+		return bio_alloc(gfp_mask, 1);
+
+	bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
+	if (!bio)
+		return NULL;
+	bio->bi_destructor = bio_destructor_drbd;
+	return bio;
+}

 #ifdef __CHECKER__
 /* When checking with sparse, and this is an inline function, sparse will
@@ -208,6 +227,7 @@ static int tl_init(struct drbd_conf *mdev)
 	mdev->oldest_tle = b;
 	mdev->newest_tle = b;
 	INIT_LIST_HEAD(&mdev->out_of_sequence_requests);
+	INIT_LIST_HEAD(&mdev->barrier_acked_requests);

 	mdev->tl_hash = NULL;
 	mdev->tl_hash_s = 0;
@@ -246,9 +266,7 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
 	new->n_writes = 0;

 	newest_before = mdev->newest_tle;
-	/* never send a barrier number == 0, because that is special-cased
-	 * when using TCQ for our write ordering code */
-	new->br_number = (newest_before->br_number+1) ?: 1;
+	new->br_number = newest_before->br_number+1;
 	if (mdev->newest_tle != new) {
 		mdev->newest_tle->next = new;
 		mdev->newest_tle = new;
@@ -311,7 +329,7 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
 	   These have been list_move'd to the out_of_sequence_requests list in
 	   _req_mod(, barrier_acked) above.
 	   */
-	list_del_init(&b->requests);
+	list_splice_init(&b->requests, &mdev->barrier_acked_requests);

 	nob = b->next;
 	if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
@@ -411,6 +429,23 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
 		b = tmp;
 		list_splice(&carry_reads, &b->requests);
 	}
+
+	/* Actions operating on the disk state also want to work on
+	   requests that got barrier acked. */
+	switch (what) {
+	case fail_frozen_disk_io:
+	case restart_frozen_disk_io:
+		list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
+			req = list_entry(le, struct drbd_request, tl_requests);
+			_req_mod(req, what);
+		}
+
+	case connection_lost_while_pending:
+	case resend:
+		break;
+	default:
+		dev_err(DEV, "what = %d in _tl_restart()\n", what);
+	}
 }


@@ -457,6 +492,38 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
 	spin_unlock_irq(&mdev->req_lock);
 }

+/**
+ * tl_abort_disk_io() - Abort disk I/O for all requests for a certain mdev in the TL
+ * @mdev:	DRBD device.
+ */
+void tl_abort_disk_io(struct drbd_conf *mdev)
+{
+	struct drbd_tl_epoch *b;
+	struct list_head *le, *tle;
+	struct drbd_request *req;
+
+	spin_lock_irq(&mdev->req_lock);
+	b = mdev->oldest_tle;
+	while (b) {
+		list_for_each_safe(le, tle, &b->requests) {
+			req = list_entry(le, struct drbd_request, tl_requests);
+			if (!(req->rq_state & RQ_LOCAL_PENDING))
+				continue;
+			_req_mod(req, abort_disk_io);
+		}
+		b = b->next;
+	}
+
+	list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
+		req = list_entry(le, struct drbd_request, tl_requests);
+		if (!(req->rq_state & RQ_LOCAL_PENDING))
+			continue;
+		_req_mod(req, abort_disk_io);
+	}
+
+	spin_unlock_irq(&mdev->req_lock);
+}
+
 /**
  * cl_wide_st_chg() - true if the state change is a cluster wide one
  * @mdev:	DRBD device.
@@ -470,7 +537,7 @@ static int cl_wide_st_chg(struct drbd_conf *mdev,
 	     ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
 	      (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
 	      (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
-	      (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) ||
+	      (os.disk != D_FAILED && ns.disk == D_FAILED))) ||
 		(os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
 		(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
 }
@@ -509,8 +576,16 @@ static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
 static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,
 						    union drbd_state,
 						    union drbd_state);
+enum sanitize_state_warnings {
+	NO_WARNING,
+	ABORTED_ONLINE_VERIFY,
+	ABORTED_RESYNC,
+	CONNECTION_LOST_NEGOTIATING,
+	IMPLICITLY_UPGRADED_DISK,
+	IMPLICITLY_UPGRADED_PDSK,
+};
 static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
-				       union drbd_state ns, const char **warn_sync_abort);
+				       union drbd_state ns, enum sanitize_state_warnings *warn);
 int drbd_send_state_req(struct drbd_conf *,
 			union drbd_state, union drbd_state);

@@ -785,6 +860,13 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
 	if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
 		rv = SS_IN_TRANSIENT_STATE;

+	/* While establishing a connection, only allow cstate to change.
+	   Delay/refuse role changes, detach/attach, etc. */
+	if (test_bit(STATE_SENT, &mdev->flags) &&
+	    !(os.conn == C_WF_REPORT_PARAMS ||
+	      (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION)))
+		rv = SS_IN_TRANSIENT_STATE;
+
 	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
 		rv = SS_NEED_CONNECTION;

@@ -803,6 +885,21 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
 	return rv;
 }

+static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn)
+{
+	static const char *msg_table[] = {
+		[NO_WARNING] = "",
+		[ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
+		[ABORTED_RESYNC] = "Resync aborted.",
+		[CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
+		[IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
+		[IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
+	};
+
+	if (warn != NO_WARNING)
+		dev_warn(DEV, "%s\n", msg_table[warn]);
+}
+
 /**
  * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
  * @mdev:	DRBD device.
@@ -814,11 +911,14 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
  * to D_UNKNOWN. This rule and many more along those lines are in this function.
  */
 static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
-				       union drbd_state ns, const char **warn_sync_abort)
+				       union drbd_state ns, enum sanitize_state_warnings *warn)
 {
 	enum drbd_fencing_p fp;
 	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;

+	if (warn)
+		*warn = NO_WARNING;
+
 	fp = FP_DONT_CARE;
 	if (get_ldev(mdev)) {
 		fp = mdev->ldev->dc.fencing;
@@ -833,18 +933,13 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 	/* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow.
 	 * If you try to go into some Sync* state, that shall fail (elsewhere). */
 	if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN &&
-	    ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN)
+	    ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_CONNECTED)
 		ns.conn = os.conn;

 	/* we cannot fail (again) if we already detached */
 	if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
 		ns.disk = D_DISKLESS;

-	/* if we are only D_ATTACHING yet,
-	 * we can (and should) go directly to D_DISKLESS. */
-	if (ns.disk == D_FAILED && os.disk == D_ATTACHING)
-		ns.disk = D_DISKLESS;
-
 	/* After C_DISCONNECTING only C_STANDALONE may follow */
 	if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
 		ns.conn = os.conn;
@@ -863,10 +958,9 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 	/* Abort resync if a disk fails/detaches */
 	if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&
 	    (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
-		if (warn_sync_abort)
-			*warn_sync_abort =
-				os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ?
-				"Online-verify" : "Resync";
+		if (warn)
+			*warn = os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ?
+				ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
 		ns.conn = C_CONNECTED;
 	}

@@ -877,7 +971,8 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 		ns.disk = mdev->new_state_tmp.disk;
 		ns.pdsk = mdev->new_state_tmp.pdsk;
 	} else {
-		dev_alert(DEV, "Connection lost while negotiating, no data!\n");
+		if (warn)
+			*warn = CONNECTION_LOST_NEGOTIATING;
 		ns.disk = D_DISKLESS;
 		ns.pdsk = D_UNKNOWN;
 	}
@@ -959,16 +1054,16 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 		ns.disk = disk_max;

 	if (ns.disk < disk_min) {
-		dev_warn(DEV, "Implicitly set disk from %s to %s\n",
-			 drbd_disk_str(ns.disk), drbd_disk_str(disk_min));
+		if (warn)
+			*warn = IMPLICITLY_UPGRADED_DISK;
 		ns.disk = disk_min;
 	}
 	if (ns.pdsk > pdsk_max)
 		ns.pdsk = pdsk_max;

 	if (ns.pdsk < pdsk_min) {
-		dev_warn(DEV, "Implicitly set pdsk from %s to %s\n",
-			 drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min));
+		if (warn)
+			*warn = IMPLICITLY_UPGRADED_PDSK;
 		ns.pdsk = pdsk_min;
 	}

@@ -1045,12 +1140,12 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 {
 	union drbd_state os;
 	enum drbd_state_rv rv = SS_SUCCESS;
-	const char *warn_sync_abort = NULL;
+	enum sanitize_state_warnings ssw;
 	struct after_state_chg_work *ascw;

 	os = mdev->state;

-	ns = sanitize_state(mdev, os, ns, &warn_sync_abort);
+	ns = sanitize_state(mdev, os, ns, &ssw);

 	if (ns.i == os.i)
 		return SS_NOTHING_TO_DO;
@@ -1076,8 +1171,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 		return rv;
 	}

-	if (warn_sync_abort)
-		dev_warn(DEV, "%s aborted.\n", warn_sync_abort);
+	print_sanitize_warnings(mdev, ssw);

 	{
 	char *pbp, pb[300];
@@ -1243,7 +1337,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 		drbd_thread_stop_nowait(&mdev->receiver);

 	/* Upon network failure, we need to restart the receiver. */
-	if (os.conn > C_TEAR_DOWN &&
+	if (os.conn > C_WF_CONNECTION &&
 	    ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
 		drbd_thread_restart_nowait(&mdev->receiver);

@@ -1251,6 +1345,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
 		drbd_resume_al(mdev);

+	/* remember last connect and attach times so request_timer_fn() won't
+	 * kill newly established sessions while we are still trying to thaw
+	 * previously frozen IO */
+	if (os.conn != C_WF_REPORT_PARAMS && ns.conn == C_WF_REPORT_PARAMS)
+		mdev->last_reconnect_jif = jiffies;
+	if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
+	    ns.disk > D_NEGOTIATING)
+		mdev->last_reattach_jif = jiffies;
+
 	ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
 	if (ascw) {
 		ascw->os = os;
@@ -1354,12 +1457,16 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	/* Here we have the actions that are performed after a
 	   state change. This function might sleep */

+	if (os.disk <= D_NEGOTIATING && ns.disk > D_NEGOTIATING)
+		mod_timer(&mdev->request_timer, jiffies + HZ);
+
 	nsm.i = -1;
 	if (ns.susp_nod) {
 		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
 			what = resend;

-		if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
+		if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
+		    ns.disk > D_NEGOTIATING)
 			what = restart_frozen_disk_io;

 		if (what != nothing)
@@ -1408,7 +1515,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	/* Do not change the order of the if above and the two below... */
 	if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) {      /* attach on the peer */
 		drbd_send_uuids(mdev);
-		drbd_send_state(mdev);
+		drbd_send_state(mdev, ns);
 	}
 	/* No point in queuing send_bitmap if we don't have a connection
 	 * anymore, so check also the _current_ state, not only the new state
@@ -1441,11 +1548,11 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	}

 	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
-		if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) {
+		if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
+		    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
 			drbd_uuid_new_current(mdev);
 			drbd_send_uuids(mdev);
 		}
-
 		/* D_DISKLESS Peer becomes secondary */
 		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
 			/* We may still be Primary ourselves.
@@ -1473,14 +1580,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
 		drbd_send_sizes(mdev, 0, 0);  /* to start sync... */
 		drbd_send_uuids(mdev);
-		drbd_send_state(mdev);
+		drbd_send_state(mdev, ns);
 	}

 	/* We want to pause/continue resync, tell peer. */
 	if (ns.conn >= C_CONNECTED &&
 	     ((os.aftr_isp != ns.aftr_isp) ||
 	      (os.user_isp != ns.user_isp)))
-		drbd_send_state(mdev);
+		drbd_send_state(mdev, ns);

 	/* In case one of the isp bits got set, suspend other devices. */
 	if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
@@ -1490,10 +1597,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	/* Make sure the peer gets informed about eventual state
 	   changes (ISP bits) while we were in WFReportParams. */
 	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
-		drbd_send_state(mdev);
+		drbd_send_state(mdev, ns);

 	if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
-		drbd_send_state(mdev);
+		drbd_send_state(mdev, ns);

 	/* We are in the progress to start a full sync... */
 	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
@@ -1513,33 +1620,38 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	/* first half of local IO error, failure to attach,
 	 * or administrative detach */
 	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
-		enum drbd_io_error_p eh;
-		int was_io_error;
+		enum drbd_io_error_p eh = EP_PASS_ON;
+		int was_io_error = 0;
 		/* corresponding get_ldev was in __drbd_set_state, to serialize
-		 * our cleanup here with the transition to D_DISKLESS,
-		 * so it is safe to dreference ldev here. */
-		eh = mdev->ldev->dc.on_io_error;
-		was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
-
-		/* current state still has to be D_FAILED,
-		 * there is only one way out: to D_DISKLESS,
-		 * and that may only happen after our put_ldev below. */
-		if (mdev->state.disk != D_FAILED)
-			dev_err(DEV,
-				"ASSERT FAILED: disk is %s during detach\n",
-				drbd_disk_str(mdev->state.disk));
-
-		if (drbd_send_state(mdev))
-			dev_warn(DEV, "Notified peer that I am detaching my disk\n");
-		else
-			dev_err(DEV, "Sending state for detaching disk failed\n");
-
-		drbd_rs_cancel_all(mdev);
-
-		/* In case we want to get something to stable storage still,
-		 * this may be the last chance.
-		 * Following put_ldev may transition to D_DISKLESS. */
-		drbd_md_sync(mdev);
+		 * our cleanup here with the transition to D_DISKLESS.
+		 * But it is still not safe to dereference ldev here, since
+		 * we might come from a failed Attach before ldev was set. */
+		if (mdev->ldev) {
+			eh = mdev->ldev->dc.on_io_error;
+			was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
+
+			/* Immediately allow completion of all application IO
+			   that waits for completion from the local disk. */
+			tl_abort_disk_io(mdev);
+
+			/* current state still has to be D_FAILED,
+			 * there is only one way out: to D_DISKLESS,
+			 * and that may only happen after our put_ldev below. */
+			if (mdev->state.disk != D_FAILED)
+				dev_err(DEV,
+					"ASSERT FAILED: disk is %s during detach\n",
+					drbd_disk_str(mdev->state.disk));
+
+			if (ns.conn >= C_CONNECTED)
+				drbd_send_state(mdev, ns);
+
+			drbd_rs_cancel_all(mdev);
+
+			/* In case we want to get something to stable storage still,
+			 * this may be the last chance.
+			 * Following put_ldev may transition to D_DISKLESS. */
+			drbd_md_sync(mdev);
+		}
 		put_ldev(mdev);

 		if (was_io_error && eh == EP_CALL_HELPER)
@@ -1561,16 +1673,17 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 		mdev->rs_failed = 0;
 		atomic_set(&mdev->rs_pending_cnt, 0);

-		if (drbd_send_state(mdev))
-			dev_warn(DEV, "Notified peer that I'm now diskless.\n");
+		if (ns.conn >= C_CONNECTED)
+			drbd_send_state(mdev, ns);
+
 		/* corresponding get_ldev in __drbd_set_state
 		 * this may finally trigger drbd_ldev_destroy. */
 		put_ldev(mdev);
 	}

 	/* Notify peer that I had a local IO error, and did not detached.. */
-	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT)
-		drbd_send_state(mdev);
+	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
+		drbd_send_state(mdev, ns);

 	/* Disks got bigger while they were detached */
 	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
@@ -1588,7 +1701,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	/* sync target done with resync. Explicitly notify peer, even though
 	 * it should (at least for non-empty resyncs) already know itself. */
 	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
-		drbd_send_state(mdev);
+		drbd_send_state(mdev, ns);
+
+	/* Wake up role changes that were delayed because of connection establishment */
+	if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) {
+		clear_bit(STATE_SENT, &mdev->flags);
+		wake_up(&mdev->state_wait);
+	}

 	/* This triggers bitmap writeout of potentially still unwritten pages
 	 * if the resync finished cleanly, or aborted because of peer disk
@@ -1598,8 +1717,8 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	 * No harm done if some bits change during this phase.
 	 */
 	if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
-		drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
-			"write from resync_finished", BM_LOCKED_SET_ALLOWED);
+		drbd_queue_bitmap_io(mdev, &drbd_bm_write_copy_pages, NULL,
+			"write from resync_finished", BM_LOCKED_CHANGE_ALLOWED);
 		put_ldev(mdev);
 	}

@@ -2057,7 +2176,11 @@ int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
 	D_ASSERT(mdev->state.disk == D_UP_TO_DATE);

-	uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
+	uuid = mdev->ldev->md.uuid[UI_BITMAP];
+	if (uuid && uuid != UUID_JUST_CREATED)
+		uuid = uuid + UUID_NEW_BM_OFFSET;
+	else
+		get_random_bytes(&uuid, sizeof(u64));
 	drbd_uuid_set(mdev, UI_BITMAP, uuid);
 	drbd_print_uuids(mdev, "updated sync UUID");
 	drbd_md_sync(mdev);
@@ -2089,6 +2212,10 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
 		max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
 	}

+	/* Never allow old drbd (up to 8.3.7) to see more than 32KiB */
+	if (mdev->agreed_pro_version <= 94)
+		max_bio_size = min_t(int, max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
+
 	p.d_size = cpu_to_be64(d_size);
 	p.u_size = cpu_to_be64(u_size);
 	p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
@@ -2102,10 +2229,10 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
 }

 /**
- * drbd_send_state() - Sends the drbd state to the peer
+ * drbd_send_current_state() - Sends the drbd state to the peer
  * @mdev:	DRBD device.
  */
-int drbd_send_state(struct drbd_conf *mdev)
+int drbd_send_current_state(struct drbd_conf *mdev)
 {
 	struct socket *sock;
 	struct p_state p;
@@ -2131,6 +2258,37 @@ int drbd_send_state(struct drbd_conf *mdev)
 	return ok;
 }

+/**
+ * drbd_send_state() - After a state change, sends the new state to the peer
+ * @mdev:	DRBD device.
+ * @state:	the state to send, not necessarily the current state.
+ *
+ * Each state change queues an "after_state_ch" work, which will eventually
+ * send the resulting new state to the peer. If more state changes happen
+ * between queuing and processing of the after_state_ch work, we still
+ * want to send each intermediary state in the order it occurred.
+ */
+int drbd_send_state(struct drbd_conf *mdev, union drbd_state state)
+{
+	struct socket *sock;
+	struct p_state p;
+	int ok = 0;
+
+	mutex_lock(&mdev->data.mutex);
+
+	p.state = cpu_to_be32(state.i);
+	sock = mdev->data.socket;
+
+	if (likely(sock != NULL)) {
+		ok = _drbd_send_cmd(mdev, sock, P_STATE,
+				    (struct p_header80 *)&p, sizeof(p), 0);
+	}
+
+	mutex_unlock(&mdev->data.mutex);
+
+	return ok;
+}
+
 int drbd_send_state_req(struct drbd_conf *mdev,
 			union drbd_state mask, union drbd_state val)
 {
@@ -2615,7 +2773,7 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
|
|
|
int i;
|
|
|
/* hint all but last page with MSG_MORE */
|
|
|
- __bio_for_each_segment(bvec, bio, i, 0) {
|
|
|
+ bio_for_each_segment(bvec, bio, i) {
|
|
|
if (!_drbd_no_send_page(mdev, bvec->bv_page,
|
|
|
bvec->bv_offset, bvec->bv_len,
|
|
|
i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
|
|
@@ -2629,7 +2787,7 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
|
|
|
struct bio_vec *bvec;
|
|
|
int i;
|
|
|
/* hint all but last page with MSG_MORE */
|
|
|
- __bio_for_each_segment(bvec, bio, i, 0) {
|
|
|
+ bio_for_each_segment(bvec, bio, i) {
|
|
|
if (!_drbd_send_page(mdev, bvec->bv_page,
|
|
|
bvec->bv_offset, bvec->bv_len,
|
|
|
i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
|
|
@@ -2695,8 +2853,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
|
|
|
|
|
|
 	p.sector = cpu_to_be64(req->sector);
 	p.block_id = (unsigned long)req;
-	p.seq_num = cpu_to_be32(req->seq_num =
-				atomic_add_return(1, &mdev->packet_seq));
+	p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));

 	dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);

@@ -2987,8 +3144,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	atomic_set(&mdev->rs_sect_in, 0);
 	atomic_set(&mdev->rs_sect_ev, 0);
 	atomic_set(&mdev->ap_in_flight, 0);
+	atomic_set(&mdev->md_io_in_use, 0);

-	mutex_init(&mdev->md_io_mutex);
 	mutex_init(&mdev->data.mutex);
 	mutex_init(&mdev->meta.mutex);
 	sema_init(&mdev->data.work.s, 0);
@@ -3126,6 +3283,10 @@ static void drbd_destroy_mempools(void)

 	/* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */

+	if (drbd_md_io_bio_set)
+		bioset_free(drbd_md_io_bio_set);
+	if (drbd_md_io_page_pool)
+		mempool_destroy(drbd_md_io_page_pool);
 	if (drbd_ee_mempool)
 		mempool_destroy(drbd_ee_mempool);
 	if (drbd_request_mempool)
@@ -3139,6 +3300,8 @@ static void drbd_destroy_mempools(void)
 	if (drbd_al_ext_cache)
 		kmem_cache_destroy(drbd_al_ext_cache);

+	drbd_md_io_bio_set   = NULL;
+	drbd_md_io_page_pool = NULL;
 	drbd_ee_mempool = NULL;
 	drbd_request_mempool = NULL;
 	drbd_ee_cache = NULL;
@@ -3162,6 +3325,8 @@ static int drbd_create_mempools(void)
 	drbd_bm_ext_cache = NULL;
 	drbd_al_ext_cache = NULL;
 	drbd_pp_pool = NULL;
+	drbd_md_io_page_pool = NULL;
+	drbd_md_io_bio_set = NULL;

 	/* caches */
 	drbd_request_cache = kmem_cache_create(
@@ -3185,6 +3350,16 @@ static int drbd_create_mempools(void)
 		goto Enomem;

 	/* mempools */
+#ifdef COMPAT_HAVE_BIOSET_CREATE
+	drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
+	if (drbd_md_io_bio_set == NULL)
+		goto Enomem;
+#endif
+
+	drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0);
+	if (drbd_md_io_page_pool == NULL)
+		goto Enomem;
+
 	drbd_request_mempool = mempool_create(number,
 		mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
 	if (drbd_request_mempool == NULL)
@@ -3262,6 +3437,8 @@ static void drbd_delete_device(unsigned int minor)
 	if (!mdev)
 		return;

+	del_timer_sync(&mdev->request_timer);
+
 	/* paranoia asserts */
 	if (mdev->open_cnt != 0)
 		dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt,
@@ -3666,8 +3843,10 @@ void drbd_md_sync(struct drbd_conf *mdev)
 	if (!get_ldev_if_state(mdev, D_FAILED))
 		return;

-	mutex_lock(&mdev->md_io_mutex);
-	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+	buffer = drbd_md_get_buffer(mdev);
+	if (!buffer)
+		goto out;
+
 	memset(buffer, 0, 512);

 	buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
@@ -3698,7 +3877,8 @@ void drbd_md_sync(struct drbd_conf *mdev)
 	 * since we updated it on metadata. */
 	mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);

-	mutex_unlock(&mdev->md_io_mutex);
+	drbd_md_put_buffer(mdev);
+out:
 	put_ldev(mdev);
 }

@@ -3718,8 +3898,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	if (!get_ldev_if_state(mdev, D_ATTACHING))
 		return ERR_IO_MD_DISK;

-	mutex_lock(&mdev->md_io_mutex);
-	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+	buffer = drbd_md_get_buffer(mdev);
+	if (!buffer)
+		goto out;

 	if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
 		/* NOTE: can't do normal error processing here as this is
@@ -3780,7 +3961,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 		mdev->sync_conf.al_extents = 127;

 err:
-	mutex_unlock(&mdev->md_io_mutex);
+	drbd_md_put_buffer(mdev);
+ out:
 	put_ldev(mdev);

 	return rv;
@@ -4183,12 +4365,11 @@ const char *drbd_buildtag(void)
 	static char buildtag[38] = "\0uilt-in";

 	if (buildtag[0] == 0) {
-#ifdef CONFIG_MODULES
-		if (THIS_MODULE != NULL)
-			sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
-		else
+#ifdef MODULE
+		sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
+#else
+		buildtag[0] = 'b';
 #endif
-			buildtag[0] = 'b';
 	}

 	return buildtag;