|
@@ -300,48 +300,6 @@ static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_e
|
|
|
_req_may_be_done(req, m);
|
|
|
}
|
|
|
|
|
|
-/*
|
|
|
- * checks whether there was an overlapping request
|
|
|
- * or ee already registered.
|
|
|
- *
|
|
|
- * if so, return 1, in which case this request is completed on the spot,
|
|
|
- * without ever being submitted or send.
|
|
|
- *
|
|
|
- * return 0 if it is ok to submit this request.
|
|
|
- *
|
|
|
- * NOTE:
|
|
|
- * paranoia: assume something above us is broken, and issues different write
|
|
|
- * requests for the same block simultaneously...
|
|
|
- *
|
|
|
- * To ensure these won't be reordered differently on both nodes, resulting in
|
|
|
- * diverging data sets, we discard the later one(s). Not that this is supposed
|
|
|
- * to happen, but this is the rationale why we also have to check for
|
|
|
- * conflicting requests with local origin, and why we have to do so regardless
|
|
|
- * of whether we allowed multiple primaries.
|
|
|
- */
|
|
|
-static int _req_conflicts(struct drbd_request *req)
|
|
|
-{
|
|
|
- struct drbd_conf *mdev = req->mdev;
|
|
|
- const sector_t sector = req->i.sector;
|
|
|
- const int size = req->i.size;
|
|
|
- struct drbd_interval *i;
|
|
|
-
|
|
|
- D_ASSERT(drbd_interval_empty(&req->i));
|
|
|
-
|
|
|
- i = drbd_find_overlap(&mdev->write_requests, sector, size);
|
|
|
- if (i) {
|
|
|
- dev_alert(DEV, "%s[%u] Concurrent %s write detected! "
|
|
|
- "[DISCARD L] new: %llus +%u; "
|
|
|
- "pending: %llus +%u\n",
|
|
|
- current->comm, current->pid,
|
|
|
- i->local ? "local" : "remote",
|
|
|
- (unsigned long long)sector, size,
|
|
|
- (unsigned long long)i->sector, i->size);
|
|
|
- return 1;
|
|
|
- }
|
|
|
- return 0;
|
|
|
-}
|
|
|
-
|
|
|
/* obviously this could be coded as many single functions
|
|
|
* instead of one huge switch,
|
|
|
* or by putting the code directly in the respective locations
|
|
@@ -721,6 +679,34 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s
|
|
|
return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr);
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * complete_conflicting_writes - wait for any conflicting write requests
|
|
|
+ *
|
|
|
+ * The write_requests tree contains all active write requests which we
|
|
|
+ * currently know about. Wait for any requests to complete which conflict with
|
|
|
+ * the new one.
|
|
|
+ */
|
|
|
+static int complete_conflicting_writes(struct drbd_conf *mdev,
|
|
|
+ sector_t sector, int size)
|
|
|
+{
|
|
|
+ for(;;) {
|
|
|
+ DEFINE_WAIT(wait);
|
|
|
+ struct drbd_interval *i;
|
|
|
+
|
|
|
+ i = drbd_find_overlap(&mdev->write_requests, sector, size);
|
|
|
+ if (!i)
|
|
|
+ return 0;
|
|
|
+ i->waiting = true;
|
|
|
+ prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE);
|
|
|
+ spin_unlock_irq(&mdev->tconn->req_lock);
|
|
|
+ schedule();
|
|
|
+ finish_wait(&mdev->misc_wait, &wait);
|
|
|
+ spin_lock_irq(&mdev->tconn->req_lock);
|
|
|
+ if (signal_pending(current))
|
|
|
+ return -ERESTARTSYS;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
|
|
|
{
|
|
|
const int rw = bio_rw(bio);
|
|
@@ -729,7 +715,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
|
|
|
struct drbd_tl_epoch *b = NULL;
|
|
|
struct drbd_request *req;
|
|
|
int local, remote, send_oos = 0;
|
|
|
- int err = -EIO;
|
|
|
+ int err;
|
|
|
int ret = 0;
|
|
|
|
|
|
/* allocate outside of all locks; */
|
|
@@ -799,6 +785,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
|
|
|
if (!(local || remote) && !is_susp(mdev->state)) {
|
|
|
if (__ratelimit(&drbd_ratelimit_state))
|
|
|
dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
|
|
|
+ err = -EIO;
|
|
|
goto fail_free_complete;
|
|
|
}
|
|
|
|
|
@@ -823,6 +810,14 @@ allocate_barrier:
|
|
|
/* GOOD, everything prepared, grab the spin_lock */
|
|
|
spin_lock_irq(&mdev->tconn->req_lock);
|
|
|
|
|
|
+ if (rw == WRITE) {
|
|
|
+ err = complete_conflicting_writes(mdev, sector, size);
|
|
|
+ if (err) {
|
|
|
+ spin_unlock_irq(&mdev->tconn->req_lock);
|
|
|
+ goto fail_free_complete;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
if (is_susp(mdev->state)) {
|
|
|
/* If we got suspended, use the retry mechanism of
|
|
|
generic_make_request() to restart processing of this
|
|
@@ -843,6 +838,7 @@ allocate_barrier:
|
|
|
if (!(local || remote)) {
|
|
|
dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
|
|
|
spin_unlock_irq(&mdev->tconn->req_lock);
|
|
|
+ err = -EIO;
|
|
|
goto fail_free_complete;
|
|
|
}
|
|
|
}
|
|
@@ -903,12 +899,6 @@ allocate_barrier:
|
|
|
if (local)
|
|
|
_req_mod(req, TO_BE_SUBMITTED);
|
|
|
|
|
|
- /* check this request on the collision detection hash tables.
|
|
|
- * if we have a conflict, just complete it here.
|
|
|
- * THINK do we want to check reads, too? (I don't think so...) */
|
|
|
- if (rw == WRITE && _req_conflicts(req))
|
|
|
- goto fail_conflicting;
|
|
|
-
|
|
|
list_add_tail(&req->tl_requests, &mdev->tconn->newest_tle->requests);
|
|
|
|
|
|
/* NOTE remote first: to get the concurrent write detection right,
|
|
@@ -975,21 +965,6 @@ allocate_barrier:
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
-fail_conflicting:
|
|
|
- /* this is a conflicting request.
|
|
|
- * even though it may have been only _partially_
|
|
|
- * overlapping with one of the currently pending requests,
|
|
|
- * without even submitting or sending it, we will
|
|
|
- * pretend that it was successfully served right now.
|
|
|
- */
|
|
|
- _drbd_end_io_acct(mdev, req);
|
|
|
- spin_unlock_irq(&mdev->tconn->req_lock);
|
|
|
- if (remote)
|
|
|
- dec_ap_pending(mdev);
|
|
|
- /* THINK: do we want to fail it (-EIO), or pretend success?
|
|
|
- * this pretends success. */
|
|
|
- err = 0;
|
|
|
-
|
|
|
fail_free_complete:
|
|
|
if (req->rq_state & RQ_IN_ACT_LOG)
|
|
|
drbd_al_complete_io(mdev, sector);
|