@@ -188,147 +188,75 @@ int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
 #endif
 
 /**
- * DOC: The transfer log
- *
- * The transfer log is a single linked list of &struct drbd_tl_epoch objects.
- * mdev->tconn->newest_tle points to the head, mdev->tconn->oldest_tle points to the tail
- * of the list. There is always at least one &struct drbd_tl_epoch object.
- *
- * Each &struct drbd_tl_epoch has a circular double linked list of requests
- * attached.
- */
-static int tl_init(struct drbd_tconn *tconn)
-{
-        struct drbd_tl_epoch *b;
-
-        /* during device minor initialization, we may well use GFP_KERNEL */
-        b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
-        if (!b)
-                return 0;
-        INIT_LIST_HEAD(&b->requests);
-        INIT_LIST_HEAD(&b->w.list);
-        b->next = NULL;
-        b->br_number = atomic_inc_return(&tconn->current_tle_nr);
-        b->n_writes = 0;
-        b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
-
-        tconn->oldest_tle = b;
-        tconn->newest_tle = b;
-        INIT_LIST_HEAD(&tconn->out_of_sequence_requests);
-        INIT_LIST_HEAD(&tconn->barrier_acked_requests);
-
-        return 1;
-}
-
-static void tl_cleanup(struct drbd_tconn *tconn)
-{
-        if (tconn->oldest_tle != tconn->newest_tle)
-                conn_err(tconn, "ASSERT FAILED: oldest_tle == newest_tle\n");
-        if (!list_empty(&tconn->out_of_sequence_requests))
-                conn_err(tconn, "ASSERT FAILED: list_empty(out_of_sequence_requests)\n");
-        kfree(tconn->oldest_tle);
-        tconn->oldest_tle = NULL;
-        kfree(tconn->unused_spare_tle);
-        tconn->unused_spare_tle = NULL;
-}
-
-/**
- * _tl_add_barrier() - Adds a barrier to the transfer log
- * @mdev: DRBD device.
- * @new: Barrier to be added before the current head of the TL.
- *
- * The caller must hold the req_lock.
- */
-void _tl_add_barrier(struct drbd_tconn *tconn, struct drbd_tl_epoch *new)
-{
-        INIT_LIST_HEAD(&new->requests);
-        INIT_LIST_HEAD(&new->w.list);
-        new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
-        new->next = NULL;
-        new->n_writes = 0;
-
-        new->br_number = atomic_inc_return(&tconn->current_tle_nr);
-        if (tconn->newest_tle != new) {
-                tconn->newest_tle->next = new;
-                tconn->newest_tle = new;
-        }
-}
-
-/**
- * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
- * @mdev: DRBD device.
+ * tl_release() - mark as BARRIER_ACKED all requests in the corresponding transfer log epoch
+ * @tconn: DRBD connection.
  * @barrier_nr: Expected identifier of the DRBD write barrier packet.
  * @set_size: Expected number of requests before that barrier.
  *
  * In case the passed barrier_nr or set_size does not match the oldest
- * &struct drbd_tl_epoch objects this function will cause a termination
- * of the connection.
+ * epoch of not yet barrier-acked requests, this function will cause a
+ * termination of the connection.
  */
 void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
                 unsigned int set_size)
 {
-        struct drbd_conf *mdev;
-        struct drbd_tl_epoch *b, *nob; /* next old barrier */
-        struct list_head *le, *tle;
         struct drbd_request *r;
+        struct drbd_request *req = NULL;
+        int expect_epoch = 0;
+        int expect_size = 0;
 
         spin_lock_irq(&tconn->req_lock);
 
-        b = tconn->oldest_tle;
+        /* find oldest not yet barrier-acked write request,
+         * count writes in its epoch. */
+        list_for_each_entry(r, &tconn->transfer_log, tl_requests) {
+                const unsigned s = r->rq_state;
+                if (!req) {
+                        if (!(s & RQ_WRITE))
+                                continue;
+                        if (!(s & RQ_NET_MASK))
+                                continue;
+                        if (s & RQ_NET_DONE)
+                                continue;
+                        req = r;
+                        expect_epoch = req->epoch;
+                        expect_size ++;
+                } else {
+                        if (r->epoch != expect_epoch)
+                                break;
+                        if (!(s & RQ_WRITE))
+                                continue;
+                        /* if (s & RQ_DONE): not expected */
+                        /* if (!(s & RQ_NET_MASK)): not expected */
+                        expect_size++;
+                }
+        }
 
         /* first some paranoia code */
-        if (b == NULL) {
+        if (req == NULL) {
                 conn_err(tconn, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
                          barrier_nr);
                 goto bail;
         }
-        if (b->br_number != barrier_nr) {
+        if (expect_epoch != barrier_nr) {
                 conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n",
-                         barrier_nr, b->br_number);
+                         barrier_nr, expect_epoch);
                 goto bail;
         }
-        if (b->n_writes != set_size) {
+
+        if (expect_size != set_size) {
                 conn_err(tconn, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
-                         barrier_nr, set_size, b->n_writes);
+                         barrier_nr, set_size, expect_size);
                 goto bail;
         }
 
         /* Clean up list of requests processed during current epoch */
-        list_for_each_safe(le, tle, &b->requests) {
-                r = list_entry(le, struct drbd_request, tl_requests);
-                _req_mod(r, BARRIER_ACKED);
-        }
-        /* There could be requests on the list waiting for completion
-           of the write to the local disk. To avoid corruptions of
-           slab's data structures we have to remove the lists head.
-
-           Also there could have been a barrier ack out of sequence, overtaking
-           the write acks - which would be a bug and violating write ordering.
-           To not deadlock in case we lose connection while such requests are
-           still pending, we need some way to find them for the
-           _req_mode(CONNECTION_LOST_WHILE_PENDING).
-
-           These have been list_move'd to the out_of_sequence_requests list in
-           _req_mod(, BARRIER_ACKED) above.
-           */
-        list_splice_init(&b->requests, &tconn->barrier_acked_requests);
-        mdev = b->w.mdev;
-
-        nob = b->next;
-        if (test_and_clear_bit(CREATE_BARRIER, &tconn->flags)) {
-                _tl_add_barrier(tconn, b);
-                if (nob)
-                        tconn->oldest_tle = nob;
-                /* if nob == NULL b was the only barrier, and becomes the new
-                   barrier. Therefore tconn->oldest_tle points already to b */
-        } else {
-                D_ASSERT(nob != NULL);
-                tconn->oldest_tle = nob;
-                kfree(b);
+        list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests) {
+                if (req->epoch != expect_epoch)
+                        break;
+                _req_mod(req, BARRIER_ACKED);
         }
-
         spin_unlock_irq(&tconn->req_lock);
-        dec_ap_pending(mdev);
 
         return;
 
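The rewritten tl_release() above no longer frees or recycles epoch objects: an epoch now exists only as the ->epoch number carried by each request on the connection-wide transfer_log list, and a BarrierAck is validated by scanning that list from its oldest entry. A minimal userspace model of that check follows; the types, flag bits and values are illustrative stand-ins, not the driver's own definitions:

/* Model of the tl_release() epoch check: find the oldest epoch with
 * writes that are on the network but not yet barrier-acked, count its
 * writes, and compare against the BarrierAck packet's claims.
 * Illustrative stand-ins only, not DRBD's definitions. */
#include <stdio.h>

#define RQ_WRITE    (1u << 0)
#define RQ_NET_MASK (1u << 1)   /* stand-in: request is known to the peer */
#define RQ_NET_DONE (1u << 2)   /* stand-in: already barrier-acked */

struct mock_req { unsigned state; int epoch; };

/* Returns 0 if (barrier_nr, set_size) matches the oldest pending epoch. */
static int check_barrier_ack(const struct mock_req *log, int n,
                             int barrier_nr, int set_size)
{
        int expect_epoch = -1, expect_size = 0, i;

        for (i = 0; i < n; i++) {       /* oldest request first */
                unsigned s = log[i].state;

                if (expect_epoch < 0) {
                        if (!(s & RQ_WRITE) || !(s & RQ_NET_MASK) || (s & RQ_NET_DONE))
                                continue;       /* skip reads and finished writes */
                        expect_epoch = log[i].epoch;
                        expect_size++;
                } else {
                        if (log[i].epoch != expect_epoch)
                                break;          /* the next epoch begins here */
                        if (s & RQ_WRITE)
                                expect_size++;
                }
        }
        return (expect_epoch == barrier_nr && expect_size == set_size) ? 0 : -1;
}

int main(void)
{
        const struct mock_req log[] = {
                { RQ_WRITE | RQ_NET_MASK | RQ_NET_DONE, 7 },    /* old, acked */
                { RQ_WRITE | RQ_NET_MASK, 8 },
                { RQ_WRITE | RQ_NET_MASK, 8 },
                { RQ_WRITE | RQ_NET_MASK, 9 },
        };
        printf("ack(8,2): %s\n", check_barrier_ack(log, 4, 8, 2) ? "bad" : "ok");
        printf("ack(8,3): %s\n", check_barrier_ack(log, 4, 8, 3) ? "bad" : "ok");
        return 0;
}

Walking from the oldest entry means the first networked, not yet RQ_NET_DONE write pins the epoch the BarrierAck must name; counting stops at the first request of the following epoch, exactly as the two-armed if (!req) loop in the hunk above does.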
@@ -346,91 +274,20 @@ bail:
  * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
  * RESTART_FROZEN_DISK_IO.
  */
+/* must hold resource->req_lock */
 void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
 {
-        struct drbd_tl_epoch *b, *tmp, **pn;
-        struct list_head *le, *tle, carry_reads;
-        struct drbd_request *req;
-        int rv, n_writes, n_reads;
-
-        b = tconn->oldest_tle;
-        pn = &tconn->oldest_tle;
-        while (b) {
-                n_writes = 0;
-                n_reads = 0;
-                INIT_LIST_HEAD(&carry_reads);
-                list_for_each_safe(le, tle, &b->requests) {
-                        req = list_entry(le, struct drbd_request, tl_requests);
-                        rv = _req_mod(req, what);
-
-                        if (rv & MR_WRITE)
-                                n_writes++;
-                        if (rv & MR_READ)
-                                n_reads++;
-                }
-                tmp = b->next;
-
-                if (n_writes) {
-                        if (what == RESEND) {
-                                b->n_writes = n_writes;
-                                if (b->w.cb == NULL) {
-                                        b->w.cb = w_send_barrier;
-                                        inc_ap_pending(b->w.mdev);
-                                        set_bit(CREATE_BARRIER, &tconn->flags);
-                                }
-
-                                drbd_queue_work(&tconn->sender_work, &b->w);
-                        }
-                        pn = &b->next;
-                } else {
-                        if (n_reads)
-                                list_add(&carry_reads, &b->requests);
-                        /* there could still be requests on that ring list,
-                         * in case local io is still pending */
-                        list_del(&b->requests);
-
-                        /* dec_ap_pending corresponding to queue_barrier.
-                         * the newest barrier may not have been queued yet,
-                         * in which case w.cb is still NULL. */
-                        if (b->w.cb != NULL)
-                                dec_ap_pending(b->w.mdev);
-
-                        if (b == tconn->newest_tle) {
-                                /* recycle, but reinit! */
-                                if (tmp != NULL)
-                                        conn_err(tconn, "ASSERT FAILED tmp == NULL");
-                                INIT_LIST_HEAD(&b->requests);
-                                list_splice(&carry_reads, &b->requests);
-                                INIT_LIST_HEAD(&b->w.list);
-                                b->w.cb = NULL;
-                                b->br_number = atomic_inc_return(&tconn->current_tle_nr);
-                                b->n_writes = 0;
-
-                                *pn = b;
-                                break;
-                        }
-                        *pn = tmp;
-                        kfree(b);
-                }
-                b = tmp;
-                list_splice(&carry_reads, &b->requests);
-        }
-
-        /* Actions operating on the disk state, also want to work on
-           requests that got barrier acked. */
-        switch (what) {
-        case FAIL_FROZEN_DISK_IO:
-        case RESTART_FROZEN_DISK_IO:
-                list_for_each_safe(le, tle, &tconn->barrier_acked_requests) {
-                        req = list_entry(le, struct drbd_request, tl_requests);
-                        _req_mod(req, what);
-                }
-        case CONNECTION_LOST_WHILE_PENDING:
-        case RESEND:
-                break;
-        default:
-                conn_err(tconn, "what = %d in _tl_restart()\n", what);
-        }
+        struct drbd_request *req, *r;
+
+        list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests)
+                _req_mod(req, what);
+}
+
+void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
+{
+        spin_lock_irq(&tconn->req_lock);
+        _tl_restart(tconn, what);
+        spin_unlock_irq(&tconn->req_lock);
 }
 
 /**
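With the epoch objects gone, _tl_restart() collapses to one walk over the transfer log. The _safe iterator is what makes this correct: _req_mod() may unlink and free the request it is handed, so the iterator must remember the next element before visiting the current one. A reduced userspace sketch of that pattern, with hand-rolled stand-ins for the kernel list helpers:

/* Why _tl_restart() needs the _safe iterator: a minimal stand-in for
 * the kernel list helpers, deleting entries while walking the list. */
#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
        n->prev = h->prev;
        n->next = h;
        h->prev->next = n;
        h->prev = n;
}

static void list_del(struct list_head *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
}

struct mock_req { int id; struct list_head tl_requests; };

int main(void)
{
        struct mock_req reqs[3];
        struct list_head transfer_log, *pos, *tmp;
        int i;

        INIT_LIST_HEAD(&transfer_log);
        for (i = 0; i < 3; i++) {
                reqs[i].id = i;
                list_add_tail(&reqs[i].tl_requests, &transfer_log);
        }

        /* the "safe" walk: save ->next before the entry may be unlinked,
         * which is what list_for_each_entry_safe() expands to */
        for (pos = transfer_log.next, tmp = pos->next; pos != &transfer_log;
             pos = tmp, tmp = pos->next) {
                struct mock_req *req =
                        container_of(pos, struct mock_req, tl_requests);

                printf("handling request %d\n", req->id);
                list_del(&req->tl_requests);    /* what _req_mod() may do */
        }
        return 0;
}

The plain iterator would dereference pos->next after pos had been unlinked and freed; caching it in tmp first is all the new _tl_restart() needs to stay safe.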
@@ -443,36 +300,7 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
  */
 void tl_clear(struct drbd_tconn *tconn)
 {
-        struct list_head *le, *tle;
-        struct drbd_request *r;
-
-        spin_lock_irq(&tconn->req_lock);
-
-        _tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING);
-
-        /* we expect this list to be empty. */
-        if (!list_empty(&tconn->out_of_sequence_requests))
-                conn_err(tconn, "ASSERT FAILED list_empty(&out_of_sequence_requests)\n");
-
-        /* but just in case, clean it up anyways! */
-        list_for_each_safe(le, tle, &tconn->out_of_sequence_requests) {
-                r = list_entry(le, struct drbd_request, tl_requests);
-                /* It would be nice to complete outside of spinlock.
-                 * But this is easier for now. */
-                _req_mod(r, CONNECTION_LOST_WHILE_PENDING);
-        }
-
-        /* ensure bit indicating barrier is required is clear */
-        clear_bit(CREATE_BARRIER, &tconn->flags);
-
-        spin_unlock_irq(&tconn->req_lock);
-}
-
-void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
-{
-        spin_lock_irq(&tconn->req_lock);
-        _tl_restart(tconn, what);
-        spin_unlock_irq(&tconn->req_lock);
+        tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING);
 }
 
 /**
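tl_clear() now shrinks to a call through tl_restart(), keeping the usual kernel pairing where foo() takes the lock and _foo() expects it already held (the new /* must hold resource->req_lock */ annotation above makes that explicit). A compact pthread illustration of the convention; all names here are invented for the example:

/* foo()/_foo() locking convention, modeled with pthreads.
 * All names are invented for the example. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t req_lock = PTHREAD_MUTEX_INITIALIZER;
static int pending_requests = 3;

/* leading underscore: the caller must already hold req_lock */
static void _restart_all(int event)
{
        printf("event %d: restarting %d pending requests\n",
               event, pending_requests);
        pending_requests = 0;
}

/* public wrapper: takes and releases the lock itself */
static void restart_all(int event)
{
        pthread_mutex_lock(&req_lock);
        _restart_all(event);
        pthread_mutex_unlock(&req_lock);
}

static void clear_all(void)
{
        restart_all(42);        /* like tl_clear() calling tl_restart() */
}

int main(void)
{
        clear_all();
        return 0;
}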
@@ -482,31 +310,16 @@ void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
 void tl_abort_disk_io(struct drbd_conf *mdev)
 {
         struct drbd_tconn *tconn = mdev->tconn;
-        struct drbd_tl_epoch *b;
-        struct list_head *le, *tle;
-        struct drbd_request *req;
+        struct drbd_request *req, *r;
 
         spin_lock_irq(&tconn->req_lock);
-        b = tconn->oldest_tle;
-        while (b) {
-                list_for_each_safe(le, tle, &b->requests) {
-                        req = list_entry(le, struct drbd_request, tl_requests);
-                        if (!(req->rq_state & RQ_LOCAL_PENDING))
-                                continue;
-                        if (req->w.mdev == mdev)
-                                _req_mod(req, ABORT_DISK_IO);
-                }
-                b = b->next;
-        }
-
-        list_for_each_safe(le, tle, &tconn->barrier_acked_requests) {
-                req = list_entry(le, struct drbd_request, tl_requests);
+        list_for_each_entry_safe(req, r, &tconn->transfer_log, tl_requests) {
                 if (!(req->rq_state & RQ_LOCAL_PENDING))
                         continue;
-                if (req->w.mdev == mdev)
-                        _req_mod(req, ABORT_DISK_IO);
+                if (req->w.mdev != mdev)
+                        continue;
+                _req_mod(req, ABORT_DISK_IO);
         }
-
         spin_unlock_irq(&tconn->req_lock);
 }
 
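tl_abort_disk_io() used to walk every per-epoch ring plus the barrier_acked_requests list; with one flat transfer_log it becomes a single pass that filters with guard clauses: skip requests with no local disk I/O pending, skip requests that belong to another device. The same shape in a self-contained model (mock types, not DRBD's):

/* Guard-clause filtering over a shared log: mock types, not DRBD's. */
#include <stdio.h>

struct mock_req {
        int dev;                /* which device owns this request */
        int local_pending;      /* still has disk I/O in flight? */
};

static void abort_disk_io(const struct mock_req *log, int n, int dev)
{
        int i;

        for (i = 0; i < n; i++) {
                if (!log[i].local_pending)
                        continue;       /* nothing outstanding on the local disk */
                if (log[i].dev != dev)
                        continue;       /* some other device's request */
                printf("aborting disk I/O for request %d\n", i);
        }
}

int main(void)
{
        const struct mock_req log[] = { {0, 1}, {1, 1}, {0, 0}, {0, 1} };

        abort_disk_io(log, 4, 0);       /* aborts requests 0 and 3 */
        return 0;
}

Since the transfer log is per connection but the abort is per device, the req->w.mdev != mdev test is what scopes the operation down.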
@@ -2680,17 +2493,21 @@ struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts)
         if (set_resource_options(tconn, res_opts))
                 goto fail;
 
-        if (!tl_init(tconn))
-                goto fail;
-
         tconn->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
         if (!tconn->current_epoch)
                 goto fail;
+
+        INIT_LIST_HEAD(&tconn->transfer_log);
+
         INIT_LIST_HEAD(&tconn->current_epoch->list);
         tconn->epochs = 1;
         spin_lock_init(&tconn->epoch_lock);
         tconn->write_ordering = WO_bdev_flush;
 
+        tconn->send.seen_any_write_yet = false;
+        tconn->send.current_epoch_nr = 0;
+        tconn->send.current_epoch_writes = 0;
+
         tconn->cstate = C_STANDALONE;
         mutex_init(&tconn->cstate_mutex);
         spin_lock_init(&tconn->req_lock);
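The three tconn->send fields initialized here take over from the pre-allocated barrier works that tl_init() used to set up: rather than queuing a w_send_barrier per epoch object, the sender can decide on its own that an epoch has ended, by comparing each outgoing write's epoch number with the last one it saw. A speculative sketch of that decision using the same three fields; the function name and surrounding types are illustrative, not the driver's:

/* Sender-side epoch bookkeeping: a speculative sketch, not the
 * driver's actual code. */
#include <stdio.h>
#include <stdbool.h>

struct send_state {
        bool seen_any_write_yet;
        int current_epoch_nr;
        int current_epoch_writes;
};

static void maybe_send_barrier(struct send_state *s, int req_epoch)
{
        if (!s->seen_any_write_yet) {
                s->seen_any_write_yet = true;
                s->current_epoch_nr = req_epoch;
        } else if (s->current_epoch_nr != req_epoch) {
                /* the epoch changed: close out the previous one */
                printf("P_BARRIER(%d) after %d writes\n",
                       s->current_epoch_nr, s->current_epoch_writes);
                s->current_epoch_nr = req_epoch;
                s->current_epoch_writes = 0;
        }
        s->current_epoch_writes++;
}

int main(void)
{
        struct send_state s = { false, 0, 0 };

        maybe_send_barrier(&s, 1);
        maybe_send_barrier(&s, 1);
        maybe_send_barrier(&s, 2);      /* prints P_BARRIER(1) after 2 writes */
        return 0;
}

seen_any_write_yet keeps the very first write from emitting a barrier for a nonexistent previous epoch.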
@@ -2713,7 +2530,6 @@ struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts)
 
 fail:
         kfree(tconn->current_epoch);
-        tl_cleanup(tconn);
         free_cpumask_var(tconn->cpu_mask);
         drbd_free_socket(&tconn->meta);
         drbd_free_socket(&tconn->data);