|
@@ -2077,36 +2077,101 @@ handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh,
|
|
|
|
|
|
}
|
|
|
|
|
|
+/* __handle_issuing_new_read_requests5 - returns 0 if there are no more disks
|
|
|
+ * to process (compute scheduled for disk_idx), ~0 to keep scanning
|
|
|
+ */
|
|
|
+static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
|
|
|
+ struct stripe_head_state *s, int disk_idx, int disks)
|
|
|
+{
|
|
|
+ struct r5dev *dev = &sh->dev[disk_idx];
|
|
|
+ struct r5dev *failed_dev = &sh->dev[s->failed_num];
|
|
|
+
|
|
|
+ /* don't schedule compute operations or reads on the parity block while
|
|
|
+ * a check is in flight
|
|
|
+ */
|
|
|
+ if ((disk_idx == sh->pd_idx) &&
|
|
|
+ test_bit(STRIPE_OP_CHECK, &sh->ops.pending))
|
|
|
+ return ~0;
|
|
|
+
|
|
|
+ /* is the data in this block needed, and can we get it? */
|
|
|
+ if (!test_bit(R5_LOCKED, &dev->flags) &&
|
|
|
+ !test_bit(R5_UPTODATE, &dev->flags) && (dev->toread ||
|
|
|
+ (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
|
|
|
+ s->syncing || s->expanding || (s->failed &&
|
|
|
+ (failed_dev->toread || (failed_dev->towrite &&
|
|
|
+ !test_bit(R5_OVERWRITE, &failed_dev->flags)
|
|
|
+ ))))) {
|
|
|
+ /* 1/ We would like to get this block, possibly by computing it,
|
|
|
+ * but we might not be able to.
|
|
|
+ *
|
|
|
+ * 2/ Since parity check operations potentially make the parity
|
|
|
+ * block !uptodate it will need to be refreshed before any
|
|
|
+ * compute operations on data disks are scheduled.
|
|
|
+ *
|
|
|
+ * 3/ We hold off parity block re-reads until check operations
|
|
|
+ * have quiesced.
|
|
|
+ */
|
|
|
+ if ((s->uptodate == disks - 1) &&
|
|
|
+ !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
|
|
|
+ set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
|
|
|
+ set_bit(R5_Wantcompute, &dev->flags);
|
|
|
+ sh->ops.target = disk_idx;
|
|
|
+ s->req_compute = 1;
|
|
|
+ sh->ops.count++;
|
|
|
+ /* Careful: from this point on 'uptodate' is in the eye
|
|
|
+ * of raid5_run_ops which services 'compute' operations
|
|
|
+ * before writes. R5_Wantcompute flags a block that will
|
|
|
+ * be R5_UPTODATE by the time it is needed for a
|
|
|
+ * subsequent operation.
|
|
|
+ */
|
|
|
+ s->uptodate++;
|
|
|
+ return 0; /* uptodate + compute == disks */
|
|
|
+ } else if ((s->uptodate < disks - 1) &&
|
|
|
+ test_bit(R5_Insync, &dev->flags)) {
|
|
|
+ /* Note: we hold off compute operations while checks are
|
|
|
+ * in flight, but we still prefer 'compute' over 'read'
|
|
|
+ * hence we only read if (uptodate < disks-1)
|
|
|
+ */
|
|
|
+ set_bit(R5_LOCKED, &dev->flags);
|
|
|
+ set_bit(R5_Wantread, &dev->flags);
|
|
|
+ if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
|
|
|
+ sh->ops.count++;
|
|
|
+ s->locked++;
|
|
|
+ pr_debug("Reading block %d (sync=%d)\n", disk_idx,
|
|
|
+ s->syncing);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return ~0;
|
|
|
+}
|
|
|
+
|
|
|
static void handle_issuing_new_read_requests5(struct stripe_head *sh,
|
|
|
struct stripe_head_state *s, int disks)
|
|
|
{
|
|
|
int i;
|
|
|
- for (i = disks; i--; ) {
|
|
|
- struct r5dev *dev = &sh->dev[i];
|
|
|
- if (!test_bit(R5_LOCKED, &dev->flags) &&
|
|
|
- !test_bit(R5_UPTODATE, &dev->flags) &&
|
|
|
- (dev->toread ||
|
|
|
- (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
|
|
|
- s->syncing || s->expanding ||
|
|
|
- (s->failed && (sh->dev[s->failed_num].toread ||
|
|
|
- (sh->dev[s->failed_num].towrite &&
|
|
|
- !test_bit(R5_OVERWRITE, &sh->dev[s->failed_num].flags))
|
|
|
- )))) {
|
|
|
- /* we would like to get this block, possibly
|
|
|
- * by computing it, but we might not be able to
|
|
|
- */
|
|
|
- if (s->uptodate == disks-1) {
|
|
|
- pr_debug("Computing block %d\n", i);
|
|
|
- compute_block(sh, i);
|
|
|
- s->uptodate++;
|
|
|
- } else if (test_bit(R5_Insync, &dev->flags)) {
|
|
|
- set_bit(R5_LOCKED, &dev->flags);
|
|
|
- set_bit(R5_Wantread, &dev->flags);
|
|
|
- s->locked++;
|
|
|
- pr_debug("Reading block %d (sync=%d)\n",
|
|
|
- i, s->syncing);
|
|
|
- }
|
|
|
- }
|
|
|
+
|
|
|
+ /* Clear completed compute operations. Parity recovery
|
|
|
+ * (STRIPE_OP_MOD_REPAIR_PD) implies a write-back which is handled
|
|
|
+ * later on in this routine
|
|
|
+ */
|
|
|
+ if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
|
|
|
+ !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
|
|
|
+ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
|
|
|
+ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
|
|
|
+ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
|
|
|
+ }
|
|
|
+
|
|
|
+ /* look for blocks to read/compute, skip this if a compute
|
|
|
+ * is already in flight, or if the stripe contents are in the
|
|
|
+ * midst of changing due to a write
|
|
|
+ */
|
|
|
+ if (!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
|
|
|
+ !test_bit(STRIPE_OP_PREXOR, &sh->ops.pending) &&
|
|
|
+ !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
|
|
|
+ for (i = disks; i--; )
|
|
|
+ if (__handle_issuing_new_read_requests5(
|
|
|
+ sh, s, i, disks) == 0)
|
|
|
+ break;
|
|
|
}
|
|
|
set_bit(STRIPE_HANDLE, &sh->state);
|
|
|
}
|
|
@@ -2223,7 +2288,8 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
|
|
|
struct r5dev *dev = &sh->dev[i];
|
|
|
if ((dev->towrite || i == sh->pd_idx) &&
|
|
|
!test_bit(R5_LOCKED, &dev->flags) &&
|
|
|
- !test_bit(R5_UPTODATE, &dev->flags)) {
|
|
|
+ !(test_bit(R5_UPTODATE, &dev->flags) ||
|
|
|
+ test_bit(R5_Wantcompute, &dev->flags))) {
|
|
|
if (test_bit(R5_Insync, &dev->flags))
|
|
|
rmw++;
|
|
|
else
|
|
@@ -2232,9 +2298,9 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
|
|
|
/* Would I have to read this buffer for reconstruct_write */
|
|
|
if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
|
|
|
!test_bit(R5_LOCKED, &dev->flags) &&
|
|
|
- !test_bit(R5_UPTODATE, &dev->flags)) {
|
|
|
- if (test_bit(R5_Insync, &dev->flags))
|
|
|
- rcw++;
|
|
|
+ !(test_bit(R5_UPTODATE, &dev->flags) ||
|
|
|
+ test_bit(R5_Wantcompute, &dev->flags))) {
|
|
|
+ if (test_bit(R5_Insync, &dev->flags)) rcw++;
|
|
|
else
|
|
|
rcw += 2*disks;
|
|
|
}
|
|
@@ -2248,7 +2314,8 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
|
|
|
struct r5dev *dev = &sh->dev[i];
|
|
|
if ((dev->towrite || i == sh->pd_idx) &&
|
|
|
!test_bit(R5_LOCKED, &dev->flags) &&
|
|
|
- !test_bit(R5_UPTODATE, &dev->flags) &&
|
|
|
+ !(test_bit(R5_UPTODATE, &dev->flags) ||
|
|
|
+ test_bit(R5_Wantcompute, &dev->flags)) &&
|
|
|
test_bit(R5_Insync, &dev->flags)) {
|
|
|
if (
|
|
|
test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
|
|
@@ -2270,7 +2337,8 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
|
|
|
if (!test_bit(R5_OVERWRITE, &dev->flags) &&
|
|
|
i != sh->pd_idx &&
|
|
|
!test_bit(R5_LOCKED, &dev->flags) &&
|
|
|
- !test_bit(R5_UPTODATE, &dev->flags) &&
|
|
|
+ !(test_bit(R5_UPTODATE, &dev->flags) ||
|
|
|
+ test_bit(R5_Wantcompute, &dev->flags)) &&
|
|
|
test_bit(R5_Insync, &dev->flags)) {
|
|
|
if (
|
|
|
test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
|
|
@@ -2288,8 +2356,17 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
|
|
|
/* now if nothing is locked, and if we have enough data,
|
|
|
* we can start a write request
|
|
|
*/
|
|
|
- if (s->locked == 0 && (rcw == 0 || rmw == 0) &&
|
|
|
- !test_bit(STRIPE_BIT_DELAY, &sh->state))
|
|
|
+ /* since handle_stripe can be called at any time we need to handle the
|
|
|
+ * case where a compute block operation has been submitted and then a
|
|
|
+ * subsequent call wants to start a write request. raid5_run_ops only
|
|
|
+ * handles the case where compute block and postxor are requested
|
|
|
+ * simultaneously. If this is not the case then new writes need to be
|
|
|
+ * held off until the compute completes.
|
|
|
+ */
|
|
|
+ if ((s->req_compute ||
|
|
|
+ !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) &&
|
|
|
+ (s->locked == 0 && (rcw == 0 || rmw == 0) &&
|
|
|
+ !test_bit(STRIPE_BIT_DELAY, &sh->state)))
|
|
|
s->locked += handle_write_operations5(sh, rcw == 0, 0);
|
|
|
}
|
|
|
|
|
@@ -2650,6 +2727,7 @@ static void handle_stripe5(struct stripe_head *sh)
|
|
|
/* now count some things */
|
|
|
if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
|
|
|
if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
|
|
|
+ if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++;
|
|
|
|
|
|
if (dev->toread)
|
|
|
s.to_read++;
|
|
@@ -2706,7 +2784,8 @@ static void handle_stripe5(struct stripe_head *sh)
|
|
|
* or to load a block that is being partially written.
|
|
|
*/
|
|
|
if (s.to_read || s.non_overwrite ||
|
|
|
- (s.syncing && (s.uptodate < disks)) || s.expanding)
|
|
|
+ (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding ||
|
|
|
+ test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending))
|
|
|
handle_issuing_new_read_requests5(sh, &s, disks);
|
|
|
|
|
|
/* Now we check to see if any write operations have recently
|