@@ -38,21 +38,36 @@
  * near_copies (stored in low byte of layout)
  * far_copies (stored in second byte of layout)
  * far_offset (stored in bit 16 of layout )
+ * use_far_sets (stored in bit 17 of layout )
  *
- * The data to be stored is divided into chunks using chunksize.
- * Each device is divided into far_copies sections.
- * In each section, chunks are laid out in a style similar to raid0, but
- * near_copies copies of each chunk is stored (each on a different drive).
- * The starting device for each section is offset near_copies from the starting
- * device of the previous section.
- * Thus they are (near_copies*far_copies) of each chunk, and each is on a different
- * drive.
- * near_copies and far_copies must be at least one, and their product is at most
- * raid_disks.
+ * The data to be stored is divided into chunks using chunksize. Each device
+ * is divided into far_copies sections. In each section, chunks are laid out
+ * in a style similar to raid0, but near_copies copies of each chunk are
+ * stored (each on a different drive). The starting device for each section
+ * is offset near_copies from the starting device of the previous section.
+ * Thus there are (near_copies * far_copies) copies of each chunk, and each is
+ * on a different drive. near_copies and far_copies must be at least one, and
+ * their product is at most raid_disks.
  *
  * If far_offset is true, then the far_copies are handled a bit differently.
- * The copies are still in different stripes, but instead of be very far apart
- * on disk, there are adjacent stripes.
+ * The copies are still in different stripes, but instead of being very far
+ * apart on disk, they are in adjacent stripes.
+ *
+ * The far and offset algorithms are handled slightly differently if
+ * 'use_far_sets' is true. In this case, the array's devices are grouped into
+ * sets that are (near_copies * far_copies) in size. The far copied stripes
+ * are still shifted by 'near_copies' devices, but this shifting stays
+ * confined to the set rather than the entire array. This increases the number
+ * of device combinations that can fail without causing the array to fail.
+ * Example 'far' algorithm w/o 'use_far_sets' (each letter represents a chunk
+ * on a device):
+ *    A B C D        A B C D E
+ *      ...            ...
+ *    D A B C        E A B C D
+ * Example 'far' algorithm w/ 'use_far_sets' enabled (sets illustrated w/ []'s):
+ *    [A B] [C D]    [A B] [C D E]
+ *    |...| |...|    |...| | ... |
+ *    [B A] [D C]    [B A] [E C D]
  */

 /*
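
To make the redundancy claim in the comment concrete, here is a stand-alone
user-space sketch (illustrative only, not part of the patch; DISKS,
NEAR_COPIES, FAR_COPIES, copy_dev() and the other names are made up). It
applies the same "shift by near_copies, wrapping inside the set" rule to a
4-disk, near_copies=1, far_copies=2 'far' array and counts how many of the
six possible two-device failures leave every chunk with at least one copy.

#include <stdio.h>

#define DISKS       4
#define NEAR_COPIES 1
#define FAR_COPIES  2

/* Device holding copy number 'f' (0 = primary) of the chunk whose primary
 * copy sits on device 'd', using the set-confined shift described above. */
static int copy_dev(int d, int f, int far_set_size)
{
        while (f--) {
                int set = d / far_set_size;

                d = (d + NEAR_COPIES) % far_set_size + set * far_set_size;
        }
        return d;
}

/* Does any chunk lose all of its copies when devices 'a' and 'b' fail? */
static int pair_loses_data(int a, int b, int far_set_size)
{
        int chunk, f;

        for (chunk = 0; chunk < DISKS; chunk++) {
                int alive = 0;

                for (f = 0; f < FAR_COPIES; f++) {
                        int d = copy_dev(chunk, f, far_set_size);

                        if (d != a && d != b)
                                alive = 1;
                }
                if (!alive)
                        return 1;
        }
        return 0;
}

/* Count the two-device failures that lose no data. */
static int surviving_pairs(int far_set_size)
{
        int a, b, survived = 0;

        for (a = 0; a < DISKS; a++)
                for (b = a + 1; b < DISKS; b++)
                        if (!pair_loses_data(a, b, far_set_size))
                                survived++;
        return survived;
}

int main(void)
{
        printf("classic 'far':   %d of 6 double failures survivable\n",
               surviving_pairs(DISKS));
        printf("with 'far sets': %d of 6 double failures survivable\n",
               surviving_pairs(DISKS / FAR_COPIES));
        return 0;
}

For these parameters the classic layout should survive two of the six double
failures, while the set-confined layout should survive four, which is the
improvement the 4-disk example diagrams above illustrate.
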
@@ -551,14 +566,18 @@ static void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio)
         /* and calculate all the others */
         for (n = 0; n < geo->near_copies; n++) {
                 int d = dev;
+                int set;
                 sector_t s = sector;
                 r10bio->devs[slot].devnum = d;
                 r10bio->devs[slot].addr = s;
                 slot++;

                 for (f = 1; f < geo->far_copies; f++) {
+                        set = d / geo->far_set_size;
                         d += geo->near_copies;
-                        d %= geo->raid_disks;
+                        d %= geo->far_set_size;
+                        d += geo->far_set_size * set;
+
                         s += geo->stride;
                         r10bio->devs[slot].devnum = d;
                         r10bio->devs[slot].addr = s;
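
A small user-space sketch of the device stepping introduced above: the set is
recomputed from the current device, then the near_copies shift wraps inside
that set. All names here are illustrative assumptions; far_set_size is passed
in as the setup_geo() hunk below computes it (disks / far_copies when bit 17
is set, the whole array otherwise). For 4 disks with near_copies=1 and
far_copies=2 it rebuilds the second rows of the diagrams in the layout
comment.

#include <stdio.h>

#define DISKS       4
#define NEAR_COPIES 1
#define FAR_COPIES  2

/* Print which chunk each device carries in the second far section,
 * using the same stepping as the patched loop above. */
static void print_far_row(int far_set_size)
{
        char row[DISKS];
        int d;

        for (d = 0; d < DISKS; d++) {
                int set = d / far_set_size;
                int fd = (d + NEAR_COPIES) % far_set_size + set * far_set_size;

                row[fd] = 'A' + d;      /* device 'd' holds chunk 'A' + d in the first section */
        }
        for (d = 0; d < DISKS; d++)
                printf("%c ", row[d]);
        printf("\n");
}

int main(void)
{
        print_far_row(DISKS);                   /* classic 'far': prints "D A B C" */
        print_far_row(DISKS / FAR_COPIES);      /* far sets:      prints "B A D C" */
        return 0;
}
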
@@ -594,6 +613,8 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev)
          * or recovery, so reshape isn't happening
          */
         struct geom *geo = &conf->geo;
+        int far_set_start = (dev / geo->far_set_size) * geo->far_set_size;
+        int far_set_size = geo->far_set_size;

         offset = sector & geo->chunk_mask;
         if (geo->far_offset) {
@@ -601,13 +622,13 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev)
                 chunk = sector >> geo->chunk_shift;
                 fc = sector_div(chunk, geo->far_copies);
                 dev -= fc * geo->near_copies;
-                if (dev < 0)
-                        dev += geo->raid_disks;
+                if (dev < far_set_start)
+                        dev += far_set_size;
         } else {
                 while (sector >= geo->stride) {
                         sector -= geo->stride;
-                        if (dev < geo->near_copies)
-                                dev += geo->raid_disks - geo->near_copies;
+                        if (dev < (geo->near_copies + far_set_start))
+                                dev += far_set_size - geo->near_copies;
                         else
                                 dev -= geo->near_copies;
                 }
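
The reverse mapping can be sanity-checked with a sketch like the one below
(illustrative only; primary_dev() and its parameters are made-up names). It
mirrors the non-offset branch above: for each far section the copy lies past
the first one, undo a single near_copies shift, wrapping inside the device's
far set rather than across the whole array. The sector arithmetic is omitted.

#include <stdio.h>

/* Given the device holding a copy and how many far sections that copy lies
 * past the first one, recover the device holding the primary copy. */
static int primary_dev(int dev, int sections, int near_copies, int far_set_size)
{
        int far_set_start = (dev / far_set_size) * far_set_size;

        while (sections-- > 0) {
                if (dev < near_copies + far_set_start)
                        dev += far_set_size - near_copies;
                else
                        dev -= near_copies;
        }
        return dev;
}

int main(void)
{
        /* 4 disks, near_copies=1, far_copies=2, far_set_size=2: the far copies
         * on devices 1 and 2 belong to the chunks whose primary copies live
         * on devices 0 and 3 respectively ("[B A] [D C]" in the diagram). */
        printf("%d %d\n", primary_dev(1, 1, 1, 2), primary_dev(2, 1, 1, 2));
        return 0;
}
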
@@ -3438,7 +3459,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
                 disks = mddev->raid_disks + mddev->delta_disks;
                 break;
         }
-        if (layout >> 17)
+        if (layout >> 18)
                 return -1;
         if (chunk < (PAGE_SIZE >> 9) ||
             !is_power_of_2(chunk))
@@ -3450,6 +3471,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
         geo->near_copies = nc;
         geo->far_copies = fc;
         geo->far_offset = fo;
+        geo->far_set_size = (layout & (1<<17)) ? disks / fc : disks;
         geo->chunk_mask = chunk - 1;
         geo->chunk_shift = ffz(~chunk);
         return nc*fc;
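
For reference, a hypothetical user-space decoder of the layout word as
setup_geo() interprets it after this patch: near_copies in the low byte,
far_copies in the second byte, far_offset in bit 16, use_far_sets in bit 17,
and anything above bit 17 rejected. The variable names and the 4-disk example
value are assumptions chosen for illustration.

#include <stdio.h>

int main(void)
{
        /* Example value: near_copies=1, far_copies=2, far_offset=0, use_far_sets=1. */
        unsigned int layout = (1u << 17) | (2 << 8) | 1;
        int disks = 4;
        int nc, fc, fo, far_set_size;

        if (layout >> 18) {             /* bits above 17 are undefined after this patch */
                fprintf(stderr, "unknown layout bits set\n");
                return 1;
        }
        nc = layout & 255;              /* low byte */
        fc = (layout >> 8) & 255;       /* second byte */
        fo = !!(layout & (1 << 16));    /* bit 16 */
        far_set_size = (layout & (1 << 17)) ? disks / fc : disks;

        printf("near=%d far=%d offset=%d far_set_size=%d\n", nc, fc, fo, far_set_size);
        return 0;
}
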