|
@@ -41,19 +41,13 @@
|
|
|
#define XFSA_FIXUP_BNO_OK 1
|
|
|
#define XFSA_FIXUP_CNT_OK 2
|
|
|
|
|
|
-/*
|
|
|
- * Prototypes for per-ag allocation routines
|
|
|
- */
|
|
|
-
|
|
|
STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
|
|
|
STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
|
|
|
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
|
|
|
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
|
|
|
- xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
|
|
|
-
|
|
|
-/*
|
|
|
- * Internal functions.
|
|
|
- */
|
|
|
+ xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
|
|
|
+STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *,
|
|
|
+ xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *);
|
|
|
|
|
|
/*
|
|
|
* Lookup the record equal to [bno, len] in the btree given by cur.
|
|
@@ -154,19 +148,21 @@ xfs_alloc_compute_aligned(
|
|
|
xfs_extlen_t *reslen) /* result length */
|
|
|
{
|
|
|
xfs_agblock_t bno;
|
|
|
- xfs_extlen_t diff;
|
|
|
xfs_extlen_t len;
|
|
|
|
|
|
- if (args->alignment > 1 && foundlen >= args->minlen) {
|
|
|
- bno = roundup(foundbno, args->alignment);
|
|
|
- diff = bno - foundbno;
|
|
|
- len = diff >= foundlen ? 0 : foundlen - diff;
|
|
|
+ /* Trim busy sections out of found extent */
|
|
|
+ xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len);
|
|
|
+
|
|
|
+ if (args->alignment > 1 && len >= args->minlen) {
|
|
|
+ xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
|
|
|
+ xfs_extlen_t diff = aligned_bno - bno;
|
|
|
+
|
|
|
+ *resbno = aligned_bno;
|
|
|
+ *reslen = diff >= len ? 0 : len - diff;
|
|
|
} else {
|
|
|
- bno = foundbno;
|
|
|
- len = foundlen;
|
|
|
+ *resbno = bno;
|
|
|
+ *reslen = len;
|
|
|
}
|
|
|
- *resbno = bno;
|
|
|
- *reslen = len;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -280,7 +276,6 @@ xfs_alloc_fix_minleft(
|
|
|
return 1;
|
|
|
agf = XFS_BUF_TO_AGF(args->agbp);
|
|
|
diff = be32_to_cpu(agf->agf_freeblks)
|
|
|
- + be32_to_cpu(agf->agf_flcount)
|
|
|
- args->len - args->minleft;
|
|
|
if (diff >= 0)
|
|
|
return 1;
|
|
@@ -541,16 +536,8 @@ xfs_alloc_ag_vextent(
|
|
|
if (error)
|
|
|
return error;
|
|
|
|
|
|
- /*
|
|
|
- * Search the busylist for these blocks and mark the
|
|
|
- * transaction as synchronous if blocks are found. This
|
|
|
- * avoids the need to block due to a synchronous log
|
|
|
- * force to ensure correct ordering as the synchronous
|
|
|
- * transaction will guarantee that for us.
|
|
|
- */
|
|
|
- if (xfs_alloc_busy_search(args->mp, args->agno,
|
|
|
- args->agbno, args->len))
|
|
|
- xfs_trans_set_sync(args->tp);
|
|
|
+ ASSERT(!xfs_alloc_busy_search(args->mp, args->agno,
|
|
|
+ args->agbno, args->len));
|
|
|
}
|
|
|
|
|
|
if (!args->isfl) {
|
|
@@ -577,14 +564,14 @@ xfs_alloc_ag_vextent_exact(
|
|
|
{
|
|
|
xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
|
|
|
xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
|
|
|
- xfs_agblock_t end; /* end of allocated extent */
|
|
|
int error;
|
|
|
xfs_agblock_t fbno; /* start block of found extent */
|
|
|
- xfs_agblock_t fend; /* end block of found extent */
|
|
|
xfs_extlen_t flen; /* length of found extent */
|
|
|
+ xfs_agblock_t tbno; /* start block of trimmed extent */
|
|
|
+ xfs_extlen_t tlen; /* length of trimmed extent */
|
|
|
+ xfs_agblock_t tend; /* end block of trimmed extent */
|
|
|
+ xfs_agblock_t end; /* end of allocated extent */
|
|
|
int i; /* success/failure of operation */
|
|
|
- xfs_agblock_t maxend; /* end of maximal extent */
|
|
|
- xfs_agblock_t minend; /* end of minimal extent */
|
|
|
xfs_extlen_t rlen; /* length of returned extent */
|
|
|
|
|
|
ASSERT(args->alignment == 1);
|
|
@@ -614,14 +601,22 @@ xfs_alloc_ag_vextent_exact(
|
|
|
goto error0;
|
|
|
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
|
|
|
ASSERT(fbno <= args->agbno);
|
|
|
- minend = args->agbno + args->minlen;
|
|
|
- maxend = args->agbno + args->maxlen;
|
|
|
- fend = fbno + flen;
|
|
|
|
|
|
/*
|
|
|
- * Give up if the freespace isn't long enough for the minimum request.
|
|
|
+ * Check for overlapping busy extents.
|
|
|
+ */
|
|
|
+ xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Give up if the start of the extent is busy, or the freespace isn't
|
|
|
+ * long enough for the minimum request.
|
|
|
*/
|
|
|
- if (fend < minend)
|
|
|
+ if (tbno > args->agbno)
|
|
|
+ goto not_found;
|
|
|
+ if (tlen < args->minlen)
|
|
|
+ goto not_found;
|
|
|
+ tend = tbno + tlen;
|
|
|
+ if (tend < args->agbno + args->minlen)
|
|
|
goto not_found;
|
|
|
|
|
|
/*
|
|
@@ -630,14 +625,14 @@ xfs_alloc_ag_vextent_exact(
|
|
|
*
|
|
|
* Fix the length according to mod and prod if given.
|
|
|
*/
|
|
|
- end = XFS_AGBLOCK_MIN(fend, maxend);
|
|
|
+ end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen);
|
|
|
args->len = end - args->agbno;
|
|
|
xfs_alloc_fix_len(args);
|
|
|
if (!xfs_alloc_fix_minleft(args))
|
|
|
goto not_found;
|
|
|
|
|
|
rlen = args->len;
|
|
|
- ASSERT(args->agbno + rlen <= fend);
|
|
|
+ ASSERT(args->agbno + rlen <= tend);
|
|
|
end = args->agbno + rlen;
|
|
|
|
|
|
/*
|
|
@@ -686,11 +681,11 @@ xfs_alloc_find_best_extent(
|
|
|
struct xfs_btree_cur **scur, /* searching cursor */
|
|
|
xfs_agblock_t gdiff, /* difference for search comparison */
|
|
|
xfs_agblock_t *sbno, /* extent found by search */
|
|
|
- xfs_extlen_t *slen,
|
|
|
- xfs_extlen_t *slena, /* aligned length */
|
|
|
+ xfs_extlen_t *slen, /* extent length */
|
|
|
+ xfs_agblock_t *sbnoa, /* aligned extent found by search */
|
|
|
+ xfs_extlen_t *slena, /* aligned extent length */
|
|
|
int dir) /* 0 = search right, 1 = search left */
|
|
|
{
|
|
|
- xfs_agblock_t bno;
|
|
|
xfs_agblock_t new;
|
|
|
xfs_agblock_t sdiff;
|
|
|
int error;
|
|
@@ -708,16 +703,16 @@ xfs_alloc_find_best_extent(
|
|
|
if (error)
|
|
|
goto error0;
|
|
|
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
|
|
|
- xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena);
|
|
|
+ xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
|
|
|
|
|
|
/*
|
|
|
* The good extent is closer than this one.
|
|
|
*/
|
|
|
if (!dir) {
|
|
|
- if (bno >= args->agbno + gdiff)
|
|
|
+ if (*sbnoa >= args->agbno + gdiff)
|
|
|
goto out_use_good;
|
|
|
} else {
|
|
|
- if (bno <= args->agbno - gdiff)
|
|
|
+ if (*sbnoa <= args->agbno - gdiff)
|
|
|
goto out_use_good;
|
|
|
}
|
|
|
|
|
@@ -729,8 +724,8 @@ xfs_alloc_find_best_extent(
|
|
|
xfs_alloc_fix_len(args);
|
|
|
|
|
|
sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
|
|
|
- args->alignment, *sbno,
|
|
|
- *slen, &new);
|
|
|
+ args->alignment, *sbnoa,
|
|
|
+ *slena, &new);
|
|
|
|
|
|
/*
|
|
|
* Choose closer size and invalidate other cursor.
|
|
@@ -780,7 +775,7 @@ xfs_alloc_ag_vextent_near(
|
|
|
xfs_agblock_t gtbnoa; /* aligned ... */
|
|
|
xfs_extlen_t gtdiff; /* difference to right side entry */
|
|
|
xfs_extlen_t gtlen; /* length of right side entry */
|
|
|
- xfs_extlen_t gtlena = 0; /* aligned ... */
|
|
|
+ xfs_extlen_t gtlena; /* aligned ... */
|
|
|
xfs_agblock_t gtnew; /* useful start bno of right side */
|
|
|
int error; /* error code */
|
|
|
int i; /* result code, temporary */
|
|
@@ -789,9 +784,10 @@ xfs_alloc_ag_vextent_near(
|
|
|
xfs_agblock_t ltbnoa; /* aligned ... */
|
|
|
xfs_extlen_t ltdiff; /* difference to left side entry */
|
|
|
xfs_extlen_t ltlen; /* length of left side entry */
|
|
|
- xfs_extlen_t ltlena = 0; /* aligned ... */
|
|
|
+ xfs_extlen_t ltlena; /* aligned ... */
|
|
|
xfs_agblock_t ltnew; /* useful start bno of left side */
|
|
|
xfs_extlen_t rlen; /* length of returned extent */
|
|
|
+ int forced = 0;
|
|
|
#if defined(DEBUG) && defined(__KERNEL__)
|
|
|
/*
|
|
|
* Randomly don't execute the first algorithm.
|
|
@@ -800,13 +796,20 @@ xfs_alloc_ag_vextent_near(
|
|
|
|
|
|
dofirst = random32() & 1;
|
|
|
#endif
|
|
|
+
|
|
|
+restart:
|
|
|
+ bno_cur_lt = NULL;
|
|
|
+ bno_cur_gt = NULL;
|
|
|
+ ltlen = 0;
|
|
|
+ gtlena = 0;
|
|
|
+ ltlena = 0;
|
|
|
+
|
|
|
/*
|
|
|
* Get a cursor for the by-size btree.
|
|
|
*/
|
|
|
cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
|
|
|
args->agno, XFS_BTNUM_CNT);
|
|
|
- ltlen = 0;
|
|
|
- bno_cur_lt = bno_cur_gt = NULL;
|
|
|
+
|
|
|
/*
|
|
|
* See if there are any free extents as big as maxlen.
|
|
|
*/
|
|
@@ -822,11 +825,13 @@ xfs_alloc_ag_vextent_near(
|
|
|
goto error0;
|
|
|
if (i == 0 || ltlen == 0) {
|
|
|
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
|
|
|
+ trace_xfs_alloc_near_noentry(args);
|
|
|
return 0;
|
|
|
}
|
|
|
ASSERT(i == 1);
|
|
|
}
|
|
|
args->wasfromfl = 0;
|
|
|
+
|
|
|
/*
|
|
|
* First algorithm.
|
|
|
* If the requested extent is large wrt the freespaces available
|
|
@@ -890,7 +895,7 @@ xfs_alloc_ag_vextent_near(
|
|
|
if (args->len < blen)
|
|
|
continue;
|
|
|
ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
|
|
|
- args->alignment, ltbno, ltlen, &ltnew);
|
|
|
+ args->alignment, ltbnoa, ltlena, &ltnew);
|
|
|
if (ltnew != NULLAGBLOCK &&
|
|
|
(args->len > blen || ltdiff < bdiff)) {
|
|
|
bdiff = ltdiff;
|
|
@@ -1042,11 +1047,12 @@ xfs_alloc_ag_vextent_near(
|
|
|
args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
|
|
|
xfs_alloc_fix_len(args);
|
|
|
ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
|
|
|
- args->alignment, ltbno, ltlen, &ltnew);
|
|
|
+ args->alignment, ltbnoa, ltlena, &ltnew);
|
|
|
|
|
|
error = xfs_alloc_find_best_extent(args,
|
|
|
&bno_cur_lt, &bno_cur_gt,
|
|
|
- ltdiff, &gtbno, &gtlen, &gtlena,
|
|
|
+ ltdiff, &gtbno, &gtlen,
|
|
|
+ &gtbnoa, &gtlena,
|
|
|
0 /* search right */);
|
|
|
} else {
|
|
|
ASSERT(gtlena >= args->minlen);
|
|
@@ -1057,11 +1063,12 @@ xfs_alloc_ag_vextent_near(
|
|
|
args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
|
|
|
xfs_alloc_fix_len(args);
|
|
|
gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
|
|
|
- args->alignment, gtbno, gtlen, &gtnew);
|
|
|
+ args->alignment, gtbnoa, gtlena, &gtnew);
|
|
|
|
|
|
error = xfs_alloc_find_best_extent(args,
|
|
|
&bno_cur_gt, &bno_cur_lt,
|
|
|
- gtdiff, &ltbno, &ltlen, &ltlena,
|
|
|
+ gtdiff, &ltbno, &ltlen,
|
|
|
+ &ltbnoa, &ltlena,
|
|
|
1 /* search left */);
|
|
|
}
|
|
|
|
|
@@ -1073,6 +1080,12 @@ xfs_alloc_ag_vextent_near(
|
|
|
* If we couldn't get anything, give up.
|
|
|
*/
|
|
|
if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
|
|
|
+ if (!forced++) {
|
|
|
+ trace_xfs_alloc_near_busy(args);
|
|
|
+ xfs_log_force(args->mp, XFS_LOG_SYNC);
|
|
|
+ goto restart;
|
|
|
+ }
|
|
|
+
|
|
|
trace_xfs_alloc_size_neither(args);
|
|
|
args->agbno = NULLAGBLOCK;
|
|
|
return 0;
|
|
@@ -1107,12 +1120,13 @@ xfs_alloc_ag_vextent_near(
|
|
|
return 0;
|
|
|
}
|
|
|
rlen = args->len;
|
|
|
- (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
|
|
|
- ltlen, &ltnew);
|
|
|
+ (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
|
|
|
+ ltbnoa, ltlena, &ltnew);
|
|
|
ASSERT(ltnew >= ltbno);
|
|
|
- ASSERT(ltnew + rlen <= ltbno + ltlen);
|
|
|
+ ASSERT(ltnew + rlen <= ltbnoa + ltlena);
|
|
|
ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
|
|
|
args->agbno = ltnew;
|
|
|
+
|
|
|
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
|
|
|
ltnew, rlen, XFSA_FIXUP_BNO_OK)))
|
|
|
goto error0;
|
|
@@ -1155,26 +1169,35 @@ xfs_alloc_ag_vextent_size(
|
|
|
int i; /* temp status variable */
|
|
|
xfs_agblock_t rbno; /* returned block number */
|
|
|
xfs_extlen_t rlen; /* length of returned extent */
|
|
|
+ int forced = 0;
|
|
|
|
|
|
+restart:
|
|
|
/*
|
|
|
* Allocate and initialize a cursor for the by-size btree.
|
|
|
*/
|
|
|
cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
|
|
|
args->agno, XFS_BTNUM_CNT);
|
|
|
bno_cur = NULL;
|
|
|
+
|
|
|
/*
|
|
|
* Look for an entry >= maxlen+alignment-1 blocks.
|
|
|
*/
|
|
|
if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
|
|
|
args->maxlen + args->alignment - 1, &i)))
|
|
|
goto error0;
|
|
|
+
|
|
|
/*
|
|
|
- * If none, then pick up the last entry in the tree unless the
|
|
|
- * tree is empty.
|
|
|
+ * If none or we have busy extents that we cannot allocate from, then
|
|
|
+ * we have to settle for a smaller extent. In the case that there are
|
|
|
+ * no large extents, this will return the last entry in the tree unless
|
|
|
+ * the tree is empty. In the case that there are only busy large
|
|
|
+ * extents, this will return the largest small extent unless there
|
|
|
+ * are no smaller extents available.
|
|
|
*/
|
|
|
- if (!i) {
|
|
|
- if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno,
|
|
|
- &flen, &i)))
|
|
|
+ if (!i || forced > 1) {
|
|
|
+ error = xfs_alloc_ag_vextent_small(args, cnt_cur,
|
|
|
+ &fbno, &flen, &i);
|
|
|
+ if (error)
|
|
|
goto error0;
|
|
|
if (i == 0 || flen == 0) {
|
|
|
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
|
|
@@ -1182,22 +1205,56 @@ xfs_alloc_ag_vextent_size(
|
|
|
return 0;
|
|
|
}
|
|
|
ASSERT(i == 1);
|
|
|
+ xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
|
|
|
+ } else {
|
|
|
+ /*
|
|
|
+ * Search for a non-busy extent that is large enough.
|
|
|
+ * If we are at low space, don't check, or if we fall off
|
|
|
+ * the end of the btree, turn off the busy check and
|
|
|
+ * restart.
|
|
|
+ */
|
|
|
+ for (;;) {
|
|
|
+ error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
|
|
|
+ if (error)
|
|
|
+ goto error0;
|
|
|
+ XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
|
|
|
+
|
|
|
+ xfs_alloc_compute_aligned(args, fbno, flen,
|
|
|
+ &rbno, &rlen);
|
|
|
+
|
|
|
+ if (rlen >= args->maxlen)
|
|
|
+ break;
|
|
|
+
|
|
|
+ error = xfs_btree_increment(cnt_cur, 0, &i);
|
|
|
+ if (error)
|
|
|
+ goto error0;
|
|
|
+ if (i == 0) {
|
|
|
+ /*
|
|
|
+ * Our only valid extents must have been busy.
|
|
|
+ * Make it unbusy by forcing the log out and
|
|
|
+ * retrying. If we've been here before, forcing
|
|
|
+ * the log isn't making the extents available,
|
|
|
+ * which means they have probably been freed in
|
|
|
+ * this transaction. In that case, we have to
|
|
|
+ * give up on them and we'll attempt a minlen
|
|
|
+ * allocation the next time around.
|
|
|
+ */
|
|
|
+ xfs_btree_del_cursor(cnt_cur,
|
|
|
+ XFS_BTREE_NOERROR);
|
|
|
+ trace_xfs_alloc_size_busy(args);
|
|
|
+ if (!forced++)
|
|
|
+ xfs_log_force(args->mp, XFS_LOG_SYNC);
|
|
|
+ goto restart;
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
- /*
|
|
|
- * There's a freespace as big as maxlen+alignment-1, get it.
|
|
|
- */
|
|
|
- else {
|
|
|
- if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
|
|
|
- goto error0;
|
|
|
- XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
|
|
|
- }
|
|
|
+
|
|
|
/*
|
|
|
* In the first case above, we got the last entry in the
|
|
|
* by-size btree. Now we check to see if the space hits maxlen
|
|
|
* once aligned; if not, we search left for something better.
|
|
|
* This can't happen in the second case above.
|
|
|
*/
|
|
|
- xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
|
|
|
rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
|
|
|
XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
|
|
|
(rlen <= flen && rbno + rlen <= fbno + flen), error0);
|
|
@@ -1251,13 +1308,19 @@ xfs_alloc_ag_vextent_size(
|
|
|
* Fix up the length.
|
|
|
*/
|
|
|
args->len = rlen;
|
|
|
- xfs_alloc_fix_len(args);
|
|
|
- if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
|
|
|
- xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
|
|
|
- trace_xfs_alloc_size_nominleft(args);
|
|
|
- args->agbno = NULLAGBLOCK;
|
|
|
- return 0;
|
|
|
+ if (rlen < args->minlen) {
|
|
|
+ if (!forced++) {
|
|
|
+ xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
|
|
|
+ trace_xfs_alloc_size_busy(args);
|
|
|
+ xfs_log_force(args->mp, XFS_LOG_SYNC);
|
|
|
+ goto restart;
|
|
|
+ }
|
|
|
+ goto out_nominleft;
|
|
|
}
|
|
|
+ xfs_alloc_fix_len(args);
|
|
|
+
|
|
|
+ if (!xfs_alloc_fix_minleft(args))
|
|
|
+ goto out_nominleft;
|
|
|
rlen = args->len;
|
|
|
XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
|
|
|
/*
|
|
@@ -1287,6 +1350,12 @@ error0:
|
|
|
if (bno_cur)
|
|
|
xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
|
|
|
return error;
|
|
|
+
|
|
|
+out_nominleft:
|
|
|
+ xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
|
|
|
+ trace_xfs_alloc_size_nominleft(args);
|
|
|
+ args->agbno = NULLAGBLOCK;
|
|
|
+ return 0;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -1326,6 +1395,9 @@ xfs_alloc_ag_vextent_small(
|
|
|
if (error)
|
|
|
goto error0;
|
|
|
if (fbno != NULLAGBLOCK) {
|
|
|
+ xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1,
|
|
|
+ args->userdata);
|
|
|
+
|
|
|
if (args->userdata) {
|
|
|
xfs_buf_t *bp;
|
|
|
|
|
@@ -1617,18 +1689,6 @@ xfs_free_ag_extent(
|
|
|
|
|
|
trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
|
|
|
|
|
|
- /*
|
|
|
- * Since blocks move to the free list without the coordination
|
|
|
- * used in xfs_bmap_finish, we can't allow block to be available
|
|
|
- * for reallocation and non-transaction writing (user data)
|
|
|
- * until we know that the transaction that moved it to the free
|
|
|
- * list is permanently on disk. We track the blocks by declaring
|
|
|
- * these blocks as "busy"; the busy list is maintained on a per-ag
|
|
|
- * basis and each transaction records which entries should be removed
|
|
|
- * when the iclog commits to disk. If a busy block is allocated,
|
|
|
- * the iclog is pushed up to the LSN that freed the block.
|
|
|
- */
|
|
|
- xfs_alloc_busy_insert(tp, agno, bno, len);
|
|
|
return 0;
|
|
|
|
|
|
error0:
|
|
@@ -1923,21 +1983,6 @@ xfs_alloc_get_freelist(
|
|
|
xfs_alloc_log_agf(tp, agbp, logflags);
|
|
|
*bnop = bno;
|
|
|
|
|
|
- /*
|
|
|
- * As blocks are freed, they are added to the per-ag busy list and
|
|
|
- * remain there until the freeing transaction is committed to disk.
|
|
|
- * Now that we have allocated blocks, this list must be searched to see
|
|
|
- * if a block is being reused. If one is, then the freeing transaction
|
|
|
- * must be pushed to disk before this transaction.
|
|
|
- *
|
|
|
- * We do this by setting the current transaction to a sync transaction
|
|
|
- * which guarantees that the freeing transaction is on disk before this
|
|
|
- * transaction. This is done instead of a synchronous log force here so
|
|
|
- * that we don't sit and wait with the AGF locked in the transaction
|
|
|
- * during the log force.
|
|
|
- */
|
|
|
- if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1))
|
|
|
- xfs_trans_set_sync(tp);
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
@@ -2423,105 +2468,13 @@ xfs_free_extent(
|
|
|
}
|
|
|
|
|
|
error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
|
|
|
+ if (!error)
|
|
|
+ xfs_alloc_busy_insert(tp, args.agno, args.agbno, len);
|
|
|
error0:
|
|
|
xfs_perag_put(args.pag);
|
|
|
return error;
|
|
|
}
|
|
|
|
|
|
-
|
|
|
-/*
|
|
|
- * AG Busy list management
|
|
|
- * The busy list contains block ranges that have been freed but whose
|
|
|
- * transactions have not yet hit disk. If any block listed in a busy
|
|
|
- * list is reused, the transaction that freed it must be forced to disk
|
|
|
- * before continuing to use the block.
|
|
|
- *
|
|
|
- * xfs_alloc_busy_insert - add to the per-ag busy list
|
|
|
- * xfs_alloc_busy_clear - remove an item from the per-ag busy list
|
|
|
- * xfs_alloc_busy_search - search for a busy extent
|
|
|
- */
|
|
|
-
|
|
|
-/*
|
|
|
- * Insert a new extent into the busy tree.
|
|
|
- *
|
|
|
- * The busy extent tree is indexed by the start block of the busy extent.
|
|
|
- * there can be multiple overlapping ranges in the busy extent tree but only
|
|
|
- * ever one entry at a given start block. The reason for this is that
|
|
|
- * multi-block extents can be freed, then smaller chunks of that extent
|
|
|
- * allocated and freed again before the first transaction commit is on disk.
|
|
|
- * If the exact same start block is freed a second time, we have to wait for
|
|
|
- * that busy extent to pass out of the tree before the new extent is inserted.
|
|
|
- * There are two main cases we have to handle here.
|
|
|
- *
|
|
|
- * The first case is a transaction that triggers a "free - allocate - free"
|
|
|
- * cycle. This can occur during btree manipulations as a btree block is freed
|
|
|
- * to the freelist, then allocated from the free list, then freed again. In
|
|
|
- * this case, the second extxpnet free is what triggers the duplicate and as
|
|
|
- * such the transaction IDs should match. Because the extent was allocated in
|
|
|
- * this transaction, the transaction must be marked as synchronous. This is
|
|
|
- * true for all cases where the free/alloc/free occurs in the one transaction,
|
|
|
- * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case.
|
|
|
- * This serves to catch violations of the second case quite effectively.
|
|
|
- *
|
|
|
- * The second case is where the free/alloc/free occur in different
|
|
|
- * transactions. In this case, the thread freeing the extent the second time
|
|
|
- * can't mark the extent busy immediately because it is already tracked in a
|
|
|
- * transaction that may be committing. When the log commit for the existing
|
|
|
- * busy extent completes, the busy extent will be removed from the tree. If we
|
|
|
- * allow the second busy insert to continue using that busy extent structure,
|
|
|
- * it can be freed before this transaction is safely in the log. Hence our
|
|
|
- * only option in this case is to force the log to remove the existing busy
|
|
|
- * extent from the list before we insert the new one with the current
|
|
|
- * transaction ID.
|
|
|
- *
|
|
|
- * The problem we are trying to avoid in the free-alloc-free in separate
|
|
|
- * transactions is most easily described with a timeline:
|
|
|
- *
|
|
|
- * Thread 1 Thread 2 Thread 3 xfslogd
|
|
|
- * xact alloc
|
|
|
- * free X
|
|
|
- * mark busy
|
|
|
- * commit xact
|
|
|
- * free xact
|
|
|
- * xact alloc
|
|
|
- * alloc X
|
|
|
- * busy search
|
|
|
- * mark xact sync
|
|
|
- * commit xact
|
|
|
- * free xact
|
|
|
- * force log
|
|
|
- * checkpoint starts
|
|
|
- * ....
|
|
|
- * xact alloc
|
|
|
- * free X
|
|
|
- * mark busy
|
|
|
- * finds match
|
|
|
- * *** KABOOM! ***
|
|
|
- * ....
|
|
|
- * log IO completes
|
|
|
- * unbusy X
|
|
|
- * checkpoint completes
|
|
|
- *
|
|
|
- * By issuing a log force in thread 3 @ "KABOOM", the thread will block until
|
|
|
- * the checkpoint completes, and the busy extent it matched will have been
|
|
|
- * removed from the tree when it is woken. Hence it can then continue safely.
|
|
|
- *
|
|
|
- * However, to ensure this matching process is robust, we need to use the
|
|
|
- * transaction ID for identifying transaction, as delayed logging results in
|
|
|
- * the busy extent and transaction lifecycles being different. i.e. the busy
|
|
|
- * extent is active for a lot longer than the transaction. Hence the
|
|
|
- * transaction structure can be freed and reallocated, then mark the same
|
|
|
- * extent busy again in the new transaction. In this case the new transaction
|
|
|
- * will have a different tid but can have the same address, and hence we need
|
|
|
- * to check against the tid.
|
|
|
- *
|
|
|
- * Future: for delayed logging, we could avoid the log force if the extent was
|
|
|
- * first freed in the current checkpoint sequence. This, however, requires the
|
|
|
- * ability to pin the current checkpoint in memory until this transaction
|
|
|
- * commits to ensure that both the original free and the current one combine
|
|
|
- * logically into the one checkpoint. If the checkpoint sequences are
|
|
|
- * different, however, we still need to wait on a log force.
|
|
|
- */
|
|
|
void
|
|
|
xfs_alloc_busy_insert(
|
|
|
struct xfs_trans *tp,
|
|
@@ -2533,9 +2486,7 @@ xfs_alloc_busy_insert(
|
|
|
struct xfs_busy_extent *busyp;
|
|
|
struct xfs_perag *pag;
|
|
|
struct rb_node **rbp;
|
|
|
- struct rb_node *parent;
|
|
|
- int match;
|
|
|
-
|
|
|
+ struct rb_node *parent = NULL;
|
|
|
|
|
|
new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
|
|
|
if (!new) {
|
|
@@ -2544,7 +2495,7 @@ xfs_alloc_busy_insert(
|
|
|
* block, make this a synchronous transaction to insure that
|
|
|
* the block is not reused before this transaction commits.
|
|
|
*/
|
|
|
- trace_xfs_alloc_busy(tp, agno, bno, len, 1);
|
|
|
+ trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
|
|
|
xfs_trans_set_sync(tp);
|
|
|
return;
|
|
|
}
|
|
@@ -2552,66 +2503,28 @@ xfs_alloc_busy_insert(
|
|
|
new->agno = agno;
|
|
|
new->bno = bno;
|
|
|
new->length = len;
|
|
|
- new->tid = xfs_log_get_trans_ident(tp);
|
|
|
-
|
|
|
INIT_LIST_HEAD(&new->list);
|
|
|
|
|
|
/* trace before insert to be able to see failed inserts */
|
|
|
- trace_xfs_alloc_busy(tp, agno, bno, len, 0);
|
|
|
+ trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);
|
|
|
|
|
|
pag = xfs_perag_get(tp->t_mountp, new->agno);
|
|
|
-restart:
|
|
|
spin_lock(&pag->pagb_lock);
|
|
|
rbp = &pag->pagb_tree.rb_node;
|
|
|
- parent = NULL;
|
|
|
- busyp = NULL;
|
|
|
- match = 0;
|
|
|
- while (*rbp && match >= 0) {
|
|
|
+ while (*rbp) {
|
|
|
parent = *rbp;
|
|
|
busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
|
|
|
|
|
|
if (new->bno < busyp->bno) {
|
|
|
- /* may overlap, but exact start block is lower */
|
|
|
rbp = &(*rbp)->rb_left;
|
|
|
- if (new->bno + new->length > busyp->bno)
|
|
|
- match = busyp->tid == new->tid ? 1 : -1;
|
|
|
+ ASSERT(new->bno + new->length <= busyp->bno);
|
|
|
} else if (new->bno > busyp->bno) {
|
|
|
- /* may overlap, but exact start block is higher */
|
|
|
rbp = &(*rbp)->rb_right;
|
|
|
- if (bno < busyp->bno + busyp->length)
|
|
|
- match = busyp->tid == new->tid ? 1 : -1;
|
|
|
+ ASSERT(bno >= busyp->bno + busyp->length);
|
|
|
} else {
|
|
|
- match = busyp->tid == new->tid ? 1 : -1;
|
|
|
- break;
|
|
|
+ ASSERT(0);
|
|
|
}
|
|
|
}
|
|
|
- if (match < 0) {
|
|
|
- /* overlap marked busy in different transaction */
|
|
|
- spin_unlock(&pag->pagb_lock);
|
|
|
- xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
|
|
|
- goto restart;
|
|
|
- }
|
|
|
- if (match > 0) {
|
|
|
- /*
|
|
|
- * overlap marked busy in same transaction. Update if exact
|
|
|
- * start block match, otherwise combine the busy extents into
|
|
|
- * a single range.
|
|
|
- */
|
|
|
- if (busyp->bno == new->bno) {
|
|
|
- busyp->length = max(busyp->length, new->length);
|
|
|
- spin_unlock(&pag->pagb_lock);
|
|
|
- ASSERT(tp->t_flags & XFS_TRANS_SYNC);
|
|
|
- xfs_perag_put(pag);
|
|
|
- kmem_free(new);
|
|
|
- return;
|
|
|
- }
|
|
|
- rb_erase(&busyp->rb_node, &pag->pagb_tree);
|
|
|
- new->length = max(busyp->bno + busyp->length,
|
|
|
- new->bno + new->length) -
|
|
|
- min(busyp->bno, new->bno);
|
|
|
- new->bno = min(busyp->bno, new->bno);
|
|
|
- } else
|
|
|
- busyp = NULL;
|
|
|
|
|
|
rb_link_node(&new->rb_node, parent, rbp);
|
|
|
rb_insert_color(&new->rb_node, &pag->pagb_tree);
|
|
@@ -2619,7 +2532,6 @@ restart:
|
|
|
list_add(&new->list, &tp->t_busy);
|
|
|
spin_unlock(&pag->pagb_lock);
|
|
|
xfs_perag_put(pag);
|
|
|
- kmem_free(busyp);
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -2668,31 +2580,443 @@ xfs_alloc_busy_search(
|
|
|
}
|
|
|
}
|
|
|
spin_unlock(&pag->pagb_lock);
|
|
|
- trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match);
|
|
|
xfs_perag_put(pag);
|
|
|
return match;
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * The found free extent [fbno, fend] overlaps part or all of the given busy
|
|
|
+ * extent. If the overlap covers the beginning, the end, or all of the busy
|
|
|
+ * extent, the overlapping portion can be made unbusy and used for the
|
|
|
+ * allocation. We can't split a busy extent because we can't modify a
|
|
|
+ * transaction/CIL context busy list, but we can update an entry's block
|
|
|
+ * number or length.
|
|
|
+ *
|
|
|
+ * Returns true if the extent can safely be reused, or false if the search
|
|
|
+ * needs to be restarted.
|
|
|
+ */
|
|
|
+STATIC bool
|
|
|
+xfs_alloc_busy_update_extent(
|
|
|
+ struct xfs_mount *mp,
|
|
|
+ struct xfs_perag *pag,
|
|
|
+ struct xfs_busy_extent *busyp,
|
|
|
+ xfs_agblock_t fbno,
|
|
|
+ xfs_extlen_t flen,
|
|
|
+ bool userdata)
|
|
|
+{
|
|
|
+ xfs_agblock_t fend = fbno + flen;
|
|
|
+ xfs_agblock_t bbno = busyp->bno;
|
|
|
+ xfs_agblock_t bend = bbno + busyp->length;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * If there is a busy extent overlapping a user allocation, we have
|
|
|
+ * no choice but to force the log and retry the search.
|
|
|
+ *
|
|
|
+ * Fortunately this does not happen during normal operation, but
|
|
|
+ * only if the filesystem is very low on space and has to dip into
|
|
|
+ * the AGFL for normal allocations.
|
|
|
+ */
|
|
|
+ if (userdata)
|
|
|
+ goto out_force_log;
|
|
|
+
|
|
|
+ if (bbno < fbno && bend > fend) {
|
|
|
+ /*
|
|
|
+ * Case 1:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +---------+
|
|
|
+ * fbno fend
|
|
|
+ */
|
|
|
+
|
|
|
+ /*
|
|
|
+ * We would have to split the busy extent to be able to track
|
|
|
+ * it correctly, which we cannot do because we would have to
|
|
|
+ * modify the list of busy extents attached to the transaction
|
|
|
+ * or CIL context, which is immutable.
|
|
|
+ *
|
|
|
+ * Force out the log to clear the busy extent and retry the
|
|
|
+ * search.
|
|
|
+ */
|
|
|
+ goto out_force_log;
|
|
|
+ } else if (bbno >= fbno && bend <= fend) {
|
|
|
+ /*
|
|
|
+ * Case 2:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +-----------------+
|
|
|
+ * fbno fend
|
|
|
+ *
|
|
|
+ * Case 3:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +--------------------------+
|
|
|
+ * fbno fend
|
|
|
+ *
|
|
|
+ * Case 4:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +--------------------------+
|
|
|
+ * fbno fend
|
|
|
+ *
|
|
|
+ * Case 5:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +-----------------------------------+
|
|
|
+ * fbno fend
|
|
|
+ *
|
|
|
+ */
|
|
|
+
|
|
|
+ /*
|
|
|
+ * The busy extent is fully covered by the extent we are
|
|
|
+ * allocating, and can simply be removed from the rbtree.
|
|
|
+ * However we cannot remove it from the immutable list
|
|
|
+ * tracking busy extents in the transaction or CIL context,
|
|
|
+ * so set the length to zero to mark it invalid.
|
|
|
+ *
|
|
|
+ * We also need to restart the busy extent search from the
|
|
|
+ * tree root, because erasing the node can rearrange the
|
|
|
+ * tree topology.
|
|
|
+ */
|
|
|
+ rb_erase(&busyp->rb_node, &pag->pagb_tree);
|
|
|
+ busyp->length = 0;
|
|
|
+ return false;
|
|
|
+ } else if (fend < bend) {
|
|
|
+ /*
|
|
|
+ * Case 6:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +---------+
|
|
|
+ * fbno fend
|
|
|
+ *
|
|
|
+ * Case 7:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +------------------+
|
|
|
+ * fbno fend
|
|
|
+ *
|
|
|
+ */
|
|
|
+ busyp->bno = fend;
|
|
|
+ } else if (bbno < fbno) {
|
|
|
+ /*
|
|
|
+ * Case 8:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +-------------+
|
|
|
+ * fbno fend
|
|
|
+ *
|
|
|
+ * Case 9:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +----------------------+
|
|
|
+ * fbno fend
|
|
|
+ */
|
|
|
+ busyp->length = fbno - busyp->bno;
|
|
|
+ } else {
|
|
|
+ ASSERT(0);
|
|
|
+ }
|
|
|
+
|
|
|
+ trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
|
|
|
+ return true;
|
|
|
+
|
|
|
+out_force_log:
|
|
|
+ spin_unlock(&pag->pagb_lock);
|
|
|
+ xfs_log_force(mp, XFS_LOG_SYNC);
|
|
|
+ trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
|
|
|
+ spin_lock(&pag->pagb_lock);
|
|
|
+ return false;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+/*
|
|
|
+ * For a given extent [fbno, flen], make sure we can reuse it safely.
|
|
|
+ */
|
|
|
void
|
|
|
-xfs_alloc_busy_clear(
|
|
|
+xfs_alloc_busy_reuse(
|
|
|
struct xfs_mount *mp,
|
|
|
- struct xfs_busy_extent *busyp)
|
|
|
+ xfs_agnumber_t agno,
|
|
|
+ xfs_agblock_t fbno,
|
|
|
+ xfs_extlen_t flen,
|
|
|
+ bool userdata)
|
|
|
{
|
|
|
struct xfs_perag *pag;
|
|
|
+ struct rb_node *rbp;
|
|
|
|
|
|
- trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno,
|
|
|
- busyp->length);
|
|
|
+ ASSERT(flen > 0);
|
|
|
|
|
|
- ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno,
|
|
|
- busyp->length) == 1);
|
|
|
+ pag = xfs_perag_get(mp, agno);
|
|
|
+ spin_lock(&pag->pagb_lock);
|
|
|
+restart:
|
|
|
+ rbp = pag->pagb_tree.rb_node;
|
|
|
+ while (rbp) {
|
|
|
+ struct xfs_busy_extent *busyp =
|
|
|
+ rb_entry(rbp, struct xfs_busy_extent, rb_node);
|
|
|
+ xfs_agblock_t bbno = busyp->bno;
|
|
|
+ xfs_agblock_t bend = bbno + busyp->length;
|
|
|
|
|
|
- list_del_init(&busyp->list);
|
|
|
+ if (fbno + flen <= bbno) {
|
|
|
+ rbp = rbp->rb_left;
|
|
|
+ continue;
|
|
|
+ } else if (fbno >= bend) {
|
|
|
+ rbp = rbp->rb_right;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
|
|
|
- pag = xfs_perag_get(mp, busyp->agno);
|
|
|
- spin_lock(&pag->pagb_lock);
|
|
|
- rb_erase(&busyp->rb_node, &pag->pagb_tree);
|
|
|
+ if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
|
|
|
+ userdata))
|
|
|
+ goto restart;
|
|
|
+ }
|
|
|
spin_unlock(&pag->pagb_lock);
|
|
|
xfs_perag_put(pag);
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * For a given extent [fbno, flen], search the busy extent tree to find a
|
|
|
+ * subset of the extent that is not busy. If *rlen is smaller than
|
|
|
+ * args->minlen, no suitable extent could be found and the higher-level
|
|
|
+ * code needs to force out the log and retry the allocation.
|
|
|
+ */
|
|
|
+STATIC void
|
|
|
+xfs_alloc_busy_trim(
|
|
|
+ struct xfs_alloc_arg *args,
|
|
|
+ xfs_agblock_t bno,
|
|
|
+ xfs_extlen_t len,
|
|
|
+ xfs_agblock_t *rbno,
|
|
|
+ xfs_extlen_t *rlen)
|
|
|
+{
|
|
|
+ xfs_agblock_t fbno;
|
|
|
+ xfs_extlen_t flen;
|
|
|
+ struct rb_node *rbp;
|
|
|
+
|
|
|
+ ASSERT(len > 0);
|
|
|
|
|
|
+ spin_lock(&args->pag->pagb_lock);
|
|
|
+restart:
|
|
|
+ fbno = bno;
|
|
|
+ flen = len;
|
|
|
+ rbp = args->pag->pagb_tree.rb_node;
|
|
|
+ while (rbp && flen >= args->minlen) {
|
|
|
+ struct xfs_busy_extent *busyp =
|
|
|
+ rb_entry(rbp, struct xfs_busy_extent, rb_node);
|
|
|
+ xfs_agblock_t fend = fbno + flen;
|
|
|
+ xfs_agblock_t bbno = busyp->bno;
|
|
|
+ xfs_agblock_t bend = bbno + busyp->length;
|
|
|
+
|
|
|
+ if (fend <= bbno) {
|
|
|
+ rbp = rbp->rb_left;
|
|
|
+ continue;
|
|
|
+ } else if (fbno >= bend) {
|
|
|
+ rbp = rbp->rb_right;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * If this is a metadata allocation, try to reuse the busy
|
|
|
+ * extent instead of trimming the allocation.
|
|
|
+ */
|
|
|
+ if (!args->userdata) {
|
|
|
+ if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
|
|
|
+ busyp, fbno, flen,
|
|
|
+ false))
|
|
|
+ goto restart;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (bbno <= fbno) {
|
|
|
+ /* start overlap */
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Case 1:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +---------+
|
|
|
+ * fbno fend
|
|
|
+ *
|
|
|
+ * Case 2:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +-------------+
|
|
|
+ * fbno fend
|
|
|
+ *
|
|
|
+ * Case 3:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +-------------+
|
|
|
+ * fbno fend
|
|
|
+ *
|
|
|
+ * Case 4:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +-----------------+
|
|
|
+ * fbno fend
|
|
|
+ *
|
|
|
+ * No unbusy region in extent, return failure.
|
|
|
+ */
|
|
|
+ if (fend <= bend)
|
|
|
+ goto fail;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Case 5:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +----------------------+
|
|
|
+ * fbno fend
|
|
|
+ *
|
|
|
+ * Case 6:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +--------------------------+
|
|
|
+ * fbno fend
|
|
|
+ *
|
|
|
+ * Needs to be trimmed to:
|
|
|
+ * +-------+
|
|
|
+ * fbno fend
|
|
|
+ */
|
|
|
+ fbno = bend;
|
|
|
+ } else if (bend >= fend) {
|
|
|
+ /* end overlap */
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Case 7:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +------------------+
|
|
|
+ * fbno fend
|
|
|
+ *
|
|
|
+ * Case 8:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +--------------------------+
|
|
|
+ * fbno fend
|
|
|
+ *
|
|
|
+ * Needs to be trimmed to:
|
|
|
+ * +-------+
|
|
|
+ * fbno fend
|
|
|
+ */
|
|
|
+ fend = bbno;
|
|
|
+ } else {
|
|
|
+ /* middle overlap */
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Case 9:
|
|
|
+ * bbno bend
|
|
|
+ * +BBBBBBBBBBBBBBBBB+
|
|
|
+ * +-----------------------------------+
|
|
|
+ * fbno fend
|
|
|
+ *
|
|
|
+ * Can be trimmed to:
|
|
|
+ * +-------+ OR +-------+
|
|
|
+ * fbno fend fbno fend
|
|
|
+ *
|
|
|
+ * Backward allocation leads to significant
|
|
|
+ * fragmentation of directories, which degrades
|
|
|
+ * directory performance, therefore we always want to
|
|
|
+ * choose the option that produces forward allocation
|
|
|
+ * patterns.
|
|
|
+ * Preferring the lower bno extent will make the next
|
|
|
+ * request use "fend" as the start of the next
|
|
|
+ * allocation; if the segment is no longer busy at
|
|
|
+ * that point, we'll get a contiguous allocation, but
|
|
|
+ * even if it is still busy, we will get a forward
|
|
|
+ * allocation.
|
|
|
+ * We try to avoid choosing the segment at "bend",
|
|
|
+ * because that can lead to the next allocation
|
|
|
+ * taking the segment at "fbno", which would be a
|
|
|
+ * backward allocation. We only use the segment at
|
|
|
+ * "fbno" if it is much larger than the current
|
|
|
+ * requested size, because in that case there's a
|
|
|
+ * good chance subsequent allocations will be
|
|
|
+ * contiguous.
|
|
|
+ */
|
|
|
+ if (bbno - fbno >= args->maxlen) {
|
|
|
+ /* left candidate fits perfectly */
|
|
|
+ fend = bbno;
|
|
|
+ } else if (fend - bend >= args->maxlen * 4) {
|
|
|
+ /* right candidate has enough free space */
|
|
|
+ fbno = bend;
|
|
|
+ } else if (bbno - fbno >= args->minlen) {
|
|
|
+ /* left candidate fits minimum requirement */
|
|
|
+ fend = bbno;
|
|
|
+ } else {
|
|
|
+ goto fail;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ flen = fend - fbno;
|
|
|
+ }
|
|
|
+ spin_unlock(&args->pag->pagb_lock);
|
|
|
+
|
|
|
+ if (fbno != bno || flen != len) {
|
|
|
+ trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len,
|
|
|
+ fbno, flen);
|
|
|
+ }
|
|
|
+ *rbno = fbno;
|
|
|
+ *rlen = flen;
|
|
|
+ return;
|
|
|
+fail:
|
|
|
+ /*
|
|
|
+ * Return a zero extent length as a failure indication. All callers
|
|
|
+ * re-check if the trimmed extent satisfies the minlen requirement.
|
|
|
+ */
|
|
|
+ spin_unlock(&args->pag->pagb_lock);
|
|
|
+ trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
|
|
|
+ *rbno = fbno;
|
|
|
+ *rlen = 0;
|
|
|
+}
|
|
|
+
|
|
|
+static void
|
|
|
+xfs_alloc_busy_clear_one(
|
|
|
+ struct xfs_mount *mp,
|
|
|
+ struct xfs_perag *pag,
|
|
|
+ struct xfs_busy_extent *busyp)
|
|
|
+{
|
|
|
+ if (busyp->length) {
|
|
|
+ trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
|
|
|
+ busyp->length);
|
|
|
+ rb_erase(&busyp->rb_node, &pag->pagb_tree);
|
|
|
+ }
|
|
|
+
|
|
|
+ list_del_init(&busyp->list);
|
|
|
kmem_free(busyp);
|
|
|
}
|
|
|
+
|
|
|
+void
|
|
|
+xfs_alloc_busy_clear(
|
|
|
+ struct xfs_mount *mp,
|
|
|
+ struct list_head *list)
|
|
|
+{
|
|
|
+ struct xfs_busy_extent *busyp, *n;
|
|
|
+ struct xfs_perag *pag = NULL;
|
|
|
+ xfs_agnumber_t agno = NULLAGNUMBER;
|
|
|
+
|
|
|
+ list_for_each_entry_safe(busyp, n, list, list) {
|
|
|
+ if (busyp->agno != agno) {
|
|
|
+ if (pag) {
|
|
|
+ spin_unlock(&pag->pagb_lock);
|
|
|
+ xfs_perag_put(pag);
|
|
|
+ }
|
|
|
+ pag = xfs_perag_get(mp, busyp->agno);
|
|
|
+ spin_lock(&pag->pagb_lock);
|
|
|
+ agno = busyp->agno;
|
|
|
+ }
|
|
|
+
|
|
|
+ xfs_alloc_busy_clear_one(mp, pag, busyp);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (pag) {
|
|
|
+ spin_unlock(&pag->pagb_lock);
|
|
|
+ xfs_perag_put(pag);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Callback for list_sort to sort busy extents by the AG they reside in.
|
|
|
+ */
|
|
|
+int
|
|
|
+xfs_busy_extent_ag_cmp(
|
|
|
+ void *priv,
|
|
|
+ struct list_head *a,
|
|
|
+ struct list_head *b)
|
|
|
+{
|
|
|
+ return container_of(a, struct xfs_busy_extent, list)->agno -
|
|
|
+ container_of(b, struct xfs_busy_extent, list)->agno;
|
|
|
+}
|