14 years ago · a77febbef1
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -33,7 +33,6 @@
 
				 #include <linux/migrate.h>
			
 
				 #include <linux/backing-dev.h>
			
 
				 #include <linux/freezer.h>
			
 
				-#include <linux/list_sort.h>
			
 
				 
			
 
				 #include "xfs_sb.h"
			
 
				 #include "xfs_inum.h"
			
@@ -709,6 +708,27 @@ xfs_buf_get_empty(
 
				 	return bp;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Return a buffer allocated as an empty buffer and associated to external
			
 
				+ * memory via xfs_buf_associate_memory() back to its empty state.
			
 
				+ */
			
 
				+void
			
 
				+xfs_buf_set_empty(
			
 
				+	struct xfs_buf		*bp,
			
 
				+	size_t			len)
			
 
				+{
			
 
				+	if (bp->b_pages)
			
 
				+		_xfs_buf_free_pages(bp);
			
 
				+
			
 
				+	bp->b_pages = NULL;
			
 
				+	bp->b_page_count = 0;
			
 
				+	bp->b_addr = NULL;
			
 
				+	bp->b_file_offset = 0;
			
 
				+	bp->b_buffer_length = bp->b_count_desired = len;
			
 
				+	bp->b_bn = XFS_BUF_DADDR_NULL;
			
 
				+	bp->b_flags &= ~XBF_MAPPED;
			
 
				+}
			
 
				+
			
 
				 static inline struct page *
			
 
				 mem_to_page(
			
 
				 	void			*addr)
			
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -178,6 +178,7 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
 
				 				xfs_buf_flags_t);
			
 
				 
			
 
				 extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
			
 
				+extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
			
 
				 extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
			
 
				 extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
			
 
				 extern void xfs_buf_hold(xfs_buf_t *);
			
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -586,7 +586,8 @@ xfs_file_compat_ioctl(
 
				 	case XFS_IOC_RESVSP_32:
			
 
				 	case XFS_IOC_UNRESVSP_32:
			
 
				 	case XFS_IOC_RESVSP64_32:
			
 
				-	case XFS_IOC_UNRESVSP64_32: {
			
 
				+	case XFS_IOC_UNRESVSP64_32:
			
 
				+	case XFS_IOC_ZERO_RANGE_32: {
			
 
				 		struct xfs_flock64	bf;
			
 
				 
			
 
				 		if (xfs_compat_flock64_copyin(&bf, arg))
			
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -184,6 +184,7 @@ typedef struct compat_xfs_flock64 {
 
				 #define XFS_IOC_UNRESVSP_32	_IOW('X', 41, struct compat_xfs_flock64)
			
 
				 #define XFS_IOC_RESVSP64_32	_IOW('X', 42, struct compat_xfs_flock64)
			
 
				 #define XFS_IOC_UNRESVSP64_32	_IOW('X', 43, struct compat_xfs_flock64)
			
 
				+#define XFS_IOC_ZERO_RANGE_32	_IOW('X', 57, struct compat_xfs_flock64)
			
 
				 
			
 
				 typedef struct compat_xfs_fsop_geom_v1 {
			
 
				 	__u32		blocksize;	/* filesystem (data) block size */
			
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -70,6 +70,7 @@
 
				 #include <linux/ctype.h>
			
 
				 #include <linux/writeback.h>
			
 
				 #include <linux/capability.h>
			
 
				+#include <linux/list_sort.h>
			
 
				 
			
 
				 #include <asm/page.h>
			
 
				 #include <asm/div64.h>
			
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -41,23 +41,6 @@ __xfs_printk(
 
				 	printk("%sXFS: %pV\n", level, vaf);
			
 
				 }
			
 
				 
			
 
				-void xfs_printk(
			
 
				-	const char		*level,
			
 
				-	const struct xfs_mount	*mp,
			
 
				-	const char		*fmt, ...)
			
 
				-{
			
 
				-	struct va_format	vaf;
			
 
				-	va_list			args;
			
 
				-
			
 
				-	va_start(args, fmt);
			
 
				-
			
 
				-	vaf.fmt = fmt;
			
 
				-	vaf.va = &args;
			
 
				-
			
 
				-	__xfs_printk(level, mp, &vaf);
			
 
				-	va_end(args);
			
 
				-}
			
 
				-
			
 
				 #define define_xfs_printk_level(func, kern_level)		\
			
 
				 void func(const struct xfs_mount *mp, const char *fmt, ...)	\
			
 
				 {								\
			
@@ -95,8 +78,7 @@ xfs_alert_tag(
 
				 	int			do_panic = 0;
			
 
				 
			
 
				 	if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
			
 
				-		xfs_printk(KERN_ALERT, mp,
			
 
				-			"XFS: Transforming an alert into a BUG.");
			
 
				+		xfs_alert(mp, "Transforming an alert into a BUG.");
			
 
				 		do_panic = 1;
			
 
				 	}
			
 
				 
			
--- a/fs/xfs/linux-2.6/xfs_message.h
+++ b/fs/xfs/linux-2.6/xfs_message.h
@@ -3,9 +3,6 @@
 
				 
			
 
				 struct xfs_mount;
			
 
				 
			
 
				-extern void xfs_printk(const char *level, const struct xfs_mount *mp,
			
 
				-                      const char *fmt, ...)
			
 
				-        __attribute__ ((format (printf, 3, 4)));
			
 
				 extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
			
 
				         __attribute__ ((format (printf, 2, 3)));
			
 
				 extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
			
@@ -28,7 +25,9 @@ extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
 
				 extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
			
 
				         __attribute__ ((format (printf, 2, 3)));
			
 
				 #else
			
 
				-static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
			
 
				+static inline void
			
 
				+__attribute__ ((format (printf, 2, 3)))
			
 
				+xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
			
 
				 {
			
 
				 }
			
 
				 #endif
			
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1787,10 +1787,6 @@ init_xfs_fs(void)
 
				 	if (error)
			
 
				 		goto out_cleanup_procfs;
			
 
				 
			
 
				-	error = xfs_init_workqueues();
			
 
				-	if (error)
			
 
				-		goto out_sysctl_unregister;
			
 
				-
			
 
				 	vfs_initquota();
			
 
				 
			
 
				 	error = register_filesystem(&xfs_fs_type);
			
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -267,6 +267,16 @@ xfs_sync_inode_attr(
 
				 
			
 
				 	error = xfs_iflush(ip, flags);
			
 
				 
			
 
				+	/*
			
 
				+	 * We don't want to try again on non-blocking flushes that can't run
			
 
				+	 * again immediately. If an inode really must be written, then that's
			
 
				+	 * what the SYNC_WAIT flag is for.
			
 
				+	 */
			
 
				+	if (error == EAGAIN) {
			
 
				+		ASSERT(!(flags & SYNC_WAIT));
			
 
				+		error = 0;
			
 
				+	}
			
 
				+
			
 
				  out_unlock:
			
 
				 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
			
 
				 	return error;
			
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -1151,44 +1151,7 @@ TRACE_EVENT(xfs_bunmap,
 
				 
			
 
				 );
			
 
				 
			
 
				-#define XFS_BUSY_SYNC \
			
 
				-	{ 0,	"async" }, \
			
 
				-	{ 1,	"sync" }
			
 
				-
			
 
				-TRACE_EVENT(xfs_alloc_busy,
			
 
				-	TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno,
			
 
				-		 xfs_agblock_t agbno, xfs_extlen_t len, int sync),
			
 
				-	TP_ARGS(trans, agno, agbno, len, sync),
			
 
				-	TP_STRUCT__entry(
			
 
				-		__field(dev_t, dev)
			
 
				-		__field(struct xfs_trans *, tp)
			
 
				-		__field(int, tid)
			
 
				-		__field(xfs_agnumber_t, agno)
			
 
				-		__field(xfs_agblock_t, agbno)
			
 
				-		__field(xfs_extlen_t, len)
			
 
				-		__field(int, sync)
			
 
				-	),
			
 
				-	TP_fast_assign(
			
 
				-		__entry->dev = trans->t_mountp->m_super->s_dev;
			
 
				-		__entry->tp = trans;
			
 
				-		__entry->tid = trans->t_ticket->t_tid;
			
 
				-		__entry->agno = agno;
			
 
				-		__entry->agbno = agbno;
			
 
				-		__entry->len = len;
			
 
				-		__entry->sync = sync;
			
 
				-	),
			
 
				-	TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s",
			
 
				-		  MAJOR(__entry->dev), MINOR(__entry->dev),
			
 
				-		  __entry->tp,
			
 
				-		  __entry->tid,
			
 
				-		  __entry->agno,
			
 
				-		  __entry->agbno,
			
 
				-		  __entry->len,
			
 
				-		  __print_symbolic(__entry->sync, XFS_BUSY_SYNC))
			
 
				-
			
 
				-);
			
 
				-
			
 
				-TRACE_EVENT(xfs_alloc_unbusy,
			
 
				+DECLARE_EVENT_CLASS(xfs_busy_class,
			
 
				 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
			
 
				 		 xfs_agblock_t agbno, xfs_extlen_t len),
			
 
				 	TP_ARGS(mp, agno, agbno, len),
			
@@ -1210,35 +1173,45 @@ TRACE_EVENT(xfs_alloc_unbusy,
 
				 		  __entry->agbno,
			
 
				 		  __entry->len)
			
 
				 );
			
 
				+#define DEFINE_BUSY_EVENT(name) \
			
 
				+DEFINE_EVENT(xfs_busy_class, name, \
			
 
				+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
			
 
				+		 xfs_agblock_t agbno, xfs_extlen_t len), \
			
 
				+	TP_ARGS(mp, agno, agbno, len))
			
 
				+DEFINE_BUSY_EVENT(xfs_alloc_busy);
			
 
				+DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
			
 
				+DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
			
 
				+DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
			
 
				+DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
			
 
				 
			
 
				-#define XFS_BUSY_STATES \
			
 
				-	{ 0,	"missing" }, \
			
 
				-	{ 1,	"found" }
			
 
				-
			
 
				-TRACE_EVENT(xfs_alloc_busysearch,
			
 
				+TRACE_EVENT(xfs_alloc_busy_trim,
			
 
				 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
			
 
				-		 xfs_agblock_t agbno, xfs_extlen_t len, int found),
			
 
				-	TP_ARGS(mp, agno, agbno, len, found),
			
 
				+		 xfs_agblock_t agbno, xfs_extlen_t len,
			
 
				+		 xfs_agblock_t tbno, xfs_extlen_t tlen),
			
 
				+	TP_ARGS(mp, agno, agbno, len, tbno, tlen),
			
 
				 	TP_STRUCT__entry(
			
 
				 		__field(dev_t, dev)
			
 
				 		__field(xfs_agnumber_t, agno)
			
 
				 		__field(xfs_agblock_t, agbno)
			
 
				 		__field(xfs_extlen_t, len)
			
 
				-		__field(int, found)
			
 
				+		__field(xfs_agblock_t, tbno)
			
 
				+		__field(xfs_extlen_t, tlen)
			
 
				 	),
			
 
				 	TP_fast_assign(
			
 
				 		__entry->dev = mp->m_super->s_dev;
			
 
				 		__entry->agno = agno;
			
 
				 		__entry->agbno = agbno;
			
 
				 		__entry->len = len;
			
 
				-		__entry->found = found;
			
 
				+		__entry->tbno = tbno;
			
 
				+		__entry->tlen = tlen;
			
 
				 	),
			
 
				-	TP_printk("dev %d:%d agno %u agbno %u len %u %s",
			
 
				+	TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
			
 
				 		  MAJOR(__entry->dev), MINOR(__entry->dev),
			
 
				 		  __entry->agno,
			
 
				 		  __entry->agbno,
			
 
				 		  __entry->len,
			
 
				-		  __print_symbolic(__entry->found, XFS_BUSY_STATES))
			
 
				+		  __entry->tbno,
			
 
				+		  __entry->tlen)
			
 
				 );
			
 
				 
			
 
				 TRACE_EVENT(xfs_trans_commit_lsn,
			
@@ -1418,7 +1391,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
 
				 		  __entry->wasfromfl,
			
 
				 		  __entry->isfl,
			
 
				 		  __entry->userdata,
			
 
				-		  __entry->firstblock)
			
 
				+		  (unsigned long long)__entry->firstblock)
			
 
				 )
			
 
				 
			
 
				 #define DEFINE_ALLOC_EVENT(name) \
			
@@ -1433,11 +1406,14 @@ DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
 
				 DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
			
 
				 DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
			
 
				 DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
			
 
				+DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
			
 
				+DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
			
 
				 DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
			
 
				 DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
			
 
				 DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
			
 
				 DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
			
 
				 DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
			
 
				+DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
			
 
				 DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
			
 
				 DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
			
 
				 DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
			
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -187,7 +187,6 @@ struct xfs_busy_extent {
 
				 	xfs_agnumber_t	agno;
			
 
				 	xfs_agblock_t	bno;
			
 
				 	xfs_extlen_t	length;
			
 
				-	xlog_tid_t	tid;		/* transaction that created this */
			
 
				 };
			
 
				 
			
 
				 /*
			
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -41,19 +41,13 @@
 
				 #define	XFSA_FIXUP_BNO_OK	1
			
 
				 #define	XFSA_FIXUP_CNT_OK	2
			
 
				 
			
 
				-/*
			
 
				- * Prototypes for per-ag allocation routines
			
 
				- */
			
 
				-
			
 
				 STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
			
 
				 STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
			
 
				 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
			
 
				 STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
			
 
				-	xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
			
 
				-
			
 
				-/*
			
 
				- * Internal functions.
			
 
				- */
			
 
				+		xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
			
 
				+STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *,
			
 
				+		xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *);
			
 
				 
			
 
				 /*
			
 
				  * Lookup the record equal to [bno, len] in the btree given by cur.
			
@@ -154,19 +148,21 @@ xfs_alloc_compute_aligned(
 
				 	xfs_extlen_t	*reslen)	/* result length */
			
 
				 {
			
 
				 	xfs_agblock_t	bno;
			
 
				-	xfs_extlen_t	diff;
			
 
				 	xfs_extlen_t	len;
			
 
				 
			
 
				-	if (args->alignment > 1 && foundlen >= args->minlen) {
			
 
				-		bno = roundup(foundbno, args->alignment);
			
 
				-		diff = bno - foundbno;
			
 
				-		len = diff >= foundlen ? 0 : foundlen - diff;
			
 
				+	/* Trim busy sections out of found extent */
			
 
				+	xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len);
			
 
				+
			
 
				+	if (args->alignment > 1 && len >= args->minlen) {
			
 
				+		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);
			
 
				+		xfs_extlen_t	diff = aligned_bno - bno;
			
 
				+
			
 
				+		*resbno = aligned_bno;
			
 
				+		*reslen = diff >= len ? 0 : len - diff;
			
 
				 	} else {
			
 
				-		bno = foundbno;
			
 
				-		len = foundlen;
			
 
				+		*resbno = bno;
			
 
				+		*reslen = len;
			
 
				 	}
			
 
				-	*resbno = bno;
			
 
				-	*reslen = len;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -280,7 +276,6 @@ xfs_alloc_fix_minleft(
 
				 		return 1;
			
 
				 	agf = XFS_BUF_TO_AGF(args->agbp);
			
 
				 	diff = be32_to_cpu(agf->agf_freeblks)
			
 
				-		+ be32_to_cpu(agf->agf_flcount)
			
 
				 		- args->len - args->minleft;
			
 
				 	if (diff >= 0)
			
 
				 		return 1;
			
@@ -541,16 +536,8 @@ xfs_alloc_ag_vextent(
 
				 		if (error)
			
 
				 			return error;
			
 
				 
			
 
				-		/*
			
 
				-		 * Search the busylist for these blocks and mark the
			
 
				-		 * transaction as synchronous if blocks are found. This
			
 
				-		 * avoids the need to block due to a synchronous log
			
 
				-		 * force to ensure correct ordering as the synchronous
			
 
				-		 * transaction will guarantee that for us.
			
 
				-		 */
			
 
				-		if (xfs_alloc_busy_search(args->mp, args->agno,
			
 
				-					args->agbno, args->len))
			
 
				-			xfs_trans_set_sync(args->tp);
			
 
				+		ASSERT(!xfs_alloc_busy_search(args->mp, args->agno,
			
 
				+					      args->agbno, args->len));
			
 
				 	}
			
 
				 
			
 
				 	if (!args->isfl) {
			
@@ -577,14 +564,14 @@ xfs_alloc_ag_vextent_exact(
 
				 {
			
 
				 	xfs_btree_cur_t	*bno_cur;/* by block-number btree cursor */
			
 
				 	xfs_btree_cur_t	*cnt_cur;/* by count btree cursor */
			
 
				-	xfs_agblock_t	end;	/* end of allocated extent */
			
 
				 	int		error;
			
 
				 	xfs_agblock_t	fbno;	/* start block of found extent */
			
 
				-	xfs_agblock_t	fend;	/* end block of found extent */
			
 
				 	xfs_extlen_t	flen;	/* length of found extent */
			
 
				+	xfs_agblock_t	tbno;	/* start block of trimmed extent */
			
 
				+	xfs_extlen_t	tlen;	/* length of trimmed extent */
			
 
				+	xfs_agblock_t	tend;	/* end block of trimmed extent */
			
 
				+	xfs_agblock_t	end;	/* end of allocated extent */
			
 
				 	int		i;	/* success/failure of operation */
			
 
				-	xfs_agblock_t	maxend;	/* end of maximal extent */
			
 
				-	xfs_agblock_t	minend;	/* end of minimal extent */
			
 
				 	xfs_extlen_t	rlen;	/* length of returned extent */
			
 
				 
			
 
				 	ASSERT(args->alignment == 1);
			
@@ -614,14 +601,22 @@ xfs_alloc_ag_vextent_exact(
 
				 		goto error0;
			
 
				 	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			
 
				 	ASSERT(fbno <= args->agbno);
			
 
				-	minend = args->agbno + args->minlen;
			
 
				-	maxend = args->agbno + args->maxlen;
			
 
				-	fend = fbno + flen;
			
 
				 
			
 
				 	/*
			
 
				-	 * Give up if the freespace isn't long enough for the minimum request.
			
 
				+	 * Check for overlapping busy extents.
			
 
				+	 */
			
 
				+	xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen);
			
 
				+
			
 
				+	/*
			
 
				+	 * Give up if the start of the extent is busy, or the freespace isn't
			
 
				+	 * long enough for the minimum request.
			
 
				 	 */
			
 
				-	if (fend < minend)
			
 
				+	if (tbno > args->agbno)
			
 
				+		goto not_found;
			
 
				+	if (tlen < args->minlen)
			
 
				+		goto not_found;
			
 
				+	tend = tbno + tlen;
			
 
				+	if (tend < args->agbno + args->minlen)
			
 
				 		goto not_found;
			
 
				 
			
 
				 	/*
			
@@ -630,14 +625,14 @@ xfs_alloc_ag_vextent_exact(
 
				 	 *
			
 
				 	 * Fix the length according to mod and prod if given.
			
 
				 	 */
			
 
				-	end = XFS_AGBLOCK_MIN(fend, maxend);
			
 
				+	end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen);
			
 
				 	args->len = end - args->agbno;
			
 
				 	xfs_alloc_fix_len(args);
			
 
				 	if (!xfs_alloc_fix_minleft(args))
			
 
				 		goto not_found;
			
 
				 
			
 
				 	rlen = args->len;
			
 
				-	ASSERT(args->agbno + rlen <= fend);
			
 
				+	ASSERT(args->agbno + rlen <= tend);
			
 
				 	end = args->agbno + rlen;
			
 
				 
			
 
				 	/*
			
@@ -686,11 +681,11 @@ xfs_alloc_find_best_extent(
 
				 	struct xfs_btree_cur	**scur,	/* searching cursor */
			
 
				 	xfs_agblock_t		gdiff,	/* difference for search comparison */
			
 
				 	xfs_agblock_t		*sbno,	/* extent found by search */
			
 
				-	xfs_extlen_t		*slen,
			
 
				-	xfs_extlen_t		*slena,	/* aligned length */
			
 
				+	xfs_extlen_t		*slen,	/* extent length */
			
 
				+	xfs_agblock_t		*sbnoa,	/* aligned extent found by search */
			
 
				+	xfs_extlen_t		*slena,	/* aligned extent length */
			
 
				 	int			dir)	/* 0 = search right, 1 = search left */
			
 
				 {
			
 
				-	xfs_agblock_t		bno;
			
 
				 	xfs_agblock_t		new;
			
 
				 	xfs_agblock_t		sdiff;
			
 
				 	int			error;
			
@@ -708,16 +703,16 @@ xfs_alloc_find_best_extent(
 
				 		if (error)
			
 
				 			goto error0;
			
 
				 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			
 
				-		xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena);
			
 
				+		xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
			
 
				 
			
 
				 		/*
			
 
				 		 * The good extent is closer than this one.
			
 
				 		 */
			
 
				 		if (!dir) {
			
 
				-			if (bno >= args->agbno + gdiff)
			
 
				+			if (*sbnoa >= args->agbno + gdiff)
			
 
				 				goto out_use_good;
			
 
				 		} else {
			
 
				-			if (bno <= args->agbno - gdiff)
			
 
				+			if (*sbnoa <= args->agbno - gdiff)
			
 
				 				goto out_use_good;
			
 
				 		}
			
 
				 
			
@@ -729,8 +724,8 @@ xfs_alloc_find_best_extent(
 
				 			xfs_alloc_fix_len(args);
			
 
				 
			
 
				 			sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
			
 
				-						       args->alignment, *sbno,
			
 
				-						       *slen, &new);
			
 
				+						       args->alignment, *sbnoa,
			
 
				+						       *slena, &new);
			
 
				 
			
 
				 			/*
			
 
				 			 * Choose closer size and invalidate other cursor.
			
@@ -780,7 +775,7 @@ xfs_alloc_ag_vextent_near(
 
				 	xfs_agblock_t	gtbnoa;		/* aligned ... */
			
 
				 	xfs_extlen_t	gtdiff;		/* difference to right side entry */
			
 
				 	xfs_extlen_t	gtlen;		/* length of right side entry */
			
 
				-	xfs_extlen_t	gtlena = 0;	/* aligned ... */
			
 
				+	xfs_extlen_t	gtlena;		/* aligned ... */
			
 
				 	xfs_agblock_t	gtnew;		/* useful start bno of right side */
			
 
				 	int		error;		/* error code */
			
 
				 	int		i;		/* result code, temporary */
			
@@ -789,9 +784,10 @@ xfs_alloc_ag_vextent_near(
 
				 	xfs_agblock_t	ltbnoa;		/* aligned ... */
			
 
				 	xfs_extlen_t	ltdiff;		/* difference to left side entry */
			
 
				 	xfs_extlen_t	ltlen;		/* length of left side entry */
			
 
				-	xfs_extlen_t	ltlena = 0;	/* aligned ... */
			
 
				+	xfs_extlen_t	ltlena;		/* aligned ... */
			
 
				 	xfs_agblock_t	ltnew;		/* useful start bno of left side */
			
 
				 	xfs_extlen_t	rlen;		/* length of returned extent */
			
 
				+	int		forced = 0;
			
 
				 #if defined(DEBUG) && defined(__KERNEL__)
			
 
				 	/*
			
 
				 	 * Randomly don't execute the first algorithm.
			
@@ -800,13 +796,20 @@ xfs_alloc_ag_vextent_near(
 
				 
			
 
				 	dofirst = random32() & 1;
			
 
				 #endif
			
 
				+
			
 
				+restart:
			
 
				+	bno_cur_lt = NULL;
			
 
				+	bno_cur_gt = NULL;
			
 
				+	ltlen = 0;
			
 
				+	gtlena = 0;
			
 
				+	ltlena = 0;
			
 
				+
			
 
				 	/*
			
 
				 	 * Get a cursor for the by-size btree.
			
 
				 	 */
			
 
				 	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
			
 
				 		args->agno, XFS_BTNUM_CNT);
			
 
				-	ltlen = 0;
			
 
				-	bno_cur_lt = bno_cur_gt = NULL;
			
 
				+
			
 
				 	/*
			
 
				 	 * See if there are any free extents as big as maxlen.
			
 
				 	 */
			
@@ -822,11 +825,13 @@ xfs_alloc_ag_vextent_near(
 
				 			goto error0;
			
 
				 		if (i == 0 || ltlen == 0) {
			
 
				 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			
 
				+			trace_xfs_alloc_near_noentry(args);
			
 
				 			return 0;
			
 
				 		}
			
 
				 		ASSERT(i == 1);
			
 
				 	}
			
 
				 	args->wasfromfl = 0;
			
 
				+
			
 
				 	/*
			
 
				 	 * First algorithm.
			
 
				 	 * If the requested extent is large wrt the freespaces available
			
@@ -890,7 +895,7 @@ xfs_alloc_ag_vextent_near(
 
				 			if (args->len < blen)
			
 
				 				continue;
			
 
				 			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
			
 
				-				args->alignment, ltbno, ltlen, &ltnew);
			
 
				+				args->alignment, ltbnoa, ltlena, &ltnew);
			
 
				 			if (ltnew != NULLAGBLOCK &&
			
 
				 			    (args->len > blen || ltdiff < bdiff)) {
			
 
				 				bdiff = ltdiff;
			
@@ -1042,11 +1047,12 @@ xfs_alloc_ag_vextent_near(
 
				 			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
			
 
				 			xfs_alloc_fix_len(args);
			
 
				 			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
			
 
				-				args->alignment, ltbno, ltlen, &ltnew);
			
 
				+				args->alignment, ltbnoa, ltlena, &ltnew);
			
 
				 
			
 
				 			error = xfs_alloc_find_best_extent(args,
			
 
				 						&bno_cur_lt, &bno_cur_gt,
			
 
				-						ltdiff, &gtbno, &gtlen, &gtlena,
			
 
				+						ltdiff, &gtbno, &gtlen,
			
 
				+						&gtbnoa, &gtlena,
			
 
				 						0 /* search right */);
			
 
				 		} else {
			
 
				 			ASSERT(gtlena >= args->minlen);
			
@@ -1057,11 +1063,12 @@ xfs_alloc_ag_vextent_near(
 
				 			args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
			
 
				 			xfs_alloc_fix_len(args);
			
 
				 			gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
			
 
				-				args->alignment, gtbno, gtlen, &gtnew);
			
 
				+				args->alignment, gtbnoa, gtlena, &gtnew);
			
 
				 
			
 
				 			error = xfs_alloc_find_best_extent(args,
			
 
				 						&bno_cur_gt, &bno_cur_lt,
			
 
				-						gtdiff, &ltbno, &ltlen, &ltlena,
			
 
				+						gtdiff, &ltbno, &ltlen,
			
 
				+						&ltbnoa, &ltlena,
			
 
				 						1 /* search left */);
			
 
				 		}
			
 
				 
			
@@ -1073,6 +1080,12 @@ xfs_alloc_ag_vextent_near(
 
				 	 * If we couldn't get anything, give up.
			
 
				 	 */
			
 
				 	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
			
 
				+		if (!forced++) {
			
 
				+			trace_xfs_alloc_near_busy(args);
			
 
				+			xfs_log_force(args->mp, XFS_LOG_SYNC);
			
 
				+			goto restart;
			
 
				+		}
			
 
				+
			
 
				 		trace_xfs_alloc_size_neither(args);
			
 
				 		args->agbno = NULLAGBLOCK;
			
 
				 		return 0;
			
@@ -1107,12 +1120,13 @@ xfs_alloc_ag_vextent_near(
 
				 		return 0;
			
 
				 	}
			
 
				 	rlen = args->len;
			
 
				-	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
			
 
				-		ltlen, &ltnew);
			
 
				+	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
			
 
				+				     ltbnoa, ltlena, &ltnew);
			
 
				 	ASSERT(ltnew >= ltbno);
			
 
				-	ASSERT(ltnew + rlen <= ltbno + ltlen);
			
 
				+	ASSERT(ltnew + rlen <= ltbnoa + ltlena);
			
 
				 	ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
			
 
				 	args->agbno = ltnew;
			
 
				+
			
 
				 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
			
 
				 			ltnew, rlen, XFSA_FIXUP_BNO_OK)))
			
 
				 		goto error0;
			
@@ -1155,26 +1169,35 @@ xfs_alloc_ag_vextent_size(
 
				 	int		i;		/* temp status variable */
			
 
				 	xfs_agblock_t	rbno;		/* returned block number */
			
 
				 	xfs_extlen_t	rlen;		/* length of returned extent */
			
 
				+	int		forced = 0;
			
 
				 
			
 
				+restart:
			
 
				 	/*
			
 
				 	 * Allocate and initialize a cursor for the by-size btree.
			
 
				 	 */
			
 
				 	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
			
 
				 		args->agno, XFS_BTNUM_CNT);
			
 
				 	bno_cur = NULL;
			
 
				+
			
 
				 	/*
			
 
				 	 * Look for an entry >= maxlen+alignment-1 blocks.
			
 
				 	 */
			
 
				 	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
			
 
				 			args->maxlen + args->alignment - 1, &i)))
			
 
				 		goto error0;
			
 
				+
			
 
				 	/*
			
 
				-	 * If none, then pick up the last entry in the tree unless the
			
 
				-	 * tree is empty.
			
 
				+	 * If none or we have busy extents that we cannot allocate from, then
			
 
				+	 * we have to settle for a smaller extent. In the case that there are
			
 
				+	 * no large extents, this will return the last entry in the tree unless
			
 
				+	 * the tree is empty. In the case that there are only busy large
			
 
				+	 * extents, this will return the largest small extent unless there
			
 
				+	 * are no smaller extents available.
			
 
				 	 */
			
 
				-	if (!i) {
			
 
				-		if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno,
			
 
				-				&flen, &i)))
			
 
				+	if (!i || forced > 1) {
			
 
				+		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
			
 
				+						   &fbno, &flen, &i);
			
 
				+		if (error)
			
 
				 			goto error0;
			
 
				 		if (i == 0 || flen == 0) {
			
 
				 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			
@@ -1182,22 +1205,56 @@ xfs_alloc_ag_vextent_size(
 
				 			return 0;
			
 
				 		}
			
 
				 		ASSERT(i == 1);
			
 
				+		xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
			
 
				+	} else {
			
 
				+		/*
			
 
				+		 * Search for a non-busy extent that is large enough.
			
 
				+		 * If we are at low space, don't check, or if we fall off
			
 
				+		 * the end of the btree, turn off the busy check and
			
 
				+		 * restart.
			
 
				+		 */
			
 
				+		for (;;) {
			
 
				+			error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
			
 
				+			if (error)
			
 
				+				goto error0;
			
 
				+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			
 
				+
			
 
				+			xfs_alloc_compute_aligned(args, fbno, flen,
			
 
				+						  &rbno, &rlen);
			
 
				+
			
 
				+			if (rlen >= args->maxlen)
			
 
				+				break;
			
 
				+
			
 
				+			error = xfs_btree_increment(cnt_cur, 0, &i);
			
 
				+			if (error)
			
 
				+				goto error0;
			
 
				+			if (i == 0) {
			
 
				+				/*
			
 
				+				 * Our only valid extents must have been busy.
			
 
				+				 * Make it unbusy by forcing the log out and
			
 
				+				 * retrying. If we've been here before, forcing
			
 
				+				 * the log isn't making the extents available,
			
 
				+				 * which means they have probably been freed in
			
 
				+				 * this transaction.  In that case, we have to
			
 
				+				 * give up on them and we'll attempt a minlen
			
 
				+				 * allocation the next time around.
			
 
				+				 */
			
 
				+				xfs_btree_del_cursor(cnt_cur,
			
 
				+						     XFS_BTREE_NOERROR);
			
 
				+				trace_xfs_alloc_size_busy(args);
			
 
				+				if (!forced++)
			
 
				+					xfs_log_force(args->mp, XFS_LOG_SYNC);
			
 
				+				goto restart;
			
 
				+			}
			
 
				+		}
			
 
				 	}
			
 
				-	/*
			
 
				-	 * There's a freespace as big as maxlen+alignment-1, get it.
			
 
				-	 */
			
 
				-	else {
			
 
				-		if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
			
 
				-			goto error0;
			
 
				-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			
 
				-	}
			
 
				+
			
 
				 	/*
			
 
				 	 * In the first case above, we got the last entry in the
			
 
				 	 * by-size btree.  Now we check to see if the space hits maxlen
			
 
				 	 * once aligned; if not, we search left for something better.
			
 
				 	 * This can't happen in the second case above.
			
 
				 	 */
			
 
				-	xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
			
 
				 	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
			
 
				 	XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
			
 
				 			(rlen <= flen && rbno + rlen <= fbno + flen), error0);
			
@@ -1251,13 +1308,19 @@ xfs_alloc_ag_vextent_size(
 
				 	 * Fix up the length.
			
 
				 	 */
			
 
				 	args->len = rlen;
			
 
				-	xfs_alloc_fix_len(args);
			
 
				-	if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
			
 
				-		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			
 
				-		trace_xfs_alloc_size_nominleft(args);
			
 
				-		args->agbno = NULLAGBLOCK;
			
 
				-		return 0;
			
 
				+	if (rlen < args->minlen) {
			
 
				+		if (!forced++) {
			
 
				+			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			
 
				+			trace_xfs_alloc_size_busy(args);
			
 
				+			xfs_log_force(args->mp, XFS_LOG_SYNC);
			
 
				+			goto restart;
			
 
				+		}
			
 
				+		goto out_nominleft;
			
 
				 	}
			
 
				+	xfs_alloc_fix_len(args);
			
 
				+
			
 
				+	if (!xfs_alloc_fix_minleft(args))
			
 
				+		goto out_nominleft;
			
 
				 	rlen = args->len;
			
 
				 	XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
			
 
				 	/*
			
@@ -1287,6 +1350,12 @@ error0:
 
				 	if (bno_cur)
			
 
				 		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
			
 
				 	return error;
			
 
				+
			
 
				+out_nominleft:
			
 
				+	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			
 
				+	trace_xfs_alloc_size_nominleft(args);
			
 
				+	args->agbno = NULLAGBLOCK;
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1326,6 +1395,9 @@ xfs_alloc_ag_vextent_small(
 
				 		if (error)
			
 
				 			goto error0;
			
 
				 		if (fbno != NULLAGBLOCK) {
			
 
				+			xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1,
			
 
				+					     args->userdata);
			
 
				+
			
 
				 			if (args->userdata) {
			
 
				 				xfs_buf_t	*bp;
			
 
				 
			
@@ -1617,18 +1689,6 @@ xfs_free_ag_extent(
 
				 
			
 
				 	trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
			
 
				 
			
 
				-	/*
			
 
				-	 * Since blocks move to the free list without the coordination
			
 
				-	 * used in xfs_bmap_finish, we can't allow block to be available
			
 
				-	 * for reallocation and non-transaction writing (user data)
			
 
				-	 * until we know that the transaction that moved it to the free
			
 
				-	 * list is permanently on disk.  We track the blocks by declaring
			
 
				-	 * these blocks as "busy"; the busy list is maintained on a per-ag
			
 
				-	 * basis and each transaction records which entries should be removed
			
 
				-	 * when the iclog commits to disk.  If a busy block is allocated,
			
 
				-	 * the iclog is pushed up to the LSN that freed the block.
			
 
				-	 */
			
 
				-	xfs_alloc_busy_insert(tp, agno, bno, len);
			
 
				 	return 0;
			
 
				 
			
 
				  error0:
			
@@ -1923,21 +1983,6 @@ xfs_alloc_get_freelist(
 
				 	xfs_alloc_log_agf(tp, agbp, logflags);
			
 
				 	*bnop = bno;
			
 
				 
			
 
				-	/*
			
 
				-	 * As blocks are freed, they are added to the per-ag busy list and
			
 
				-	 * remain there until the freeing transaction is committed to disk.
			
 
				-	 * Now that we have allocated blocks, this list must be searched to see
			
 
				-	 * if a block is being reused.  If one is, then the freeing transaction
			
 
				-	 * must be pushed to disk before this transaction.
			
 
				-	 *
			
 
				-	 * We do this by setting the current transaction to a sync transaction
			
 
				-	 * which guarantees that the freeing transaction is on disk before this
			
 
				-	 * transaction. This is done instead of a synchronous log force here so
			
 
				-	 * that we don't sit and wait with the AGF locked in the transaction
			
 
				-	 * during the log force.
			
 
				-	 */
			
 
				-	if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1))
			
 
				-		xfs_trans_set_sync(tp);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -2423,105 +2468,13 @@ xfs_free_extent(
 
				 	}
			
 
				 
			
 
				 	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
			
 
				+	if (!error)
			
 
				+		xfs_alloc_busy_insert(tp, args.agno, args.agbno, len);
			
 
				 error0:
			
 
				 	xfs_perag_put(args.pag);
			
 
				 	return error;
			
 
				 }
			
 
				 
			
 
				-
			
 
				-/*
			
 
				- * AG Busy list management
			
 
				- * The busy list contains block ranges that have been freed but whose
			
 
				- * transactions have not yet hit disk.  If any block listed in a busy
			
 
				- * list is reused, the transaction that freed it must be forced to disk
			
 
				- * before continuing to use the block.
			
 
				- *
			
 
				- * xfs_alloc_busy_insert - add to the per-ag busy list
			
 
				- * xfs_alloc_busy_clear - remove an item from the per-ag busy list
			
 
				- * xfs_alloc_busy_search - search for a busy extent
			
 
				- */
			
 
				-
			
 
				-/*
			
 
				- * Insert a new extent into the busy tree.
			
 
				- *
			
 
				- * The busy extent tree is indexed by the start block of the busy extent.
			
 
				- * there can be multiple overlapping ranges in the busy extent tree but only
			
 
				- * ever one entry at a given start block. The reason for this is that
			
 
				- * multi-block extents can be freed, then smaller chunks of that extent
			
 
				- * allocated and freed again before the first transaction commit is on disk.
			
 
				- * If the exact same start block is freed a second time, we have to wait for
			
 
				- * that busy extent to pass out of the tree before the new extent is inserted.
			
 
				- * There are two main cases we have to handle here.
			
 
				- *
			
 
				- * The first case is a transaction that triggers a "free - allocate - free"
			
 
				- * cycle. This can occur during btree manipulations as a btree block is freed
			
 
				- * to the freelist, then allocated from the free list, then freed again. In
			
 
				- * this case, the second extxpnet free is what triggers the duplicate and as
			
 
				- * such the transaction IDs should match. Because the extent was allocated in
			
 
				- * this transaction, the transaction must be marked as synchronous. This is
			
 
				- * true for all cases where the free/alloc/free occurs in the one transaction,
			
 
				- * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case.
			
 
				- * This serves to catch violations of the second case quite effectively.
			
 
				- *
			
 
				- * The second case is where the free/alloc/free occur in different
			
 
				- * transactions. In this case, the thread freeing the extent the second time
			
 
				- * can't mark the extent busy immediately because it is already tracked in a
			
 
				- * transaction that may be committing.  When the log commit for the existing
			
 
				- * busy extent completes, the busy extent will be removed from the tree. If we
			
 
				- * allow the second busy insert to continue using that busy extent structure,
			
 
				- * it can be freed before this transaction is safely in the log.  Hence our
			
 
				- * only option in this case is to force the log to remove the existing busy
			
 
				- * extent from the list before we insert the new one with the current
			
 
				- * transaction ID.
			
 
				- *
			
 
				- * The problem we are trying to avoid in the free-alloc-free in separate
			
 
				- * transactions is most easily described with a timeline:
			
 
				- *
			
 
				- *      Thread 1	Thread 2	Thread 3	xfslogd
			
 
				- *	xact alloc
			
 
				- *	free X
			
 
				- *	mark busy
			
 
				- *	commit xact
			
 
				- *	free xact
			
 
				- *			xact alloc
			
 
				- *			alloc X
			
 
				- *			busy search
			
 
				- *			mark xact sync
			
 
				- *			commit xact
			
 
				- *			free xact
			
 
				- *			force log
			
 
				- *			checkpoint starts
			
 
				- *			....
			
 
				- *					xact alloc
			
 
				- *					free X
			
 
				- *					mark busy
			
 
				- *					finds match
			
 
				- *					*** KABOOM! ***
			
 
				- *					....
			
 
				- *							log IO completes
			
 
				- *							unbusy X
			
 
				- *			checkpoint completes
			
 
				- *
			
 
				- * By issuing a log force in thread 3 @ "KABOOM", the thread will block until
			
 
				- * the checkpoint completes, and the busy extent it matched will have been
			
 
				- * removed from the tree when it is woken. Hence it can then continue safely.
			
 
				- *
			
 
				- * However, to ensure this matching process is robust, we need to use the
			
 
				- * transaction ID for identifying transaction, as delayed logging results in
			
 
				- * the busy extent and transaction lifecycles being different. i.e. the busy
			
 
				- * extent is active for a lot longer than the transaction.  Hence the
			
 
				- * transaction structure can be freed and reallocated, then mark the same
			
 
				- * extent busy again in the new transaction. In this case the new transaction
			
 
				- * will have a different tid but can have the same address, and hence we need
			
 
				- * to check against the tid.
			
 
				- *
			
 
				- * Future: for delayed logging, we could avoid the log force if the extent was
			
 
				- * first freed in the current checkpoint sequence. This, however, requires the
			
 
				- * ability to pin the current checkpoint in memory until this transaction
			
 
				- * commits to ensure that both the original free and the current one combine
			
 
				- * logically into the one checkpoint. If the checkpoint sequences are
			
 
				- * different, however, we still need to wait on a log force.
			
 
				- */
			
 
				 void
			
 
				 xfs_alloc_busy_insert(
			
 
				 	struct xfs_trans	*tp,
			
@@ -2533,9 +2486,7 @@ xfs_alloc_busy_insert(
 
				 	struct xfs_busy_extent	*busyp;
			
 
				 	struct xfs_perag	*pag;
			
 
				 	struct rb_node		**rbp;
			
 
				-	struct rb_node		*parent;
			
 
				-	int			match;
			
 
				-
			
 
				+	struct rb_node		*parent = NULL;
			
 
				 
			
 
				 	new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
			
 
				 	if (!new) {
			
@@ -2544,7 +2495,7 @@ xfs_alloc_busy_insert(
 
				 		 * block, make this a synchronous transaction to insure that
			
 
				 		 * the block is not reused before this transaction commits.
			
 
				 		 */
			
 
				-		trace_xfs_alloc_busy(tp, agno, bno, len, 1);
			
 
				+		trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
			
 
				 		xfs_trans_set_sync(tp);
			
 
				 		return;
			
 
				 	}
			
@@ -2552,66 +2503,28 @@ xfs_alloc_busy_insert(
 
				 	new->agno = agno;
			
 
				 	new->bno = bno;
			
 
				 	new->length = len;
			
 
				-	new->tid = xfs_log_get_trans_ident(tp);
			
 
				-
			
 
				 	INIT_LIST_HEAD(&new->list);
			
 
				 
			
 
				 	/* trace before insert to be able to see failed inserts */
			
 
				-	trace_xfs_alloc_busy(tp, agno, bno, len, 0);
			
 
				+	trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);
			
 
				 
			
 
				 	pag = xfs_perag_get(tp->t_mountp, new->agno);
			
 
				-restart:
			
 
				 	spin_lock(&pag->pagb_lock);
			
 
				 	rbp = &pag->pagb_tree.rb_node;
			
 
				-	parent = NULL;
			
 
				-	busyp = NULL;
			
 
				-	match = 0;
			
 
				-	while (*rbp && match >= 0) {
			
 
				+	while (*rbp) {
			
 
				 		parent = *rbp;
			
 
				 		busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
			
 
				 
			
 
				 		if (new->bno < busyp->bno) {
			
 
				-			/* may overlap, but exact start block is lower */
			
 
				 			rbp = &(*rbp)->rb_left;
			
 
				-			if (new->bno + new->length > busyp->bno)
			
 
				-				match = busyp->tid == new->tid ? 1 : -1;
			
 
				+			ASSERT(new->bno + new->length <= busyp->bno);
			
 
				 		} else if (new->bno > busyp->bno) {
			
 
				-			/* may overlap, but exact start block is higher */
			
 
				 			rbp = &(*rbp)->rb_right;
			
 
				-			if (bno < busyp->bno + busyp->length)
			
 
				-				match = busyp->tid == new->tid ? 1 : -1;
			
 
				+			ASSERT(bno >= busyp->bno + busyp->length);
			
 
				 		} else {
			
 
				-			match = busyp->tid == new->tid ? 1 : -1;
			
 
				-			break;
			
 
				+			ASSERT(0);
			
 
				 		}
			
 
				 	}
			
 
				-	if (match < 0) {
			
 
				-		/* overlap marked busy in different transaction */
			
 
				-		spin_unlock(&pag->pagb_lock);
			
 
				-		xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
			
 
				-		goto restart;
			
 
				-	}
			
 
				-	if (match > 0) {
			
 
				-		/*
			
 
				-		 * overlap marked busy in same transaction. Update if exact
			
 
				-		 * start block match, otherwise combine the busy extents into
			
 
				-		 * a single range.
			
 
				-		 */
			
 
				-		if (busyp->bno == new->bno) {
			
 
				-			busyp->length = max(busyp->length, new->length);
			
 
				-			spin_unlock(&pag->pagb_lock);
			
 
				-			ASSERT(tp->t_flags & XFS_TRANS_SYNC);
			
 
				-			xfs_perag_put(pag);
			
 
				-			kmem_free(new);
			
 
				-			return;
			
 
				-		}
			
 
				-		rb_erase(&busyp->rb_node, &pag->pagb_tree);
			
 
				-		new->length = max(busyp->bno + busyp->length,
			
 
				-					new->bno + new->length) -
			
 
				-				min(busyp->bno, new->bno);
			
 
				-		new->bno = min(busyp->bno, new->bno);
			
 
				-	} else
			
 
				-		busyp = NULL;
			
 
				 
			
 
				 	rb_link_node(&new->rb_node, parent, rbp);
			
 
				 	rb_insert_color(&new->rb_node, &pag->pagb_tree);
			
@@ -2619,7 +2532,6 @@ restart:
 
				 	list_add(&new->list, &tp->t_busy);
			
 
				 	spin_unlock(&pag->pagb_lock);
			
 
				 	xfs_perag_put(pag);
			
 
				-	kmem_free(busyp);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -2668,31 +2580,443 @@ xfs_alloc_busy_search(
 
				 		}
			
 
				 	}
			
 
				 	spin_unlock(&pag->pagb_lock);
			
 
				-	trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match);
			
 
				 	xfs_perag_put(pag);
			
 
				 	return match;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * The found free extent [fbno, fend] overlaps part or all of the given busy
			
 
				+ * extent.  If the overlap covers the beginning, the end, or all of the busy
			
 
				+ * extent, the overlapping portion can be made unbusy and used for the
			
 
				+ * allocation.  We can't split a busy extent because we can't modify a
			
 
				+ * transaction/CIL context busy list, but we can update an entry's block
			
 
				+ * number or length.
			
 
				+ *
			
 
				+ * Returns true if the extent can safely be reused, or false if the search
			
 
				+ * needs to be restarted.
			
 
				+ */
			
 
				+STATIC bool
			
 
				+xfs_alloc_busy_update_extent(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_perag	*pag,
			
 
				+	struct xfs_busy_extent	*busyp,
			
 
				+	xfs_agblock_t		fbno,
			
 
				+	xfs_extlen_t		flen,
			
 
				+	bool			userdata)
			
 
				+{
			
 
				+	xfs_agblock_t		fend = fbno + flen;
			
 
				+	xfs_agblock_t		bbno = busyp->bno;
			
 
				+	xfs_agblock_t		bend = bbno + busyp->length;
			
 
				+
			
 
				+	/*
			
 
				+	 * If there is a busy extent overlapping a user allocation, we have
			
 
				+	 * no choice but to force the log and retry the search.
			
 
				+	 *
			
 
				+	 * Fortunately this does not happen during normal operation, but
			
 
				+	 * only if the filesystem is very low on space and has to dip into
			
 
				+	 * the AGFL for normal allocations.
			
 
				+	 */
			
 
				+	if (userdata)
			
 
				+		goto out_force_log;
			
 
				+
			
 
				+	if (bbno < fbno && bend > fend) {
			
 
				+		/*
			
 
				+		 * Case 1:
			
 
				+		 *    bbno           bend
			
 
				+		 *    +BBBBBBBBBBBBBBBBB+
			
 
				+		 *        +---------+
			
 
				+		 *        fbno   fend
			
 
				+		 */
			
 
				+
			
 
				+		/*
			
 
				+		 * We would have to split the busy extent to be able to track
			
 
				+		 * it correctly, which we cannot do because we would have to
			
 
				+		 * modify the list of busy extents attached to the transaction
			
 
				+		 * or CIL context, which is immutable.
			
 
				+		 *
			
 
				+		 * Force out the log to clear the busy extent and retry the
			
 
				+		 * search.
			
 
				+		 */
			
 
				+		goto out_force_log;
			
 
				+	} else if (bbno >= fbno && bend <= fend) {
			
 
				+		/*
			
 
				+		 * Case 2:
			
 
				+		 *    bbno           bend
			
 
				+		 *    +BBBBBBBBBBBBBBBBB+
			
 
				+		 *    +-----------------+
			
 
				+		 *    fbno           fend
			
 
				+		 *
			
 
				+		 * Case 3:
			
 
				+		 *    bbno           bend
			
 
				+		 *    +BBBBBBBBBBBBBBBBB+
			
 
				+		 *    +--------------------------+
			
 
				+		 *    fbno                    fend
			
 
				+		 *
			
 
				+		 * Case 4:
			
 
				+		 *             bbno           bend
			
 
				+		 *             +BBBBBBBBBBBBBBBBB+
			
 
				+		 *    +--------------------------+
			
 
				+		 *    fbno                    fend
			
 
				+		 *
			
 
				+		 * Case 5:
			
 
				+		 *             bbno           bend
			
 
				+		 *             +BBBBBBBBBBBBBBBBB+
			
 
				+		 *    +-----------------------------------+
			
 
				+		 *    fbno                             fend
			
 
				+		 *
			
 
				+		 */
			
 
				+
			
 
				+		/*
			
 
				+		 * The busy extent is fully covered by the extent we are
			
 
				+		 * allocating, and can simply be removed from the rbtree.
			
 
				+		 * However we cannot remove it from the immutable list
			
 
				+		 * tracking busy extents in the transaction or CIL context,
			
 
				+		 * so set the length to zero to mark it invalid.
			
 
				+		 *
			
 
				+		 * We also need to restart the busy extent search from the
			
 
				+		 * tree root, because erasing the node can rearrange the
			
 
				+		 * tree topology.
			
 
				+		 */
			
 
				+		rb_erase(&busyp->rb_node, &pag->pagb_tree);
			
 
				+		busyp->length = 0;
			
 
				+		return false;
			
 
				+	} else if (fend < bend) {
			
 
				+		/*
			
 
				+		 * Case 6:
			
 
				+		 *              bbno           bend
			
 
				+		 *             +BBBBBBBBBBBBBBBBB+
			
 
				+		 *             +---------+
			
 
				+		 *             fbno   fend
			
 
				+		 *
			
 
				+		 * Case 7:
			
 
				+		 *             bbno           bend
			
 
				+		 *             +BBBBBBBBBBBBBBBBB+
			
 
				+		 *    +------------------+
			
 
				+		 *    fbno            fend
			
 
				+		 *
			
 
				+		 */
			
 
				+		busyp->bno = fend;
			
 
				+	} else if (bbno < fbno) {
			
 
				+		/*
			
 
				+		 * Case 8:
			
 
				+		 *    bbno           bend
			
 
				+		 *    +BBBBBBBBBBBBBBBBB+
			
 
				+		 *        +-------------+
			
 
				+		 *        fbno       fend
			
 
				+		 *
			
 
				+		 * Case 9:
			
 
				+		 *    bbno           bend
			
 
				+		 *    +BBBBBBBBBBBBBBBBB+
			
 
				+		 *        +----------------------+
			
 
				+		 *        fbno                fend
			
 
				+		 */
			
 
				+		busyp->length = fbno - busyp->bno;
			
 
				+	} else {
			
 
				+		ASSERT(0);
			
 
				+	}
			
 
				+
			
 
				+	trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
			
 
				+	return true;
			
 
				+
			
 
				+out_force_log:
			
 
				+	spin_unlock(&pag->pagb_lock);
			
 
				+	xfs_log_force(mp, XFS_LOG_SYNC);
			
 
				+	trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
			
 
				+	spin_lock(&pag->pagb_lock);
			
 
				+	return false;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * For a given extent [fbno, flen], make sure we can reuse it safely.
			
 
				+ */
			
 
				 void
			
 
				-xfs_alloc_busy_clear(
			
 
				+xfs_alloc_busy_reuse(
			
 
				 	struct xfs_mount	*mp,
			
 
				-	struct xfs_busy_extent	*busyp)
			
 
				+	xfs_agnumber_t		agno,
			
 
				+	xfs_agblock_t		fbno,
			
 
				+	xfs_extlen_t		flen,
			
 
				+	bool			userdata)
			
 
				 {
			
 
				 	struct xfs_perag	*pag;
			
 
				+	struct rb_node		*rbp;
			
 
				 
			
 
				-	trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno,
			
 
				-						busyp->length);
			
 
				+	ASSERT(flen > 0);
			
 
				 
			
 
				-	ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno,
			
 
				-						busyp->length) == 1);
			
 
				+	pag = xfs_perag_get(mp, agno);
			
 
				+	spin_lock(&pag->pagb_lock);
			
 
				+restart:
			
 
				+	rbp = pag->pagb_tree.rb_node;
			
 
				+	while (rbp) {
			
 
				+		struct xfs_busy_extent *busyp =
			
 
				+			rb_entry(rbp, struct xfs_busy_extent, rb_node);
			
 
				+		xfs_agblock_t	bbno = busyp->bno;
			
 
				+		xfs_agblock_t	bend = bbno + busyp->length;
			
 
				 
			
 
				-	list_del_init(&busyp->list);
			
 
				+		if (fbno + flen <= bbno) {
			
 
				+			rbp = rbp->rb_left;
			
 
				+			continue;
			
 
				+		} else if (fbno >= bend) {
			
 
				+			rbp = rbp->rb_right;
			
 
				+			continue;
			
 
				+		}
			
 
				 
			
 
				-	pag = xfs_perag_get(mp, busyp->agno);
			
 
				-	spin_lock(&pag->pagb_lock);
			
 
				-	rb_erase(&busyp->rb_node, &pag->pagb_tree);
			
 
				+		if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
			
 
				+						  userdata))
			
 
				+			goto restart;
			
 
				+	}
			
 
				 	spin_unlock(&pag->pagb_lock);
			
 
				 	xfs_perag_put(pag);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * For a given extent [fbno, flen], search the busy extent list to find a
			
 
				+ * subset of the extent that is not busy.  If *rlen is smaller than
			
 
				+ * args->minlen no suitable extent could be found, and the higher level
			
 
				+ * code needs to force out the log and retry the allocation.
			
 
				+ */
			
 
				+STATIC void
			
 
				+xfs_alloc_busy_trim(
			
 
				+	struct xfs_alloc_arg	*args,
			
 
				+	xfs_agblock_t		bno,
			
 
				+	xfs_extlen_t		len,
			
 
				+	xfs_agblock_t		*rbno,
			
 
				+	xfs_extlen_t		*rlen)
			
 
				+{
			
 
				+	xfs_agblock_t		fbno;
			
 
				+	xfs_extlen_t		flen;
			
 
				+	struct rb_node		*rbp;
			
 
				+
			
 
				+	ASSERT(len > 0);
			
 
				 
			
 
				+	spin_lock(&args->pag->pagb_lock);
			
 
				+restart:
			
 
				+	fbno = bno;
			
 
				+	flen = len;
			
 
				+	rbp = args->pag->pagb_tree.rb_node;
			
 
				+	while (rbp && flen >= args->minlen) {
			
 
				+		struct xfs_busy_extent *busyp =
			
 
				+			rb_entry(rbp, struct xfs_busy_extent, rb_node);
			
 
				+		xfs_agblock_t	fend = fbno + flen;
			
 
				+		xfs_agblock_t	bbno = busyp->bno;
			
 
				+		xfs_agblock_t	bend = bbno + busyp->length;
			
 
				+
			
 
				+		if (fend <= bbno) {
			
 
				+			rbp = rbp->rb_left;
			
 
				+			continue;
			
 
				+		} else if (fbno >= bend) {
			
 
				+			rbp = rbp->rb_right;
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				+		/*
			
 
				+		 * If this is a metadata allocation, try to reuse the busy
			
 
				+		 * extent instead of trimming the allocation.
			
 
				+		 */
			
 
				+		if (!args->userdata) {
			
 
				+			if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
			
 
				+							  busyp, fbno, flen,
			
 
				+							  false))
			
 
				+				goto restart;
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				+		if (bbno <= fbno) {
			
 
				+			/* start overlap */
			
 
				+
			
 
				+			/*
			
 
				+			 * Case 1:
			
 
				+			 *    bbno           bend
			
 
				+			 *    +BBBBBBBBBBBBBBBBB+
			
 
				+			 *        +---------+
			
 
				+			 *        fbno   fend
			
 
				+			 *
			
 
				+			 * Case 2:
			
 
				+			 *    bbno           bend
			
 
				+			 *    +BBBBBBBBBBBBBBBBB+
			
 
				+			 *    +-------------+
			
 
				+			 *    fbno       fend
			
 
				+			 *
			
 
				+			 * Case 3:
			
 
				+			 *    bbno           bend
			
 
				+			 *    +BBBBBBBBBBBBBBBBB+
			
 
				+			 *        +-------------+
			
 
				+			 *        fbno       fend
			
 
				+			 *
			
 
				+			 * Case 4:
			
 
				+			 *    bbno           bend
			
 
				+			 *    +BBBBBBBBBBBBBBBBB+
			
 
				+			 *    +-----------------+
			
 
				+			 *    fbno           fend
			
 
				+			 *
			
 
				+			 * No unbusy region in extent, return failure.
			
 
				+			 */
			
 
				+			if (fend <= bend)
			
 
				+				goto fail;
			
 
				+
			
 
				+			/*
			
 
				+			 * Case 5:
			
 
				+			 *    bbno           bend
			
 
				+			 *    +BBBBBBBBBBBBBBBBB+
			
 
				+			 *        +----------------------+
			
 
				+			 *        fbno                fend
			
 
				+			 *
			
 
				+			 * Case 6:
			
 
				+			 *    bbno           bend
			
 
				+			 *    +BBBBBBBBBBBBBBBBB+
			
 
				+			 *    +--------------------------+
			
 
				+			 *    fbno                    fend
			
 
				+			 *
			
 
				+			 * Needs to be trimmed to:
			
 
				+			 *                       +-------+
			
 
				+			 *                       fbno fend
			
 
				+			 */
			
 
				+			fbno = bend;
			
 
				+		} else if (bend >= fend) {
			
 
				+			/* end overlap */
			
 
				+
			
 
				+			/*
			
 
				+			 * Case 7:
			
 
				+			 *             bbno           bend
			
 
				+			 *             +BBBBBBBBBBBBBBBBB+
			
 
				+			 *    +------------------+
			
 
				+			 *    fbno            fend
			
 
				+			 *
			
 
				+			 * Case 8:
			
 
				+			 *             bbno           bend
			
 
				+			 *             +BBBBBBBBBBBBBBBBB+
			
 
				+			 *    +--------------------------+
			
 
				+			 *    fbno                    fend
			
 
				+			 *
			
 
				+			 * Needs to be trimmed to:
			
 
				+			 *    +-------+
			
 
				+			 *    fbno fend
			
 
				+			 */
			
 
				+			fend = bbno;
			
 
				+		} else {
			
 
				+			/* middle overlap */
			
 
				+
			
 
				+			/*
			
 
				+			 * Case 9:
			
 
				+			 *             bbno           bend
			
 
				+			 *             +BBBBBBBBBBBBBBBBB+
			
 
				+			 *    +-----------------------------------+
			
 
				+			 *    fbno                             fend
			
 
				+			 *
			
 
				+			 * Can be trimmed to:
			
 
				+			 *    +-------+        OR         +-------+
			
 
				+			 *    fbno fend                   fbno fend
			
 
				+			 *
			
 
				+			 * Backward allocation leads to significant
			
 
				+			 * fragmentation of directories, which degrades
			
 
				+			 * directory performance, therefore we always want to
			
 
				+			 * choose the option that produces forward allocation
			
 
				+			 * patterns.
			
 
				+			 * Preferring the lower bno extent will make the next
			
 
				+			 * request use "fend" as the start of the next
			
 
				+			 * allocation;  if the segment is no longer busy at
			
 
				+			 * that point, we'll get a contiguous allocation, but
			
 
				+			 * even if it is still busy, we will get a forward
			
 
				+			 * allocation.
			
 
				+			 * We try to avoid choosing the segment at "bend",
			
 
				+			 * because that can lead to the next allocation
			
 
				+			 * taking the segment at "fbno", which would be a
			
 
				+			 * backward allocation.  We only use the segment at
			
 
				+			 * "fbno" if it is much larger than the current
			
 
				+			 * requested size, because in that case there's a
			
 
				+			 * good chance subsequent allocations will be
			
 
				+			 * contiguous.
			
 
				+			 */
			
 
				+			if (bbno - fbno >= args->maxlen) {
			
 
				+				/* left candidate fits perfectly */
			
 
				+				fend = bbno;
			
 
				+			} else if (fend - bend >= args->maxlen * 4) {
			
 
				+				/* right candidate has enough free space */
			
 
				+				fbno = bend;
			
 
				+			} else if (bbno - fbno >= args->minlen) {
			
 
				+				/* left candidate fits minimum requirement */
			
 
				+				fend = bbno;
			
 
				+			} else {
			
 
				+				goto fail;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		flen = fend - fbno;
			
 
				+	}
			
 
				+	spin_unlock(&args->pag->pagb_lock);
			
 
				+
			
 
				+	if (fbno != bno || flen != len) {
			
 
				+		trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len,
			
 
				+					  fbno, flen);
			
 
				+	}
			
 
				+	*rbno = fbno;
			
 
				+	*rlen = flen;
			
 
				+	return;
			
 
				+fail:
			
 
				+	/*
			
 
				+	 * Return a zero extent length as a failure indication.  All callers
			
 
				+	 * re-check if the trimmed extent satisfies the minlen requirement.
			
 
				+	 */
			
 
				+	spin_unlock(&args->pag->pagb_lock);
			
 
				+	trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
			
 
				+	*rbno = fbno;
			
 
				+	*rlen = 0;
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+xfs_alloc_busy_clear_one(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_perag	*pag,
			
 
				+	struct xfs_busy_extent	*busyp)
			
 
				+{
			
 
				+	if (busyp->length) {
			
 
				+		trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
			
 
				+						busyp->length);
			
 
				+		rb_erase(&busyp->rb_node, &pag->pagb_tree);
			
 
				+	}
			
 
				+
			
 
				+	list_del_init(&busyp->list);
			
 
				 	kmem_free(busyp);
			
 
				 }
			
 
				+
			
 
				+void
			
 
				+xfs_alloc_busy_clear(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct list_head	*list)
			
 
				+{
			
 
				+	struct xfs_busy_extent	*busyp, *n;
			
 
				+	struct xfs_perag	*pag = NULL;
			
 
				+	xfs_agnumber_t		agno = NULLAGNUMBER;
			
 
				+
			
 
				+	list_for_each_entry_safe(busyp, n, list, list) {
			
 
				+		if (busyp->agno != agno) {
			
 
				+			if (pag) {
			
 
				+				spin_unlock(&pag->pagb_lock);
			
 
				+				xfs_perag_put(pag);
			
 
				+			}
			
 
				+			pag = xfs_perag_get(mp, busyp->agno);
			
 
				+			spin_lock(&pag->pagb_lock);
			
 
				+			agno = busyp->agno;
			
 
				+		}
			
 
				+
			
 
				+		xfs_alloc_busy_clear_one(mp, pag, busyp);
			
 
				+	}
			
 
				+
			
 
				+	if (pag) {
			
 
				+		spin_unlock(&pag->pagb_lock);
			
 
				+		xfs_perag_put(pag);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Callback for list_sort to sort busy extents by the AG they reside in.
			
 
				+ */
			
 
				+int
			
 
				+xfs_busy_extent_ag_cmp(
			
 
				+	void			*priv,
			
 
				+	struct list_head	*a,
			
 
				+	struct list_head	*b)
			
 
				+{
			
 
				+	return container_of(a, struct xfs_busy_extent, list)->agno -
			
 
				+		container_of(b, struct xfs_busy_extent, list)->agno;
			
 
				+}
			
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -140,11 +140,24 @@ xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
 
				 	xfs_agblock_t bno, xfs_extlen_t len);
			
 
				 
			
 
				 void
			
 
				-xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp);
			
 
				+xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list);
			
 
				 
			
 
				 int
			
 
				 xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
			
 
				 	xfs_agblock_t bno, xfs_extlen_t len);
			
 
				+
			
 
				+void
			
 
				+xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
			
 
				+	xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
			
 
				+
			
 
				+int
			
 
				+xfs_busy_extent_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
			
 
				+
			
 
				+static inline void xfs_alloc_busy_sort(struct list_head *list)
			
 
				+{
			
 
				+	list_sort(NULL, list, xfs_busy_extent_ag_cmp);
			
 
				+}
			
 
				+
			
 
				 #endif	/* __KERNEL__ */
			
 
				 
			
 
				 /*
			
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -95,6 +95,8 @@ xfs_allocbt_alloc_block(
 
				 		return 0;
			
 
				 	}
			
 
				 
			
 
				+	xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
			
 
				+
			
 
				 	xfs_trans_agbtree_delta(cur->bc_tp, 1);
			
 
				 	new->s = cpu_to_be32(bno);
			
 
				 
			
@@ -118,17 +120,6 @@ xfs_allocbt_free_block(
 
				 	if (error)
			
 
				 		return error;
			
 
				 
			
 
				-	/*
			
 
				-	 * Since blocks move to the free list without the coordination used in
			
 
				-	 * xfs_bmap_finish, we can't allow block to be available for
			
 
				-	 * reallocation and non-transaction writing (user data) until we know
			
 
				-	 * that the transaction that moved it to the free list is permanently
			
 
				-	 * on disk. We track the blocks by declaring these blocks as "busy";
			
 
				-	 * the busy list is maintained on a per-ag basis and each transaction
			
 
				-	 * records which entries should be removed when the iclog commits to
			
 
				-	 * disk. If a busy block is allocated, the iclog is pushed up to the
			
 
				-	 * LSN that freed the block.
			
 
				-	 */
			
 
				 	xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
			
 
				 	xfs_trans_agbtree_delta(cur->bc_tp, -1);
			
 
				 	return 0;
			
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -202,7 +202,7 @@ xfs_swap_extents(
 
				 	xfs_inode_t	*tip,	/* tmp inode */
			
 
				 	xfs_swapext_t	*sxp)
			
 
				 {
			
 
				-	xfs_mount_t	*mp;
			
 
				+	xfs_mount_t	*mp = ip->i_mount;
			
 
				 	xfs_trans_t	*tp;
			
 
				 	xfs_bstat_t	*sbp = &sxp->sx_stat;
			
 
				 	xfs_ifork_t	*tempifp, *ifp, *tifp;
			
@@ -212,16 +212,12 @@ xfs_swap_extents(
 
				 	int		taforkblks = 0;
			
 
				 	__uint64_t	tmp;
			
 
				 
			
 
				-	mp = ip->i_mount;
			
 
				-
			
 
				 	tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
			
 
				 	if (!tempifp) {
			
 
				 		error = XFS_ERROR(ENOMEM);
			
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	sbp = &sxp->sx_stat;
			
 
				-
			
 
				 	/*
			
 
				 	 * we have to do two separate lock calls here to keep lockdep
			
 
				 	 * happy. If we try to get all the locks in one call, lock will
			
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1354,7 +1354,7 @@ xfs_itruncate_start(
 
				 		return 0;
			
 
				 	}
			
 
				 	last_byte = xfs_file_last_byte(ip);
			
 
				-	trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte);
			
 
				+	trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte);
			
 
				 	if (last_byte > toss_start) {
			
 
				 		if (flags & XFS_ITRUNC_DEFINITE) {
			
 
				 			xfs_tosspages(ip, toss_start,
			
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -970,7 +970,6 @@ xfs_iflush_abort(
 
				 {
			
 
				 	xfs_inode_log_item_t	*iip = ip->i_itemp;
			
 
				 
			
 
				-	iip = ip->i_itemp;
			
 
				 	if (iip) {
			
 
				 		struct xfs_ail	*ailp = iip->ili_item.li_ailp;
			
 
				 		if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
			
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1449,6 +1449,13 @@ xlog_dealloc_log(xlog_t *log)
 
				 
			
 
				 	xlog_cil_destroy(log);
			
 
				 
			
 
				+	/*
			
 
				+	 * always need to ensure that the extra buffer does not point to memory
			
 
				+	 * owned by another log buffer before we free it.
			
 
				+	 */
			
 
				+	xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size);
			
 
				+	xfs_buf_free(log->l_xbuf);
			
 
				+
			
 
				 	iclog = log->l_iclog;
			
 
				 	for (i=0; i<log->l_iclog_bufs; i++) {
			
 
				 		xfs_buf_free(iclog->ic_bp);
			
@@ -1458,7 +1465,6 @@ xlog_dealloc_log(xlog_t *log)
 
				 	}
			
 
				 	spinlock_destroy(&log->l_icloglock);
			
 
				 
			
 
				-	xfs_buf_free(log->l_xbuf);
			
 
				 	log->l_mp->m_log = NULL;
			
 
				 	kmem_free(log);
			
 
				 }	/* xlog_dealloc_log */
			
@@ -3248,13 +3254,6 @@ xfs_log_ticket_get(
 
				 	return ticket;
			
 
				 }
			
 
				 
			
 
				-xlog_tid_t
			
 
				-xfs_log_get_trans_ident(
			
 
				-	struct xfs_trans	*tp)
			
 
				-{
			
 
				-	return tp->t_ticket->t_tid;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * Allocate and initialise a new log ticket.
			
 
				  */
			
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -189,8 +189,6 @@ void	  xlog_iodone(struct xfs_buf *);
 
				 struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
			
 
				 void	  xfs_log_ticket_put(struct xlog_ticket *ticket);
			
 
				 
			
 
				-xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
			
 
				-
			
 
				 void	xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
			
 
				 				struct xfs_log_vec *log_vector,
			
 
				 				xfs_lsn_t *commit_lsn, int flags);
			
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -361,13 +361,12 @@ xlog_cil_committed(
 
				 	int	abort)
			
 
				 {
			
 
				 	struct xfs_cil_ctx	*ctx = args;
			
 
				-	struct xfs_busy_extent	*busyp, *n;
			
 
				 
			
 
				 	xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
			
 
				 					ctx->start_lsn, abort);
			
 
				 
			
 
				-	list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
			
 
				-		xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp);
			
 
				+	xfs_alloc_busy_sort(&ctx->busy_extents);
			
 
				+	xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, &ctx->busy_extents);
			
 
				 
			
 
				 	spin_lock(&ctx->cil->xc_cil_lock);
			
 
				 	list_del(&ctx->committing);
			
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -146,6 +146,8 @@ static inline uint xlog_get_client_id(__be32 i)
 
				 					   shutdown */
			
 
				 #define XLOG_TAIL_WARN		0x10	/* log tail verify warning issued */
			
 
				 
			
 
				+typedef __uint32_t xlog_tid_t;
			
 
				+
			
 
				 #ifdef __KERNEL__
			
 
				 /*
			
 
				  * Below are states for covering allocation transactions.
			
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -204,6 +204,35 @@ xlog_bread(
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Read at an offset into the buffer. Returns with the buffer in its original
			
 
				+ * state regardless of the result of the read.
			
 
				+ */
			
 
				+STATIC int
			
 
				+xlog_bread_offset(
			
 
				+	xlog_t		*log,
			
 
				+	xfs_daddr_t	blk_no,		/* block to read from */
			
 
				+	int		nbblks,		/* blocks to read */
			
 
				+	xfs_buf_t	*bp,		/* pointer/length restored before return */
			
 
				+	xfs_caddr_t	offset)		/* passed to XFS_BUF_SET_PTR as read target */
			
 
				+{
			
 
				+	xfs_caddr_t	orig_offset = XFS_BUF_PTR(bp);	/* saved for restore */
			
 
				+	int		orig_len = bp->b_buffer_length;	/* saved for restore */
			
 
				+	int		error, error2;
			
 
				+
			
 
				+	error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks));
			
 
				+	if (error)
			
 
				+		return error;
			
 
				+
			
 
				+	error = xlog_bread_noalign(log, blk_no, nbblks, bp);
			
 
				+
			
 
				+	/* must reset buffer pointer even on error */
			
 
				+	error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len);
			
 
				+	if (error)
			
 
				+		return error;	/* read failure takes precedence */
			
 
				+	return error2;		/* otherwise report any restore failure */
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Write out the buffer at the given block for the given number of blocks.
			
 
				  * The buffer is kept locked across the write and is returned locked.
			
@@ -1229,20 +1258,12 @@ xlog_write_log_records(
 
				 		 */
			
 
				 		ealign = round_down(end_block, sectbb);
			
 
				 		if (j == 0 && (start_block + endcount > ealign)) {
			
 
				-			offset = XFS_BUF_PTR(bp);
			
 
				-			balign = BBTOB(ealign - start_block);
			
 
				-			error = XFS_BUF_SET_PTR(bp, offset + balign,
			
 
				-						BBTOB(sectbb));
			
 
				+			offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block);
			
 
				+			error = xlog_bread_offset(log, ealign, sectbb,
			
 
				+							bp, offset);
			
 
				 			if (error)
			
 
				 				break;
			
 
				 
			
 
				-			error = xlog_bread_noalign(log, ealign, sectbb, bp);
			
 
				-			if (error)
			
 
				-				break;
			
 
				-
			
 
				-			error = XFS_BUF_SET_PTR(bp, offset, bufblks);
			
 
				-			if (error)
			
 
				-				break;
			
 
				 		}
			
 
				 
			
 
				 		offset = xlog_align(log, start_block, endcount, bp);
			
@@ -3448,19 +3469,9 @@ xlog_do_recovery_pass(
 
				 				 *   - order is important.
			
 
				 				 */
			
 
				 				wrapped_hblks = hblks - split_hblks;
			
 
				-				error = XFS_BUF_SET_PTR(hbp,
			
 
				-						offset + BBTOB(split_hblks),
			
 
				-						BBTOB(hblks - split_hblks));
			
 
				-				if (error)
			
 
				-					goto bread_err2;
			
 
				-
			
 
				-				error = xlog_bread_noalign(log, 0,
			
 
				-							   wrapped_hblks, hbp);
			
 
				-				if (error)
			
 
				-					goto bread_err2;
			
 
				-
			
 
				-				error = XFS_BUF_SET_PTR(hbp, offset,
			
 
				-							BBTOB(hblks));
			
 
				+				error = xlog_bread_offset(log, 0,
			
 
				+						wrapped_hblks, hbp,
			
 
				+						offset + BBTOB(split_hblks));
			
 
				 				if (error)
			
 
				 					goto bread_err2;
			
 
				 			}
			
@@ -3511,19 +3522,9 @@ xlog_do_recovery_pass(
 
				 				 *   _first_, then the log start (LR header end)
			
 
				 				 *   - order is important.
			
 
				 				 */
			
 
				-				error = XFS_BUF_SET_PTR(dbp,
			
 
				-						offset + BBTOB(split_bblks),
			
 
				-						BBTOB(bblks - split_bblks));
			
 
				-				if (error)
			
 
				-					goto bread_err2;
			
 
				-
			
 
				-				error = xlog_bread_noalign(log, wrapped_hblks,
			
 
				-						bblks - split_bblks,
			
 
				-						dbp);
			
 
				-				if (error)
			
 
				-					goto bread_err2;
			
 
				-
			
 
				-				error = XFS_BUF_SET_PTR(dbp, offset, h_size);
			
 
				+				error = xlog_bread_offset(log, 0,
			
 
				+						bblks - split_bblks, dbp,
			
 
				+						offset + BBTOB(split_bblks));
			
 
				 				if (error)
			
 
				 					goto bread_err2;
			
 
				 			}
			
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1900,7 +1900,7 @@ xfs_mod_incore_sb_batch(
 
				 	uint			nmsb,
			
 
				 	int			rsvd)
			
 
				 {
			
 
				-	xfs_mod_sb_t		*msbp = &msb[0];
			
 
				+	xfs_mod_sb_t		*msbp;
			
 
				 	int			error = 0;
			
 
				 
			
 
				 	/*
			
@@ -1910,7 +1910,7 @@ xfs_mod_incore_sb_batch(
 
				 	 * changes will be atomic.
			
 
				 	 */
			
 
				 	spin_lock(&mp->m_sb_lock);
			
 
				-	for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
			
 
				+	for (msbp = msb; msbp < (msb + nmsb); msbp++) {
			
 
				 		ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
			
 
				 		       msbp->msb_field > XFS_SBS_FDBLOCKS);
			
 
				 
			
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -608,10 +608,8 @@ STATIC void
 
				 xfs_trans_free(
			
 
				 	struct xfs_trans	*tp)
			
 
				 {
			
 
				-	struct xfs_busy_extent	*busyp, *n;
			
 
				-
			
 
				-	list_for_each_entry_safe(busyp, n, &tp->t_busy, list)
			
 
				-		xfs_alloc_busy_clear(tp->t_mountp, busyp);
			
 
				+	xfs_alloc_busy_sort(&tp->t_busy);
			
 
				+	xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy);
			
 
				 
			
 
				 	atomic_dec(&tp->t_mountp->m_active_trans);
			
 
				 	xfs_trans_free_dqinfo(tp);
			
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -73,8 +73,6 @@ typedef	__int32_t	xfs_tid_t;	/* transaction identifier */
 
				 typedef	__uint32_t	xfs_dablk_t;	/* dir/attr block number (in file) */
			
 
				 typedef	__uint32_t	xfs_dahash_t;	/* dir/attr hash value */
			
 
				 
			
 
				-typedef __uint32_t	xlog_tid_t;	/* transaction ID type */
			
 
				-
			
 
				 /*
			
 
				  * These types are 64 bits on disk but are either 32 or 64 bits in memory.
			
 
				  * Disk based types: