@@ -35,14 +35,12 @@
 #include <linux/freezer.h>
 
 #include "xfs_sb.h"
-#include "xfs_inum.h"
 #include "xfs_log.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
 #include "xfs_trace.h"
 
 static kmem_zone_t *xfs_buf_zone;
-STATIC int xfsbufd(void *);
 
 static struct workqueue_struct *xfslogd_workqueue;
 
@@ -57,11 +55,7 @@ static struct workqueue_struct *xfslogd_workqueue;
 #endif
 
 #define xb_to_gfp(flags) \
-	((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
-	  ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
-
-#define xb_to_km(flags) \
-	(((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
+	((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)
 
 static inline int
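The collapsed macro is easy to audit: readahead allocations get __GFP_NORETRY so they can fail early under memory pressure, and everything else now uses GFP_NOFS unconditionally instead of selecting it via the dropped XBF_DONT_BLOCK flag, since buffer allocation can be reached from contexts where recursing into the filesystem could deadlock. A standalone sketch of the mapping; the numeric flag values below are invented purely for illustration:

#include <stdio.h>

/* Invented stand-in values; the real ones come from kernel headers. */
#define XBF_READ_AHEAD	0x01
#define __GFP_NORETRY	0x10
#define GFP_NOFS	0x20
#define __GFP_NOWARN	0x40

#define xb_to_gfp(flags) \
	((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)

int main(void)
{
	/* Readahead is opportunistic: fail early rather than retry hard. */
	printf("readahead: 0x%x\n", xb_to_gfp(XBF_READ_AHEAD));
	/* Everything else avoids filesystem recursion with GFP_NOFS. */
	printf("default:   0x%x\n", xb_to_gfp(0));
	return 0;
}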
@@ -71,11 +65,11 @@ xfs_buf_is_vmapped(
 	/*
 	 * Return true if the buffer is vmapped.
 	 *
-	 * The XBF_MAPPED flag is set if the buffer should be mapped, but the
-	 * code is clever enough to know it doesn't have to map a single page,
-	 * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1.
+	 * b_addr is null if the buffer is not mapped, but the code is clever
+	 * enough to know it doesn't have to map a single page, so the check has
+	 * to be both for b_addr and bp->b_page_count > 1.
 	 */
-	return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1;
+	return bp->b_addr && bp->b_page_count > 1;
 }
 
 static inline int
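With XBF_MAPPED gone, "is this buffer mapped?" is answered by b_addr alone, and the b_page_count test only filters out the single-page case that never needs vm_map_ram(). A hedged model of the invariant, using a simplified struct rather than the real xfs_buf:

#include <stdbool.h>
#include <stdio.h>

/* Simplified model of the fields involved, not the real struct xfs_buf. */
struct buf {
	void		*b_addr;	/* NULL when the buffer is unmapped */
	unsigned int	b_page_count;
};

/* Mirrors xfs_buf_is_vmapped(): mapped AND backed by multiple pages. */
static bool buf_is_vmapped(const struct buf *bp)
{
	return bp->b_addr != NULL && bp->b_page_count > 1;
}

int main(void)
{
	char page[1];
	struct buf single = { page, 1 }, multi = { page, 4 }, unmapped = { 0, 4 };

	printf("%d %d %d\n", buf_is_vmapped(&single),
	       buf_is_vmapped(&multi), buf_is_vmapped(&unmapped)); /* 0 1 0 */
	return 0;
}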
@@ -144,8 +138,17 @@ void
 xfs_buf_stale(
 	struct xfs_buf	*bp)
 {
+	ASSERT(xfs_buf_islocked(bp));
+
 	bp->b_flags |= XBF_STALE;
-	xfs_buf_delwri_dequeue(bp);
+
+	/*
+	 * Clear the delwri status so that a delwri queue walker will not
+	 * flush this buffer to disk now that it is stale. The delwri queue has
+	 * a reference to the buffer, so this is safe to do.
+	 */
+	bp->b_flags &= ~_XBF_DELWRI_Q;
+
 	atomic_set(&(bp)->b_lru_ref, 0);
 	if (!list_empty(&bp->b_lru)) {
 		struct xfs_buftarg *btp = bp->b_target;
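Clearing _XBF_DELWRI_Q here without touching b_list is one half of the new lazy-removal contract: the thread marking the buffer stale only drops the flag, and the list owner later notices the cleared flag and performs the list_del_init() plus reference drop itself. A self-contained toy model of that handshake (flag values and field names invented for illustration):

#include <stdio.h>

#define XBF_STALE	0x1	/* invented values, illustration only */
#define _XBF_DELWRI_Q	0x2

struct buf {
	unsigned int	flags;
	int		on_list;
	int		hold;	/* reference count held by the list */
};

/* Staling thread: clears the flag, never touches the list itself. */
static void mark_stale(struct buf *bp)
{
	bp->flags |= XBF_STALE;
	bp->flags &= ~_XBF_DELWRI_Q;
}

/* List owner: reaps entries whose flag was cleared under them. */
static void walk_one(struct buf *bp)
{
	if (!(bp->flags & _XBF_DELWRI_Q)) {
		bp->on_list = 0;	/* list_del_init() in the real code */
		bp->hold--;		/* xfs_buf_relse() drops the queue ref */
	}
}

int main(void)
{
	struct buf b = { _XBF_DELWRI_Q, 1, 1 };

	mark_stale(&b);
	walk_one(&b);
	printf("on_list=%d hold=%d\n", b.on_list, b.hold);	/* 0 0 */
	return 0;
}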
@@ -164,22 +167,22 @@ xfs_buf_stale(
 struct xfs_buf *
 xfs_buf_alloc(
 	struct xfs_buftarg	*target,
-	xfs_off_t		range_base,
-	size_t			range_length,
+	xfs_daddr_t		blkno,
+	size_t			numblks,
 	xfs_buf_flags_t		flags)
 {
 	struct xfs_buf		*bp;
 
-	bp = kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags));
+	bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
 	if (unlikely(!bp))
 		return NULL;
 
 	/*
-	 * We don't want certain flags to appear in b_flags.
+	 * We don't want certain flags to appear in b_flags unless they are
+	 * specifically set by later operations on the buffer.
 	 */
-	flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
+	flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD);
 
-	memset(bp, 0, sizeof(xfs_buf_t));
 	atomic_set(&bp->b_hold, 1);
 	atomic_set(&bp->b_lru_ref, 1);
 	init_completion(&bp->b_iowait);
@@ -189,14 +192,22 @@ xfs_buf_alloc(
 	sema_init(&bp->b_sema, 0); /* held, no waiters */
 	XB_SET_OWNER(bp);
 	bp->b_target = target;
-	bp->b_file_offset = range_base;
+
 	/*
-	 * Set buffer_length and count_desired to the same value initially.
-	 * I/O routines should use count_desired, which will be the same in
+	 * Set length and io_length to the same value initially.
+	 * I/O routines should use io_length, which will be the same in
 	 * most cases but may be reset (e.g. XFS recovery).
 	 */
-	bp->b_buffer_length = bp->b_count_desired = range_length;
+	bp->b_length = numblks;
+	bp->b_io_length = numblks;
 	bp->b_flags = flags;
+
+	/*
+	 * We do not set the block number here in the buffer because we have not
+	 * finished initialising the buffer. We insert the buffer into the cache
+	 * in this state, so this ensures that we are unable to do IO on a
+	 * buffer that hasn't been fully initialised.
+	 */
 	bp->b_bn = XFS_BUF_DADDR_NULL;
 	atomic_set(&bp->b_pin_count, 0);
 	init_waitqueue_head(&bp->b_waiters);
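The signature change moves the whole API from byte offsets to 512-byte basic blocks: callers pass an xfs_daddr_t and a block count, and byte conversions happen at the edges through BBTOB()/BTOBB() instead of open-coded BBSHIFT shifts at every call site. The arithmetic, spelled out as a runnable example (macros written in their conventional form; the values are arbitrary):

#include <stdio.h>

#define BBSHIFT		9			/* 512-byte basic blocks */
#define BBTOB(bbs)	((bbs) << BBSHIFT)	/* basic blocks -> bytes */
#define BTOBB(bytes)	(((bytes) + (1 << BBSHIFT) - 1) >> BBSHIFT)

int main(void)
{
	long long blkno = 8, numblks = 16;	/* example buffer */

	printf("byte offset = %lld\n", BBTOB(blkno));		/* 4096 */
	printf("byte length = %lld\n", BBTOB(numblks));		/* 8192 */
	printf("8000 bytes rounds up to %lld blocks\n",
	       (long long)BTOBB(8000));				/* 16 */
	return 0;
}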
@@ -219,13 +230,12 @@ _xfs_buf_get_pages(
 {
 	/* Make sure that we have a page list */
 	if (bp->b_pages == NULL) {
-		bp->b_offset = xfs_buf_poff(bp->b_file_offset);
 		bp->b_page_count = page_count;
 		if (page_count <= XB_PAGES) {
 			bp->b_pages = bp->b_page_array;
 		} else {
 			bp->b_pages = kmem_alloc(sizeof(struct page *) *
-						 page_count, xb_to_km(flags));
+						 page_count, KM_NOFS);
 			if (bp->b_pages == NULL)
 				return -ENOMEM;
 		}
@@ -288,11 +298,11 @@ xfs_buf_allocate_memory(
 	xfs_buf_t		*bp,
 	uint			flags)
 {
-	size_t			size = bp->b_count_desired;
+	size_t			size;
 	size_t			nbytes, offset;
 	gfp_t			gfp_mask = xb_to_gfp(flags);
 	unsigned short		page_count, i;
-	xfs_off_t		end;
+	xfs_off_t		start, end;
 	int			error;
 
 	/*
@@ -300,15 +310,15 @@ xfs_buf_allocate_memory(
 	 * the memory from the heap - there's no need for the complexity of
 	 * page arrays to keep allocation down to order 0.
 	 */
-	if (bp->b_buffer_length < PAGE_SIZE) {
-		bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
+	size = BBTOB(bp->b_length);
+	if (size < PAGE_SIZE) {
+		bp->b_addr = kmem_alloc(size, KM_NOFS);
 		if (!bp->b_addr) {
 			/* low memory - use alloc_page loop instead */
 			goto use_alloc_page;
 		}
 
-		if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
-								PAGE_MASK) !=
+		if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
 		    ((unsigned long)bp->b_addr & PAGE_MASK)) {
 			/* b_addr spans two pages - use alloc_page instead */
 			kmem_free(bp->b_addr);
@@ -319,13 +329,14 @@ xfs_buf_allocate_memory(
 		bp->b_pages = bp->b_page_array;
 		bp->b_pages[0] = virt_to_page(bp->b_addr);
 		bp->b_page_count = 1;
-		bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
+		bp->b_flags |= _XBF_KMEM;
 		return 0;
 	}
 
 use_alloc_page:
-	end = bp->b_file_offset + bp->b_buffer_length;
-	page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
+	start = BBTOB(bp->b_bn) >> PAGE_SHIFT;
+	end = (BBTOB(bp->b_bn + bp->b_length) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	page_count = end - start;
 	error = _xfs_buf_get_pages(bp, page_count, flags);
 	if (unlikely(error))
 		return error;
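The rewritten page_count computation rounds the buffer's byte range out to page boundaries, which is what makes buffers that start mid-page work: a page-sized buffer whose disk address is not page aligned genuinely spans two pages. Worked standalone with 4 KiB pages and invented values:

#include <stdio.h>

#define BBSHIFT		9
#define BBTOB(bbs)	((bbs) << BBSHIFT)
#define PAGE_SHIFT	12
#define PAGE_SIZE	(1ULL << PAGE_SHIFT)

int main(void)
{
	unsigned long long b_bn = 4, b_length = 8;	/* 2k offset, 4k long */
	unsigned long long start = BBTOB(b_bn) >> PAGE_SHIFT;
	unsigned long long end =
		(BBTOB(b_bn + b_length) + PAGE_SIZE - 1) >> PAGE_SHIFT;

	/* Bytes 2048..6143 touch pages 0 and 1, so two pages are needed. */
	printf("page_count = %llu\n", end - start);	/* 2 */
	return 0;
}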
@@ -388,8 +399,9 @@ _xfs_buf_map_pages(
 	if (bp->b_page_count == 1) {
 		/* A single page buffer is always mappable */
 		bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
-		bp->b_flags |= XBF_MAPPED;
-	} else if (flags & XBF_MAPPED) {
+	} else if (flags & XBF_UNMAPPED) {
+		bp->b_addr = NULL;
+	} else {
 		int retried = 0;
 
 		do {
@@ -403,7 +415,6 @@ _xfs_buf_map_pages(
 		if (!bp->b_addr)
 			return -ENOMEM;
 		bp->b_addr += bp->b_offset;
-		bp->b_flags |= XBF_MAPPED;
 	}
 
 	return 0;
@@ -420,29 +431,27 @@ _xfs_buf_map_pages(
 */
 xfs_buf_t *
 _xfs_buf_find(
-	xfs_buftarg_t		*btp,	/* block device target */
-	xfs_off_t		ioff,	/* starting offset of range */
-	size_t			isize,	/* length of range */
+	struct xfs_buftarg	*btp,
+	xfs_daddr_t		blkno,
+	size_t			numblks,
 	xfs_buf_flags_t		flags,
 	xfs_buf_t		*new_bp)
 {
-	xfs_off_t		range_base;
-	size_t			range_length;
+	size_t			numbytes;
 	struct xfs_perag	*pag;
 	struct rb_node		**rbp;
 	struct rb_node		*parent;
 	xfs_buf_t		*bp;
 
-	range_base = (ioff << BBSHIFT);
-	range_length = (isize << BBSHIFT);
+	numbytes = BBTOB(numblks);
 
 	/* Check for IOs smaller than the sector size / not sector aligned */
-	ASSERT(!(range_length < (1 << btp->bt_sshift)));
-	ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
+	ASSERT(!(numbytes < (1 << btp->bt_sshift)));
+	ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask));
 
 	/* get tree root */
 	pag = xfs_perag_get(btp->bt_mount,
-				xfs_daddr_to_agno(btp->bt_mount, ioff));
+				xfs_daddr_to_agno(btp->bt_mount, blkno));
 
 	/* walk tree */
 	spin_lock(&pag->pag_buf_lock);
@@ -453,20 +462,20 @@ _xfs_buf_find(
 		parent = *rbp;
 		bp = rb_entry(parent, struct xfs_buf, b_rbnode);
 
-		if (range_base < bp->b_file_offset)
+		if (blkno < bp->b_bn)
 			rbp = &(*rbp)->rb_left;
-		else if (range_base > bp->b_file_offset)
+		else if (blkno > bp->b_bn)
 			rbp = &(*rbp)->rb_right;
 		else {
 			/*
-			 * found a block offset match. If the range doesn't
+			 * found a block number match. If the range doesn't
 			 * match, the only way this is allowed is if the buffer
 			 * in the cache is stale and the transaction that made
 			 * it stale has not yet committed. i.e. we are
 			 * reallocating a busy extent. Skip this buffer and
 			 * continue searching to the right for an exact match.
 			 */
-			if (bp->b_buffer_length != range_length) {
+			if (bp->b_length != numblks) {
 				ASSERT(bp->b_flags & XBF_STALE);
 				rbp = &(*rbp)->rb_right;
 				continue;
@@ -511,7 +520,7 @@ found:
 	 */
 	if (bp->b_flags & XBF_STALE) {
 		ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
-		bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
+		bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
 	}
 
 	trace_xfs_buf_find(bp, flags, _RET_IP_);
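Keying the rbtree on b_bn directly also changes the alignment check: it is now the block number converted to bytes that must sit on a sector boundary. The check as plain arithmetic, assuming a hypothetical target with 4096-byte sectors:

#include <assert.h>
#include <stdio.h>

#define BBSHIFT		9
#define BBTOB(bbs)	((bbs) << BBSHIFT)

int main(void)
{
	unsigned long long bt_smask = 4096 - 1;	/* 4k-sector device */
	unsigned long long aligned = 24;	/* 24 << 9 = 12288 bytes */
	unsigned long long misaligned = 25;	/* 12800 & 4095 = 512 */

	/* Mirrors ASSERT(!(BBTOB(blkno) & btp->bt_smask)) above. */
	assert(!(BBTOB(aligned) & bt_smask));
	printf("blkno %llu is aligned; blkno %llu leaves remainder %llu\n",
	       aligned, misaligned, BBTOB(misaligned) & bt_smask);
	return 0;
}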
@@ -526,63 +535,59 @@ found:
 */
 struct xfs_buf *
 xfs_buf_get(
-	xfs_buftarg_t		*target,/* target for buffer */
-	xfs_off_t		ioff,	/* starting offset of range */
-	size_t			isize,	/* length of range */
+	xfs_buftarg_t		*target,
+	xfs_daddr_t		blkno,
+	size_t			numblks,
 	xfs_buf_flags_t		flags)
 {
 	struct xfs_buf		*bp;
 	struct xfs_buf		*new_bp;
 	int			error = 0;
 
-	bp = _xfs_buf_find(target, ioff, isize, flags, NULL);
+	bp = _xfs_buf_find(target, blkno, numblks, flags, NULL);
 	if (likely(bp))
 		goto found;
 
-	new_bp = xfs_buf_alloc(target, ioff << BBSHIFT, isize << BBSHIFT,
-			       flags);
+	new_bp = xfs_buf_alloc(target, blkno, numblks, flags);
 	if (unlikely(!new_bp))
 		return NULL;
 
-	bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
-	if (!bp) {
+	error = xfs_buf_allocate_memory(new_bp, flags);
+	if (error) {
 		kmem_zone_free(xfs_buf_zone, new_bp);
 		return NULL;
 	}
 
-	if (bp == new_bp) {
-		error = xfs_buf_allocate_memory(bp, flags);
-		if (error)
-			goto no_buffer;
-	} else
-		kmem_zone_free(xfs_buf_zone, new_bp);
+	bp = _xfs_buf_find(target, blkno, numblks, flags, new_bp);
+	if (!bp) {
+		xfs_buf_free(new_bp);
+		return NULL;
+	}
+
+	if (bp != new_bp)
+		xfs_buf_free(new_bp);
 
 	/*
 	 * Now we have a workable buffer, fill in the block number so
 	 * that we can do IO on it.
 	 */
-	bp->b_bn = ioff;
-	bp->b_count_desired = bp->b_buffer_length;
+	bp->b_bn = blkno;
+	bp->b_io_length = bp->b_length;
 
 found:
-	if (!(bp->b_flags & XBF_MAPPED)) {
+	if (!bp->b_addr) {
 		error = _xfs_buf_map_pages(bp, flags);
 		if (unlikely(error)) {
 			xfs_warn(target->bt_mount,
 				"%s: failed to map pages\n", __func__);
-			goto no_buffer;
+			xfs_buf_relse(bp);
+			return NULL;
 		}
 	}
 
 	XFS_STATS_INC(xb_get);
 	trace_xfs_buf_get(bp, flags, _RET_IP_);
 	return bp;
-
-no_buffer:
-	if (flags & (XBF_LOCK | XBF_TRYLOCK))
-		xfs_buf_unlock(bp);
-	xfs_buf_rele(bp);
-	return NULL;
 }
 
 STATIC int
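The reordering above is the important part: memory is attached to new_bp before the second _xfs_buf_find() call can publish it in the cache, so a concurrent lookup can never return a buffer without pages, and losing the insertion race simply means freeing the fully built new_bp. A self-contained toy of that ordering (a single-slot "cache" stands in for the per-AG rbtree; all names are invented):

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

struct tbuf { int blkno; bool has_memory; };
static struct tbuf *cache_slot;

static struct tbuf *cache_find_or_insert(struct tbuf *new_bp, int blkno)
{
	if (cache_slot && cache_slot->blkno == blkno)
		return cache_slot;	/* someone beat us to the insert */
	new_bp->blkno = blkno;
	cache_slot = new_bp;
	return new_bp;
}

static struct tbuf *toy_buf_get(int blkno)
{
	struct tbuf *new_bp, *bp;

	new_bp = calloc(1, sizeof(*new_bp));
	if (!new_bp)
		return NULL;

	/* Attach memory BEFORE the buffer can become visible in the cache, */
	new_bp->has_memory = true;

	/* ...so a racing lookup can never see a page-less buffer. */
	bp = cache_find_or_insert(new_bp, blkno);
	if (bp != new_bp)
		free(new_bp);	/* lost the race; reuse the cached buffer */
	return bp;
}

int main(void)
{
	struct tbuf *a = toy_buf_get(8);
	struct tbuf *b = toy_buf_get(8);

	printf("same buffer: %s\n", a == b ? "yes" : "no");	/* yes */
	return 0;
}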
@@ -590,32 +595,30 @@ _xfs_buf_read(
 	xfs_buf_t		*bp,
 	xfs_buf_flags_t		flags)
 {
-	int			status;
-
-	ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
+	ASSERT(!(flags & XBF_WRITE));
 	ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
 
-	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD);
+	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
 	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
 
-	status = xfs_buf_iorequest(bp);
-	if (status || bp->b_error || (flags & XBF_ASYNC))
-		return status;
+	xfs_buf_iorequest(bp);
+	if (flags & XBF_ASYNC)
+		return 0;
 	return xfs_buf_iowait(bp);
 }
 
 xfs_buf_t *
 xfs_buf_read(
 	xfs_buftarg_t		*target,
-	xfs_off_t		ioff,
-	size_t			isize,
+	xfs_daddr_t		blkno,
+	size_t			numblks,
 	xfs_buf_flags_t		flags)
 {
 	xfs_buf_t		*bp;
 
 	flags |= XBF_READ;
 
-	bp = xfs_buf_get(target, ioff, isize, flags);
+	bp = xfs_buf_get(target, blkno, numblks, flags);
 	if (bp) {
 		trace_xfs_buf_read(bp, flags, _RET_IP_);
 
@@ -627,7 +630,8 @@ xfs_buf_read(
 			 * Read ahead call which is already satisfied,
 			 * drop the buffer
 			 */
-			goto no_buffer;
+			xfs_buf_relse(bp);
+			return NULL;
 		} else {
 			/* We do not want read in the flags */
 			bp->b_flags &= ~XBF_READ;
@@ -635,12 +639,6 @@ xfs_buf_read(
 	}
 
 	return bp;
-
- no_buffer:
-	if (flags & (XBF_LOCK | XBF_TRYLOCK))
-		xfs_buf_unlock(bp);
-	xfs_buf_rele(bp);
-	return NULL;
 }
 
 /*
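Since xfs_buf_iorequest() no longer returns a status, the read path's contract is simpler: asynchronous callers fire and forget, and synchronous callers learn the outcome from xfs_buf_iowait() or from b_error on the returned buffer. A hedged sketch of a synchronous caller (a hypothetical helper; only the calls into this file's functions are real):

/* Hypothetical synchronous caller, built on this file's interfaces. */
static struct xfs_buf *
read_one_locked(struct xfs_buftarg *target, xfs_daddr_t blkno, size_t numblks)
{
	struct xfs_buf	*bp;

	/* No XBF_ASYNC here, so xfs_buf_read() waits for completion. */
	bp = xfs_buf_read(target, blkno, numblks, 0);
	if (!bp)
		return NULL;
	if (bp->b_error) {
		/* The I/O failed; the error sticks to the buffer. */
		xfs_buf_relse(bp);
		return NULL;
	}
	return bp;		/* locked and uptodate; release when done */
}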
@@ -650,14 +648,14 @@ xfs_buf_read(
 void
 xfs_buf_readahead(
 	xfs_buftarg_t		*target,
-	xfs_off_t		ioff,
-	size_t			isize)
+	xfs_daddr_t		blkno,
+	size_t			numblks)
 {
 	if (bdi_read_congested(target->bt_bdi))
 		return;
 
-	xfs_buf_read(target, ioff, isize,
-		     XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
+	xfs_buf_read(target, blkno, numblks,
+		     XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
 }
 
 /*
@@ -666,16 +664,15 @@ xfs_buf_readahead(
 */
 struct xfs_buf *
 xfs_buf_read_uncached(
-	struct xfs_mount	*mp,
 	struct xfs_buftarg	*target,
 	xfs_daddr_t		daddr,
-	size_t			length,
+	size_t			numblks,
 	int			flags)
 {
 	xfs_buf_t		*bp;
 	int			error;
 
-	bp = xfs_buf_get_uncached(target, length, flags);
+	bp = xfs_buf_get_uncached(target, numblks, flags);
 	if (!bp)
 		return NULL;
 
@@ -683,9 +680,9 @@ xfs_buf_read_uncached(
 	XFS_BUF_SET_ADDR(bp, daddr);
 	XFS_BUF_READ(bp);
 
-	xfsbdstrat(mp, bp);
+	xfsbdstrat(target->bt_mount, bp);
 	error = xfs_buf_iowait(bp);
-	if (error || bp->b_error) {
+	if (error) {
 		xfs_buf_relse(bp);
 		return NULL;
 	}
@@ -699,7 +696,7 @@ xfs_buf_read_uncached(
 void
 xfs_buf_set_empty(
 	struct xfs_buf		*bp,
-	size_t			len)
+	size_t			numblks)
 {
 	if (bp->b_pages)
 		_xfs_buf_free_pages(bp);
@@ -707,10 +704,9 @@ xfs_buf_set_empty(
 	bp->b_pages = NULL;
 	bp->b_page_count = 0;
 	bp->b_addr = NULL;
-	bp->b_file_offset = 0;
-	bp->b_buffer_length = bp->b_count_desired = len;
+	bp->b_length = numblks;
+	bp->b_io_length = numblks;
 	bp->b_bn = XFS_BUF_DADDR_NULL;
-	bp->b_flags &= ~XBF_MAPPED;
 }
 
 static inline struct page *
@@ -749,7 +745,7 @@ xfs_buf_associate_memory(
 	bp->b_pages = NULL;
 	bp->b_addr = mem;
 
-	rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
+	rval = _xfs_buf_get_pages(bp, page_count, 0);
 	if (rval)
 		return rval;
 
@@ -760,9 +756,8 @@ xfs_buf_associate_memory(
 		pageaddr += PAGE_SIZE;
 	}
 
-	bp->b_count_desired = len;
-	bp->b_buffer_length = buflen;
-	bp->b_flags |= XBF_MAPPED;
+	bp->b_io_length = BTOBB(len);
+	bp->b_length = BTOBB(buflen);
 
 	return 0;
 }
@@ -770,17 +765,18 @@ xfs_buf_associate_memory(
 xfs_buf_t *
 xfs_buf_get_uncached(
 	struct xfs_buftarg	*target,
-	size_t			len,
+	size_t			numblks,
 	int			flags)
 {
-	unsigned long		page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
+	unsigned long		page_count;
 	int			error, i;
 	xfs_buf_t		*bp;
 
-	bp = xfs_buf_alloc(target, 0, len, 0);
+	bp = xfs_buf_alloc(target, 0, numblks, 0);
 	if (unlikely(bp == NULL))
 		goto fail;
 
+	page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
 	error = _xfs_buf_get_pages(bp, page_count, 0);
 	if (error)
 		goto fail_free_buf;
@@ -792,7 +788,7 @@ xfs_buf_get_uncached(
 	}
 	bp->b_flags |= _XBF_PAGES;
 
-	error = _xfs_buf_map_pages(bp, XBF_MAPPED);
+	error = _xfs_buf_map_pages(bp, 0);
 	if (unlikely(error)) {
 		xfs_warn(target->bt_mount,
 			"%s: failed to map pages\n", __func__);
@@ -855,7 +851,7 @@ xfs_buf_rele(
 		spin_unlock(&pag->pag_buf_lock);
 	} else {
 		xfs_buf_lru_del(bp);
-		ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
+		ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
 		rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
 		spin_unlock(&pag->pag_buf_lock);
 		xfs_perag_put(pag);
@@ -915,13 +911,6 @@ xfs_buf_lock(
 	trace_xfs_buf_lock_done(bp, _RET_IP_);
 }
 
-/*
- * Releases the lock on the buffer object.
- * If the buffer is marked delwri but is not queued, do so before we
- * unlock the buffer as we need to set flags correctly. We also need to
- * take a reference for the delwri queue because the unlocker is going to
- * drop their's and they don't know we just queued it.
- */
 void
 xfs_buf_unlock(
 	struct xfs_buf		*bp)
@@ -1008,9 +997,8 @@ xfs_buf_ioerror_alert(
 	const char		*func)
 {
 	xfs_alert(bp->b_target->bt_mount,
-"metadata I/O error: block 0x%llx (\"%s\") error %d buf count %zd",
-		(__uint64_t)XFS_BUF_ADDR(bp), func,
-		bp->b_error, XFS_BUF_COUNT(bp));
+"metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d",
+		(__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length);
 }
 
 int
@@ -1019,10 +1007,11 @@ xfs_bwrite(
 {
 	int			error;
 
+	ASSERT(xfs_buf_islocked(bp));
+
 	bp->b_flags |= XBF_WRITE;
-	bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
+	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
 
-	xfs_buf_delwri_dequeue(bp);
 	xfs_bdstrat_cb(bp);
 
 	error = xfs_buf_iowait(bp);
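xfs_bwrite() now spells out its contract with the new ASSERT: the caller already holds the buffer lock, and the synchronous write path clears _XBF_DELWRI_Q itself in the same flag update rather than taking the old queue lock via xfs_buf_delwri_dequeue(). A hedged sketch of the expected calling pattern (hypothetical caller):

/* Hypothetical caller: modify a buffer, then write it back synchronously. */
static int
modify_and_write(struct xfs_buf *bp)
{
	int	error;

	xfs_buf_lock(bp);	/* xfs_bwrite() asserts this is held */
	/* ... modify the buffer contents through bp->b_addr here ... */
	error = xfs_bwrite(bp);	/* clears _XBF_DELWRI_Q, waits for I/O */
	xfs_buf_relse(bp);	/* unlock and drop our reference */
	return error;
}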
@@ -1181,7 +1170,7 @@ _xfs_buf_ioapply(
 	int			rw, map_i, total_nr_pages, nr_pages;
 	struct bio		*bio;
 	int			offset = bp->b_offset;
-	int			size = bp->b_count_desired;
+	int			size = BBTOB(bp->b_io_length);
 	sector_t		sector = bp->b_bn;
 
 	total_nr_pages = bp->b_page_count;
@@ -1229,7 +1218,7 @@ next_chunk:
 			break;
 
 		offset = 0;
-		sector += nbytes >> BBSHIFT;
+		sector += BTOBB(nbytes);
 		size -= nbytes;
 		total_nr_pages--;
 	}
@@ -1248,13 +1237,13 @@ next_chunk:
 	}
 }
 
-int
+void
 xfs_buf_iorequest(
 	xfs_buf_t		*bp)
{
 	trace_xfs_buf_iorequest(bp, _RET_IP_);
 
-	ASSERT(!(bp->b_flags & XBF_DELWRI));
+	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
 
 	if (bp->b_flags & XBF_WRITE)
 		xfs_buf_wait_unpin(bp);
@@ -1269,13 +1258,12 @@ xfs_buf_iorequest(
 	_xfs_buf_ioend(bp, 0);
 
 	xfs_buf_rele(bp);
-	return 0;
 }
 
 /*
- * Waits for I/O to complete on the buffer supplied.
- * It returns immediately if no I/O is pending.
- * It returns the I/O error code, if any, or 0 if there was no error.
+ * Waits for I/O to complete on the buffer supplied. It returns immediately if
+ * no I/O is pending or there is already a pending error on the buffer. It
+ * returns the I/O error code, if any, or 0 if there was no error.
 */
 int
 xfs_buf_iowait(
@@ -1283,7 +1271,8 @@ xfs_buf_iowait(
 {
 	trace_xfs_buf_iowait(bp, _RET_IP_);
 
-	wait_for_completion(&bp->b_iowait);
+	if (!bp->b_error)
+		wait_for_completion(&bp->b_iowait);
 
 	trace_xfs_buf_iowait_done(bp, _RET_IP_);
 	return bp->b_error;
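The iowait change closes a hang: if submission already failed and left b_error set, there may be no completion event coming at all, so waiting would block forever; returning the pending error immediately keeps the common fire-and-wait pairing safe. The pairing, as _xfs_buf_read() above uses it (hypothetical wrapper for illustration):

/* Hypothetical fire-and-wait pair; mirrors _xfs_buf_read() above. */
static int
submit_and_wait(struct xfs_buf *bp)
{
	xfs_buf_iorequest(bp);		/* void now; errors land in b_error */
	return xfs_buf_iowait(bp);	/* returns at once if b_error is set */
}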
@@ -1296,7 +1285,7 @@ xfs_buf_offset(
 {
 	struct page		*page;
 
-	if (bp->b_flags & XBF_MAPPED)
+	if (bp->b_addr)
 		return bp->b_addr + offset;
 
 	offset += bp->b_offset;
@@ -1315,27 +1304,30 @@ xfs_buf_iomove(
 	void			*data,	/* data address */
 	xfs_buf_rw_t		mode)	/* read/write/zero flag */
 {
-	size_t			bend, cpoff, csize;
-	struct page		*page;
+	size_t			bend;
 
 	bend = boff + bsize;
 	while (boff < bend) {
-		page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
-		cpoff = xfs_buf_poff(boff + bp->b_offset);
-		csize = min_t(size_t,
-			      PAGE_SIZE-cpoff, bp->b_count_desired-boff);
+		struct page	*page;
+		int		page_index, page_offset, csize;
+
+		page_index = (boff + bp->b_offset) >> PAGE_SHIFT;
+		page_offset = (boff + bp->b_offset) & ~PAGE_MASK;
+		page = bp->b_pages[page_index];
+		csize = min_t(size_t, PAGE_SIZE - page_offset,
+			      BBTOB(bp->b_io_length) - boff);
 
-		ASSERT(((csize + cpoff) <= PAGE_SIZE));
+		ASSERT((csize + page_offset) <= PAGE_SIZE);
 
 		switch (mode) {
 		case XBRW_ZERO:
-			memset(page_address(page) + cpoff, 0, csize);
+			memset(page_address(page) + page_offset, 0, csize);
 			break;
 		case XBRW_READ:
-			memcpy(data, page_address(page) + cpoff, csize);
+			memcpy(data, page_address(page) + page_offset, csize);
 			break;
 		case XBRW_WRITE:
-			memcpy(page_address(page) + cpoff, data, csize);
+			memcpy(page_address(page) + page_offset, data, csize);
 		}
 
 		boff += csize;
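The open-coded index/offset pair replaces the xfs_buf_btoct()/xfs_buf_poff() helpers, which die together with b_file_offset; it is plain shift-and-mask arithmetic on the byte offset into the buffer. Worked standalone with 4 KiB pages and invented values:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long boff = 5000, b_offset = 512;	/* example values */
	unsigned long page_index = (boff + b_offset) >> PAGE_SHIFT;
	unsigned long page_offset = (boff + b_offset) & ~PAGE_MASK;

	/* Byte 5512 of the buffer: page 1, offset 1416 within that page. */
	printf("index=%lu offset=%lu\n", page_index, page_offset);
	return 0;
}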
@@ -1435,11 +1427,9 @@ xfs_free_buftarg(
 {
 	unregister_shrinker(&btp->bt_shrinker);
 
-	xfs_flush_buftarg(btp, 1);
 	if (mp->m_flags & XFS_MOUNT_BARRIER)
 		xfs_blkdev_issue_flush(btp);
 
-	kthread_stop(btp->bt_task);
 	kmem_free(btp);
 }
 
@@ -1491,20 +1481,6 @@ xfs_setsize_buftarg(
 	return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
 }
 
-STATIC int
-xfs_alloc_delwri_queue(
-	xfs_buftarg_t		*btp,
-	const char		*fsname)
-{
-	INIT_LIST_HEAD(&btp->bt_delwri_queue);
-	spin_lock_init(&btp->bt_delwri_lock);
-	btp->bt_flags = 0;
-	btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
-	if (IS_ERR(btp->bt_task))
-		return PTR_ERR(btp->bt_task);
-	return 0;
-}
-
 xfs_buftarg_t *
 xfs_alloc_buftarg(
 	struct xfs_mount	*mp,
@@ -1527,8 +1503,6 @@ xfs_alloc_buftarg(
 	spin_lock_init(&btp->bt_lru_lock);
 	if (xfs_setsize_buftarg_early(btp, bdev))
 		goto error;
-	if (xfs_alloc_delwri_queue(btp, fsname))
-		goto error;
 	btp->bt_shrinker.shrink = xfs_buftarg_shrink;
 	btp->bt_shrinker.seeks = DEFAULT_SEEKS;
 	register_shrinker(&btp->bt_shrinker);
@@ -1539,125 +1513,52 @@ error:
 	return NULL;
 }
 
-
 /*
- * Delayed write buffer handling
+ * Add a buffer to the delayed write list.
+ *
+ * This queues a buffer for writeout if it hasn't already been. Note that
+ * neither this routine nor the buffer list submission functions perform
+ * any internal synchronization. It is expected that the lists are thread-local
+ * to the callers.
+ *
+ * Returns true if we queued up the buffer, or false if it was already on
+ * the buffer list.
 */
-void
+bool
 xfs_buf_delwri_queue(
-	xfs_buf_t		*bp)
+	struct xfs_buf		*bp,
+	struct list_head	*list)
 {
-	struct xfs_buftarg	*btp = bp->b_target;
-
-	trace_xfs_buf_delwri_queue(bp, _RET_IP_);
-
+	ASSERT(xfs_buf_islocked(bp));
 	ASSERT(!(bp->b_flags & XBF_READ));
 
-	spin_lock(&btp->bt_delwri_lock);
-	if (!list_empty(&bp->b_list)) {
-		/* if already in the queue, move it to the tail */
-		ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-		list_move_tail(&bp->b_list, &btp->bt_delwri_queue);
-	} else {
-		/* start xfsbufd as it is about to have something to do */
-		if (list_empty(&btp->bt_delwri_queue))
-			wake_up_process(bp->b_target->bt_task);
-
-		atomic_inc(&bp->b_hold);
-		bp->b_flags |= XBF_DELWRI | _XBF_DELWRI_Q | XBF_ASYNC;
-		list_add_tail(&bp->b_list, &btp->bt_delwri_queue);
-	}
-	bp->b_queuetime = jiffies;
-	spin_unlock(&btp->bt_delwri_lock);
-}
-
-void
-xfs_buf_delwri_dequeue(
-	xfs_buf_t		*bp)
-{
-	int			dequeued = 0;
-
-	spin_lock(&bp->b_target->bt_delwri_lock);
-	if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
-		ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-		list_del_init(&bp->b_list);
-		dequeued = 1;
+	/*
+	 * If the buffer is already marked delwri it is already queued up
+	 * by someone else for immediate writeout. Just ignore it in that
+	 * case.
+	 */
+	if (bp->b_flags & _XBF_DELWRI_Q) {
+		trace_xfs_buf_delwri_queued(bp, _RET_IP_);
+		return false;
 	}
-	bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
-	spin_unlock(&bp->b_target->bt_delwri_lock);
-
-	if (dequeued)
-		xfs_buf_rele(bp);
-
-	trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
-}
-
-/*
- * If a delwri buffer needs to be pushed before it has aged out, then promote
- * it to the head of the delwri queue so that it will be flushed on the next
- * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
- * than the age currently needed to flush the buffer. Hence the next time the
- * xfsbufd sees it is guaranteed to be considered old enough to flush.
- */
-void
-xfs_buf_delwri_promote(
-	struct xfs_buf	*bp)
-{
-	struct xfs_buftarg *btp = bp->b_target;
-	long		age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
 
-	ASSERT(bp->b_flags & XBF_DELWRI);
-	ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+	trace_xfs_buf_delwri_queue(bp, _RET_IP_);
 
 	/*
-	 * Check the buffer age before locking the delayed write queue as we
-	 * don't need to promote buffers that are already past the flush age.
+	 * If a buffer gets written out synchronously or marked stale while it
+	 * is on a delwri list we lazily remove it. To do this, the other party
+	 * clears the _XBF_DELWRI_Q flag but otherwise leaves the buffer alone.
+	 * It remains referenced and on the list. In a rare corner case it
+	 * might get re-added to a delwri list after the synchronous writeout,
+	 * in which case we just need to re-add the flag here.
 	 */
-	if (bp->b_queuetime < jiffies - age)
-		return;
-	bp->b_queuetime = jiffies - age;
-	spin_lock(&btp->bt_delwri_lock);
-	list_move(&bp->b_list, &btp->bt_delwri_queue);
-	spin_unlock(&btp->bt_delwri_lock);
-}
-
-/*
- * Move as many buffers as specified to the supplied list
- * idicating if we skipped any buffers to prevent deadlocks.
- */
-STATIC int
-xfs_buf_delwri_split(
-	xfs_buftarg_t	*target,
-	struct list_head *list,
-	unsigned long	age)
-{
-	xfs_buf_t	*bp, *n;
-	int		skipped = 0;
-	int		force;
-
-	force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-	INIT_LIST_HEAD(list);
-	spin_lock(&target->bt_delwri_lock);
-	list_for_each_entry_safe(bp, n, &target->bt_delwri_queue, b_list) {
-		ASSERT(bp->b_flags & XBF_DELWRI);
-
-		if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) {
-			if (!force &&
-			    time_before(jiffies, bp->b_queuetime + age)) {
-				xfs_buf_unlock(bp);
-				break;
-			}
-
-			bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q);
-			bp->b_flags |= XBF_WRITE;
-			list_move_tail(&bp->b_list, list);
-			trace_xfs_buf_delwri_split(bp, _RET_IP_);
-		} else
-			skipped++;
+	bp->b_flags |= _XBF_DELWRI_Q;
+	if (list_empty(&bp->b_list)) {
+		atomic_inc(&bp->b_hold);
+		list_add_tail(&bp->b_list, list);
 	}
 
-	spin_unlock(&target->bt_delwri_lock);
-	return skipped;
+	return true;
 }
 
 /*
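Taken together with the submit functions at the end of this patch, the new scheme makes delwri queues caller-owned, on-stack lists with no global lock or flush daemon behind them. A hedged sketch of the intended calling pattern (hypothetical caller; buffers assumed valid and unlocked on entry):

/* Hypothetical caller-side pattern for the new on-stack delwri lists. */
static int
write_dirty_buffers(struct xfs_buf **bufs, int nbufs)
{
	LIST_HEAD	(buffer_list);	/* thread-local, no locking needed */
	int		i;

	for (i = 0; i < nbufs; i++) {
		xfs_buf_lock(bufs[i]);
		/*
		 * Takes a hold reference and sets _XBF_DELWRI_Q; returns
		 * false if the buffer was already queued.
		 */
		xfs_buf_delwri_queue(bufs[i], &buffer_list);
		xfs_buf_unlock(bufs[i]);
	}

	/* Sorts, plugs, issues the I/O and waits; consumes buffer_list. */
	return xfs_buf_delwri_submit(&buffer_list);
}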
@@ -1683,99 +1584,109 @@ xfs_buf_cmp(
 	return 0;
 }
 
-STATIC int
-xfsbufd(
-	void		*data)
+static int
+__xfs_buf_delwri_submit(
+	struct list_head	*buffer_list,
+	struct list_head	*io_list,
+	bool			wait)
 {
-	xfs_buftarg_t	*target = (xfs_buftarg_t *)data;
-
-	current->flags |= PF_MEMALLOC;
-
-	set_freezable();
+	struct blk_plug		plug;
+	struct xfs_buf		*bp, *n;
+	int			pinned = 0;
+
+	list_for_each_entry_safe(bp, n, buffer_list, b_list) {
+		if (!wait) {
+			if (xfs_buf_ispinned(bp)) {
+				pinned++;
+				continue;
+			}
+			if (!xfs_buf_trylock(bp))
+				continue;
+		} else {
+			xfs_buf_lock(bp);
+		}
 
-	do {
-		long	age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
-		long	tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
-		struct list_head tmp;
-		struct blk_plug plug;
+		/*
+		 * Someone else might have written the buffer synchronously or
+		 * marked it stale in the meantime. In that case only the
+		 * _XBF_DELWRI_Q flag got cleared, and we have to drop the
+		 * reference and remove it from the list here.
+		 */
+		if (!(bp->b_flags & _XBF_DELWRI_Q)) {
+			list_del_init(&bp->b_list);
+			xfs_buf_relse(bp);
+			continue;
+		}
 
-		if (unlikely(freezing(current)))
-			try_to_freeze();
+		list_move_tail(&bp->b_list, io_list);
+		trace_xfs_buf_delwri_split(bp, _RET_IP_);
+	}
 
-		/* sleep for a long time if there is nothing to do. */
-		if (list_empty(&target->bt_delwri_queue))
-			tout = MAX_SCHEDULE_TIMEOUT;
-		schedule_timeout_interruptible(tout);
+	list_sort(NULL, io_list, xfs_buf_cmp);
 
-		xfs_buf_delwri_split(target, &tmp, age);
-		list_sort(NULL, &tmp, xfs_buf_cmp);
+	blk_start_plug(&plug);
+	list_for_each_entry_safe(bp, n, io_list, b_list) {
+		bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC);
+		bp->b_flags |= XBF_WRITE;
 
-		blk_start_plug(&plug);
-		while (!list_empty(&tmp)) {
-			struct xfs_buf *bp;
-			bp = list_first_entry(&tmp, struct xfs_buf, b_list);
+		if (!wait) {
+			bp->b_flags |= XBF_ASYNC;
 			list_del_init(&bp->b_list);
-			xfs_bdstrat_cb(bp);
 		}
-		blk_finish_plug(&plug);
-	} while (!kthread_should_stop());
+		xfs_bdstrat_cb(bp);
+	}
+	blk_finish_plug(&plug);
 
-	return 0;
+	return pinned;
 }
 
 /*
- * Go through all incore buffers, and release buffers if they belong to
- * the given device. This is used in filesystem error handling to
- * preserve the consistency of its metadata.
+ * Write out a buffer list asynchronously.
+ *
+ * This will take the @buffer_list, write all non-locked and non-pinned buffers
+ * out and not wait for I/O completion on any of the buffers. This interface
+ * is only safely usable for callers that can track I/O completion by higher
+ * level means, e.g. AIL pushing, as the @buffer_list is consumed in this
+ * function.
 */
 int
-xfs_flush_buftarg(
-	xfs_buftarg_t	*target,
-	int		wait)
+xfs_buf_delwri_submit_nowait(
	struct list_head	*buffer_list)
 {
-	xfs_buf_t	*bp;
-	int		pincount = 0;
-	LIST_HEAD(tmp_list);
-	LIST_HEAD(wait_list);
-	struct blk_plug plug;
+	LIST_HEAD		(io_list);
+	return __xfs_buf_delwri_submit(buffer_list, &io_list, false);
+}
 
-	flush_workqueue(xfslogd_workqueue);
+/*
+ * Write out a buffer list synchronously.
+ *
+ * This will take the @buffer_list, write all buffers out and wait for I/O
+ * completion on all of the buffers. @buffer_list is consumed by the function,
+ * so callers must have some other way of tracking buffers if they require such
+ * functionality.
+ */
+int
+xfs_buf_delwri_submit(
+	struct list_head	*buffer_list)
+{
+	LIST_HEAD		(io_list);
+	int			error = 0, error2;
+	struct xfs_buf		*bp;
 
-	set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-	pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
+	__xfs_buf_delwri_submit(buffer_list, &io_list, true);
 
-	/*
-	 * Dropped the delayed write list lock, now walk the temporary list.
-	 * All I/O is issued async and then if we need to wait for completion
-	 * we do that after issuing all the IO.
-	 */
-	list_sort(NULL, &tmp_list, xfs_buf_cmp);
+	/* Wait for IO to complete. */
+	while (!list_empty(&io_list)) {
+		bp = list_first_entry(&io_list, struct xfs_buf, b_list);
 
-	blk_start_plug(&plug);
-	while (!list_empty(&tmp_list)) {
-		bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
-		ASSERT(target == bp->b_target);
 		list_del_init(&bp->b_list);
-		if (wait) {
-			bp->b_flags &= ~XBF_ASYNC;
-			list_add(&bp->b_list, &wait_list);
-		}
-		xfs_bdstrat_cb(bp);
-	}
-	blk_finish_plug(&plug);
-
-	if (wait) {
-		/* Wait for IO to complete. */
-		while (!list_empty(&wait_list)) {
-			bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
-
-			list_del_init(&bp->b_list);
-			xfs_buf_iowait(bp);
-			xfs_buf_relse(bp);
-		}
+		error2 = xfs_buf_iowait(bp);
+		xfs_buf_relse(bp);
+		if (!error)
+			error = error2;
 	}
 
-	return pincount;
+	return error;
 }
 
 int __init
|