12 years ago · c8d8566952
--- a/Documentation/filesystems/xfs-self-describing-metadata.txt
+++ b/Documentation/filesystems/xfs-self-describing-metadata.txt
@@ -0,0 +1,350 @@
 
				+XFS Self Describing Metadata
			
 
				+----------------------------
			
 
				+
			
 
				+Introduction
			
 
				+------------
			
 
				+
			
 
				+The largest scalability problem facing XFS is not one of algorithmic
			
 
				+scalability, but of verification of the filesystem structure. Scalability of the
			
 
				+structures and indexes on disk and the algorithms for iterating them are
			
 
				+adequate for supporting PB scale filesystems with billions of inodes, however it
			
 
				+is this very scalability that causes the verification problem.
			
 
				+
			
 
				+Almost all metadata on XFS is dynamically allocated. The only fixed location
			
 
				+metadata is the allocation group headers (SB, AGF, AGFL and AGI), while all
			
 
				+other metadata structures need to be discovered by walking the filesystem
			
 
				+structure in different ways. While this is already done by userspace tools for
			
 
				+validating and repairing the structure, there are limits to what they can
			
 
				+verify, and this in turn limits the supportable size of an XFS filesystem.
			
 
				+
			
 
				+For example, it is entirely possible to manually use xfs_db and a bit of
			
 
				+scripting to analyse the structure of a 100TB filesystem when trying to
			
 
				+determine the root cause of a corruption problem, but it is still mainly a
			
 
				+manual task of verifying that things like single bit errors or misplaced writes
			
 
				+weren't the ultimate cause of a corruption event. It may take a few hours to a
			
 
				+few days to perform such forensic analysis, so at this scale root cause
			
 
				+analysis is entirely possible.
			
 
				+
			
 
				+However, if we scale the filesystem up to 1PB, we now have 10x as much metadata
			
 
				+to analyse and so that analysis blows out towards weeks/months of forensic work.
			
 
				+Most of the analysis work is slow and tedious, so as the amount of analysis goes
			
 
				+up, the more likely that the cause will be lost in the noise.  Hence the primary
			
 
				+concern for supporting PB scale filesystems is minimising the time and effort
			
 
				+required for basic forensic analysis of the filesystem structure.
			
 
				+
			
 
				+
			
 
				+Self Describing Metadata
			
 
				+------------------------
			
 
				+
			
 
				+One of the problems with the current metadata format is that apart from the
			
 
				+magic number in the metadata block, we have no other way of identifying what it
			
 
				+is supposed to be. We can't even identify if it is the right place. Put simply,
			
 
				+you can't look at a single metadata block in isolation and say "yes, it is
			
 
				+supposed to be there and the contents are valid".
			
 
				+
			
 
				+Hence most of the time spent on forensic analysis is spent doing basic
			
 
				+verification of metadata values, looking for values that are in range (and hence
			
 
				+not detected by automated verification checks) but are not correct. Finding and
			
 
				+understanding how things like cross linked block lists (e.g. sibling
			
 
				+pointers in a btree end up with loops in them) are the key to understanding what
			
 
				+went wrong, but it is impossible to tell what order the blocks were linked into
			
 
				+each other or written to disk after the fact.
			
 
				+
			
 
				+Hence we need to record more information into the metadata to allow us to
			
 
				+quickly determine if the metadata is intact and can be ignored for the purpose
			
 
				+of analysis. We can't protect against every possible type of error, but we can
			
 
				+ensure that common types of errors are easily detectable.  Hence the concept of
			
 
				+self describing metadata.
			
 
				+
			
 
				+The first, fundamental requirement of self describing metadata is that the
			
 
				+metadata object contains some form of unique identifier in a well known
			
 
				+location. This allows us to identify the expected contents of the block and
			
 
				+hence parse and verify the metadata object. If we can't independently identify
			
 
				+the type of metadata in the object, then the metadata doesn't describe itself
			
 
				+very well at all!
			
 
				+
			
 
				+Luckily, almost all XFS metadata has magic numbers embedded already - only the
			
 
				+AGFL, remote symlinks and remote attribute blocks do not contain identifying
			
 
				+magic numbers. Hence we can change the on-disk format of all these objects to
			
 
				+add more identifying information and detect this simply by changing the magic
			
 
				+numbers in the metadata objects. That is, if it has the current magic number,
			
 
				+the metadata isn't self identifying. If it contains a new magic number, it is
			
 
				+self identifying and we can do much more expansive automated verification of the
			
 
				+metadata object at runtime, during forensic analysis or repair.
			
 
				+
			
 
				+As a primary concern, self describing metadata needs some form of overall
			
 
				+integrity checking. We cannot trust the metadata if we cannot verify that it has
			
 
				+not been changed as a result of external influences. Hence we need some form of
			
 
				+integrity check, and this is done by adding CRC32c validation to the metadata
			
 
				+block. If we can verify the block contains the metadata it was intended to
			
 
				+contain, a large amount of the manual verification work can be skipped.
			
 
				+
			
 
				+CRC32c was selected as metadata cannot be more than 64k in length in XFS and
			
 
				+hence a 32 bit CRC is more than sufficient to detect multi-bit errors in
			
 
				+metadata blocks. CRC32c is also now hardware accelerated on common CPUs so it is
			
 
				+fast. So while CRC32c is not the strongest of possible integrity checks that
			
 
				+could be used, it is more than sufficient for our needs and has relatively
			
 
				+little overhead. Adding support for larger integrity fields and/or algorithms
			
 
				+does not really provide any extra value over CRC32c, but it does add a lot of
			
 
				+complexity and so there is no provision for changing the integrity checking
			
 
				+mechanism.
			
 
				+
			
 
				+Self describing metadata needs to contain enough information so that the
			
 
				+metadata block can be verified as being in the correct place without needing to
			
 
				+look at any other metadata. This means it needs to contain location information.
			
 
				+Just adding a block number to the metadata is not sufficient to protect against
			
 
				+mis-directed writes - a write might be misdirected to the wrong LUN and so be
			
 
				+written to the "correct block" of the wrong filesystem. Hence location
			
 
				+information must contain a filesystem identifier as well as a block number.
			
 
				+
			
 
				+Another key information point in forensic analysis is knowing who the metadata
			
 
				+block belongs to. We already know the type, the location, that it is valid
			
 
				+and/or corrupted, and how long ago it was last modified. Knowing the owner
			
 
				+of the block is important as it allows us to find other related metadata to
			
 
				+determine the scope of the corruption. For example, if we have an extent btree
			
 
				+object, we don't know what inode it belongs to and hence have to walk the entire
			
 
				+filesystem to find the owner of the block. Worse, the corruption could mean that
			
 
				+no owner can be found (i.e. it's an orphan block), and so without an owner field
			
 
				+in the metadata we have no idea of the scope of the corruption. If we have an
			
 
				+owner field in the metadata object, we can immediately do top down validation to
			
 
				+determine the scope of the problem.
			
 
				+
			
 
				+Different types of metadata have different owner identifiers. For example,
			
 
				+directory, attribute and extent tree blocks are all owned by an inode, whilst
			
 
				+freespace btree blocks are owned by an allocation group. Hence the size and
			
 
				+contents of the owner field are determined by the type of metadata object we are
			
 
				+looking at.  The owner information can also identify misplaced writes (e.g.
			
 
				+freespace btree block written to the wrong AG).
			
 
				+
			
 
				+Self describing metadata also needs to contain some indication of when it was
			
 
				+written to the filesystem. One of the key information points when doing forensic
			
 
				+analysis is how recently the block was modified. Correlation of a set of corrupted
			
 
				+metadata blocks based on modification times is important as it can indicate
			
 
				+whether the corruptions are related, whether there's been multiple corruption
			
 
				+events that lead to the eventual failure, and even whether there are corruptions
			
 
				+present that the run-time verification is not detecting.
			
 
				+
			
 
				+For example, we can determine whether a metadata object is supposed to be free
			
 
				+space or still allocated if it is still referenced by its owner by looking at
			
 
				+when the free space btree block that contains the block was last written
			
 
				+compared to when the metadata object itself was last written.  If the free space
			
 
				+block is more recent than the object and the object's owner, then there is a
			
 
				+very good chance that the block should have been removed from the owner.
			
 
				+
			
 
				+To provide this "written timestamp", each metadata block gets the Log Sequence
			
 
				+Number (LSN) of the most recent transaction it was modified on written into it.
			
 
				+This number will always increase over the life of the filesystem, and the only
			
 
				+thing that resets it is running xfs_repair on the filesystem. Further, by use of
			
 
				+the LSN we can tell if the corrupted metadata all belonged to the same log
			
 
				+checkpoint and hence have some idea of how much modification occurred between
			
 
				+the first and last instance of corrupt metadata on disk and, further, how much
			
 
				+modification occurred between the corruption being written and when it was
			
 
				+detected.
			
 
				+
			
 
				+Runtime Validation
			
 
				+------------------
			
 
				+
			
 
				+Validation of self-describing metadata takes place at runtime in two places:
			
 
				+
			
 
				+	- immediately after a successful read from disk
			
 
				+	- immediately prior to write IO submission
			
 
				+
			
 
				+The verification is completely stateless - it is done independently of the
			
 
				+modification process, and seeks only to check that the metadata is what it says
			
 
				+it is and that the metadata fields are within bounds and internally consistent.
			
 
				+As such, we cannot catch all types of corruption that can occur within a block
			
 
				+as there may be certain limitations that operational state enforces on the
			
 
				+metadata, or there may be corruption of interblock relationships (e.g. corrupted
			
 
				+sibling pointer lists). Hence we still need stateful checking in the main code
			
 
				+body, but in general most of the per-field validation is handled by the
			
 
				+verifiers.
			
 
				+
			
 
				+For read verification, the caller needs to specify the expected type of metadata
			
 
				+that it should see, and the IO completion process verifies that the metadata
			
 
				+object matches what was expected. If the verification process fails, then it
			
 
				+marks the object being read as EFSCORRUPTED. The caller needs to catch this
			
 
				+error (same as for IO errors), and if it needs to take special action due to a
			
 
				+verification error it can do so by catching the EFSCORRUPTED error value. If we
			
 
				+need more discrimination of error type at higher levels, we can define new
			
 
				+error numbers for different errors as necessary.
			
 
				+
			
 
				+The first step in read verification is checking the magic number and determining
			
 
				+whether CRC validation is necessary. If it is, the CRC32c is calculated and
			
 
				+compared against the value stored in the object itself. Once this is validated,
			
 
				+further checks are made against the location information, followed by extensive
			
 
				+object specific metadata validation. If any of these checks fail, then the
			
 
				+buffer is considered corrupt and the EFSCORRUPTED error is set appropriately.
			
 
				+
			
 
				+Write verification is the opposite of the read verification - first the object
			
 
				+is extensively verified and if it is OK we then update the LSN from the last
			
 
				+modification made to the object. After this, we calculate the CRC and insert it
			
 
				+into the object. Once this is done the write IO is allowed to continue. If any
			
 
				+error occurs during this process, the buffer is again marked with an EFSCORRUPTED
			
 
				+error for the higher layers to catch.
			
 
				+
			
 
				+Structures
			
 
				+----------
			
 
				+
			
 
				+A typical on-disk structure needs to contain the following information:
			
 
				+
			
 
				+struct xfs_ondisk_hdr {
			
 
				+        __be32  magic;		/* magic number */
			
 
				+        __be32  crc;		/* CRC, not logged */
			
 
				+        uuid_t  uuid;		/* filesystem identifier */
			
 
				+        __be64  owner;		/* parent object */
			
 
				+        __be64  blkno;		/* location on disk */
			
 
				+        __be64  lsn;		/* last modification in log, not logged */
			
 
				+};
			
 
				+
			
 
				+Depending on the metadata, this information may be part of a header structure
			
 
				+separate to the metadata contents, or may be distributed through an existing
			
 
				+structure. The latter occurs with metadata that already contains some of this
			
 
				+information, such as the superblock and AG headers.
			
 
				+
			
 
				+Other metadata may have different formats for the information, but the same
			
 
				+level of information is generally provided. For example:
			
 
				+
			
 
				+	- short btree blocks have a 32 bit owner (ag number) and a 32 bit block
			
 
				+	  number for location. The two of these combined provide the same
			
 
				+	  information as @owner and @blkno in the above structure, but using 8
			
 
				+	  bytes less space on disk.
			
 
				+
			
 
				+	- directory/attribute node blocks have a 16 bit magic number, and the
			
 
				+	  header that contains the magic number has other information in it as
			
 
				+	  well. Hence the additional metadata headers change the overall format
			
 
				+	  of the metadata.
			
 
				+
			
 
				+A typical buffer read verifier is structured as follows:
			
 
				+
			
 
				+#define XFS_FOO_CRC_OFF		offsetof(struct xfs_ondisk_hdr, crc)
			
 
				+
			
 
				+static void
			
 
				+xfs_foo_read_verify(
			
 
				+	struct xfs_buf	*bp)
			
 
				+{
			
 
				+       struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				+
			
 
				+        if ((xfs_sb_version_hascrc(&mp->m_sb) &&
			
 
				+             !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				+					XFS_FOO_CRC_OFF)) ||
			
 
				+            !xfs_foo_verify(bp)) {
			
 
				+                XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+                xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+        }
			
 
				+}
			
 
				+
			
 
				+The code ensures that the CRC is only checked if the filesystem has CRCs enabled
			
 
				+by checking the superblock for the feature bit, and then if the CRC verifies OK
			
 
				+(or is not needed) it verifies the actual contents of the block.
			
 
				+
			
 
				+The verifier function will take a couple of different forms, depending on
			
 
				+whether the magic number can be used to determine the format of the block. In
			
 
				+the case it can't, the code is structured as follows:
			
 
				+
			
 
				+static bool
			
 
				+xfs_foo_verify(
			
 
				+	struct xfs_buf		*bp)
			
 
				+{
			
 
				+        struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				+        struct xfs_ondisk_hdr	*hdr = bp->b_addr;
			
 
				+
			
 
				+        if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC))
			
 
				+                return false;
			
 
				+
			
 
				+        if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				+		if (!uuid_equal(&hdr->uuid, &mp->m_sb.sb_uuid))
			
 
				+			return false;
			
 
				+		if (bp->b_bn != be64_to_cpu(hdr->blkno))
			
 
				+			return false;
			
 
				+		if (hdr->owner == 0)
			
 
				+			return false;
			
 
				+	}
			
 
				+
			
 
				+	/* object specific verification checks here */
			
 
				+
			
 
				+        return true;
			
 
				+}
			
 
				+
			
 
				+If there are different magic numbers for the different formats, the verifier
			
 
				+will look like:
			
 
				+
			
 
				+static bool
			
 
				+xfs_foo_verify(
			
 
				+	struct xfs_buf		*bp)
			
 
				+{
			
 
				+        struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				+        struct xfs_ondisk_hdr	*hdr = bp->b_addr;
			
 
				+
			
 
				+        if (hdr->magic == cpu_to_be32(XFS_FOO_CRC_MAGIC)) {
			
 
				+		if (!uuid_equal(&hdr->uuid, &mp->m_sb.sb_uuid))
			
 
				+			return false;
			
 
				+		if (bp->b_bn != be64_to_cpu(hdr->blkno))
			
 
				+			return false;
			
 
				+		if (hdr->owner == 0)
			
 
				+			return false;
			
 
				+	} else if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC))
			
 
				+		return false;
			
 
				+
			
 
				+	/* object specific verification checks here */
			
 
				+
			
 
				+        return true;
			
 
				+}
			
 
				+
			
 
				+Write verifiers are very similar to the read verifiers, they just do things in
			
 
				+the opposite order to the read verifiers. A typical write verifier:
			
 
				+
			
 
				+static void
			
 
				+xfs_foo_write_verify(
			
 
				+	struct xfs_buf	*bp)
			
 
				+{
			
 
				+	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
			
 
				+
			
 
				+	if (!xfs_foo_verify(bp)) {
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return;
			
 
				+
			
 
				+
			
 
				+	if (bip) {
			
 
				+		struct xfs_ondisk_hdr	*hdr = bp->b_addr;
			
 
				+		hdr->lsn = cpu_to_be64(bip->bli_item.li_lsn);
			
 
				+	}
			
 
				+	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_FOO_CRC_OFF);
			
 
				+}
			
 
				+
			
 
				+This will verify the internal structure of the metadata before we go any
			
 
				+further, detecting corruptions that have occurred as the metadata has been
			
 
				+modified in memory. If the metadata verifies OK, and CRCs are enabled, we then
			
 
				+update the LSN field (when it was last modified) and calculate the CRC on the
			
 
				+metadata. Once this is done, we can issue the IO.
			
 
				+
			
 
				+Inodes and Dquots
			
 
				+-----------------
			
 
				+
			
 
				+Inodes and dquots are special snowflakes. They have per-object CRC and
			
 
				+self-identifiers, but they are packed so that there are multiple objects per
			
 
				+buffer. Hence we do not use per-buffer verifiers to do the work of per-object
			
 
				+verification and CRC calculations. The per-buffer verifiers simply perform basic
			
 
				+identification of the buffer - that they contain inodes or dquots, and that
			
 
				+there are magic numbers in all the expected spots. All further CRC and
			
 
				+verification checks are done when each inode is read from or written back to the
			
 
				+buffer.
			
 
				+
			
 
				+The structure of the verifiers and the identifier checks is very similar to the
			
 
				+buffer code described above. The only difference is where they are called. For
			
 
				+example, inode read verification is done in xfs_iread() when the inode is first
			
 
				+read out of the buffer and the struct xfs_inode is instantiated. The inode is
			
 
				+already extensively verified during writeback in xfs_iflush_int, so the only
			
 
				+addition here is to add the LSN and CRC to the inode as it is copied back into
			
 
				+the buffer.
			
 
				+
			
 
				+XXX: inode unlinked list modification doesn't recalculate the inode CRC! None of
			
 
				+the unlinked list modifications check or update CRCs, neither during unlink nor
			
 
				+log recovery. So, it's gone unnoticed until now. This won't matter immediately -
			
 
				+repair will probably complain about it - but it needs to be fixed.
			
 
				+
			
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -45,11 +45,11 @@ xfs-y				+= xfs_aops.o \
 
				 				   xfs_itable.o \
			
 
				 				   xfs_message.o \
			
 
				 				   xfs_mru_cache.o \
			
 
				-				   xfs_super.o \
			
 
				-				   xfs_xattr.o \
			
 
				 				   xfs_rename.o \
			
 
				+				   xfs_super.o \
			
 
				 				   xfs_utils.o \
			
 
				 				   xfs_vnodeops.o \
			
 
				+				   xfs_xattr.o \
			
 
				 				   kmem.o \
			
 
				 				   uuid.o
			
 
				 
			
@@ -58,6 +58,7 @@ xfs-y				+= xfs_alloc.o \
 
				 				   xfs_alloc_btree.o \
			
 
				 				   xfs_attr.o \
			
 
				 				   xfs_attr_leaf.o \
			
 
				+				   xfs_attr_remote.o \
			
 
				 				   xfs_bmap.o \
			
 
				 				   xfs_bmap_btree.o \
			
 
				 				   xfs_btree.o \
			
@@ -73,6 +74,7 @@ xfs-y				+= xfs_alloc.o \
 
				 				   xfs_inode.o \
			
 
				 				   xfs_log_recover.o \
			
 
				 				   xfs_mount.o \
			
 
				+				   xfs_symlink.o \
			
 
				 				   xfs_trans.o
			
 
				 
			
 
				 # low-level transaction/log code
			
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -30,6 +30,7 @@ struct xfs_trans;
 
				 
			
 
				 #define	XFS_AGF_MAGIC	0x58414746	/* 'XAGF' */
			
 
				 #define	XFS_AGI_MAGIC	0x58414749	/* 'XAGI' */
			
 
				+#define	XFS_AGFL_MAGIC	0x5841464c	/* 'XAFL' */
			
 
				 #define	XFS_AGF_VERSION	1
			
 
				 #define	XFS_AGI_VERSION	1
			
 
				 
			
@@ -63,12 +64,29 @@ typedef struct xfs_agf {
 
				 	__be32		agf_spare0;	/* spare field */
			
 
				 	__be32		agf_levels[XFS_BTNUM_AGF];	/* btree levels */
			
 
				 	__be32		agf_spare1;	/* spare field */
			
 
				+
			
 
				 	__be32		agf_flfirst;	/* first freelist block's index */
			
 
				 	__be32		agf_fllast;	/* last freelist block's index */
			
 
				 	__be32		agf_flcount;	/* count of blocks in freelist */
			
 
				 	__be32		agf_freeblks;	/* total free blocks */
			
 
				+
			
 
				 	__be32		agf_longest;	/* longest free space */
			
 
				 	__be32		agf_btreeblks;	/* # of blocks held in AGF btrees */
			
 
				+	uuid_t		agf_uuid;	/* uuid of filesystem */
			
 
				+
			
 
				+	/*
			
 
				+	 * reserve some contiguous space for future logged fields before we add
			
 
				+	 * the unlogged fields. This makes the range logging via flags and
			
 
				+	 * structure offsets much simpler.
			
 
				+	 */
			
 
				+	__be64		agf_spare64[16];
			
 
				+
			
 
				+	/* unlogged fields, written during buffer writeback. */
			
 
				+	__be64		agf_lsn;	/* last write sequence */
			
 
				+	__be32		agf_crc;	/* crc of agf sector */
			
 
				+	__be32		agf_spare2;
			
 
				+
			
 
				+	/* structure must be padded to 64 bit alignment */
			
 
				 } xfs_agf_t;
			
 
				 
			
 
				 #define	XFS_AGF_MAGICNUM	0x00000001
			
@@ -83,7 +101,8 @@ typedef struct xfs_agf {
 
				 #define	XFS_AGF_FREEBLKS	0x00000200
			
 
				 #define	XFS_AGF_LONGEST		0x00000400
			
 
				 #define	XFS_AGF_BTREEBLKS	0x00000800
			
 
				-#define	XFS_AGF_NUM_BITS	12
			
 
				+#define	XFS_AGF_UUID		0x00001000
			
 
				+#define	XFS_AGF_NUM_BITS	13
			
 
				 #define	XFS_AGF_ALL_BITS	((1 << XFS_AGF_NUM_BITS) - 1)
			
 
				 
			
 
				 #define XFS_AGF_FLAGS \
			
@@ -98,7 +117,8 @@ typedef struct xfs_agf {
 
				 	{ XFS_AGF_FLCOUNT,	"FLCOUNT" }, \
			
 
				 	{ XFS_AGF_FREEBLKS,	"FREEBLKS" }, \
			
 
				 	{ XFS_AGF_LONGEST,	"LONGEST" }, \
			
 
				-	{ XFS_AGF_BTREEBLKS,	"BTREEBLKS" }
			
 
				+	{ XFS_AGF_BTREEBLKS,	"BTREEBLKS" }, \
			
 
				+	{ XFS_AGF_UUID,		"UUID" }
			
 
				 
			
 
				 /* disk block (xfs_daddr_t) in the AG */
			
 
				 #define XFS_AGF_DADDR(mp)	((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
			
@@ -132,6 +152,7 @@ typedef struct xfs_agi {
 
				 	__be32		agi_root;	/* root of inode btree */
			
 
				 	__be32		agi_level;	/* levels in inode btree */
			
 
				 	__be32		agi_freecount;	/* number of free inodes */
			
 
				+
			
 
				 	__be32		agi_newino;	/* new inode just allocated */
			
 
				 	__be32		agi_dirino;	/* last directory inode chunk */
			
 
				 	/*
			
@@ -139,6 +160,13 @@ typedef struct xfs_agi {
 
				 	 * still being referenced.
			
 
				 	 */
			
 
				 	__be32		agi_unlinked[XFS_AGI_UNLINKED_BUCKETS];
			
 
				+
			
 
				+	uuid_t		agi_uuid;	/* uuid of filesystem */
			
 
				+	__be32		agi_crc;	/* crc of agi sector */
			
 
				+	__be32		agi_pad32;
			
 
				+	__be64		agi_lsn;	/* last write sequence */
			
 
				+
			
 
				+	/* structure must be padded to 64 bit alignment */
			
 
				 } xfs_agi_t;
			
 
				 
			
 
				 #define	XFS_AGI_MAGICNUM	0x00000001
			
@@ -171,11 +199,31 @@ extern const struct xfs_buf_ops xfs_agi_buf_ops;
 
				  */
			
 
				 #define XFS_AGFL_DADDR(mp)	((xfs_daddr_t)(3 << (mp)->m_sectbb_log))
			
 
				 #define	XFS_AGFL_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp))
			
 
				-#define XFS_AGFL_SIZE(mp)	((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t))
			
 
				 #define	XFS_BUF_TO_AGFL(bp)	((xfs_agfl_t *)((bp)->b_addr))
			
 
				 
			
 
				+#define XFS_BUF_TO_AGFL_BNO(mp, bp) \
			
 
				+	(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
			
 
				+		&(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \
			
 
				+		(__be32 *)(bp)->b_addr)
			
 
				+
			
 
				+/*
			
 
				+ * Size of the AGFL.  For CRC-enabled filesystems we steal a couple of
			
 
				+ * slots in the beginning of the block for a proper header with the
			
 
				+ * location information and CRC.
			
 
				+ */
			
 
				+#define XFS_AGFL_SIZE(mp) \
			
 
				+	(((mp)->m_sb.sb_sectsize - \
			
 
				+	 (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
			
 
				+		sizeof(struct xfs_agfl) : 0)) / \
			
 
				+	  sizeof(xfs_agblock_t))
			
 
				+
			
 
				 typedef struct xfs_agfl {
			
 
				-	__be32		agfl_bno[1];	/* actually XFS_AGFL_SIZE(mp) */
			
 
				+	__be32		agfl_magicnum;
			
 
				+	__be32		agfl_seqno;
			
 
				+	uuid_t		agfl_uuid;
			
 
				+	__be64		agfl_lsn;
			
 
				+	__be32		agfl_crc;
			
 
				+	__be32		agfl_bno[];	/* actually XFS_AGFL_SIZE(mp) */
			
 
				 } xfs_agfl_t;
			
 
				 
			
 
				 /*
			
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -33,7 +33,9 @@
 
				 #include "xfs_alloc.h"
			
 
				 #include "xfs_extent_busy.h"
			
 
				 #include "xfs_error.h"
			
 
				+#include "xfs_cksum.h"
			
 
				 #include "xfs_trace.h"
			
 
				+#include "xfs_buf_item.h"
			
 
				 
			
 
				 struct workqueue_struct *xfs_alloc_wq;
			
 
				 
			
@@ -430,53 +432,84 @@ xfs_alloc_fixup_trees(
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static void
			
 
				+static bool
			
 
				 xfs_agfl_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-#ifdef WHEN_CRCS_COME_ALONG
			
 
				-	/*
			
 
				-	 * we cannot actually do any verification of the AGFL because mkfs does
			
 
				-	 * not initialise the AGFL to zero or NULL. Hence the only valid part of
			
 
				-	 * the AGFL is what the AGF says is active. We can't get to the AGF, so
			
 
				-	 * we can't verify just those entries are valid.
			
 
				-	 *
			
 
				-	 * This problem goes away when the CRC format change comes along as that
			
 
				-	 * requires the AGFL to be initialised by mkfs. At that point, we can
			
 
				-	 * verify the blocks in the agfl -active or not- lie within the bounds
			
 
				-	 * of the AG. Until then, just leave this check ifdef'd out.
			
 
				-	 */
			
 
				 	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				 	struct xfs_agfl	*agfl = XFS_BUF_TO_AGFL(bp);
			
 
				-	int		agfl_ok = 1;
			
 
				-
			
 
				 	int		i;
			
 
				 
			
 
				+	if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid))
			
 
				+		return false;
			
 
				+	if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
			
 
				+		return false;
			
 
				+	/*
			
 
				+	 * during growfs operations, the perag is not fully initialised,
			
 
				+	 * so we can't use it for any useful checking. growfs ensures we can't
			
 
				+	 * use it by using uncached buffers that don't have the perag attached
			
 
				+	 * so we can detect and avoid this problem.
			
 
				+	 */
			
 
				+	if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
			
 
				+		return false;
			
 
				+
			
 
				 	for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
			
 
				-		if (be32_to_cpu(agfl->agfl_bno[i]) == NULLAGBLOCK ||
			
 
				+		if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
			
 
				 		    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
			
 
				-			agfl_ok = 0;
			
 
				+			return false;
			
 
				 	}
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+xfs_agfl_read_verify(
			
 
				+	struct xfs_buf	*bp)
			
 
				+{
			
 
				+	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				+	int		agfl_ok = 1;
			
 
				+
			
 
				+	/*
			
 
				+	 * There is no verification of non-crc AGFLs because mkfs does not
			
 
				+	 * initialise the AGFL to zero or NULL. Hence the only valid part of the
			
 
				+	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
			
 
				+	 * can't verify just those entries are valid.
			
 
				+	 */
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return;
			
 
				+
			
 
				+	agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				+				   offsetof(struct xfs_agfl, agfl_crc));
			
 
				+
			
 
				+	agfl_ok = agfl_ok && xfs_agfl_verify(bp);
			
 
				 
			
 
				 	if (!agfl_ok) {
			
 
				-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agfl);
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				 		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				 	}
			
 
				-#endif
			
 
				 }
			
 
				 
			
 
				 static void
			
 
				 xfs_agfl_write_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_agfl_verify(bp);
			
 
				-}
			
 
				+	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
			
 
				 
			
 
				-static void
			
 
				-xfs_agfl_read_verify(
			
 
				-	struct xfs_buf	*bp)
			
 
				-{
			
 
				-	xfs_agfl_verify(bp);
			
 
				+	/* no verification of non-crc AGFLs */
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return;
			
 
				+
			
 
				+	if (!xfs_agfl_verify(bp)) {
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	if (bip)
			
 
				+		XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
			
 
				+
			
 
				+	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				+			 offsetof(struct xfs_agfl, agfl_crc));
			
 
				 }
			
 
				 
			
 
				 const struct xfs_buf_ops xfs_agfl_buf_ops = {
			
@@ -842,7 +875,7 @@ xfs_alloc_ag_vextent_near(
 
				 	 */
			
 
				 	int		dofirst;	/* set to do first algorithm */
			
 
				 
			
 
				-	dofirst = random32() & 1;
			
 
				+	dofirst = prandom_u32() & 1;
			
 
				 #endif
			
 
				 
			
 
				 restart:
			
@@ -1982,18 +2015,18 @@ xfs_alloc_get_freelist(
 
				 	int		btreeblk) /* destination is a AGF btree */
			
 
				 {
			
 
				 	xfs_agf_t	*agf;	/* a.g. freespace structure */
			
 
				-	xfs_agfl_t	*agfl;	/* a.g. freelist structure */
			
 
				 	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
			
 
				 	xfs_agblock_t	bno;	/* block number returned */
			
 
				+	__be32		*agfl_bno;
			
 
				 	int		error;
			
 
				 	int		logflags;
			
 
				-	xfs_mount_t	*mp;	/* mount structure */
			
 
				+	xfs_mount_t	*mp = tp->t_mountp;
			
 
				 	xfs_perag_t	*pag;	/* per allocation group data */
			
 
				 
			
 
				-	agf = XFS_BUF_TO_AGF(agbp);
			
 
				 	/*
			
 
				 	 * Freelist is empty, give up.
			
 
				 	 */
			
 
				+	agf = XFS_BUF_TO_AGF(agbp);
			
 
				 	if (!agf->agf_flcount) {
			
 
				 		*bnop = NULLAGBLOCK;
			
 
				 		return 0;
			
@@ -2001,15 +2034,17 @@ xfs_alloc_get_freelist(
 
				 	/*
			
 
				 	 * Read the array of free blocks.
			
 
				 	 */
			
 
				-	mp = tp->t_mountp;
			
 
				-	if ((error = xfs_alloc_read_agfl(mp, tp,
			
 
				-			be32_to_cpu(agf->agf_seqno), &agflbp)))
			
 
				+	error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
			
 
				+				    &agflbp);
			
 
				+	if (error)
			
 
				 		return error;
			
 
				-	agfl = XFS_BUF_TO_AGFL(agflbp);
			
 
				+
			
 
				+
			
 
				 	/*
			
 
				 	 * Get the block number and update the data structures.
			
 
				 	 */
			
 
				-	bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
			
 
				+	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
			
 
				+	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
			
 
				 	be32_add_cpu(&agf->agf_flfirst, 1);
			
 
				 	xfs_trans_brelse(tp, agflbp);
			
 
				 	if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
			
@@ -2058,11 +2093,14 @@ xfs_alloc_log_agf(
 
				 		offsetof(xfs_agf_t, agf_freeblks),
			
 
				 		offsetof(xfs_agf_t, agf_longest),
			
 
				 		offsetof(xfs_agf_t, agf_btreeblks),
			
 
				+		offsetof(xfs_agf_t, agf_uuid),
			
 
				 		sizeof(xfs_agf_t)
			
 
				 	};
			
 
				 
			
 
				 	trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
			
 
				 
			
 
				+	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
			
 
				+
			
 
				 	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
			
 
				 	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
			
 
				 }
			
@@ -2099,12 +2137,13 @@ xfs_alloc_put_freelist(
 
				 	int			btreeblk) /* block came from a AGF btree */
			
 
				 {
			
 
				 	xfs_agf_t		*agf;	/* a.g. freespace structure */
			
 
				-	xfs_agfl_t		*agfl;	/* a.g. free block array */
			
 
				 	__be32			*blockp;/* pointer to array entry */
			
 
				 	int			error;
			
 
				 	int			logflags;
			
 
				 	xfs_mount_t		*mp;	/* mount structure */
			
 
				 	xfs_perag_t		*pag;	/* per allocation group data */
			
 
				+	__be32			*agfl_bno;
			
 
				+	int			startoff;
			
 
				 
			
 
				 	agf = XFS_BUF_TO_AGF(agbp);
			
 
				 	mp = tp->t_mountp;
			
@@ -2112,7 +2151,6 @@ xfs_alloc_put_freelist(
 
				 	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
			
 
				 			be32_to_cpu(agf->agf_seqno), &agflbp)))
			
 
				 		return error;
			
 
				-	agfl = XFS_BUF_TO_AGFL(agflbp);
			
 
				 	be32_add_cpu(&agf->agf_fllast, 1);
			
 
				 	if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
			
 
				 		agf->agf_fllast = 0;
			
@@ -2133,32 +2171,38 @@ xfs_alloc_put_freelist(
 
				 	xfs_alloc_log_agf(tp, agbp, logflags);
			
 
				 
			
 
				 	ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
			
 
				-	blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
			
 
				+
			
 
				+	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
			
 
				+	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
			
 
				 	*blockp = cpu_to_be32(bno);
			
 
				+	startoff = (char *)blockp - (char *)agflbp->b_addr;
			
 
				+
			
 
				 	xfs_alloc_log_agf(tp, agbp, logflags);
			
 
				-	xfs_trans_log_buf(tp, agflbp,
			
 
				-		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
			
 
				-		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
			
 
				-			sizeof(xfs_agblock_t) - 1));
			
 
				+
			
 
				+	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
			
 
				+	xfs_trans_log_buf(tp, agflbp, startoff,
			
 
				+			  startoff + sizeof(xfs_agblock_t) - 1);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static void
			
 
				+static bool
			
 
				 xfs_agf_verify(
			
 
				+	struct xfs_mount *mp,
			
 
				 	struct xfs_buf	*bp)
			
 
				  {
			
 
				-	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				-	struct xfs_agf	*agf;
			
 
				-	int		agf_ok;
			
 
				+	struct xfs_agf	*agf = XFS_BUF_TO_AGF(bp);
			
 
				 
			
 
				-	agf = XFS_BUF_TO_AGF(bp);
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb) &&
			
 
				+	    !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid))
			
 
				+			return false;
			
 
				 
			
 
				-	agf_ok = agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
			
 
				-		XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
			
 
				-		be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
			
 
				-		be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
			
 
				-		be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
			
 
				-		be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp);
			
 
				+	if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
			
 
				+	      XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
			
 
				+	      be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
			
 
				+	      be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
			
 
				+	      be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
			
 
				+	      be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
			
 
				+		return false;
			
 
				 
			
 
				 	/*
			
 
				 	 * during growfs operations, the perag is not fully initialised,
			
@@ -2166,33 +2210,58 @@ xfs_agf_verify(
 
				 	 * use it by using uncached buffers that don't have the perag attached
			
 
				 	 * so we can detect and avoid this problem.
			
 
				 	 */
			
 
				-	if (bp->b_pag)
			
 
				-		agf_ok = agf_ok && be32_to_cpu(agf->agf_seqno) ==
			
 
				-						bp->b_pag->pag_agno;
			
 
				+	if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
			
 
				+		return false;
			
 
				 
			
 
				-	if (xfs_sb_version_haslazysbcount(&mp->m_sb))
			
 
				-		agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
			
 
				-						be32_to_cpu(agf->agf_length);
			
 
				+	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
			
 
				+	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
			
 
				+		return false;
			
 
				+
			
 
				+	return true;
			
 
				 
			
 
				-	if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
			
 
				-			XFS_RANDOM_ALLOC_READ_AGF))) {
			
 
				-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agf);
			
 
				-		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				-	}
			
 
				 }
			
 
				 
			
 
				 static void
			
 
				 xfs_agf_read_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_agf_verify(bp);
			
 
				+	struct xfs_mount *mp = bp->b_target->bt_mount;	/* read verifier for AGF buffers: CRC check (CRC filesystems only), then structure checks */
			
 
				+	int		agf_ok = 1;	/* stays 1 when there is no CRC to check */
			
 
				+
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				+					  offsetof(struct xfs_agf, agf_crc));
			
 
				+
			
 
				+	agf_ok = agf_ok && xfs_agf_verify(mp, bp);	/* && short-circuits: structure checks are skipped once the CRC has failed */
			
 
				+
			
 
				+	if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,	/* NOTE(review): presumably also fires on injected errors - confirm */
			
 
				+			XFS_RANDOM_ALLOC_READ_AGF))) {
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);	/* record EFSCORRUPTED on the buffer */
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 static void
			
 
				 xfs_agf_write_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_agf_verify(bp);
			
 
				+	struct xfs_mount *mp = bp->b_target->bt_mount;	/* write verifier for AGF buffers: structure checks always, LSN stamp and CRC on CRC filesystems */
			
 
				+	struct xfs_buf_log_item	*bip = bp->b_fspriv;	/* may be NULL, checked before use below */
			
 
				+
			
 
				+	if (!xfs_agf_verify(mp, bp)) {	/* runs before the CRC-support test, so it applies to every filesystem */
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))	/* nothing more to do without CRC support */
			
 
				+		return;
			
 
				+
			
 
				+	if (bip)	/* record the log item's LSN in the header before the CRC is computed */
			
 
				+		XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
			
 
				+
			
 
				+	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),	/* checksum last, passing the offset of agf_crc */
			
 
				+			 offsetof(struct xfs_agf, agf_crc));
			
 
				 }
			
 
				 
			
 
				 const struct xfs_buf_ops xfs_agf_buf_ops = {
			
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -33,6 +33,7 @@
 
				 #include "xfs_extent_busy.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_trace.h"
			
 
				+#include "xfs_cksum.h"
			
 
				 
			
 
				 
			
 
				 STATIC struct xfs_btree_cur *
			
@@ -272,7 +273,7 @@ xfs_allocbt_key_diff(
 
				 	return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
			
 
				 }
			
 
				 
			
 
				-static void
			
 
				+static bool
			
 
				 xfs_allocbt_verify(
			
 
				 	struct xfs_buf		*bp)
			
 
				 {
			
@@ -280,66 +281,103 @@ xfs_allocbt_verify(
 
				 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
			
 
				 	struct xfs_perag	*pag = bp->b_pag;
			
 
				 	unsigned int		level;
			
 
				-	int			sblock_ok; /* block passes checks */
			
 
				 
			
 
				 	/*
			
 
				 	 * magic number and level verification
			
 
				 	 *
			
 
				-	 * During growfs operations, we can't verify the exact level as the
			
 
				-	 * perag is not fully initialised and hence not attached to the buffer.
			
 
				-	 * In this case, check against the maximum tree depth.
			
 
				+	 * During growfs operations, we can't verify the exact level or owner as
			
 
				+	 * the perag is not fully initialised and hence not attached to the
			
 
				+	 * buffer.  In this case, check against the maximum tree depth.
			
 
				+	 *
			
 
				+	 * Similarly, during log recovery we will have a perag structure
			
 
				+	 * attached, but the agf information will not yet have been initialised
			
 
				+	 * from the on disk AGF. Again, we can only check against maximum limits
			
 
				+	 * in this case.
			
 
				 	 */
			
 
				 	level = be16_to_cpu(block->bb_level);
			
 
				 	switch (block->bb_magic) {
			
 
				+	case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
			
 
				+		if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+			return false;
			
 
				+		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
			
 
				+			return false;
			
 
				+		if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
			
 
				+			return false;
			
 
				+		if (pag &&
			
 
				+		    be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
			
 
				+			return false;
			
 
				+		/* fall through */
			
 
				 	case cpu_to_be32(XFS_ABTB_MAGIC):
			
 
				-		if (pag)
			
 
				-			sblock_ok = level < pag->pagf_levels[XFS_BTNUM_BNOi];
			
 
				-		else
			
 
				-			sblock_ok = level < mp->m_ag_maxlevels;
			
 
				+		if (pag && pag->pagf_init) {
			
 
				+			if (level >= pag->pagf_levels[XFS_BTNUM_BNOi])
			
 
				+				return false;
			
 
				+		} else if (level >= mp->m_ag_maxlevels)
			
 
				+			return false;
			
 
				 		break;
			
 
				+	case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
			
 
				+		if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+			return false;
			
 
				+		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
			
 
				+			return false;
			
 
				+		if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
			
 
				+			return false;
			
 
				+		if (pag &&
			
 
				+		    be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
			
 
				+			return false;
			
 
				+		/* fall through */
			
 
				 	case cpu_to_be32(XFS_ABTC_MAGIC):
			
 
				-		if (pag)
			
 
				-			sblock_ok = level < pag->pagf_levels[XFS_BTNUM_CNTi];
			
 
				-		else
			
 
				-			sblock_ok = level < mp->m_ag_maxlevels;
			
 
				+		if (pag && pag->pagf_init) {
			
 
				+			if (level >= pag->pagf_levels[XFS_BTNUM_CNTi])
			
 
				+				return false;
			
 
				+		} else if (level >= mp->m_ag_maxlevels)
			
 
				+			return false;
			
 
				 		break;
			
 
				 	default:
			
 
				-		sblock_ok = 0;
			
 
				-		break;
			
 
				+		return false;
			
 
				 	}
			
 
				 
			
 
				 	/* numrecs verification */
			
 
				-	sblock_ok = sblock_ok &&
			
 
				-		be16_to_cpu(block->bb_numrecs) <= mp->m_alloc_mxr[level != 0];
			
 
				+	if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
			
 
				+		return false;
			
 
				 
			
 
				 	/* sibling pointer verification */
			
 
				-	sblock_ok = sblock_ok &&
			
 
				-		(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
			
 
				-		 be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) &&
			
 
				-		block->bb_u.s.bb_leftsib &&
			
 
				-		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
			
 
				-		 be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) &&
			
 
				-		block->bb_u.s.bb_rightsib;
			
 
				-
			
 
				-	if (!sblock_ok) {
			
 
				-		trace_xfs_btree_corrupt(bp, _RET_IP_);
			
 
				-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
			
 
				-		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				-	}
			
 
				+	if (!block->bb_u.s.bb_leftsib ||
			
 
				+	    (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
			
 
				+	     block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
			
 
				+		return false;
			
 
				+	if (!block->bb_u.s.bb_rightsib ||
			
 
				+	    (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
			
 
				+	     block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
			
 
				+		return false;
			
 
				+
			
 
				+	return true;
			
 
				 }
			
 
				 
			
 
				 static void
			
 
				 xfs_allocbt_read_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_allocbt_verify(bp);
			
 
				+	if (!(xfs_btree_sblock_verify_crc(bp) &&	/* read verifier: CRC helper runs first, structure checks only if it passes */
			
 
				+	      xfs_allocbt_verify(bp))) {
			
 
				+		trace_xfs_btree_corrupt(bp, _RET_IP_);	/* one shared failure path for CRC and structure errors */
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
			
 
				+				     bp->b_target->bt_mount, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);	/* record EFSCORRUPTED on the buffer */
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 static void
			
 
				 xfs_allocbt_write_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_allocbt_verify(bp);
			
 
				+	if (!xfs_allocbt_verify(bp)) {	/* write verifier: the block must pass the structure checks before it is checksummed */
			
 
				+		trace_xfs_btree_corrupt(bp, _RET_IP_);
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
			
 
				+				     bp->b_target->bt_mount, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+		return;	/* do not checksum a block we have just rejected; matches the AGF/AGFL write verifiers */
			
 
				+	}
			
 
				+	xfs_btree_sblock_calc_crc(bp);	/* NOTE(review): presumably a no-op on non-CRC filesystems - confirm in xfs_btree.c */
			
 
				 }
			
 
				 
			
 
				 const struct xfs_buf_ops xfs_allocbt_buf_ops = {
			
@@ -444,6 +482,9 @@ xfs_allocbt_init_cursor(
 
				 	cur->bc_private.a.agbp = agbp;
			
 
				 	cur->bc_private.a.agno = agno;
			
 
				 
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
			
 
				+
			
 
				 	return cur;
			
 
				 }
			
 
				 
			
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/xfs_alloc_btree.h
@@ -31,8 +31,10 @@ struct xfs_mount;
 
				  * by blockcount and blockno.  All blocks look the same to make the code
			
 
				  * simpler; if we have time later, we'll make the optimizations.
			
 
				  */
			
 
				-#define	XFS_ABTB_MAGIC	0x41425442	/* 'ABTB' for bno tree */
			
 
				-#define	XFS_ABTC_MAGIC	0x41425443	/* 'ABTC' for cnt tree */
			
 
				+#define	XFS_ABTB_MAGIC		0x41425442	/* 'ABTB' for bno tree */
			
 
				+#define	XFS_ABTB_CRC_MAGIC	0x41423342	/* 'AB3B' */
			
 
				+#define	XFS_ABTC_MAGIC		0x41425443	/* 'ABTC' for cnt tree */
			
 
				+#define	XFS_ABTC_CRC_MAGIC	0x41423343	/* 'AB3C' */
			
 
				 
			
 
				 /*
			
 
				  * Data record/key structure
			
@@ -59,10 +61,10 @@ typedef __be32 xfs_alloc_ptr_t;
 
				 
			
 
				 /*
			
 
				  * Btree block header size depends on a superblock flag.
			
 
				- *
			
 
				- * (not quite yet, but soon)
			
 
				  */
			
 
				-#define XFS_ALLOC_BLOCK_LEN(mp)	XFS_BTREE_SBLOCK_LEN
			
 
				+#define XFS_ALLOC_BLOCK_LEN(mp) \
			
 
				+	(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
			
 
				+		XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)
			
 
				 
			
 
				 /*
			
 
				  * Record, key, and pointer address macros for btree blocks.
			
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -953,13 +953,13 @@ xfs_vm_writepage(
 
				 		unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);
			
 
				 
			
 
				 		/*
			
 
				-		 * Just skip the page if it is fully outside i_size, e.g. due
			
 
				-		 * to a truncate operation that is in progress.
			
 
				+		 * Skip the page if it is fully outside i_size, e.g. due to a
			
 
				+		 * truncate operation that is in progress. We must redirty the
			
 
				+		 * page so that reclaim stops reclaiming it. Otherwise
			
 
				+		 * xfs_vm_releasepage() is called on it and gets confused.
			
 
				 		 */
			
 
				-		if (page->index >= end_index + 1 || offset_into_page == 0) {
			
 
				-			unlock_page(page);
			
 
				-			return 0;
			
 
				-		}
			
 
				+		if (page->index >= end_index + 1 || offset_into_page == 0)
			
 
				+			goto redirty;
			
 
				 
			
 
				 		/*
			
 
				 		 * The page straddles i_size.  It must be zeroed out on each
			
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -15,7 +15,6 @@
 
				  * along with this program; if not, write the Free Software Foundation,
			
 
				  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				  */
			
 
				-
			
 
				 #include "xfs.h"
			
 
				 #include "xfs_fs.h"
			
 
				 #include "xfs_types.h"
			
@@ -35,6 +34,7 @@
 
				 #include "xfs_bmap.h"
			
 
				 #include "xfs_attr.h"
			
 
				 #include "xfs_attr_leaf.h"
			
 
				+#include "xfs_attr_remote.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_quota.h"
			
 
				 #include "xfs_trans_space.h"
			
@@ -74,13 +74,6 @@ STATIC int xfs_attr_node_list(xfs_attr_list_context_t *context);
 
				 STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
			
 
				 STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
			
 
				 
			
 
				-/*
			
 
				- * Routines to manipulate out-of-line attribute values.
			
 
				- */
			
 
				-STATIC int xfs_attr_rmtval_set(xfs_da_args_t *args);
			
 
				-STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
			
 
				-
			
 
				-#define ATTR_RMTVALUE_MAPSIZE	1	/* # of map entries at once */
			
 
				 
			
 
				 STATIC int
			
 
				 xfs_attr_name_to_xname(
			
@@ -820,7 +813,7 @@ xfs_attr_inactive(xfs_inode_t *dp)
 
				 		error = 0;
			
 
				 		goto out;
			
 
				 	}
			
 
				-	error = xfs_attr_root_inactive(&trans, dp);
			
 
				+	error = xfs_attr3_root_inactive(&trans, dp);
			
 
				 	if (error)
			
 
				 		goto out;
			
 
				 
			
@@ -906,7 +899,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 
				 	 */
			
 
				 	dp = args->dp;
			
 
				 	args->blkno = 0;
			
 
				-	error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
			
 
				+	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
			
 
				 	if (error)
			
 
				 		return error;
			
 
				 
			
@@ -914,14 +907,14 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 
				 	 * Look up the given attribute in the leaf block.  Figure out if
			
 
				 	 * the given flags produce an error or call for an atomic rename.
			
 
				 	 */
			
 
				-	retval = xfs_attr_leaf_lookup_int(bp, args);
			
 
				+	retval = xfs_attr3_leaf_lookup_int(bp, args);
			
 
				 	if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
			
 
				 		xfs_trans_brelse(args->trans, bp);
			
 
				-		return(retval);
			
 
				+		return retval;
			
 
				 	} else if (retval == EEXIST) {
			
 
				 		if (args->flags & ATTR_CREATE) {	/* pure create op */
			
 
				 			xfs_trans_brelse(args->trans, bp);
			
 
				-			return(retval);
			
 
				+			return retval;
			
 
				 		}
			
 
				 
			
 
				 		trace_xfs_attr_leaf_replace(args);
			
@@ -937,7 +930,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 
				 	 * Add the attribute to the leaf block, transitioning to a Btree
			
 
				 	 * if required.
			
 
				 	 */
			
 
				-	retval = xfs_attr_leaf_add(bp, args);
			
 
				+	retval = xfs_attr3_leaf_add(bp, args);
			
 
				 	if (retval == ENOSPC) {
			
 
				 		/*
			
 
				 		 * Promote the attribute list to the Btree format, then
			
@@ -945,7 +938,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 
				 		 * can manage its own transactions.
			
 
				 		 */
			
 
				 		xfs_bmap_init(args->flist, args->firstblock);
			
 
				-		error = xfs_attr_leaf_to_node(args);
			
 
				+		error = xfs_attr3_leaf_to_node(args);
			
 
				 		if (!error) {
			
 
				 			error = xfs_bmap_finish(&args->trans, args->flist,
			
 
				 						&committed);
			
@@ -1010,7 +1003,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 
				 		 * In a separate transaction, set the incomplete flag on the
			
 
				 		 * "old" attr and clear the incomplete flag on the "new" attr.
			
 
				 		 */
			
 
				-		error = xfs_attr_leaf_flipflags(args);
			
 
				+		error = xfs_attr3_leaf_flipflags(args);
			
 
				 		if (error)
			
 
				 			return(error);
			
 
				 
			
@@ -1032,19 +1025,19 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 
				 		 * Read in the block containing the "old" attr, then
			
 
				 		 * remove the "old" attr from that block (neat, huh!)
			
 
				 		 */
			
 
				-		error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno,
			
 
				+		error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
			
 
				 					   -1, &bp);
			
 
				 		if (error)
			
 
				 			return error;
			
 
				 
			
 
				-		xfs_attr_leaf_remove(bp, args);
			
 
				+		xfs_attr3_leaf_remove(bp, args);
			
 
				 
			
 
				 		/*
			
 
				 		 * If the result is small enough, shrink it all into the inode.
			
 
				 		 */
			
 
				 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
			
 
				 			xfs_bmap_init(args->flist, args->firstblock);
			
 
				-			error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
			
 
				+			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
			
 
				 			/* bp is gone due to xfs_da_shrink_inode */
			
 
				 			if (!error) {
			
 
				 				error = xfs_bmap_finish(&args->trans,
			
@@ -1076,9 +1069,9 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
 
				 		/*
			
 
				 		 * Added a "remote" value, just clear the incomplete flag.
			
 
				 		 */
			
 
				-		error = xfs_attr_leaf_clearflag(args);
			
 
				+		error = xfs_attr3_leaf_clearflag(args);
			
 
				 	}
			
 
				-	return(error);
			
 
				+	return error;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1101,24 +1094,24 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
 
				 	 */
			
 
				 	dp = args->dp;
			
 
				 	args->blkno = 0;
			
 
				-	error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
			
 
				+	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
			
 
				 	if (error)
			
 
				 		return error;
			
 
				 
			
 
				-	error = xfs_attr_leaf_lookup_int(bp, args);
			
 
				+	error = xfs_attr3_leaf_lookup_int(bp, args);
			
 
				 	if (error == ENOATTR) {
			
 
				 		xfs_trans_brelse(args->trans, bp);
			
 
				-		return(error);
			
 
				+		return error;
			
 
				 	}
			
 
				 
			
 
				-	xfs_attr_leaf_remove(bp, args);
			
 
				+	xfs_attr3_leaf_remove(bp, args);
			
 
				 
			
 
				 	/*
			
 
				 	 * If the result is small enough, shrink it all into the inode.
			
 
				 	 */
			
 
				 	if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
			
 
				 		xfs_bmap_init(args->flist, args->firstblock);
			
 
				-		error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
			
 
				+		error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
			
 
				 		/* bp is gone due to xfs_da_shrink_inode */
			
 
				 		if (!error) {
			
 
				 			error = xfs_bmap_finish(&args->trans, args->flist,
			
@@ -1128,7 +1121,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
 
				 			ASSERT(committed);
			
 
				 			args->trans = NULL;
			
 
				 			xfs_bmap_cancel(args->flist);
			
 
				-			return(error);
			
 
				+			return error;
			
 
				 		}
			
 
				 
			
 
				 		/*
			
@@ -1138,7 +1131,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
 
				 		if (committed)
			
 
				 			xfs_trans_ijoin(args->trans, dp, 0);
			
 
				 	}
			
 
				-	return(0);
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1156,21 +1149,21 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
 
				 	trace_xfs_attr_leaf_get(args);
			
 
				 
			
 
				 	args->blkno = 0;
			
 
				-	error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
			
 
				+	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
			
 
				 	if (error)
			
 
				 		return error;
			
 
				 
			
 
				-	error = xfs_attr_leaf_lookup_int(bp, args);
			
 
				+	error = xfs_attr3_leaf_lookup_int(bp, args);
			
 
				 	if (error != EEXIST)  {
			
 
				 		xfs_trans_brelse(args->trans, bp);
			
 
				-		return(error);
			
 
				+		return error;
			
 
				 	}
			
 
				-	error = xfs_attr_leaf_getvalue(bp, args);
			
 
				+	error = xfs_attr3_leaf_getvalue(bp, args);
			
 
				 	xfs_trans_brelse(args->trans, bp);
			
 
				 	if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
			
 
				 		error = xfs_attr_rmtval_get(args);
			
 
				 	}
			
 
				-	return(error);
			
 
				+	return error;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1185,11 +1178,11 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
 
				 	trace_xfs_attr_leaf_list(context);
			
 
				 
			
 
				 	context->cursor->blkno = 0;
			
 
				-	error = xfs_attr_leaf_read(NULL, context->dp, 0, -1, &bp);
			
 
				+	error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp);
			
 
				 	if (error)
			
 
				 		return XFS_ERROR(error);
			
 
				 
			
 
				-	error = xfs_attr_leaf_list_int(bp, context);
			
 
				+	error = xfs_attr3_leaf_list_int(bp, context);
			
 
				 	xfs_trans_brelse(NULL, bp);
			
 
				 	return XFS_ERROR(error);
			
 
				 }
			
@@ -1236,7 +1229,7 @@ restart:
 
				 	 * Search to see if name already exists, and get back a pointer
			
 
				 	 * to where it should go.
			
 
				 	 */
			
 
				-	error = xfs_da_node_lookup_int(state, &retval);
			
 
				+	error = xfs_da3_node_lookup_int(state, &retval);
			
 
				 	if (error)
			
 
				 		goto out;
			
 
				 	blk = &state->path.blk[ state->path.active-1 ];
			
@@ -1258,7 +1251,7 @@ restart:
 
				 		args->rmtblkcnt = 0;
			
 
				 	}
			
 
				 
			
 
				-	retval = xfs_attr_leaf_add(blk->bp, state->args);
			
 
				+	retval = xfs_attr3_leaf_add(blk->bp, state->args);
			
 
				 	if (retval == ENOSPC) {
			
 
				 		if (state->path.active == 1) {
			
 
				 			/*
			
@@ -1268,7 +1261,7 @@ restart:
 
				 			 */
			
 
				 			xfs_da_state_free(state);
			
 
				 			xfs_bmap_init(args->flist, args->firstblock);
			
 
				-			error = xfs_attr_leaf_to_node(args);
			
 
				+			error = xfs_attr3_leaf_to_node(args);
			
 
				 			if (!error) {
			
 
				 				error = xfs_bmap_finish(&args->trans,
			
 
				 							args->flist,
			
@@ -1307,7 +1300,7 @@ restart:
 
				 		 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
			
 
				 		 */
			
 
				 		xfs_bmap_init(args->flist, args->firstblock);
			
 
				-		error = xfs_da_split(state);
			
 
				+		error = xfs_da3_split(state);
			
 
				 		if (!error) {
			
 
				 			error = xfs_bmap_finish(&args->trans, args->flist,
			
 
				 						&committed);
			
@@ -1329,7 +1322,7 @@ restart:
 
				 		/*
			
 
				 		 * Addition succeeded, update Btree hashvals.
			
 
				 		 */
			
 
				-		xfs_da_fixhashpath(state, &state->path);
			
 
				+		xfs_da3_fixhashpath(state, &state->path);
			
 
				 	}
			
 
				 
			
 
				 	/*
			
@@ -1370,7 +1363,7 @@ restart:
 
				 		 * In a separate transaction, set the incomplete flag on the
			
 
				 		 * "old" attr and clear the incomplete flag on the "new" attr.
			
 
				 		 */
			
 
				-		error = xfs_attr_leaf_flipflags(args);
			
 
				+		error = xfs_attr3_leaf_flipflags(args);
			
 
				 		if (error)
			
 
				 			goto out;
			
 
				 
			
@@ -1400,7 +1393,7 @@ restart:
 
				 		state->blocksize = state->mp->m_sb.sb_blocksize;
			
 
				 		state->node_ents = state->mp->m_attr_node_ents;
			
 
				 		state->inleaf = 0;
			
 
				-		error = xfs_da_node_lookup_int(state, &retval);
			
 
				+		error = xfs_da3_node_lookup_int(state, &retval);
			
 
				 		if (error)
			
 
				 			goto out;
			
 
				 
			
@@ -1409,15 +1402,15 @@ restart:
 
				 		 */
			
 
				 		blk = &state->path.blk[ state->path.active-1 ];
			
 
				 		ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
			
 
				-		error = xfs_attr_leaf_remove(blk->bp, args);
			
 
				-		xfs_da_fixhashpath(state, &state->path);
			
 
				+		error = xfs_attr3_leaf_remove(blk->bp, args);
			
 
				+		xfs_da3_fixhashpath(state, &state->path);
			
 
				 
			
 
				 		/*
			
 
				 		 * Check to see if the tree needs to be collapsed.
			
 
				 		 */
			
 
				 		if (retval && (state->path.active > 1)) {
			
 
				 			xfs_bmap_init(args->flist, args->firstblock);
			
 
				-			error = xfs_da_join(state);
			
 
				+			error = xfs_da3_join(state);
			
 
				 			if (!error) {
			
 
				 				error = xfs_bmap_finish(&args->trans,
			
 
				 							args->flist,
			
@@ -1450,7 +1443,7 @@ restart:
 
				 		/*
			
 
				 		 * Added a "remote" value, just clear the incomplete flag.
			
 
				 		 */
			
 
				-		error = xfs_attr_leaf_clearflag(args);
			
 
				+		error = xfs_attr3_leaf_clearflag(args);
			
 
				 		if (error)
			
 
				 			goto out;
			
 
				 	}
			
@@ -1495,7 +1488,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 
				 	/*
			
 
				 	 * Search to see if name exists, and get back a pointer to it.
			
 
				 	 */
			
 
				-	error = xfs_da_node_lookup_int(state, &retval);
			
 
				+	error = xfs_da3_node_lookup_int(state, &retval);
			
 
				 	if (error || (retval != EEXIST)) {
			
 
				 		if (error == 0)
			
 
				 			error = retval;
			
@@ -1524,7 +1517,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 
				 		 * Mark the attribute as INCOMPLETE, then bunmapi() the
			
 
				 		 * remote value.
			
 
				 		 */
			
 
				-		error = xfs_attr_leaf_setflag(args);
			
 
				+		error = xfs_attr3_leaf_setflag(args);
			
 
				 		if (error)
			
 
				 			goto out;
			
 
				 		error = xfs_attr_rmtval_remove(args);
			
@@ -1545,15 +1538,15 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 
				 	 */
			
 
				 	blk = &state->path.blk[ state->path.active-1 ];
			
 
				 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
			
 
				-	retval = xfs_attr_leaf_remove(blk->bp, args);
			
 
				-	xfs_da_fixhashpath(state, &state->path);
			
 
				+	retval = xfs_attr3_leaf_remove(blk->bp, args);
			
 
				+	xfs_da3_fixhashpath(state, &state->path);
			
 
				 
			
 
				 	/*
			
 
				 	 * Check to see if the tree needs to be collapsed.
			
 
				 	 */
			
 
				 	if (retval && (state->path.active > 1)) {
			
 
				 		xfs_bmap_init(args->flist, args->firstblock);
			
 
				-		error = xfs_da_join(state);
			
 
				+		error = xfs_da3_join(state);
			
 
				 		if (!error) {
			
 
				 			error = xfs_bmap_finish(&args->trans, args->flist,
			
 
				 						&committed);
			
@@ -1591,13 +1584,13 @@ xfs_attr_node_removename(xfs_da_args_t *args)
 
				 		ASSERT(state->path.blk[0].bp);
			
 
				 		state->path.blk[0].bp = NULL;
			
 
				 
			
 
				-		error = xfs_attr_leaf_read(args->trans, args->dp, 0, -1, &bp);
			
 
				+		error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp);
			
 
				 		if (error)
			
 
				 			goto out;
			
 
				 
			
 
				 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
			
 
				 			xfs_bmap_init(args->flist, args->firstblock);
			
 
				-			error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
			
 
				+			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
			
 
				 			/* bp is gone due to xfs_da_shrink_inode */
			
 
				 			if (!error) {
			
 
				 				error = xfs_bmap_finish(&args->trans,
			
@@ -1699,7 +1692,7 @@ xfs_attr_refillstate(xfs_da_state_t *state)
 
				 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
			
 
				 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
			
 
				 		if (blk->disk_blkno) {
			
 
				-			error = xfs_da_node_read(state->args->trans,
			
 
				+			error = xfs_da3_node_read(state->args->trans,
			
 
				 						state->args->dp,
			
 
				 						blk->blkno, blk->disk_blkno,
			
 
				 						&blk->bp, XFS_ATTR_FORK);
			
@@ -1718,7 +1711,7 @@ xfs_attr_refillstate(xfs_da_state_t *state)
 
				 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
			
 
				 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
			
 
				 		if (blk->disk_blkno) {
			
 
				-			error = xfs_da_node_read(state->args->trans,
			
 
				+			error = xfs_da3_node_read(state->args->trans,
			
 
				 						state->args->dp,
			
 
				 						blk->blkno, blk->disk_blkno,
			
 
				 						&blk->bp, XFS_ATTR_FORK);
			
@@ -1758,7 +1751,7 @@ xfs_attr_node_get(xfs_da_args_t *args)
 
				 	/*
			
 
				 	 * Search to see if name exists, and get back a pointer to it.
			
 
				 	 */
			
 
				-	error = xfs_da_node_lookup_int(state, &retval);
			
 
				+	error = xfs_da3_node_lookup_int(state, &retval);
			
 
				 	if (error) {
			
 
				 		retval = error;
			
 
				 	} else if (retval == EEXIST) {
			
@@ -1769,7 +1762,7 @@ xfs_attr_node_get(xfs_da_args_t *args)
 
				 		/*
			
 
				 		 * Get the value, local or "remote"
			
 
				 		 */
			
 
				-		retval = xfs_attr_leaf_getvalue(blk->bp, args);
			
 
				+		retval = xfs_attr3_leaf_getvalue(blk->bp, args);
			
 
				 		if (!retval && (args->rmtblkno > 0)
			
 
				 		    && !(args->flags & ATTR_KERNOVAL)) {
			
 
				 			retval = xfs_attr_rmtval_get(args);
			
@@ -1794,7 +1787,9 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 
				 	attrlist_cursor_kern_t *cursor;
			
 
				 	xfs_attr_leafblock_t *leaf;
			
 
				 	xfs_da_intnode_t *node;
			
 
				-	xfs_da_node_entry_t *btree;
			
 
				+	struct xfs_attr3_icleaf_hdr leafhdr;
			
 
				+	struct xfs_da3_icnode_hdr nodehdr;
			
 
				+	struct xfs_da_node_entry *btree;
			
 
				 	int error, i;
			
 
				 	struct xfs_buf *bp;
			
 
				 
			
@@ -1810,27 +1805,33 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 
				 	 */
			
 
				 	bp = NULL;
			
 
				 	if (cursor->blkno > 0) {
			
 
				-		error = xfs_da_node_read(NULL, context->dp, cursor->blkno, -1,
			
 
				+		error = xfs_da3_node_read(NULL, context->dp, cursor->blkno, -1,
			
 
				 					      &bp, XFS_ATTR_FORK);
			
 
				 		if ((error != 0) && (error != EFSCORRUPTED))
			
 
				 			return(error);
			
 
				 		if (bp) {
			
 
				+			struct xfs_attr_leaf_entry *entries;
			
 
				+
			
 
				 			node = bp->b_addr;
			
 
				 			switch (be16_to_cpu(node->hdr.info.magic)) {
			
 
				 			case XFS_DA_NODE_MAGIC:
			
 
				+			case XFS_DA3_NODE_MAGIC:
			
 
				 				trace_xfs_attr_list_wrong_blk(context);
			
 
				 				xfs_trans_brelse(NULL, bp);
			
 
				 				bp = NULL;
			
 
				 				break;
			
 
				 			case XFS_ATTR_LEAF_MAGIC:
			
 
				+			case XFS_ATTR3_LEAF_MAGIC:
			
 
				 				leaf = bp->b_addr;
			
 
				-				if (cursor->hashval > be32_to_cpu(leaf->entries[
			
 
				-				    be16_to_cpu(leaf->hdr.count)-1].hashval)) {
			
 
				+				xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
			
 
				+				entries = xfs_attr3_leaf_entryp(leaf);
			
 
				+				if (cursor->hashval > be32_to_cpu(
			
 
				+						entries[leafhdr.count - 1].hashval)) {
			
 
				 					trace_xfs_attr_list_wrong_blk(context);
			
 
				 					xfs_trans_brelse(NULL, bp);
			
 
				 					bp = NULL;
			
 
				-				} else if (cursor->hashval <=
			
 
				-					     be32_to_cpu(leaf->entries[0].hashval)) {
			
 
				+				} else if (cursor->hashval <= be32_to_cpu(
			
 
				+						entries[0].hashval)) {
			
 
				 					trace_xfs_attr_list_wrong_blk(context);
			
 
				 					xfs_trans_brelse(NULL, bp);
			
 
				 					bp = NULL;
			
@@ -1852,27 +1853,31 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 
				 	if (bp == NULL) {
			
 
				 		cursor->blkno = 0;
			
 
				 		for (;;) {
			
 
				-			error = xfs_da_node_read(NULL, context->dp,
			
 
				+			__uint16_t magic;
			
 
				+
			
 
				+			error = xfs_da3_node_read(NULL, context->dp,
			
 
				 						      cursor->blkno, -1, &bp,
			
 
				 						      XFS_ATTR_FORK);
			
 
				 			if (error)
			
 
				 				return(error);
			
 
				 			node = bp->b_addr;
			
 
				-			if (node->hdr.info.magic ==
			
 
				-			    cpu_to_be16(XFS_ATTR_LEAF_MAGIC))
			
 
				+			magic = be16_to_cpu(node->hdr.info.magic);
			
 
				+			if (magic == XFS_ATTR_LEAF_MAGIC ||
			
 
				+			    magic == XFS_ATTR3_LEAF_MAGIC)
			
 
				 				break;
			
 
				-			if (unlikely(node->hdr.info.magic !=
			
 
				-				     cpu_to_be16(XFS_DA_NODE_MAGIC))) {
			
 
				+			if (magic != XFS_DA_NODE_MAGIC &&
			
 
				+			    magic != XFS_DA3_NODE_MAGIC) {
			
 
				 				XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
			
 
				 						     XFS_ERRLEVEL_LOW,
			
 
				 						     context->dp->i_mount,
			
 
				 						     node);
			
 
				 				xfs_trans_brelse(NULL, bp);
			
 
				-				return(XFS_ERROR(EFSCORRUPTED));
			
 
				+				return XFS_ERROR(EFSCORRUPTED);
			
 
				 			}
			
 
				-			btree = node->btree;
			
 
				-			for (i = 0; i < be16_to_cpu(node->hdr.count);
			
 
				-								btree++, i++) {
			
 
				+
			
 
				+			xfs_da3_node_hdr_from_disk(&nodehdr, node);
			
 
				+			btree = xfs_da3_node_tree_p(node);
			
 
				+			for (i = 0; i < nodehdr.count; btree++, i++) {
			
 
				 				if (cursor->hashval
			
 
				 						<= be32_to_cpu(btree->hashval)) {
			
 
				 					cursor->blkno = be32_to_cpu(btree->before);
			
@@ -1881,9 +1886,9 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 
				 					break;
			
 
				 				}
			
 
				 			}
			
 
				-			if (i == be16_to_cpu(node->hdr.count)) {
			
 
				+			if (i == nodehdr.count) {
			
 
				 				xfs_trans_brelse(NULL, bp);
			
 
				-				return(0);
			
 
				+				return 0;
			
 
				 			}
			
 
				 			xfs_trans_brelse(NULL, bp);
			
 
				 		}
			
@@ -1897,310 +1902,21 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 
				 	 */
			
 
				 	for (;;) {
			
 
				 		leaf = bp->b_addr;
			
 
				-		error = xfs_attr_leaf_list_int(bp, context);
			
 
				+		error = xfs_attr3_leaf_list_int(bp, context);
			
 
				 		if (error) {
			
 
				 			xfs_trans_brelse(NULL, bp);
			
 
				 			return error;
			
 
				 		}
			
 
				-		if (context->seen_enough || leaf->hdr.info.forw == 0)
			
 
				+		xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
			
 
				+		if (context->seen_enough || leafhdr.forw == 0)
			
 
				 			break;
			
 
				-		cursor->blkno = be32_to_cpu(leaf->hdr.info.forw);
			
 
				+		cursor->blkno = leafhdr.forw;
			
 
				 		xfs_trans_brelse(NULL, bp);
			
 
				-		error = xfs_attr_leaf_read(NULL, context->dp, cursor->blkno, -1,
			
 
				+		error = xfs_attr3_leaf_read(NULL, context->dp, cursor->blkno, -1,
			
 
				 					   &bp);
			
 
				 		if (error)
			
 
				 			return error;
			
 
				 	}
			
 
				 	xfs_trans_brelse(NULL, bp);
			
 
				-	return(0);
			
 
				-}
			
 
				-
			
 
				-
			
 
				-/*========================================================================
			
 
				- * External routines for manipulating out-of-line attribute values.
			
 
				- *========================================================================*/
			
 
				-
			
 
				-/*
			
 
				- * Read the value associated with an attribute from the out-of-line buffer
			
 
				- * that we stored it in.
			
 
				- */
			
 
				-int
			
 
				-xfs_attr_rmtval_get(xfs_da_args_t *args)
			
 
				-{
			
 
				-	xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE];
			
 
				-	xfs_mount_t *mp;
			
 
				-	xfs_daddr_t dblkno;
			
 
				-	void *dst;
			
 
				-	xfs_buf_t *bp;
			
 
				-	int nmap, error, tmp, valuelen, blkcnt, i;
			
 
				-	xfs_dablk_t lblkno;
			
 
				-
			
 
				-	trace_xfs_attr_rmtval_get(args);
			
 
				-
			
 
				-	ASSERT(!(args->flags & ATTR_KERNOVAL));
			
 
				-
			
 
				-	mp = args->dp->i_mount;
			
 
				-	dst = args->value;
			
 
				-	valuelen = args->valuelen;
			
 
				-	lblkno = args->rmtblkno;
			
 
				-	while (valuelen > 0) {
			
 
				-		nmap = ATTR_RMTVALUE_MAPSIZE;
			
 
				-		error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
			
 
				-				       args->rmtblkcnt, map, &nmap,
			
 
				-				       XFS_BMAPI_ATTRFORK);
			
 
				-		if (error)
			
 
				-			return(error);
			
 
				-		ASSERT(nmap >= 1);
			
 
				-
			
 
				-		for (i = 0; (i < nmap) && (valuelen > 0); i++) {
			
 
				-			ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
			
 
				-			       (map[i].br_startblock != HOLESTARTBLOCK));
			
 
				-			dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
			
 
				-			blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
			
 
				-			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
			
 
				-						   dblkno, blkcnt, 0, &bp, NULL);
			
 
				-			if (error)
			
 
				-				return(error);
			
 
				-
			
 
				-			tmp = min_t(int, valuelen, BBTOB(bp->b_length));
			
 
				-			xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ);
			
 
				-			xfs_buf_relse(bp);
			
 
				-			dst += tmp;
			
 
				-			valuelen -= tmp;
			
 
				-
			
 
				-			lblkno += map[i].br_blockcount;
			
 
				-		}
			
 
				-	}
			
 
				-	ASSERT(valuelen == 0);
			
 
				-	return(0);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Write the value associated with an attribute into the out-of-line buffer
			
 
				- * that we have defined for it.
			
 
				- */
			
 
				-STATIC int
			
 
				-xfs_attr_rmtval_set(xfs_da_args_t *args)
			
 
				-{
			
 
				-	xfs_mount_t *mp;
			
 
				-	xfs_fileoff_t lfileoff;
			
 
				-	xfs_inode_t *dp;
			
 
				-	xfs_bmbt_irec_t map;
			
 
				-	xfs_daddr_t dblkno;
			
 
				-	void *src;
			
 
				-	xfs_buf_t *bp;
			
 
				-	xfs_dablk_t lblkno;
			
 
				-	int blkcnt, valuelen, nmap, error, tmp, committed;
			
 
				-
			
 
				-	trace_xfs_attr_rmtval_set(args);
			
 
				-
			
 
				-	dp = args->dp;
			
 
				-	mp = dp->i_mount;
			
 
				-	src = args->value;
			
 
				-
			
 
				-	/*
			
 
				-	 * Find a "hole" in the attribute address space large enough for
			
 
				-	 * us to drop the new attribute's value into.
			
 
				-	 */
			
 
				-	blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
			
 
				-	lfileoff = 0;
			
 
				-	error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
			
 
				-						   XFS_ATTR_FORK);
			
 
				-	if (error) {
			
 
				-		return(error);
			
 
				-	}
			
 
				-	args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
			
 
				-	args->rmtblkcnt = blkcnt;
			
 
				-
			
 
				-	/*
			
 
				-	 * Roll through the "value", allocating blocks on disk as required.
			
 
				-	 */
			
 
				-	while (blkcnt > 0) {
			
 
				-		/*
			
 
				-		 * Allocate a single extent, up to the size of the value.
			
 
				-		 */
			
 
				-		xfs_bmap_init(args->flist, args->firstblock);
			
 
				-		nmap = 1;
			
 
				-		error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
			
 
				-				  blkcnt,
			
 
				-				  XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
			
 
				-				  args->firstblock, args->total, &map, &nmap,
			
 
				-				  args->flist);
			
 
				-		if (!error) {
			
 
				-			error = xfs_bmap_finish(&args->trans, args->flist,
			
 
				-						&committed);
			
 
				-		}
			
 
				-		if (error) {
			
 
				-			ASSERT(committed);
			
 
				-			args->trans = NULL;
			
 
				-			xfs_bmap_cancel(args->flist);
			
 
				-			return(error);
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * bmap_finish() may have committed the last trans and started
			
 
				-		 * a new one.  We need the inode to be in all transactions.
			
 
				-		 */
			
 
				-		if (committed)
			
 
				-			xfs_trans_ijoin(args->trans, dp, 0);
			
 
				-
			
 
				-		ASSERT(nmap == 1);
			
 
				-		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
			
 
				-		       (map.br_startblock != HOLESTARTBLOCK));
			
 
				-		lblkno += map.br_blockcount;
			
 
				-		blkcnt -= map.br_blockcount;
			
 
				-
			
 
				-		/*
			
 
				-		 * Start the next trans in the chain.
			
 
				-		 */
			
 
				-		error = xfs_trans_roll(&args->trans, dp);
			
 
				-		if (error)
			
 
				-			return (error);
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Roll through the "value", copying the attribute value to the
			
 
				-	 * already-allocated blocks.  Blocks are written synchronously
			
 
				-	 * so that we can know they are all on disk before we turn off
			
 
				-	 * the INCOMPLETE flag.
			
 
				-	 */
			
 
				-	lblkno = args->rmtblkno;
			
 
				-	valuelen = args->valuelen;
			
 
				-	while (valuelen > 0) {
			
 
				-		int buflen;
			
 
				-
			
 
				-		/*
			
 
				-		 * Try to remember where we decided to put the value.
			
 
				-		 */
			
 
				-		xfs_bmap_init(args->flist, args->firstblock);
			
 
				-		nmap = 1;
			
 
				-		error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
			
 
				-				       args->rmtblkcnt, &map, &nmap,
			
 
				-				       XFS_BMAPI_ATTRFORK);
			
 
				-		if (error)
			
 
				-			return(error);
			
 
				-		ASSERT(nmap == 1);
			
 
				-		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
			
 
				-		       (map.br_startblock != HOLESTARTBLOCK));
			
 
				-
			
 
				-		dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
			
 
				-		blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
			
 
				-
			
 
				-		bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0);
			
 
				-		if (!bp)
			
 
				-			return ENOMEM;
			
 
				-
			
 
				-		buflen = BBTOB(bp->b_length);
			
 
				-		tmp = min_t(int, valuelen, buflen);
			
 
				-		xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE);
			
 
				-		if (tmp < buflen)
			
 
				-			xfs_buf_zero(bp, tmp, buflen - tmp);
			
 
				-
			
 
				-		error = xfs_bwrite(bp);	/* GROT: NOTE: synchronous write */
			
 
				-		xfs_buf_relse(bp);
			
 
				-		if (error)
			
 
				-			return error;
			
 
				-		src += tmp;
			
 
				-		valuelen -= tmp;
			
 
				-
			
 
				-		lblkno += map.br_blockcount;
			
 
				-	}
			
 
				-	ASSERT(valuelen == 0);
			
 
				-	return(0);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Remove the value associated with an attribute by deleting the
			
 
				- * out-of-line buffer that it is stored on.
			
 
				- */
			
 
				-STATIC int
			
 
				-xfs_attr_rmtval_remove(xfs_da_args_t *args)
			
 
				-{
			
 
				-	xfs_mount_t *mp;
			
 
				-	xfs_bmbt_irec_t map;
			
 
				-	xfs_buf_t *bp;
			
 
				-	xfs_daddr_t dblkno;
			
 
				-	xfs_dablk_t lblkno;
			
 
				-	int valuelen, blkcnt, nmap, error, done, committed;
			
 
				-
			
 
				-	trace_xfs_attr_rmtval_remove(args);
			
 
				-
			
 
				-	mp = args->dp->i_mount;
			
 
				-
			
 
				-	/*
			
 
				-	 * Roll through the "value", invalidating the attribute value's
			
 
				-	 * blocks.
			
 
				-	 */
			
 
				-	lblkno = args->rmtblkno;
			
 
				-	valuelen = args->rmtblkcnt;
			
 
				-	while (valuelen > 0) {
			
 
				-		/*
			
 
				-		 * Try to remember where we decided to put the value.
			
 
				-		 */
			
 
				-		nmap = 1;
			
 
				-		error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
			
 
				-				       args->rmtblkcnt, &map, &nmap,
			
 
				-				       XFS_BMAPI_ATTRFORK);
			
 
				-		if (error)
			
 
				-			return(error);
			
 
				-		ASSERT(nmap == 1);
			
 
				-		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
			
 
				-		       (map.br_startblock != HOLESTARTBLOCK));
			
 
				-
			
 
				-		dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
			
 
				-		blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
			
 
				-
			
 
				-		/*
			
 
				-		 * If the "remote" value is in the cache, remove it.
			
 
				-		 */
			
 
				-		bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK);
			
 
				-		if (bp) {
			
 
				-			xfs_buf_stale(bp);
			
 
				-			xfs_buf_relse(bp);
			
 
				-			bp = NULL;
			
 
				-		}
			
 
				-
			
 
				-		valuelen -= map.br_blockcount;
			
 
				-
			
 
				-		lblkno += map.br_blockcount;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Keep de-allocating extents until the remote-value region is gone.
			
 
				-	 */
			
 
				-	lblkno = args->rmtblkno;
			
 
				-	blkcnt = args->rmtblkcnt;
			
 
				-	done = 0;
			
 
				-	while (!done) {
			
 
				-		xfs_bmap_init(args->flist, args->firstblock);
			
 
				-		error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
			
 
				-				    XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
			
 
				-				    1, args->firstblock, args->flist,
			
 
				-				    &done);
			
 
				-		if (!error) {
			
 
				-			error = xfs_bmap_finish(&args->trans, args->flist,
			
 
				-						&committed);
			
 
				-		}
			
 
				-		if (error) {
			
 
				-			ASSERT(committed);
			
 
				-			args->trans = NULL;
			
 
				-			xfs_bmap_cancel(args->flist);
			
 
				-			return(error);
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * bmap_finish() may have committed the last trans and started
			
 
				-		 * a new one.  We need the inode to be in all transactions.
			
 
				-		 */
			
 
				-		if (committed)
			
 
				-			xfs_trans_ijoin(args->trans, args->dp, 0);
			
 
				-
			
 
				-		/*
			
 
				-		 * Close out trans and start the next one in the chain.
			
 
				-		 */
			
 
				-		error = xfs_trans_roll(&args->trans, args->dp);
			
 
				-		if (error)
			
 
				-			return (error);
			
 
				-	}
			
 
				-	return(0);
			
 
				+	return 0;
			
 
				 }
			
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -140,7 +140,6 @@ typedef struct xfs_attr_list_context {
 
				  * Overall external interface routines.
			
 
				  */
			
 
				 int xfs_attr_inactive(struct xfs_inode *dp);
			
 
				-int xfs_attr_rmtval_get(struct xfs_da_args *args);
			
 
				 int xfs_attr_list_int(struct xfs_attr_list_context *);
			
 
				 
			
 
				 #endif	/* __XFS_ATTR_H__ */
			
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -1,5 +1,6 @@
 
				 /*
			
 
				  * Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc.
			
 
				+ * Copyright (c) 2013 Red Hat, Inc.
			
 
				  * All Rights Reserved.
			
 
				  *
			
 
				  * This program is free software; you can redistribute it and/or
			
@@ -89,7 +90,7 @@ typedef struct xfs_attr_leaf_hdr {	/* constant-structure header block */
 
				 
			
 
				 typedef struct xfs_attr_leaf_entry {	/* sorted on key, not name */
			
 
				 	__be32	hashval;		/* hash value of name */
			
 
				- 	__be16	nameidx;		/* index into buffer of name/value */
			
 
				+	__be16	nameidx;		/* index into buffer of name/value */
			
 
				 	__u8	flags;			/* LOCAL/ROOT/SECURE/INCOMPLETE flag */
			
 
				 	__u8	pad2;			/* unused pad byte */
			
 
				 } xfs_attr_leaf_entry_t;
			
@@ -114,6 +115,54 @@ typedef struct xfs_attr_leafblock {
 
				 	xfs_attr_leaf_name_remote_t valuelist;	/* grows from bottom of buf */
			
 
				 } xfs_attr_leafblock_t;
			
 
				 
			
 
				+/*
			
 
				+ * CRC enabled leaf structures. Called "version 3" structures to match the
			
 
				+ * version number of the directory and dablk structures for this feature, and
			
 
				+ * attr2 is already taken by the variable inode attribute fork size feature.
			
 
				+ */
			
 
				+struct xfs_attr3_leaf_hdr {
			
 
				+	struct xfs_da3_blkinfo	info;
			
 
				+	__be16			count;
			
 
				+	__be16			usedbytes;
			
 
				+	__be16			firstused;
			
 
				+	__u8			holes;
			
 
				+	__u8			pad1;
			
 
				+	struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE];
			
 
				+};
			
 
				+
			
 
				+#define XFS_ATTR3_LEAF_CRC_OFF	(offsetof(struct xfs_attr3_leaf_hdr, info.crc))
			
 
				+
			
 
				+struct xfs_attr3_leafblock {
			
 
				+	struct xfs_attr3_leaf_hdr	hdr;
			
 
				+	struct xfs_attr_leaf_entry	entries[1];
			
 
				+
			
 
				+	/*
			
 
				+	 * The rest of the block contains the following structures after the
			
 
				+	 * leaf entries, growing from the bottom up. The variables are never
			
 
				+	 * referenced, the locations accessed purely from helper functions.
			
 
				+	 *
			
 
				+	 * struct xfs_attr_leaf_name_local
			
 
				+	 * struct xfs_attr_leaf_name_remote
			
 
				+	 */
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * incore, neutral version of the attribute leaf header
			
 
				+ */
			
 
				+struct xfs_attr3_icleaf_hdr {
			
 
				+	__uint32_t	forw;
			
 
				+	__uint32_t	back;
			
 
				+	__uint16_t	magic;
			
 
				+	__uint16_t	count;
			
 
				+	__uint16_t	usedbytes;
			
 
				+	__uint16_t	firstused;
			
 
				+	__u8		holes;
			
 
				+	struct {
			
 
				+		__uint16_t	base;
			
 
				+		__uint16_t	size;
			
 
				+	} freemap[XFS_ATTR_LEAF_MAPSIZE];
			
 
				+};
			
 
				+
			
 
				 /*
			
 
				  * Flags used in the leaf_entry[i].flags field.
			
 
				  * NOTE: the INCOMPLETE bit must not collide with the flags bits specified
			
@@ -147,26 +196,43 @@ typedef struct xfs_attr_leafblock {
 
				  */
			
 
				 #define	XFS_ATTR_LEAF_NAME_ALIGN	((uint)sizeof(xfs_dablk_t))
			
 
				 
			
 
				+static inline int
			
 
				+xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp)
			
 
				+{
			
 
				+	if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
			
 
				+		return sizeof(struct xfs_attr3_leaf_hdr);
			
 
				+	return sizeof(struct xfs_attr_leaf_hdr);
			
 
				+}
			
 
				+
			
 
				+static inline struct xfs_attr_leaf_entry *
			
 
				+xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp)
			
 
				+{
			
 
				+	if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
			
 
				+		return &((struct xfs_attr3_leafblock *)leafp)->entries[0];
			
 
				+	return &leafp->entries[0];
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Cast typed pointers for "local" and "remote" name/value structs.
			
 
				  */
			
 
				-static inline xfs_attr_leaf_name_remote_t *
			
 
				-xfs_attr_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx)
			
 
				+static inline char *
			
 
				+xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
			
 
				 {
			
 
				-	return (xfs_attr_leaf_name_remote_t *)
			
 
				-		&((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)];
			
 
				+	struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp);
			
 
				+
			
 
				+	return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)];
			
 
				 }
			
 
				 
			
 
				-static inline xfs_attr_leaf_name_local_t *
			
 
				-xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
			
 
				+static inline xfs_attr_leaf_name_remote_t *
			
 
				+xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx)
			
 
				 {
			
 
				-	return (xfs_attr_leaf_name_local_t *)
			
 
				-		&((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)];
			
 
				+	return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx);
			
 
				 }
			
 
				 
			
 
				-static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
			
 
				+static inline xfs_attr_leaf_name_local_t *
			
 
				+xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
			
 
				 {
			
 
				-	return &((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)];
			
 
				+	return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -221,37 +287,37 @@ int	xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
 
				 /*
			
 
				  * Internal routines when attribute fork size == XFS_LBSIZE(mp).
			
 
				  */
			
 
				-int	xfs_attr_leaf_to_node(struct xfs_da_args *args);
			
 
				-int	xfs_attr_leaf_to_shortform(struct xfs_buf *bp,
			
 
				+int	xfs_attr3_leaf_to_node(struct xfs_da_args *args);
			
 
				+int	xfs_attr3_leaf_to_shortform(struct xfs_buf *bp,
			
 
				 				   struct xfs_da_args *args, int forkoff);
			
 
				-int	xfs_attr_leaf_clearflag(struct xfs_da_args *args);
			
 
				-int	xfs_attr_leaf_setflag(struct xfs_da_args *args);
			
 
				-int	xfs_attr_leaf_flipflags(xfs_da_args_t *args);
			
 
				+int	xfs_attr3_leaf_clearflag(struct xfs_da_args *args);
			
 
				+int	xfs_attr3_leaf_setflag(struct xfs_da_args *args);
			
 
				+int	xfs_attr3_leaf_flipflags(struct xfs_da_args *args);
			
 
				 
			
 
				 /*
			
 
				  * Routines used for growing the Btree.
			
 
				  */
			
 
				-int	xfs_attr_leaf_split(struct xfs_da_state *state,
			
 
				+int	xfs_attr3_leaf_split(struct xfs_da_state *state,
			
 
				 				   struct xfs_da_state_blk *oldblk,
			
 
				 				   struct xfs_da_state_blk *newblk);
			
 
				-int	xfs_attr_leaf_lookup_int(struct xfs_buf *leaf,
			
 
				+int	xfs_attr3_leaf_lookup_int(struct xfs_buf *leaf,
			
 
				 					struct xfs_da_args *args);
			
 
				-int	xfs_attr_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args);
			
 
				-int	xfs_attr_leaf_add(struct xfs_buf *leaf_buffer,
			
 
				+int	xfs_attr3_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args);
			
 
				+int	xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
			
 
				 				 struct xfs_da_args *args);
			
 
				-int	xfs_attr_leaf_remove(struct xfs_buf *leaf_buffer,
			
 
				+int	xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer,
			
 
				 				    struct xfs_da_args *args);
			
 
				-int	xfs_attr_leaf_list_int(struct xfs_buf *bp,
			
 
				+int	xfs_attr3_leaf_list_int(struct xfs_buf *bp,
			
 
				 				      struct xfs_attr_list_context *context);
			
 
				 
			
 
				 /*
			
 
				  * Routines used for shrinking the Btree.
			
 
				  */
			
 
				-int	xfs_attr_leaf_toosmall(struct xfs_da_state *state, int *retval);
			
 
				-void	xfs_attr_leaf_unbalance(struct xfs_da_state *state,
			
 
				+int	xfs_attr3_leaf_toosmall(struct xfs_da_state *state, int *retval);
			
 
				+void	xfs_attr3_leaf_unbalance(struct xfs_da_state *state,
			
 
				 				       struct xfs_da_state_blk *drop_blk,
			
 
				 				       struct xfs_da_state_blk *save_blk);
			
 
				-int	xfs_attr_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp);
			
 
				+int	xfs_attr3_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp);
			
 
				 
			
 
				 /*
			
 
				  * Utility routines.
			
@@ -261,10 +327,12 @@ int	xfs_attr_leaf_order(struct xfs_buf *leaf1_bp,
 
				 				   struct xfs_buf *leaf2_bp);
			
 
				 int	xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize,
			
 
				 					int *local);
			
 
				-int	xfs_attr_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
			
 
				+int	xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
			
 
				 			xfs_dablk_t bno, xfs_daddr_t mappedbno,
			
 
				 			struct xfs_buf **bpp);
			
 
				+void	xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to,
			
 
				+				     struct xfs_attr_leafblock *from);
			
 
				 
			
 
				-extern const struct xfs_buf_ops xfs_attr_leaf_buf_ops;
			
 
				+extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
			
 
				 
			
 
				 #endif	/* __XFS_ATTR_LEAF_H__ */
			
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -0,0 +1,541 @@
 
				+/*
			
 
				+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
			
 
				+ * Copyright (c) 2013 Red Hat, Inc.
			
 
				+ * All Rights Reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#include "xfs.h"
			
 
				+#include "xfs_fs.h"
			
 
				+#include "xfs_types.h"
			
 
				+#include "xfs_bit.h"
			
 
				+#include "xfs_log.h"
			
 
				+#include "xfs_trans.h"
			
 
				+#include "xfs_sb.h"
			
 
				+#include "xfs_ag.h"
			
 
				+#include "xfs_mount.h"
			
 
				+#include "xfs_error.h"
			
 
				+#include "xfs_da_btree.h"
			
 
				+#include "xfs_bmap_btree.h"
			
 
				+#include "xfs_dinode.h"
			
 
				+#include "xfs_inode.h"
			
 
				+#include "xfs_alloc.h"
			
 
				+#include "xfs_inode_item.h"
			
 
				+#include "xfs_bmap.h"
			
 
				+#include "xfs_attr.h"
			
 
				+#include "xfs_attr_leaf.h"
			
 
				+#include "xfs_attr_remote.h"
			
 
				+#include "xfs_trans_space.h"
			
 
				+#include "xfs_trace.h"
			
 
				+#include "xfs_cksum.h"
			
 
				+#include "xfs_buf_item.h"
			
 
				+
			
 
				+#define ATTR_RMTVALUE_MAPSIZE	1	/* # of map entries at once */
			
 
				+
			
 
				+/*
				+ * Work out how many filesystem blocks a remote attribute value of attrlen
				+ * bytes occupies. When the filesystem has CRCs enabled each block gives up
				+ * part of its space to a header, so the count is a round-up division by the
				+ * per-block payload rather than a plain byte to FSB conversion.
				+ */
				+static int
				+xfs_attr3_rmt_blocks(
				+	struct xfs_mount *mp,
				+	int		attrlen)
				+{
				+	int		payload;	/* value bytes that fit in one block */
				+	int		nblocks;
				+
				+	payload = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
				+	nblocks = (attrlen + payload - 1) / payload;
				+	return nblocks;
				+}
			
 
				+
			
 
				+/*
				+ * Structural checks on the remote attribute header at the start of a buffer.
				+ *
				+ * Returns true only if the filesystem has CRCs enabled and the header carries
				+ * the expected magic number, the superblock UUID, the disk address the buffer
				+ * was read from, a byte range that fits inside the largest possible attribute
				+ * value, and a non-zero owner. The checksum itself is not examined here.
				+ */
				+static bool
				+xfs_attr3_rmt_verify(
				+	struct xfs_buf		*bp)
				+{
				+	struct xfs_mount	*mp = bp->b_target->bt_mount;
				+	struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
				+	uint32_t		offset;
				+	uint32_t		bytes;
				+
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
				+		return false;
				+	if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC))
				+		return false;
				+	if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid))
				+		return false;
				+	if (bp->b_bn != be64_to_cpu(rmt->rm_blkno))
				+		return false;
				+
				+	/*
				+	 * The last fragment of a maximum sized value ends exactly at
				+	 * XATTR_SIZE_MAX, so only a range running past that limit is bad.
				+	 * Compare without adding the two fields so that large on-disk
				+	 * values cannot wrap the 32 bit sum.
				+	 */
				+	offset = be32_to_cpu(rmt->rm_offset);
				+	bytes = be32_to_cpu(rmt->rm_bytes);
				+	if (offset > XATTR_SIZE_MAX || bytes > XATTR_SIZE_MAX - offset)
				+		return false;
				+	if (rmt->rm_owner == 0)
				+		return false;
				+
				+	return true;
				+}
			
 
				+
			
 
				+/*
				+ * Read-side verifier for remote attribute buffers. On CRC enabled
				+ * filesystems the buffer must pass the checksum and then the header checks;
				+ * a failure is reported and the buffer is marked EFSCORRUPTED.
				+ */
				+static void
				+xfs_attr3_rmt_read_verify(
				+	struct xfs_buf	*bp)
				+{
				+	struct xfs_mount *mp = bp->b_target->bt_mount;
				+	bool		good;
				+
				+	/* no verification of non-crc buffers */
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
				+		return;
				+
				+	/* Only look at the header fields once the checksum has passed. */
				+	good = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
				+				XFS_ATTR3_RMT_CRC_OFF);
				+	if (good)
				+		good = xfs_attr3_rmt_verify(bp);
				+	if (good)
				+		return;
				+
				+	XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
				+	xfs_buf_ioerror(bp, EFSCORRUPTED);
				+}
			
 
				+
			
 
				+/*
				+ * Write-side verifier for remote attribute buffers. On CRC enabled
				+ * filesystems the header is checked first; a bad header is reported and the
				+ * buffer marked EFSCORRUPTED. Otherwise the LSN is stamped (when the buffer
				+ * has a log item attached) and the checksum is recalculated.
				+ */
				+static void
				+xfs_attr3_rmt_write_verify(
				+	struct xfs_buf	*bp)
				+{
				+	struct xfs_mount	*mp = bp->b_target->bt_mount;
				+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
				+	struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
				+
				+	/* no verification of non-crc buffers */
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
				+		return;
				+
				+	if (xfs_attr3_rmt_verify(bp)) {
				+		/* the LSN has to be in place before the CRC covers it */
				+		if (bip)
				+			rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
				+		xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
				+				 XFS_ATTR3_RMT_CRC_OFF);
				+		return;
				+	}
				+
				+	XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
				+	xfs_buf_ioerror(bp, EFSCORRUPTED);
				+}
			
 
				+
			
 
				+/* Verifier callbacks attached to remote attribute value buffers. */
				+const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
				+	.verify_write	= xfs_attr3_rmt_write_verify,
				+	.verify_read	= xfs_attr3_rmt_read_verify,
				+};
			
 
				+
			
 
				+/*
				+ * Initialise the remote attribute header at the start of bp.
				+ *
				+ * On CRC enabled filesystems the magic, value offset, byte count, filesystem
				+ * UUID, owning inode and disk address are written and the remote attribute
				+ * verifier ops are attached to the buffer. Returns the size of the header
				+ * that was written; on filesystems without CRCs nothing is touched and 0 is
				+ * returned.
				+ */
				+static int
				+xfs_attr3_rmt_hdr_set(
				+	struct xfs_mount	*mp,
				+	xfs_ino_t		ino,
				+	uint32_t		offset,
				+	uint32_t		size,
				+	struct xfs_buf		*bp)
				+{
				+	struct xfs_attr3_rmt_hdr *hdr;
				+
				+	/* no header without CRCs, the value starts at byte 0 */
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
				+		return 0;
				+
				+	hdr = bp->b_addr;
				+
				+	/* identity of the block */
				+	hdr->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC);
				+	uuid_copy(&hdr->rm_uuid, &mp->m_sb.sb_uuid);
				+	hdr->rm_blkno = cpu_to_be64(bp->b_bn);
				+	hdr->rm_owner = cpu_to_be64(ino);
				+
				+	/* which part of the value lives here */
				+	hdr->rm_offset = cpu_to_be32(offset);
				+	hdr->rm_bytes = cpu_to_be32(size);
				+
				+	bp->b_ops = &xfs_attr3_rmt_buf_ops;
				+
				+	return sizeof(struct xfs_attr3_rmt_hdr);
				+}
			
 
				+
			
 
				+/*
				+ * Checking of the remote attribute header is split into two parts. The
				+ * verifier does CRC, location and bounds checking; this unpacking helper
				+ * checks the attribute parameters and owner.
				+ *
				+ * Returns true when the header in bp records exactly the given value
				+ * offset, byte count and owning inode number.
				+ */
				+static bool
				+xfs_attr3_rmt_hdr_ok(
				+	struct xfs_mount	*mp,
				+	xfs_ino_t		ino,
				+	uint32_t		offset,
				+	uint32_t		size,
				+	struct xfs_buf		*bp)
				+{
				+	struct xfs_attr3_rmt_hdr *hdr = bp->b_addr;
				+
				+	return offset == be32_to_cpu(hdr->rm_offset) &&
				+	       size == be32_to_cpu(hdr->rm_bytes) &&
				+	       ino == be64_to_cpu(hdr->rm_owner);
				+}
			
 
				+
			
 
				+/*
				+ * Read the value associated with an attribute from the out-of-line buffer
				+ * that we stored it in.
				+ *
				+ * Walks the attribute fork mapping from args->rmtblkno, reads each mapped
				+ * extent through the remote attribute verifier and copies the payload into
				+ * args->value until args->valuelen bytes have been collected. On CRC enabled
				+ * filesystems every buffer starts with a struct xfs_attr3_rmt_hdr, which is
				+ * checked against the expected offset, length and owner and then skipped.
				+ *
				+ * Returns 0 on success, the error from the mapping lookup or buffer read, or
				+ * EFSCORRUPTED when a header does not describe the fragment we expected.
				+ */
				+int
				+xfs_attr_rmtval_get(
				+	struct xfs_da_args	*args)
				+{
				+	struct xfs_bmbt_irec	map[ATTR_RMTVALUE_MAPSIZE];
				+	struct xfs_mount	*mp = args->dp->i_mount;
				+	struct xfs_buf		*bp;
				+	xfs_daddr_t		dblkno;
				+	xfs_dablk_t		lblkno = args->rmtblkno;
				+	void			*dst = args->value;
				+	int			valuelen = args->valuelen;
				+	int			nmap;
				+	int			error;
				+	int			blkcnt;
				+	int			i;
				+	int			offset = 0;
				+
				+	trace_xfs_attr_rmtval_get(args);
				+
				+	ASSERT(!(args->flags & ATTR_KERNOVAL));
				+
				+	while (valuelen > 0) {
				+		nmap = ATTR_RMTVALUE_MAPSIZE;
				+		error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
				+				       args->rmtblkcnt, map, &nmap,
				+				       XFS_BMAPI_ATTRFORK);
				+		if (error)
				+			return error;
				+		ASSERT(nmap >= 1);
				+
				+		for (i = 0; (i < nmap) && (valuelen > 0); i++) {
				+			int	byte_cnt;
				+			char	*src;
				+
				+			ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
				+			       (map[i].br_startblock != HOLESTARTBLOCK));
				+			dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
				+			blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
				+			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
				+						   dblkno, blkcnt, 0, &bp,
				+						   &xfs_attr3_rmt_buf_ops);
				+			if (error)
				+				return error;
				+
				+			/*
				+			 * The buffer can carry its length less the header;
				+			 * we want the smaller of that and what is left of
				+			 * the value. This is the same sizing that
				+			 * xfs_attr_rmtval_set() records in rm_bytes.
				+			 * Clamping to valuelen before taking off the header
				+			 * would shorten the final fragment and make the
				+			 * header check below fail.
				+			 */
				+			byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp,
				+							   BBTOB(bp->b_length));
				+			if (valuelen < byte_cnt)
				+				byte_cnt = valuelen;
				+
				+			src = bp->b_addr;
				+			if (xfs_sb_version_hascrc(&mp->m_sb)) {
				+				if (!xfs_attr3_rmt_hdr_ok(mp, args->dp->i_ino,
				+							offset, byte_cnt, bp)) {
				+					xfs_alert(mp,
				+"remote attribute header does not match required off/len/owner (0x%x/0x%x,0x%llx)",
				+						offset, byte_cnt, args->dp->i_ino);
				+					xfs_buf_relse(bp);
				+					return EFSCORRUPTED;
				+				}
				+
				+				/* the value data follows the header */
				+				src += sizeof(struct xfs_attr3_rmt_hdr);
				+			}
				+
				+			memcpy(dst, src, byte_cnt);
				+			xfs_buf_relse(bp);
				+
				+			offset += byte_cnt;
				+			dst += byte_cnt;
				+			valuelen -= byte_cnt;
				+
				+			lblkno += map[i].br_blockcount;
				+		}
				+	}
				+	ASSERT(valuelen == 0);
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Write the value associated with an attribute into the out-of-line buffer
				+ * that we have defined for it.
				+ *
				+ * First a hole in the attribute fork is found and blocks are allocated for
				+ * the value (plus, on CRC enabled filesystems, the per-extent headers),
				+ * rolling the transaction after each allocation. args->rmtblkno and
				+ * args->rmtblkcnt are set to describe the region. Then each mapped extent is
				+ * filled with a header (CRC filesystems only) and the next part of
				+ * args->value, the unused tail is zeroed, and the buffer is written
				+ * synchronously.
				+ *
				+ * Returns 0 on success, ENOMEM if a buffer cannot be obtained, or the error
				+ * from the failing mapping, allocation, transaction or write call.
				+ */
				+int
				+xfs_attr_rmtval_set(
				+	struct xfs_da_args	*args)
				+{
				+	struct xfs_inode	*dp = args->dp;
				+	struct xfs_mount	*mp = dp->i_mount;
				+	struct xfs_bmbt_irec	map;
				+	struct xfs_buf		*bp;
				+	xfs_daddr_t		dblkno;
				+	xfs_dablk_t		lblkno;
				+	xfs_fileoff_t		lfileoff = 0;
				+	void			*src = args->value;
				+	int			blkcnt;
				+	int			valuelen;
				+	int			nmap;
				+	int			error;
				+	int			hdrcnt = 0;
				+	bool			crcs = xfs_sb_version_hascrc(&mp->m_sb);
				+	int			offset = 0;
				+
				+	trace_xfs_attr_rmtval_set(args);
				+
				+	/*
				+	 * Find a "hole" in the attribute address space large enough for
				+	 * us to drop the new attribute's value into. Because CRC enabled
				+	 * attributes have headers, we can't just do a straight byte to FSB
				+	 * conversion. We calculate the worst case block count in this case
				+	 * and we may not need that many, so we have to handle this when
				+	 * allocating the blocks below.
				+	 */
				+	if (!crcs)
				+		blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
				+	else
				+		blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
				+
				+	error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
				+						   XFS_ATTR_FORK);
				+	if (error)
				+		return error;
				+
				+	/* Start with the attribute data. We'll allocate the rest afterwards. */
				+	if (crcs)
				+		blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
				+
				+	args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
				+	args->rmtblkcnt = blkcnt;
				+
				+	/*
				+	 * Roll through the "value", allocating blocks on disk as required.
				+	 */
				+	while (blkcnt > 0) {
				+		int	committed;
				+
				+		/*
				+		 * Allocate a single extent, up to the size of the value.
				+		 */
				+		xfs_bmap_init(args->flist, args->firstblock);
				+		nmap = 1;
				+		error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
				+				  blkcnt,
				+				  XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
				+				  args->firstblock, args->total, &map, &nmap,
				+				  args->flist);
				+		if (!error) {
				+			error = xfs_bmap_finish(&args->trans, args->flist,
				+						&committed);
				+		}
				+		if (error) {
				+			/*
				+			 * NOTE(review): committed is only written by
				+			 * xfs_bmap_finish(); if xfs_bmapi_write() failed it
				+			 * is read here without having been set - confirm.
				+			 */
				+			ASSERT(committed);
				+			args->trans = NULL;
				+			xfs_bmap_cancel(args->flist);
				+			return error;
				+		}
				+
				+		/*
				+		 * bmap_finish() may have committed the last trans and started
				+		 * a new one.  We need the inode to be in all transactions.
				+		 */
				+		if (committed)
				+			xfs_trans_ijoin(args->trans, dp, 0);
				+
				+		ASSERT(nmap == 1);
				+		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
				+		       (map.br_startblock != HOLESTARTBLOCK));
				+		lblkno += map.br_blockcount;
				+		blkcnt -= map.br_blockcount;
				+		hdrcnt++;
				+
				+		/*
				+		 * If we have enough blocks for the attribute data, calculate
				+		 * how many extra blocks we need for headers. We might run
				+		 * through this multiple times in the case that the additional
				+		 * headers in the blocks needed for the data fragments spills
				+		 * into requiring more blocks. e.g. for 512 byte blocks, we'll
				+		 * spill for another block every 9 headers we require in this
				+		 * loop.
				+		 */
				+		if (crcs && blkcnt == 0) {
				+			int total_len;
				+
				+			total_len = args->valuelen +
				+				    hdrcnt * sizeof(struct xfs_attr3_rmt_hdr);
				+			blkcnt = XFS_B_TO_FSB(mp, total_len);
				+			blkcnt -= args->rmtblkcnt;
				+			args->rmtblkcnt += blkcnt;
				+		}
				+
				+		/*
				+		 * Start the next trans in the chain.
				+		 */
				+		error = xfs_trans_roll(&args->trans, dp);
				+		if (error)
				+			return error;
				+	}
				+
				+	/*
				+	 * Roll through the "value", copying the attribute value to the
				+	 * already-allocated blocks.  Blocks are written synchronously
				+	 * so that we can know they are all on disk before we turn off
				+	 * the INCOMPLETE flag.
				+	 */
				+	lblkno = args->rmtblkno;
				+	valuelen = args->valuelen;
				+	while (valuelen > 0) {
				+		int	byte_cnt;
				+		int	hdr_size;
				+		char	*buf;
				+
				+		/*
				+		 * Try to remember where we decided to put the value.
				+		 */
				+		xfs_bmap_init(args->flist, args->firstblock);
				+		nmap = 1;
				+		error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
				+				       args->rmtblkcnt, &map, &nmap,
				+				       XFS_BMAPI_ATTRFORK);
				+		if (error)
				+			return error;
				+		ASSERT(nmap == 1);
				+		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
				+		       (map.br_startblock != HOLESTARTBLOCK));
				+
				+		dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
				+		blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
				+
				+		bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0);
				+		if (!bp)
				+			return ENOMEM;
				+		bp->b_ops = &xfs_attr3_rmt_buf_ops;
				+
				+		/* payload is what the buffer holds after the header */
				+		byte_cnt = BBTOB(bp->b_length);
				+		byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt);
				+		if (valuelen < byte_cnt)
				+			byte_cnt = valuelen;
				+
				+		buf = bp->b_addr;
				+		hdr_size = xfs_attr3_rmt_hdr_set(mp, dp->i_ino, offset,
				+						 byte_cnt, bp);
				+		memcpy(buf + hdr_size, src, byte_cnt);
				+
				+		/*
				+		 * Zero whatever is left after the header and the data. The
				+		 * header has to be counted here, otherwise the zeroing would
				+		 * start inside the value that was just copied in.
				+		 */
				+		if (hdr_size + byte_cnt < BBTOB(bp->b_length))
				+			xfs_buf_zero(bp, hdr_size + byte_cnt,
				+				     BBTOB(bp->b_length) - hdr_size - byte_cnt);
				+
				+		error = xfs_bwrite(bp);	/* GROT: NOTE: synchronous write */
				+		xfs_buf_relse(bp);
				+		if (error)
				+			return error;
				+
				+		src += byte_cnt;
				+		valuelen -= byte_cnt;
				+		offset += byte_cnt;
				+		hdrcnt--;
				+
				+		lblkno += map.br_blockcount;
				+	}
				+	ASSERT(valuelen == 0);
				+	ASSERT(hdrcnt == 0);
				+	return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Remove the value associated with an attribute by deleting the
				+ * out-of-line buffer that it is stored on.
				+ *
				+ * Any cached buffers covering the region described by args->rmtblkno and
				+ * args->rmtblkcnt are marked stale first, then the extents are unmapped
				+ * from the attribute fork, rolling the transaction after each pass.
				+ * Returns 0 on success or the error from the failing call.
				+ */
				+int
				+xfs_attr_rmtval_remove(xfs_da_args_t *args)
				+{
				+	struct xfs_mount	*mp = args->dp->i_mount;
				+	struct xfs_bmbt_irec	map;
				+	struct xfs_buf		*bp;
				+	xfs_daddr_t		dblkno;
				+	xfs_dablk_t		lblkno;
				+	int			blks_left;
				+	int			blkcnt;
				+	int			nmap;
				+	int			error;
				+	int			done;
				+	int			committed;
				+
				+	trace_xfs_attr_rmtval_remove(args);
				+
				+	/*
				+	 * Pass one: walk the region extent by extent and invalidate
				+	 * whatever part of the value is sitting in the buffer cache.
				+	 */
				+	lblkno = args->rmtblkno;
				+	for (blks_left = args->rmtblkcnt; blks_left > 0;
				+	     blks_left -= map.br_blockcount, lblkno += map.br_blockcount) {
				+		/* look up where this part of the value was placed */
				+		nmap = 1;
				+		error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
				+				       args->rmtblkcnt, &map, &nmap,
				+				       XFS_BMAPI_ATTRFORK);
				+		if (error)
				+			return error;
				+		ASSERT(nmap == 1);
				+		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
				+		       (map.br_startblock != HOLESTARTBLOCK));
				+
				+		dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
				+		blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
				+
				+		/* a buffer found in the cache is marked stale and released */
				+		bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK);
				+		if (!bp)
				+			continue;
				+		xfs_buf_stale(bp);
				+		xfs_buf_relse(bp);
				+	}
				+
				+	/*
				+	 * Pass two: keep unmapping extents until the whole remote-value
				+	 * region has gone.
				+	 */
				+	lblkno = args->rmtblkno;
				+	blkcnt = args->rmtblkcnt;
				+	done = 0;
				+	do {
				+		xfs_bmap_init(args->flist, args->firstblock);
				+		error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
				+				    XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
				+				    1, args->firstblock, args->flist,
				+				    &done);
				+		if (!error)
				+			error = xfs_bmap_finish(&args->trans, args->flist,
				+						&committed);
				+		if (error) {
				+			ASSERT(committed);
				+			args->trans = NULL;
				+			xfs_bmap_cancel(args->flist);
				+			return error;
				+		}
				+
				+		/*
				+		 * bmap_finish() may have committed the last trans and
				+		 * started a new one; the inode must be in every one.
				+		 */
				+		if (committed)
				+			xfs_trans_ijoin(args->trans, args->dp, 0);
				+
				+		/* close out this trans and start the next in the chain */
				+		error = xfs_trans_roll(&args->trans, args->dp);
				+		if (error)
				+			return error;
				+	} while (!done);
				+
				+	return 0;
				+}
			
 
				+
			
--- a/fs/xfs/xfs_attr_remote.h
+++ b/fs/xfs/xfs_attr_remote.h
@@ -0,0 +1,46 @@
 
				+/*
			
 
				+ * Copyright (c) 2013 Red Hat, Inc.
			
 
				+ * All Rights Reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#ifndef __XFS_ATTR_REMOTE_H__
			
 
				+#define	__XFS_ATTR_REMOTE_H__
			
 
				+
			
 
				+#define XFS_ATTR3_RMT_MAGIC	0x5841524d	/* XARM */
			
 
				+
			
 
				+struct xfs_attr3_rmt_hdr {	/* header at the start of a remote attr buffer (CRC filesystems) */
			
 
				+	__be32	rm_magic;	/* XFS_ATTR3_RMT_MAGIC */
			
 
				+	__be32	rm_offset;	/* byte offset of this fragment within the attr value */
			
 
				+	__be32	rm_bytes;	/* bytes of attr value held in this buffer */
			
 
				+	__be32	rm_crc;		/* checksum, located via XFS_ATTR3_RMT_CRC_OFF */
			
 
				+	uuid_t	rm_uuid;	/* copy of the superblock sb_uuid */
			
 
				+	__be64	rm_owner;	/* inode number that owns the attribute */
			
 
				+	__be64	rm_blkno;	/* disk address (b_bn) of the buffer */
			
 
				+	__be64	rm_lsn;		/* stamped from the buffer log item by the write verifier */
			
 
				+};
			
 
				+
			
 
				+#define XFS_ATTR3_RMT_CRC_OFF	offsetof(struct xfs_attr3_rmt_hdr, rm_crc)
			
 
				+
			
 
				+#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize)	\
			
 
				+	((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
			
 
				+			sizeof(struct xfs_attr3_rmt_hdr) : 0))
			
 
				+
			
 
				+extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
			
 
				+
			
 
				+int xfs_attr_rmtval_get(struct xfs_da_args *args);
			
 
				+int xfs_attr_rmtval_set(struct xfs_da_args *args);
			
 
				+int xfs_attr_rmtval_remove(struct xfs_da_args *args);
			
 
				+
			
 
				+#endif /* __XFS_ATTR_REMOTE_H__ */
			
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -37,6 +37,7 @@
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_quota.h"
			
 
				 #include "xfs_trace.h"
			
 
				+#include "xfs_cksum.h"
			
 
				 
			
 
				 /*
			
 
				  * Determine the extent state.
			
@@ -59,24 +60,31 @@ xfs_extent_state(
 
				  */
			
 
				 void
			
 
				 xfs_bmdr_to_bmbt(
			
 
				-	struct xfs_mount	*mp,
			
 
				+	struct xfs_inode	*ip,
			
 
				 	xfs_bmdr_block_t	*dblock,
			
 
				 	int			dblocklen,
			
 
				 	struct xfs_btree_block	*rblock,
			
 
				 	int			rblocklen)
			
 
				 {
			
 
				+	struct xfs_mount	*mp = ip->i_mount;
			
 
				 	int			dmxr;
			
 
				 	xfs_bmbt_key_t		*fkp;
			
 
				 	__be64			*fpp;
			
 
				 	xfs_bmbt_key_t		*tkp;
			
 
				 	__be64			*tpp;
			
 
				 
			
 
				-	rblock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
			
 
				+				 XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
			
 
				+				 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
			
 
				+	else
			
 
				+		xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
			
 
				+				 XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
			
 
				+				 XFS_BTREE_LONG_PTRS);
			
 
				+
			
 
				 	rblock->bb_level = dblock->bb_level;
			
 
				 	ASSERT(be16_to_cpu(rblock->bb_level) > 0);
			
 
				 	rblock->bb_numrecs = dblock->bb_numrecs;
			
 
				-	rblock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
			
 
				-	rblock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
			
 
				 	dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
			
 
				 	fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
			
 
				 	tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
			
@@ -424,7 +432,13 @@ xfs_bmbt_to_bmdr(
 
				 	xfs_bmbt_key_t		*tkp;
			
 
				 	__be64			*tpp;
			
 
				 
			
 
				-	ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				+		ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC));
			
 
				+		ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid));
			
 
				+		ASSERT(rblock->bb_u.l.bb_blkno ==
			
 
				+		       cpu_to_be64(XFS_BUF_DADDR_NULL));
			
 
				+	} else
			
 
				+		ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
			
 
				 	ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));
			
 
				 	ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));
			
 
				 	ASSERT(rblock->bb_level != 0);
			
@@ -708,59 +722,89 @@ xfs_bmbt_key_diff(
 
				 				      cur->bc_rec.b.br_startoff;
			
 
				 }
			
 
				 
			
 
				-static void
			
 
				+static int
			
 
				 xfs_bmbt_verify(
			
 
				 	struct xfs_buf		*bp)
			
 
				 {
			
 
				 	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
			
 
				 	unsigned int		level;
			
 
				-	int			lblock_ok; /* block passes checks */
			
 
				 
			
 
				-	/* magic number and level verification.
			
 
				+	switch (block->bb_magic) {
			
 
				+	case cpu_to_be32(XFS_BMAP_CRC_MAGIC):
			
 
				+		if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+			return false;
			
 
				+		if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid))
			
 
				+			return false;
			
 
				+		if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn)
			
 
				+			return false;
			
 
				+		/*
			
 
				+		 * XXX: need a better way of verifying the owner here. Right now
			
 
				+		 * just make sure there has been one set.
			
 
				+		 */
			
 
				+		if (be64_to_cpu(block->bb_u.l.bb_owner) == 0)
			
 
				+			return false;
			
 
				+		/* fall through */
			
 
				+	case cpu_to_be32(XFS_BMAP_MAGIC):
			
 
				+		break;
			
 
				+	default:
			
 
				+		return false;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * numrecs and level verification.
			
 
				 	 *
			
 
				-	 * We don't know waht fork we belong to, so just verify that the level
			
 
				+	 * We don't know what fork we belong to, so just verify that the level
			
 
				 	 * is less than the maximum of the two. Later checks will be more
			
 
				 	 * precise.
			
 
				 	 */
			
 
				 	level = be16_to_cpu(block->bb_level);
			
 
				-	lblock_ok = block->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC) &&
			
 
				-		    level < max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]);
			
 
				-
			
 
				-	/* numrecs verification */
			
 
				-	lblock_ok = lblock_ok &&
			
 
				-		be16_to_cpu(block->bb_numrecs) <= mp->m_bmap_dmxr[level != 0];
			
 
				+	if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]))
			
 
				+		return false;
			
 
				+	if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
			
 
				+		return false;
			
 
				 
			
 
				 	/* sibling pointer verification */
			
 
				-	lblock_ok = lblock_ok &&
			
 
				-		block->bb_u.l.bb_leftsib &&
			
 
				-		(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
			
 
				-		 XFS_FSB_SANITY_CHECK(mp,
			
 
				-			be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
			
 
				-		block->bb_u.l.bb_rightsib &&
			
 
				-		(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
			
 
				-		 XFS_FSB_SANITY_CHECK(mp,
			
 
				-			be64_to_cpu(block->bb_u.l.bb_rightsib)));
			
 
				-
			
 
				-	if (!lblock_ok) {
			
 
				-		trace_xfs_btree_corrupt(bp, _RET_IP_);
			
 
				-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
			
 
				-		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				-	}
			
 
				+	if (!block->bb_u.l.bb_leftsib ||
			
 
				+	    (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) &&
			
 
				+	     !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))))
			
 
				+		return false;
			
 
				+	if (!block->bb_u.l.bb_rightsib ||
			
 
				+	    (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) &&
			
 
				+	     !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))))
			
 
				+		return false;
			
 
				+
			
 
				+	return true;
			
 
				+
			
 
				 }
			
 
				 
			
 
				 static void
			
 
				 xfs_bmbt_read_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_bmbt_verify(bp);
			
 
				+	if (!(xfs_btree_lblock_verify_crc(bp) &&
			
 
				+	      xfs_bmbt_verify(bp))) {
			
 
				+		trace_xfs_btree_corrupt(bp, _RET_IP_);
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
			
 
				+				     bp->b_target->bt_mount, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+	}
			
 
				+
			
 
				 }
			
 
				 
			
 
				 static void
			
 
				 xfs_bmbt_write_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_bmbt_verify(bp);
			
 
				+	if (!xfs_bmbt_verify(bp)) {
			
 
				+		xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn);
			
 
				+		trace_xfs_btree_corrupt(bp, _RET_IP_);
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
			
 
				+				     bp->b_target->bt_mount, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+		return;
			
 
				+	}
			
 
				+	xfs_btree_lblock_calc_crc(bp);
			
 
				 }
			
 
				 
			
 
				 const struct xfs_buf_ops xfs_bmbt_buf_ops = {
			
@@ -838,6 +882,8 @@ xfs_bmbt_init_cursor(
 
				 
			
 
				 	cur->bc_ops = &xfs_bmbt_ops;
			
 
				 	cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
			
 
				 
			
 
				 	cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
			
 
				 	cur->bc_private.b.ip = ip;
			
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -18,7 +18,8 @@
 
				 #ifndef __XFS_BMAP_BTREE_H__
			
 
				 #define __XFS_BMAP_BTREE_H__
			
 
				 
			
 
				-#define XFS_BMAP_MAGIC	0x424d4150	/* 'BMAP' */
			
 
				+#define XFS_BMAP_MAGIC		0x424d4150	/* 'BMAP' */
			
 
				+#define XFS_BMAP_CRC_MAGIC	0x424d4133	/* 'BMA3' */
			
 
				 
			
 
				 struct xfs_btree_cur;
			
 
				 struct xfs_btree_block;
			
@@ -136,10 +137,10 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
 
				 
			
 
				 /*
			
 
				  * Btree block header size depends on a superblock flag.
			
 
				- *
			
 
				- * (not quite yet, but soon)
			
 
				  */
			
 
				-#define XFS_BMBT_BLOCK_LEN(mp)	XFS_BTREE_LBLOCK_LEN
			
 
				+#define XFS_BMBT_BLOCK_LEN(mp) \
			
 
				+	(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
			
 
				+		XFS_BTREE_LBLOCK_CRC_LEN : XFS_BTREE_LBLOCK_LEN)
			
 
				 
			
 
				 #define XFS_BMBT_REC_ADDR(mp, block, index) \
			
 
				 	((xfs_bmbt_rec_t *) \
			
@@ -186,12 +187,12 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
 
				 #define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \
			
 
				 	XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0))
			
 
				 
			
 
				-#define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \
			
 
				-	(int)(XFS_BTREE_LBLOCK_LEN + \
			
 
				+#define XFS_BMAP_BROOT_SPACE_CALC(mp, nrecs) \
			
 
				+	(int)(XFS_BMBT_BLOCK_LEN(mp) + \
			
 
				 	       ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
			
 
				 
			
 
				-#define XFS_BMAP_BROOT_SPACE(bb) \
			
 
				-	(XFS_BMAP_BROOT_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs)))
			
 
				+#define XFS_BMAP_BROOT_SPACE(mp, bb) \
			
 
				+	(XFS_BMAP_BROOT_SPACE_CALC(mp, be16_to_cpu((bb)->bb_numrecs)))
			
 
				 #define XFS_BMDR_SPACE_CALC(nrecs) \
			
 
				 	(int)(sizeof(xfs_bmdr_block_t) + \
			
 
				 	       ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
			
@@ -204,7 +205,7 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
 
				 /*
			
 
				  * Prototypes for xfs_bmap.c to call.
			
 
				  */
			
 
				-extern void xfs_bmdr_to_bmbt(struct xfs_mount *, xfs_bmdr_block_t *, int,
			
 
				+extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int,
			
 
				 			struct xfs_btree_block *, int);
			
 
				 extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
			
 
				 extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r);
			
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -30,9 +30,11 @@
 
				 #include "xfs_dinode.h"
			
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_inode_item.h"
			
 
				+#include "xfs_buf_item.h"
			
 
				 #include "xfs_btree.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_trace.h"
			
 
				+#include "xfs_cksum.h"
			
 
				 
			
 
				 /*
			
 
				  * Cursor allocation zone.
			
@@ -42,9 +44,13 @@ kmem_zone_t	*xfs_btree_cur_zone;
 
				 /*
			
 
				  * Btree magic numbers.
			
 
				  */
			
 
				-const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
			
 
				-	XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
			
 
				+static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
			
 
				+	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC },
			
 
				+	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
			
 
				+	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC }
			
 
				 };
			
 
				+#define xfs_btree_magic(cur) \
			
 
				+	xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
			
 
				 
			
 
				 
			
 
				 STATIC int				/* error (0 or EFSCORRUPTED) */
			
@@ -54,30 +60,38 @@ xfs_btree_check_lblock(
 
				 	int			level,	/* level of the btree block */
			
 
				 	struct xfs_buf		*bp)	/* buffer for block, if any */
			
 
				 {
			
 
				-	int			lblock_ok; /* block passes checks */
			
 
				+	int			lblock_ok = 1; /* block passes checks */
			
 
				 	struct xfs_mount	*mp;	/* file system mount point */
			
 
				 
			
 
				 	mp = cur->bc_mp;
			
 
				-	lblock_ok =
			
 
				-		be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
			
 
				+
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				+		lblock_ok = lblock_ok &&
			
 
				+			uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) &&
			
 
				+			block->bb_u.l.bb_blkno == cpu_to_be64(
			
 
				+				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
			
 
				+	}
			
 
				+
			
 
				+	lblock_ok = lblock_ok &&
			
 
				+		be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
			
 
				 		be16_to_cpu(block->bb_level) == level &&
			
 
				 		be16_to_cpu(block->bb_numrecs) <=
			
 
				 			cur->bc_ops->get_maxrecs(cur, level) &&
			
 
				 		block->bb_u.l.bb_leftsib &&
			
 
				 		(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
			
 
				 		 XFS_FSB_SANITY_CHECK(mp,
			
 
				-		 	be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
			
 
				+			be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
			
 
				 		block->bb_u.l.bb_rightsib &&
			
 
				 		(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
			
 
				 		 XFS_FSB_SANITY_CHECK(mp,
			
 
				-		 	be64_to_cpu(block->bb_u.l.bb_rightsib)));
			
 
				+			be64_to_cpu(block->bb_u.l.bb_rightsib)));
			
 
				+
			
 
				 	if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
			
 
				 			XFS_ERRTAG_BTREE_CHECK_LBLOCK,
			
 
				 			XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
			
 
				 		if (bp)
			
 
				 			trace_xfs_btree_corrupt(bp, _RET_IP_);
			
 
				-		XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
			
 
				-				 mp);
			
 
				+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
			
 
				 		return XFS_ERROR(EFSCORRUPTED);
			
 
				 	}
			
 
				 	return 0;
			
@@ -90,16 +104,26 @@ xfs_btree_check_sblock(
 
				 	int			level,	/* level of the btree block */
			
 
				 	struct xfs_buf		*bp)	/* buffer containing block */
			
 
				 {
			
 
				+	struct xfs_mount	*mp;	/* file system mount point */
			
 
				 	struct xfs_buf		*agbp;	/* buffer for ag. freespace struct */
			
 
				 	struct xfs_agf		*agf;	/* ag. freespace structure */
			
 
				 	xfs_agblock_t		agflen;	/* native ag. freespace length */
			
 
				-	int			sblock_ok; /* block passes checks */
			
 
				+	int			sblock_ok = 1; /* block passes checks */
			
 
				 
			
 
				+	mp = cur->bc_mp;
			
 
				 	agbp = cur->bc_private.a.agbp;
			
 
				 	agf = XFS_BUF_TO_AGF(agbp);
			
 
				 	agflen = be32_to_cpu(agf->agf_length);
			
 
				-	sblock_ok =
			
 
				-		be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
			
 
				+
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				+		sblock_ok = sblock_ok &&
			
 
				+			uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) &&
			
 
				+			block->bb_u.s.bb_blkno == cpu_to_be64(
			
 
				+				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
			
 
				+	}
			
 
				+
			
 
				+	sblock_ok = sblock_ok &&
			
 
				+		be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
			
 
				 		be16_to_cpu(block->bb_level) == level &&
			
 
				 		be16_to_cpu(block->bb_numrecs) <=
			
 
				 			cur->bc_ops->get_maxrecs(cur, level) &&
			
@@ -109,13 +133,13 @@ xfs_btree_check_sblock(
 
				 		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
			
 
				 		 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
			
 
				 		block->bb_u.s.bb_rightsib;
			
 
				-	if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
			
 
				+
			
 
				+	if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
			
 
				 			XFS_ERRTAG_BTREE_CHECK_SBLOCK,
			
 
				 			XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
			
 
				 		if (bp)
			
 
				 			trace_xfs_btree_corrupt(bp, _RET_IP_);
			
 
				-		XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
			
 
				-			XFS_ERRLEVEL_LOW, cur->bc_mp, block);
			
 
				+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
			
 
				 		return XFS_ERROR(EFSCORRUPTED);
			
 
				 	}
			
 
				 	return 0;
			
@@ -193,6 +217,72 @@ xfs_btree_check_ptr(
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+/*
			
 
				+ * Calculate CRC on the whole btree block and stuff it into the
			
 
				+ * long-form btree header.
			
 
				+ *
			
 
				+ * Prior to calculating the CRC, pull the LSN out of the buffer log item and put
			
 
				+ * it into the buffer so recovery knows what the last modification was that made
			
 
				+ * it to disk.
			
 
				+ */
			
 
				+void
			
 
				+xfs_btree_lblock_calc_crc(
			
 
				+	struct xfs_buf		*bp)
			
 
				+{
			
 
				+	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
			
 
				+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
			
 
				+
			
 
				+	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
			
 
				+		return;
			
 
				+	if (bip)
			
 
				+		block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
			
 
				+	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				+			 XFS_BTREE_LBLOCK_CRC_OFF);
			
 
				+}
			
 
				+
			
 
				+bool
			
 
				+xfs_btree_lblock_verify_crc(
			
 
				+	struct xfs_buf		*bp)
			
 
				+{
			
 
				+	if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
			
 
				+		return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				+					XFS_BTREE_LBLOCK_CRC_OFF);
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Calculate CRC on the whole btree block and stuff it into the
			
 
				+ * short-form btree header.
			
 
				+ *
			
 
				+ * Prior to calculating the CRC, pull the LSN out of the buffer log item and put
			
 
				+ * it into the buffer so recovery knows what the last modification was that made
			
 
				+ * it to disk.
			
 
				+ */
			
 
				+void
			
 
				+xfs_btree_sblock_calc_crc(
			
 
				+	struct xfs_buf		*bp)
			
 
				+{
			
 
				+	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
			
 
				+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
			
 
				+
			
 
				+	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
			
 
				+		return;
			
 
				+	if (bip)
			
 
				+		block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
			
 
				+	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				+			 XFS_BTREE_SBLOCK_CRC_OFF);
			
 
				+}
			
 
				+
			
 
				+bool
			
 
				+xfs_btree_sblock_verify_crc(
			
 
				+	struct xfs_buf		*bp)
			
 
				+{
			
 
				+	if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
			
 
				+		return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				+					XFS_BTREE_SBLOCK_CRC_OFF);
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Delete the btree cursor.
			
 
				  */
			
@@ -277,10 +367,8 @@ xfs_btree_dup_cursor(
 
				 				*ncur = NULL;
			
 
				 				return error;
			
 
				 			}
			
 
				-			new->bc_bufs[i] = bp;
			
 
				-			ASSERT(!xfs_buf_geterror(bp));
			
 
				-		} else
			
 
				-			new->bc_bufs[i] = NULL;
			
 
				+		}
			
 
				+		new->bc_bufs[i] = bp;
			
 
				 	}
			
 
				 	*ncur = new;
			
 
				 	return 0;
			
@@ -321,9 +409,14 @@ xfs_btree_dup_cursor(
 
				  */
			
 
				 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
			
 
				 {
			
 
				-	return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
			
 
				-		XFS_BTREE_LBLOCK_LEN :
			
 
				-		XFS_BTREE_SBLOCK_LEN;
			
 
				+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
			
 
				+		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
			
 
				+			return XFS_BTREE_LBLOCK_CRC_LEN;
			
 
				+		return XFS_BTREE_LBLOCK_LEN;
			
 
				+	}
			
 
				+	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
			
 
				+		return XFS_BTREE_SBLOCK_CRC_LEN;
			
 
				+	return XFS_BTREE_SBLOCK_LEN;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -862,6 +955,44 @@ xfs_btree_set_sibling(
 
				 	}
			
 
				 }
			
 
				 
			
 
				+void
			
 
				+xfs_btree_init_block_int(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_btree_block	*buf,
			
 
				+	xfs_daddr_t		blkno,
			
 
				+	__u32			magic,
			
 
				+	__u16			level,
			
 
				+	__u16			numrecs,
			
 
				+	__u64			owner,
			
 
				+	unsigned int		flags)
			
 
				+{
			
 
				+	buf->bb_magic = cpu_to_be32(magic);
			
 
				+	buf->bb_level = cpu_to_be16(level);
			
 
				+	buf->bb_numrecs = cpu_to_be16(numrecs);
			
 
				+
			
 
				+	if (flags & XFS_BTREE_LONG_PTRS) {
			
 
				+		buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
			
 
				+		buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
			
 
				+		if (flags & XFS_BTREE_CRC_BLOCKS) {
			
 
				+			buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
			
 
				+			buf->bb_u.l.bb_owner = cpu_to_be64(owner);
			
 
				+			uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
			
 
				+			buf->bb_u.l.bb_pad = 0;
			
 
				+		}
			
 
				+	} else {
			
 
				+		/* owner is a 32 bit value on short blocks */
			
 
				+		__u32 __owner = (__u32)owner;
			
 
				+
			
 
				+		buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
			
 
				+		buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
			
 
				+		if (flags & XFS_BTREE_CRC_BLOCKS) {
			
 
				+			buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
			
 
				+			buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
			
 
				+			uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 void
			
 
				 xfs_btree_init_block(
			
 
				 	struct xfs_mount *mp,
			
@@ -869,37 +1000,41 @@ xfs_btree_init_block(
 
				 	__u32		magic,
			
 
				 	__u16		level,
			
 
				 	__u16		numrecs,
			
 
				+	__u64		owner,
			
 
				 	unsigned int	flags)
			
 
				 {
			
 
				-	struct xfs_btree_block	*new = XFS_BUF_TO_BLOCK(bp);
			
 
				-
			
 
				-	new->bb_magic = cpu_to_be32(magic);
			
 
				-	new->bb_level = cpu_to_be16(level);
			
 
				-	new->bb_numrecs = cpu_to_be16(numrecs);
			
 
				-
			
 
				-	if (flags & XFS_BTREE_LONG_PTRS) {
			
 
				-		new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
			
 
				-		new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
			
 
				-	} else {
			
 
				-		new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
			
 
				-		new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
			
 
				-	}
			
 
				+	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
			
 
				+				 magic, level, numrecs, owner, flags);
			
 
				 }
			
 
				 
			
 
				 STATIC void
			
 
				 xfs_btree_init_block_cur(
			
 
				 	struct xfs_btree_cur	*cur,
			
 
				+	struct xfs_buf		*bp,
			
 
				 	int			level,
			
 
				-	int			numrecs,
			
 
				-	struct xfs_buf		*bp)
			
 
				+	int			numrecs)
			
 
				 {
			
 
				-	xfs_btree_init_block(cur->bc_mp, bp, xfs_magics[cur->bc_btnum],
			
 
				-			       level, numrecs, cur->bc_flags);
			
 
				+	__u64 owner;
			
 
				+
			
 
				+	/*
			
 
				+	 * we can pull the owner from the cursor right now as the different
			
 
				+	 * owners align directly with the pointer size of the btree. This may
			
 
				+	 * change in future, but is safe for current users of the generic btree
			
 
				+	 * code.
			
 
				+	 */
			
 
				+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
			
 
				+		owner = cur->bc_private.b.ip->i_ino;
			
 
				+	else
			
 
				+		owner = cur->bc_private.a.agno;
			
 
				+
			
 
				+	xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
			
 
				+				 xfs_btree_magic(cur), level, numrecs,
			
 
				+				 owner, cur->bc_flags);
			
 
				 }
			
 
				 
			
 
				 /*
			
 
				  * Return true if ptr is the last record in the btree and
			
 
				- * we need to track updateѕ to this record.  The decision
			
 
				+ * we need to track updates to this record.  The decision
			
 
				  * will be further refined in the update_lastrec method.
			
 
				  */
			
 
				 STATIC int
			
@@ -1147,6 +1282,7 @@ xfs_btree_log_keys(
 
				 	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
			
 
				 
			
 
				 	if (bp) {
			
 
				+		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
			
 
				 		xfs_trans_log_buf(cur->bc_tp, bp,
			
 
				 				  xfs_btree_key_offset(cur, first),
			
 
				 				  xfs_btree_key_offset(cur, last + 1) - 1);
			
@@ -1171,6 +1307,7 @@ xfs_btree_log_recs(
 
				 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
			
 
				 	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
			
 
				 
			
 
				+	xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
			
 
				 	xfs_trans_log_buf(cur->bc_tp, bp,
			
 
				 			  xfs_btree_rec_offset(cur, first),
			
 
				 			  xfs_btree_rec_offset(cur, last + 1) - 1);
			
@@ -1195,6 +1332,7 @@ xfs_btree_log_ptrs(
 
				 		struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
			
 
				 		int			level = xfs_btree_get_level(block);
			
 
				 
			
 
				+		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
			
 
				 		xfs_trans_log_buf(cur->bc_tp, bp,
			
 
				 				xfs_btree_ptr_offset(cur, first, level),
			
 
				 				xfs_btree_ptr_offset(cur, last + 1, level) - 1);
			
@@ -1223,7 +1361,12 @@ xfs_btree_log_block(
 
				 		offsetof(struct xfs_btree_block, bb_numrecs),
			
 
				 		offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
			
 
				 		offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
			
 
				-		XFS_BTREE_SBLOCK_LEN
			
 
				+		offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
			
 
				+		offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
			
 
				+		offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
			
 
				+		offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
			
 
				+		offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
			
 
				+		XFS_BTREE_SBLOCK_CRC_LEN
			
 
				 	};
			
 
				 	static const short	loffsets[] = {	/* table of offsets (long) */
			
 
				 		offsetof(struct xfs_btree_block, bb_magic),
			
@@ -1231,17 +1374,40 @@ xfs_btree_log_block(
 
				 		offsetof(struct xfs_btree_block, bb_numrecs),
			
 
				 		offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
			
 
				 		offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
			
 
				-		XFS_BTREE_LBLOCK_LEN
			
 
				+		offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
			
 
				+		offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
			
 
				+		offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
			
 
				+		offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
			
 
				+		offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
			
 
				+		offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
			
 
				+		XFS_BTREE_LBLOCK_CRC_LEN
			
 
				 	};
			
 
				 
			
 
				 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
			
 
				 	XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
			
 
				 
			
 
				 	if (bp) {
			
 
				+		int nbits;
			
 
				+
			
 
				+		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
			
 
				+			/*
			
 
				+			 * We don't log the CRC when updating a btree
			
 
				+			 * block but instead recreate it during log
			
 
				+			 * recovery.  As the log buffers have checksums
			
 
				+			 * of their own this is safe and avoids logging a crc
			
 
				+			 * update in a lot of places.
			
 
				+			 */
			
 
				+			if (fields == XFS_BB_ALL_BITS)
			
 
				+				fields = XFS_BB_ALL_BITS_CRC;
			
 
				+			nbits = XFS_BB_NUM_BITS_CRC;
			
 
				+		} else {
			
 
				+			nbits = XFS_BB_NUM_BITS;
			
 
				+		}
			
 
				 		xfs_btree_offsets(fields,
			
 
				 				  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
			
 
				 					loffsets : soffsets,
			
 
				-				  XFS_BB_NUM_BITS, &first, &last);
			
 
				+				  nbits, &first, &last);
			
 
				+		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
			
 
				 		xfs_trans_log_buf(cur->bc_tp, bp, first, last);
			
 
				 	} else {
			
 
				 		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
			
@@ -2204,7 +2370,7 @@ xfs_btree_split(
 
				 		goto error0;
			
 
				 
			
 
				 	/* Fill in the btree header for the new right block. */
			
 
				-	xfs_btree_init_block_cur(cur, xfs_btree_get_level(left), 0, rbp);
			
 
				+	xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
			
 
				 
			
 
				 	/*
			
 
				 	 * Split the entries between the old and the new block evenly.
			
@@ -2513,7 +2679,7 @@ xfs_btree_new_root(
 
				 		nptr = 2;
			
 
				 	}
			
 
				 	/* Fill in the new block's btree header and log it. */
			
 
				-	xfs_btree_init_block_cur(cur, cur->bc_nlevels, 2, nbp);
			
 
				+	xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
			
 
				 	xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
			
 
				 	ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
			
 
				 			!xfs_btree_ptr_is_null(cur, &rptr));
			
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -42,11 +42,15 @@ extern kmem_zone_t	*xfs_btree_cur_zone;
 
				  * Generic btree header.
			
 
				  *
			
 
				  * This is a combination of the actual format used on disk for short and long
			
 
				- * format btrees.  The first three fields are shared by both format, but
			
 
				- * the pointers are different and should be used with care.
			
 
				+ * format btrees.  The first three fields are shared by both formats, but the
			
 
				+ * pointers are different and should be used with care.
			
 
				  *
			
 
				- * To get the size of the actual short or long form headers please use
			
 
				- * the size macros below.  Never use sizeof(xfs_btree_block).
			
 
				+ * To get the size of the actual short or long form headers please use the size
			
 
				+ * macros below.  Never use sizeof(xfs_btree_block).
			
 
				+ *
			
 
				+ * The blkno, crc, lsn, owner and uuid fields are only available in filesystems
			
 
				+ * with the crc feature bit, and all accesses to them must be conditional on
			
 
				+ * that flag.
			
 
				  */
			
 
				 struct xfs_btree_block {
			
 
				 	__be32		bb_magic;	/* magic number for block type */
			
@@ -56,10 +60,23 @@ struct xfs_btree_block {
 
				 		struct {
			
 
				 			__be32		bb_leftsib;
			
 
				 			__be32		bb_rightsib;
			
 
				+
			
 
				+			__be64		bb_blkno;
			
 
				+			__be64		bb_lsn;
			
 
				+			uuid_t		bb_uuid;
			
 
				+			__be32		bb_owner;
			
 
				+			__le32		bb_crc;
			
 
				 		} s;			/* short form pointers */
			
 
				 		struct	{
			
 
				 			__be64		bb_leftsib;
			
 
				 			__be64		bb_rightsib;
			
 
				+
			
 
				+			__be64		bb_blkno;
			
 
				+			__be64		bb_lsn;
			
 
				+			uuid_t		bb_uuid;
			
 
				+			__be64		bb_owner;
			
 
				+			__le32		bb_crc;
			
 
				+			__be32		bb_pad; /* padding for alignment */
			
 
				 		} l;			/* long form pointers */
			
 
				 	} bb_u;				/* rest */
			
 
				 };
			
@@ -67,6 +84,16 @@ struct xfs_btree_block {
 
				 #define XFS_BTREE_SBLOCK_LEN	16	/* size of a short form block */
			
 
				 #define XFS_BTREE_LBLOCK_LEN	24	/* size of a long form block */
			
 
				 
			
 
				+/* sizes of CRC enabled btree blocks */
			
 
				+#define XFS_BTREE_SBLOCK_CRC_LEN	(XFS_BTREE_SBLOCK_LEN + 40)
			
 
				+#define XFS_BTREE_LBLOCK_CRC_LEN	(XFS_BTREE_LBLOCK_LEN + 48)
			
 
				+
			
 
				+
			
 
				+#define XFS_BTREE_SBLOCK_CRC_OFF \
			
 
				+	offsetof(struct xfs_btree_block, bb_u.s.bb_crc)
			
 
				+#define XFS_BTREE_LBLOCK_CRC_OFF \
			
 
				+	offsetof(struct xfs_btree_block, bb_u.l.bb_crc)
			
 
				+
			
 
				 
			
 
				 /*
			
 
				  * Generic key, ptr and record wrapper structures.
			
@@ -101,13 +128,11 @@ union xfs_btree_rec {
 
				 #define	XFS_BB_NUMRECS		0x04
			
 
				 #define	XFS_BB_LEFTSIB		0x08
			
 
				 #define	XFS_BB_RIGHTSIB		0x10
			
 
				+#define	XFS_BB_BLKNO		0x20
			
 
				 #define	XFS_BB_NUM_BITS		5
			
 
				 #define	XFS_BB_ALL_BITS		((1 << XFS_BB_NUM_BITS) - 1)
			
 
				-
			
 
				-/*
			
 
				- * Magic numbers for btree blocks.
			
 
				- */
			
 
				-extern const __uint32_t	xfs_magics[];
			
 
				+#define	XFS_BB_NUM_BITS_CRC	8
			
 
				+#define	XFS_BB_ALL_BITS_CRC	((1 << XFS_BB_NUM_BITS_CRC) - 1)
			
 
				 
			
 
				 /*
			
 
				  * Generic stats interface
			
@@ -256,6 +281,7 @@ typedef struct xfs_btree_cur
 
				 #define XFS_BTREE_LONG_PTRS		(1<<0)	/* pointers are 64bits long */
			
 
				 #define XFS_BTREE_ROOT_IN_INODE		(1<<1)	/* root may be variable size */
			
 
				 #define XFS_BTREE_LASTREC_UPDATE	(1<<2)	/* track last rec externally */
			
 
				+#define XFS_BTREE_CRC_BLOCKS		(1<<3)	/* uses extended btree blocks */
			
 
				 
			
 
				 
			
 
				 #define	XFS_BTREE_NOERROR	0
			
@@ -393,8 +419,20 @@ xfs_btree_init_block(
 
				 	__u32		magic,
			
 
				 	__u16		level,
			
 
				 	__u16		numrecs,
			
 
				+	__u64		owner,
			
 
				 	unsigned int	flags);
			
 
				 
			
 
				+void
			
 
				+xfs_btree_init_block_int(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_btree_block	*buf,
			
 
				+	xfs_daddr_t		blkno,
			
 
				+	__u32			magic,
			
 
				+	__u16			level,
			
 
				+	__u16			numrecs,
			
 
				+	__u64			owner,
			
 
				+	unsigned int		flags);
			
 
				+
			
 
				 /*
			
 
				  * Common btree core entry points.
			
 
				  */
			
@@ -407,6 +445,14 @@ int xfs_btree_insert(struct xfs_btree_cur *, int *);
 
				 int xfs_btree_delete(struct xfs_btree_cur *, int *);
			
 
				 int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
			
 
				 
			
 
				+/*
			
 
				+ * btree block CRC helpers
			
 
				+ */
			
 
				+void xfs_btree_lblock_calc_crc(struct xfs_buf *);
			
 
				+bool xfs_btree_lblock_verify_crc(struct xfs_buf *);
			
 
				+void xfs_btree_sblock_calc_crc(struct xfs_buf *);
			
 
				+bool xfs_btree_sblock_verify_crc(struct xfs_buf *);
			
 
				+
			
 
				 /*
			
 
				  * Internal btree helpers also used by xfs_bmap.c.
			
 
				  */
			
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1022,7 +1022,9 @@ xfs_buf_iodone_work(
 
				 	bool			read = !!(bp->b_flags & XBF_READ);
			
 
				 
			
 
				 	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
			
 
				-	if (read && bp->b_ops)
			
 
				+
			
 
				+	/* only validate buffers that were read without errors */
			
 
				+	if (read && bp->b_ops && !bp->b_error && (bp->b_flags & XBF_DONE))
			
 
				 		bp->b_ops->verify_read(bp);
			
 
				 
			
 
				 	if (bp->b_iodone)
			
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -24,19 +24,20 @@ extern kmem_zone_t	*xfs_buf_item_zone;
 
				  * This flag indicates that the buffer contains on disk inodes
			
 
				  * and requires special recovery handling.
			
 
				  */
			
 
				-#define	XFS_BLF_INODE_BUF	0x1
			
 
				+#define	XFS_BLF_INODE_BUF	(1<<0)
			
 
				 /*
			
 
				  * This flag indicates that the buffer should not be replayed
			
 
				  * during recovery because its blocks are being freed.
			
 
				  */
			
 
				-#define	XFS_BLF_CANCEL		0x2
			
 
				+#define	XFS_BLF_CANCEL		(1<<1)
			
 
				+
			
 
				 /*
			
 
				  * This flag indicates that the buffer contains on disk
			
 
				  * user or group dquots and may require special recovery handling.
			
 
				  */
			
 
				-#define	XFS_BLF_UDQUOT_BUF	0x4
			
 
				-#define XFS_BLF_PDQUOT_BUF	0x8
			
 
				-#define	XFS_BLF_GDQUOT_BUF	0x10
			
 
				+#define	XFS_BLF_UDQUOT_BUF	(1<<2)
			
 
				+#define XFS_BLF_PDQUOT_BUF	(1<<3)
			
 
				+#define	XFS_BLF_GDQUOT_BUF	(1<<4)
			
 
				 
			
 
				 #define	XFS_BLF_CHUNK		128
			
 
				 #define	XFS_BLF_SHIFT		7
			
@@ -60,6 +61,55 @@ typedef struct xfs_buf_log_format {
 
				 	unsigned int	blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */
			
 
				 } xfs_buf_log_format_t;
			
 
				 
			
 
				+/*
			
 
				+ * All buffers now need to tell recovery where the magic number
			
 
				+ * is so that it can verify and calculate the CRCs on the buffer correctly
			
 
				+ * once the changes have been replayed into the buffer.
			
 
				+ *
			
 
				+ * The type value is held in the upper 5 bits of the blf_flags field, which is
			
 
				+ * an unsigned 16 bit field. Hence we need to shift it 11 bits up and down.
			
 
				+ */
			
 
				+#define XFS_BLFT_BITS	5
			
 
				+#define XFS_BLFT_SHIFT	11
			
 
				+#define XFS_BLFT_MASK	(((1 << XFS_BLFT_BITS) - 1) << XFS_BLFT_SHIFT)
			
 
				+
			
 
				+enum xfs_blft {
			
 
				+	XFS_BLFT_UNKNOWN_BUF = 0,
			
 
				+	XFS_BLFT_UDQUOT_BUF,
			
 
				+	XFS_BLFT_PDQUOT_BUF,
			
 
				+	XFS_BLFT_GDQUOT_BUF,
			
 
				+	XFS_BLFT_BTREE_BUF,
			
 
				+	XFS_BLFT_AGF_BUF,
			
 
				+	XFS_BLFT_AGFL_BUF,
			
 
				+	XFS_BLFT_AGI_BUF,
			
 
				+	XFS_BLFT_DINO_BUF,
			
 
				+	XFS_BLFT_SYMLINK_BUF,
			
 
				+	XFS_BLFT_DIR_BLOCK_BUF,
			
 
				+	XFS_BLFT_DIR_DATA_BUF,
			
 
				+	XFS_BLFT_DIR_FREE_BUF,
			
 
				+	XFS_BLFT_DIR_LEAF1_BUF,
			
 
				+	XFS_BLFT_DIR_LEAFN_BUF,
			
 
				+	XFS_BLFT_DA_NODE_BUF,
			
 
				+	XFS_BLFT_ATTR_LEAF_BUF,
			
 
				+	XFS_BLFT_ATTR_RMT_BUF,
			
 
				+	XFS_BLFT_SB_BUF,
			
 
				+	XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS),
			
 
				+};
			
 
				+
			
 
				+static inline void
			
 
				+xfs_blft_to_flags(struct xfs_buf_log_format *blf, enum xfs_blft type)
			
 
				+{
			
 
				+	ASSERT(type > XFS_BLFT_UNKNOWN_BUF && type < XFS_BLFT_MAX_BUF);
			
 
				+	blf->blf_flags &= ~XFS_BLFT_MASK;
			
 
				+	blf->blf_flags |= ((type << XFS_BLFT_SHIFT) & XFS_BLFT_MASK);
			
 
				+}
			
 
				+
			
 
				+static inline __uint16_t
			
 
				+xfs_blft_from_flags(struct xfs_buf_log_format *blf)
			
 
				+{
			
 
				+	return (blf->blf_flags & XFS_BLFT_MASK) >> XFS_BLFT_SHIFT;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * buf log item flags
			
 
				  */
			
@@ -113,6 +163,10 @@ void	xfs_buf_attach_iodone(struct xfs_buf *,
 
				 void	xfs_buf_iodone_callbacks(struct xfs_buf *);
			
 
				 void	xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
			
 
				 
			
 
				+void	xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *,
			
 
				+			       enum xfs_blft);
			
 
				+void	xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, struct xfs_buf *src_bp);
			
 
				+
			
 
				 #endif	/* __KERNEL__ */
			
 
				 
			
 
				 #endif	/* __XFS_BUF_ITEM_H__ */
			
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -1,5 +1,6 @@
 
				 /*
			
 
				  * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
			
 
				+ * Copyright (c) 2013 Red Hat, Inc.
			
 
				  * All Rights Reserved.
			
 
				  *
			
 
				  * This program is free software; you can redistribute it and/or
			
@@ -20,7 +21,6 @@
 
				 
			
 
				 struct xfs_bmap_free;
			
 
				 struct xfs_inode;
			
 
				-struct xfs_mount;
			
 
				 struct xfs_trans;
			
 
				 struct zone;
			
 
				 
			
@@ -46,6 +46,33 @@ typedef struct xfs_da_blkinfo {
 
				 	__be16		pad;			/* unused */
			
 
				 } xfs_da_blkinfo_t;
			
 
				 
			
 
				+/*
			
 
				+ * CRC enabled directory structure types
			
 
				+ *
			
 
				+ * The headers change size for the additional verification information, but
			
 
				+ * otherwise the tree layouts and contents are unchanged. Hence the da btree
			
 
				+ * code can use the struct xfs_da_blkinfo for manipulating the tree links and
			
 
				+ * magic numbers without modification for both v2 and v3 nodes.
			
 
				+ */
			
 
				+#define XFS_DA3_NODE_MAGIC	0x3ebe	/* magic number: non-leaf blocks */
			
 
				+#define XFS_ATTR3_LEAF_MAGIC	0x3bee	/* magic number: attribute leaf blks */
			
 
				+#define	XFS_DIR3_LEAF1_MAGIC	0x3df1	/* magic number: v2 dirlf single blks */
			
 
				+#define	XFS_DIR3_LEAFN_MAGIC	0x3dff	/* magic number: v2 dirlf multi blks */
			
 
				+
			
 
				+struct xfs_da3_blkinfo {
			
 
				+	/*
			
 
				+	 * the node link manipulation code relies on the fact that the first
			
 
				+	 * element of this structure is the struct xfs_da_blkinfo so it can
			
 
				+	 * ignore the differences in the rest of the structures.
			
 
				+	 */
			
 
				+	struct xfs_da_blkinfo	hdr;
			
 
				+	__be32			crc;	/* CRC of block */
			
 
				+	__be64			blkno;	/* first block of the buffer */
			
 
				+	__be64			lsn;	/* sequence number of last write */
			
 
				+	uuid_t			uuid;	/* filesystem we belong to */
			
 
				+	__be64			owner;	/* inode that owns the block */
			
 
				+};
			
 
				+
			
 
				 /*
			
 
				  * This is the structure of the root and intermediate nodes in the Btree.
			
 
				  * The leaf nodes are defined above.
			
@@ -57,19 +84,76 @@ typedef struct xfs_da_blkinfo {
 
				  */
			
 
				 #define	XFS_DA_NODE_MAXDEPTH	5	/* max depth of Btree */
			
 
				 
			
 
				+typedef struct xfs_da_node_hdr {
			
 
				+	struct xfs_da_blkinfo	info;	/* block type, links, etc. */
			
 
				+	__be16			__count; /* count of active entries */
			
 
				+	__be16			__level; /* level above leaves (leaf == 0) */
			
 
				+} xfs_da_node_hdr_t;
			
 
				+
			
 
				+struct xfs_da3_node_hdr {
			
 
				+	struct xfs_da3_blkinfo	info;	/* block type, links, etc. */
			
 
				+	__be16			__count; /* count of active entries */
			
 
				+	__be16			__level; /* level above leaves (leaf == 0) */
			
 
				+	__be32			__pad32;
			
 
				+};
			
 
				+
			
 
				+#define XFS_DA3_NODE_CRC_OFF	(offsetof(struct xfs_da3_node_hdr, info.crc))
			
 
				+
			
 
				+typedef struct xfs_da_node_entry {
			
 
				+	__be32	hashval;	/* hash value for this descendant */
			
 
				+	__be32	before;		/* Btree block before this key */
			
 
				+} xfs_da_node_entry_t;
			
 
				+
			
 
				 typedef struct xfs_da_intnode {
			
 
				-	struct xfs_da_node_hdr {	/* constant-structure header block */
			
 
				-		xfs_da_blkinfo_t info;	/* block type, links, etc. */
			
 
				-		__be16	count;		/* count of active entries */
			
 
				-		__be16	level;		/* level above leaves (leaf == 0) */
			
 
				-	} hdr;
			
 
				-	struct xfs_da_node_entry {
			
 
				-		__be32	hashval;	/* hash value for this descendant */
			
 
				-		__be32	before;		/* Btree block before this key */
			
 
				-	} btree[1];			/* variable sized array of keys */
			
 
				+	struct xfs_da_node_hdr	hdr;
			
 
				+	struct xfs_da_node_entry __btree[];
			
 
				 } xfs_da_intnode_t;
			
 
				-typedef struct xfs_da_node_hdr xfs_da_node_hdr_t;
			
 
				-typedef struct xfs_da_node_entry xfs_da_node_entry_t;
			
 
				+
			
 
				+struct xfs_da3_intnode {
			
 
				+	struct xfs_da3_node_hdr	hdr;
			
 
				+	struct xfs_da_node_entry __btree[];
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * In-core version of the node header to abstract the differences in the v2 and
			
 
				+ * v3 disk format of the headers. Callers need to convert to/from disk format as
			
 
				+ * appropriate.
			
 
				+ */
			
 
				+struct xfs_da3_icnode_hdr {
			
 
				+	__uint32_t	forw;
			
 
				+	__uint32_t	back;
			
 
				+	__uint16_t	magic;
			
 
				+	__uint16_t	count;
			
 
				+	__uint16_t	level;
			
 
				+};
			
 
				+
			
 
				+extern void xfs_da3_node_hdr_from_disk(struct xfs_da3_icnode_hdr *to,
			
 
				+				       struct xfs_da_intnode *from);
			
 
				+extern void xfs_da3_node_hdr_to_disk(struct xfs_da_intnode *to,
			
 
				+				     struct xfs_da3_icnode_hdr *from);
			
 
				+
			
 
				+static inline int
			
 
				+xfs_da3_node_hdr_size(struct xfs_da_intnode *dap)
			
 
				+{
			
 
				+	if (dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC))
			
 
				+		return sizeof(struct xfs_da3_node_hdr);
			
 
				+	return sizeof(struct xfs_da_node_hdr);
			
 
				+}
			
 
				+
			
 
				+static inline struct xfs_da_node_entry *
			
 
				+xfs_da3_node_tree_p(struct xfs_da_intnode *dap)
			
 
				+{
			
 
				+	if (dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
			
 
				+		struct xfs_da3_intnode *dap3 = (struct xfs_da3_intnode *)dap;
			
 
				+		return dap3->__btree;
			
 
				+	}
			
 
				+	return dap->__btree;
			
 
				+}
			
 
				+
			
 
				+extern void xfs_da3_intnode_from_disk(struct xfs_da3_icnode_hdr *to,
			
 
				+				      struct xfs_da_intnode *from);
			
 
				+extern void xfs_da3_intnode_to_disk(struct xfs_da_intnode *to,
			
 
				+				    struct xfs_da3_icnode_hdr *from);
			
 
				 
			
 
				 #define	XFS_LBSIZE(mp)	(mp)->m_sb.sb_blocksize
			
 
				 
			
@@ -191,32 +275,34 @@ struct xfs_nameops {
 
				 /*
			
 
				  * Routines used for growing the Btree.
			
 
				  */
			
 
				-int	xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
			
 
				-					 struct xfs_buf **bpp, int whichfork);
			
 
				-int	xfs_da_split(xfs_da_state_t *state);
			
 
				+int	xfs_da3_node_create(struct xfs_da_args *args, xfs_dablk_t blkno,
			
 
				+			    int level, struct xfs_buf **bpp, int whichfork);
			
 
				+int	xfs_da3_split(xfs_da_state_t *state);
			
 
				 
			
 
				 /*
			
 
				  * Routines used for shrinking the Btree.
			
 
				  */
			
 
				-int	xfs_da_join(xfs_da_state_t *state);
			
 
				-void	xfs_da_fixhashpath(xfs_da_state_t *state,
			
 
				-					  xfs_da_state_path_t *path_to_to_fix);
			
 
				+int	xfs_da3_join(xfs_da_state_t *state);
			
 
				+void	xfs_da3_fixhashpath(struct xfs_da_state *state,
			
 
				+			    struct xfs_da_state_path *path_to_to_fix);
			
 
				 
			
 
				 /*
			
 
				  * Routines used for finding things in the Btree.
			
 
				  */
			
 
				-int	xfs_da_node_lookup_int(xfs_da_state_t *state, int *result);
			
 
				-int	xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
			
 
				+int	xfs_da3_node_lookup_int(xfs_da_state_t *state, int *result);
			
 
				+int	xfs_da3_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
			
 
				 					 int forward, int release, int *result);
			
 
				 /*
			
 
				  * Utility routines.
			
 
				  */
			
 
				-int	xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
			
 
				+int	xfs_da3_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
			
 
				 				       xfs_da_state_blk_t *new_blk);
			
 
				-int	xfs_da_node_read(struct xfs_trans *tp, struct xfs_inode *dp,
			
 
				+int	xfs_da3_node_read(struct xfs_trans *tp, struct xfs_inode *dp,
			
 
				 			 xfs_dablk_t bno, xfs_daddr_t mappedbno,
			
 
				 			 struct xfs_buf **bpp, int which_fork);
			
 
				 
			
 
				+extern const struct xfs_buf_ops xfs_da3_node_buf_ops;
			
 
				+
			
 
				 /*
			
 
				  * Utility routines.
			
 
				  */
			
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -19,7 +19,7 @@
 
				 #define	__XFS_DINODE_H__
			
 
				 
			
 
				 #define	XFS_DINODE_MAGIC		0x494e	/* 'IN' */
			
 
				-#define XFS_DINODE_GOOD_VERSION(v)	(((v) == 1 || (v) == 2))
			
 
				+#define XFS_DINODE_GOOD_VERSION(v)	((v) >= 1 && (v) <= 3)
			
 
				 
			
 
				 typedef struct xfs_timestamp {
			
 
				 	__be32		t_sec;		/* timestamp seconds */
			
@@ -70,10 +70,35 @@ typedef struct xfs_dinode {
 
				 
			
 
				 	/* di_next_unlinked is the only non-core field in the old dinode */
			
 
				 	__be32		di_next_unlinked;/* agi unlinked list ptr */
			
 
				-} __attribute__((packed)) xfs_dinode_t;
			
 
				+
			
 
				+	/* start of the extended dinode, writable fields */
			
 
				+	__le32		di_crc;		/* CRC of the inode */
			
 
				+	__be64		di_changecount;	/* number of attribute changes */
			
 
				+	__be64		di_lsn;		/* flush sequence */
			
 
				+	__be64		di_flags2;	/* more random flags */
			
 
				+	__u8		di_pad2[16];	/* more padding for future expansion */
			
 
				+
			
 
				+	/* fields only written to during inode creation */
			
 
				+	xfs_timestamp_t	di_crtime;	/* time created */
			
 
				+	__be64		di_ino;		/* inode number */
			
 
				+	uuid_t		di_uuid;	/* UUID of the filesystem */
			
 
				+
			
 
				+	/* structure must be padded to 64 bit alignment */
			
 
				+} xfs_dinode_t;
			
 
				 
			
 
				 #define DI_MAX_FLUSH 0xffff
			
 
				 
			
 
				+/*
			
 
				+ * Size of the core inode on disk.  Version 1 and 2 inodes have
			
 
				+ * the same size, but version 3 has grown a few additional fields.
			
 
				+ */
			
 
				+static inline uint xfs_dinode_size(int version)
			
 
				+{
			
 
				+	if (version == 3)
			
 
				+		return sizeof(struct xfs_dinode);
			
 
				+	return offsetof(struct xfs_dinode, di_crc);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * The 32 bit link count in the inode theoretically maxes out at UINT_MAX.
			
 
				  * Since the pathconf interface is signed, we use 2^31 - 1 instead.
			
@@ -104,11 +129,11 @@ typedef enum xfs_dinode_fmt {
 
				 /*
			
 
				  * Inode size for given fs.
			
 
				  */
			
 
				-#define XFS_LITINO(mp) \
			
 
				-	((int)(((mp)->m_sb.sb_inodesize) - sizeof(struct xfs_dinode)))
			
 
				+#define XFS_LITINO(mp, version) \
			
 
				+	((int)(((mp)->m_sb.sb_inodesize) - xfs_dinode_size(version)))
			
 
				 
			
 
				-#define	XFS_BROOT_SIZE_ADJ	\
			
 
				-	(XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t))
			
 
				+#define XFS_BROOT_SIZE_ADJ(ip) \
			
 
				+	(XFS_BMBT_BLOCK_LEN((ip)->i_mount) - sizeof(xfs_bmdr_block_t))
			
 
				 
			
 
				 /*
			
 
				  * Inode data & attribute fork sizes, per inode.
			
@@ -119,10 +144,10 @@ typedef enum xfs_dinode_fmt {
 
				 #define XFS_DFORK_DSIZE(dip,mp) \
			
 
				 	(XFS_DFORK_Q(dip) ? \
			
 
				 		XFS_DFORK_BOFF(dip) : \
			
 
				-		XFS_LITINO(mp))
			
 
				+		XFS_LITINO(mp, (dip)->di_version))
			
 
				 #define XFS_DFORK_ASIZE(dip,mp) \
			
 
				 	(XFS_DFORK_Q(dip) ? \
			
 
				-		XFS_LITINO(mp) - XFS_DFORK_BOFF(dip) : \
			
 
				+		XFS_LITINO(mp, (dip)->di_version) - XFS_DFORK_BOFF(dip) : \
			
 
				 		0)
			
 
				 #define XFS_DFORK_SIZE(dip,mp,w) \
			
 
				 	((w) == XFS_DATA_FORK ? \
			
@@ -133,7 +158,7 @@ typedef enum xfs_dinode_fmt {
 
				  * Return pointers to the data or attribute forks.
			
 
				  */
			
 
				 #define XFS_DFORK_DPTR(dip) \
			
 
				-	((char *)(dip) + sizeof(struct xfs_dinode))
			
 
				+	((char *)(dip) + xfs_dinode_size((dip)->di_version))
			
 
				 #define XFS_DFORK_APTR(dip)	\
			
 
				 	(XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip))
			
 
				 #define XFS_DFORK_PTR(dip,w)	\
			
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -1,5 +1,6 @@
 
				 /*
			
 
				  * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
			
 
				+ * Copyright (c) 2013 Red Hat, Inc.
			
 
				  * All Rights Reserved.
			
 
				  *
			
 
				  * This program is free software; you can redistribute it and/or
			
@@ -28,11 +29,13 @@
 
				 #include "xfs_dinode.h"
			
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_inode_item.h"
			
 
				+#include "xfs_buf_item.h"
			
 
				 #include "xfs_dir2.h"
			
 
				 #include "xfs_dir2_format.h"
			
 
				 #include "xfs_dir2_priv.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_trace.h"
			
 
				+#include "xfs_cksum.h"
			
 
				 
			
 
				 /*
			
 
				  * Local function prototypes.
			
@@ -56,52 +59,110 @@ xfs_dir_startup(void)
 
				 	xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2);
			
 
				 }
			
 
				 
			
 
				-static void
			
 
				-xfs_dir2_block_verify(
			
 
				+static bool
			
 
				+xfs_dir3_block_verify(
			
 
				 	struct xfs_buf		*bp)
			
 
				 {
			
 
				 	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				-	struct xfs_dir2_data_hdr *hdr = bp->b_addr;
			
 
				-	int			block_ok = 0;
			
 
				-
			
 
				-	block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
			
 
				-	block_ok = block_ok && __xfs_dir2_data_check(NULL, bp) == 0;
			
 
				-
			
 
				-	if (!block_ok) {
			
 
				-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
			
 
				-		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+	struct xfs_dir3_blk_hdr	*hdr3 = bp->b_addr;
			
 
				+
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				+		if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC))
			
 
				+			return false;
			
 
				+		if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
			
 
				+			return false;
			
 
				+		if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
			
 
				+			return false;
			
 
				+	} else {
			
 
				+		if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
			
 
				+			return false;
			
 
				 	}
			
 
				+	if (__xfs_dir3_data_check(NULL, bp))
			
 
				+		return false;
			
 
				+	return true;
			
 
				 }
			
 
				 
			
 
				 static void
			
 
				-xfs_dir2_block_read_verify(
			
 
				+xfs_dir3_block_read_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_dir2_block_verify(bp);
			
 
				+	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				+
			
 
				+	if ((xfs_sb_version_hascrc(&mp->m_sb) &&
			
 
				+	     !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				+					  XFS_DIR3_DATA_CRC_OFF)) ||
			
 
				+	    !xfs_dir3_block_verify(bp)) {
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 static void
			
 
				-xfs_dir2_block_write_verify(
			
 
				+xfs_dir3_block_write_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_dir2_block_verify(bp);
			
 
				+	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
			
 
				+	struct xfs_dir3_blk_hdr	*hdr3 = bp->b_addr;
			
 
				+
			
 
				+	if (!xfs_dir3_block_verify(bp)) {
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return;
			
 
				+
			
 
				+	if (bip)
			
 
				+		hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
			
 
				+
			
 
				+	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF);
			
 
				 }
			
 
				 
			
 
				-const struct xfs_buf_ops xfs_dir2_block_buf_ops = {
			
 
				-	.verify_read = xfs_dir2_block_read_verify,
			
 
				-	.verify_write = xfs_dir2_block_write_verify,
			
 
				+const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
			
 
				+	.verify_read = xfs_dir3_block_read_verify,
			
 
				+	.verify_write = xfs_dir3_block_write_verify,
			
 
				 };
			
 
				 
			
 
				 static int
			
 
				-xfs_dir2_block_read(
			
 
				+xfs_dir3_block_read(
			
 
				 	struct xfs_trans	*tp,
			
 
				 	struct xfs_inode	*dp,
			
 
				 	struct xfs_buf		**bpp)
			
 
				 {
			
 
				 	struct xfs_mount	*mp = dp->i_mount;
			
 
				+	int			err;
			
 
				 
			
 
				-	return xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, bpp,
			
 
				-				XFS_DATA_FORK, &xfs_dir2_block_buf_ops);
			
 
				+	err = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, bpp,
			
 
				+				XFS_DATA_FORK, &xfs_dir3_block_buf_ops);
			
 
				+	if (!err && tp)
			
 
				+		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF);
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+xfs_dir3_block_init(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_trans	*tp,
			
 
				+	struct xfs_buf		*bp,
			
 
				+	struct xfs_inode	*dp)
			
 
				+{
			
 
				+	struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
			
 
				+
			
 
				+	bp->b_ops = &xfs_dir3_block_buf_ops;
			
 
				+	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_BLOCK_BUF);
			
 
				+
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				+		memset(hdr3, 0, sizeof(*hdr3));
			
 
				+		hdr3->magic = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
			
 
				+		hdr3->blkno = cpu_to_be64(bp->b_bn);
			
 
				+		hdr3->owner = cpu_to_be64(dp->i_ino);
			
 
				+		uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
			
 
				+		return;
			
 
				+
			
 
				+	}
			
 
				+	hdr3->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
			
 
				 }
			
 
				 
			
 
				 static void
			
@@ -121,7 +182,7 @@ xfs_dir2_block_need_space(
 
				 	struct xfs_dir2_data_unused	*enddup = NULL;
			
 
				 
			
 
				 	*compact = 0;
			
 
				-	bf = hdr->bestfree;
			
 
				+	bf = xfs_dir3_data_bestfree_p(hdr);
			
 
				 
			
 
				 	/*
			
 
				 	 * If there are stale entries we'll use one for the leaf.
			
@@ -303,7 +364,7 @@ xfs_dir2_block_addname(
 
				 	mp = dp->i_mount;
			
 
				 
			
 
				 	/* Read the (one and only) directory block into bp. */
			
 
				-	error = xfs_dir2_block_read(tp, dp, &bp);
			
 
				+	error = xfs_dir3_block_read(tp, dp, &bp);
			
 
				 	if (error)
			
 
				 		return error;
			
 
				 
			
@@ -498,7 +559,7 @@ xfs_dir2_block_addname(
 
				 		xfs_dir2_data_log_header(tp, bp);
			
 
				 	xfs_dir2_block_log_tail(tp, bp);
			
 
				 	xfs_dir2_data_log_entry(tp, bp, dep);
			
 
				-	xfs_dir2_data_check(dp, bp);
			
 
				+	xfs_dir3_data_check(dp, bp);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -531,7 +592,7 @@ xfs_dir2_block_getdents(
 
				 	if (xfs_dir2_dataptr_to_db(mp, *offset) > mp->m_dirdatablk)
			
 
				 		return 0;
			
 
				 
			
 
				-	error = xfs_dir2_block_read(NULL, dp, &bp);
			
 
				+	error = xfs_dir3_block_read(NULL, dp, &bp);
			
 
				 	if (error)
			
 
				 		return error;
			
 
				 
			
@@ -541,12 +602,12 @@ xfs_dir2_block_getdents(
 
				 	 */
			
 
				 	wantoff = xfs_dir2_dataptr_to_off(mp, *offset);
			
 
				 	hdr = bp->b_addr;
			
 
				-	xfs_dir2_data_check(dp, bp);
			
 
				+	xfs_dir3_data_check(dp, bp);
			
 
				 	/*
			
 
				 	 * Set up values for the loop.
			
 
				 	 */
			
 
				 	btp = xfs_dir2_block_tail_p(mp, hdr);
			
 
				-	ptr = (char *)(hdr + 1);
			
 
				+	ptr = (char *)xfs_dir3_data_entry_p(hdr);
			
 
				 	endptr = (char *)xfs_dir2_block_leaf_p(btp);
			
 
				 
			
 
				 	/*
			
@@ -665,7 +726,7 @@ xfs_dir2_block_lookup(
 
				 	dp = args->dp;
			
 
				 	mp = dp->i_mount;
			
 
				 	hdr = bp->b_addr;
			
 
				-	xfs_dir2_data_check(dp, bp);
			
 
				+	xfs_dir3_data_check(dp, bp);
			
 
				 	btp = xfs_dir2_block_tail_p(mp, hdr);
			
 
				 	blp = xfs_dir2_block_leaf_p(btp);
			
 
				 	/*
			
@@ -711,12 +772,12 @@ xfs_dir2_block_lookup_int(
 
				 	tp = args->trans;
			
 
				 	mp = dp->i_mount;
			
 
				 
			
 
				-	error = xfs_dir2_block_read(tp, dp, &bp);
			
 
				+	error = xfs_dir3_block_read(tp, dp, &bp);
			
 
				 	if (error)
			
 
				 		return error;
			
 
				 
			
 
				 	hdr = bp->b_addr;
			
 
				-	xfs_dir2_data_check(dp, bp);
			
 
				+	xfs_dir3_data_check(dp, bp);
			
 
				 	btp = xfs_dir2_block_tail_p(mp, hdr);
			
 
				 	blp = xfs_dir2_block_leaf_p(btp);
			
 
				 	/*
			
@@ -853,7 +914,7 @@ xfs_dir2_block_removename(
 
				 		xfs_dir2_data_freescan(mp, hdr, &needlog);
			
 
				 	if (needlog)
			
 
				 		xfs_dir2_data_log_header(tp, bp);
			
 
				-	xfs_dir2_data_check(dp, bp);
			
 
				+	xfs_dir3_data_check(dp, bp);
			
 
				 	/*
			
 
				 	 * See if the size as a shortform is good enough.
			
 
				 	 */
			
@@ -910,7 +971,7 @@ xfs_dir2_block_replace(
 
				 	 */
			
 
				 	dep->inumber = cpu_to_be64(args->inumber);
			
 
				 	xfs_dir2_data_log_entry(args->trans, bp, dep);
			
 
				-	xfs_dir2_data_check(dp, bp);
			
 
				+	xfs_dir3_data_check(dp, bp);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -958,6 +1019,8 @@ xfs_dir2_leaf_to_block(
 
				 	__be16			*tagp;		/* end of entry (tag) */
			
 
				 	int			to;		/* block/leaf to index */
			
 
				 	xfs_trans_t		*tp;		/* transaction pointer */
			
 
				+	struct xfs_dir2_leaf_entry *ents;
			
 
				+	struct xfs_dir3_icleaf_hdr leafhdr;
			
 
				 
			
 
				 	trace_xfs_dir2_leaf_to_block(args);
			
 
				 
			
@@ -965,8 +1028,12 @@ xfs_dir2_leaf_to_block(
 
				 	tp = args->trans;
			
 
				 	mp = dp->i_mount;
			
 
				 	leaf = lbp->b_addr;
			
 
				-	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
			
 
				+	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
			
 
				+	ents = xfs_dir3_leaf_ents_p(leaf);
			
 
				 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
			
 
				+
			
 
				+	ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
			
 
				+	       leafhdr.magic == XFS_DIR3_LEAF1_MAGIC);
			
 
				 	/*
			
 
				 	 * If there are data blocks other than the first one, take this
			
 
				 	 * opportunity to remove trailing empty data blocks that may have
			
@@ -974,9 +1041,12 @@ xfs_dir2_leaf_to_block(
 
				 	 * These will show up in the leaf bests table.
			
 
				 	 */
			
 
				 	while (dp->i_d.di_size > mp->m_dirblksize) {
			
 
				+		int hdrsz;
			
 
				+
			
 
				+		hdrsz = xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&mp->m_sb));
			
 
				 		bestsp = xfs_dir2_leaf_bests_p(ltp);
			
 
				 		if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
			
 
				-		    mp->m_dirblksize - (uint)sizeof(*hdr)) {
			
 
				+					    mp->m_dirblksize - hdrsz) {
			
 
				 			if ((error =
			
 
				 			    xfs_dir2_leaf_trim_data(args, lbp,
			
 
				 				    (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1))))
			
@@ -988,17 +1058,19 @@ xfs_dir2_leaf_to_block(
 
				 	 * Read the data block if we don't already have it, give up if it fails.
			
 
				 	 */
			
 
				 	if (!dbp) {
			
 
				-		error = xfs_dir2_data_read(tp, dp, mp->m_dirdatablk, -1, &dbp);
			
 
				+		error = xfs_dir3_data_read(tp, dp, mp->m_dirdatablk, -1, &dbp);
			
 
				 		if (error)
			
 
				 			return error;
			
 
				 	}
			
 
				 	hdr = dbp->b_addr;
			
 
				-	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
			
 
				+	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
			
 
				+
			
 
				 	/*
			
 
				 	 * Size of the "leaf" area in the block.
			
 
				 	 */
			
 
				 	size = (uint)sizeof(xfs_dir2_block_tail_t) +
			
 
				-	       (uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
			
 
				+	       (uint)sizeof(*lep) * (leafhdr.count - leafhdr.stale);
			
 
				 	/*
			
 
				 	 * Look at the last data entry.
			
 
				 	 */
			
@@ -1014,8 +1086,8 @@ xfs_dir2_leaf_to_block(
 
				 	/*
			
 
				 	 * Start converting it to block form.
			
 
				 	 */
			
 
				-	dbp->b_ops = &xfs_dir2_block_buf_ops;
			
 
				-	hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
			
 
				+	xfs_dir3_block_init(mp, tp, dbp, dp);
			
 
				+
			
 
				 	needlog = 1;
			
 
				 	needscan = 0;
			
 
				 	/*
			
@@ -1027,18 +1099,17 @@ xfs_dir2_leaf_to_block(
 
				 	 * Initialize the block tail.
			
 
				 	 */
			
 
				 	btp = xfs_dir2_block_tail_p(mp, hdr);
			
 
				-	btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
			
 
				+	btp->count = cpu_to_be32(leafhdr.count - leafhdr.stale);
			
 
				 	btp->stale = 0;
			
 
				 	xfs_dir2_block_log_tail(tp, dbp);
			
 
				 	/*
			
 
				 	 * Initialize the block leaf area.  We compact out stale entries.
			
 
				 	 */
			
 
				 	lep = xfs_dir2_block_leaf_p(btp);
			
 
				-	for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
			
 
				-		if (leaf->ents[from].address ==
			
 
				-		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
			
 
				+	for (from = to = 0; from < leafhdr.count; from++) {
			
 
				+		if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
			
 
				 			continue;
			
 
				-		lep[to++] = leaf->ents[from];
			
 
				+		lep[to++] = ents[from];
			
 
				 	}
			
 
				 	ASSERT(to == be32_to_cpu(btp->count));
			
 
				 	xfs_dir2_block_log_leaf(tp, dbp, 0, be32_to_cpu(btp->count) - 1);
			
@@ -1137,16 +1208,16 @@ xfs_dir2_sf_to_block(
 
				 		return error;
			
 
				 	}
			
 
				 	/*
			
 
				-	 * Initialize the data block.
			
 
				+	 * Initialize the data block, then convert it to block format.
			
 
				 	 */
			
 
				-	error = xfs_dir2_data_init(args, blkno, &bp);
			
 
				+	error = xfs_dir3_data_init(args, blkno, &bp);
			
 
				 	if (error) {
			
 
				 		kmem_free(sfp);
			
 
				 		return error;
			
 
				 	}
			
 
				-	bp->b_ops = &xfs_dir2_block_buf_ops;
			
 
				+	xfs_dir3_block_init(mp, tp, bp, dp);
			
 
				 	hdr = bp->b_addr;
			
 
				-	hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
			
 
				+
			
 
				 	/*
			
 
				 	 * Compute size of block "tail" area.
			
 
				 	 */
			
@@ -1156,7 +1227,7 @@ xfs_dir2_sf_to_block(
 
				 	 * The whole thing is initialized to free by the init routine.
			
 
				 	 * Say we're using the leaf and tail area.
			
 
				 	 */
			
 
				-	dup = (xfs_dir2_data_unused_t *)(hdr + 1);
			
 
				+	dup = xfs_dir3_data_unused_p(hdr);
			
 
				 	needlog = needscan = 0;
			
 
				 	xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog,
			
 
				 		&needscan);
			
@@ -1178,8 +1249,7 @@ xfs_dir2_sf_to_block(
 
				 	/*
			
 
				 	 * Create entry for .
			
 
				 	 */
			
 
				-	dep = (xfs_dir2_data_entry_t *)
			
 
				-	      ((char *)hdr + XFS_DIR2_DATA_DOT_OFFSET);
			
 
				+	dep = xfs_dir3_data_dot_entry_p(hdr);
			
 
				 	dep->inumber = cpu_to_be64(dp->i_ino);
			
 
				 	dep->namelen = 1;
			
 
				 	dep->name[0] = '.';
			
@@ -1192,8 +1262,7 @@ xfs_dir2_sf_to_block(
 
				 	/*
			
 
				 	 * Create entry for ..
			
 
				 	 */
			
 
				-	dep = (xfs_dir2_data_entry_t *)
			
 
				-		((char *)hdr + XFS_DIR2_DATA_DOTDOT_OFFSET);
			
 
				+	dep = xfs_dir3_data_dotdot_entry_p(hdr);
			
 
				 	dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp));
			
 
				 	dep->namelen = 2;
			
 
				 	dep->name[0] = dep->name[1] = '.';
			
@@ -1203,7 +1272,7 @@ xfs_dir2_sf_to_block(
 
				 	blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
			
 
				 	blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
			
 
				 				(char *)dep - (char *)hdr));
			
 
				-	offset = XFS_DIR2_DATA_FIRST_OFFSET;
			
 
				+	offset = xfs_dir3_data_first_offset(hdr);
			
 
				 	/*
			
 
				 	 * Loop over existing entries, stuff them in.
			
 
				 	 */
			
@@ -1273,6 +1342,6 @@ xfs_dir2_sf_to_block(
 
				 	ASSERT(needscan == 0);
			
 
				 	xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1);
			
 
				 	xfs_dir2_block_log_tail(tp, bp);
			
 
				-	xfs_dir2_data_check(dp, bp);
			
 
				+	xfs_dir3_data_check(dp, bp);
			
 
				 	return 0;
			
 
				 }
			
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -1,5 +1,6 @@
 
				 /*
			
 
				  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
			
 
				+ * Copyright (c) 2013 Red Hat, Inc.
			
 
				  * All Rights Reserved.
			
 
				  *
			
 
				  * This program is free software; you can redistribute it and/or
			
@@ -30,6 +31,8 @@
 
				 #include "xfs_dir2_format.h"
			
 
				 #include "xfs_dir2_priv.h"
			
 
				 #include "xfs_error.h"
			
 
				+#include "xfs_buf_item.h"
			
 
				+#include "xfs_cksum.h"
			
 
				 
			
 
				 STATIC xfs_dir2_data_free_t *
			
 
				 xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup);
			
@@ -40,7 +43,7 @@ xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup);
 
				  * Return 0 is the buffer is good, otherwise an error.
			
 
				  */
			
 
				 int
			
 
				-__xfs_dir2_data_check(
			
 
				+__xfs_dir3_data_check(
			
 
				 	struct xfs_inode	*dp,		/* incore inode pointer */
			
 
				 	struct xfs_buf		*bp)		/* data block's buffer */
			
 
				 {
			
@@ -65,15 +68,17 @@ __xfs_dir2_data_check(
 
				 
			
 
				 	mp = bp->b_target->bt_mount;
			
 
				 	hdr = bp->b_addr;
			
 
				-	bf = hdr->bestfree;
			
 
				-	p = (char *)(hdr + 1);
			
 
				+	bf = xfs_dir3_data_bestfree_p(hdr);
			
 
				+	p = (char *)xfs_dir3_data_entry_p(hdr);
			
 
				 
			
 
				 	switch (hdr->magic) {
			
 
				+	case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
			
 
				 	case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
			
 
				 		btp = xfs_dir2_block_tail_p(mp, hdr);
			
 
				 		lep = xfs_dir2_block_leaf_p(btp);
			
 
				 		endp = (char *)lep;
			
 
				 		break;
			
 
				+	case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
			
 
				 	case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
			
 
				 		endp = (char *)hdr + mp->m_dirblksize;
			
 
				 		break;
			
@@ -148,7 +153,8 @@ __xfs_dir2_data_check(
 
				 					       (char *)dep - (char *)hdr);
			
 
				 		count++;
			
 
				 		lastfree = 0;
			
 
				-		if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
			
 
				+		if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
			
 
				+		    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
			
 
				 			addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
			
 
				 				(xfs_dir2_data_aoff_t)
			
 
				 				((char *)dep - (char *)hdr));
			
@@ -168,7 +174,8 @@ __xfs_dir2_data_check(
 
				 	 * Need to have seen all the entries and all the bestfree slots.
			
 
				 	 */
			
 
				 	XFS_WANT_CORRUPTED_RETURN(freeseen == 7);
			
 
				-	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
			
 
				+	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
			
 
				+	    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
			
 
				 		for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
			
 
				 			if (lep[i].address ==
			
 
				 			    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
			
@@ -185,21 +192,27 @@ __xfs_dir2_data_check(
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static void
			
 
				-xfs_dir2_data_verify(
			
 
				+static bool
			
 
				+xfs_dir3_data_verify(
			
 
				 	struct xfs_buf		*bp)
			
 
				 {
			
 
				 	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				-	struct xfs_dir2_data_hdr *hdr = bp->b_addr;
			
 
				-	int			block_ok = 0;
			
 
				+	struct xfs_dir3_blk_hdr	*hdr3 = bp->b_addr;
			
 
				 
			
 
				-	block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC);
			
 
				-	block_ok = block_ok && __xfs_dir2_data_check(NULL, bp) == 0;
			
 
				-
			
 
				-	if (!block_ok) {
			
 
				-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
			
 
				-		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				+		if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC))
			
 
				+			return false;
			
 
				+		if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
			
 
				+			return false;
			
 
				+		if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
			
 
				+			return false;
			
 
				+	} else {
			
 
				+		if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
			
 
				+			return false;
			
 
				 	}
			
 
				+	if (__xfs_dir3_data_check(NULL, bp))
			
 
				+		return false;
			
 
				+	return true;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -208,7 +221,7 @@ xfs_dir2_data_verify(
 
				  * format buffer or a data format buffer on readahead.
			
 
				  */
			
 
				 static void
			
 
				-xfs_dir2_data_reada_verify(
			
 
				+xfs_dir3_data_reada_verify(
			
 
				 	struct xfs_buf		*bp)
			
 
				 {
			
 
				 	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
@@ -216,11 +229,13 @@ xfs_dir2_data_reada_verify(
 
				 
			
 
				 	switch (hdr->magic) {
			
 
				 	case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
			
 
				-		bp->b_ops = &xfs_dir2_block_buf_ops;
			
 
				+	case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
			
 
				+		bp->b_ops = &xfs_dir3_block_buf_ops;
			
 
				 		bp->b_ops->verify_read(bp);
			
 
				 		return;
			
 
				 	case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
			
 
				-		xfs_dir2_data_verify(bp);
			
 
				+	case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
			
 
				+		/*
			
 
				+		 * xfs_dir3_data_verify() only returns a verdict; it no longer
			
 
				+		 * marks the buffer itself, so record the failure here.
			
 
				+		 * NOTE(review): the CRC is not checked on this path - confirm
			
 
				+		 * whether readahead should go through the full read verifier.
			
 
				+		 */
			
 
				+		if (!xfs_dir3_data_verify(bp)) {
			
 
				+			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
			
 
				+					     mp, hdr);
			
 
				+			xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+		}
			
 
				 		return;
			
 
				 	default:
			
 
				 		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
			
@@ -230,51 +245,80 @@ xfs_dir2_data_reada_verify(
 
				 }
			
 
				 
			
 
				 static void
			
 
				-xfs_dir2_data_read_verify(
			
 
				+xfs_dir3_data_read_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_dir2_data_verify(bp);
			
 
				+	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				+
			
 
				+	if ((xfs_sb_version_hascrc(&mp->m_sb) &&
			
 
				+	     !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				+					  XFS_DIR3_DATA_CRC_OFF)) ||
			
 
				+	    !xfs_dir3_data_verify(bp)) {
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 static void
			
 
				-xfs_dir2_data_write_verify(
			
 
				+xfs_dir3_data_write_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_dir2_data_verify(bp);
			
 
				+	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
			
 
				+	struct xfs_dir3_blk_hdr	*hdr3 = bp->b_addr;
			
 
				+
			
 
				+	if (!xfs_dir3_data_verify(bp)) {
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return;
			
 
				+
			
 
				+	if (bip)
			
 
				+		hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
			
 
				+
			
 
				+	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF);
			
 
				 }
			
 
				 
			
 
				-const struct xfs_buf_ops xfs_dir2_data_buf_ops = {
			
 
				-	.verify_read = xfs_dir2_data_read_verify,
			
 
				-	.verify_write = xfs_dir2_data_write_verify,
			
 
				+const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
			
 
				+	.verify_read = xfs_dir3_data_read_verify,
			
 
				+	.verify_write = xfs_dir3_data_write_verify,
			
 
				 };
			
 
				 
			
 
				-static const struct xfs_buf_ops xfs_dir2_data_reada_buf_ops = {
			
 
				-	.verify_read = xfs_dir2_data_reada_verify,
			
 
				-	.verify_write = xfs_dir2_data_write_verify,
			
 
				+static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
			
 
				+	.verify_read = xfs_dir3_data_reada_verify,
			
 
				+	.verify_write = xfs_dir3_data_write_verify,
			
 
				 };
			
 
				 
			
 
				 
			
 
				 int
			
 
				-xfs_dir2_data_read(
			
 
				+xfs_dir3_data_read(
			
 
				 	struct xfs_trans	*tp,
			
 
				 	struct xfs_inode	*dp,
			
 
				 	xfs_dablk_t		bno,
			
 
				 	xfs_daddr_t		mapped_bno,
			
 
				 	struct xfs_buf		**bpp)
			
 
				 {
			
 
				-	return xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
			
 
				-				XFS_DATA_FORK, &xfs_dir2_data_buf_ops);
			
 
				+	int			err;
			
 
				+
			
 
				+	err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
			
 
				+				XFS_DATA_FORK, &xfs_dir3_data_buf_ops);
			
 
				+	/*
			
 
				+	 * Tag the buffer type only when in a transaction and a buffer
			
 
				+	 * was actually returned.
			
 
				+	 * NOTE(review): assumes xfs_da_read_buf() can succeed with
			
 
				+	 * *bpp == NULL (hole lookups via mapped_bno) - confirm.
			
 
				+	 */
			
 
				+	if (!err && tp && *bpp)
			
 
				+		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
			
 
				+	return err;
			
 
				 }
			
 
				 
			
 
				 int
			
 
				-xfs_dir2_data_readahead(
			
 
				+xfs_dir3_data_readahead(
			
 
				 	struct xfs_trans	*tp,
			
 
				 	struct xfs_inode	*dp,
			
 
				 	xfs_dablk_t		bno,
			
 
				 	xfs_daddr_t		mapped_bno)
			
 
				 {
			
 
				 	return xfs_da_reada_buf(tp, dp, bno, mapped_bno,
			
 
				-				XFS_DATA_FORK, &xfs_dir2_data_reada_buf_ops);
			
 
				+				XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -288,12 +332,15 @@ xfs_dir2_data_freefind(
 
				 {
			
 
				 	xfs_dir2_data_free_t	*dfp;		/* bestfree entry */
			
 
				 	xfs_dir2_data_aoff_t	off;		/* offset value needed */
			
 
				+	struct xfs_dir2_data_free *bf;
			
 
				 #if defined(DEBUG) && defined(__KERNEL__)
			
 
				 	int			matched;	/* matched the value */
			
 
				 	int			seenzero;	/* saw a 0 bestfree entry */
			
 
				 #endif
			
 
				 
			
 
				 	off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
			
 
				+	bf = xfs_dir3_data_bestfree_p(hdr);
			
 
				+
			
 
				 #if defined(DEBUG) && defined(__KERNEL__)
			
 
				 	/*
			
 
				 	 * Validate some consistency in the bestfree table.
			
@@ -301,9 +348,11 @@ xfs_dir2_data_freefind(
 
				 	 * one we're looking for it has to be exact.
			
 
				 	 */
			
 
				 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
			
 
				-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
			
 
				-	for (dfp = &hdr->bestfree[0], seenzero = matched = 0;
			
 
				-	     dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT];
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
			
 
				+	for (dfp = &bf[0], seenzero = matched = 0;
			
 
				+	     dfp < &bf[XFS_DIR2_DATA_FD_COUNT];
			
 
				 	     dfp++) {
			
 
				 		if (!dfp->offset) {
			
 
				 			ASSERT(!dfp->length);
			
@@ -319,7 +368,7 @@ xfs_dir2_data_freefind(
 
				 		else
			
 
				 			ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off);
			
 
				 		ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length));
			
 
				-		if (dfp > &hdr->bestfree[0])
			
 
				+		if (dfp > &bf[0])
			
 
				 			ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length));
			
 
				 	}
			
 
				 #endif
			
@@ -328,14 +377,12 @@ xfs_dir2_data_freefind(
 
				 	 * it can't be there since they're sorted.
			
 
				 	 */
			
 
				 	if (be16_to_cpu(dup->length) <
			
 
				-	    be16_to_cpu(hdr->bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length))
			
 
				+	    be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length))
			
 
				 		return NULL;
			
 
				 	/*
			
 
				 	 * Look at the three bestfree entries for our guy.
			
 
				 	 */
			
 
				-	for (dfp = &hdr->bestfree[0];
			
 
				-	     dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT];
			
 
				-	     dfp++) {
			
 
				+	for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
			
 
				 		if (!dfp->offset)
			
 
				 			return NULL;
			
 
				 		if (be16_to_cpu(dfp->offset) == off)
			
@@ -359,11 +406,12 @@ xfs_dir2_data_freeinsert(
 
				 	xfs_dir2_data_free_t	*dfp;		/* bestfree table pointer */
			
 
				 	xfs_dir2_data_free_t	new;		/* new bestfree entry */
			
 
				 
			
 
				-#ifdef __KERNEL__
			
 
				 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
			
 
				-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
			
 
				-#endif
			
 
				-	dfp = hdr->bestfree;
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
			
 
				+
			
 
				+	dfp = xfs_dir3_data_bestfree_p(hdr);
			
 
				 	new.length = dup->length;
			
 
				 	new.offset = cpu_to_be16((char *)dup - (char *)hdr);
			
 
				 
			
@@ -400,32 +448,36 @@ xfs_dir2_data_freeremove(
 
				 	xfs_dir2_data_free_t	*dfp,		/* bestfree entry pointer */
			
 
				 	int			*loghead)	/* out: log data header */
			
 
				 {
			
 
				-#ifdef __KERNEL__
			
 
				+	struct xfs_dir2_data_free *bf;
			
 
				+
			
 
				 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
			
 
				-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
			
 
				-#endif
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
			
 
				+
			
 
				 	/*
			
 
				 	 * It's the first entry, slide the next 2 up.
			
 
				 	 */
			
 
				-	if (dfp == &hdr->bestfree[0]) {
			
 
				-		hdr->bestfree[0] = hdr->bestfree[1];
			
 
				-		hdr->bestfree[1] = hdr->bestfree[2];
			
 
				+	bf = xfs_dir3_data_bestfree_p(hdr);
			
 
				+	if (dfp == &bf[0]) {
			
 
				+		bf[0] = bf[1];
			
 
				+		bf[1] = bf[2];
			
 
				 	}
			
 
				 	/*
			
 
				 	 * It's the second entry, slide the 3rd entry up.
			
 
				 	 */
			
 
				-	else if (dfp == &hdr->bestfree[1])
			
 
				-		hdr->bestfree[1] = hdr->bestfree[2];
			
 
				+	else if (dfp == &bf[1])
			
 
				+		bf[1] = bf[2];
			
 
				 	/*
			
 
				 	 * Must be the last entry.
			
 
				 	 */
			
 
				 	else
			
 
				-		ASSERT(dfp == &hdr->bestfree[2]);
			
 
				+		ASSERT(dfp == &bf[2]);
			
 
				 	/*
			
 
				 	 * Clear the 3rd entry, must be zero now.
			
 
				 	 */
			
 
				-	hdr->bestfree[2].length = 0;
			
 
				-	hdr->bestfree[2].offset = 0;
			
 
				+	bf[2].length = 0;
			
 
				+	bf[2].offset = 0;
			
 
				 	*loghead = 1;
			
 
				 }
			
 
				 
			
@@ -441,23 +493,27 @@ xfs_dir2_data_freescan(
 
				 	xfs_dir2_block_tail_t	*btp;		/* block tail */
			
 
				 	xfs_dir2_data_entry_t	*dep;		/* active data entry */
			
 
				 	xfs_dir2_data_unused_t	*dup;		/* unused data entry */
			
 
				+	struct xfs_dir2_data_free *bf;
			
 
				 	char			*endp;		/* end of block's data */
			
 
				 	char			*p;		/* current entry pointer */
			
 
				 
			
 
				-#ifdef __KERNEL__
			
 
				 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
			
 
				-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
			
 
				-#endif
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
			
 
				+
			
 
				 	/*
			
 
				 	 * Start by clearing the table.
			
 
				 	 */
			
 
				-	memset(hdr->bestfree, 0, sizeof(hdr->bestfree));
			
 
				+	bf = xfs_dir3_data_bestfree_p(hdr);
			
 
				+	memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT);
			
 
				 	*loghead = 1;
			
 
				 	/*
			
 
				 	 * Set up pointers.
			
 
				 	 */
			
 
				-	p = (char *)(hdr + 1);
			
 
				-	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
			
 
				+	p = (char *)xfs_dir3_data_entry_p(hdr);
			
 
				+	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
			
 
				+	    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
			
 
				 		btp = xfs_dir2_block_tail_p(mp, hdr);
			
 
				 		endp = (char *)xfs_dir2_block_leaf_p(btp);
			
 
				 	} else
			
@@ -493,7 +549,7 @@ xfs_dir2_data_freescan(
 
				  * Give back the buffer for the created block.
			
 
				  */
			
 
				 int						/* error */
			
 
				-xfs_dir2_data_init(
			
 
				+xfs_dir3_data_init(
			
 
				 	xfs_da_args_t		*args,		/* directory operation args */
			
 
				 	xfs_dir2_db_t		blkno,		/* logical dir block number */
			
 
				 	struct xfs_buf		**bpp)		/* output block buffer */
			
@@ -502,6 +558,7 @@ xfs_dir2_data_init(
 
				 	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
			
 
				 	xfs_inode_t		*dp;		/* incore directory inode */
			
 
				 	xfs_dir2_data_unused_t	*dup;		/* unused entry pointer */
			
 
				+	struct xfs_dir2_data_free *bf;
			
 
				 	int			error;		/* error return value */
			
 
				 	int			i;		/* bestfree index */
			
 
				 	xfs_mount_t		*mp;		/* filesystem mount point */
			
@@ -518,27 +575,40 @@ xfs_dir2_data_init(
 
				 		XFS_DATA_FORK);
			
 
				 	if (error)
			
 
				 		return error;
			
 
				-	bp->b_ops = &xfs_dir2_data_buf_ops;
			
 
				+	bp->b_ops = &xfs_dir3_data_buf_ops;
			
 
				+	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_DATA_BUF);
			
 
				 
			
 
				 	/*
			
 
				 	 * Initialize the header.
			
 
				 	 */
			
 
				 	hdr = bp->b_addr;
			
 
				-	hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
			
 
				-	hdr->bestfree[0].offset = cpu_to_be16(sizeof(*hdr));
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				+		struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
			
 
				+
			
 
				+		memset(hdr3, 0, sizeof(*hdr3));
			
 
				+		hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
			
 
				+		hdr3->blkno = cpu_to_be64(bp->b_bn);
			
 
				+		hdr3->owner = cpu_to_be64(dp->i_ino);
			
 
				+		uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
			
 
				+
			
 
				+	} else
			
 
				+		hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
			
 
				+
			
 
				+	bf = xfs_dir3_data_bestfree_p(hdr);
			
 
				+	bf[0].offset = cpu_to_be16(xfs_dir3_data_entry_offset(hdr));
			
 
				 	for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
			
 
				-		hdr->bestfree[i].length = 0;
			
 
				-		hdr->bestfree[i].offset = 0;
			
 
				+		bf[i].length = 0;
			
 
				+		bf[i].offset = 0;
			
 
				 	}
			
 
				 
			
 
				 	/*
			
 
				 	 * Set up an unused entry for the block's body.
			
 
				 	 */
			
 
				-	dup = (xfs_dir2_data_unused_t *)(hdr + 1);
			
 
				+	dup = xfs_dir3_data_unused_p(hdr);
			
 
				 	dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
			
 
				 
			
 
				-	t = mp->m_dirblksize - (uint)sizeof(*hdr);
			
 
				-	hdr->bestfree[0].length = cpu_to_be16(t);
			
 
				+	t = mp->m_dirblksize - (uint)xfs_dir3_data_entry_offset(hdr);
			
 
				+	bf[0].length = cpu_to_be16(t);
			
 
				 	dup->length = cpu_to_be16(t);
			
 
				 	*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
			
 
				 	/*
			
@@ -562,7 +632,9 @@ xfs_dir2_data_log_entry(
 
				 	xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
			
 
				 
			
 
				 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
			
 
				-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
			
 
				 
			
 
				 	xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
			
 
				 		(uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
			
@@ -580,9 +652,11 @@ xfs_dir2_data_log_header(
 
				 	xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
			
 
				 
			
 
				 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
			
 
				-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
			
 
				 
			
 
				-	xfs_trans_log_buf(tp, bp, 0, sizeof(*hdr) - 1);
			
 
				+	xfs_trans_log_buf(tp, bp, 0, xfs_dir3_data_entry_offset(hdr) - 1);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -597,7 +671,9 @@ xfs_dir2_data_log_unused(
 
				 	xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
			
 
				 
			
 
				 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
			
 
				-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
			
 
				 
			
 
				 	/*
			
 
				 	 * Log the first part of the unused entry.
			
@@ -635,6 +711,7 @@ xfs_dir2_data_make_free(
 
				 	xfs_dir2_data_unused_t	*newdup;	/* new unused entry */
			
 
				 	xfs_dir2_data_unused_t	*postdup;	/* unused entry after us */
			
 
				 	xfs_dir2_data_unused_t	*prevdup;	/* unused entry before us */
			
 
				+	struct xfs_dir2_data_free *bf;
			
 
				 
			
 
				 	mp = tp->t_mountp;
			
 
				 	hdr = bp->b_addr;
			
@@ -642,12 +719,14 @@ xfs_dir2_data_make_free(
 
				 	/*
			
 
				 	 * Figure out where the end of the data area is.
			
 
				 	 */
			
 
				-	if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC))
			
 
				+	if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
			
 
				+	    hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC))
			
 
				 		endptr = (char *)hdr + mp->m_dirblksize;
			
 
				 	else {
			
 
				 		xfs_dir2_block_tail_t	*btp;	/* block tail */
			
 
				 
			
 
				-		ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
			
 
				+		ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
			
 
				+			hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
			
 
				 		btp = xfs_dir2_block_tail_p(mp, hdr);
			
 
				 		endptr = (char *)xfs_dir2_block_leaf_p(btp);
			
 
				 	}
			
@@ -655,7 +734,7 @@ xfs_dir2_data_make_free(
 
				 	 * If this isn't the start of the block, then back up to
			
 
				 	 * the previous entry and see if it's free.
			
 
				 	 */
			
 
				-	if (offset > sizeof(*hdr)) {
			
 
				+	if (offset > xfs_dir3_data_entry_offset(hdr)) {
			
 
				 		__be16			*tagp;	/* tag just before us */
			
 
				 
			
 
				 		tagp = (__be16 *)((char *)hdr + offset) - 1;
			
@@ -681,6 +760,7 @@ xfs_dir2_data_make_free(
 
				 	 * Previous and following entries are both free,
			
 
				 	 * merge everything into a single free entry.
			
 
				 	 */
			
 
				+	bf = xfs_dir3_data_bestfree_p(hdr);
			
 
				 	if (prevdup && postdup) {
			
 
				 		xfs_dir2_data_free_t	*dfp2;	/* another bestfree pointer */
			
 
				 
			
@@ -695,7 +775,7 @@ xfs_dir2_data_make_free(
 
				 		 * since the third bestfree is there, there might be more
			
 
				 		 * entries.
			
 
				 		 */
			
 
				-		needscan = (hdr->bestfree[2].length != 0);
			
 
				+		needscan = (bf[2].length != 0);
			
 
				 		/*
			
 
				 		 * Fix up the new big freespace.
			
 
				 		 */
			
@@ -711,10 +791,10 @@ xfs_dir2_data_make_free(
 
				 			 * Remove entry 1 first then entry 0.
			
 
				 			 */
			
 
				 			ASSERT(dfp && dfp2);
			
 
				-			if (dfp == &hdr->bestfree[1]) {
			
 
				-				dfp = &hdr->bestfree[0];
			
 
				+			if (dfp == &bf[1]) {
			
 
				+				dfp = &bf[0];
			
 
				 				ASSERT(dfp2 == dfp);
			
 
				-				dfp2 = &hdr->bestfree[1];
			
 
				+				dfp2 = &bf[1];
			
 
				 			}
			
 
				 			xfs_dir2_data_freeremove(hdr, dfp2, needlogp);
			
 
				 			xfs_dir2_data_freeremove(hdr, dfp, needlogp);
			
@@ -722,7 +802,7 @@ xfs_dir2_data_make_free(
 
				 			 * Now insert the new entry.
			
 
				 			 */
			
 
				 			dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
			
 
				-			ASSERT(dfp == &hdr->bestfree[0]);
			
 
				+			ASSERT(dfp == &bf[0]);
			
 
				 			ASSERT(dfp->length == prevdup->length);
			
 
				 			ASSERT(!dfp[1].length);
			
 
				 			ASSERT(!dfp[2].length);
			
@@ -751,7 +831,7 @@ xfs_dir2_data_make_free(
 
				 		 */
			
 
				 		else {
			
 
				 			needscan = be16_to_cpu(prevdup->length) >
			
 
				-				   be16_to_cpu(hdr->bestfree[2].length);
			
 
				+				   be16_to_cpu(bf[2].length);
			
 
				 		}
			
 
				 	}
			
 
				 	/*
			
@@ -779,7 +859,7 @@ xfs_dir2_data_make_free(
 
				 		 */
			
 
				 		else {
			
 
				 			needscan = be16_to_cpu(newdup->length) >
			
 
				-				   be16_to_cpu(hdr->bestfree[2].length);
			
 
				+				   be16_to_cpu(bf[2].length);
			
 
				 		}
			
 
				 	}
			
 
				 	/*
			
@@ -818,10 +898,13 @@ xfs_dir2_data_use_free(
 
				 	xfs_dir2_data_unused_t	*newdup;	/* new unused entry */
			
 
				 	xfs_dir2_data_unused_t	*newdup2;	/* another new unused entry */
			
 
				 	int			oldlen;		/* old unused entry's length */
			
 
				+	struct xfs_dir2_data_free *bf;
			
 
				 
			
 
				 	hdr = bp->b_addr;
			
 
				 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
			
 
				-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
			
 
				+	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
			
 
				 	ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
			
 
				 	ASSERT(offset >= (char *)dup - (char *)hdr);
			
 
				 	ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr);
			
@@ -831,7 +914,8 @@ xfs_dir2_data_use_free(
 
				 	 */
			
 
				 	dfp = xfs_dir2_data_freefind(hdr, dup);
			
 
				 	oldlen = be16_to_cpu(dup->length);
			
 
				-	ASSERT(dfp || oldlen <= be16_to_cpu(hdr->bestfree[2].length));
			
 
				+	bf = xfs_dir3_data_bestfree_p(hdr);
			
 
				+	ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length));
			
 
				 	/*
			
 
				 	 * Check for alignment with front and back of the entry.
			
 
				 	 */
			
@@ -845,7 +929,7 @@ xfs_dir2_data_use_free(
 
				 	 */
			
 
				 	if (matchfront && matchback) {
			
 
				 		if (dfp) {
			
 
				-			needscan = (hdr->bestfree[2].offset != 0);
			
 
				+			needscan = (bf[2].offset != 0);
			
 
				 			if (!needscan)
			
 
				 				xfs_dir2_data_freeremove(hdr, dfp, needlogp);
			
 
				 		}
			
@@ -875,7 +959,7 @@ xfs_dir2_data_use_free(
 
				 			 * that means we don't know if there was a better
			
 
				 			 * choice for the last slot, or not.  Rescan.
			
 
				 			 */
			
 
				-			needscan = dfp == &hdr->bestfree[2];
			
 
				+			needscan = dfp == &bf[2];
			
 
				 		}
			
 
				 	}
			
 
				 	/*
			
@@ -902,7 +986,7 @@ xfs_dir2_data_use_free(
 
				 			 * that means we don't know if there was a better
			
 
				 			 * choice for the last slot, or not.  Rescan.
			
 
				 			 */
			
 
				-			needscan = dfp == &hdr->bestfree[2];
			
 
				+			needscan = dfp == &bf[2];
			
 
				 		}
			
 
				 	}
			
 
				 	/*
			
@@ -930,7 +1014,7 @@ xfs_dir2_data_use_free(
 
				 		 * the 2 new will work.
			
 
				 		 */
			
 
				 		if (dfp) {
			
 
				-			needscan = (hdr->bestfree[2].length != 0);
			
 
				+			needscan = (bf[2].length != 0);
			
 
				 			if (!needscan) {
			
 
				 				xfs_dir2_data_freeremove(hdr, dfp, needlogp);
			
 
				 				xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
			
--- a/fs/xfs/xfs_dir2_format.h
+++ b/fs/xfs/xfs_dir2_format.h
@@ -1,5 +1,6 @@
 
				 /*
			
 
				  * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
			
 
				+ * Copyright (c) 2013 Red Hat, Inc.
			
 
				  * All Rights Reserved.
			
 
				  *
			
 
				  * This program is free software; you can redistribute it and/or
			
@@ -35,6 +36,38 @@
 
				 #define	XFS_DIR2_DATA_MAGIC	0x58443244	/* XD2D: multiblock dirs */
			
 
				 #define	XFS_DIR2_FREE_MAGIC	0x58443246	/* XD2F: free index blocks */
			
 
				 
			
 
				+/*
			
 
				+ * Directory Version 3 With CRCs.
			
 
				+ *
			
 
				+ * The tree formats are the same as for version 2 directories.  The difference
			
 
				+ * is in the block header and dirent formats. In many cases the v3 structures
			
 
				+ * use v2 definitions as they are no different and this makes code sharing much
			
 
				+ * easier.
			
 
				+ *
			
 
				+ * Also, the xfs_dir3_*() functions handle both v2 and v3 formats - if the
			
 
				+ * format is v2 then they switch to the existing v2 code, or if the format is v3
			
 
				+ * they implement the v3 functionality. This means the existing dir2 is a mix of
			
 
				+ * xfs_dir2/xfs_dir3 calls and functions. The xfs_dir3 functions are called
			
 
				+ * where there is a difference in the formats, otherwise the code is unchanged.
			
 
				+ *
			
 
				+ * Where it is possible, the code decides what to do based on the magic numbers
			
 
				+ * in the blocks rather than feature bits in the superblock. This means the code
			
 
				+ * is as independent of the external XFS code as possible and doesn't require
			
 
				+ * passing struct xfs_mount pointers into places where it isn't really
			
 
				+ * necessary.
			
 
				+ *
			
 
				+ * Version 3 includes:
			
 
				+ *
			
 
				+ *	- a larger block header for CRC and identification purposes and so the
			
 
				+ *	offsets of all the structures inside the blocks are different.
			
 
				+ *
			
 
				+ *	- new magic numbers to be able to detect the v2/v3 types on the fly.
			
 
				+ */
			
 
				+
			
 
				+#define	XFS_DIR3_BLOCK_MAGIC	0x58444233	/* XDB3: single block dirs */
			
 
				+#define	XFS_DIR3_DATA_MAGIC	0x58444433	/* XDD3: multiblock dirs */
			
 
				+#define	XFS_DIR3_FREE_MAGIC	0x58444633	/* XDF3: free index blocks */
			
 
				+
			
 
				 /*
			
 
				  * Byte offset in data block and shortform entry.
			
 
				  */
			
@@ -194,16 +227,6 @@ xfs_dir2_sf_nextentry(struct xfs_dir2_sf_hdr *hdr,
 
				 #define	XFS_DIR2_DATA_FIRSTDB(mp)	\
			
 
				 	xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
			
 
				 
			
 
				-/*
			
 
				- * Offsets of . and .. in data space (always block 0)
			
 
				- */
			
 
				-#define	XFS_DIR2_DATA_DOT_OFFSET	\
			
 
				-	((xfs_dir2_data_aoff_t)sizeof(struct xfs_dir2_data_hdr))
			
 
				-#define	XFS_DIR2_DATA_DOTDOT_OFFSET	\
			
 
				-	(XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
			
 
				-#define	XFS_DIR2_DATA_FIRST_OFFSET		\
			
 
				-	(XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
			
 
				-
			
 
				 /*
			
 
				  * Describe a free area in the data block.
			
 
				  *
			
@@ -225,6 +248,39 @@ typedef struct xfs_dir2_data_hdr {
 
				 	xfs_dir2_data_free_t	bestfree[XFS_DIR2_DATA_FD_COUNT];
			
 
				 } xfs_dir2_data_hdr_t;
			
 
				 
			
 
				+/*
			
 
				+ * define a structure for all the verification fields we are adding to the
			
 
				+ * directory block structures. This will be used in several structures.
			
 
				+ * The magic number must be the first entry to align with all the dir2
			
 
				+ * structures so we determine how to decode them just by the magic number.
			
 
				+ */
			
 
				+struct xfs_dir3_blk_hdr {
			
 
				+	__be32			magic;	/* magic number */
			
 
				+	__be32			crc;	/* CRC of block */
			
 
				+	__be64			blkno;	/* first block of the buffer */
			
 
				+	__be64			lsn;	/* sequence number of last write */
			
 
				+	uuid_t			uuid;	/* filesystem we belong to */
			
 
				+	__be64			owner;	/* inode that owns the block */
			
 
				+};
			
 
				+
			
 
				+struct xfs_dir3_data_hdr {
			
 
				+	struct xfs_dir3_blk_hdr	hdr;
			
 
				+	xfs_dir2_data_free_t	best_free[XFS_DIR2_DATA_FD_COUNT];
			
 
				+};
			
 
				+
			
 
				+#define XFS_DIR3_DATA_CRC_OFF  offsetof(struct xfs_dir3_data_hdr, hdr.crc)
			
 
				+
			
 
				+static inline struct xfs_dir2_data_free *
			
 
				+xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr)
			
 
				+{
			
 
				+	if (hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
			
 
				+	    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
			
 
				+		struct xfs_dir3_data_hdr *hdr3 = (struct xfs_dir3_data_hdr *)hdr;
			
 
				+		return hdr3->best_free;
			
 
				+	}
			
 
				+	return hdr->bestfree;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Active entry in a data block.
			
 
				  *
			
@@ -280,6 +336,94 @@ xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup)
 
				 			be16_to_cpu(dup->length) - sizeof(__be16));
			
 
				 }
			
 
				 
			
 
				+static inline size_t
			
 
				+xfs_dir3_data_hdr_size(bool dir3)
			
 
				+{
			
 
				+	if (dir3)
			
 
				+		return sizeof(struct xfs_dir3_data_hdr);
			
 
				+	return sizeof(struct xfs_dir2_data_hdr);
			
 
				+}
			
 
				+
			
 
				+static inline size_t
			
 
				+xfs_dir3_data_entry_offset(struct xfs_dir2_data_hdr *hdr)
			
 
				+{
			
 
				+	bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
			
 
				+		    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
			
 
				+	return xfs_dir3_data_hdr_size(dir3);
			
 
				+}
			
 
				+
			
 
				+static inline struct xfs_dir2_data_entry *
			
 
				+xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr)
			
 
				+{
			
 
				+	return (struct xfs_dir2_data_entry *)
			
 
				+		((char *)hdr + xfs_dir3_data_entry_offset(hdr));
			
 
				+}
			
 
				+
			
 
				+static inline struct xfs_dir2_data_unused *
			
 
				+xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr)
			
 
				+{
			
 
				+	return (struct xfs_dir2_data_unused *)
			
 
				+		((char *)hdr + xfs_dir3_data_entry_offset(hdr));
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Offsets of . and .. in data space (always block 0)
			
 
				+ *
			
 
				+ * The macros are used for shortform directories as they have no headers to read
			
 
				+ * the magic number out of. Shortform directories need to know the size of the
			
 
				+ * data block header because the sfe embeds the block offset of the entry into
			
 
				+ * it so that it doesn't change when format conversion occurs. Bad Things Happen
			
 
				+ * if we don't follow this rule.
			
 
				+ */
			
 
				+#define	XFS_DIR3_DATA_DOT_OFFSET(mp)	\
			
 
				+	xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&(mp)->m_sb))
			
 
				+#define	XFS_DIR3_DATA_DOTDOT_OFFSET(mp)	\
			
 
				+	(XFS_DIR3_DATA_DOT_OFFSET(mp) + xfs_dir2_data_entsize(1))
			
 
				+#define	XFS_DIR3_DATA_FIRST_OFFSET(mp)		\
			
 
				+	(XFS_DIR3_DATA_DOTDOT_OFFSET(mp) + xfs_dir2_data_entsize(2))
			
 
				+
			
 
				+static inline xfs_dir2_data_aoff_t
			
 
				+xfs_dir3_data_dot_offset(struct xfs_dir2_data_hdr *hdr)
			
 
				+{
			
 
				+	return xfs_dir3_data_entry_offset(hdr);
			
 
				+}
			
 
				+
			
 
				+static inline xfs_dir2_data_aoff_t
			
 
				+xfs_dir3_data_dotdot_offset(struct xfs_dir2_data_hdr *hdr)
			
 
				+{
			
 
				+	return xfs_dir3_data_dot_offset(hdr) + xfs_dir2_data_entsize(1);
			
 
				+}
			
 
				+
			
 
				+static inline xfs_dir2_data_aoff_t
			
 
				+xfs_dir3_data_first_offset(struct xfs_dir2_data_hdr *hdr)
			
 
				+{
			
 
				+	return xfs_dir3_data_dotdot_offset(hdr) + xfs_dir2_data_entsize(2);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * location of . and .. in data space (always block 0)
			
 
				+ */
			
 
				+static inline struct xfs_dir2_data_entry *
			
 
				+xfs_dir3_data_dot_entry_p(struct xfs_dir2_data_hdr *hdr)
			
 
				+{
			
 
				+	return (struct xfs_dir2_data_entry *)
			
 
				+		((char *)hdr + xfs_dir3_data_dot_offset(hdr));
			
 
				+}
			
 
				+
			
 
				+static inline struct xfs_dir2_data_entry *
			
 
				+xfs_dir3_data_dotdot_entry_p(struct xfs_dir2_data_hdr *hdr)
			
 
				+{
			
 
				+	return (struct xfs_dir2_data_entry *)
			
 
				+		((char *)hdr + xfs_dir3_data_dotdot_offset(hdr));
			
 
				+}
			
 
				+
			
 
				+static inline struct xfs_dir2_data_entry *
			
 
				+xfs_dir3_data_first_entry_p(struct xfs_dir2_data_hdr *hdr)
			
 
				+{
			
 
				+	return (struct xfs_dir2_data_entry *)
			
 
				+		((char *)hdr + xfs_dir3_data_first_offset(hdr));
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Leaf block structures.
			
 
				  *
			
@@ -329,6 +473,21 @@ typedef struct xfs_dir2_leaf_hdr {
 
				 	__be16			stale;		/* count of stale entries */
			
 
				 } xfs_dir2_leaf_hdr_t;
			
 
				 
			
 
				+struct xfs_dir3_leaf_hdr {
			
 
				+	struct xfs_da3_blkinfo	info;		/* header for da routines */
			
 
				+	__be16			count;		/* count of entries */
			
 
				+	__be16			stale;		/* count of stale entries */
			
 
				+	__be32			pad;
			
 
				+};
			
 
				+
			
 
				+struct xfs_dir3_icleaf_hdr {
			
 
				+	__uint32_t		forw;
			
 
				+	__uint32_t		back;
			
 
				+	__uint16_t		magic;
			
 
				+	__uint16_t		count;
			
 
				+	__uint16_t		stale;
			
 
				+};
			
 
				+
			
 
				 /*
			
 
				  * Leaf block entry.
			
 
				  */
			
@@ -348,20 +507,47 @@ typedef struct xfs_dir2_leaf_tail {
 
				  * Leaf block.
			
 
				  */
			
 
				 typedef struct xfs_dir2_leaf {
			
 
				-	xfs_dir2_leaf_hdr_t	hdr;		/* leaf header */
			
 
				-	xfs_dir2_leaf_entry_t	ents[];		/* entries */
			
 
				+	xfs_dir2_leaf_hdr_t	hdr;			/* leaf header */
			
 
				+	xfs_dir2_leaf_entry_t	__ents[];		/* entries */
			
 
				 } xfs_dir2_leaf_t;
			
 
				 
			
 
				-/*
			
 
				- * DB blocks here are logical directory block numbers, not filesystem blocks.
			
 
				- */
			
 
				+struct xfs_dir3_leaf {
			
 
				+	struct xfs_dir3_leaf_hdr	hdr;		/* leaf header */
			
 
				+	struct xfs_dir2_leaf_entry	__ents[];	/* entries */
			
 
				+};
			
 
				 
			
 
				-static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
			
 
				+#define XFS_DIR3_LEAF_CRC_OFF  offsetof(struct xfs_dir3_leaf_hdr, info.crc)
			
 
				+
			
 
				+static inline int
			
 
				+xfs_dir3_leaf_hdr_size(struct xfs_dir2_leaf *lp)
			
 
				 {
			
 
				-	return (mp->m_dirblksize - (uint)sizeof(struct xfs_dir2_leaf_hdr)) /
			
 
				+	if (lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
			
 
				+	    lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC))
			
 
				+		return sizeof(struct xfs_dir3_leaf_hdr);
			
 
				+	return sizeof(struct xfs_dir2_leaf_hdr);
			
 
				+}
			
 
				+
			
 
				+static inline int
			
 
				+xfs_dir3_max_leaf_ents(struct xfs_mount *mp, struct xfs_dir2_leaf *lp)
			
 
				+{
			
 
				+	return (mp->m_dirblksize - xfs_dir3_leaf_hdr_size(lp)) /
			
 
				 		(uint)sizeof(struct xfs_dir2_leaf_entry);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Get address of the entries array in a v2 or v3 format leaf block.
			
 
				+ */
			
 
				+static inline struct xfs_dir2_leaf_entry *
			
 
				+xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp)
			
 
				+{
			
 
				+	if (lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
			
 
				+	    lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
			
 
				+		struct xfs_dir3_leaf *lp3 = (struct xfs_dir3_leaf *)lp;
			
 
				+		return lp3->__ents;
			
 
				+	}
			
 
				+	return lp->__ents;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Get address of the bestcount field in the single-leaf block.
			
 
				  */
			
@@ -382,6 +568,10 @@ xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
 
				 	return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * DB blocks here are logical directory block numbers, not filesystem blocks.
			
 
				+ */
			
 
				+
			
 
				 /*
			
 
				  * Convert dataptr to byte in file space
			
 
				  */
			
@@ -520,19 +710,65 @@ typedef struct xfs_dir2_free {
 
				 						/* unused entries are -1 */
			
 
				 } xfs_dir2_free_t;
			
 
				 
			
 
				-static inline int xfs_dir2_free_max_bests(struct xfs_mount *mp)
			
 
				+struct xfs_dir3_free_hdr {
			
 
				+	struct xfs_dir3_blk_hdr	hdr;
			
 
				+	__be32			firstdb;	/* db of first entry */
			
 
				+	__be32			nvalid;		/* count of valid entries */
			
 
				+	__be32			nused;		/* count of used entries */
			
 
				+};
			
 
				+
			
 
				+struct xfs_dir3_free {
			
 
				+	struct xfs_dir3_free_hdr hdr;
			
 
				+	__be16			bests[];	/* best free counts */
			
 
				+						/* unused entries are -1 */
			
 
				+};
			
 
				+
			
 
				+#define XFS_DIR3_FREE_CRC_OFF  offsetof(struct xfs_dir3_free, hdr.hdr.crc)
			
 
				+
			
 
				+/*
			
 
				+ * In core version of the free block header, abstracted away from on-disk format
			
 
				+ * differences. Use this in the code, and convert to/from the disk version using
			
 
				+ * xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk.
			
 
				+ */
			
 
				+struct xfs_dir3_icfree_hdr {
			
 
				+	__uint32_t	magic;
			
 
				+	__uint32_t	firstdb;
			
 
				+	__uint32_t	nvalid;
			
 
				+	__uint32_t	nused;
			
 
				+
			
 
				+};
			
 
				+
			
 
				+void xfs_dir3_free_hdr_from_disk(struct xfs_dir3_icfree_hdr *to,
			
 
				+				 struct xfs_dir2_free *from);
			
 
				+
			
 
				+static inline int
			
 
				+xfs_dir3_free_hdr_size(struct xfs_mount *mp)
			
 
				 {
			
 
				-	return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) /
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return sizeof(struct xfs_dir3_free_hdr);
			
 
				+	return sizeof(struct xfs_dir2_free_hdr);
			
 
				+}
			
 
				+
			
 
				+static inline int
			
 
				+xfs_dir3_free_max_bests(struct xfs_mount *mp)
			
 
				+{
			
 
				+	return (mp->m_dirblksize - xfs_dir3_free_hdr_size(mp)) /
			
 
				 		sizeof(xfs_dir2_data_off_t);
			
 
				 }
			
 
				 
			
 
				+static inline __be16 *
			
 
				+xfs_dir3_free_bests_p(struct xfs_mount *mp, struct xfs_dir2_free *free)
			
 
				+{
			
 
				+	return (__be16 *)((char *)free + xfs_dir3_free_hdr_size(mp));
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Convert data space db to the corresponding free db.
			
 
				  */
			
 
				 static inline xfs_dir2_db_t
			
 
				 xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
			
 
				 {
			
 
				-	return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp);
			
 
				+	return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir3_free_max_bests(mp);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -541,7 +777,7 @@ xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
 
				 static inline int
			
 
				 xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
			
 
				 {
			
 
				-	return db % xfs_dir2_free_max_bests(mp);
			
 
				+	return db % xfs_dir3_free_max_bests(mp);
			
 
				 }
			
 
				 
			
 
				 /*
			
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
--- a/fs/xfs/xfs_dir2_priv.h
+++ b/fs/xfs/xfs_dir2_priv.h
@@ -30,7 +30,7 @@ extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
 
				 				const unsigned char *name, int len);
			
 
				 
			
 
				 /* xfs_dir2_block.c */
			
 
				-extern const struct xfs_buf_ops xfs_dir2_block_buf_ops;
			
 
				+extern const struct xfs_buf_ops xfs_dir3_block_buf_ops;
			
 
				 
			
 
				 extern int xfs_dir2_block_addname(struct xfs_da_args *args);
			
 
				 extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent,
			
@@ -43,17 +43,18 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
 
				 
			
 
				 /* xfs_dir2_data.c */
			
 
				 #ifdef DEBUG
			
 
				-#define	xfs_dir2_data_check(dp,bp) __xfs_dir2_data_check(dp, bp);
			
 
				+#define	xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp);
			
 
				 #else
			
 
				-#define	xfs_dir2_data_check(dp,bp)
			
 
				+#define	xfs_dir3_data_check(dp,bp)
			
 
				 #endif
			
 
				 
			
 
				-extern const struct xfs_buf_ops xfs_dir2_data_buf_ops;
			
 
				+extern const struct xfs_buf_ops xfs_dir3_data_buf_ops;
			
 
				+extern const struct xfs_buf_ops xfs_dir3_free_buf_ops;
			
 
				 
			
 
				-extern int __xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
			
 
				-extern int xfs_dir2_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
			
 
				+extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
			
 
				+extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
			
 
				 		xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
			
 
				-extern int xfs_dir2_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp,
			
 
				+extern int xfs_dir3_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp,
			
 
				 		xfs_dablk_t bno, xfs_daddr_t mapped_bno);
			
 
				 
			
 
				 extern struct xfs_dir2_data_free *
			
@@ -61,7 +62,7 @@ xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
 
				 		struct xfs_dir2_data_unused *dup, int *loghead);
			
 
				 extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
			
 
				 		struct xfs_dir2_data_hdr *hdr, int *loghead);
			
 
				-extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
			
 
				+extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
			
 
				 		struct xfs_buf **bpp);
			
 
				 extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp,
			
 
				 		struct xfs_dir2_data_entry *dep);
			
@@ -77,24 +78,26 @@ extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp,
 
				 		xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
			
 
				 
			
 
				 /* xfs_dir2_leaf.c */
			
 
				-extern const struct xfs_buf_ops xfs_dir2_leafn_buf_ops;
			
 
				+extern const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops;
			
 
				+extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops;
			
 
				 
			
 
				-extern int xfs_dir2_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
			
 
				+extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
			
 
				 		xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
			
 
				 extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
			
 
				 		struct xfs_buf *dbp);
			
 
				 extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
			
 
				-extern void xfs_dir2_leaf_compact(struct xfs_da_args *args,
			
 
				-		struct xfs_buf *bp);
			
 
				-extern void xfs_dir2_leaf_compact_x1(struct xfs_buf *bp, int *indexp,
			
 
				+extern void xfs_dir3_leaf_compact(struct xfs_da_args *args,
			
 
				+		struct xfs_dir3_icleaf_hdr *leafhdr, struct xfs_buf *bp);
			
 
				+extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,
			
 
				+		struct xfs_dir2_leaf_entry *ents, int *indexp,
			
 
				 		int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
			
 
				 extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent,
			
 
				 		size_t bufsize, xfs_off_t *offset, filldir_t filldir);
			
 
				-extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
			
 
				-		struct xfs_buf **bpp, int magic);
			
 
				-extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp,
			
 
				+extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,
			
 
				+		struct xfs_buf **bpp, __uint16_t magic);
			
 
				+extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp,
			
 
				 		int first, int last);
			
 
				-extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp,
			
 
				+extern void xfs_dir3_leaf_log_header(struct xfs_trans *tp,
			
 
				 		struct xfs_buf *bp);
			
 
				 extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
			
 
				 extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
			
@@ -104,11 +107,18 @@ extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
 
				 extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
			
 
				 		struct xfs_buf *lbp, xfs_dir2_db_t db);
			
 
				 extern struct xfs_dir2_leaf_entry *
			
 
				-xfs_dir2_leaf_find_entry(struct xfs_dir2_leaf *leaf, int index, int compact,
			
 
				-		int lowstale, int highstale,
			
 
				-		int *lfloglow, int *lfloghigh);
			
 
				+xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr,
			
 
				+		struct xfs_dir2_leaf_entry *ents, int index, int compact,
			
 
				+		int lowstale, int highstale, int *lfloglow, int *lfloghigh);
			
 
				 extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
			
 
				 
			
 
				+extern void xfs_dir3_leaf_hdr_from_disk(struct xfs_dir3_icleaf_hdr *to,
			
 
				+		struct xfs_dir2_leaf *from);
			
 
				+extern void xfs_dir3_leaf_hdr_to_disk(struct xfs_dir2_leaf *to,
			
 
				+		struct xfs_dir3_icleaf_hdr *from);
			
 
				+extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp,
			
 
				+		struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf);
			
 
				+
			
 
				 /* xfs_dir2_node.c */
			
 
				 extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
			
 
				 		struct xfs_buf *lbp);
			
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -278,7 +278,7 @@ xfs_dir2_block_to_sf(
 
				 	 * Set up to loop over the block's entries.
			
 
				 	 */
			
 
				 	btp = xfs_dir2_block_tail_p(mp, hdr);
			
 
				-	ptr = (char *)(hdr + 1);
			
 
				+	ptr = (char *)xfs_dir3_data_entry_p(hdr);
			
 
				 	endptr = (char *)xfs_dir2_block_leaf_p(btp);
			
 
				 	sfep = xfs_dir2_sf_firstentry(sfp);
			
 
				 	/*
			
@@ -535,7 +535,7 @@ xfs_dir2_sf_addname_hard(
 
				 	 * to insert the new entry.
			
 
				 	 * If it's going to end up at the end then oldsfep will point there.
			
 
				 	 */
			
 
				-	for (offset = XFS_DIR2_DATA_FIRST_OFFSET,
			
 
				+	for (offset = XFS_DIR3_DATA_FIRST_OFFSET(dp->i_mount),
			
 
				 	      oldsfep = xfs_dir2_sf_firstentry(oldsfp),
			
 
				 	      add_datasize = xfs_dir2_data_entsize(args->namelen),
			
 
				 	      eof = (char *)oldsfep == &buf[old_isize];
			
@@ -617,7 +617,7 @@ xfs_dir2_sf_addname_pick(
 
				 
			
 
				 	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
			
 
				 	size = xfs_dir2_data_entsize(args->namelen);
			
 
				-	offset = XFS_DIR2_DATA_FIRST_OFFSET;
			
 
				+	offset = XFS_DIR3_DATA_FIRST_OFFSET(mp);
			
 
				 	sfep = xfs_dir2_sf_firstentry(sfp);
			
 
				 	holefit = 0;
			
 
				 	/*
			
@@ -688,7 +688,7 @@ xfs_dir2_sf_check(
 
				 	dp = args->dp;
			
 
				 
			
 
				 	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
			
 
				-	offset = XFS_DIR2_DATA_FIRST_OFFSET;
			
 
				+	offset = XFS_DIR3_DATA_FIRST_OFFSET(dp->i_mount);
			
 
				 	ino = xfs_dir2_sf_get_parent_ino(sfp);
			
 
				 	i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
			
 
				 
			
@@ -812,9 +812,9 @@ xfs_dir2_sf_getdents(
 
				 	 * mp->m_dirdatablk.
			
 
				 	 */
			
 
				 	dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
			
 
				-					     XFS_DIR2_DATA_DOT_OFFSET);
			
 
				+					     XFS_DIR3_DATA_DOT_OFFSET(mp));
			
 
				 	dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
			
 
				-						XFS_DIR2_DATA_DOTDOT_OFFSET);
			
 
				+						XFS_DIR3_DATA_DOTDOT_OFFSET(mp));
			
 
				 
			
 
				 	/*
			
 
				 	 * Put . entry unless we're starting past it.
			
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -36,6 +36,7 @@
 
				 #include "xfs_trans_space.h"
			
 
				 #include "xfs_trans_priv.h"
			
 
				 #include "xfs_qm.h"
			
 
				+#include "xfs_cksum.h"
			
 
				 #include "xfs_trace.h"
			
 
				 
			
 
				 /*
			
@@ -85,17 +86,23 @@ xfs_qm_dqdestroy(
 
				  */
			
 
				 void
			
 
				 xfs_qm_adjust_dqlimits(
			
 
				-	xfs_mount_t		*mp,
			
 
				-	xfs_disk_dquot_t	*d)
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_dquot	*dq)
			
 
				 {
			
 
				-	xfs_quotainfo_t		*q = mp->m_quotainfo;
			
 
				+	struct xfs_quotainfo	*q = mp->m_quotainfo;
			
 
				+	struct xfs_disk_dquot	*d = &dq->q_core;
			
 
				+	int			prealloc = 0;
			
 
				 
			
 
				 	ASSERT(d->d_id);
			
 
				 
			
 
				-	if (q->qi_bsoftlimit && !d->d_blk_softlimit)
			
 
				+	if (q->qi_bsoftlimit && !d->d_blk_softlimit) {
			
 
				 		d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
			
 
				-	if (q->qi_bhardlimit && !d->d_blk_hardlimit)
			
 
				+		prealloc = 1;
			
 
				+	}
			
 
				+	if (q->qi_bhardlimit && !d->d_blk_hardlimit) {
			
 
				 		d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
			
 
				+		prealloc = 1;
			
 
				+	}
			
 
				 	if (q->qi_isoftlimit && !d->d_ino_softlimit)
			
 
				 		d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
			
 
				 	if (q->qi_ihardlimit && !d->d_ino_hardlimit)
			
@@ -104,6 +111,9 @@ xfs_qm_adjust_dqlimits(
 
				 		d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
			
 
				 	if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
			
 
				 		d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
			
 
				+
			
 
				+	if (prealloc)
			
 
				+		xfs_dquot_set_prealloc_limits(dq);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -239,6 +249,8 @@ xfs_qm_init_dquot_blk(
 
				 		d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
			
 
				 		d->dd_diskdq.d_id = cpu_to_be32(curid);
			
 
				 		d->dd_diskdq.d_flags = type;
			
 
				+		if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+			uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
			
 
				 	}
			
 
				 
			
 
				 	xfs_trans_dquot_buf(tp, bp,
			
@@ -248,16 +260,103 @@ xfs_qm_init_dquot_blk(
 
				 	xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
			
 
				 }
			
 
				 
			
 
				-static void
			
 
				+/*
			
 
				+ * Initialize the dynamic speculative preallocation thresholds. The lo/hi
			
 
				+ * watermarks correspond to the soft and hard limits by default. If a soft limit
			
 
				+ * is not specified, we use 95% of the hard limit.
			
 
				+ */
			
 
				+void
			
 
				+xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
			
 
				+{
			
 
				+	__uint64_t space;
			
 
				+
			
 
				+	dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
			
 
				+	dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit);
			
 
				+	if (!dqp->q_prealloc_lo_wmark) {
			
 
				+		dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark;
			
 
				+		do_div(dqp->q_prealloc_lo_wmark, 100);
			
 
				+		dqp->q_prealloc_lo_wmark *= 95;
			
 
				+	}
			
 
				+
			
 
				+	space = dqp->q_prealloc_hi_wmark;
			
 
				+
			
 
				+	do_div(space, 100);
			
 
				+	dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space;
			
 
				+	dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3;
			
 
				+	dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
			
 
				+}
			
 
				+
			
 
				+STATIC void
			
 
				+xfs_dquot_buf_calc_crc(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_buf		*bp)
			
 
				+{
			
 
				+	struct xfs_dqblk	*d = (struct xfs_dqblk *)bp->b_addr;
			
 
				+	int			i;
			
 
				+
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return;
			
 
				+
			
 
				+	for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++, d++) {
			
 
				+		xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
			
 
				+				 offsetof(struct xfs_dqblk, dd_crc));
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+STATIC bool
			
 
				+xfs_dquot_buf_verify_crc(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_buf		*bp)
			
 
				+{
			
 
				+	struct xfs_dqblk	*d = (struct xfs_dqblk *)bp->b_addr;
			
 
				+	int			ndquots;
			
 
				+	int			i;
			
 
				+
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return true;
			
 
				+
			
 
				+	/*
			
 
				+	 * if we are in log recovery, the quota subsystem has not been
			
 
				+	 * initialised so we have no quotainfo structure. In that case, we need
			
 
				+	 * to manually calculate the number of dquots in the buffer.
			
 
				+	 */
			
 
				+	if (mp->m_quotainfo)
			
 
				+		ndquots = mp->m_quotainfo->qi_dqperchunk;
			
 
				+	else
			
 
				+		ndquots = xfs_qm_calc_dquots_per_chunk(mp,
			
 
				+					XFS_BB_TO_FSB(mp, bp->b_length));
			
 
				+
			
 
				+	for (i = 0; i < ndquots; i++, d++) {
			
 
				+		if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
			
 
				+				 offsetof(struct xfs_dqblk, dd_crc)))
			
 
				+			return false;
			
 
				+		if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid))
			
 
				+			return false;
			
 
				+	}
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+STATIC bool
			
 
				 xfs_dquot_buf_verify(
			
 
				+	struct xfs_mount	*mp,
			
 
				 	struct xfs_buf		*bp)
			
 
				 {
			
 
				-	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				 	struct xfs_dqblk	*d = (struct xfs_dqblk *)bp->b_addr;
			
 
				-	struct xfs_disk_dquot	*ddq;
			
 
				 	xfs_dqid_t		id = 0;
			
 
				+	int			ndquots;
			
 
				 	int			i;
			
 
				 
			
 
				+	/*
			
 
				+	 * if we are in log recovery, the quota subsystem has not been
			
 
				+	 * initialised so we have no quotainfo structure. In that case, we need
			
 
				+	 * to manually calculate the number of dquots in the buffer.
			
 
				+	 */
			
 
				+	if (mp->m_quotainfo)
			
 
				+		ndquots = mp->m_quotainfo->qi_dqperchunk;
			
 
				+	else
			
 
				+		ndquots = xfs_qm_calc_dquots_per_chunk(mp, bp->b_length);
			
 
				+
			
 
				 	/*
			
 
				 	 * On the first read of the buffer, verify that each dquot is valid.
			
 
				 	 * We don't know what the id of the dquot is supposed to be, just that
			
@@ -265,8 +364,9 @@ xfs_dquot_buf_verify(
 
				 	 * first id is corrupt, then it will fail on the second dquot in the
			
 
				 	 * buffer so corruptions could point to the wrong dquot in this case.
			
 
				 	 */
			
 
				-	for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) {
			
 
				-		int	error;
			
 
				+	for (i = 0; i < ndquots; i++) {
			
 
				+		struct xfs_disk_dquot	*ddq;
			
 
				+		int			error;
			
 
				 
			
 
				 		ddq = &d[i].dd_diskdq;
			
 
				 
			
@@ -274,27 +374,37 @@ xfs_dquot_buf_verify(
 
				 			id = be32_to_cpu(ddq->d_id);
			
 
				 
			
 
				 		error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
			
 
				-					"xfs_dquot_read_verify");
			
 
				-		if (error) {
			
 
				-			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, d);
			
 
				-			xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				-			break;
			
 
				-		}
			
 
				+				       "xfs_dquot_buf_verify");
			
 
				+		if (error)
			
 
				+			return false;
			
 
				 	}
			
 
				+	return true;
			
 
				 }
			
 
				 
			
 
				 static void
			
 
				 xfs_dquot_buf_read_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_dquot_buf_verify(bp);
			
 
				+	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				+
			
 
				+	if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) {
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 void
			
 
				 xfs_dquot_buf_write_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_dquot_buf_verify(bp);
			
 
				+	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				+
			
 
				+	if (!xfs_dquot_buf_verify(mp, bp)) {
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+		return;
			
 
				+	}
			
 
				+	xfs_dquot_buf_calc_crc(mp, bp);
			
 
				 }
			
 
				 
			
 
				 const struct xfs_buf_ops xfs_dquot_buf_ops = {
			
@@ -648,6 +758,9 @@ xfs_qm_dqread(
 
				 	dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
			
 
				 	dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
			
 
				 
			
 
				+	/* initialize the dquot speculative prealloc thresholds */
			
 
				+	xfs_dquot_set_prealloc_limits(dqp);
			
 
				+
			
 
				 	/* Mark the buf so that this will stay incore a little longer */
			
 
				 	xfs_buf_set_ref(bp, XFS_DQUOT_REF);
			
 
				 
			
@@ -1034,6 +1147,17 @@ xfs_qm_dqflush(
 
				 	xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
			
 
				 					&dqp->q_logitem.qli_item.li_lsn);
			
 
				 
			
 
				+	/*
			
 
				+	 * copy the lsn into the on-disk dquot now while we have the in memory
			
 
				+	 * dquot here. This can't be done later in the write verifier as we
			
 
				+	 * can't get access to the log item at that point in time.
			
 
				+	 */
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				+		struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddqp;
			
 
				+
			
 
				+		dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
			
 
				+	}
			
 
				+
			
 
				 	/*
			
 
				 	 * Attach an iodone routine so that we can remove this dquot from the
			
 
				 	 * AIL and release the flush lock once the dquot is synced to disk.
			
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -32,6 +32,13 @@
 
				 struct xfs_mount;
			
 
				 struct xfs_trans;
			
 
				 
			
 
				+enum {
			
 
				+	XFS_QLOWSP_1_PCNT = 0,
			
 
				+	XFS_QLOWSP_3_PCNT,
			
 
				+	XFS_QLOWSP_5_PCNT,
			
 
				+	XFS_QLOWSP_MAX
			
 
				+};
			
 
				+
			
 
				 /*
			
 
				  * The incore dquot structure
			
 
				  */
			
@@ -51,6 +58,9 @@ typedef struct xfs_dquot {
 
				 	xfs_qcnt_t	 q_res_bcount;	/* total regular nblks used+reserved */
			
 
				 	xfs_qcnt_t	 q_res_icount;	/* total inos allocd+reserved */
			
 
				 	xfs_qcnt_t	 q_res_rtbcount;/* total realtime blks used+reserved */
			
 
				+	xfs_qcnt_t	 q_prealloc_lo_wmark;/* prealloc throttle wmark */
			
 
				+	xfs_qcnt_t	 q_prealloc_hi_wmark;/* prealloc disabled wmark */
			
 
				+	int64_t		 q_low_space[XFS_QLOWSP_MAX];
			
 
				 	struct mutex	 q_qlock;	/* quota lock */
			
 
				 	struct completion q_flush;	/* flush completion queue */
			
 
				 	atomic_t          q_pincount;	/* dquot pin count */
			
@@ -145,14 +155,16 @@ extern int		xfs_qm_dqflush(struct xfs_dquot *, struct xfs_buf **);
 
				 extern void		xfs_qm_dqunpin_wait(xfs_dquot_t *);
			
 
				 extern void		xfs_qm_adjust_dqtimers(xfs_mount_t *,
			
 
				 					xfs_disk_dquot_t *);
			
 
				-extern void		xfs_qm_adjust_dqlimits(xfs_mount_t *,
			
 
				-					xfs_disk_dquot_t *);
			
 
				+extern void		xfs_qm_adjust_dqlimits(struct xfs_mount *,
			
 
				+					       struct xfs_dquot *);
			
 
				 extern int		xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
			
 
				 					xfs_dqid_t, uint, uint, xfs_dquot_t **);
			
 
				 extern void		xfs_qm_dqput(xfs_dquot_t *);
			
 
				 
			
 
				 extern void		xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
			
 
				 
			
 
				+extern void		xfs_dquot_set_prealloc_limits(struct xfs_dquot *);
			
 
				+
			
 
				 static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
			
 
				 {
			
 
				 	xfs_dqlock(dqp);
			
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -66,7 +66,7 @@ xfs_error_test(int error_tag, int *fsidp, char *expression,
 
				 	int i;
			
 
				 	int64_t fsid;
			
 
				 
			
 
				-	if (random32() % randfactor)
			
 
				+	if (prandom_u32() % randfactor)
			
 
				 		return 0;
			
 
				 
			
 
				 	memcpy(&fsid, fsidp, sizeof(xfs_fsid_t));
			
@@ -178,7 +178,7 @@ xfs_corruption_error(
 
				 	inst_t			*ra)
			
 
				 {
			
 
				 	if (level <= xfs_error_level)
			
 
				-		xfs_hex_dump(p, 16);
			
 
				+		xfs_hex_dump(p, 64);
			
 
				 	xfs_error_report(tag, level, mp, filename, linenum, ra);
			
 
				 	xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
			
 
				 }
			
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -50,9 +50,8 @@ xfs_efi_item_free(
 
				  * Freeing the efi requires that we remove it from the AIL if it has already
			
 
				  * been placed there. However, the EFI may not yet have been placed in the AIL
			
 
				  * when called by xfs_efi_release() from EFD processing due to the ordering of
			
 
				- * committed vs unpin operations in bulk insert operations. Hence the
			
 
				- * test_and_clear_bit(XFS_EFI_COMMITTED) to ensure only the last caller frees
			
 
				- * the EFI.
			
 
				+ * committed vs unpin operations in bulk insert operations. Hence the reference
			
 
				+ * count to ensure only the last caller frees the EFI.
			
 
				  */
			
 
				 STATIC void
			
 
				 __xfs_efi_release(
			
@@ -60,7 +59,7 @@ __xfs_efi_release(
 
				 {
			
 
				 	struct xfs_ail		*ailp = efip->efi_item.li_ailp;
			
 
				 
			
 
				-	if (!test_and_clear_bit(XFS_EFI_COMMITTED, &efip->efi_flags)) {
			
 
				+	if (atomic_dec_and_test(&efip->efi_refcount)) {
			
 
				 		spin_lock(&ailp->xa_lock);
			
 
				 		/* xfs_trans_ail_delete() drops the AIL lock. */
			
 
				 		xfs_trans_ail_delete(ailp, &efip->efi_item,
			
@@ -126,8 +125,8 @@ xfs_efi_item_pin(
 
				  * which the EFI is manipulated during a transaction.  If we are being asked to
			
 
				  * remove the EFI it's because the transaction has been cancelled and by
			
 
				  * definition that means the EFI cannot be in the AIL so remove it from the
			
 
				- * transaction and free it.  Otherwise coordinate with xfs_efi_release() (via
			
 
				- * XFS_EFI_COMMITTED) to determine who gets to free the EFI.
			
 
				+ * transaction and free it.  Otherwise coordinate with xfs_efi_release()
			
 
				+ * to determine who gets to free the EFI.
			
 
				  */
			
 
				 STATIC void
			
 
				 xfs_efi_item_unpin(
			
@@ -171,19 +170,13 @@ xfs_efi_item_unlock(
 
				 
			
 
				 /*
			
 
				  * The EFI is logged only once and cannot be moved in the log, so simply return
			
 
				- * the lsn at which it's been logged.  For bulk transaction committed
			
 
				- * processing, the EFI may be processed but not yet unpinned prior to the EFD
			
 
				- * being processed. Set the XFS_EFI_COMMITTED flag so this case can be detected
			
 
				- * when processing the EFD.
			
 
				+ * the lsn at which it's been logged.
			
 
				  */
			
 
				 STATIC xfs_lsn_t
			
 
				 xfs_efi_item_committed(
			
 
				 	struct xfs_log_item	*lip,
			
 
				 	xfs_lsn_t		lsn)
			
 
				 {
			
 
				-	struct xfs_efi_log_item	*efip = EFI_ITEM(lip);
			
 
				-
			
 
				-	set_bit(XFS_EFI_COMMITTED, &efip->efi_flags);
			
 
				 	return lsn;
			
 
				 }
			
 
				 
			
@@ -241,6 +234,7 @@ xfs_efi_init(
 
				 	efip->efi_format.efi_nextents = nextents;
			
 
				 	efip->efi_format.efi_id = (__psint_t)(void*)efip;
			
 
				 	atomic_set(&efip->efi_next_extent, 0);
			
 
				+	atomic_set(&efip->efi_refcount, 2);
			
 
				 
			
 
				 	return efip;
			
 
				 }
			
@@ -310,8 +304,13 @@ xfs_efi_release(xfs_efi_log_item_t	*efip,
 
				 		uint			nextents)
			
 
				 {
			
 
				 	ASSERT(atomic_read(&efip->efi_next_extent) >= nextents);
			
 
				-	if (atomic_sub_and_test(nextents, &efip->efi_next_extent))
			
 
				+	if (atomic_sub_and_test(nextents, &efip->efi_next_extent)) {
			
 
				 		__xfs_efi_release(efip);
			
 
				+
			
 
				+		/* recovery needs us to drop the EFI reference, too */
			
 
				+		if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
			
 
				+			__xfs_efi_release(efip);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip)
			
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -114,16 +114,20 @@ typedef struct xfs_efd_log_format_64 {
 
				  * Define EFI flag bits. Manipulated by set/clear/test_bit operators.
			
 
				  */
			
 
				 #define	XFS_EFI_RECOVERED	1
			
 
				-#define	XFS_EFI_COMMITTED	2
			
 
				 
			
 
				 /*
			
 
				- * This is the "extent free intention" log item.  It is used
			
 
				- * to log the fact that some extents need to be free.  It is
			
 
				- * used in conjunction with the "extent free done" log item
			
 
				- * described below.
			
 
				+ * This is the "extent free intention" log item.  It is used to log the fact
			
 
				+ * that some extents need to be freed.  It is used in conjunction with the
			
 
				+ * "extent free done" log item described below.
			
 
				+ *
			
 
				+ * The EFI is reference counted so that it is not freed prior to both the EFI
			
 
				+ * and EFD being committed and unpinned. This ensures that when the last
			
 
				+ * reference goes away the EFI will always be in the AIL as it has been
			
 
				+ * unpinned, regardless of whether the EFD is processed before or after the EFI.
			
 
				  */
			
 
				 typedef struct xfs_efi_log_item {
			
 
				 	xfs_log_item_t		efi_item;
			
 
				+	atomic_t		efi_refcount;
			
 
				 	atomic_t		efi_next_extent;
			
 
				 	unsigned long		efi_flags;	/* misc flags */
			
 
				 	xfs_efi_log_format_t	efi_format;
			
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -890,7 +890,7 @@ xfs_dir_open(
 
				 	 */
			
 
				 	mode = xfs_ilock_map_shared(ip);
			
 
				 	if (ip->i_d.di_nextents > 0)
			
 
				-		xfs_dir2_data_readahead(NULL, ip, 0, -1);
			
 
				+		xfs_dir3_data_readahead(NULL, ip, 0, -1);
			
 
				 	xfs_iunlock(ip, mode);
			
 
				 	return 0;
			
 
				 }
			
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -247,6 +247,9 @@ xfs_growfs_data_private(
 
				 		tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp);
			
 
				 		agf->agf_freeblks = cpu_to_be32(tmpsize);
			
 
				 		agf->agf_longest = cpu_to_be32(tmpsize);
			
 
				+		if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+			uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_uuid);
			
 
				+
			
 
				 		error = xfs_bwrite(bp);
			
 
				 		xfs_buf_relse(bp);
			
 
				 		if (error)
			
@@ -265,6 +268,11 @@ xfs_growfs_data_private(
 
				 		}
			
 
				 
			
 
				 		agfl = XFS_BUF_TO_AGFL(bp);
			
 
				+		if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				+			agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
			
 
				+			agfl->agfl_seqno = cpu_to_be32(agno);
			
 
				+			uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid);
			
 
				+		}
			
 
				 		for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
			
 
				 			agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
			
 
				 
			
@@ -296,8 +304,11 @@ xfs_growfs_data_private(
 
				 		agi->agi_freecount = 0;
			
 
				 		agi->agi_newino = cpu_to_be32(NULLAGINO);
			
 
				 		agi->agi_dirino = cpu_to_be32(NULLAGINO);
			
 
				+		if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+			uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid);
			
 
				 		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
			
 
				 			agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
			
 
				+
			
 
				 		error = xfs_bwrite(bp);
			
 
				 		xfs_buf_relse(bp);
			
 
				 		if (error)
			
@@ -316,7 +327,13 @@ xfs_growfs_data_private(
 
				 			goto error0;
			
 
				 		}
			
 
				 
			
 
				-		xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1, 0);
			
 
				+		if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+			xfs_btree_init_block(mp, bp, XFS_ABTB_CRC_MAGIC, 0, 1,
			
 
				+						agno, XFS_BTREE_CRC_BLOCKS);
			
 
				+		else
			
 
				+			xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1,
			
 
				+						agno, 0);
			
 
				+
			
 
				 		arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
			
 
				 		arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
			
 
				 		arec->ar_blockcount = cpu_to_be32(
			
@@ -339,7 +356,13 @@ xfs_growfs_data_private(
 
				 			goto error0;
			
 
				 		}
			
 
				 
			
 
				-		xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1, 0);
			
 
				+		if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+			xfs_btree_init_block(mp, bp, XFS_ABTC_CRC_MAGIC, 0, 1,
			
 
				+						agno, XFS_BTREE_CRC_BLOCKS);
			
 
				+		else
			
 
				+			xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1,
			
 
				+						agno, 0);
			
 
				+
			
 
				 		arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
			
 
				 		arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
			
 
				 		arec->ar_blockcount = cpu_to_be32(
			
@@ -363,7 +386,12 @@ xfs_growfs_data_private(
 
				 			goto error0;
			
 
				 		}
			
 
				 
			
 
				-		xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0, 0);
			
 
				+		if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+			xfs_btree_init_block(mp, bp, XFS_IBT_CRC_MAGIC, 0, 0,
			
 
				+						agno, XFS_BTREE_CRC_BLOCKS);
			
 
				+		else
			
 
				+			xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0,
			
 
				+						agno, 0);
			
 
				 
			
 
				 		error = xfs_bwrite(bp);
			
 
				 		xfs_buf_relse(bp);
			
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -36,6 +36,8 @@
 
				 #include "xfs_rtalloc.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_bmap.h"
			
 
				+#include "xfs_cksum.h"
			
 
				+#include "xfs_buf_item.h"
			
 
				 
			
 
				 
			
 
				 /*
			
@@ -165,6 +167,7 @@ xfs_ialloc_inode_init(
 
				 	int			version;
			
 
				 	int			i, j;
			
 
				 	xfs_daddr_t		d;
			
 
				+	xfs_ino_t		ino = 0;
			
 
				 
			
 
				 	/*
			
 
				 	 * Loop over the new block(s), filling in the inodes.
			
@@ -183,13 +186,29 @@ xfs_ialloc_inode_init(
 
				 	}
			
 
				 
			
 
				 	/*
			
 
				-	 * Figure out what version number to use in the inodes we create.
			
 
				-	 * If the superblock version has caught up to the one that supports
			
 
				-	 * the new inode format, then use the new inode version.  Otherwise
			
 
				-	 * use the old version so that old kernels will continue to be
			
 
				-	 * able to use the file system.
			
 
				+	 * Figure out what version number to use in the inodes we create.  If
			
 
				+	 * the superblock version has caught up to the one that supports the new
			
 
				+	 * inode format, then use the new inode version.  Otherwise use the old
			
 
				+	 * version so that old kernels will continue to be able to use the file
			
 
				+	 * system.
			
 
				+	 *
			
 
				+	 * For v3 inodes, we also need to write the inode number into the inode,
			
 
				+	 * so calculate the first inode number of the chunk here as
			
 
				+	 * XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not
			
 
				+	 * across multiple filesystem blocks (such as a cluster) and so cannot
			
 
				+	 * be used in the cluster buffer loop below.
			
 
				+	 *
			
 
				+	 * Further, because we are writing the inode directly into the buffer
			
 
				+	 * and calculating a CRC on the entire inode, we have to log the entire
			
 
				+	 * inode so that the entire range the CRC covers is present in the log.
			
 
				+	 * That means for v3 inodes we log the entire buffer rather than just the
			
 
				+	 * inode cores.
			
 
				 	 */
			
 
				-	if (xfs_sb_version_hasnlink(&mp->m_sb))
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
			
 
				+		version = 3;
			
 
				+		ino = XFS_AGINO_TO_INO(mp, agno,
			
 
				+				       XFS_OFFBNO_TO_AGINO(mp, agbno, 0));
			
 
				+	} else if (xfs_sb_version_hasnlink(&mp->m_sb))
			
 
				 		version = 2;
			
 
				 	else
			
 
				 		version = 1;
			
@@ -212,17 +231,32 @@ xfs_ialloc_inode_init(
 
				 		 *	individual transactions causing a lot of log traffic.
			
 
				 		 */
			
 
				 		fbuf->b_ops = &xfs_inode_buf_ops;
			
 
				-		xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
			
 
				+		xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
			
 
				 		for (i = 0; i < ninodes; i++) {
			
 
				 			int	ioffset = i << mp->m_sb.sb_inodelog;
			
 
				-			uint	isize = sizeof(struct xfs_dinode);
			
 
				+			uint	isize = xfs_dinode_size(version);
			
 
				 
			
 
				 			free = xfs_make_iptr(mp, fbuf, i);
			
 
				 			free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
			
 
				 			free->di_version = version;
			
 
				 			free->di_gen = cpu_to_be32(gen);
			
 
				 			free->di_next_unlinked = cpu_to_be32(NULLAGINO);
			
 
				-			xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
			
 
				+
			
 
				+			if (version == 3) {
			
 
				+				free->di_ino = cpu_to_be64(ino);
			
 
				+				ino++;
			
 
				+				uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid);
			
 
				+				xfs_dinode_calc_crc(mp, free);
			
 
				+			} else {
			
 
				+				/* just log the inode core */
			
 
				+				xfs_trans_log_buf(tp, fbuf, ioffset,
			
 
				+						  ioffset + isize - 1);
			
 
				+			}
			
 
				+		}
			
 
				+		if (version == 3) {
			
 
				+			/* need to log the entire buffer */
			
 
				+			xfs_trans_log_buf(tp, fbuf, 0,
			
 
				+					  BBTOB(fbuf->b_length) - 1);
			
 
				 		}
			
 
				 		xfs_trans_inode_alloc_buf(tp, fbuf);
			
 
				 	}
			
@@ -369,7 +403,7 @@ xfs_ialloc_ag_alloc(
 
				 	 * number from being easily guessable.
			
 
				 	 */
			
 
				 	error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno,
			
 
				-			args.len, random32());
			
 
				+			args.len, prandom_u32());
			
 
				 
			
 
				 	if (error)
			
 
				 		return error;
			
@@ -1453,6 +1487,7 @@ xfs_ialloc_log_agi(
 
				 	/*
			
 
				 	 * Log the allocation group inode header buffer.
			
 
				 	 */
			
 
				+	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
			
 
				 	xfs_trans_log_buf(tp, bp, first, last);
			
 
				 }
			
 
				 
			
@@ -1470,19 +1505,23 @@ xfs_check_agi_unlinked(
 
				 #define xfs_check_agi_unlinked(agi)
			
 
				 #endif
			
 
				 
			
 
				-static void
			
 
				+static bool
			
 
				 xfs_agi_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				 	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				 	struct xfs_agi	*agi = XFS_BUF_TO_AGI(bp);
			
 
				-	int		agi_ok;
			
 
				 
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb) &&
			
 
				+	    !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_uuid))
			
 
				+			return false;
			
 
				 	/*
			
 
				 	 * Validate the magic number of the agi block.
			
 
				 	 */
			
 
				-	agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) &&
			
 
				-		XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
			
 
				+	if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC))
			
 
				+		return false;
			
 
				+	if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
			
 
				+		return false;
			
 
				 
			
 
				 	/*
			
 
				 	 * during growfs operations, the perag is not fully initialised,
			
@@ -1490,30 +1529,52 @@ xfs_agi_verify(
 
				 	 * use it by using uncached buffers that don't have the perag attached
			
 
				 	 * so we can detect and avoid this problem.
			
 
				 	 */
			
 
				-	if (bp->b_pag)
			
 
				-		agi_ok = agi_ok && be32_to_cpu(agi->agi_seqno) ==
			
 
				-						bp->b_pag->pag_agno;
			
 
				+	if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno)
			
 
				+		return false;
			
 
				 
			
 
				-	if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
			
 
				-			XFS_RANDOM_IALLOC_READ_AGI))) {
			
 
				-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agi);
			
 
				-		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				-	}
			
 
				 	xfs_check_agi_unlinked(agi);
			
 
				+	return true;
			
 
				 }
			
 
				 
			
 
				 static void
			
 
				 xfs_agi_read_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_agi_verify(bp);
			
 
				+	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				+	int		agi_ok = 1;
			
 
				+
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				+					  offsetof(struct xfs_agi, agi_crc));
			
 
				+	agi_ok = agi_ok && xfs_agi_verify(bp);
			
 
				+
			
 
				+	if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
			
 
				+			XFS_RANDOM_IALLOC_READ_AGI))) {
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 static void
			
 
				 xfs_agi_write_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_agi_verify(bp);
			
 
				+	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
			
 
				+
			
 
				+	if (!xfs_agi_verify(bp)) {
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return;
			
 
				+
			
 
				+	if (bip)
			
 
				+		XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn);
			
 
				+	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				+			 offsetof(struct xfs_agi, agi_crc));
			
 
				 }
			
 
				 
			
 
				 const struct xfs_buf_ops xfs_agi_buf_ops = {
			
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -34,6 +34,7 @@
 
				 #include "xfs_alloc.h"
			
 
				 #include "xfs_error.h"
			
 
				 #include "xfs_trace.h"
			
 
				+#include "xfs_cksum.h"
			
 
				 
			
 
				 
			
 
				 STATIC int
			
@@ -182,52 +183,88 @@ xfs_inobt_key_diff(
 
				 			  cur->bc_rec.i.ir_startino;
			
 
				 }
			
 
				 
			
 
				-void
			
 
				+static int
			
 
				 xfs_inobt_verify(
			
 
				 	struct xfs_buf		*bp)
			
 
				 {
			
 
				 	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				 	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
			
 
				+	struct xfs_perag	*pag = bp->b_pag;
			
 
				 	unsigned int		level;
			
 
				-	int			sblock_ok; /* block passes checks */
			
 
				 
			
 
				-	/* magic number and level verification */
			
 
				-	level = be16_to_cpu(block->bb_level);
			
 
				-	sblock_ok = block->bb_magic == cpu_to_be32(XFS_IBT_MAGIC) &&
			
 
				-		    level < mp->m_in_maxlevels;
			
 
				+	/*
			
 
				+	 * During growfs operations, we can't verify the exact owner as the
			
 
				+	 * perag is not fully initialised and hence not attached to the buffer.
			
 
				+	 *
			
 
				+	 * Similarly, during log recovery we will have a perag structure
			
 
				+	 * attached, but the agi information will not yet have been initialised
			
 
				+	 * from the on disk AGI. We don't currently use any of this information,
			
 
				+	 * but beware of the landmine (i.e. need to check pag->pagi_init) if we
			
 
				+	 * ever do.
			
 
				+	 */
			
 
				+	switch (block->bb_magic) {
			
 
				+	case cpu_to_be32(XFS_IBT_CRC_MAGIC):
			
 
				+		if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+			return false;
			
 
				+		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
			
 
				+			return false;
			
 
				+		if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
			
 
				+			return false;
			
 
				+		if (pag &&
			
 
				+		    be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
			
 
				+			return false;
			
 
				+		/* fall through */
			
 
				+	case cpu_to_be32(XFS_IBT_MAGIC):
			
 
				+		break;
			
 
				+	default:
			
 
				+		return 0;
			
 
				+	}
			
 
				 
			
 
				-	/* numrecs verification */
			
 
				-	sblock_ok = sblock_ok &&
			
 
				-		be16_to_cpu(block->bb_numrecs) <= mp->m_inobt_mxr[level != 0];
			
 
				+	/* numrecs and level verification */
			
 
				+	level = be16_to_cpu(block->bb_level);
			
 
				+	if (level >= mp->m_in_maxlevels)
			
 
				+		return false;
			
 
				+	if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0])
			
 
				+		return false;
			
 
				 
			
 
				 	/* sibling pointer verification */
			
 
				-	sblock_ok = sblock_ok &&
			
 
				-		(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
			
 
				-		 be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) &&
			
 
				-		block->bb_u.s.bb_leftsib &&
			
 
				-		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
			
 
				-		 be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) &&
			
 
				-		block->bb_u.s.bb_rightsib;
			
 
				-
			
 
				-	if (!sblock_ok) {
			
 
				-		trace_xfs_btree_corrupt(bp, _RET_IP_);
			
 
				-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
			
 
				-		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				-	}
			
 
				+	if (!block->bb_u.s.bb_leftsib ||
			
 
				+	    (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
			
 
				+	     block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
			
 
				+		return false;
			
 
				+	if (!block->bb_u.s.bb_rightsib ||
			
 
				+	    (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
			
 
				+	     block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
			
 
				+		return false;
			
 
				+
			
 
				+	return true;
			
 
				 }
			
 
				 
			
 
				 static void
			
 
				 xfs_inobt_read_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_inobt_verify(bp);
			
 
				+	if (!(xfs_btree_sblock_verify_crc(bp) &&
			
 
				+	      xfs_inobt_verify(bp))) {
			
 
				+		trace_xfs_btree_corrupt(bp, _RET_IP_);
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
			
 
				+				     bp->b_target->bt_mount, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 static void
			
 
				 xfs_inobt_write_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_inobt_verify(bp);
			
 
				+	if (!xfs_inobt_verify(bp)) {
			
 
				+		trace_xfs_btree_corrupt(bp, _RET_IP_);
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
			
 
				+				     bp->b_target->bt_mount, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
			
 
				+	}
			
 
				+	xfs_btree_sblock_calc_crc(bp);
			
 
				+
			
 
				 }
			
 
				 
			
 
				 const struct xfs_buf_ops xfs_inobt_buf_ops = {
			
@@ -301,6 +338,8 @@ xfs_inobt_init_cursor(
 
				 	cur->bc_blocklog = mp->m_sb.sb_blocklog;
			
 
				 
			
 
				 	cur->bc_ops = &xfs_inobt_ops;
			
 
				+	if (xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
			
 
				 
			
 
				 	cur->bc_private.a.agbp = agbp;
			
 
				 	cur->bc_private.a.agno = agno;
			
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -29,7 +29,8 @@ struct xfs_mount;
 
				 /*
			
 
				  * There is a btree for the inode map per allocation group.
			
 
				  */
			
 
				-#define	XFS_IBT_MAGIC	0x49414254	/* 'IABT' */
			
 
				+#define	XFS_IBT_MAGIC		0x49414254	/* 'IABT' */
			
 
				+#define	XFS_IBT_CRC_MAGIC	0x49414233	/* 'IAB3' */
			
 
				 
			
 
				 typedef	__uint64_t	xfs_inofree_t;
			
 
				 #define	XFS_INODES_PER_CHUNK		(NBBY * sizeof(xfs_inofree_t))
			
@@ -76,10 +77,10 @@ typedef __be32 xfs_inobt_ptr_t;
 
				 
			
 
				 /*
			
 
				  * Btree block header size depends on a superblock flag.
			
 
				- *
			
 
				- * (not quite yet, but soon)
			
 
				  */
			
 
				-#define XFS_INOBT_BLOCK_LEN(mp)	XFS_BTREE_SBLOCK_LEN
			
 
				+#define XFS_INOBT_BLOCK_LEN(mp) \
			
 
				+	(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
			
 
				+		XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)
			
 
				 
			
 
				 /*
			
 
				  * Record, key, and pointer address macros for btree blocks.
			
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -44,6 +44,7 @@
 
				 #include "xfs_quota.h"
			
 
				 #include "xfs_filestream.h"
			
 
				 #include "xfs_vnodeops.h"
			
 
				+#include "xfs_cksum.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_icache.h"
			
 
				 
			
@@ -786,6 +787,7 @@ xfs_iformat_btree(
 
				 	xfs_dinode_t		*dip,
			
 
				 	int			whichfork)
			
 
				 {
			
 
				+	struct xfs_mount	*mp = ip->i_mount;
			
 
				 	xfs_bmdr_block_t	*dfp;
			
 
				 	xfs_ifork_t		*ifp;
			
 
				 	/* REFERENCED */
			
@@ -794,7 +796,7 @@ xfs_iformat_btree(
 
				 
			
 
				 	ifp = XFS_IFORK_PTR(ip, whichfork);
			
 
				 	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
			
 
				-	size = XFS_BMAP_BROOT_SPACE(dfp);
			
 
				+	size = XFS_BMAP_BROOT_SPACE(mp, dfp);
			
 
				 	nrecs = be16_to_cpu(dfp->bb_numrecs);
			
 
				 
			
 
				 	/*
			
@@ -805,14 +807,14 @@ xfs_iformat_btree(
 
				 	 * blocks.
			
 
				 	 */
			
 
				 	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
			
 
				-			XFS_IFORK_MAXEXT(ip, whichfork) ||
			
 
				+					XFS_IFORK_MAXEXT(ip, whichfork) ||
			
 
				 		     XFS_BMDR_SPACE_CALC(nrecs) >
			
 
				-			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) ||
			
 
				+					XFS_DFORK_SIZE(dip, mp, whichfork) ||
			
 
				 		     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
			
 
				-		xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).",
			
 
				-			(unsigned long long) ip->i_ino);
			
 
				+		xfs_warn(mp, "corrupt inode %Lu (btree).",
			
 
				+					(unsigned long long) ip->i_ino);
			
 
				 		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
			
 
				-				 ip->i_mount, dip);
			
 
				+					 mp, dip);
			
 
				 		return XFS_ERROR(EFSCORRUPTED);
			
 
				 	}
			
 
				 
			
@@ -823,8 +825,7 @@ xfs_iformat_btree(
 
				 	 * Copy and convert from the on-disk structure
			
 
				 	 * to the in-memory structure.
			
 
				 	 */
			
 
				-	xfs_bmdr_to_bmbt(ip->i_mount, dfp,
			
 
				-			 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
			
 
				+	xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
			
 
				 			 ifp->if_broot, size);
			
 
				 	ifp->if_flags &= ~XFS_IFEXTENTS;
			
 
				 	ifp->if_flags |= XFS_IFBROOT;
			
@@ -866,6 +867,17 @@ xfs_dinode_from_disk(
 
				 	to->di_dmstate	= be16_to_cpu(from->di_dmstate);
			
 
				 	to->di_flags	= be16_to_cpu(from->di_flags);
			
 
				 	to->di_gen	= be32_to_cpu(from->di_gen);
			
 
				+
			
 
				+	if (to->di_version == 3) {
			
 
				+		to->di_changecount = be64_to_cpu(from->di_changecount);
			
 
				+		to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
			
 
				+		to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
			
 
				+		to->di_flags2 = be64_to_cpu(from->di_flags2);
			
 
				+		to->di_ino = be64_to_cpu(from->di_ino);
			
 
				+		to->di_lsn = be64_to_cpu(from->di_lsn);
			
 
				+		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
			
 
				+		uuid_copy(&to->di_uuid, &from->di_uuid);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 void
			
@@ -902,6 +914,17 @@ xfs_dinode_to_disk(
 
				 	to->di_dmstate = cpu_to_be16(from->di_dmstate);
			
 
				 	to->di_flags = cpu_to_be16(from->di_flags);
			
 
				 	to->di_gen = cpu_to_be32(from->di_gen);
			
 
				+
			
 
				+	if (from->di_version == 3) {
			
 
				+		to->di_changecount = cpu_to_be64(from->di_changecount);
			
 
				+		to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
			
 
				+		to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
			
 
				+		to->di_flags2 = cpu_to_be64(from->di_flags2);
			
 
				+		to->di_ino = cpu_to_be64(from->di_ino);
			
 
				+		to->di_lsn = cpu_to_be64(from->di_lsn);
			
 
				+		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
			
 
				+		uuid_copy(&to->di_uuid, &from->di_uuid);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 STATIC uint
			
@@ -962,6 +985,47 @@ xfs_dic2xflags(
 
				 				(XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
			
 
				 }
			
 
				 
			
 
				+static bool
			
 
				+xfs_dinode_verify(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_inode	*ip,
			
 
				+	struct xfs_dinode	*dip)
			
 
				+{
			
 
				+	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
			
 
				+		return false;
			
 
				+
			
 
				+	/* only version 3 or greater inodes are extensively verified here */
			
 
				+	if (dip->di_version < 3)
			
 
				+		return true;
			
 
				+
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return false;
			
 
				+	if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
			
 
				+			      offsetof(struct xfs_dinode, di_crc)))
			
 
				+		return false;
			
 
				+	if (be64_to_cpu(dip->di_ino) != ip->i_ino)
			
 
				+		return false;
			
 
				+	if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
			
 
				+		return false;
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+void
			
 
				+xfs_dinode_calc_crc(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_dinode	*dip)
			
 
				+{
			
 
				+	__uint32_t		crc;
			
 
				+
			
 
				+	if (dip->di_version < 3)
			
 
				+		return;
			
 
				+
			
 
				+	ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
			
 
				+	crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
			
 
				+			      offsetof(struct xfs_dinode, di_crc));
			
 
				+	dip->di_crc = xfs_end_cksum(crc);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Read the disk inode attributes into the in-core inode structure.
			
 
				  */
			
@@ -990,17 +1054,13 @@ xfs_iread(
 
				 	if (error)
			
 
				 		return error;
			
 
				 
			
 
				-	/*
			
 
				-	 * If we got something that isn't an inode it means someone
			
 
				-	 * (nfs or dmi) has a stale handle.
			
 
				-	 */
			
 
				-	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
			
 
				-#ifdef DEBUG
			
 
				-		xfs_alert(mp,
			
 
				-			"%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
			
 
				-			__func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC);
			
 
				-#endif /* DEBUG */
			
 
				-		error = XFS_ERROR(EINVAL);
			
 
				+	/* even unallocated inodes are verified */
			
 
				+	if (!xfs_dinode_verify(mp, ip, dip)) {
			
 
				+		xfs_alert(mp, "%s: validation failed for inode %lld failed",
			
 
				+				__func__, ip->i_ino);
			
 
				+
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
			
 
				+		error = XFS_ERROR(EFSCORRUPTED);
			
 
				 		goto out_brelse;
			
 
				 	}
			
 
				 
			
@@ -1022,10 +1082,20 @@ xfs_iread(
 
				 			goto out_brelse;
			
 
				 		}
			
 
				 	} else {
			
 
				+		/*
			
 
				+		 * Partial initialisation of the in-core inode. Just the bits
			
 
				+		 * that xfs_ialloc won't overwrite or relies on being correct.
			
 
				+		 */
			
 
				 		ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
			
 
				 		ip->i_d.di_version = dip->di_version;
			
 
				 		ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
			
 
				 		ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
			
 
				+
			
 
				+		if (dip->di_version == 3) {
			
 
				+			ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
			
 
				+			uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
			
 
				+		}
			
 
				+
			
 
				 		/*
			
 
				 		 * Make sure to pull in the mode here as well in
			
 
				 		 * case the inode is released without being used.
			
@@ -1161,6 +1231,7 @@ xfs_ialloc(
 
				 	xfs_buf_t	**ialloc_context,
			
 
				 	xfs_inode_t	**ipp)
			
 
				 {
			
 
				+	struct xfs_mount *mp = tp->t_mountp;
			
 
				 	xfs_ino_t	ino;
			
 
				 	xfs_inode_t	*ip;
			
 
				 	uint		flags;
			
@@ -1187,7 +1258,7 @@ xfs_ialloc(
 
				 	 * This is because we're setting fields here we need
			
 
				 	 * to prevent others from looking at until we're done.
			
 
				 	 */
			
 
				-	error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE,
			
 
				+	error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE,
			
 
				 			 XFS_ILOCK_EXCL, &ip);
			
 
				 	if (error)
			
 
				 		return error;
			
@@ -1208,7 +1279,7 @@ xfs_ialloc(
 
				 	 * the inode version number now.  This way we only do the conversion
			
 
				 	 * here rather than here and in the flush/logging code.
			
 
				 	 */
			
 
				-	if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) &&
			
 
				+	if (xfs_sb_version_hasnlink(&mp->m_sb) &&
			
 
				 	    ip->i_d.di_version == 1) {
			
 
				 		ip->i_d.di_version = 2;
			
 
				 		/*
			
@@ -1258,6 +1329,19 @@ xfs_ialloc(
 
				 	ip->i_d.di_dmevmask = 0;
			
 
				 	ip->i_d.di_dmstate = 0;
			
 
				 	ip->i_d.di_flags = 0;
			
 
				+
			
 
				+	if (ip->i_d.di_version == 3) {
			
 
				+		ASSERT(ip->i_d.di_ino == ino);
			
 
				+		ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid));
			
 
				+		ip->i_d.di_crc = 0;
			
 
				+		ip->i_d.di_changecount = 1;
			
 
				+		ip->i_d.di_lsn = 0;
			
 
				+		ip->i_d.di_flags2 = 0;
			
 
				+		memset(&(ip->i_d.di_pad2[0]), 0, sizeof(ip->i_d.di_pad2));
			
 
				+		ip->i_d.di_crtime = ip->i_d.di_mtime;
			
 
				+	}
			
 
				+
			
 
				+
			
 
				 	flags = XFS_ILOG_CORE;
			
 
				 	switch (mode & S_IFMT) {
			
 
				 	case S_IFIFO:
			
@@ -2037,7 +2121,7 @@ xfs_iroot_realloc(
 
				 		 * allocate it now and get out.
			
 
				 		 */
			
 
				 		if (ifp->if_broot_bytes == 0) {
			
 
				-			new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
			
 
				+			new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
			
 
				 			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
			
 
				 			ifp->if_broot_bytes = (int)new_size;
			
 
				 			return;
			
@@ -2051,9 +2135,9 @@ xfs_iroot_realloc(
 
				 		 */
			
 
				 		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
			
 
				 		new_max = cur_max + rec_diff;
			
 
				-		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
			
 
				+		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
			
 
				 		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
			
 
				-				(size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
			
 
				+				XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
			
 
				 				KM_SLEEP | KM_NOFS);
			
 
				 		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
			
 
				 						     ifp->if_broot_bytes);
			
@@ -2061,7 +2145,7 @@ xfs_iroot_realloc(
 
				 						     (int)new_size);
			
 
				 		ifp->if_broot_bytes = (int)new_size;
			
 
				 		ASSERT(ifp->if_broot_bytes <=
			
 
				-			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
			
 
				+			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));
			
 
				 		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
			
 
				 		return;
			
 
				 	}
			
@@ -2076,7 +2160,7 @@ xfs_iroot_realloc(
 
				 	new_max = cur_max + rec_diff;
			
 
				 	ASSERT(new_max >= 0);
			
 
				 	if (new_max > 0)
			
 
				-		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
			
 
				+		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
			
 
				 	else
			
 
				 		new_size = 0;
			
 
				 	if (new_size > 0) {
			
@@ -2084,7 +2168,8 @@ xfs_iroot_realloc(
 
				 		/*
			
 
				 		 * First copy over the btree block header.
			
 
				 		 */
			
 
				-		memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN);
			
 
				+		memcpy(new_broot, ifp->if_broot,
			
 
				+			XFS_BMBT_BLOCK_LEN(ip->i_mount));
			
 
				 	} else {
			
 
				 		new_broot = NULL;
			
 
				 		ifp->if_flags &= ~XFS_IFBROOT;
			
@@ -2114,7 +2199,7 @@ xfs_iroot_realloc(
 
				 	ifp->if_broot = new_broot;
			
 
				 	ifp->if_broot_bytes = (int)new_size;
			
 
				 	ASSERT(ifp->if_broot_bytes <=
			
 
				-		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
			
 
				+		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));
			
 
				 	return;
			
 
				 }
			
 
				 
			
@@ -2427,7 +2512,7 @@ xfs_iflush_fork(
 
				 			ASSERT(ifp->if_broot != NULL);
			
 
				 			ASSERT(ifp->if_broot_bytes <=
			
 
				 			       (XFS_IFORK_SIZE(ip, whichfork) +
			
 
				-				XFS_BROOT_SIZE_ADJ));
			
 
				+				XFS_BROOT_SIZE_ADJ(ip)));
			
 
				 			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
			
 
				 				(xfs_bmdr_block_t *)cp,
			
 
				 				XFS_DFORK_SIZE(dip, mp, whichfork));
			
@@ -2715,20 +2800,18 @@ abort_out:
 
				 
			
 
				 STATIC int
			
 
				 xfs_iflush_int(
			
 
				-	xfs_inode_t		*ip,
			
 
				-	xfs_buf_t		*bp)
			
 
				+	struct xfs_inode	*ip,
			
 
				+	struct xfs_buf		*bp)
			
 
				 {
			
 
				-	xfs_inode_log_item_t	*iip;
			
 
				-	xfs_dinode_t		*dip;
			
 
				-	xfs_mount_t		*mp;
			
 
				+	struct xfs_inode_log_item *iip = ip->i_itemp;
			
 
				+	struct xfs_dinode	*dip;
			
 
				+	struct xfs_mount	*mp = ip->i_mount;
			
 
				 
			
 
				 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
			
 
				 	ASSERT(xfs_isiflocked(ip));
			
 
				 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
			
 
				 	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
			
 
				-
			
 
				-	iip = ip->i_itemp;
			
 
				-	mp = ip->i_mount;
			
 
				+	ASSERT(iip != NULL && iip->ili_fields != 0);
			
 
				 
			
 
				 	/* set *dip = inode's place in the buffer */
			
 
				 	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
			
@@ -2789,9 +2872,9 @@ xfs_iflush_int(
 
				 	}
			
 
				 	/*
			
 
				 	 * bump the flush iteration count, used to detect flushes which
			
 
				-	 * postdate a log record during recovery.
			
 
				+	 * postdate a log record during recovery. This is redundant as we now
			
 
				+	 * log every change and hence this can't happen. Still, it doesn't hurt.
			
 
				 	 */
			
 
				-
			
 
				 	ip->i_d.di_flushiter++;
			
 
				 
			
 
				 	/*
			
@@ -2867,41 +2950,30 @@ xfs_iflush_int(
 
				 	 * need the AIL lock, because it is a 64 bit value that cannot be read
			
 
				 	 * atomically.
			
 
				 	 */
			
 
				-	if (iip != NULL && iip->ili_fields != 0) {
			
 
				-		iip->ili_last_fields = iip->ili_fields;
			
 
				-		iip->ili_fields = 0;
			
 
				-		iip->ili_logged = 1;
			
 
				+	iip->ili_last_fields = iip->ili_fields;
			
 
				+	iip->ili_fields = 0;
			
 
				+	iip->ili_logged = 1;
			
 
				 
			
 
				-		xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
			
 
				-					&iip->ili_item.li_lsn);
			
 
				+	xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
			
 
				+				&iip->ili_item.li_lsn);
			
 
				 
			
 
				-		/*
			
 
				-		 * Attach the function xfs_iflush_done to the inode's
			
 
				-		 * buffer.  This will remove the inode from the AIL
			
 
				-		 * and unlock the inode's flush lock when the inode is
			
 
				-		 * completely written to disk.
			
 
				-		 */
			
 
				-		xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
			
 
				+	/*
			
 
				+	 * Attach the function xfs_iflush_done to the inode's
			
 
				+	 * buffer.  This will remove the inode from the AIL
			
 
				+	 * and unlock the inode's flush lock when the inode is
			
 
				+	 * completely written to disk.
			
 
				+	 */
			
 
				+	xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
			
 
				 
			
 
				-		ASSERT(bp->b_fspriv != NULL);
			
 
				-		ASSERT(bp->b_iodone != NULL);
			
 
				-	} else {
			
 
				-		/*
			
 
				-		 * We're flushing an inode which is not in the AIL and has
			
 
				-		 * not been logged.  For this case we can immediately drop
			
 
				-		 * the inode flush lock because we can avoid the whole
			
 
				-		 * AIL state thing.  It's OK to drop the flush lock now,
			
 
				-		 * because we've already locked the buffer and to do anything
			
 
				-		 * you really need both.
			
 
				-		 */
			
 
				-		if (iip != NULL) {
			
 
				-			ASSERT(iip->ili_logged == 0);
			
 
				-			ASSERT(iip->ili_last_fields == 0);
			
 
				-			ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0);
			
 
				-		}
			
 
				-		xfs_ifunlock(ip);
			
 
				-	}
			
 
				+	/* update the lsn in the on disk inode if required */
			
 
				+	if (ip->i_d.di_version == 3)
			
 
				+		dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn);
			
 
				+
			
 
				+	/* generate the checksum. */
			
 
				+	xfs_dinode_calc_crc(mp, dip);
			
 
				 
			
 
				+	ASSERT(bp->b_fspriv != NULL);
			
 
				+	ASSERT(bp->b_iodone != NULL);
			
 
				 	return 0;
			
 
				 
			
 
				 corrupt_out:
			
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -150,13 +150,38 @@ typedef struct xfs_icdinode {
 
				 	__uint16_t	di_dmstate;	/* DMIG state info */
			
 
				 	__uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
			
 
				 	__uint32_t	di_gen;		/* generation number */
			
 
				+
			
 
				+	/* di_next_unlinked is the only non-core field in the old dinode */
			
 
				+	xfs_agino_t	di_next_unlinked;/* agi unlinked list ptr */
			
 
				+
			
 
				+	/* start of the extended dinode, writable fields */
			
 
				+	__uint32_t	di_crc;		/* CRC of the inode */
			
 
				+	__uint64_t	di_changecount;	/* number of attribute changes */
			
 
				+	xfs_lsn_t	di_lsn;		/* flush sequence */
			
 
				+	__uint64_t	di_flags2;	/* more random flags */
			
 
				+	__uint8_t	di_pad2[16];	/* more padding for future expansion */
			
 
				+
			
 
				+	/* fields only written to during inode creation */
			
 
				+	xfs_ictimestamp_t di_crtime;	/* time created */
			
 
				+	xfs_ino_t	di_ino;		/* inode number */
			
 
				+	uuid_t		di_uuid;	/* UUID of the filesystem */
			
 
				+
			
 
				+	/* structure must be padded to 64 bit alignment */
			
 
				 } xfs_icdinode_t;
			
 
				 
			
 
				+static inline uint xfs_icdinode_size(int version)
			
 
				+{
			
 
				+	if (version == 3)
			
 
				+		return sizeof(struct xfs_icdinode);
			
 
				+	return offsetof(struct xfs_icdinode, di_next_unlinked);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Flags for xfs_ichgtime().
			
 
				  */
			
 
				 #define	XFS_ICHGTIME_MOD	0x1	/* data fork modification timestamp */
			
 
				 #define	XFS_ICHGTIME_CHG	0x2	/* inode field change timestamp */
			
 
				+#define	XFS_ICHGTIME_CREATE	0x4	/* inode create timestamp */
			
 
				 
			
 
				 /*
			
 
				  * Per-fork incore inode flags.
			
@@ -180,10 +205,11 @@ typedef struct xfs_icdinode {
 
				 #define XFS_IFORK_DSIZE(ip) \
			
 
				 	(XFS_IFORK_Q(ip) ? \
			
 
				 		XFS_IFORK_BOFF(ip) : \
			
 
				-		XFS_LITINO((ip)->i_mount))
			
 
				+		XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version))
			
 
				 #define XFS_IFORK_ASIZE(ip) \
			
 
				 	(XFS_IFORK_Q(ip) ? \
			
 
				-		XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \
			
 
				+		XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version) - \
			
 
				+			XFS_IFORK_BOFF(ip) : \
			
 
				 		0)
			
 
				 #define XFS_IFORK_SIZE(ip,w) \
			
 
				 	((w) == XFS_DATA_FORK ? \
			
@@ -555,6 +581,7 @@ int		xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
 
				 			       struct xfs_buf **, uint, uint);
			
 
				 int		xfs_iread(struct xfs_mount *, struct xfs_trans *,
			
 
				 			  struct xfs_inode *, uint);
			
 
				+void		xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
			
 
				 void		xfs_dinode_to_disk(struct xfs_dinode *,
			
 
				 				   struct xfs_icdinode *);
			
 
				 void		xfs_idestroy_fork(struct xfs_inode *, int);
			
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -179,7 +179,7 @@ xfs_inode_item_format(
 
				 	nvecs	     = 1;
			
 
				 
			
 
				 	vecp->i_addr = &ip->i_d;
			
 
				-	vecp->i_len  = sizeof(struct xfs_icdinode);
			
 
				+	vecp->i_len  = xfs_icdinode_size(ip->i_d.di_version);
			
 
				 	vecp->i_type = XLOG_REG_TYPE_ICORE;
			
 
				 	vecp++;
			
 
				 	nvecs++;
			
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -42,6 +42,8 @@
 
				 #include "xfs_iomap.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_icache.h"
			
 
				+#include "xfs_dquot_item.h"
			
 
				+#include "xfs_dquot.h"
			
 
				 
			
 
				 
			
 
				 #define XFS_WRITEIO_ALIGN(mp,off)	(((off) >> mp->m_writeio_log) \
			
@@ -362,10 +364,65 @@ xfs_iomap_eof_prealloc_initial_size(
 
				 	if (imap[0].br_startblock == HOLESTARTBLOCK)
			
 
				 		return 0;
			
 
				 	if (imap[0].br_blockcount <= (MAXEXTLEN >> 1))
			
 
				-		return imap[0].br_blockcount;
			
 
				+		return imap[0].br_blockcount << 1;
			
 
				 	return XFS_B_TO_FSB(mp, offset);
			
 
				 }
			
 
				 
			
 
				+STATIC bool
			
 
				+xfs_quota_need_throttle(
			
 
				+	struct xfs_inode *ip,
			
 
				+	int type,
			
 
				+	xfs_fsblock_t alloc_blocks)
			
 
				+{
			
 
				+	struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
			
 
				+
			
 
				+	if (!dq || !xfs_this_quota_on(ip->i_mount, type))
			
 
				+		return false;
			
 
				+
			
 
				+	/* no hi watermark, no throttle */
			
 
				+	if (!dq->q_prealloc_hi_wmark)
			
 
				+		return false;
			
 
				+
			
 
				+	/* under the lo watermark, no throttle */
			
 
				+	if (dq->q_res_bcount + alloc_blocks < dq->q_prealloc_lo_wmark)
			
 
				+		return false;
			
 
				+
			
 
				+	return true;
			
 
				+}
			
 
				+
			
 
				+STATIC void
			
 
				+xfs_quota_calc_throttle(
			
 
				+	struct xfs_inode *ip,
			
 
				+	int type,
			
 
				+	xfs_fsblock_t *qblocks,
			
 
				+	int *qshift)
			
 
				+{
			
 
				+	int64_t freesp;
			
 
				+	int shift = 0;
			
 
				+	struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
			
 
				+
			
 
				+	/* over hi wmark, squash the prealloc completely */
			
 
				+	if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
			
 
				+		*qblocks = 0;
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	freesp = dq->q_prealloc_hi_wmark - dq->q_res_bcount;
			
 
				+	if (freesp < dq->q_low_space[XFS_QLOWSP_5_PCNT]) {
			
 
				+		shift = 2;
			
 
				+		if (freesp < dq->q_low_space[XFS_QLOWSP_3_PCNT])
			
 
				+			shift += 2;
			
 
				+		if (freesp < dq->q_low_space[XFS_QLOWSP_1_PCNT])
			
 
				+			shift += 2;
			
 
				+	}
			
 
				+
			
 
				+	/* only overwrite the throttle values if we are more aggressive */
			
 
				+	if ((freesp >> shift) < (*qblocks >> *qshift)) {
			
 
				+		*qblocks = freesp;
			
 
				+		*qshift = shift;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * If we don't have a user specified preallocation size, dynamically increase
			
 
				  * the preallocation size as the size of the file grows. Cap the maximum size
			
@@ -381,45 +438,89 @@ xfs_iomap_prealloc_size(
 
				 	int			nimaps)
			
 
				 {
			
 
				 	xfs_fsblock_t		alloc_blocks = 0;
			
 
				+	int			shift = 0;
			
 
				+	int64_t			freesp;
			
 
				+	xfs_fsblock_t		qblocks;
			
 
				+	int			qshift = 0;
			
 
				 
			
 
				 	alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset,
			
 
				 							   imap, nimaps);
			
 
				-	if (alloc_blocks > 0) {
			
 
				-		int shift = 0;
			
 
				-		int64_t freesp;
			
 
				-
			
 
				-		alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
			
 
				-					rounddown_pow_of_two(alloc_blocks));
			
 
				-
			
 
				-		xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
			
 
				-		freesp = mp->m_sb.sb_fdblocks;
			
 
				-		if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
			
 
				-			shift = 2;
			
 
				-			if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
			
 
				-				shift++;
			
 
				-			if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
			
 
				-				shift++;
			
 
				-			if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
			
 
				-				shift++;
			
 
				-			if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
			
 
				-				shift++;
			
 
				-		}
			
 
				-		if (shift)
			
 
				-			alloc_blocks >>= shift;
			
 
				+	if (!alloc_blocks)
			
 
				+		goto check_writeio;
			
 
				+	qblocks = alloc_blocks;
			
 
				 
			
 
				-		/*
			
 
				-		 * If we are still trying to allocate more space than is
			
 
				-		 * available, squash the prealloc hard. This can happen if we
			
 
				-		 * have a large file on a small filesystem and the above
			
 
				-		 * lowspace thresholds are smaller than MAXEXTLEN.
			
 
				-		 */
			
 
				-		while (alloc_blocks && alloc_blocks >= freesp)
			
 
				-			alloc_blocks >>= 4;
			
 
				+	/*
			
 
				+	 * MAXEXTLEN is not a power of two value but we round the prealloc down
			
 
				+	 * to the nearest power of two value after throttling. To prevent the
			
 
				+	 * round down from unconditionally reducing the maximum supported prealloc
			
 
				+	 * size, we round up first, apply appropriate throttling, round down and
			
 
				+	 * cap the value to MAXEXTLEN.
			
 
				+	 */
			
 
				+	alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
			
 
				+				       alloc_blocks);
			
 
				+
			
 
				+	xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
			
 
				+	freesp = mp->m_sb.sb_fdblocks;
			
 
				+	if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
			
 
				+		shift = 2;
			
 
				+		if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
			
 
				+			shift++;
			
 
				+		if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
			
 
				+			shift++;
			
 
				+		if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
			
 
				+			shift++;
			
 
				+		if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
			
 
				+			shift++;
			
 
				 	}
			
 
				 
			
 
				+	/*
			
 
				+	 * Check each quota to cap the prealloc size and provide a shift
			
 
				+	 * value to throttle with.
			
 
				+	 */
			
 
				+	if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks))
			
 
				+		xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift);
			
 
				+	if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks))
			
 
				+		xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift);
			
 
				+	if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks))
			
 
				+		xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift);
			
 
				+
			
 
				+	/*
			
 
				+	 * The final prealloc size is set to the minimum of free space available
			
 
				+	 * in each of the quotas and the overall filesystem.
			
 
				+	 *
			
 
				+	 * The shift throttle value is set to the maximum value as determined by
			
 
				+	 * the global low free space values and per-quota low free space values.
			
 
				+	 */
			
 
				+	alloc_blocks = MIN(alloc_blocks, qblocks);
			
 
				+	shift = MAX(shift, qshift);
			
 
				+
			
 
				+	if (shift)
			
 
				+		alloc_blocks >>= shift;
			
 
				+	/*
			
 
				+	 * rounddown_pow_of_two() returns an undefined result if we pass in
			
 
				+	 * alloc_blocks = 0.
			
 
				+	 */
			
 
				+	if (alloc_blocks)
			
 
				+		alloc_blocks = rounddown_pow_of_two(alloc_blocks);
			
 
				+	if (alloc_blocks > MAXEXTLEN)
			
 
				+		alloc_blocks = MAXEXTLEN;
			
 
				+
			
 
				+	/*
			
 
				+	 * If we are still trying to allocate more space than is
			
 
				+	 * available, squash the prealloc hard. This can happen if we
			
 
				+	 * have a large file on a small filesystem and the above
			
 
				+	 * lowspace thresholds are smaller than MAXEXTLEN.
			
 
				+	 */
			
 
				+	while (alloc_blocks && alloc_blocks >= freesp)
			
 
				+		alloc_blocks >>= 4;
			
 
				+
			
 
				+check_writeio:
			
 
				 	if (alloc_blocks < mp->m_writeio_blocks)
			
 
				 		alloc_blocks = mp->m_writeio_blocks;
			
 
				 
			
 
				+	trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift,
			
 
				+				      mp->m_writeio_blocks);
			
 
				+
			
 
				 	return alloc_blocks;
			
 
				 }
			
 
				 
			
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -72,6 +72,7 @@
 
				 #include <linux/kthread.h>
			
 
				 #include <linux/freezer.h>
			
 
				 #include <linux/list_sort.h>
			
 
				+#include <linux/ratelimit.h>
			
 
				 
			
 
				 #include <asm/page.h>
			
 
				 #include <asm/div64.h>
			
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3485,7 +3485,7 @@ xlog_ticket_alloc(
 
				 	tic->t_curr_res		= unit_bytes;
			
 
				 	tic->t_cnt		= cnt;
			
 
				 	tic->t_ocnt		= cnt;
			
 
				-	tic->t_tid		= random32();
			
 
				+	tic->t_tid		= prandom_u32();
			
 
				 	tic->t_clientid		= client;
			
 
				 	tic->t_flags		= XLOG_TIC_INITED;
			
 
				 	tic->t_trans_type	= 0;
			
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -668,10 +668,6 @@ xlog_cil_push_foreground(
 
				  * transaction to the checkpoint context so we carry the busy extents through
			
 
				  * to checkpoint completion, and then unlock all the items in the transaction.
			
 
				  *
			
 
				- * For more specific information about the order of operations in
			
 
				- * xfs_log_commit_cil() please refer to the comments in
			
 
				- * xfs_trans_commit_iclog().
			
 
				- *
			
 
				  * Called with the context lock already held in read mode to lock out
			
 
				  * background commit, returns without it held once background commits are
			
 
				  * allowed again.
			
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -468,7 +468,6 @@ struct xfs_cil {
 
				  * threshold, yet give us plenty of space for aggregation on large logs.
			
 
				  */
			
 
				 #define XLOG_CIL_SPACE_LIMIT(log)	(log->l_logsize >> 3)
			
 
				-#define XLOG_CIL_HARD_SPACE_LIMIT(log)	(3 * (log->l_logsize >> 4))
			
 
				 
			
 
				 /*
			
 
				  * ticket grant locks, queues and accounting have their own cachlines
			
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -29,6 +29,7 @@
 
				 #include "xfs_bmap_btree.h"
			
 
				 #include "xfs_alloc_btree.h"
			
 
				 #include "xfs_ialloc_btree.h"
			
 
				+#include "xfs_btree.h"
			
 
				 #include "xfs_dinode.h"
			
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_inode_item.h"
			
@@ -45,6 +46,14 @@
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_icache.h"
			
 
				 
			
 
				+/* Need all the magic numbers and buffer ops structures from these headers */
			
 
				+#include "xfs_symlink.h"
			
 
				+#include "xfs_da_btree.h"
			
 
				+#include "xfs_dir2_format.h"
			
 
				+#include "xfs_dir2_priv.h"
			
 
				+#include "xfs_attr_leaf.h"
			
 
				+#include "xfs_attr_remote.h"
			
 
				+
			
 
				 STATIC int
			
 
				 xlog_find_zeroed(
			
 
				 	struct xlog	*,
			
@@ -1785,6 +1794,7 @@ xlog_recover_do_inode_buffer(
 
				 	xfs_agino_t		*buffer_nextp;
			
 
				 
			
 
				 	trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
			
 
				+	bp->b_ops = &xfs_inode_buf_ops;
			
 
				 
			
 
				 	inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
			
 
				 	for (i = 0; i < inodes_per_buf; i++) {
			
@@ -1856,6 +1866,201 @@ xlog_recover_do_inode_buffer(
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Validate the recovered buffer is of the correct type and attach the
			
 
				+ * appropriate buffer operations to it for writeback. Magic numbers are in a
			
 
				+ * few places:
			
 
				+ *	the first 16 bits of the buffer (inode buffer, dquot buffer),
			
 
				+ *	the first 32 bits of the buffer (most blocks),
			
 
				+ *	inside a struct xfs_da_blkinfo at the start of the buffer.
			
 
				+ */
			
 
				+static void
			
 
				+xlog_recovery_validate_buf_type(
			
 
				+	struct xfs_mount	*mp,
			
 
				+	struct xfs_buf		*bp,
			
 
				+	xfs_buf_log_format_t	*buf_f)
			
 
				+{
			
 
				+	struct xfs_da_blkinfo	*info = bp->b_addr;
			
 
				+	__uint32_t		magic32;
			
 
				+	__uint16_t		magic16;
			
 
				+	__uint16_t		magicda;
			
 
				+
			
 
				+	magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
			
 
				+	magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
			
 
				+	magicda = be16_to_cpu(info->magic);
			
 
				+	switch (xfs_blft_from_flags(buf_f)) {
			
 
				+	case XFS_BLFT_BTREE_BUF:
			
 
				+		switch (magic32) {
			
 
				+		case XFS_ABTB_CRC_MAGIC:
			
 
				+		case XFS_ABTC_CRC_MAGIC:
			
 
				+		case XFS_ABTB_MAGIC:
			
 
				+		case XFS_ABTC_MAGIC:
			
 
				+			bp->b_ops = &xfs_allocbt_buf_ops;
			
 
				+			break;
			
 
				+		case XFS_IBT_CRC_MAGIC:
			
 
				+		case XFS_IBT_MAGIC:
			
 
				+			bp->b_ops = &xfs_inobt_buf_ops;
			
 
				+			break;
			
 
				+		case XFS_BMAP_CRC_MAGIC:
			
 
				+		case XFS_BMAP_MAGIC:
			
 
				+			bp->b_ops = &xfs_bmbt_buf_ops;
			
 
				+			break;
			
 
				+		default:
			
 
				+			xfs_warn(mp, "Bad btree block magic!");
			
 
				+			ASSERT(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		break;
			
 
				+	case XFS_BLFT_AGF_BUF:
			
 
				+		if (magic32 != XFS_AGF_MAGIC) {
			
 
				+			xfs_warn(mp, "Bad AGF block magic!");
			
 
				+			ASSERT(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		bp->b_ops = &xfs_agf_buf_ops;
			
 
				+		break;
			
 
				+	case XFS_BLFT_AGFL_BUF:
			
 
				+		if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+			break;
			
 
				+		if (magic32 != XFS_AGFL_MAGIC) {
			
 
				+			xfs_warn(mp, "Bad AGFL block magic!");
			
 
				+			ASSERT(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		bp->b_ops = &xfs_agfl_buf_ops;
			
 
				+		break;
			
 
				+	case XFS_BLFT_AGI_BUF:
			
 
				+		if (magic32 != XFS_AGI_MAGIC) {
			
 
				+			xfs_warn(mp, "Bad AGI block magic!");
			
 
				+			ASSERT(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		bp->b_ops = &xfs_agi_buf_ops;
			
 
				+		break;
			
 
				+	case XFS_BLFT_UDQUOT_BUF:
			
 
				+	case XFS_BLFT_PDQUOT_BUF:
			
 
				+	case XFS_BLFT_GDQUOT_BUF:
			
 
				+#ifdef CONFIG_XFS_QUOTA
			
 
				+		if (magic16 != XFS_DQUOT_MAGIC) {
			
 
				+			xfs_warn(mp, "Bad DQUOT block magic!");
			
 
				+			ASSERT(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		bp->b_ops = &xfs_dquot_buf_ops;
			
 
				+#else
			
 
				+		xfs_alert(mp,
			
 
				+	"Trying to recover dquots without QUOTA support built in!");
			
 
				+		ASSERT(0);
			
 
				+#endif
			
 
				+		break;
			
 
				+	case XFS_BLFT_DINO_BUF:
			
 
				+		/*
			
 
				+		 * we get here with inode allocation buffers, not buffers that
			
 
				+		 * track unlinked list changes.
			
 
				+		 */
			
 
				+		if (magic16 != XFS_DINODE_MAGIC) {
			
 
				+			xfs_warn(mp, "Bad INODE block magic!");
			
 
				+			ASSERT(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		bp->b_ops = &xfs_inode_buf_ops;
			
 
				+		break;
			
 
				+	case XFS_BLFT_SYMLINK_BUF:
			
 
				+		if (magic32 != XFS_SYMLINK_MAGIC) {
			
 
				+			xfs_warn(mp, "Bad symlink block magic!");
			
 
				+			ASSERT(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		bp->b_ops = &xfs_symlink_buf_ops;
			
 
				+		break;
			
 
				+	case XFS_BLFT_DIR_BLOCK_BUF:
			
 
				+		if (magic32 != XFS_DIR2_BLOCK_MAGIC &&
			
 
				+		    magic32 != XFS_DIR3_BLOCK_MAGIC) {
			
 
				+			xfs_warn(mp, "Bad dir block magic!");
			
 
				+			ASSERT(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		bp->b_ops = &xfs_dir3_block_buf_ops;
			
 
				+		break;
			
 
				+	case XFS_BLFT_DIR_DATA_BUF:
			
 
				+		if (magic32 != XFS_DIR2_DATA_MAGIC &&
			
 
				+		    magic32 != XFS_DIR3_DATA_MAGIC) {
			
 
				+			xfs_warn(mp, "Bad dir data magic!");
			
 
				+			ASSERT(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		bp->b_ops = &xfs_dir3_data_buf_ops;
			
 
				+		break;
			
 
				+	case XFS_BLFT_DIR_FREE_BUF:
			
 
				+		if (magic32 != XFS_DIR2_FREE_MAGIC &&
			
 
				+		    magic32 != XFS_DIR3_FREE_MAGIC) {
			
 
				+			xfs_warn(mp, "Bad dir3 free magic!");
			
 
				+			ASSERT(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		bp->b_ops = &xfs_dir3_free_buf_ops;
			
 
				+		break;
			
 
				+	case XFS_BLFT_DIR_LEAF1_BUF:
			
 
				+		if (magicda != XFS_DIR2_LEAF1_MAGIC &&
			
 
				+		    magicda != XFS_DIR3_LEAF1_MAGIC) {
			
 
				+			xfs_warn(mp, "Bad dir leaf1 magic!");
			
 
				+			ASSERT(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
			
 
				+		break;
			
 
				+	case XFS_BLFT_DIR_LEAFN_BUF:
			
 
				+		if (magicda != XFS_DIR2_LEAFN_MAGIC &&
			
 
				+		    magicda != XFS_DIR3_LEAFN_MAGIC) {
			
 
				+			xfs_warn(mp, "Bad dir leafn magic!");
			
 
				+			ASSERT(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		bp->b_ops = &xfs_dir3_leafn_buf_ops;
			
 
				+		break;
			
 
				+	case XFS_BLFT_DA_NODE_BUF:
			
 
				+		if (magicda != XFS_DA_NODE_MAGIC &&
			
 
				+		    magicda != XFS_DA3_NODE_MAGIC) {
			
 
				+			xfs_warn(mp, "Bad da node magic!");
			
 
				+			ASSERT(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		bp->b_ops = &xfs_da3_node_buf_ops;
			
 
				+		break;
			
 
				+	case XFS_BLFT_ATTR_LEAF_BUF:
			
 
				+		if (magicda != XFS_ATTR_LEAF_MAGIC &&
			
 
				+		    magicda != XFS_ATTR3_LEAF_MAGIC) {
			
 
				+			xfs_warn(mp, "Bad attr leaf magic!");
			
 
				+			ASSERT(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		bp->b_ops = &xfs_attr3_leaf_buf_ops;
			
 
				+		break;
			
 
				+	case XFS_BLFT_ATTR_RMT_BUF:
			
 
				+		if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+			break;
			
 
				+		if (magic32 != XFS_ATTR3_RMT_MAGIC) {
			
 
				+			xfs_warn(mp, "Bad attr remote magic!");
			
 
				+			ASSERT(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		bp->b_ops = &xfs_attr3_rmt_buf_ops;
			
 
				+		break;
			
 
				+	case XFS_BLFT_SB_BUF:
			
 
				+		if (magic32 != XFS_SB_MAGIC) {
			
 
				+			xfs_warn(mp, "Bad SB block magic!");
			
 
				+			ASSERT(0);
			
 
				+			break;
			
 
				+		}
			
 
				+		bp->b_ops = &xfs_sb_buf_ops;
			
 
				+		break;
			
 
				+	default:
			
 
				+		xfs_warn(mp, "Unknown buffer type %d!",
			
 
				+			 xfs_blft_from_flags(buf_f));
			
 
				+		break;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Perform a 'normal' buffer recovery.  Each logged region of the
			
 
				  * buffer should be copied over the corresponding region in the
			
@@ -1928,6 +2133,8 @@ xlog_recover_do_reg_buffer(
 
				 
			
 
				 	/* Shouldn't be any more regions */
			
 
				 	ASSERT(i == item->ri_total);
			
 
				+
			
 
				+	xlog_recovery_validate_buf_type(mp, bp, buf_f);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -2213,6 +2420,7 @@ xlog_recover_inode_pass2(
 
				 	int			attr_index;
			
 
				 	uint			fields;
			
 
				 	xfs_icdinode_t		*dicp;
			
 
				+	uint			isize;
			
 
				 	int			need_free = 0;
			
 
				 
			
 
				 	if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
			
@@ -2238,7 +2446,7 @@ xlog_recover_inode_pass2(
 
				 	trace_xfs_log_recover_inode_recover(log, in_f);
			
 
				 
			
 
				 	bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0,
			
 
				-			  NULL);
			
 
				+			  &xfs_inode_buf_ops);
			
 
				 	if (!bp) {
			
 
				 		error = ENOMEM;
			
 
				 		goto error;
			
@@ -2349,7 +2557,8 @@ xlog_recover_inode_pass2(
 
				 		error = EFSCORRUPTED;
			
 
				 		goto error;
			
 
				 	}
			
 
				-	if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) {
			
 
				+	isize = xfs_icdinode_size(dicp->di_version);
			
 
				+	if (unlikely(item->ri_buf[1].i_len > isize)) {
			
 
				 		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
			
 
				 				     XFS_ERRLEVEL_LOW, mp, dicp);
			
 
				 		xfs_buf_relse(bp);
			
@@ -2361,13 +2570,13 @@ xlog_recover_inode_pass2(
 
				 	}
			
 
				 
			
 
				 	/* The core is in in-core format */
			
 
				-	xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr);
			
 
				+	xfs_dinode_to_disk(dip, dicp);
			
 
				 
			
 
				 	/* the rest is in on-disk format */
			
 
				-	if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) {
			
 
				-		memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode),
			
 
				-			item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode),
			
 
				-			item->ri_buf[1].i_len  - sizeof(struct xfs_icdinode));
			
 
				+	if (item->ri_buf[1].i_len > isize) {
			
 
				+		memcpy((char *)dip + isize,
			
 
				+			item->ri_buf[1].i_addr + isize,
			
 
				+			item->ri_buf[1].i_len - isize);
			
 
				 	}
			
 
				 
			
 
				 	fields = in_f->ilf_fields;
			
@@ -2451,6 +2660,9 @@ xlog_recover_inode_pass2(
 
				 	}
			
 
				 
			
 
				 write_inode_buffer:
			
 
				+	/* re-generate the checksum. */
			
 
				+	xfs_dinode_calc_crc(log->l_mp, dip);
			
 
				+
			
 
				 	ASSERT(bp->b_target->bt_mount == mp);
			
 
				 	bp->b_iodone = xlog_recover_iodone;
			
 
				 	xfs_buf_delwri_queue(bp, buffer_list);
			
@@ -2948,6 +3160,7 @@ xlog_recover_process_efi(
 
				 			 * This will pull the EFI from the AIL and
			
 
				 			 * free the memory associated with it.
			
 
				 			 */
			
 
				+			set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
			
 
				 			xfs_efi_release(efip, efip->efi_format.efi_nextents);
			
 
				 			return XFS_ERROR(EIO);
			
 
				 		}
			
@@ -3751,6 +3964,25 @@ xlog_recover(
 
				 			return error;
			
 
				 		}
			
 
				 
			
 
				+		/*
			
 
				+		 * Version 5 superblock log feature mask validation. We know the
			
 
				+		 * log is dirty so check if there are any unknown log features
			
 
				+		 * in what we need to recover. If there are unknown features
			
 
				+		 * (e.g. unsupported transactions), then simply reject the
			
 
				+		 * attempt at recovery before touching anything.
			
 
				+		 */
			
 
				+		if (XFS_SB_VERSION_NUM(&log->l_mp->m_sb) == XFS_SB_VERSION_5 &&
			
 
				+		    xfs_sb_has_incompat_log_feature(&log->l_mp->m_sb,
			
 
				+					XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) {
			
 
				+			xfs_warn(log->l_mp,
			
 
				+"Superblock has unknown incompatible log features (0x%x) enabled.\n"
			
 
				+"The log can not be fully and/or safely recovered by this kernel.\n"
			
 
				+"Please recover the log on a kernel that supports the unknown features.",
			
 
				+				(log->l_mp->m_sb.sb_features_log_incompat &
			
 
				+					XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
			
 
				+			return EINVAL;
			
 
				+		}
			
 
				+
			
 
				 		xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
			
 
				 				log->l_mp->m_logname ? log->l_mp->m_logname
			
 
				 						     : "internal");
			
--- a/fs/xfs/xfs_message.h
+++ b/fs/xfs/xfs_message.h
@@ -30,6 +30,32 @@ void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+#define xfs_printk_ratelimited(func, dev, fmt, ...)		\
			
 
				+do {									\
			
 
				+	static DEFINE_RATELIMIT_STATE(_rs,				\
			
 
				+				      DEFAULT_RATELIMIT_INTERVAL,	\
			
 
				+				      DEFAULT_RATELIMIT_BURST);		\
			
 
				+	if (__ratelimit(&_rs))						\
			
 
				+		func(dev, fmt, ##__VA_ARGS__);			\
			
 
				+} while (0)
			
 
				+
			
 
				+#define xfs_emerg_ratelimited(dev, fmt, ...)				\
			
 
				+	xfs_printk_ratelimited(xfs_emerg, dev, fmt, ##__VA_ARGS__)
			
 
				+#define xfs_alert_ratelimited(dev, fmt, ...)				\
			
 
				+	xfs_printk_ratelimited(xfs_alert, dev, fmt, ##__VA_ARGS__)
			
 
				+#define xfs_crit_ratelimited(dev, fmt, ...)				\
			
 
				+	xfs_printk_ratelimited(xfs_crit, dev, fmt, ##__VA_ARGS__)
			
 
				+#define xfs_err_ratelimited(dev, fmt, ...)				\
			
 
				+	xfs_printk_ratelimited(xfs_err, dev, fmt, ##__VA_ARGS__)
			
 
				+#define xfs_warn_ratelimited(dev, fmt, ...)				\
			
 
				+	xfs_printk_ratelimited(xfs_warn, dev, fmt, ##__VA_ARGS__)
			
 
				+#define xfs_notice_ratelimited(dev, fmt, ...)				\
			
 
				+	xfs_printk_ratelimited(xfs_notice, dev, fmt, ##__VA_ARGS__)
			
 
				+#define xfs_info_ratelimited(dev, fmt, ...)				\
			
 
				+	xfs_printk_ratelimited(xfs_info, dev, fmt, ##__VA_ARGS__)
			
 
				+#define xfs_debug_ratelimited(dev, fmt, ...)				\
			
 
				+	xfs_printk_ratelimited(xfs_debug, dev, fmt, ##__VA_ARGS__)
			
 
				+
			
 
				 extern void assfail(char *expr, char *f, int l);
			
 
				 
			
 
				 extern void xfs_hex_dump(void *p, int length);
			
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -43,6 +43,8 @@
 
				 #include "xfs_utils.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_icache.h"
			
 
				+#include "xfs_cksum.h"
			
 
				+#include "xfs_buf_item.h"
			
 
				 
			
 
				 
			
 
				 #ifdef HAVE_PERCPU_SB
			
@@ -109,6 +111,14 @@ static const struct {
 
				     { offsetof(xfs_sb_t, sb_logsunit),	 0 },
			
 
				     { offsetof(xfs_sb_t, sb_features2),	 0 },
			
 
				     { offsetof(xfs_sb_t, sb_bad_features2), 0 },
			
 
				+    { offsetof(xfs_sb_t, sb_features_compat), 0 },
			
 
				+    { offsetof(xfs_sb_t, sb_features_ro_compat), 0 },
			
 
				+    { offsetof(xfs_sb_t, sb_features_incompat), 0 },
			
 
				+    { offsetof(xfs_sb_t, sb_features_log_incompat), 0 },
			
 
				+    { offsetof(xfs_sb_t, sb_crc),	 0 },
			
 
				+    { offsetof(xfs_sb_t, sb_pad),	 0 },
			
 
				+    { offsetof(xfs_sb_t, sb_pquotino),	 0 },
			
 
				+    { offsetof(xfs_sb_t, sb_lsn),	 0 },
			
 
				     { sizeof(xfs_sb_t),			 0 }
			
 
				 };
			
 
				 
			
@@ -319,11 +329,54 @@ xfs_mount_validate_sb(
 
				 		return XFS_ERROR(EWRONGFS);
			
 
				 	}
			
 
				 
			
 
				+
			
 
				 	if (!xfs_sb_good_version(sbp)) {
			
 
				 		xfs_warn(mp, "bad version");
			
 
				 		return XFS_ERROR(EWRONGFS);
			
 
				 	}
			
 
				 
			
 
				+	/*
			
 
				+	 * Version 5 superblock feature mask validation. Reject combinations the
			
 
				+	 * kernel cannot support up front before checking anything else.
			
 
				+	 */
			
 
				+	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
			
 
				+		xfs_alert(mp,
			
 
				+"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n"
			
 
				+"Use of these features in this kernel is at your own risk!");
			
 
				+
			
 
				+		if (xfs_sb_has_compat_feature(sbp,
			
 
				+					XFS_SB_FEAT_COMPAT_UNKNOWN)) {
			
 
				+			xfs_warn(mp,
			
 
				+"Superblock has unknown compatible features (0x%x) enabled.\n"
			
 
				+"Using a more recent kernel is recommended.",
			
 
				+				(sbp->sb_features_compat &
			
 
				+						XFS_SB_FEAT_COMPAT_UNKNOWN));
			
 
				+		}
			
 
				+
			
 
				+		if (xfs_sb_has_ro_compat_feature(sbp,
			
 
				+					XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
			
 
				+			xfs_alert(mp,
			
 
				+"Superblock has unknown read-only compatible features (0x%x) enabled.",
			
 
				+				(sbp->sb_features_ro_compat &
			
 
				+						XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
			
 
				+			if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
			
 
				+				xfs_warn(mp,
			
 
				+"Attempted to mount read-only compatible filesystem read-write.\n"
			
 
				+"Filesystem can only be safely mounted read only.");
			
 
				+				return XFS_ERROR(EINVAL);
			
 
				+			}
			
 
				+		}
			
 
				+		if (xfs_sb_has_incompat_feature(sbp,
			
 
				+					XFS_SB_FEAT_INCOMPAT_UNKNOWN)) {
			
 
				+			xfs_warn(mp,
			
 
				+"Superblock has unknown incompatible features (0x%x) enabled.\n"
			
 
				+"Filesystem can not be safely mounted by this kernel.",
			
 
				+				(sbp->sb_features_incompat &
			
 
				+						XFS_SB_FEAT_INCOMPAT_UNKNOWN));
			
 
				+			return XFS_ERROR(EINVAL);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	if (unlikely(
			
 
				 	    sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
			
 
				 		xfs_warn(mp,
			
@@ -557,6 +610,14 @@ xfs_sb_from_disk(
 
				 	to->sb_logsunit = be32_to_cpu(from->sb_logsunit);
			
 
				 	to->sb_features2 = be32_to_cpu(from->sb_features2);
			
 
				 	to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2);
			
 
				+	to->sb_features_compat = be32_to_cpu(from->sb_features_compat);
			
 
				+	to->sb_features_ro_compat = be32_to_cpu(from->sb_features_ro_compat);
			
 
				+	to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat);
			
 
				+	to->sb_features_log_incompat =
			
 
				+				be32_to_cpu(from->sb_features_log_incompat);
			
 
				+	to->sb_pad = 0;
			
 
				+	to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
			
 
				+	to->sb_lsn = be64_to_cpu(from->sb_lsn);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -612,13 +673,12 @@ xfs_sb_to_disk(
 
				 	}
			
 
				 }
			
 
				 
			
 
				-static void
			
 
				+static int
			
 
				 xfs_sb_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				 	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				 	struct xfs_sb	sb;
			
 
				-	int		error;
			
 
				 
			
 
				 	xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
			
 
				 
			
@@ -626,16 +686,46 @@ xfs_sb_verify(
 
				 	 * Only check the in progress field for the primary superblock as
			
 
				 	 * mkfs.xfs doesn't clear it from secondary superblocks.
			
 
				 	 */
			
 
				-	error = xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR);
			
 
				-	if (error)
			
 
				-		xfs_buf_ioerror(bp, error);
			
 
				+	return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * If the superblock has the CRC feature bit set or the CRC field is non-null,
			
 
				+ * check that the CRC is valid.  We check the CRC field is non-null because a
			
 
				+ * single bit error could clear the feature bit and unused parts of the
			
 
				+ * superblock are supposed to be zero. Hence a non-null crc field indicates that
			
 
				+ * we've potentially lost a feature bit and we should check it anyway.
			
 
				+ */
			
 
				 static void
			
 
				 xfs_sb_read_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	xfs_sb_verify(bp);
			
 
				+	struct xfs_mount *mp = bp->b_target->bt_mount;
			
 
				+	struct xfs_dsb	*dsb = XFS_BUF_TO_SBP(bp);
			
 
				+	int		error;
			
 
				+
			
 
				+	/*
			
 
				+	 * open code the version check to avoid needing to convert the entire
			
 
				+	 * superblock from disk order just to check the version number
			
 
				+	 */
			
 
				+	if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC) &&
			
 
				+	    (((be16_to_cpu(dsb->sb_versionnum) & XFS_SB_VERSION_NUMBITS) ==
			
 
				+						XFS_SB_VERSION_5) ||
			
 
				+	     dsb->sb_crc != 0)) {
			
 
				+
			
 
				+		if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize),
			
 
				+				      offsetof(struct xfs_sb, sb_crc))) {
			
 
				+			error = EFSCORRUPTED;
			
 
				+			goto out_error;
			
 
				+		}
			
 
				+	}
			
 
				+	error = xfs_sb_verify(bp);
			
 
				+
			
 
				+out_error:
			
 
				+	if (error) {
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, error);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -648,11 +738,10 @@ static void
 
				 xfs_sb_quiet_read_verify(
			
 
				 	struct xfs_buf	*bp)
			
 
				 {
			
 
				-	struct xfs_sb	sb;
			
 
				+	struct xfs_dsb	*dsb = XFS_BUF_TO_SBP(bp);
			
 
				 
			
 
				-	xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
			
 
				 
			
 
				-	if (sb.sb_magicnum == XFS_SB_MAGIC) {
			
 
				+	if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) {
			
 
				 		/* XFS filesystem, verify noisily! */
			
 
				 		xfs_sb_read_verify(bp);
			
 
				 		return;
			
@@ -663,9 +752,27 @@ xfs_sb_quiet_read_verify(
 
				 
			
 
				 static void
			
 
				 xfs_sb_write_verify(
			
 
				-	struct xfs_buf	*bp)
			
 
				+	struct xfs_buf		*bp)
			
 
				 {
			
 
				-	xfs_sb_verify(bp);
			
 
				+	struct xfs_mount	*mp = bp->b_target->bt_mount;
			
 
				+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
			
 
				+	int			error;
			
 
				+
			
 
				+	error = xfs_sb_verify(bp);
			
 
				+	if (error) {
			
 
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			
 
				+		xfs_buf_ioerror(bp, error);
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
			
 
				+		return;
			
 
				+
			
 
				+	if (bip)
			
 
				+		XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
			
 
				+
			
 
				+	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
			
 
				+			 offsetof(struct xfs_sb, sb_crc));
			
 
				 }
			
 
				 
			
 
				 const struct xfs_buf_ops xfs_sb_buf_ops = {
			
@@ -687,7 +794,8 @@ int
 
				 xfs_readsb(xfs_mount_t *mp, int flags)
			
 
				 {
			
 
				 	unsigned int	sector_size;
			
 
				-	xfs_buf_t	*bp;
			
 
				+	struct xfs_buf	*bp;
			
 
				+	struct xfs_sb	*sbp = &mp->m_sb;
			
 
				 	int		error;
			
 
				 	int		loud = !(flags & XFS_MFSI_QUIET);
			
 
				 
			
@@ -714,7 +822,7 @@ reread:
 
				 	if (bp->b_error) {
			
 
				 		error = bp->b_error;
			
 
				 		if (loud)
			
 
				-			xfs_warn(mp, "SB validate failed");
			
 
				+			xfs_warn(mp, "SB validate failed with error %d.", error);
			
 
				 		goto release_buf;
			
 
				 	}
			
 
				 
			
@@ -726,10 +834,10 @@ reread:
 
				 	/*
			
 
				 	 * We must be able to do sector-sized and sector-aligned IO.
			
 
				 	 */
			
 
				-	if (sector_size > mp->m_sb.sb_sectsize) {
			
 
				+	if (sector_size > sbp->sb_sectsize) {
			
 
				 		if (loud)
			
 
				 			xfs_warn(mp, "device supports %u byte sectors (not %u)",
			
 
				-				sector_size, mp->m_sb.sb_sectsize);
			
 
				+				sector_size, sbp->sb_sectsize);
			
 
				 		error = ENOSYS;
			
 
				 		goto release_buf;
			
 
				 	}
			
@@ -738,15 +846,18 @@ reread:
 
				 	 * If device sector size is smaller than the superblock size,
			
 
				 	 * re-read the superblock so the buffer is correctly sized.
			
 
				 	 */
			
 
				-	if (sector_size < mp->m_sb.sb_sectsize) {
			
 
				+	if (sector_size < sbp->sb_sectsize) {
			
 
				 		xfs_buf_relse(bp);
			
 
				-		sector_size = mp->m_sb.sb_sectsize;
			
 
				+		sector_size = sbp->sb_sectsize;
			
 
				 		goto reread;
			
 
				 	}
			
 
				 
			
 
				 	/* Initialize per-cpu counters */
			
 
				 	xfs_icsb_reinit_counters(mp);
			
 
				 
			
 
				+	/* no need to be quiet anymore, so reset the buf ops */
			
 
				+	bp->b_ops = &xfs_sb_buf_ops;
			
 
				+
			
 
				 	mp->m_sb_bp = bp;
			
 
				 	xfs_buf_unlock(bp);
			
 
				 	return 0;
			
@@ -1633,6 +1744,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
 
				 	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
			
 
				 	first = xfs_sb_info[f].offset;
			
 
				 
			
 
				+	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
			
 
				 	xfs_trans_log_buf(tp, bp, first, last);
			
 
				 }
			
 
				 
			
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -207,7 +207,6 @@ typedef struct xfs_mount {
 
				 						     trimming */
			
 
				 	__int64_t		m_update_flags;	/* sb flags we need to update
			
 
				 						   on the next remount,rw */
			
 
				-	struct shrinker		m_inode_shrink;	/* inode reclaim shrinker */
			
 
				 	int64_t			m_low_space[XFS_LOWSP_MAX];
			
 
				 						/* low free space thresholds */
			
 
				 
			
@@ -392,6 +391,7 @@ extern void	xfs_set_low_space_thresholds(struct xfs_mount *);
 
				 
			
 
				 #endif	/* __KERNEL__ */
			
 
				 
			
 
				+extern void	xfs_sb_calc_crc(struct xfs_buf	*);
			
 
				 extern void	xfs_mod_sb(struct xfs_trans *, __int64_t);
			
 
				 extern int	xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t,
			
 
				 					xfs_agnumber_t *);
			
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -617,6 +617,20 @@ xfs_qm_dqdetach(
 
				 	}
			
 
				 }
			
 
				 
			
 
				+/*
				+ * Return how many xfs_dqblk_t sized records fit into a dquot chunk of
				+ * @nbblks basic blocks. @mp is accepted for interface symmetry but is not
				+ * consulted here.
				+ *
				+ * The byte count is held in a 32-bit variable, so ordinary integer division
				+ * is used: do_div() requires a 64-bit dividend and must not be handed an
				+ * unsigned int.
				+ */
				+int
				+xfs_qm_calc_dquots_per_chunk(
				+	struct xfs_mount	*mp,
				+	unsigned int		nbblks)	/* basic block units */
				+{
				+	unsigned int	nbytes;
				+
				+	ASSERT(nbblks > 0);
				+	nbytes = BBTOB(nbblks);
				+
				+	return nbytes / sizeof(xfs_dqblk_t);
				+}
			
 
				+
			
 
				 /*
			
 
				  * This initializes all the quota information that's kept in the
			
 
				  * mount structure
			
@@ -656,9 +670,8 @@ xfs_qm_init_quotainfo(
 
				 
			
 
				 	/* Precalc some constants */
			
 
				 	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
			
 
				-	ASSERT(qinf->qi_dqchunklen);
			
 
				-	qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
			
 
				-	do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
			
 
				+	qinf->qi_dqperchunk = xfs_qm_calc_dquots_per_chunk(mp,
			
 
				+							qinf->qi_dqchunklen);
			
 
				 
			
 
				 	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
			
 
				 
			
@@ -897,6 +910,10 @@ xfs_qm_dqiter_bufs(
 
				 		if (error)
			
 
				 			break;
			
 
				 
			
 
				+		/*
			
 
				+		 * XXX(hch): need to figure out if it makes sense to validate
			
 
				+		 *	     the CRC here.
			
 
				+		 */
			
 
				 		xfs_qm_reset_dqcounts(mp, bp, firstid, type);
			
 
				 		xfs_buf_delwri_queue(bp, buffer_list);
			
 
				 		xfs_buf_relse(bp);
			
@@ -1057,7 +1074,7 @@ xfs_qm_quotacheck_dqadjust(
 
				 	 * There are no timers for the default values set in the root dquot.
			
 
				 	 */
			
 
				 	if (dqp->q_core.d_id) {
			
 
				-		xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
			
 
				+		xfs_qm_adjust_dqlimits(mp, dqp);
			
 
				 		xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
			
 
				 	}
			
 
				 
			
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -75,6 +75,8 @@ typedef struct xfs_quotainfo {
 
				 	 &((qi)->qi_gquota_tree))
			
 
				 
			
 
				 
			
 
				+extern int	xfs_qm_calc_dquots_per_chunk(struct xfs_mount *mp,
			
 
				+					     unsigned int nbblks);
			
 
				 extern void	xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
			
 
				 extern int	xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
			
 
				 			xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
			
@@ -116,7 +118,7 @@ extern void		xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
 
				 extern int		xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
			
 
				 extern int		xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
			
 
				 					fs_disk_quota_t *);
			
 
				-extern int		xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
			
 
				+extern int		xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint,
			
 
				 					fs_disk_quota_t *);
			
 
				 extern int		xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
			
 
				 extern int		xfs_qm_scall_quotaon(xfs_mount_t *, uint);
			
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -472,15 +472,15 @@ xfs_qm_scall_getqstat(
 
				  */
			
 
				 int
			
 
				 xfs_qm_scall_setqlim(
			
 
				-	xfs_mount_t		*mp,
			
 
				+	struct xfs_mount	*mp,
			
 
				 	xfs_dqid_t		id,
			
 
				 	uint			type,
			
 
				 	fs_disk_quota_t		*newlim)
			
 
				 {
			
 
				 	struct xfs_quotainfo	*q = mp->m_quotainfo;
			
 
				-	xfs_disk_dquot_t	*ddq;
			
 
				-	xfs_dquot_t		*dqp;
			
 
				-	xfs_trans_t		*tp;
			
 
				+	struct xfs_disk_dquot	*ddq;
			
 
				+	struct xfs_dquot	*dqp;
			
 
				+	struct xfs_trans	*tp;
			
 
				 	int			error;
			
 
				 	xfs_qcnt_t		hard, soft;
			
 
				 
			
@@ -529,6 +529,7 @@ xfs_qm_scall_setqlim(
 
				 	if (hard == 0 || hard >= soft) {
			
 
				 		ddq->d_blk_hardlimit = cpu_to_be64(hard);
			
 
				 		ddq->d_blk_softlimit = cpu_to_be64(soft);
			
 
				+		xfs_dquot_set_prealloc_limits(dqp);
			
 
				 		if (id == 0) {
			
 
				 			q->qi_bhardlimit = hard;
			
 
				 			q->qi_bsoftlimit = soft;
			
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -77,7 +77,14 @@ typedef struct	xfs_disk_dquot {
 
				  */
			
 
				 typedef struct xfs_dqblk {
			
 
				 	xfs_disk_dquot_t  dd_diskdq;	/* portion that lives incore as well */
			
 
				-	char		  dd_fill[32];	/* filling for posterity */
			
 
				+	char		  dd_fill[4];	/* filling for posterity */
			
 
				+
			
 
				+	/*
			
 
				+	 * These two are only present on filesystems with the CRC bits set.
			
 
				+	 */
			
 
				+	__be32		  dd_crc;	/* checksum */
			
 
				+	__be64		  dd_lsn;	/* last modification in log */
			
 
				+	uuid_t		  dd_uuid;	/* location information */
			
 
				 } xfs_dqblk_t;
			
 
				 
			
 
				 /*
			
@@ -380,5 +387,7 @@ extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *,
 
				 				xfs_dqid_t, uint, uint, char *);
			
 
				 extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
			
 
				 
			
 
				+extern const struct xfs_buf_ops xfs_dquot_buf_ops;
			
 
				+
			
 
				 #endif	/* __KERNEL__ */
			
 
				 #endif	/* __XFS_QUOTA_H__ */
			
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -32,6 +32,7 @@ struct xfs_mount;
 
				 #define	XFS_SB_VERSION_2	2		/* 6.2 - attributes */
			
 
				 #define	XFS_SB_VERSION_3	3		/* 6.2 - new inode version */
			
 
				 #define	XFS_SB_VERSION_4	4		/* 6.2+ - bitmask version */
			
 
				+#define	XFS_SB_VERSION_5	5		/* CRC enabled filesystem */
			
 
				 #define	XFS_SB_VERSION_NUMBITS		0x000f
			
 
				 #define	XFS_SB_VERSION_ALLFBITS		0xfff0
			
 
				 #define	XFS_SB_VERSION_SASHFBITS	0xf000
			
@@ -161,6 +162,20 @@ typedef struct xfs_sb {
 
				 	 */
			
 
				 	__uint32_t	sb_bad_features2;
			
 
				 
			
 
				+	/* version 5 superblock fields start here */
			
 
				+
			
 
				+	/* feature masks */
			
 
				+	__uint32_t	sb_features_compat;
			
 
				+	__uint32_t	sb_features_ro_compat;
			
 
				+	__uint32_t	sb_features_incompat;
			
 
				+	__uint32_t	sb_features_log_incompat;
			
 
				+
			
 
				+	__uint32_t	sb_crc;		/* superblock crc */
			
 
				+	__uint32_t	sb_pad;
			
 
				+
			
 
				+	xfs_ino_t	sb_pquotino;	/* project quota inode */
			
 
				+	xfs_lsn_t	sb_lsn;		/* last write sequence */
			
 
				+
			
 
				 	/* must be padded to 64 bit alignment */
			
 
				 } xfs_sb_t;
			
 
				 
			
@@ -229,7 +244,21 @@ typedef struct xfs_dsb {
 
				 	 * for features2 bits. Easiest just to mark it bad and not use
			
 
				 	 * it for anything else.
			
 
				 	 */
			
 
				-	__be32	sb_bad_features2;
			
 
				+	__be32		sb_bad_features2;
			
 
				+
			
 
				+	/* version 5 superblock fields start here */
			
 
				+
			
 
				+	/* feature masks */
			
 
				+	__be32		sb_features_compat;
			
 
				+	__be32		sb_features_ro_compat;
			
 
				+	__be32		sb_features_incompat;
			
 
				+	__be32		sb_features_log_incompat;
			
 
				+
			
 
				+	__le32		sb_crc;		/* superblock crc */
			
 
				+	__be32		sb_pad;
			
 
				+
			
 
				+	__be64		sb_pquotino;	/* project quota inode */
			
 
				+	__be64		sb_lsn;		/* last write sequence */
			
 
				 
			
 
				 	/* must be padded to 64 bit alignment */
			
 
				 } xfs_dsb_t;
			
@@ -250,7 +279,10 @@ typedef enum {
 
				 	XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
			
 
				 	XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
			
 
				 	XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
			
 
				-	XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2,
			
 
				+	XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT,
			
 
				+	XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT,
			
 
				+	XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD,
			
 
				+	XFS_SBS_PQUOTINO, XFS_SBS_LSN,
			
 
				 	XFS_SBS_FIELDCOUNT
			
 
				 } xfs_sb_field_t;
			
 
				 
			
@@ -276,6 +308,12 @@ typedef enum {
 
				 #define XFS_SB_FDBLOCKS		XFS_SB_MVAL(FDBLOCKS)
			
 
				 #define XFS_SB_FEATURES2	XFS_SB_MVAL(FEATURES2)
			
 
				 #define XFS_SB_BAD_FEATURES2	XFS_SB_MVAL(BAD_FEATURES2)
			
 
				+#define XFS_SB_FEATURES_COMPAT	XFS_SB_MVAL(FEATURES_COMPAT)
			
 
				+#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT)
			
 
				+#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT)
			
 
				+#define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT)
			
 
				+#define XFS_SB_CRC		XFS_SB_MVAL(CRC)
			
 
				+#define XFS_SB_PQUOTINO		XFS_SB_MVAL(PQUOTINO)
			
 
				 #define	XFS_SB_NUM_BITS		((int)XFS_SBS_FIELDCOUNT)
			
 
				 #define	XFS_SB_ALL_BITS		((1LL << XFS_SB_NUM_BITS) - 1)
			
 
				 #define	XFS_SB_MOD_BITS		\
			
@@ -283,7 +321,9 @@ typedef enum {
 
				 	 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
			
 
				 	 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
			
 
				 	 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
			
 
				-	 XFS_SB_BAD_FEATURES2)
			
 
				+	 XFS_SB_BAD_FEATURES2 | XFS_SB_FEATURES_COMPAT | \
			
 
				+	 XFS_SB_FEATURES_RO_COMPAT | XFS_SB_FEATURES_INCOMPAT | \
			
 
				+	 XFS_SB_FEATURES_LOG_INCOMPAT | XFS_SB_PQUOTINO)
			
 
				 
			
 
				 
			
 
				 /*
			
@@ -325,6 +365,8 @@ static inline int xfs_sb_good_version(xfs_sb_t *sbp)
 
				 
			
 
				 		return 1;
			
 
				 	}
			
 
				+	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5)
			
 
				+		return 1;
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
@@ -365,7 +407,7 @@ static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp)
 
				 {
			
 
				 	return sbp->sb_versionnum == XFS_SB_VERSION_2 ||
			
 
				 		sbp->sb_versionnum == XFS_SB_VERSION_3 ||
			
 
				-		(XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
			
 
				+		(XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
			
 
				 		 (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT));
			
 
				 }
			
 
				 
			
@@ -373,7 +415,7 @@ static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
 
				 {
			
 
				 	if (sbp->sb_versionnum == XFS_SB_VERSION_1)
			
 
				 		sbp->sb_versionnum = XFS_SB_VERSION_2;
			
 
				-	else if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4)
			
 
				+	else if (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4)
			
 
				 		sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT;
			
 
				 	else
			
 
				 		sbp->sb_versionnum = XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT;
			
@@ -382,7 +424,7 @@ static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
 
				 static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp)
			
 
				 {
			
 
				 	return sbp->sb_versionnum == XFS_SB_VERSION_3 ||
			
 
				-		 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
			
 
				+		 (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
			
 
				 		  (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT));
			
 
				 }
			
 
				 
			
@@ -396,13 +438,13 @@ static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp)
 
				 
			
 
				 static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp)
			
 
				 {
			
 
				-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
			
 
				+	return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
			
 
				 		(sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT);
			
 
				 }
			
 
				 
			
 
				 static inline void xfs_sb_version_addquota(xfs_sb_t *sbp)
			
 
				 {
			
 
				-	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4)
			
 
				+	if (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4)
			
 
				 		sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT;
			
 
				 	else
			
 
				 		sbp->sb_versionnum = xfs_sb_version_tonew(sbp->sb_versionnum) |
			
@@ -411,13 +453,14 @@ static inline void xfs_sb_version_addquota(xfs_sb_t *sbp)
 
				 
			
 
				 static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp)
			
 
				 {
			
 
				-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
			
 
				-		(sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT);
			
 
				+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
			
 
				+	       (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
			
 
				+		(sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT));
			
 
				 }
			
 
				 
			
 
				 static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp)
			
 
				 {
			
 
				-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
			
 
				+	return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
			
 
				 		(sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT);
			
 
				 }
			
 
				 
			
@@ -429,38 +472,42 @@ static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp)
 
				 
			
 
				 static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp)
			
 
				 {
			
 
				-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
			
 
				-		(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
			
 
				+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
			
 
				+	       (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
			
 
				+		(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT));
			
 
				 }
			
 
				 
			
 
				 static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp)
			
 
				 {
			
 
				-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
			
 
				-		(sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT);
			
 
				+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
			
 
				+	       (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
			
 
				+		(sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT));
			
 
				 }
			
 
				 
			
 
				 static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp)
			
 
				 {
			
 
				-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
			
 
				-		(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT);
			
 
				+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
			
 
				+	       (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
			
 
				+		(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT));
			
 
				 }
			
 
				 
			
 
				 static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
			
 
				 {
			
 
				-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
			
 
				+	return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
			
 
				 		(sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
			
 
				 }
			
 
				 
			
 
				 static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp)
			
 
				 {
			
 
				-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
			
 
				+	return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 &&
			
 
				 		(sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
			
 
				 }
			
 
				 
			
 
				 static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
			
 
				 {
			
 
				-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
			
 
				-		(sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT);
			
 
				+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
			
 
				+	       (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
			
 
				+		(sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT));
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -475,14 +522,16 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
 
				 
			
 
				 static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp)
			
 
				 {
			
 
				-	return xfs_sb_version_hasmorebits(sbp) &&
			
 
				-		(sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT);
			
 
				+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
			
 
				+	       (xfs_sb_version_hasmorebits(sbp) &&
			
 
				+		(sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));
			
 
				 }
			
 
				 
			
 
				 static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
			
 
				 {
			
 
				-	return xfs_sb_version_hasmorebits(sbp) &&
			
 
				-		(sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT);
			
 
				+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
			
 
				+	       (xfs_sb_version_hasmorebits(sbp) &&
			
 
				+		(sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT));
			
 
				 }
			
 
				 
			
 
				 static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
			
@@ -500,14 +549,73 @@ static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
 
				 
			
 
				 static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
			
 
				 {
			
 
				-	return xfs_sb_version_hasmorebits(sbp) &&
			
 
				-		(sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT);
			
 
				+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||
			
 
				+	       (xfs_sb_version_hasmorebits(sbp) &&
			
 
				+		(sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT));
			
 
				 }
			
 
				 
			
 
				 static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp)
			
 
				 {
			
 
				-	return (xfs_sb_version_hasmorebits(sbp) &&
			
 
				-		(sbp->sb_features2 & XFS_SB_VERSION2_CRCBIT));
			
 
				+	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * Extended v5 superblock feature masks. These are to be used for new v5
			
 
				+ * superblock features only.
			
 
				+ *
			
 
				+ * Compat features are new features that old kernels will not notice or affect
			
 
				+ * and so can mount read-write without issues.
			
 
				+ *
			
 
				+ * RO-Compat (read only) are features that old kernels can read but will break
			
 
				+ * if they write. Hence only read-only mounts of such filesystems are allowed on
			
 
				+ * kernels that don't support the feature bit.
			
 
				+ *
			
 
				+ * InCompat features are features which old kernels will not understand and so
			
 
				+ * must not mount.
			
 
				+ *
			
 
				+ * Log-InCompat features are for changes to log formats or new transactions that
			
 
				+ * can't be replayed on older kernels. The fields are set when the filesystem is
			
 
				+ * mounted, and a clean unmount clears the fields.
			
 
				+ */
			
 
				+#define XFS_SB_FEAT_COMPAT_ALL 0
			
 
				+#define XFS_SB_FEAT_COMPAT_UNKNOWN	~XFS_SB_FEAT_COMPAT_ALL
			
 
				+/* Report whether any bit of @feature is set in sb_features_compat. */
				+static inline bool
				+xfs_sb_has_compat_feature(
				+	struct xfs_sb	*sbp,
				+	__uint32_t	feature)
				+{
				+	__uint32_t	present = sbp->sb_features_compat & feature;
				+
				+	return present != 0;
				+}
			
 
				+
			
 
				+#define XFS_SB_FEAT_RO_COMPAT_ALL 0
			
 
				+#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN	~XFS_SB_FEAT_RO_COMPAT_ALL
			
 
				+/* Report whether any bit of @feature is set in sb_features_ro_compat. */
				+static inline bool
				+xfs_sb_has_ro_compat_feature(
				+	struct xfs_sb	*sbp,
				+	__uint32_t	feature)
				+{
				+	__uint32_t	present = sbp->sb_features_ro_compat & feature;
				+
				+	return present != 0;
				+}
			
 
				+
			
 
				+#define XFS_SB_FEAT_INCOMPAT_ALL 0
			
 
				+#define XFS_SB_FEAT_INCOMPAT_UNKNOWN	~XFS_SB_FEAT_INCOMPAT_ALL
			
 
				+/* Report whether any bit of @feature is set in sb_features_incompat. */
				+static inline bool
				+xfs_sb_has_incompat_feature(
				+	struct xfs_sb	*sbp,
				+	__uint32_t	feature)
				+{
				+	__uint32_t	present = sbp->sb_features_incompat & feature;
				+
				+	return present != 0;
				+}
			
 
				+
			
 
				+#define XFS_SB_FEAT_INCOMPAT_LOG_ALL 0
			
 
				+#define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN	~XFS_SB_FEAT_INCOMPAT_LOG_ALL
			
 
				+/* Report whether any bit of @feature is set in sb_features_log_incompat. */
				+static inline bool
				+xfs_sb_has_incompat_log_feature(
				+	struct xfs_sb	*sbp,
				+	__uint32_t	feature)
				+{
				+	__uint32_t	present = sbp->sb_features_log_incompat & feature;
				+
				+	return present != 0;
				 }
			
 
				 
			
 
				 /*
			
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -0,0 +1,730 @@
 
				+/*
			
 
				+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
			
 
				+ * Copyright (c) 2012-2013 Red Hat, Inc.
			
 
				+ * All rights reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#include "xfs.h"
			
 
				+#include "xfs_fs.h"
			
 
				+#include "xfs_types.h"
			
 
				+#include "xfs_bit.h"
			
 
				+#include "xfs_log.h"
			
 
				+#include "xfs_trans.h"
			
 
				+#include "xfs_sb.h"
			
 
				+#include "xfs_ag.h"
			
 
				+#include "xfs_dir2.h"
			
 
				+#include "xfs_mount.h"
			
 
				+#include "xfs_da_btree.h"
			
 
				+#include "xfs_bmap_btree.h"
			
 
				+#include "xfs_ialloc_btree.h"
			
 
				+#include "xfs_dinode.h"
			
 
				+#include "xfs_inode.h"
			
 
				+#include "xfs_inode_item.h"
			
 
				+#include "xfs_itable.h"
			
 
				+#include "xfs_ialloc.h"
			
 
				+#include "xfs_alloc.h"
			
 
				+#include "xfs_bmap.h"
			
 
				+#include "xfs_error.h"
			
 
				+#include "xfs_quota.h"
			
 
				+#include "xfs_utils.h"
			
 
				+#include "xfs_trans_space.h"
			
 
				+#include "xfs_log_priv.h"
			
 
				+#include "xfs_trace.h"
			
 
				+#include "xfs_symlink.h"
			
 
				+#include "xfs_cksum.h"
			
 
				+#include "xfs_buf_item.h"
			
 
				+
			
 
				+
			
 
				+/*
				+ * Count the filesystem blocks needed to hold a symlink target of @pathlen
				+ * bytes. Because each contiguous block carries a header, the usable space
				+ * per block comes from XFS_SYMLINK_BUF_SPACE() rather than from a plain
				+ * pathlen to FSB conversion.
				+ */
				+int
				+xfs_symlink_blocks(
				+	struct xfs_mount *mp,
				+	int		pathlen)
				+{
				+	int		remaining = pathlen;
				+	int		nblocks = 0;
				+
				+	/* one block is always counted, even when pathlen is not positive */
				+	for (;;) {
				+		nblocks++;
				+		remaining -= XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
				+		if (remaining <= 0)
				+			break;
				+	}
				+
				+	ASSERT(nblocks <= XFS_SYMLINK_MAPS);
				+	return nblocks;
				+}
			
 
				+
			
 
				+/*
				+ * Fill in the remote symlink header at the start of @bp and attach the
				+ * symlink buffer ops. Nothing is written when the superblock does not have
				+ * CRCs enabled.
				+ *
				+ * Returns the size of the header that was written, or 0 if none was.
				+ */
				+static int
				+xfs_symlink_hdr_set(
				+	struct xfs_mount	*mp,
				+	xfs_ino_t		ino,
				+	uint32_t		offset,
				+	uint32_t		size,
				+	struct xfs_buf		*bp)
				+{
				+	struct xfs_dsymlink_hdr	*hdr;
				+
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
				+		return 0;
				+
				+	hdr = bp->b_addr;
				+
				+	/* identification: magic number and filesystem uuid */
				+	hdr->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
				+	uuid_copy(&hdr->sl_uuid, &mp->m_sb.sb_uuid);
				+
				+	/* which part of which inode's target this block holds */
				+	hdr->sl_owner = cpu_to_be64(ino);
				+	hdr->sl_offset = cpu_to_be32(offset);
				+	hdr->sl_bytes = cpu_to_be32(size);
				+
				+	/* where the block itself lives */
				+	hdr->sl_blkno = cpu_to_be64(bp->b_bn);
				+
				+	bp->b_ops = &xfs_symlink_buf_ops;
				+
				+	return sizeof(struct xfs_dsymlink_hdr);
				+}
			
 
				+
			
 
				+/*
				+ * Checking of the symlink header is split into two parts. The verifier does
				+ * CRC, location and bounds checking; this unpacking-side check compares the
				+ * path parameters (offset and size) and the owner recorded in the header
				+ * against the values the caller expects.
				+ *
				+ * Returns true only when all three match.
				+ */
				+bool
				+xfs_symlink_hdr_ok(
				+	struct xfs_mount	*mp,
				+	xfs_ino_t		ino,
				+	uint32_t		offset,
				+	uint32_t		size,
				+	struct xfs_buf		*bp)
				+{
				+	struct xfs_dsymlink_hdr *hdr = bp->b_addr;
				+
				+	return offset == be32_to_cpu(hdr->sl_offset) &&
				+	       size == be32_to_cpu(hdr->sl_bytes) &&
				+	       ino == be64_to_cpu(hdr->sl_owner);
				+}
			
 
				+
			
 
				+/*
				+ * Structural check of a remote symlink block header, shared by the read and
				+ * write verifiers. Fails on non-CRC filesystems, on a wrong magic number or
				+ * uuid, when the recorded block number differs from the buffer's, when
				+ * offset + length reaches MAXPATHLEN, or when no owner is recorded.
				+ */
				+static bool
				+xfs_symlink_verify(
				+	struct xfs_buf		*bp)
				+{
				+	struct xfs_mount	*mp = bp->b_target->bt_mount;
				+	struct xfs_dsymlink_hdr	*hdr = bp->b_addr;
				+
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
				+		return false;
				+
				+	/* is this a symlink block belonging to this filesystem? */
				+	if (hdr->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC) ||
				+	    !uuid_equal(&hdr->sl_uuid, &mp->m_sb.sb_uuid))
				+		return false;
				+
				+	/* is it the block we asked for? */
				+	if (be64_to_cpu(hdr->sl_blkno) != bp->b_bn)
				+		return false;
				+
				+	/* the described range must stay below MAXPATHLEN */
				+	if (be32_to_cpu(hdr->sl_offset) +
				+				be32_to_cpu(hdr->sl_bytes) >= MAXPATHLEN)
				+		return false;
				+
				+	return hdr->sl_owner != 0;
				+}
			
 
				+
			
 
				+/*
				+ * Read verifier for remote symlink buffers. On CRC enabled filesystems the
				+ * checksum is validated first and then the header; any failure is reported
				+ * and flagged on the buffer as EFSCORRUPTED.
				+ */
				+static void
				+xfs_symlink_read_verify(
				+	struct xfs_buf	*bp)
				+{
				+	struct xfs_mount *mp = bp->b_target->bt_mount;
				+
				+	/* no verification of non-crc buffers */
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
				+		return;
				+
				+	if (xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
				+				 offsetof(struct xfs_dsymlink_hdr, sl_crc)) &&
				+	    xfs_symlink_verify(bp))
				+		return;
				+
				+	XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
				+	xfs_buf_ioerror(bp, EFSCORRUPTED);
				+}
			
 
				+
			
 
				+/*
				+ * Write verifier for remote symlink buffers. On CRC enabled filesystems the
				+ * header is checked, the LSN of the attached buffer log item (if any) is
				+ * stored in the header, and the checksum is recalculated.
				+ */
				+static void
				+xfs_symlink_write_verify(
				+	struct xfs_buf	*bp)
				+{
				+	struct xfs_mount	*mp = bp->b_target->bt_mount;
				+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
				+	struct xfs_dsymlink_hdr	*hdr = bp->b_addr;
				+
				+	/* no verification of non-crc buffers */
				+	if (!xfs_sb_version_hascrc(&mp->m_sb))
				+		return;
				+
				+	if (!xfs_symlink_verify(bp)) {
				+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
				+		xfs_buf_ioerror(bp, EFSCORRUPTED);
				+		return;
				+	}
				+
				+	/* the LSN must be in place before the checksum is computed */
				+	if (bip)
				+		hdr->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
				+
				+	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
				+			 offsetof(struct xfs_dsymlink_hdr, sl_crc));
				+}
			
 
				+
			
 
				+/* Buffer ops wiring the remote symlink read and write verifiers together. */
				+const struct xfs_buf_ops xfs_symlink_buf_ops = {
				+	.verify_read = xfs_symlink_read_verify,
				+	.verify_write = xfs_symlink_write_verify,
				+};
			
 
				+
			
 
				+/*
				+ * Copy the inline symlink data held in @ifp into the buffer @bp. On CRC
				+ * enabled filesystems a symlink header is written first and the data follows
				+ * it; otherwise the data is copied to the start of the buffer and the buffer
				+ * is left without ops.
				+ */
				+void
				+xfs_symlink_local_to_remote(
				+	struct xfs_trans	*tp,
				+	struct xfs_buf		*bp,
				+	struct xfs_inode	*ip,
				+	struct xfs_ifork	*ifp)
				+{
				+	struct xfs_mount	*mp = ip->i_mount;
				+	char			*dst = bp->b_addr;
				+
				+	if (!xfs_sb_version_hascrc(&mp->m_sb)) {
				+		bp->b_ops = NULL;
				+		memcpy(dst, ifp->if_u1.if_data, ifp->if_bytes);
				+		return;
				+	}
				+
				+	/*
				+	 * As this symlink fits in an inode literal area, it must also fit in
				+	 * the smallest buffer the filesystem supports.
				+	 */
				+	ASSERT(BBTOB(bp->b_length) >=
				+			ifp->if_bytes + sizeof(struct xfs_dsymlink_hdr));
				+
				+	bp->b_ops = &xfs_symlink_buf_ops;
				+
				+	/* skip over the header that was just written */
				+	dst += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp);
				+	memcpy(dst, ifp->if_u1.if_data, ifp->if_bytes);
				+}
			
 
				+
			
 
				+/* ----- Kernel only functions below ----- */
			
 
				+/*
				+ * Read the target of a remote (non-inline) symlink into @link.
				+ *
				+ * The blocks backing the symlink are mapped, each mapped extent is read
				+ * through the symlink verifier, and its payload is appended to @link. On CRC
				+ * enabled filesystems every buffer starts with a struct xfs_dsymlink_hdr
				+ * which is checked against the expected owner/offset/length and skipped, so
				+ * only the path bytes end up in @link. The result is NUL terminated at
				+ * ip->i_d.di_size.
				+ *
				+ * Returns 0 on success or a positive errno value.
				+ */
				+STATIC int
				+xfs_readlink_bmap(
				+	struct xfs_inode	*ip,
				+	char			*link)
				+{
				+	struct xfs_mount	*mp = ip->i_mount;
				+	struct xfs_bmbt_irec	mval[XFS_SYMLINK_MAPS];
				+	struct xfs_buf		*bp;
				+	xfs_daddr_t		d;
				+	char			*cur_chunk;
				+	int			pathlen = ip->i_d.di_size;
				+	int			nmaps = XFS_SYMLINK_MAPS;
				+	int			byte_cnt;
				+	int			n;
				+	int			error = 0;
				+	int			fsblocks;
				+	int			offset;
				+
				+	fsblocks = xfs_symlink_blocks(mp, pathlen);
				+	error = xfs_bmapi_read(ip, 0, fsblocks, mval, &nmaps, 0);
				+	if (error)
				+		goto out;
				+
				+	offset = 0;
				+	for (n = 0; n < nmaps; n++) {
				+		d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
				+		byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
				+
				+		bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0,
				+				  &xfs_symlink_buf_ops);
				+		if (!bp)
				+			return XFS_ERROR(ENOMEM);
				+		error = bp->b_error;
				+		if (error) {
				+			xfs_buf_ioerror_alert(bp, __func__);
				+			xfs_buf_relse(bp);
				+			goto out;
				+		}
				+
				+		/* payload space in this buffer, capped at what is left to read */
				+		byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
				+		if (pathlen < byte_cnt)
				+			byte_cnt = pathlen;
				+
				+		cur_chunk = bp->b_addr;
				+		if (xfs_sb_version_hascrc(&mp->m_sb)) {
				+			if (!xfs_symlink_hdr_ok(mp, ip->i_ino, offset,
				+							byte_cnt, bp)) {
				+				error = EFSCORRUPTED;
				+				xfs_alert(mp,
				+"symlink header does not match required off/len/owner (0x%x/0x%x,0x%llx)",
				+					offset, byte_cnt,
				+					(unsigned long long) ip->i_ino);
				+				xfs_buf_relse(bp);
				+				goto out;
				+
				+			}
				+
				+			cur_chunk += sizeof(struct xfs_dsymlink_hdr);
				+		}
				+
				+		/*
				+		 * Copy from cur_chunk, not from the start of the buffer:
				+		 * on CRC filesystems the header has to be skipped or it
				+		 * would be returned as part of the link target.
				+		 */
				+		memcpy(link + offset, cur_chunk, byte_cnt);
				+
				+		pathlen -= byte_cnt;
				+		offset += byte_cnt;
				+
				+		xfs_buf_relse(bp);
				+	}
				+	ASSERT(pathlen == 0);
				+
				+	link[ip->i_d.di_size] = '\0';
				+	error = 0;
				+
				+ out:
				+	return error;
				+}
			
 
				+
			
 
				+/*
				+ * Copy the target of symlink inode @ip into @link and NUL terminate it.
				+ *
				+ * The inode is held under XFS_ILOCK_SHARED for the duration. A zero length
				+ * symlink returns 0 without touching @link; a negative length or one larger
				+ * than MAXPATHLEN is reported as EFSCORRUPTED. Inline targets are copied
				+ * straight from the data fork, everything else goes through
				+ * xfs_readlink_bmap().
				+ */
				+int
				+xfs_readlink(
				+	struct xfs_inode *ip,
				+	char		*link)
				+{
				+	struct xfs_mount *mp = ip->i_mount;
				+	xfs_fsize_t	pathlen;
				+	int		error = 0;
				+
				+	trace_xfs_readlink(ip);
				+
				+	if (XFS_FORCED_SHUTDOWN(mp))
				+		return XFS_ERROR(EIO);
				+
				+	xfs_ilock(ip, XFS_ILOCK_SHARED);
				+
				+	pathlen = ip->i_d.di_size;
				+	if (pathlen == 0)
				+		goto out_unlock;
				+
				+	if (pathlen < 0 || pathlen > MAXPATHLEN) {
				+		xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
				+			 __func__, (unsigned long long) ip->i_ino,
				+			 (long long) pathlen);
				+		ASSERT(0);
				+		error = XFS_ERROR(EFSCORRUPTED);
				+		goto out_unlock;
				+	}
				+
				+	/* remote symlink: the target lives in separate blocks */
				+	if (!(ip->i_df.if_flags & XFS_IFINLINE)) {
				+		error = xfs_readlink_bmap(ip, link);
				+		goto out_unlock;
				+	}
				+
				+	/* inline symlink: the target is held in the in-core data fork */
				+	memcpy(link, ip->i_df.if_u1.if_data, pathlen);
				+	link[pathlen] = '\0';
				+
				+ out_unlock:
				+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
				+	return error;
				+}
			
 
				+
			
 
				+/*
				+ * Create a symbolic link called @link_name in directory @dp that points at
				+ * @target_path.
				+ *
				+ * The target is stored inline in the new inode's data fork when it fits;
				+ * otherwise blocks are allocated and the target is copied into them chunk
				+ * by chunk, each chunk preceded by whatever header xfs_symlink_hdr_set()
				+ * writes.  The directory entry is then added and the transaction committed.
				+ *
				+ * On success 0 is returned and *ipp holds the new inode.  On failure a
				+ * positive errno is returned, *ipp is NULL, the transaction is cancelled and
				+ * the dquot references are dropped.
				+ */
				+int
				+xfs_symlink(
				+	struct xfs_inode	*dp,
				+	struct xfs_name		*link_name,
				+	const char		*target_path,
				+	umode_t			mode,
				+	struct xfs_inode	**ipp)
				+{
				+	struct xfs_mount	*mp = dp->i_mount;
				+	struct xfs_trans	*tp = NULL;
				+	struct xfs_inode	*ip = NULL;
				+	int			error = 0;
				+	int			pathlen;
				+	struct xfs_bmap_free	free_list;
				+	xfs_fsblock_t		first_block;
				+	bool			unlock_dp_on_error = false;
				+	uint			cancel_flags;
				+	int			committed;
				+	xfs_fileoff_t		first_fsb;
				+	xfs_filblks_t		fs_blocks;
				+	int			nmaps;
				+	struct xfs_bmbt_irec	mval[XFS_SYMLINK_MAPS];
				+	xfs_daddr_t		d;
				+	const char		*cur_chunk;
				+	int			byte_cnt;
				+	int			n;
				+	xfs_buf_t		*bp;
				+	prid_t			prid;
				+	struct xfs_dquot	*udqp, *gdqp;
				+	uint			resblks;
				+
				+	*ipp = NULL;
				+
				+	trace_xfs_symlink(dp, link_name);
				+
				+	if (XFS_FORCED_SHUTDOWN(mp))
				+		return XFS_ERROR(EIO);
				+
				+	/*
				+	 * Check component lengths of the target path name.
				+	 */
				+	pathlen = strlen(target_path);
				+	if (pathlen >= MAXPATHLEN)      /* total string too long */
				+		return XFS_ERROR(ENAMETOOLONG);
				+
				+	udqp = gdqp = NULL;
				+	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
				+		prid = xfs_get_projid(dp);
				+	else
				+		prid = XFS_PROJID_DEFAULT;
				+
				+	/*
				+	 * Make sure that we have allocated dquot(s) on disk.
				+	 */
				+	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
				+			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
				+	if (error)
				+		goto std_return;
				+
				+	tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
				+	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
				+	/*
				+	 * The symlink will fit into the inode data fork?
				+	 * There can't be any attributes so we get the whole variable part.
				+	 *
				+	 * Otherwise size the remote allocation with xfs_symlink_blocks() so
				+	 * that the per-block header space taken out by
				+	 * XFS_SYMLINK_BUF_SPACE() in the copy loop below is accounted for.
				+	 * XFS_B_TO_FSB(mp, pathlen) alone can come up a block short when
				+	 * headers are present, leaving the tail of the target unwritten.
				+	 */
				+	if (pathlen <= XFS_LITINO(mp, dp->i_d.di_version))
				+		fs_blocks = 0;
				+	else
				+		fs_blocks = xfs_symlink_blocks(mp, pathlen);
				+	resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
				+	error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
				+			XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
				+	/*
				+	 * An inline symlink needs no data blocks, so on ENOSPC retry with
				+	 * no block reservation at all.
				+	 */
				+	if (error == ENOSPC && fs_blocks == 0) {
				+		resblks = 0;
				+		error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0,
				+				XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
				+	}
				+	if (error) {
				+		cancel_flags = 0;
				+		goto error_return;
				+	}
				+
				+	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
				+	unlock_dp_on_error = true;
				+
				+	/*
				+	 * Check whether the directory allows new symlinks or not.
				+	 */
				+	if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
				+		error = XFS_ERROR(EPERM);
				+		goto error_return;
				+	}
				+
				+	/*
				+	 * Reserve disk quota : blocks and inode.
				+	 */
				+	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
				+	if (error)
				+		goto error_return;
				+
				+	/*
				+	 * Check for ability to enter directory entry, if no space reserved.
				+	 */
				+	error = xfs_dir_canenter(tp, dp, link_name, resblks);
				+	if (error)
				+		goto error_return;
				+	/*
				+	 * Initialize the bmap freelist prior to calling either
				+	 * bmapi or the directory create code.
				+	 */
				+	xfs_bmap_init(&free_list, &first_block);
				+
				+	/*
				+	 * Allocate an inode for the symlink.
				+	 */
				+	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
				+			       prid, resblks > 0, &ip, NULL);
				+	if (error) {
				+		if (error == ENOSPC)
				+			goto error_return;
				+		goto error1;
				+	}
				+
				+	/*
				+	 * An error after we've joined dp to the transaction will result in the
				+	 * transaction cancel unlocking dp so don't do it explicitly in the
				+	 * error path.
				+	 */
				+	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
				+	unlock_dp_on_error = false;
				+
				+	/*
				+	 * Also attach the dquot(s) to it, if applicable.
				+	 */
				+	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
				+
				+	if (resblks)
				+		resblks -= XFS_IALLOC_SPACE_RES(mp);
				+	/*
				+	 * If the symlink will fit into the inode, write it inline.
				+	 */
				+	if (pathlen <= XFS_IFORK_DSIZE(ip)) {
				+		xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
				+		memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
				+		ip->i_d.di_size = pathlen;
				+
				+		/*
				+		 * The inode was initially created in extent format.
				+		 */
				+		ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
				+		ip->i_df.if_flags |= XFS_IFINLINE;
				+
				+		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
				+		xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
				+
				+	} else {
				+		int	offset;
				+
				+		first_fsb = 0;
				+		nmaps = XFS_SYMLINK_MAPS;
				+
				+		error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
				+				  XFS_BMAPI_METADATA, &first_block, resblks,
				+				  mval, &nmaps, &free_list);
				+		if (error)
				+			goto error2;
				+
				+		if (resblks)
				+			resblks -= fs_blocks;
				+		ip->i_d.di_size = pathlen;
				+		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				+
				+		/*
				+		 * Walk the returned extents, copying as much of the target
				+		 * as each buffer can hold after its header.  From here on
				+		 * pathlen counts the bytes still to be written.
				+		 */
				+		cur_chunk = target_path;
				+		offset = 0;
				+		for (n = 0; n < nmaps; n++) {
				+			char *buf;
				+
				+			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
				+			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
				+			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
				+					       BTOBB(byte_cnt), 0);
				+			if (!bp) {
				+				error = XFS_ERROR(ENOMEM);
				+				goto error2;
				+			}
				+			bp->b_ops = &xfs_symlink_buf_ops;
				+
				+			byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
				+			if (pathlen < byte_cnt) {
				+				byte_cnt = pathlen;
				+			}
				+
				+			/* Skip past whatever header was written. */
				+			buf = bp->b_addr;
				+			buf += xfs_symlink_hdr_set(mp, ip->i_ino, offset,
				+						   byte_cnt, bp);
				+
				+			memcpy(buf, cur_chunk, byte_cnt);
				+
				+			cur_chunk += byte_cnt;
				+			pathlen -= byte_cnt;
				+			offset += byte_cnt;
				+
				+			/* Log from the buffer start to the last byte copied. */
				+			xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) -
				+							(char *)bp->b_addr);
				+		}
				+		/* Every byte of the target must have found a home. */
				+		ASSERT(pathlen == 0);
				+	}
				+
				+	/*
				+	 * Create the directory entry for the symlink.
				+	 */
				+	error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
				+					&first_block, &free_list, resblks);
				+	if (error)
				+		goto error2;
				+	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
				+	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
				+
				+	/*
				+	 * If this is a synchronous mount, make sure that the
				+	 * symlink transaction goes to disk before returning to
				+	 * the user.
				+	 */
				+	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
				+		xfs_trans_set_sync(tp);
				+	}
				+
				+	error = xfs_bmap_finish(&tp, &free_list, &committed);
				+	if (error) {
				+		goto error2;
				+	}
				+	/*
				+	 * NOTE(review): the commit result is stored in error but 0 is
				+	 * returned regardless; confirm whether a commit failure should
				+	 * be propagated (and the new inode released) instead.
				+	 */
				+	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
				+	xfs_qm_dqrele(udqp);
				+	xfs_qm_dqrele(gdqp);
				+
				+	*ipp = ip;
				+	return 0;
				+
				+ error2:
				+	IRELE(ip);
				+ error1:
				+	xfs_bmap_cancel(&free_list);
				+	cancel_flags |= XFS_TRANS_ABORT;
				+ error_return:
				+	xfs_trans_cancel(tp, cancel_flags);
				+	xfs_qm_dqrele(udqp);
				+	xfs_qm_dqrele(gdqp);
				+
				+	if (unlock_dp_on_error)
				+		xfs_iunlock(dp, XFS_ILOCK_EXCL);
				+ std_return:
				+	return error;
				+}
			
 
				+
			
 
				+/*
				+ * Free a symlink that has blocks associated with it.
				+ *
				+ * On entry *tpp is the caller's transaction.  The symlink's size is zeroed,
				+ * its buffers are invalidated and its blocks unmapped, and the resulting
				+ * transactions are committed.  On success 0 is returned and *tpp is
				+ * replaced with a fresh transaction that carries an itruncate log
				+ * reservation and has the inode joined.  On failure a positive errno is
				+ * returned and *tpp is left as it was on entry.
				+ */
				+int
				+xfs_inactive_symlink_rmt(
				+	xfs_inode_t	*ip,
				+	xfs_trans_t	**tpp)
				+{
				+	xfs_buf_t	*bp;
				+	int		committed;
				+	int		done;
				+	int		error;
				+	xfs_fsblock_t	first_block;
				+	xfs_bmap_free_t	free_list;
				+	int		i;
				+	xfs_mount_t	*mp;
				+	xfs_bmbt_irec_t	mval[XFS_SYMLINK_MAPS];
				+	int		nmaps;
				+	xfs_trans_t	*ntp;
				+	int		size;
				+	xfs_trans_t	*tp;
				+
				+	tp = *tpp;
				+	mp = ip->i_mount;
				+	ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
				+	/*
				+	 * We're freeing a symlink that has some
				+	 * blocks allocated to it.  Free the
				+	 * blocks here.  We know that we've got
				+	 * either 1 or 2 extents and that we can
				+	 * free them all in one bunmapi call.
				+	 *
				+	 * NOTE(review): mval[] is sized by XFS_SYMLINK_MAPS while the
				+	 * assertion below still caps the extent count at 2 - confirm the
				+	 * two limits are meant to differ.
				+	 */
				+	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
				+
				+	/*
				+	 * Remember the symlink length, zero the inode size and log the
				+	 * inode core in the caller's transaction.
				+	 *
				+	 * NOTE(review): nothing here locks the inode or joins it to tp;
				+	 * presumably the caller has already done both - confirm.
				+	 */
				+	size = (int)ip->i_d.di_size;
				+	ip->i_d.di_size = 0;
				+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				+	/*
				+	 * Find the block(s) so we can inval and unmap them.
				+	 */
				+	done = 0;
				+	xfs_bmap_init(&free_list, &first_block);
				+	nmaps = ARRAY_SIZE(mval);
				+	error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size),
				+				mval, &nmaps, 0);
				+	if (error)
				+		goto error0;
				+	/*
				+	 * Invalidate the block(s). No validation is done.
				+	 */
				+	for (i = 0; i < nmaps; i++) {
				+		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
				+			XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
				+			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
				+		if (!bp) {
				+			error = ENOMEM;
				+			goto error1;
				+		}
				+		xfs_trans_binval(tp, bp);
				+	}
				+	/*
				+	 * Unmap the dead block(s) to the free_list.
				+	 *
				+	 * NOTE(review): the length handed to xfs_bunmapi() is size, a
				+	 * byte count, whereas the mapping lookup above used
				+	 * xfs_symlink_blocks(mp, size) - confirm which unit is intended.
				+	 */
				+	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
				+			&first_block, &free_list, &done)))
				+		goto error1;
				+	ASSERT(done);
				+	/*
				+	 * Commit the first transaction.  This logs the EFI and the inode.
				+	 */
				+	if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
				+		goto error1;
				+	/*
				+	 * The transaction must have been committed, since there were
				+	 * actually extents freed by xfs_bunmapi.  See xfs_bmap_finish.
				+	 * The new tp has the extent freeing and EFDs.
				+	 */
				+	ASSERT(committed);
				+	/*
				+	 * The first xact was committed, so add the inode to the new one.
				+	 * Mark it dirty so it will be logged and moved forward in the log as
				+	 * part of every commit.
				+	 */
				+	xfs_trans_ijoin(tp, ip, 0);
				+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
				+	/*
				+	 * Get a new, empty transaction to return to our caller.
				+	 */
				+	ntp = xfs_trans_dup(tp);
				+	/*
				+	 * Commit the transaction containing extent freeing and EFDs.
				+	 * If we get an error on the commit here or on the reserve below,
				+	 * we need to unlock the inode since the new transaction doesn't
				+	 * have the inode attached.
				+	 */
				+	error = xfs_trans_commit(tp, 0);
				+	tp = ntp;
				+	if (error) {
				+		ASSERT(XFS_FORCED_SHUTDOWN(mp));
				+		goto error0;
				+	}
				+	/*
				+	 * transaction commit worked ok so we can drop the extra ticket
				+	 * reference that we gained in xfs_trans_dup()
				+	 */
				+	xfs_log_ticket_put(tp->t_ticket);
				+
				+	/*
				+	 * Remove the memory for extent descriptions (just bookkeeping).
				+	 */
				+	if (ip->i_df.if_bytes)
				+		xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
				+	ASSERT(ip->i_df.if_bytes == 0);
				+	/*
				+	 * Put an itruncate log reservation in the new transaction
				+	 * for our caller.
				+	 */
				+	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
				+			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
				+		ASSERT(XFS_FORCED_SHUTDOWN(mp));
				+		goto error0;
				+	}
				+
				+	/* Hand the reserved transaction, with the inode joined, back. */
				+	xfs_trans_ijoin(tp, ip, 0);
				+	*tpp = tp;
				+	return 0;
				+
				+ error1:
				+	/* Drop any extents queued on the free list before bailing out. */
				+	xfs_bmap_cancel(&free_list);
				+ error0:
				+	return error;
				+}
			
--- a/fs/xfs/xfs_symlink.h
+++ b/fs/xfs/xfs_symlink.h
@@ -0,0 +1,66 @@
 
				+/*
			
 
				+ * Copyright (c) 2012 Red Hat, Inc. All rights reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it would be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write the Free Software Foundation,
			
 
				+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
			
 
				+ */
			
 
				+#ifndef __XFS_SYMLINK_H
			
 
				+#define __XFS_SYMLINK_H 1
			
 
				+
			
 
				+struct xfs_mount;
			
 
				+struct xfs_trans;
			
 
				+struct xfs_inode;
			
 
				+struct xfs_buf;
			
 
				+struct xfs_ifork;
			
 
				+struct xfs_name;
			
 
				+
			
 
				+#define XFS_SYMLINK_MAGIC	0x58534c4d	/* XSLM */
			
 
				+
			
 
				+/*
				+ * Header that precedes the symlink target data in a remote symlink buffer.
				+ * XFS_SYMLINK_BUF_SPACE() only sets room aside for it when the superblock
				+ * has CRCs enabled.  All fields are stored big-endian.
				+ */
				+struct xfs_dsymlink_hdr {
				+	__be32	sl_magic;	/* presumably XFS_SYMLINK_MAGIC - confirm */
				+	__be32	sl_offset;	/* presumably offset of this chunk in the target - confirm */
				+	__be32	sl_bytes;	/* presumably bytes of target held in this buffer - confirm */
				+	__be32	sl_crc;		/* presumably checksum of the buffer - confirm */
				+	uuid_t	sl_uuid;	/* presumably filesystem UUID - confirm */
				+	__be64	sl_owner;	/* presumably owning inode number - confirm */
				+	__be64	sl_blkno;	/* presumably disk address of this buffer - confirm */
				+	__be64	sl_lsn;		/* presumably LSN of the last write - confirm */
				+};
			
 
				+
			
 
				+/*
			
 
				+ * The maximum pathlen is 1024 bytes. Since the minimum file system
			
 
				+ * blocksize is 512 bytes, we can get a max of 3 extents back from
			
 
				+ * bmapi when crc headers are taken into account.
			
 
				+ */
			
 
				+#define XFS_SYMLINK_MAPS 3
			
 
				+
			
 
				+/*
				+ * Bytes of symlink target that fit in a buffer of bufsize bytes: the whole
				+ * buffer, less sizeof(struct xfs_dsymlink_hdr) when the superblock of mp has
				+ * CRCs enabled.
				+ */
				+#define XFS_SYMLINK_BUF_SPACE(mp, bufsize)	\
				+	((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
				+			sizeof(struct xfs_dsymlink_hdr) : 0))
			
 
				+
			
 
				+int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen);
			
 
				+
			
 
				+void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
			
 
				+				 struct xfs_inode *ip, struct xfs_ifork *ifp);
			
 
				+
			
 
				+extern const struct xfs_buf_ops xfs_symlink_buf_ops;
			
 
				+
			
 
				+#ifdef __KERNEL__
			
 
				+
			
 
				+int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
			
 
				+		const char *target_path, umode_t mode, struct xfs_inode **ipp);
			
 
				+int xfs_readlink(struct xfs_inode *ip, char *link);
			
 
				+int xfs_inactive_symlink_rmt(struct xfs_inode *ip, struct xfs_trans **tpp);
			
 
				+
			
 
				+#endif /* __KERNEL__ */
			
 
				+#endif /* __XFS_SYMLINK_H */
			
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -22,7 +22,6 @@
 
				 #include "xfs_trans.h"
			
 
				 #include "xfs_sb.h"
			
 
				 #include "xfs_ag.h"
			
 
				-#include "xfs_da_btree.h"
			
 
				 #include "xfs_bmap_btree.h"
			
 
				 #include "xfs_alloc_btree.h"
			
 
				 #include "xfs_ialloc_btree.h"
			
@@ -30,6 +29,7 @@
 
				 #include "xfs_inode.h"
			
 
				 #include "xfs_btree.h"
			
 
				 #include "xfs_mount.h"
			
 
				+#include "xfs_da_btree.h"
			
 
				 #include "xfs_ialloc.h"
			
 
				 #include "xfs_itable.h"
			
 
				 #include "xfs_alloc.h"
			
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -619,6 +619,30 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
 
				 		  (char *)__entry->caller_ip)
			
 
				 )
			
 
				 
			
 
				+/*
				+ * Tracepoint xfs_iomap_prealloc_size: records the device and inode number of
				+ * ip together with the blocks, shift and writeio_blocks values passed in by
				+ * the caller.
				+ */
				+TRACE_EVENT(xfs_iomap_prealloc_size,
				+	TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t blocks, int shift,
				+		 unsigned int writeio_blocks),
				+	TP_ARGS(ip, blocks, shift, writeio_blocks),
				+	TP_STRUCT__entry(
				+		__field(dev_t, dev)
				+		__field(xfs_ino_t, ino)
				+		__field(xfs_fsblock_t, blocks)
				+		__field(int, shift)
				+		__field(unsigned int, writeio_blocks)
				+	),
				+	TP_fast_assign(
				+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
				+		__entry->ino = ip->i_ino;
				+		__entry->blocks = blocks;
				+		__entry->shift = shift;
				+		__entry->writeio_blocks = writeio_blocks;
				+	),
				+	TP_printk("dev %d:%d ino 0x%llx prealloc blocks %llu shift %d "
				+		  "m_writeio_blocks %u",
				+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino,
				+		  __entry->blocks, __entry->shift, __entry->writeio_blocks)
				+)
			
 
				+
			
 
				 #define DEFINE_IREF_EVENT(name) \
			
 
				 DEFINE_EVENT(xfs_iref_class, name, \
			
 
				 	TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
			
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -659,6 +659,7 @@ xfs_trans_binval(
 
				 		ASSERT(XFS_BUF_ISSTALE(bp));
			
 
				 		ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
			
 
				 		ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF));
			
 
				+		ASSERT(!(bip->__bli_format.blf_flags & XFS_BLFT_MASK));
			
 
				 		ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
			
 
				 		ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);
			
 
				 		ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
			
@@ -671,6 +672,7 @@ xfs_trans_binval(
 
				 	bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);
			
 
				 	bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
			
 
				 	bip->__bli_format.blf_flags |= XFS_BLF_CANCEL;
			
 
				+	bip->__bli_format.blf_flags &= ~XFS_BLFT_MASK;
			
 
				 	for (i = 0; i < bip->bli_format_count; i++) {
			
 
				 		memset(bip->bli_formats[i].blf_data_map, 0,
			
 
				 		       (bip->bli_formats[i].blf_map_size * sizeof(uint)));
			
@@ -702,12 +704,13 @@ xfs_trans_inode_buf(
 
				 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
			
 
				 
			
 
				 	bip->bli_flags |= XFS_BLI_INODE_BUF;
			
 
				+	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
			
 
				 }
			
 
				 
			
 
				 /*
			
 
				  * This call is used to indicate that the buffer is going to
			
 
				  * be staled and was an inode buffer. This means it gets
			
 
				- * special processing during unpin - where any inodes 
			
 
				+ * special processing during unpin - where any inodes
			
 
				  * associated with the buffer should be removed from ail.
			
 
				  * There is also special processing during recovery,
			
 
				  * any replay of the inodes in the buffer needs to be
			
@@ -726,6 +729,7 @@ xfs_trans_stale_inode_buf(
 
				 
			
 
				 	bip->bli_flags |= XFS_BLI_STALE_INODE;
			
 
				 	bip->bli_item.li_cb = xfs_buf_iodone;
			
 
				+	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -749,8 +753,43 @@ xfs_trans_inode_alloc_buf(
 
				 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
			
 
				 
			
 
				 	bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
			
 
				+	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
			
 
				 }
			
 
				 
			
 
				+/*
				+ * Record the buffer type in the buffer log item's format flags so that log
				+ * recovery can identify the buffer and attach the correct buffer ops to it
				+ * after replay.  Nothing is recorded when there is no transaction.
				+ */
				+void
				+xfs_trans_buf_set_type(
				+	struct xfs_trans	*tp,
				+	struct xfs_buf		*bp,
				+	enum xfs_blft		type)
				+{
				+	struct xfs_buf_log_item	*item = bp->b_fspriv;
				+
				+	if (tp != NULL) {
				+		/* The buffer must belong to tp and have a live log item. */
				+		ASSERT(bp->b_transp == tp);
				+		ASSERT(item != NULL);
				+		ASSERT(atomic_read(&item->bli_refcount) > 0);
				+
				+		xfs_blft_to_flags(&item->__bli_format, type);
				+	}
				+}
			
 
				+
			
 
				+/*
				+ * Copy the buffer type recorded in the log item format of src_bp into the
				+ * log item format of dst_bp.  Both buffers must already have a buffer log
				+ * item attached in b_fspriv.
				+ */
				+void
				+xfs_trans_buf_copy_type(
				+	struct xfs_buf		*dst_bp,
				+	struct xfs_buf		*src_bp)
				+{
				+	struct xfs_buf_log_item	*src_item = src_bp->b_fspriv;
				+	struct xfs_buf_log_item	*dst_item = dst_bp->b_fspriv;
				+
				+	xfs_blft_to_flags(&dst_item->__bli_format,
				+			  xfs_blft_from_flags(&src_item->__bli_format));
				+}
			
 
				 
			
 
				 /*
			
 
				  * Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of
			
@@ -769,14 +808,28 @@ xfs_trans_dquot_buf(
 
				 	xfs_buf_t	*bp,
			
 
				 	uint		type)
			
 
				 {
			
 
				-	xfs_buf_log_item_t	*bip = bp->b_fspriv;
			
 
				+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
			
 
				 
			
 
				-	ASSERT(bp->b_transp == tp);
			
 
				-	ASSERT(bip != NULL);
			
 
				 	ASSERT(type == XFS_BLF_UDQUOT_BUF ||
			
 
				 	       type == XFS_BLF_PDQUOT_BUF ||
			
 
				 	       type == XFS_BLF_GDQUOT_BUF);
			
 
				-	ASSERT(atomic_read(&bip->bli_refcount) > 0);
			
 
				 
			
 
				 	bip->__bli_format.blf_flags |= type;
			
 
				+
			
 
				+	switch (type) {
			
 
				+	case XFS_BLF_UDQUOT_BUF:
			
 
				+		type = XFS_BLFT_UDQUOT_BUF;
			
 
				+		break;
			
 
				+	case XFS_BLF_PDQUOT_BUF:
			
 
				+		type = XFS_BLFT_PDQUOT_BUF;
			
 
				+		break;
			
 
				+	case XFS_BLF_GDQUOT_BUF:
			
 
				+		type = XFS_BLFT_GDQUOT_BUF;
			
 
				+		break;
			
 
				+	default:
			
 
				+		type = XFS_BLFT_UNKNOWN_BUF;
			
 
				+		break;
			
 
				+	}
			
 
				+
			
 
				+	xfs_trans_buf_set_type(tp, bp, type);
			
 
				 }
			
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -326,12 +326,12 @@ xfs_trans_dqlockedjoin(
 
				  */
			
 
				 void
			
 
				 xfs_trans_apply_dquot_deltas(
			
 
				-	xfs_trans_t		*tp)
			
 
				+	struct xfs_trans	*tp)
			
 
				 {
			
 
				 	int			i, j;
			
 
				-	xfs_dquot_t		*dqp;
			
 
				-	xfs_dqtrx_t		*qtrx, *qa;
			
 
				-	xfs_disk_dquot_t	*d;
			
 
				+	struct xfs_dquot	*dqp;
			
 
				+	struct xfs_dqtrx	*qtrx, *qa;
			
 
				+	struct xfs_disk_dquot	*d;
			
 
				 	long			totalbdelta;
			
 
				 	long			totalrtbdelta;
			
 
				 
			
@@ -412,7 +412,7 @@ xfs_trans_apply_dquot_deltas(
 
				 			 * Start/reset the timer(s) if needed.
			
 
				 			 */
			
 
				 			if (d->d_id) {
			
 
				-				xfs_qm_adjust_dqlimits(tp->t_mountp, d);
			
 
				+				xfs_qm_adjust_dqlimits(tp->t_mountp, dqp);
			
 
				 				xfs_qm_adjust_dqtimers(tp->t_mountp, d);
			
 
				 			}
			
 
				 
			
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1,5 +1,6 @@
 
				 /*
			
 
				  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
			
 
				+ * Copyright (c) 2012 Red Hat, Inc.
			
 
				  * All Rights Reserved.
			
 
				  *
			
 
				  * This program is free software; you can redistribute it and/or
			
@@ -48,103 +49,8 @@
 
				 #include "xfs_vnodeops.h"
			
 
				 #include "xfs_trace.h"
			
 
				 #include "xfs_icache.h"
			
 
				+#include "xfs_symlink.h"
			
 
				 
			
 
				-/*
			
 
				- * The maximum pathlen is 1024 bytes. Since the minimum file system
			
 
				- * blocksize is 512 bytes, we can get a max of 2 extents back from
			
 
				- * bmapi.
			
 
				- */
			
 
				-#define SYMLINK_MAPS 2
			
 
				-
			
 
				-STATIC int
			
 
				-xfs_readlink_bmap(
			
 
				-	xfs_inode_t	*ip,
			
 
				-	char		*link)
			
 
				-{
			
 
				-	xfs_mount_t	*mp = ip->i_mount;
			
 
				-	int		pathlen = ip->i_d.di_size;
			
 
				-	int             nmaps = SYMLINK_MAPS;
			
 
				-	xfs_bmbt_irec_t mval[SYMLINK_MAPS];
			
 
				-	xfs_daddr_t	d;
			
 
				-	int		byte_cnt;
			
 
				-	int		n;
			
 
				-	xfs_buf_t	*bp;
			
 
				-	int		error = 0;
			
 
				-
			
 
				-	error = xfs_bmapi_read(ip, 0, XFS_B_TO_FSB(mp, pathlen), mval, &nmaps,
			
 
				-			       0);
			
 
				-	if (error)
			
 
				-		goto out;
			
 
				-
			
 
				-	for (n = 0; n < nmaps; n++) {
			
 
				-		d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
			
 
				-		byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
			
 
				-
			
 
				-		bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0, NULL);
			
 
				-		if (!bp)
			
 
				-			return XFS_ERROR(ENOMEM);
			
 
				-		error = bp->b_error;
			
 
				-		if (error) {
			
 
				-			xfs_buf_ioerror_alert(bp, __func__);
			
 
				-			xfs_buf_relse(bp);
			
 
				-			goto out;
			
 
				-		}
			
 
				-		if (pathlen < byte_cnt)
			
 
				-			byte_cnt = pathlen;
			
 
				-		pathlen -= byte_cnt;
			
 
				-
			
 
				-		memcpy(link, bp->b_addr, byte_cnt);
			
 
				-		xfs_buf_relse(bp);
			
 
				-	}
			
 
				-
			
 
				-	link[ip->i_d.di_size] = '\0';
			
 
				-	error = 0;
			
 
				-
			
 
				- out:
			
 
				-	return error;
			
 
				-}
			
 
				-
			
 
				-int
			
 
				-xfs_readlink(
			
 
				-	xfs_inode_t     *ip,
			
 
				-	char		*link)
			
 
				-{
			
 
				-	xfs_mount_t	*mp = ip->i_mount;
			
 
				-	xfs_fsize_t	pathlen;
			
 
				-	int		error = 0;
			
 
				-
			
 
				-	trace_xfs_readlink(ip);
			
 
				-
			
 
				-	if (XFS_FORCED_SHUTDOWN(mp))
			
 
				-		return XFS_ERROR(EIO);
			
 
				-
			
 
				-	xfs_ilock(ip, XFS_ILOCK_SHARED);
			
 
				-
			
 
				-	pathlen = ip->i_d.di_size;
			
 
				-	if (!pathlen)
			
 
				-		goto out;
			
 
				-
			
 
				-	if (pathlen < 0 || pathlen > MAXPATHLEN) {
			
 
				-		xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
			
 
				-			 __func__, (unsigned long long) ip->i_ino,
			
 
				-			 (long long) pathlen);
			
 
				-		ASSERT(0);
			
 
				-		error = XFS_ERROR(EFSCORRUPTED);
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				-
			
 
				-	if (ip->i_df.if_flags & XFS_IFINLINE) {
			
 
				-		memcpy(link, ip->i_df.if_u1.if_data, pathlen);
			
 
				-		link[pathlen] = '\0';
			
 
				-	} else {
			
 
				-		error = xfs_readlink_bmap(ip, link);
			
 
				-	}
			
 
				-
			
 
				- out:
			
 
				-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
			
 
				-	return error;
			
 
				-}
			
 
				 
			
 
				 /*
			
 
				  * This is called by xfs_inactive to free any blocks beyond eof
			
@@ -249,145 +155,6 @@ xfs_free_eofblocks(
 
				 	return error;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * Free a symlink that has blocks associated with it.
			
 
				- */
			
 
				-STATIC int
			
 
				-xfs_inactive_symlink_rmt(
			
 
				-	xfs_inode_t	*ip,
			
 
				-	xfs_trans_t	**tpp)
			
 
				-{
			
 
				-	xfs_buf_t	*bp;
			
 
				-	int		committed;
			
 
				-	int		done;
			
 
				-	int		error;
			
 
				-	xfs_fsblock_t	first_block;
			
 
				-	xfs_bmap_free_t	free_list;
			
 
				-	int		i;
			
 
				-	xfs_mount_t	*mp;
			
 
				-	xfs_bmbt_irec_t	mval[SYMLINK_MAPS];
			
 
				-	int		nmaps;
			
 
				-	xfs_trans_t	*ntp;
			
 
				-	int		size;
			
 
				-	xfs_trans_t	*tp;
			
 
				-
			
 
				-	tp = *tpp;
			
 
				-	mp = ip->i_mount;
			
 
				-	ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
			
 
				-	/*
			
 
				-	 * We're freeing a symlink that has some
			
 
				-	 * blocks allocated to it.  Free the
			
 
				-	 * blocks here.  We know that we've got
			
 
				-	 * either 1 or 2 extents and that we can
			
 
				-	 * free them all in one bunmapi call.
			
 
				-	 */
			
 
				-	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
			
 
				-
			
 
				-	/*
			
 
				-	 * Lock the inode, fix the size, and join it to the transaction.
			
 
				-	 * Hold it so in the normal path, we still have it locked for
			
 
				-	 * the second transaction.  In the error paths we need it
			
 
				-	 * held so the cancel won't rele it, see below.
			
 
				-	 */
			
 
				-	size = (int)ip->i_d.di_size;
			
 
				-	ip->i_d.di_size = 0;
			
 
				-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
			
 
				-	/*
			
 
				-	 * Find the block(s) so we can inval and unmap them.
			
 
				-	 */
			
 
				-	done = 0;
			
 
				-	xfs_bmap_init(&free_list, &first_block);
			
 
				-	nmaps = ARRAY_SIZE(mval);
			
 
				-	error = xfs_bmapi_read(ip, 0, XFS_B_TO_FSB(mp, size),
			
 
				-				mval, &nmaps, 0);
			
 
				-	if (error)
			
 
				-		goto error0;
			
 
				-	/*
			
 
				-	 * Invalidate the block(s).
			
 
				-	 */
			
 
				-	for (i = 0; i < nmaps; i++) {
			
 
				-		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			
 
				-			XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
			
 
				-			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
			
 
				-		if (!bp) {
			
 
				-			error = ENOMEM;
			
 
				-			goto error1;
			
 
				-		}
			
 
				-		xfs_trans_binval(tp, bp);
			
 
				-	}
			
 
				-	/*
			
 
				-	 * Unmap the dead block(s) to the free_list.
			
 
				-	 */
			
 
				-	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
			
 
				-			&first_block, &free_list, &done)))
			
 
				-		goto error1;
			
 
				-	ASSERT(done);
			
 
				-	/*
			
 
				-	 * Commit the first transaction.  This logs the EFI and the inode.
			
 
				-	 */
			
 
				-	if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
			
 
				-		goto error1;
			
 
				-	/*
			
 
				-	 * The transaction must have been committed, since there were
			
 
				-	 * actually extents freed by xfs_bunmapi.  See xfs_bmap_finish.
			
 
				-	 * The new tp has the extent freeing and EFDs.
			
 
				-	 */
			
 
				-	ASSERT(committed);
			
 
				-	/*
			
 
				-	 * The first xact was committed, so add the inode to the new one.
			
 
				-	 * Mark it dirty so it will be logged and moved forward in the log as
			
 
				-	 * part of every commit.
			
 
				-	 */
			
 
				-	xfs_trans_ijoin(tp, ip, 0);
			
 
				-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
			
 
				-	/*
			
 
				-	 * Get a new, empty transaction to return to our caller.
			
 
				-	 */
			
 
				-	ntp = xfs_trans_dup(tp);
			
 
				-	/*
			
 
				-	 * Commit the transaction containing extent freeing and EFDs.
			
 
				-	 * If we get an error on the commit here or on the reserve below,
			
 
				-	 * we need to unlock the inode since the new transaction doesn't
			
 
				-	 * have the inode attached.
			
 
				-	 */
			
 
				-	error = xfs_trans_commit(tp, 0);
			
 
				-	tp = ntp;
			
 
				-	if (error) {
			
 
				-		ASSERT(XFS_FORCED_SHUTDOWN(mp));
			
 
				-		goto error0;
			
 
				-	}
			
 
				-	/*
			
 
				-	 * transaction commit worked ok so we can drop the extra ticket
			
 
				-	 * reference that we gained in xfs_trans_dup()
			
 
				-	 */
			
 
				-	xfs_log_ticket_put(tp->t_ticket);
			
 
				-
			
 
				-	/*
			
 
				-	 * Remove the memory for extent descriptions (just bookkeeping).
			
 
				-	 */
			
 
				-	if (ip->i_df.if_bytes)
			
 
				-		xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
			
 
				-	ASSERT(ip->i_df.if_bytes == 0);
			
 
				-	/*
			
 
				-	 * Put an itruncate log reservation in the new transaction
			
 
				-	 * for our caller.
			
 
				-	 */
			
 
				-	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
			
 
				-			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
			
 
				-		ASSERT(XFS_FORCED_SHUTDOWN(mp));
			
 
				-		goto error0;
			
 
				-	}
			
 
				-
			
 
				-	xfs_trans_ijoin(tp, ip, 0);
			
 
				-	*tpp = tp;
			
 
				-	return 0;
			
 
				-
			
 
				- error1:
			
 
				-	xfs_bmap_cancel(&free_list);
			
 
				- error0:
			
 
				-	return error;
			
 
				-}
			
 
				-
			
 
				 int
			
 
				 xfs_release(
			
 
				 	xfs_inode_t	*ip)
			
@@ -1352,247 +1119,6 @@ xfs_link(
 
				 	return error;
			
 
				 }
			
 
				 
			
 
				-int
			
 
				-xfs_symlink(
			
 
				-	xfs_inode_t		*dp,
			
 
				-	struct xfs_name		*link_name,
			
 
				-	const char		*target_path,
			
 
				-	umode_t			mode,
			
 
				-	xfs_inode_t		**ipp)
			
 
				-{
			
 
				-	xfs_mount_t		*mp = dp->i_mount;
			
 
				-	xfs_trans_t		*tp;
			
 
				-	xfs_inode_t		*ip;
			
 
				-	int			error;
			
 
				-	int			pathlen;
			
 
				-	xfs_bmap_free_t		free_list;
			
 
				-	xfs_fsblock_t		first_block;
			
 
				-	bool                    unlock_dp_on_error = false;
			
 
				-	uint			cancel_flags;
			
 
				-	int			committed;
			
 
				-	xfs_fileoff_t		first_fsb;
			
 
				-	xfs_filblks_t		fs_blocks;
			
 
				-	int			nmaps;
			
 
				-	xfs_bmbt_irec_t		mval[SYMLINK_MAPS];
			
 
				-	xfs_daddr_t		d;
			
 
				-	const char		*cur_chunk;
			
 
				-	int			byte_cnt;
			
 
				-	int			n;
			
 
				-	xfs_buf_t		*bp;
			
 
				-	prid_t			prid;
			
 
				-	struct xfs_dquot	*udqp, *gdqp;
			
 
				-	uint			resblks;
			
 
				-
			
 
				-	*ipp = NULL;
			
 
				-	error = 0;
			
 
				-	ip = NULL;
			
 
				-	tp = NULL;
			
 
				-
			
 
				-	trace_xfs_symlink(dp, link_name);
			
 
				-
			
 
				-	if (XFS_FORCED_SHUTDOWN(mp))
			
 
				-		return XFS_ERROR(EIO);
			
 
				-
			
 
				-	/*
			
 
				-	 * Check component lengths of the target path name.
			
 
				-	 */
			
 
				-	pathlen = strlen(target_path);
			
 
				-	if (pathlen >= MAXPATHLEN)      /* total string too long */
			
 
				-		return XFS_ERROR(ENAMETOOLONG);
			
 
				-
			
 
				-	udqp = gdqp = NULL;
			
 
				-	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
			
 
				-		prid = xfs_get_projid(dp);
			
 
				-	else
			
 
				-		prid = XFS_PROJID_DEFAULT;
			
 
				-
			
 
				-	/*
			
 
				-	 * Make sure that we have allocated dquot(s) on disk.
			
 
				-	 */
			
 
				-	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
			
 
				-			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
			
 
				-	if (error)
			
 
				-		goto std_return;
			
 
				-
			
 
				-	tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
			
 
				-	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
			
 
				-	/*
			
 
				-	 * The symlink will fit into the inode data fork?
			
 
				-	 * There can't be any attributes so we get the whole variable part.
			
 
				-	 */
			
 
				-	if (pathlen <= XFS_LITINO(mp))
			
 
				-		fs_blocks = 0;
			
 
				-	else
			
 
				-		fs_blocks = XFS_B_TO_FSB(mp, pathlen);
			
 
				-	resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
			
 
				-	error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
			
 
				-			XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
			
 
				-	if (error == ENOSPC && fs_blocks == 0) {
			
 
				-		resblks = 0;
			
 
				-		error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0,
			
 
				-				XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
			
 
				-	}
			
 
				-	if (error) {
			
 
				-		cancel_flags = 0;
			
 
				-		goto error_return;
			
 
				-	}
			
 
				-
			
 
				-	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
			
 
				-	unlock_dp_on_error = true;
			
 
				-
			
 
				-	/*
			
 
				-	 * Check whether the directory allows new symlinks or not.
			
 
				-	 */
			
 
				-	if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
			
 
				-		error = XFS_ERROR(EPERM);
			
 
				-		goto error_return;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Reserve disk quota : blocks and inode.
			
 
				-	 */
			
 
				-	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
			
 
				-	if (error)
			
 
				-		goto error_return;
			
 
				-
			
 
				-	/*
			
 
				-	 * Check for ability to enter directory entry, if no space reserved.
			
 
				-	 */
			
 
				-	error = xfs_dir_canenter(tp, dp, link_name, resblks);
			
 
				-	if (error)
			
 
				-		goto error_return;
			
 
				-	/*
			
 
				-	 * Initialize the bmap freelist prior to calling either
			
 
				-	 * bmapi or the directory create code.
			
 
				-	 */
			
 
				-	xfs_bmap_init(&free_list, &first_block);
			
 
				-
			
 
				-	/*
			
 
				-	 * Allocate an inode for the symlink.
			
 
				-	 */
			
 
				-	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
			
 
				-			       prid, resblks > 0, &ip, NULL);
			
 
				-	if (error) {
			
 
				-		if (error == ENOSPC)
			
 
				-			goto error_return;
			
 
				-		goto error1;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * An error after we've joined dp to the transaction will result in the
			
 
				-	 * transaction cancel unlocking dp so don't do it explicitly in the
			
 
				-	 * error path.
			
 
				-	 */
			
 
				-	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
			
 
				-	unlock_dp_on_error = false;
			
 
				-
			
 
				-	/*
			
 
				-	 * Also attach the dquot(s) to it, if applicable.
			
 
				-	 */
			
 
				-	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
			
 
				-
			
 
				-	if (resblks)
			
 
				-		resblks -= XFS_IALLOC_SPACE_RES(mp);
			
 
				-	/*
			
 
				-	 * If the symlink will fit into the inode, write it inline.
			
 
				-	 */
			
 
				-	if (pathlen <= XFS_IFORK_DSIZE(ip)) {
			
 
				-		xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
			
 
				-		memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
			
 
				-		ip->i_d.di_size = pathlen;
			
 
				-
			
 
				-		/*
			
 
				-		 * The inode was initially created in extent format.
			
 
				-		 */
			
 
				-		ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
			
 
				-		ip->i_df.if_flags |= XFS_IFINLINE;
			
 
				-
			
 
				-		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
			
 
				-		xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
			
 
				-
			
 
				-	} else {
			
 
				-		first_fsb = 0;
			
 
				-		nmaps = SYMLINK_MAPS;
			
 
				-
			
 
				-		error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
			
 
				-				  XFS_BMAPI_METADATA, &first_block, resblks,
			
 
				-				  mval, &nmaps, &free_list);
			
 
				-		if (error)
			
 
				-			goto error2;
			
 
				-
			
 
				-		if (resblks)
			
 
				-			resblks -= fs_blocks;
			
 
				-		ip->i_d.di_size = pathlen;
			
 
				-		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
			
 
				-
			
 
				-		cur_chunk = target_path;
			
 
				-		for (n = 0; n < nmaps; n++) {
			
 
				-			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
			
 
				-			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
			
 
				-			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
			
 
				-					       BTOBB(byte_cnt), 0);
			
 
				-			if (!bp) {
			
 
				-				error = ENOMEM;
			
 
				-				goto error2;
			
 
				-			}
			
 
				-			if (pathlen < byte_cnt) {
			
 
				-				byte_cnt = pathlen;
			
 
				-			}
			
 
				-			pathlen -= byte_cnt;
			
 
				-
			
 
				-			memcpy(bp->b_addr, cur_chunk, byte_cnt);
			
 
				-			cur_chunk += byte_cnt;
			
 
				-
			
 
				-			xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Create the directory entry for the symlink.
			
 
				-	 */
			
 
				-	error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
			
 
				-					&first_block, &free_list, resblks);
			
 
				-	if (error)
			
 
				-		goto error2;
			
 
				-	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
			
 
				-	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
			
 
				-
			
 
				-	/*
			
 
				-	 * If this is a synchronous mount, make sure that the
			
 
				-	 * symlink transaction goes to disk before returning to
			
 
				-	 * the user.
			
 
				-	 */
			
 
				-	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
			
 
				-		xfs_trans_set_sync(tp);
			
 
				-	}
			
 
				-
			
 
				-	error = xfs_bmap_finish(&tp, &free_list, &committed);
			
 
				-	if (error) {
			
 
				-		goto error2;
			
 
				-	}
			
 
				-	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
			
 
				-	xfs_qm_dqrele(udqp);
			
 
				-	xfs_qm_dqrele(gdqp);
			
 
				-
			
 
				-	*ipp = ip;
			
 
				-	return 0;
			
 
				-
			
 
				- error2:
			
 
				-	IRELE(ip);
			
 
				- error1:
			
 
				-	xfs_bmap_cancel(&free_list);
			
 
				-	cancel_flags |= XFS_TRANS_ABORT;
			
 
				- error_return:
			
 
				-	xfs_trans_cancel(tp, cancel_flags);
			
 
				-	xfs_qm_dqrele(udqp);
			
 
				-	xfs_qm_dqrele(gdqp);
			
 
				-
			
 
				-	if (unlock_dp_on_error)
			
 
				-		xfs_iunlock(dp, XFS_ILOCK_EXCL);
			
 
				- std_return:
			
 
				-	return error;
			
 
				-}
			
 
				-
			
 
				 int
			
 
				 xfs_set_dmattrs(
			
 
				 	xfs_inode_t     *ip,