14 tahun lalu · 35806b4f7c
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -226,10 +226,6 @@ acl			Enables POSIX Access Control Lists support.
 
				 noacl			This option disables POSIX Access Control List
			
 
				 			support.
			
 
				 
			
 
				-reservation
			
 
				-
			
 
				-noreservation
			
 
				-
			
 
				 bsddf		(*)	Make 'df' act like BSD.
			
 
				 minixdf			Make 'df' act like Minix.
			
 
				 
			
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3572,9 +3572,16 @@ M:	Andrew Morton <akpm@linux-foundation.org>
 
				 M:	Jan Kara <jack@suse.cz>
			
 
				 L:	linux-ext4@vger.kernel.org
			
 
				 S:	Maintained
			
 
				-F:	fs/jbd*/
			
 
				-F:	include/linux/ext*jbd*.h
			
 
				-F:	include/linux/jbd*.h
			
 
				+F:	fs/jbd/
			
 
				+F:	include/linux/ext3_jbd.h
			
 
				+F:	include/linux/jbd.h
			
 
				+
			
 
				+JOURNALLING LAYER FOR BLOCK DEVICES (JBD2)
			
 
				+M:	"Theodore Ts'o" <tytso@mit.edu>
			
 
				+L:	linux-ext4@vger.kernel.org
			
 
				+S:	Maintained
			
 
				+F:	fs/jbd2/
			
 
				+F:	include/linux/jbd2.h
			
 
				 
			
 
				 JSM Neo PCI based serial card
			
 
				 M:	Breno Leitao <leitao@linux.vnet.ibm.com>
			
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -6,7 +6,8 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
 
				 
			
 
				 ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
			
 
				 		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
			
 
				-		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
			
 
				+		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
			
 
				+		mmp.o
			
 
				 
			
 
				 ext4-$(CONFIG_EXT4_FS_XATTR)		+= xattr.o xattr_user.o xattr_trusted.o
			
 
				 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
			
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -361,130 +361,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 
				 	return bh;
			
 
				 }
			
 
				 
			
 
				-/**
			
 
				- * ext4_add_groupblocks() -- Add given blocks to an existing group
			
 
				- * @handle:			handle to this transaction
			
 
				- * @sb:				super block
			
 
				- * @block:			start physcial block to add to the block group
			
 
				- * @count:			number of blocks to free
			
 
				- *
			
 
				- * This marks the blocks as free in the bitmap. We ask the
			
 
				- * mballoc to reload the buddy after this by setting group
			
 
				- * EXT4_GROUP_INFO_NEED_INIT_BIT flag
			
 
				- */
			
 
				-void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
			
 
				-			 ext4_fsblk_t block, unsigned long count)
			
 
				-{
			
 
				-	struct buffer_head *bitmap_bh = NULL;
			
 
				-	struct buffer_head *gd_bh;
			
 
				-	ext4_group_t block_group;
			
 
				-	ext4_grpblk_t bit;
			
 
				-	unsigned int i;
			
 
				-	struct ext4_group_desc *desc;
			
 
				-	struct ext4_sb_info *sbi = EXT4_SB(sb);
			
 
				-	int err = 0, ret, blk_free_count;
			
 
				-	ext4_grpblk_t blocks_freed;
			
 
				-	struct ext4_group_info *grp;
			
 
				-
			
 
				-	ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
			
 
				-
			
 
				-	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
			
 
				-	grp = ext4_get_group_info(sb, block_group);
			
 
				-	/*
			
 
				-	 * Check to see if we are freeing blocks across a group
			
 
				-	 * boundary.
			
 
				-	 */
			
 
				-	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
			
 
				-		goto error_return;
			
 
				-	}
			
 
				-	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
			
 
				-	if (!bitmap_bh)
			
 
				-		goto error_return;
			
 
				-	desc = ext4_get_group_desc(sb, block_group, &gd_bh);
			
 
				-	if (!desc)
			
 
				-		goto error_return;
			
 
				-
			
 
				-	if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
			
 
				-	    in_range(ext4_inode_bitmap(sb, desc), block, count) ||
			
 
				-	    in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
			
 
				-	    in_range(block + count - 1, ext4_inode_table(sb, desc),
			
 
				-		     sbi->s_itb_per_group)) {
			
 
				-		ext4_error(sb, "Adding blocks in system zones - "
			
 
				-			   "Block = %llu, count = %lu",
			
 
				-			   block, count);
			
 
				-		goto error_return;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * We are about to add blocks to the bitmap,
			
 
				-	 * so we need undo access.
			
 
				-	 */
			
 
				-	BUFFER_TRACE(bitmap_bh, "getting undo access");
			
 
				-	err = ext4_journal_get_undo_access(handle, bitmap_bh);
			
 
				-	if (err)
			
 
				-		goto error_return;
			
 
				-
			
 
				-	/*
			
 
				-	 * We are about to modify some metadata.  Call the journal APIs
			
 
				-	 * to unshare ->b_data if a currently-committing transaction is
			
 
				-	 * using it
			
 
				-	 */
			
 
				-	BUFFER_TRACE(gd_bh, "get_write_access");
			
 
				-	err = ext4_journal_get_write_access(handle, gd_bh);
			
 
				-	if (err)
			
 
				-		goto error_return;
			
 
				-	/*
			
 
				-	 * make sure we don't allow a parallel init on other groups in the
			
 
				-	 * same buddy cache
			
 
				-	 */
			
 
				-	down_write(&grp->alloc_sem);
			
 
				-	for (i = 0, blocks_freed = 0; i < count; i++) {
			
 
				-		BUFFER_TRACE(bitmap_bh, "clear bit");
			
 
				-		if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
			
 
				-						bit + i, bitmap_bh->b_data)) {
			
 
				-			ext4_error(sb, "bit already cleared for block %llu",
			
 
				-				   (ext4_fsblk_t)(block + i));
			
 
				-			BUFFER_TRACE(bitmap_bh, "bit already cleared");
			
 
				-		} else {
			
 
				-			blocks_freed++;
			
 
				-		}
			
 
				-	}
			
 
				-	ext4_lock_group(sb, block_group);
			
 
				-	blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
			
 
				-	ext4_free_blks_set(sb, desc, blk_free_count);
			
 
				-	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
			
 
				-	ext4_unlock_group(sb, block_group);
			
 
				-	percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
			
 
				-
			
 
				-	if (sbi->s_log_groups_per_flex) {
			
 
				-		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
			
 
				-		atomic_add(blocks_freed,
			
 
				-			   &sbi->s_flex_groups[flex_group].free_blocks);
			
 
				-	}
			
 
				-	/*
			
 
				-	 * request to reload the buddy with the
			
 
				-	 * new bitmap information
			
 
				-	 */
			
 
				-	set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
			
 
				-	grp->bb_free += blocks_freed;
			
 
				-	up_write(&grp->alloc_sem);
			
 
				-
			
 
				-	/* We dirtied the bitmap block */
			
 
				-	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
			
 
				-	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
			
 
				-
			
 
				-	/* And the group descriptor block */
			
 
				-	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
			
 
				-	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
			
 
				-	if (!err)
			
 
				-		err = ret;
			
 
				-
			
 
				-error_return:
			
 
				-	brelse(bitmap_bh);
			
 
				-	ext4_std_error(sb, err);
			
 
				-	return;
			
 
				-}
			
 
				-
			
 
				 /**
			
 
				  * ext4_has_free_blocks()
			
 
				  * @sbi:	in-core super block structure.
			
@@ -493,7 +369,8 @@ error_return:
 
				  * Check if filesystem has nblocks free & available for allocation.
			
 
				  * On success return 1, return 0 on failure.
			
 
				  */
			
 
				-static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
			
 
				+static int ext4_has_free_blocks(struct ext4_sb_info *sbi,
			
 
				+				s64 nblocks, unsigned int flags)
			
 
				 {
			
 
				 	s64 free_blocks, dirty_blocks, root_blocks;
			
 
				 	struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
			
@@ -507,11 +384,6 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
 
				 						EXT4_FREEBLOCKS_WATERMARK) {
			
 
				 		free_blocks  = percpu_counter_sum_positive(fbc);
			
 
				 		dirty_blocks = percpu_counter_sum_positive(dbc);
			
 
				-		if (dirty_blocks < 0) {
			
 
				-			printk(KERN_CRIT "Dirty block accounting "
			
 
				-					"went wrong %lld\n",
			
 
				-					(long long)dirty_blocks);
			
 
				-		}
			
 
				 	}
			
 
				 	/* Check whether we have space after
			
 
				 	 * accounting for current dirty blocks & root reserved blocks.
			
@@ -522,7 +394,9 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
 
				 	/* Hm, nope.  Are (enough) root reserved blocks available? */
			
 
				 	if (sbi->s_resuid == current_fsuid() ||
			
 
				 	    ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
			
 
				-	    capable(CAP_SYS_RESOURCE)) {
			
 
				+	    capable(CAP_SYS_RESOURCE) ||
			
 
				+		(flags & EXT4_MB_USE_ROOT_BLOCKS)) {
			
 
				+
			
 
				 		if (free_blocks >= (nblocks + dirty_blocks))
			
 
				 			return 1;
			
 
				 	}
			
@@ -531,9 +405,9 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
 
				 }
			
 
				 
			
 
				 int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
			
 
				-						s64 nblocks)
			
 
				+			   s64 nblocks, unsigned int flags)
			
 
				 {
			
 
				-	if (ext4_has_free_blocks(sbi, nblocks)) {
			
 
				+	if (ext4_has_free_blocks(sbi, nblocks, flags)) {
			
 
				 		percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks);
			
 
				 		return 0;
			
 
				 	} else
			
@@ -554,7 +428,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
 
				  */
			
 
				 int ext4_should_retry_alloc(struct super_block *sb, int *retries)
			
 
				 {
			
 
				-	if (!ext4_has_free_blocks(EXT4_SB(sb), 1) ||
			
 
				+	if (!ext4_has_free_blocks(EXT4_SB(sb), 1, 0) ||
			
 
				 	    (*retries)++ > 3 ||
			
 
				 	    !EXT4_SB(sb)->s_journal)
			
 
				 		return 0;
			
@@ -577,7 +451,8 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 
				  * error stores in errp pointer
			
 
				  */
			
 
				 ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
			
 
				-		ext4_fsblk_t goal, unsigned long *count, int *errp)
			
 
				+				  ext4_fsblk_t goal, unsigned int flags,
			
 
				+				  unsigned long *count, int *errp)
			
 
				 {
			
 
				 	struct ext4_allocation_request ar;
			
 
				 	ext4_fsblk_t ret;
			
@@ -587,6 +462,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 
				 	ar.inode = inode;
			
 
				 	ar.goal = goal;
			
 
				 	ar.len = count ? *count : 1;
			
 
				+	ar.flags = flags;
			
 
				 
			
 
				 	ret = ext4_mb_new_blocks(handle, &ar, errp);
			
 
				 	if (count)
			
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -108,7 +108,8 @@ typedef unsigned int ext4_group_t;
 
				 #define EXT4_MB_DELALLOC_RESERVED	0x0400
			
 
				 /* We are doing stream allocation */
			
 
				 #define EXT4_MB_STREAM_ALLOC		0x0800
			
 
				-
			
 
				+/* Use reserved root blocks if needed */
			
 
				+#define EXT4_MB_USE_ROOT_BLOCKS		0x1000
			
 
				 
			
 
				 struct ext4_allocation_request {
			
 
				 	/* target inode for block we're allocating */
			
@@ -209,6 +210,8 @@ struct ext4_io_submit {
 
				  */
			
 
				 #define	EXT4_BAD_INO		 1	/* Bad blocks inode */
			
 
				 #define EXT4_ROOT_INO		 2	/* Root inode */
			
 
				+#define EXT4_USR_QUOTA_INO	 3	/* User quota inode */
			
 
				+#define EXT4_GRP_QUOTA_INO	 4	/* Group quota inode */
			
 
				 #define EXT4_BOOT_LOADER_INO	 5	/* Boot loader inode */
			
 
				 #define EXT4_UNDEL_DIR_INO	 6	/* Undelete directory inode */
			
 
				 #define EXT4_RESIZE_INO		 7	/* Reserved group descriptors inode */
			
@@ -512,6 +515,10 @@ struct ext4_new_group_data {
 
				 	/* Convert extent to initialized after IO complete */
			
 
				 #define EXT4_GET_BLOCKS_IO_CONVERT_EXT		(EXT4_GET_BLOCKS_CONVERT|\
			
 
				 					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
			
 
				+	/* Punch out blocks of an extent */
			
 
				+#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT		0x0020
			
 
				+	/* Don't normalize allocation size (used for fallocate) */
			
 
				+#define EXT4_GET_BLOCKS_NO_NORMALIZE		0x0040
			
 
				 
			
 
				 /*
			
 
				  * Flags used by ext4_free_blocks
			
@@ -1028,7 +1035,7 @@ struct ext4_super_block {
 
				 	__le16	s_want_extra_isize; 	/* New inodes should reserve # bytes */
			
 
				 	__le32	s_flags;		/* Miscellaneous flags */
			
 
				 	__le16  s_raid_stride;		/* RAID stride */
			
 
				-	__le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
			
 
				+	__le16  s_mmp_update_interval;  /* # seconds to wait in MMP checking */
			
 
				 	__le64  s_mmp_block;            /* Block for multi-mount protection */
			
 
				 	__le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
			
 
				 	__u8	s_log_groups_per_flex;  /* FLEX_BG group size */
			
@@ -1144,6 +1151,9 @@ struct ext4_sb_info {
 
				 	unsigned long s_ext_blocks;
			
 
				 	unsigned long s_ext_extents;
			
 
				 #endif
			
 
				+	/* ext4 extent cache stats */
			
 
				+	unsigned long extent_cache_hits;
			
 
				+	unsigned long extent_cache_misses;
			
 
				 
			
 
				 	/* for buddy allocator */
			
 
				 	struct ext4_group_info ***s_group_info;
			
@@ -1201,6 +1211,9 @@ struct ext4_sb_info {
 
				 	struct ext4_li_request *s_li_request;
			
 
				 	/* Wait multiplier for lazy initialization thread */
			
 
				 	unsigned int s_li_wait_mult;
			
 
				+
			
 
				+	/* Kernel thread for multiple mount protection */
			
 
				+	struct task_struct *s_mmp_tsk;
			
 
				 };
			
 
				 
			
 
				 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
			
@@ -1338,6 +1351,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
 
				 #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM		0x0010
			
 
				 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK	0x0020
			
 
				 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE	0x0040
			
 
				+#define EXT4_FEATURE_RO_COMPAT_QUOTA		0x0100
			
 
				 
			
 
				 #define EXT4_FEATURE_INCOMPAT_COMPRESSION	0x0001
			
 
				 #define EXT4_FEATURE_INCOMPAT_FILETYPE		0x0002
			
@@ -1351,13 +1365,29 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
 
				 #define EXT4_FEATURE_INCOMPAT_EA_INODE		0x0400 /* EA in inode */
			
 
				 #define EXT4_FEATURE_INCOMPAT_DIRDATA		0x1000 /* data in dirent */
			
 
				 
			
 
				+#define EXT2_FEATURE_COMPAT_SUPP	EXT4_FEATURE_COMPAT_EXT_ATTR
			
 
				+#define EXT2_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
			
 
				+					 EXT4_FEATURE_INCOMPAT_META_BG)
			
 
				+#define EXT2_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
			
 
				+					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
			
 
				+					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
			
 
				+
			
 
				+#define EXT3_FEATURE_COMPAT_SUPP	EXT4_FEATURE_COMPAT_EXT_ATTR
			
 
				+#define EXT3_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
			
 
				+					 EXT4_FEATURE_INCOMPAT_RECOVER| \
			
 
				+					 EXT4_FEATURE_INCOMPAT_META_BG)
			
 
				+#define EXT3_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
			
 
				+					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
			
 
				+					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
			
 
				+
			
 
				 #define EXT4_FEATURE_COMPAT_SUPP	EXT2_FEATURE_COMPAT_EXT_ATTR
			
 
				 #define EXT4_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
			
 
				 					 EXT4_FEATURE_INCOMPAT_RECOVER| \
			
 
				 					 EXT4_FEATURE_INCOMPAT_META_BG| \
			
 
				 					 EXT4_FEATURE_INCOMPAT_EXTENTS| \
			
 
				 					 EXT4_FEATURE_INCOMPAT_64BIT| \
			
 
				-					 EXT4_FEATURE_INCOMPAT_FLEX_BG)
			
 
				+					 EXT4_FEATURE_INCOMPAT_FLEX_BG| \
			
 
				+					 EXT4_FEATURE_INCOMPAT_MMP)
			
 
				 #define EXT4_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
			
 
				 					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
			
 
				 					 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
			
@@ -1590,12 +1620,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 
				  */
			
 
				 struct ext4_lazy_init {
			
 
				 	unsigned long		li_state;
			
 
				-
			
 
				-	wait_queue_head_t	li_wait_daemon;
			
 
				-	wait_queue_head_t	li_wait_task;
			
 
				-	struct timer_list	li_timer;
			
 
				-	struct task_struct	*li_task;
			
 
				-
			
 
				 	struct list_head	li_request_list;
			
 
				 	struct mutex		li_list_mtx;
			
 
				 };
			
@@ -1614,6 +1638,67 @@ struct ext4_features {
 
				 	struct completion f_kobj_unregister;
			
 
				 };
			
 
				 
			
 
				+/*
			
 
				+ * This structure will be used for multiple mount protection. It will be
			
 
				+ * written into the block number saved in the s_mmp_block field in the
			
 
				+ * superblock. Programs that check MMP should assume that if
			
 
				+ * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe
			
 
				+ * to use the filesystem, regardless of how old the timestamp is.
			
 
				+ */
			
 
				+#define EXT4_MMP_MAGIC     0x004D4D50U /* ASCII for MMP */
			
 
				+#define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
			
 
				+#define EXT4_MMP_SEQ_FSCK  0xE24D4D50U /* mmp_seq value when being fscked */
			
 
				+#define EXT4_MMP_SEQ_MAX   0xE24D4D4FU /* maximum valid mmp_seq value */
			
 
				+
			
 
				+struct mmp_struct {
			
 
				+	__le32	mmp_magic;		/* Magic number for MMP */
			
 
				+	__le32	mmp_seq;		/* Sequence no. updated periodically */
			
 
				+
			
 
				+	/*
			
 
				+	 * mmp_time, mmp_nodename & mmp_bdevname are only used for information
			
 
				+	 * purposes and do not affect the correctness of the algorithm
			
 
				+	 */
			
 
				+	__le64	mmp_time;		/* Time last updated */
			
 
				+	char	mmp_nodename[64];	/* Node which last updated MMP block */
			
 
				+	char	mmp_bdevname[32];	/* Bdev which last updated MMP block */
			
 
				+
			
 
				+	/*
			
 
				+	 * mmp_check_interval is used to verify if the MMP block has been
			
 
				+	 * updated on the block device. The value is updated based on the
			
 
				+	 * maximum time to write the MMP block during an update cycle.
			
 
				+	 */
			
 
				+	__le16	mmp_check_interval;
			
 
				+
			
 
				+	__le16	mmp_pad1;
			
 
				+	__le32	mmp_pad2[227];
			
 
				+};
			
 
				+
			
 
				+/* arguments passed to the mmp thread */
			
 
				+struct mmpd_data {
			
 
				+	struct buffer_head *bh; /* bh from initial read_mmp_block() */
			
 
				+	struct super_block *sb;  /* super block of the fs */
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * Check interval multiplier
			
 
				+ * The MMP block is written every update interval and initially checked every
			
 
				+ * update interval x the multiplier (the value is then adapted based on the
			
 
				+ * write latency). The reason is that writes can be delayed under load and we
			
 
				+ * don't want readers to incorrectly assume that the filesystem is no longer
			
 
				+ * in use.
			
 
				+ */
			
 
				+#define EXT4_MMP_CHECK_MULT		2UL
			
 
				+
			
 
				+/*
			
 
				+ * Minimum interval for MMP checking in seconds.
			
 
				+ */
			
 
				+#define EXT4_MMP_MIN_CHECK_INTERVAL	5UL
			
 
				+
			
 
				+/*
			
 
				+ * Maximum interval for MMP checking in seconds.
			
 
				+ */
			
 
				+#define EXT4_MMP_MAX_CHECK_INTERVAL	300UL
			
 
				+
			
 
				 /*
			
 
				  * Function prototypes
			
 
				  */
			
@@ -1638,10 +1723,12 @@ extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
 
				 extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
			
 
				 			ext4_group_t group);
			
 
				 extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
			
 
				-			ext4_fsblk_t goal, unsigned long *count, int *errp);
			
 
				-extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
			
 
				-extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
			
 
				-				ext4_fsblk_t block, unsigned long count);
			
 
				+					 ext4_fsblk_t goal,
			
 
				+					 unsigned int flags,
			
 
				+					 unsigned long *count,
			
 
				+					 int *errp);
			
 
				+extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
			
 
				+				  s64 nblocks, unsigned int flags);
			
 
				 extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
			
 
				 extern void ext4_check_blocks_bitmap(struct super_block *);
			
 
				 extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
			
@@ -1706,6 +1793,8 @@ extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
 
				 			     unsigned long count, int flags);
			
 
				 extern int ext4_mb_add_groupinfo(struct super_block *sb,
			
 
				 		ext4_group_t i, struct ext4_group_desc *desc);
			
 
				+extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
			
 
				+				ext4_fsblk_t block, unsigned long count);
			
 
				 extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
			
 
				 
			
 
				 /* inode.c */
			
@@ -1729,6 +1818,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
 
				 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
			
 
				 extern int ext4_can_truncate(struct inode *inode);
			
 
				 extern void ext4_truncate(struct inode *);
			
 
				+extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
			
 
				 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
			
 
				 extern void ext4_set_inode_flags(struct inode *);
			
 
				 extern void ext4_get_inode_flags(struct ext4_inode_info *);
			
@@ -1738,6 +1828,8 @@ extern int ext4_writepage_trans_blocks(struct inode *);
 
				 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
			
 
				 extern int ext4_block_truncate_page(handle_t *handle,
			
 
				 		struct address_space *mapping, loff_t from);
			
 
				+extern int ext4_block_zero_page_range(handle_t *handle,
			
 
				+		struct address_space *mapping, loff_t from, loff_t length);
			
 
				 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
			
 
				 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
			
 
				 extern void ext4_da_update_reserve_space(struct inode *inode,
			
@@ -1788,6 +1880,10 @@ extern void __ext4_warning(struct super_block *, const char *, unsigned int,
 
				 						       __LINE__, ## message)
			
 
				 extern void ext4_msg(struct super_block *, const char *, const char *, ...)
			
 
				 	__attribute__ ((format (printf, 3, 4)));
			
 
				+extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
			
 
				+			   const char *, unsigned int, const char *);
			
 
				+#define dump_mmp_msg(sb, mmp, msg)	__dump_mmp_msg(sb, mmp, __func__, \
			
 
				+						       __LINE__, msg)
			
 
				 extern void __ext4_grp_locked_error(const char *, unsigned int, \
			
 
				 				    struct super_block *, ext4_group_t, \
			
 
				 				    unsigned long, ext4_fsblk_t, \
			
@@ -2064,6 +2160,8 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
 
				 extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
			
 
				 			       struct ext4_map_blocks *map, int flags);
			
 
				 extern void ext4_ext_truncate(struct inode *);
			
 
				+extern int ext4_ext_punch_hole(struct file *file, loff_t offset,
			
 
				+				loff_t length);
			
 
				 extern void ext4_ext_init(struct super_block *);
			
 
				 extern void ext4_ext_release(struct super_block *);
			
 
				 extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
			
@@ -2092,6 +2190,9 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
 
				 			       int len,
			
 
				 			       struct writeback_control *wbc);
			
 
				 
			
 
				+/* mmp.c */
			
 
				+extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
			
 
				+
			
 
				 /* BH_Uninit flag: blocks are allocated but uninitialized on disk */
			
 
				 enum ext4_state_bits {
			
 
				 	BH_Uninit	/* blocks are allocated but uninitialized on disk */
			
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -6,20 +6,6 @@
 
				 
			
 
				 #include <trace/events/ext4.h>
			
 
				 
			
 
				-int __ext4_journal_get_undo_access(const char *where, unsigned int line,
			
 
				-				   handle_t *handle, struct buffer_head *bh)
			
 
				-{
			
 
				-	int err = 0;
			
 
				-
			
 
				-	if (ext4_handle_valid(handle)) {
			
 
				-		err = jbd2_journal_get_undo_access(handle, bh);
			
 
				-		if (err)
			
 
				-			ext4_journal_abort_handle(where, line, __func__, bh,
			
 
				-						  handle, err);
			
 
				-	}
			
 
				-	return err;
			
 
				-}
			
 
				-
			
 
				 int __ext4_journal_get_write_access(const char *where, unsigned int line,
			
 
				 				    handle_t *handle, struct buffer_head *bh)
			
 
				 {
			
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -126,9 +126,6 @@ void ext4_journal_abort_handle(const char *caller, unsigned int line,
 
				 			       const char *err_fn,
			
 
				 		struct buffer_head *bh, handle_t *handle, int err);
			
 
				 
			
 
				-int __ext4_journal_get_undo_access(const char *where, unsigned int line,
			
 
				-				   handle_t *handle, struct buffer_head *bh);
			
 
				-
			
 
				 int __ext4_journal_get_write_access(const char *where, unsigned int line,
			
 
				 				    handle_t *handle, struct buffer_head *bh);
			
 
				 
			
@@ -146,8 +143,6 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 
				 int __ext4_handle_dirty_super(const char *where, unsigned int line,
			
 
				 			      handle_t *handle, struct super_block *sb);
			
 
				 
			
 
				-#define ext4_journal_get_undo_access(handle, bh) \
			
 
				-	__ext4_journal_get_undo_access(__func__, __LINE__, (handle), (bh))
			
 
				 #define ext4_journal_get_write_access(handle, bh) \
			
 
				 	__ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh))
			
 
				 #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
			
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -272,7 +272,6 @@ const struct file_operations ext4_file_operations = {
 
				 };
			
 
				 
			
 
				 const struct inode_operations ext4_file_inode_operations = {
			
 
				-	.truncate	= ext4_truncate,
			
 
				 	.setattr	= ext4_setattr,
			
 
				 	.getattr	= ext4_getattr,
			
 
				 #ifdef CONFIG_EXT4_FS_XATTR
			
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -36,7 +36,7 @@
 
				 
			
 
				 static void dump_completed_IO(struct inode * inode)
			
 
				 {
			
 
				-#ifdef	EXT4_DEBUG
			
 
				+#ifdef	EXT4FS_DEBUG
			
 
				 	struct list_head *cur, *before, *after;
			
 
				 	ext4_io_end_t *io, *io0, *io1;
			
 
				 	unsigned long flags;
			
@@ -172,6 +172,7 @@ int ext4_sync_file(struct file *file, int datasync)
 
				 	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
			
 
				 	int ret;
			
 
				 	tid_t commit_tid;
			
 
				+	bool needs_barrier = false;
			
 
				 
			
 
				 	J_ASSERT(ext4_journal_current_handle() == NULL);
			
 
				 
			
@@ -211,22 +212,12 @@ int ext4_sync_file(struct file *file, int datasync)
 
				 	}
			
 
				 
			
 
				 	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
			
 
				-	if (jbd2_log_start_commit(journal, commit_tid)) {
			
 
				-		/*
			
 
				-		 * When the journal is on a different device than the
			
 
				-		 * fs data disk, we need to issue the barrier in
			
 
				-		 * writeback mode.  (In ordered mode, the jbd2 layer
			
 
				-		 * will take care of issuing the barrier.  In
			
 
				-		 * data=journal, all of the data blocks are written to
			
 
				-		 * the journal device.)
			
 
				-		 */
			
 
				-		if (ext4_should_writeback_data(inode) &&
			
 
				-		    (journal->j_fs_dev != journal->j_dev) &&
			
 
				-		    (journal->j_flags & JBD2_BARRIER))
			
 
				-			blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
			
 
				-					NULL);
			
 
				-		ret = jbd2_log_wait_commit(journal, commit_tid);
			
 
				-	} else if (journal->j_flags & JBD2_BARRIER)
			
 
				+	if (journal->j_flags & JBD2_BARRIER &&
			
 
				+	    !jbd2_trans_will_send_data_barrier(journal, commit_tid))
			
 
				+		needs_barrier = true;
			
 
				+	jbd2_log_start_commit(journal, commit_tid);
			
 
				+	ret = jbd2_log_wait_commit(journal, commit_tid);
			
 
				+	if (needs_barrier)
			
 
				 		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
			
 
				  out:
			
 
				 	trace_ext4_sync_file_exit(inode, ret);
			
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -639,8 +639,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
 
				 	while (target > 0) {
			
 
				 		count = target;
			
 
				 		/* allocating blocks for indirect blocks and direct blocks */
			
 
				-		current_block = ext4_new_meta_blocks(handle, inode,
			
 
				-							goal, &count, err);
			
 
				+		current_block = ext4_new_meta_blocks(handle, inode, goal,
			
 
				+						     0, &count, err);
			
 
				 		if (*err)
			
 
				 			goto failed_out;
			
 
				 
			
@@ -1930,7 +1930,7 @@ repeat:
 
				 	 * We do still charge estimated metadata to the sb though;
			
 
				 	 * we cannot afford to run out of free blocks.
			
 
				 	 */
			
 
				-	if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
			
 
				+	if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
			
 
				 		dquot_release_reservation_block(inode, 1);
			
 
				 		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
			
 
				 			yield();
			
@@ -2796,9 +2796,7 @@ static int write_cache_pages_da(struct address_space *mapping,
 
				 				continue;
			
 
				 			}
			
 
				 
			
 
				-			if (PageWriteback(page))
			
 
				-				wait_on_page_writeback(page);
			
 
				-
			
 
				+			wait_on_page_writeback(page);
			
 
				 			BUG_ON(PageWriteback(page));
			
 
				 
			
 
				 			if (mpd->next_page != page->index)
			
@@ -3513,7 +3511,7 @@ retry:
 
				 			loff_t end = offset + iov_length(iov, nr_segs);
			
 
				 
			
 
				 			if (end > isize)
			
 
				-				vmtruncate(inode, isize);
			
 
				+				ext4_truncate_failed_write(inode);
			
 
				 		}
			
 
				 	}
			
 
				 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
			
@@ -3915,10 +3913,31 @@ void ext4_set_aops(struct inode *inode)
 
				  */
			
 
				 int ext4_block_truncate_page(handle_t *handle,
			
 
				 		struct address_space *mapping, loff_t from)
			
 
				+{
			
 
				+	unsigned offset = from & (PAGE_CACHE_SIZE-1);
			
 
				+	unsigned length;
			
 
				+	unsigned blocksize;
			
 
				+	struct inode *inode = mapping->host;
			
 
				+
			
 
				+	blocksize = inode->i_sb->s_blocksize;
			
 
				+	length = blocksize - (offset & (blocksize - 1));
			
 
				+
			
 
				+	return ext4_block_zero_page_range(handle, mapping, from, length);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * ext4_block_zero_page_range() zeros out a mapping of length 'length'
			
 
				+ * starting from file offset 'from'.  The range to be zero'd must
			
 
				+ * be contained within one block.  If the specified range exceeds
			
 
				+ * the end of the block it will be shortened to end of the block
			
 
				+ * that corresponds to 'from'
			
 
				+ */
			
 
				+int ext4_block_zero_page_range(handle_t *handle,
			
 
				+		struct address_space *mapping, loff_t from, loff_t length)
			
 
				 {
			
 
				 	ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
			
 
				 	unsigned offset = from & (PAGE_CACHE_SIZE-1);
			
 
				-	unsigned blocksize, length, pos;
			
 
				+	unsigned blocksize, max, pos;
			
 
				 	ext4_lblk_t iblock;
			
 
				 	struct inode *inode = mapping->host;
			
 
				 	struct buffer_head *bh;
			
@@ -3931,7 +3950,15 @@ int ext4_block_truncate_page(handle_t *handle,
 
				 		return -EINVAL;
			
 
				 
			
 
				 	blocksize = inode->i_sb->s_blocksize;
			
 
				-	length = blocksize - (offset & (blocksize - 1));
			
 
				+	max = blocksize - (offset & (blocksize - 1));
			
 
				+
			
 
				+	/*
			
 
				+	 * correct length if it does not fall between
			
 
				+	 * 'from' and the end of the block
			
 
				+	 */
			
 
				+	if (length > max || length < 0)
			
 
				+		length = max;
			
 
				+
			
 
				 	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
			
 
				 
			
 
				 	if (!page_has_buffers(page))
			
@@ -4380,8 +4407,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 
				 
			
 
				 int ext4_can_truncate(struct inode *inode)
			
 
				 {
			
 
				-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
			
 
				-		return 0;
			
 
				 	if (S_ISREG(inode->i_mode))
			
 
				 		return 1;
			
 
				 	if (S_ISDIR(inode->i_mode))
			
@@ -4391,6 +4416,31 @@ int ext4_can_truncate(struct inode *inode)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * ext4_punch_hole: punches a hole in a file by releasing the blocks
			
 
				+ * associated with the given offset and length
			
 
				+ *
			
 
				+ * @file:   File in which the hole is punched
			
 
				+ * @offset: The offset where the hole will begin
			
 
				+ * @length: The length of the hole
			
 
				+ *
			
 
				+ * Returns: 0 on success or negative on failure
			
 
				+ */
			
 
				+
			
 
				+int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
			
 
				+{
			
 
				+	struct inode *inode = file->f_path.dentry->d_inode;
			
 
				+	if (!S_ISREG(inode->i_mode))
			
 
				+		return -ENOTSUPP;
			
 
				+
			
 
				+	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
			
 
				+		/* TODO: Add support for non extent hole punching */
			
 
				+		return -ENOTSUPP;
			
 
				+	}
			
 
				+
			
 
				+	return ext4_ext_punch_hole(file, offset, length);
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * ext4_truncate()
			
 
				  *
			
@@ -4617,7 +4667,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
 
				 	/*
			
 
				 	 * Figure out the offset within the block group inode table
			
 
				 	 */
			
 
				-	inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb));
			
 
				+	inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
			
 
				 	inode_offset = ((inode->i_ino - 1) %
			
 
				 			EXT4_INODES_PER_GROUP(sb));
			
 
				 	block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
			
@@ -5311,8 +5361,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 
				 
			
 
				 	if (S_ISREG(inode->i_mode) &&
			
 
				 	    attr->ia_valid & ATTR_SIZE &&
			
 
				-	    (attr->ia_size < inode->i_size ||
			
 
				-	     (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) {
			
 
				+	    (attr->ia_size < inode->i_size)) {
			
 
				 		handle_t *handle;
			
 
				 
			
 
				 		handle = ext4_journal_start(inode, 3);
			
@@ -5346,14 +5395,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 
				 				goto err_out;
			
 
				 			}
			
 
				 		}
			
 
				-		/* ext4_truncate will clear the flag */
			
 
				-		if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))
			
 
				-			ext4_truncate(inode);
			
 
				 	}
			
 
				 
			
 
				-	if ((attr->ia_valid & ATTR_SIZE) &&
			
 
				-	    attr->ia_size != i_size_read(inode))
			
 
				-		rc = vmtruncate(inode, attr->ia_size);
			
 
				+	if (attr->ia_valid & ATTR_SIZE) {
			
 
				+		if (attr->ia_size != i_size_read(inode)) {
			
 
				+			truncate_setsize(inode, attr->ia_size);
			
 
				+			ext4_truncate(inode);
			
 
				+		} else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
			
 
				+			ext4_truncate(inode);
			
 
				+	}
			
 
				 
			
 
				 	if (!rc) {
			
 
				 		setattr_copy(inode, attr);
			
@@ -5811,15 +5861,19 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
				 		goto out_unlock;
			
 
				 	}
			
 
				 	ret = 0;
			
 
				-	if (PageMappedToDisk(page))
			
 
				-		goto out_unlock;
			
 
				+
			
 
				+	lock_page(page);
			
 
				+	wait_on_page_writeback(page);
			
 
				+	if (PageMappedToDisk(page)) {
			
 
				+		up_read(&inode->i_alloc_sem);
			
 
				+		return VM_FAULT_LOCKED;
			
 
				+	}
			
 
				 
			
 
				 	if (page->index == size >> PAGE_CACHE_SHIFT)
			
 
				 		len = size & ~PAGE_CACHE_MASK;
			
 
				 	else
			
 
				 		len = PAGE_CACHE_SIZE;
			
 
				 
			
 
				-	lock_page(page);
			
 
				 	/*
			
 
				 	 * return if we have all the buffers mapped. This avoid
			
 
				 	 * the need to call write_begin/write_end which does a
			
@@ -5829,8 +5883,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
				 	if (page_has_buffers(page)) {
			
 
				 		if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
			
 
				 					ext4_bh_unmapped)) {
			
 
				-			unlock_page(page);
			
 
				-			goto out_unlock;
			
 
				+			up_read(&inode->i_alloc_sem);
			
 
				+			return VM_FAULT_LOCKED;
			
 
				 		}
			
 
				 	}
			
 
				 	unlock_page(page);
			
@@ -5850,6 +5904,16 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
				 	if (ret < 0)
			
 
				 		goto out_unlock;
			
 
				 	ret = 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * write_begin/end might have created a dirty page and someone
			
 
				+	 * could wander in and start the IO.  Make sure that hasn't
			
 
				+	 * happened.
			
 
				+	 */
			
 
				+	lock_page(page);
			
 
				+	wait_on_page_writeback(page);
			
 
				+	up_read(&inode->i_alloc_sem);
			
 
				+	return VM_FAULT_LOCKED;
			
 
				 out_unlock:
			
 
				 	if (ret)
			
 
				 		ret = VM_FAULT_SIGBUS;
			
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -787,6 +787,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 
				 	struct inode *inode;
			
 
				 	char *data;
			
 
				 	char *bitmap;
			
 
				+	struct ext4_group_info *grinfo;
			
 
				 
			
 
				 	mb_debug(1, "init page %lu\n", page->index);
			
 
				 
			
@@ -819,6 +820,18 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 
				 		if (first_group + i >= ngroups)
			
 
				 			break;
			
 
				 
			
 
				+		grinfo = ext4_get_group_info(sb, first_group + i);
			
 
				+		/*
			
 
				+		 * If page is uptodate then we came here after online resize
			
 
				+		 * which added some new uninitialized group info structs, so
			
 
				+		 * we must skip all initialized uptodate buddies on the page,
			
 
				+		 * which may be currently in use by an allocating task.
			
 
				+		 */
			
 
				+		if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
			
 
				+			bh[i] = NULL;
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				 		err = -EIO;
			
 
				 		desc = ext4_get_group_desc(sb, first_group + i, NULL);
			
 
				 		if (desc == NULL)
			
@@ -871,26 +884,28 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 
				 	}
			
 
				 
			
 
				 	/* wait for I/O completion */
			
 
				-	for (i = 0; i < groups_per_page && bh[i]; i++)
			
 
				-		wait_on_buffer(bh[i]);
			
 
				+	for (i = 0; i < groups_per_page; i++)
			
 
				+		if (bh[i])
			
 
				+			wait_on_buffer(bh[i]);
			
 
				 
			
 
				 	err = -EIO;
			
 
				-	for (i = 0; i < groups_per_page && bh[i]; i++)
			
 
				-		if (!buffer_uptodate(bh[i]))
			
 
				+	for (i = 0; i < groups_per_page; i++)
			
 
				+		if (bh[i] && !buffer_uptodate(bh[i]))
			
 
				 			goto out;
			
 
				 
			
 
				 	err = 0;
			
 
				 	first_block = page->index * blocks_per_page;
			
 
				-	/* init the page  */
			
 
				-	memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
			
 
				 	for (i = 0; i < blocks_per_page; i++) {
			
 
				 		int group;
			
 
				-		struct ext4_group_info *grinfo;
			
 
				 
			
 
				 		group = (first_block + i) >> 1;
			
 
				 		if (group >= ngroups)
			
 
				 			break;
			
 
				 
			
 
				+		if (!bh[group - first_group])
			
 
				+			/* skip initialized uptodate buddy */
			
 
				+			continue;
			
 
				+
			
 
				 		/*
			
 
				 		 * data carry information regarding this
			
 
				 		 * particular group in the format specified
			
@@ -919,6 +934,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 
				 			 * incore got set to the group block bitmap below
			
 
				 			 */
			
 
				 			ext4_lock_group(sb, group);
			
 
				+			/* init the buddy */
			
 
				+			memset(data, 0xff, blocksize);
			
 
				 			ext4_mb_generate_buddy(sb, data, incore, group);
			
 
				 			ext4_unlock_group(sb, group);
			
 
				 			incore = NULL;
			
@@ -948,7 +965,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 
				 
			
 
				 out:
			
 
				 	if (bh) {
			
 
				-		for (i = 0; i < groups_per_page && bh[i]; i++)
			
 
				+		for (i = 0; i < groups_per_page; i++)
			
 
				 			brelse(bh[i]);
			
 
				 		if (bh != &bhs)
			
 
				 			kfree(bh);
			
@@ -957,22 +974,21 @@ out:
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * lock the group_info alloc_sem of all the groups
			
 
				- * belonging to the same buddy cache page. This
			
 
				- * make sure other parallel operation on the buddy
			
 
				- * cache doesn't happen  whild holding the buddy cache
			
 
				- * lock
			
 
				+ * Lock the buddy and bitmap pages. This makes sure other parallel init_group
			
 
				+ * on the same buddy page doesn't happen while holding the buddy page lock.
			
 
				+ * Return locked buddy and bitmap pages on e4b struct. If buddy and bitmap
			
 
				+ * are on the same page e4b->bd_buddy_page is NULL and return value is 0.
			
 
				  */
			
 
				-static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
			
 
				-					ext4_group_t group)
			
 
				+static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
			
 
				+		ext4_group_t group, struct ext4_buddy *e4b)
			
 
				 {
			
 
				-	int i;
			
 
				-	int block, pnum;
			
 
				+	struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
			
 
				+	int block, pnum, poff;
			
 
				 	int blocks_per_page;
			
 
				-	int groups_per_page;
			
 
				-	ext4_group_t ngroups = ext4_get_groups_count(sb);
			
 
				-	ext4_group_t first_group;
			
 
				-	struct ext4_group_info *grp;
			
 
				+	struct page *page;
			
 
				+
			
 
				+	e4b->bd_buddy_page = NULL;
			
 
				+	e4b->bd_bitmap_page = NULL;
			
 
				 
			
 
				 	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
			
 
				 	/*
			
@@ -982,57 +998,40 @@ static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
 
				 	 */
			
 
				 	block = group * 2;
			
 
				 	pnum = block / blocks_per_page;
			
 
				-	first_group = pnum * blocks_per_page / 2;
			
 
				-
			
 
				-	groups_per_page = blocks_per_page >> 1;
			
 
				-	if (groups_per_page == 0)
			
 
				-		groups_per_page = 1;
			
 
				-	/* read all groups the page covers into the cache */
			
 
				-	for (i = 0; i < groups_per_page; i++) {
			
 
				+	poff = block % blocks_per_page;
			
 
				+	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
			
 
				+	if (!page)
			
 
				+		return -EIO;
			
 
				+	BUG_ON(page->mapping != inode->i_mapping);
			
 
				+	e4b->bd_bitmap_page = page;
			
 
				+	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
			
 
				 
			
 
				-		if ((first_group + i) >= ngroups)
			
 
				-			break;
			
 
				-		grp = ext4_get_group_info(sb, first_group + i);
			
 
				-		/* take all groups write allocation
			
 
				-		 * semaphore. This make sure there is
			
 
				-		 * no block allocation going on in any
			
 
				-		 * of that groups
			
 
				-		 */
			
 
				-		down_write_nested(&grp->alloc_sem, i);
			
 
				+	if (blocks_per_page >= 2) {
			
 
				+		/* buddy and bitmap are on the same page */
			
 
				+		return 0;
			
 
				 	}
			
 
				-	return i;
			
 
				+
			
 
				+	block++;
			
 
				+	pnum = block / blocks_per_page;
			
 
				+	poff = block % blocks_per_page;
			
 
				+	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
			
 
				+	if (!page)
			
 
				+		return -EIO;
			
 
				+	BUG_ON(page->mapping != inode->i_mapping);
			
 
				+	e4b->bd_buddy_page = page;
			
 
				+	return 0;
			
 
				 }
			
 
				 
			
 
				-static void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
			
 
				-					 ext4_group_t group, int locked_group)
			
 
				+static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
			
 
				 {
			
 
				-	int i;
			
 
				-	int block, pnum;
			
 
				-	int blocks_per_page;
			
 
				-	ext4_group_t first_group;
			
 
				-	struct ext4_group_info *grp;
			
 
				-
			
 
				-	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
			
 
				-	/*
			
 
				-	 * the buddy cache inode stores the block bitmap
			
 
				-	 * and buddy information in consecutive blocks.
			
 
				-	 * So for each group we need two blocks.
			
 
				-	 */
			
 
				-	block = group * 2;
			
 
				-	pnum = block / blocks_per_page;
			
 
				-	first_group = pnum * blocks_per_page / 2;
			
 
				-	/* release locks on all the groups */
			
 
				-	for (i = 0; i < locked_group; i++) {
			
 
				-
			
 
				-		grp = ext4_get_group_info(sb, first_group + i);
			
 
				-		/* take all groups write allocation
			
 
				-		 * semaphore. This make sure there is
			
 
				-		 * no block allocation going on in any
			
 
				-		 * of that groups
			
 
				-		 */
			
 
				-		up_write(&grp->alloc_sem);
			
 
				+	if (e4b->bd_bitmap_page) {
			
 
				+		unlock_page(e4b->bd_bitmap_page);
			
 
				+		page_cache_release(e4b->bd_bitmap_page);
			
 
				+	}
			
 
				+	if (e4b->bd_buddy_page) {
			
 
				+		unlock_page(e4b->bd_buddy_page);
			
 
				+		page_cache_release(e4b->bd_buddy_page);
			
 
				 	}
			
 
				-
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1044,93 +1043,60 @@ static noinline_for_stack
 
				 int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
			
 
				 {
			
 
				 
			
 
				-	int ret = 0;
			
 
				-	void *bitmap;
			
 
				-	int blocks_per_page;
			
 
				-	int block, pnum, poff;
			
 
				-	int num_grp_locked = 0;
			
 
				 	struct ext4_group_info *this_grp;
			
 
				-	struct ext4_sb_info *sbi = EXT4_SB(sb);
			
 
				-	struct inode *inode = sbi->s_buddy_cache;
			
 
				-	struct page *page = NULL, *bitmap_page = NULL;
			
 
				+	struct ext4_buddy e4b;
			
 
				+	struct page *page;
			
 
				+	int ret = 0;
			
 
				 
			
 
				 	mb_debug(1, "init group %u\n", group);
			
 
				-	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
			
 
				 	this_grp = ext4_get_group_info(sb, group);
			
 
				 	/*
			
 
				 	 * This ensures that we don't reinit the buddy cache
			
 
				 	 * page which map to the group from which we are already
			
 
				 	 * allocating. If we are looking at the buddy cache we would
			
 
				 	 * have taken a reference using ext4_mb_load_buddy and that
			
 
				-	 * would have taken the alloc_sem lock.
			
 
				+	 * would have pinned buddy page to page cache.
			
 
				 	 */
			
 
				-	num_grp_locked =  ext4_mb_get_buddy_cache_lock(sb, group);
			
 
				-	if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
			
 
				+	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
			
 
				+	if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
			
 
				 		/*
			
 
				 		 * somebody initialized the group
			
 
				 		 * return without doing anything
			
 
				 		 */
			
 
				-		ret = 0;
			
 
				 		goto err;
			
 
				 	}
			
 
				-	/*
			
 
				-	 * the buddy cache inode stores the block bitmap
			
 
				-	 * and buddy information in consecutive blocks.
			
 
				-	 * So for each group we need two blocks.
			
 
				-	 */
			
 
				-	block = group * 2;
			
 
				-	pnum = block / blocks_per_page;
			
 
				-	poff = block % blocks_per_page;
			
 
				-	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
			
 
				-	if (page) {
			
 
				-		BUG_ON(page->mapping != inode->i_mapping);
			
 
				-		ret = ext4_mb_init_cache(page, NULL);
			
 
				-		if (ret) {
			
 
				-			unlock_page(page);
			
 
				-			goto err;
			
 
				-		}
			
 
				-		unlock_page(page);
			
 
				-	}
			
 
				-	if (page == NULL || !PageUptodate(page)) {
			
 
				+
			
 
				+	page = e4b.bd_bitmap_page;
			
 
				+	ret = ext4_mb_init_cache(page, NULL);
			
 
				+	if (ret)
			
 
				+		goto err;
			
 
				+	if (!PageUptodate(page)) {
			
 
				 		ret = -EIO;
			
 
				 		goto err;
			
 
				 	}
			
 
				 	mark_page_accessed(page);
			
 
				-	bitmap_page = page;
			
 
				-	bitmap = page_address(page) + (poff * sb->s_blocksize);
			
 
				 
			
 
				-	/* init buddy cache */
			
 
				-	block++;
			
 
				-	pnum = block / blocks_per_page;
			
 
				-	poff = block % blocks_per_page;
			
 
				-	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
			
 
				-	if (page == bitmap_page) {
			
 
				+	if (e4b.bd_buddy_page == NULL) {
			
 
				 		/*
			
 
				 		 * If both the bitmap and buddy are in
			
 
				 		 * the same page we don't need to force
			
 
				 		 * init the buddy
			
 
				 		 */
			
 
				-		unlock_page(page);
			
 
				-	} else if (page) {
			
 
				-		BUG_ON(page->mapping != inode->i_mapping);
			
 
				-		ret = ext4_mb_init_cache(page, bitmap);
			
 
				-		if (ret) {
			
 
				-			unlock_page(page);
			
 
				-			goto err;
			
 
				-		}
			
 
				-		unlock_page(page);
			
 
				+		ret = 0;
			
 
				+		goto err;
			
 
				 	}
			
 
				-	if (page == NULL || !PageUptodate(page)) {
			
 
				+	/* init buddy cache */
			
 
				+	page = e4b.bd_buddy_page;
			
 
				+	ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
			
 
				+	if (ret)
			
 
				+		goto err;
			
 
				+	if (!PageUptodate(page)) {
			
 
				 		ret = -EIO;
			
 
				 		goto err;
			
 
				 	}
			
 
				 	mark_page_accessed(page);
			
 
				 err:
			
 
				-	ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
			
 
				-	if (bitmap_page)
			
 
				-		page_cache_release(bitmap_page);
			
 
				-	if (page)
			
 
				-		page_cache_release(page);
			
 
				+	ext4_mb_put_buddy_page_lock(&e4b);
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
@@ -1164,24 +1130,8 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 
				 	e4b->bd_group = group;
			
 
				 	e4b->bd_buddy_page = NULL;
			
 
				 	e4b->bd_bitmap_page = NULL;
			
 
				-	e4b->alloc_semp = &grp->alloc_sem;
			
 
				-
			
 
				-	/* Take the read lock on the group alloc
			
 
				-	 * sem. This would make sure a parallel
			
 
				-	 * ext4_mb_init_group happening on other
			
 
				-	 * groups mapped by the page is blocked
			
 
				-	 * till we are done with allocation
			
 
				-	 */
			
 
				-repeat_load_buddy:
			
 
				-	down_read(e4b->alloc_semp);
			
 
				 
			
 
				 	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
			
 
				-		/* we need to check for group need init flag
			
 
				-		 * with alloc_semp held so that we can be sure
			
 
				-		 * that new blocks didn't get added to the group
			
 
				-		 * when we are loading the buddy cache
			
 
				-		 */
			
 
				-		up_read(e4b->alloc_semp);
			
 
				 		/*
			
 
				 		 * we need full data about the group
			
 
				 		 * to make a good selection
			
@@ -1189,7 +1139,6 @@ repeat_load_buddy:
 
				 		ret = ext4_mb_init_group(sb, group);
			
 
				 		if (ret)
			
 
				 			return ret;
			
 
				-		goto repeat_load_buddy;
			
 
				 	}
			
 
				 
			
 
				 	/*
			
@@ -1273,15 +1222,14 @@ repeat_load_buddy:
 
				 	return 0;
			
 
				 
			
 
				 err:
			
 
				+	if (page)
			
 
				+		page_cache_release(page);
			
 
				 	if (e4b->bd_bitmap_page)
			
 
				 		page_cache_release(e4b->bd_bitmap_page);
			
 
				 	if (e4b->bd_buddy_page)
			
 
				 		page_cache_release(e4b->bd_buddy_page);
			
 
				 	e4b->bd_buddy = NULL;
			
 
				 	e4b->bd_bitmap = NULL;
			
 
				-
			
 
				-	/* Done with the buddy cache */
			
 
				-	up_read(e4b->alloc_semp);
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
@@ -1291,9 +1239,6 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
 
				 		page_cache_release(e4b->bd_bitmap_page);
			
 
				 	if (e4b->bd_buddy_page)
			
 
				 		page_cache_release(e4b->bd_buddy_page);
			
 
				-	/* Done with the buddy cache */
			
 
				-	if (e4b->alloc_semp)
			
 
				-		up_read(e4b->alloc_semp);
			
 
				 }
			
 
				 
			
 
				 
			
@@ -1606,9 +1551,6 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
 
				 	get_page(ac->ac_bitmap_page);
			
 
				 	ac->ac_buddy_page = e4b->bd_buddy_page;
			
 
				 	get_page(ac->ac_buddy_page);
			
 
				-	/* on allocation we use ac to track the held semaphore */
			
 
				-	ac->alloc_semp =  e4b->alloc_semp;
			
 
				-	e4b->alloc_semp = NULL;
			
 
				 	/* store last allocated for subsequent stream allocation */
			
 
				 	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
			
 
				 		spin_lock(&sbi->s_md_lock);
			
@@ -2659,7 +2601,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 
				 	struct super_block *sb = journal->j_private;
			
 
				 	struct ext4_buddy e4b;
			
 
				 	struct ext4_group_info *db;
			
 
				-	int err, ret, count = 0, count2 = 0;
			
 
				+	int err, count = 0, count2 = 0;
			
 
				 	struct ext4_free_data *entry;
			
 
				 	struct list_head *l, *ltmp;
			
 
				 
			
@@ -2669,15 +2611,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 
				 		mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
			
 
				 			 entry->count, entry->group, entry);
			
 
				 
			
 
				-		if (test_opt(sb, DISCARD)) {
			
 
				-			ret = ext4_issue_discard(sb, entry->group,
			
 
				-					entry->start_blk, entry->count);
			
 
				-			if (unlikely(ret == -EOPNOTSUPP)) {
			
 
				-				ext4_warning(sb, "discard not supported, "
			
 
				-						 "disabling");
			
 
				-				clear_opt(sb, DISCARD);
			
 
				-			}
			
 
				-		}
			
 
				+		if (test_opt(sb, DISCARD))
			
 
				+			ext4_issue_discard(sb, entry->group,
			
 
				+					   entry->start_blk, entry->count);
			
 
				 
			
 
				 		err = ext4_mb_load_buddy(sb, entry->group, &e4b);
			
 
				 		/* we expect to find existing buddy because it's pinned */
			
@@ -4226,15 +4162,12 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
 
				 			spin_unlock(&pa->pa_lock);
			
 
				 		}
			
 
				 	}
			
 
				-	if (ac->alloc_semp)
			
 
				-		up_read(ac->alloc_semp);
			
 
				 	if (pa) {
			
 
				 		/*
			
 
				 		 * We want to add the pa to the right bucket.
			
 
				 		 * Remove it from the list and while adding
			
 
				 		 * make sure the list to which we are adding
			
 
				-		 * doesn't grow big.  We need to release
			
 
				-		 * alloc_semp before calling ext4_mb_add_n_trim()
			
 
				+		 * doesn't grow big.
			
 
				 		 */
			
 
				 		if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
			
 
				 			spin_lock(pa->pa_obj_lock);
			
@@ -4303,7 +4236,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 
				 		 * there is enough free blocks to do block allocation
			
 
				 		 * and verify allocation doesn't exceed the quota limits.
			
 
				 		 */
			
 
				-		while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
			
 
				+		while (ar->len &&
			
 
				+			ext4_claim_free_blocks(sbi, ar->len, ar->flags)) {
			
 
				+
			
 
				 			/* let others to free the space */
			
 
				 			yield();
			
 
				 			ar->len = ar->len >> 1;
			
@@ -4313,9 +4248,15 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 
				 			return 0;
			
 
				 		}
			
 
				 		reserv_blks = ar->len;
			
 
				-		while (ar->len && dquot_alloc_block(ar->inode, ar->len)) {
			
 
				-			ar->flags |= EXT4_MB_HINT_NOPREALLOC;
			
 
				-			ar->len--;
			
 
				+		if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
			
 
				+			dquot_alloc_block_nofail(ar->inode, ar->len);
			
 
				+		} else {
			
 
				+			while (ar->len &&
			
 
				+				dquot_alloc_block(ar->inode, ar->len)) {
			
 
				+
			
 
				+				ar->flags |= EXT4_MB_HINT_NOPREALLOC;
			
 
				+				ar->len--;
			
 
				+			}
			
 
				 		}
			
 
				 		inquota = ar->len;
			
 
				 		if (ar->len == 0) {
			
@@ -4703,6 +4644,127 @@ error_return:
 
				 	return;
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * ext4_add_groupblocks() -- Add given blocks to an existing group
			
 
				+ * @handle:			handle to this transaction
			
 
				+ * @sb:				super block
			
 
				+ * @block:			start physical block to add to the block group
			
 
				+ * @count:			number of blocks to free
			
 
				+ *
			
 
				+ * This marks the blocks as free in the bitmap and buddy.
			
 
				+ */
			
 
				+void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
			
 
				+			 ext4_fsblk_t block, unsigned long count)
			
 
				+{
			
 
				+	struct buffer_head *bitmap_bh = NULL;
			
 
				+	struct buffer_head *gd_bh;
			
 
				+	ext4_group_t block_group;
			
 
				+	ext4_grpblk_t bit;
			
 
				+	unsigned int i;
			
 
				+	struct ext4_group_desc *desc;
			
 
				+	struct ext4_sb_info *sbi = EXT4_SB(sb);
			
 
				+	struct ext4_buddy e4b;
			
 
				+	int err = 0, ret, blk_free_count;
			
 
				+	ext4_grpblk_t blocks_freed;
			
 
				+	struct ext4_group_info *grp;
			
 
				+
			
 
				+	ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
			
 
				+
			
 
				+	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
			
 
				+	grp = ext4_get_group_info(sb, block_group);
			
 
				+	/*
			
 
				+	 * Check to see if we are freeing blocks across a group
			
 
				+	 * boundary.
			
 
				+	 */
			
 
				+	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb))
			
 
				+		goto error_return;
			
 
				+
			
 
				+	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
			
 
				+	if (!bitmap_bh)
			
 
				+		goto error_return;
			
 
				+	desc = ext4_get_group_desc(sb, block_group, &gd_bh);
			
 
				+	if (!desc)
			
 
				+		goto error_return;
			
 
				+
			
 
				+	if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
			
 
				+	    in_range(ext4_inode_bitmap(sb, desc), block, count) ||
			
 
				+	    in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
			
 
				+	    in_range(block + count - 1, ext4_inode_table(sb, desc),
			
 
				+		     sbi->s_itb_per_group)) {
			
 
				+		ext4_error(sb, "Adding blocks in system zones - "
			
 
				+			   "Block = %llu, count = %lu",
			
 
				+			   block, count);
			
 
				+		goto error_return;
			
 
				+	}
			
 
				+
			
 
				+	BUFFER_TRACE(bitmap_bh, "getting write access");
			
 
				+	err = ext4_journal_get_write_access(handle, bitmap_bh);
			
 
				+	if (err)
			
 
				+		goto error_return;
			
 
				+
			
 
				+	/*
			
 
				+	 * We are about to modify some metadata.  Call the journal APIs
			
 
				+	 * to unshare ->b_data if a currently-committing transaction is
			
 
				+	 * using it
			
 
				+	 */
			
 
				+	BUFFER_TRACE(gd_bh, "get_write_access");
			
 
				+	err = ext4_journal_get_write_access(handle, gd_bh);
			
 
				+	if (err)
			
 
				+		goto error_return;
			
 
				+
			
 
				+	for (i = 0, blocks_freed = 0; i < count; i++) {
			
 
				+		BUFFER_TRACE(bitmap_bh, "clear bit");
			
 
				+		if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
			
 
				+			ext4_error(sb, "bit already cleared for block %llu",
			
 
				+				   (ext4_fsblk_t)(block + i));
			
 
				+			BUFFER_TRACE(bitmap_bh, "bit already cleared");
			
 
				+		} else {
			
 
				+			blocks_freed++;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	err = ext4_mb_load_buddy(sb, block_group, &e4b);
			
 
				+	if (err)
			
 
				+		goto error_return;
			
 
				+
			
 
				+	/*
			
 
				+	 * need to update group_info->bb_free and bitmap
			
 
				+	 * with group lock held. generate_buddy looks at
			
 
				+	 * them with group lock held
			
 
				+	 */
			
 
				+	ext4_lock_group(sb, block_group);
			
 
				+	mb_clear_bits(bitmap_bh->b_data, bit, count);
			
 
				+	mb_free_blocks(NULL, &e4b, bit, count);
			
 
				+	blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
			
 
				+	ext4_free_blks_set(sb, desc, blk_free_count);
			
 
				+	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
			
 
				+	ext4_unlock_group(sb, block_group);
			
 
				+	percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
			
 
				+
			
 
				+	if (sbi->s_log_groups_per_flex) {
			
 
				+		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
			
 
				+		atomic_add(blocks_freed,
			
 
				+			   &sbi->s_flex_groups[flex_group].free_blocks);
			
 
				+	}
			
 
				+
			
 
				+	ext4_mb_unload_buddy(&e4b);
			
 
				+
			
 
				+	/* We dirtied the bitmap block */
			
 
				+	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
			
 
				+	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
			
 
				+
			
 
				+	/* And the group descriptor block */
			
 
				+	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
			
 
				+	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
			
 
				+	if (!err)
			
 
				+		err = ret;
			
 
				+
			
 
				+error_return:
			
 
				+	brelse(bitmap_bh);
			
 
				+	ext4_std_error(sb, err);
			
 
				+	return;
			
 
				+}
			
 
				+
			
 
				 /**
			
 
				  * ext4_trim_extent -- function to TRIM one single free extent in the group
			
 
				  * @sb:		super block for the file system
			
@@ -4715,11 +4777,10 @@ error_return:
 
				  * one will allocate those blocks, mark it as used in buddy bitmap. This must
			
 
				  * be called with under the group lock.
			
 
				  */
			
 
				-static int ext4_trim_extent(struct super_block *sb, int start, int count,
			
 
				-		ext4_group_t group, struct ext4_buddy *e4b)
			
 
				+static void ext4_trim_extent(struct super_block *sb, int start, int count,
			
 
				+			     ext4_group_t group, struct ext4_buddy *e4b)
			
 
				 {
			
 
				 	struct ext4_free_extent ex;
			
 
				-	int ret = 0;
			
 
				 
			
 
				 	assert_spin_locked(ext4_group_lock_ptr(sb, group));
			
 
				 
			
@@ -4733,12 +4794,9 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
 
				 	 */
			
 
				 	mb_mark_used(e4b, &ex);
			
 
				 	ext4_unlock_group(sb, group);
			
 
				-
			
 
				-	ret = ext4_issue_discard(sb, group, start, count);
			
 
				-
			
 
				+	ext4_issue_discard(sb, group, start, count);
			
 
				 	ext4_lock_group(sb, group);
			
 
				 	mb_free_blocks(NULL, e4b, start, ex.fe_len);
			
 
				-	return ret;
			
 
				 }
			
 
				 
			
 
				 /**
			
@@ -4760,21 +4818,26 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
 
				  * the group buddy bitmap. This is done until whole group is scanned.
			
 
				  */
			
 
				 static ext4_grpblk_t
			
 
				-ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
			
 
				-		ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
			
 
				+ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
			
 
				+		   ext4_grpblk_t start, ext4_grpblk_t max,
			
 
				+		   ext4_grpblk_t minblocks)
			
 
				 {
			
 
				 	void *bitmap;
			
 
				 	ext4_grpblk_t next, count = 0;
			
 
				-	ext4_group_t group;
			
 
				-	int ret = 0;
			
 
				+	struct ext4_buddy e4b;
			
 
				+	int ret;
			
 
				 
			
 
				-	BUG_ON(e4b == NULL);
			
 
				+	ret = ext4_mb_load_buddy(sb, group, &e4b);
			
 
				+	if (ret) {
			
 
				+		ext4_error(sb, "Error in loading buddy "
			
 
				+				"information for %u", group);
			
 
				+		return ret;
			
 
				+	}
			
 
				+	bitmap = e4b.bd_bitmap;
			
 
				 
			
 
				-	bitmap = e4b->bd_bitmap;
			
 
				-	group = e4b->bd_group;
			
 
				-	start = (e4b->bd_info->bb_first_free > start) ?
			
 
				-		e4b->bd_info->bb_first_free : start;
			
 
				 	ext4_lock_group(sb, group);
			
 
				+	start = (e4b.bd_info->bb_first_free > start) ?
			
 
				+		e4b.bd_info->bb_first_free : start;
			
 
				 
			
 
				 	while (start < max) {
			
 
				 		start = mb_find_next_zero_bit(bitmap, max, start);
			
@@ -4783,10 +4846,8 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
 
				 		next = mb_find_next_bit(bitmap, max, start);
			
 
				 
			
 
				 		if ((next - start) >= minblocks) {
			
 
				-			ret = ext4_trim_extent(sb, start,
			
 
				-				next - start, group, e4b);
			
 
				-			if (ret < 0)
			
 
				-				break;
			
 
				+			ext4_trim_extent(sb, start,
			
 
				+					 next - start, group, &e4b);
			
 
				 			count += next - start;
			
 
				 		}
			
 
				 		start = next + 1;
			
@@ -4802,17 +4863,15 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
 
				 			ext4_lock_group(sb, group);
			
 
				 		}
			
 
				 
			
 
				-		if ((e4b->bd_info->bb_free - count) < minblocks)
			
 
				+		if ((e4b.bd_info->bb_free - count) < minblocks)
			
 
				 			break;
			
 
				 	}
			
 
				 	ext4_unlock_group(sb, group);
			
 
				+	ext4_mb_unload_buddy(&e4b);
			
 
				 
			
 
				 	ext4_debug("trimmed %d blocks in the group %d\n",
			
 
				 		count, group);
			
 
				 
			
 
				-	if (ret < 0)
			
 
				-		count = ret;
			
 
				-
			
 
				 	return count;
			
 
				 }
			
 
				 
			
@@ -4830,11 +4889,11 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
 
				  */
			
 
				 int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
			
 
				 {
			
 
				-	struct ext4_buddy e4b;
			
 
				+	struct ext4_group_info *grp;
			
 
				 	ext4_group_t first_group, last_group;
			
 
				 	ext4_group_t group, ngroups = ext4_get_groups_count(sb);
			
 
				 	ext4_grpblk_t cnt = 0, first_block, last_block;
			
 
				-	uint64_t start, len, minlen, trimmed;
			
 
				+	uint64_t start, len, minlen, trimmed = 0;
			
 
				 	ext4_fsblk_t first_data_blk =
			
 
				 			le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
			
 
				 	int ret = 0;
			
@@ -4842,7 +4901,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 
				 	start = range->start >> sb->s_blocksize_bits;
			
 
				 	len = range->len >> sb->s_blocksize_bits;
			
 
				 	minlen = range->minlen >> sb->s_blocksize_bits;
			
 
				-	trimmed = 0;
			
 
				 
			
 
				 	if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
			
 
				 		return -EINVAL;
			
@@ -4863,11 +4921,12 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 
				 		return -EINVAL;
			
 
				 
			
 
				 	for (group = first_group; group <= last_group; group++) {
			
 
				-		ret = ext4_mb_load_buddy(sb, group, &e4b);
			
 
				-		if (ret) {
			
 
				-			ext4_error(sb, "Error in loading buddy "
			
 
				-					"information for %u", group);
			
 
				-			break;
			
 
				+		grp = ext4_get_group_info(sb, group);
			
 
				+		/* We only do this if the grp has never been initialized */
			
 
				+		if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
			
 
				+			ret = ext4_mb_init_group(sb, group);
			
 
				+			if (ret)
			
 
				+				break;
			
 
				 		}
			
 
				 
			
 
				 		/*
			
@@ -4880,16 +4939,14 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 
				 			last_block = first_block + len;
			
 
				 		len -= last_block - first_block;
			
 
				 
			
 
				-		if (e4b.bd_info->bb_free >= minlen) {
			
 
				-			cnt = ext4_trim_all_free(sb, &e4b, first_block,
			
 
				+		if (grp->bb_free >= minlen) {
			
 
				+			cnt = ext4_trim_all_free(sb, group, first_block,
			
 
				 						last_block, minlen);
			
 
				 			if (cnt < 0) {
			
 
				 				ret = cnt;
			
 
				-				ext4_mb_unload_buddy(&e4b);
			
 
				 				break;
			
 
				 			}
			
 
				 		}
			
 
				-		ext4_mb_unload_buddy(&e4b);
			
 
				 		trimmed += cnt;
			
 
				 		first_block = 0;
			
 
				 	}
			
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -193,11 +193,6 @@ struct ext4_allocation_context {
 
				 	__u8 ac_op;		/* operation, for history only */
			
 
				 	struct page *ac_bitmap_page;
			
 
				 	struct page *ac_buddy_page;
			
 
				-	/*
			
 
				-	 * pointer to the held semaphore upon successful
			
 
				-	 * block allocation
			
 
				-	 */
			
 
				-	struct rw_semaphore *alloc_semp;
			
 
				 	struct ext4_prealloc_space *ac_pa;
			
 
				 	struct ext4_locality_group *ac_lg;
			
 
				 };
			
@@ -215,7 +210,6 @@ struct ext4_buddy {
 
				 	struct super_block *bd_sb;
			
 
				 	__u16 bd_blkbits;
			
 
				 	ext4_group_t bd_group;
			
 
				-	struct rw_semaphore *alloc_semp;
			
 
				 };
			
 
				 #define EXT4_MB_BITMAP(e4b)	((e4b)->bd_bitmap)
			
 
				 #define EXT4_MB_BUDDY(e4b)	((e4b)->bd_buddy)
			
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -376,7 +376,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
 
				 	 * We have the extent map build with the tmp inode.
			
 
				 	 * Now copy the i_data across
			
 
				 	 */
			
 
				-	ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS);
			
 
				+	ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
			
 
				 	memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
			
 
				 
			
 
				 	/*
			
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -0,0 +1,351 @@
 
				+#include <linux/fs.h>
			
 
				+#include <linux/random.h>
			
 
				+#include <linux/buffer_head.h>
			
 
				+#include <linux/utsname.h>
			
 
				+#include <linux/kthread.h>
			
 
				+
			
 
				+#include "ext4.h"
			
 
				+
			
 
				+/*
			
 
				+ * Push the MMP buffer to disk synchronously, using WRITE_SYNC to try to
			
 
				+ * get the block on-disk faster.
			
 
				+ *
			
 
				+ * Returns 0 if the buffer is uptodate once the write has completed and
			
 
				+ * 1 otherwise.
			
 
				+ */
			
 
				+static int write_mmp_block(struct buffer_head *bh)
			
 
				+{
			
 
				+	int write_failed;
			
 
				+
			
 
				+	mark_buffer_dirty(bh);
			
 
				+	lock_buffer(bh);
			
 
				+	bh->b_end_io = end_buffer_write_sync;
			
 
				+	get_bh(bh);
			
 
				+	submit_bh(WRITE_SYNC, bh);
			
 
				+	wait_on_buffer(bh);
			
 
				+
			
 
				+	write_failed = !buffer_uptodate(bh);
			
 
				+	return unlikely(write_failed) ? 1 : 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Fetch the MMP block from the device. It _must_ come from disk, so the
			
 
				+ * uptodate flag of an already existing buffer is cleared before reading.
			
 
				+ *
			
 
				+ * @sb:        superblock of the filesystem
			
 
				+ * @bh:        in/out buffer head; looked up here when *bh is NULL, and
			
 
				+ *             released and reset to NULL when the read fails
			
 
				+ * @mmp_block: block number of the MMP block
			
 
				+ *
			
 
				+ * Returns 0 on success, -EIO when the block could not be read and
			
 
				+ * -EINVAL when the block does not carry EXT4_MMP_MAGIC.
			
 
				+ */
			
 
				+static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
			
 
				+			  ext4_fsblk_t mmp_block)
			
 
				+{
			
 
				+	struct buffer_head *buf = *bh;
			
 
				+	struct mmp_struct *mmp;
			
 
				+
			
 
				+	/* This would be sb_bread(sb, mmp_block), except we need to be sure
			
 
				+	 * that the MD RAID device cache has been bypassed, and that the read
			
 
				+	 * is not blocked in the elevator. */
			
 
				+	if (buf)
			
 
				+		clear_buffer_uptodate(buf);
			
 
				+	else
			
 
				+		buf = sb_getblk(sb, mmp_block);
			
 
				+
			
 
				+	if (buf) {
			
 
				+		get_bh(buf);
			
 
				+		lock_buffer(buf);
			
 
				+		buf->b_end_io = end_buffer_read_sync;
			
 
				+		submit_bh(READ_SYNC, buf);
			
 
				+		wait_on_buffer(buf);
			
 
				+		if (!buffer_uptodate(buf)) {
			
 
				+			brelse(buf);
			
 
				+			buf = NULL;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	/* Hand the (possibly new, possibly dropped) buffer back. */
			
 
				+	*bh = buf;
			
 
				+	if (!buf) {
			
 
				+		ext4_warning(sb, "Error while reading MMP block %llu",
			
 
				+			     mmp_block);
			
 
				+		return -EIO;
			
 
				+	}
			
 
				+
			
 
				+	mmp = (struct mmp_struct *)(buf->b_data);
			
 
				+	if (le32_to_cpu(mmp->mmp_magic) == EXT4_MMP_MAGIC)
			
 
				+		return 0;
			
 
				+
			
 
				+	return -EINVAL;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Dump as much information as possible to help the admin.
			
 
				+ *
			
 
				+ * @sb:       superblock the warnings are reported against
			
 
				+ * @mmp:      MMP block whose last update time, node and device are printed
			
 
				+ * @function: reporting function name, forwarded to __ext4_warning()
			
 
				+ * @line:     reporting source line, forwarded to __ext4_warning()
			
 
				+ * @msg:      caller supplied message, printed verbatim
			
 
				+ */
			
 
				+void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
			
 
				+		    const char *function, unsigned int line, const char *msg)
			
 
				+{
			
 
				+	/* msg is data, not a format string: keep any '%' in it inert. */
			
 
				+	__ext4_warning(sb, function, line, "%s", msg);
			
 
				+	/*
			
 
				+	 * The name fields are read from disk and may fill their arrays
			
 
				+	 * without a terminating NUL, so bound each with its array size.
			
 
				+	 */
			
 
				+	__ext4_warning(sb, function, line,
			
 
				+		       "MMP failure info: last update time: %llu, last update "
			
 
				+		       "node: %.*s, last update device: %.*s\n",
			
 
				+		       (long long unsigned int) le64_to_cpu(mmp->mmp_time),
			
 
				+		       (int) sizeof(mmp->mmp_nodename), mmp->mmp_nodename,
			
 
				+		       (int) sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
			
 
				+ *
			
 
				+ * @data: the struct mmpd_data set up by ext4_multi_mount_protect(); it is
			
 
				+ *        freed, and the MMP buffer head released, when the thread exits.
			
 
				+ *
			
 
				+ * The thread leaves on its own if the MMP feature is switched off, the
			
 
				+ * filesystem becomes read-only, the MMP block cannot be read back, or the
			
 
				+ * block no longer matches what was written.  On each of those paths
			
 
				+ * s_mmp_tsk is cleared first, since ext4_put_super() calls kthread_stop()
			
 
				+ * on any non-NULL s_mmp_tsk.  Otherwise it runs until asked to stop and
			
 
				+ * then writes EXT4_MMP_SEQ_CLEAN.
			
 
				+ *
			
 
				+ * Returns the status of the last MMP block read or write.
			
 
				+ */
			
 
				+static int kmmpd(void *data)
			
 
				+{
			
 
				+	struct super_block *sb = ((struct mmpd_data *) data)->sb;
			
 
				+	struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
			
 
				+	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
			
 
				+	struct mmp_struct *mmp;
			
 
				+	ext4_fsblk_t mmp_block;
			
 
				+	u32 seq = 0;
			
 
				+	unsigned long failed_writes = 0;
			
 
				+	int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
			
 
				+	unsigned mmp_check_interval;
			
 
				+	unsigned long last_update_time;
			
 
				+	unsigned long diff;
			
 
				+	int retval;
			
 
				+
			
 
				+	mmp_block = le64_to_cpu(es->s_mmp_block);
			
 
				+	mmp = (struct mmp_struct *)(bh->b_data);
			
 
				+	mmp->mmp_time = cpu_to_le64(get_seconds());
			
 
				+	/*
			
 
				+	 * Start with the higher mmp_check_interval and reduce it if
			
 
				+	 * the MMP block is being updated on time.
			
 
				+	 */
			
 
				+	mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
			
 
				+				 EXT4_MMP_MIN_CHECK_INTERVAL);
			
 
				+	mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
			
 
				+	bdevname(bh->b_bdev, mmp->mmp_bdevname);
			
 
				+
			
 
				+	/*
			
 
				+	 * Record the host name, not the OS name: sysname is the same on
			
 
				+	 * every node, which would make the nodename comparison below
			
 
				+	 * unable to tell two nodes apart.
			
 
				+	 */
			
 
				+	memcpy(mmp->mmp_nodename, init_utsname()->nodename,
			
 
				+	       sizeof(mmp->mmp_nodename));
			
 
				+
			
 
				+	while (!kthread_should_stop()) {
			
 
				+		if (++seq > EXT4_MMP_SEQ_MAX)
			
 
				+			seq = 1;
			
 
				+
			
 
				+		mmp->mmp_seq = cpu_to_le32(seq);
			
 
				+		mmp->mmp_time = cpu_to_le64(get_seconds());
			
 
				+		last_update_time = jiffies;
			
 
				+
			
 
				+		retval = write_mmp_block(bh);
			
 
				+		/*
			
 
				+		 * Don't spew too many error messages. Print one every
			
 
				+		 * (s_mmp_update_interval * 60) seconds.
			
 
				+		 */
			
 
				+		if (retval) {
			
 
				+			if ((failed_writes % 60) == 0)
			
 
				+				ext4_error(sb, "Error writing to MMP block");
			
 
				+			/* Count every failure so the rate limit advances. */
			
 
				+			failed_writes++;
			
 
				+		}
			
 
				+
			
 
				+		if (!(le32_to_cpu(es->s_feature_incompat) &
			
 
				+		    EXT4_FEATURE_INCOMPAT_MMP)) {
			
 
				+			ext4_warning(sb, "kmmpd being stopped since MMP feature"
			
 
				+				     " has been disabled.");
			
 
				+			EXT4_SB(sb)->s_mmp_tsk = NULL;
			
 
				+			goto failed;
			
 
				+		}
			
 
				+
			
 
				+		if (sb->s_flags & MS_RDONLY) {
			
 
				+			ext4_warning(sb, "kmmpd being stopped since filesystem "
			
 
				+				     "has been remounted as readonly.");
			
 
				+			EXT4_SB(sb)->s_mmp_tsk = NULL;
			
 
				+			goto failed;
			
 
				+		}
			
 
				+
			
 
				+		diff = jiffies - last_update_time;
			
 
				+		if (diff < mmp_update_interval * HZ)
			
 
				+			schedule_timeout_interruptible(mmp_update_interval *
			
 
				+						       HZ - diff);
			
 
				+
			
 
				+		/*
			
 
				+		 * We need to make sure that more than mmp_check_interval
			
 
				+		 * seconds have not passed since writing. If that has happened
			
 
				+		 * we need to check if the MMP block is as we left it.
			
 
				+		 */
			
 
				+		diff = jiffies - last_update_time;
			
 
				+		if (diff > mmp_check_interval * HZ) {
			
 
				+			struct buffer_head *bh_check = NULL;
			
 
				+			struct mmp_struct *mmp_check;
			
 
				+
			
 
				+			retval = read_mmp_block(sb, &bh_check, mmp_block);
			
 
				+			if (retval) {
			
 
				+				ext4_error(sb, "error reading MMP data: %d",
			
 
				+					   retval);
			
 
				+
			
 
				+				/*
			
 
				+				 * A bad-magic read still hands back a
			
 
				+				 * buffer; brelse() copes with NULL too.
			
 
				+				 */
			
 
				+				brelse(bh_check);
			
 
				+				EXT4_SB(sb)->s_mmp_tsk = NULL;
			
 
				+				goto failed;
			
 
				+			}
			
 
				+
			
 
				+			mmp_check = (struct mmp_struct *)(bh_check->b_data);
			
 
				+			if (mmp->mmp_seq != mmp_check->mmp_seq ||
			
 
				+			    memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
			
 
				+				   sizeof(mmp->mmp_nodename))) {
			
 
				+				dump_mmp_msg(sb, mmp_check,
			
 
				+					     "Error while updating MMP info. "
			
 
				+					     "The filesystem seems to have been"
			
 
				+					     " multiply mounted.");
			
 
				+				ext4_error(sb, "abort");
			
 
				+				/* mmp_check is no longer needed past here. */
			
 
				+				put_bh(bh_check);
			
 
				+				EXT4_SB(sb)->s_mmp_tsk = NULL;
			
 
				+				goto failed;
			
 
				+			}
			
 
				+			put_bh(bh_check);
			
 
				+		}
			
 
				+
			
 
				+		/*
			
 
				+		 * Adjust the mmp_check_interval depending on how much time
			
 
				+		 * it took for the MMP block to be written.
			
 
				+		 */
			
 
				+		mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
			
 
				+					     EXT4_MMP_MAX_CHECK_INTERVAL),
			
 
				+					 EXT4_MMP_MIN_CHECK_INTERVAL);
			
 
				+		mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Unmount seems to be clean.
			
 
				+	 */
			
 
				+	mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
			
 
				+	mmp->mmp_time = cpu_to_le64(get_seconds());
			
 
				+
			
 
				+	retval = write_mmp_block(bh);
			
 
				+
			
 
				+failed:
			
 
				+	kfree(data);
			
 
				+	brelse(bh);
			
 
				+	return retval;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Draw random 32-bit values until one does not exceed EXT4_MMP_SEQ_MAX
			
 
				+ * and return it as the new sequence number.
			
 
				+ */
			
 
				+static unsigned int mmp_new_seq(void)
			
 
				+{
			
 
				+	u32 candidate;
			
 
				+
			
 
				+	for (;;) {
			
 
				+		get_random_bytes(&candidate, sizeof(candidate));
			
 
				+		if (candidate <= EXT4_MMP_SEQ_MAX)
			
 
				+			return candidate;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Protect the filesystem from being mounted more than once.
			
 
				+ *
			
 
				+ * @sb:        superblock being mounted
			
 
				+ * @mmp_block: block number of the MMP block
			
 
				+ *
			
 
				+ * Reads the MMP block, waits to see whether its sequence number is still
			
 
				+ * being changed, claims the block with a fresh random sequence number,
			
 
				+ * checks again after another wait, and finally starts kmmpd to keep the
			
 
				+ * block updated.
			
 
				+ *
			
 
				+ * Returns 0 on success (kmmpd then holds the buffer head) and 1 on any
			
 
				+ * failure.
			
 
				+ */
			
 
				+int ext4_multi_mount_protect(struct super_block *sb,
			
 
				+				    ext4_fsblk_t mmp_block)
			
 
				+{
			
 
				+	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
			
 
				+	struct buffer_head *bh = NULL;
			
 
				+	struct mmp_struct *mmp = NULL;
			
 
				+	struct mmpd_data *mmpd_data;
			
 
				+	u32 seq;
			
 
				+	unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
			
 
				+	unsigned int wait_time = 0;
			
 
				+	int retval;
			
 
				+
			
 
				+	if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
			
 
				+	    mmp_block >= ext4_blocks_count(es)) {
			
 
				+		ext4_warning(sb, "Invalid MMP block in superblock");
			
 
				+		goto failed;
			
 
				+	}
			
 
				+
			
 
				+	retval = read_mmp_block(sb, &bh, mmp_block);
			
 
				+	if (retval)
			
 
				+		goto failed;
			
 
				+
			
 
				+	mmp = (struct mmp_struct *)(bh->b_data);
			
 
				+
			
 
				+	if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
			
 
				+		mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
			
 
				+
			
 
				+	/*
			
 
				+	 * If check_interval in MMP block is larger, use that instead of
			
 
				+	 * update_interval from the superblock.  The on-disk field is
			
 
				+	 * little-endian (kmmpd stores it with cpu_to_le16()).
			
 
				+	 */
			
 
				+	if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
			
 
				+		mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
			
 
				+
			
 
				+	seq = le32_to_cpu(mmp->mmp_seq);
			
 
				+	if (seq == EXT4_MMP_SEQ_CLEAN)
			
 
				+		goto skip;
			
 
				+
			
 
				+	if (seq == EXT4_MMP_SEQ_FSCK) {
			
 
				+		dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
			
 
				+		goto failed;
			
 
				+	}
			
 
				+
			
 
				+	wait_time = min(mmp_check_interval * 2 + 1,
			
 
				+			mmp_check_interval + 60);
			
 
				+
			
 
				+	/* Print MMP interval if more than 20 secs. */
			
 
				+	if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
			
 
				+		ext4_warning(sb, "MMP interval %u higher than expected, please"
			
 
				+			     " wait.\n", wait_time * 2);
			
 
				+
			
 
				+	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
			
 
				+		ext4_warning(sb, "MMP startup interrupted, failing mount\n");
			
 
				+		goto failed;
			
 
				+	}
			
 
				+
			
 
				+	retval = read_mmp_block(sb, &bh, mmp_block);
			
 
				+	if (retval)
			
 
				+		goto failed;
			
 
				+	mmp = (struct mmp_struct *)(bh->b_data);
			
 
				+	if (seq != le32_to_cpu(mmp->mmp_seq)) {
			
 
				+		dump_mmp_msg(sb, mmp,
			
 
				+			     "Device is already active on another node.");
			
 
				+		goto failed;
			
 
				+	}
			
 
				+
			
 
				+skip:
			
 
				+	/*
			
 
				+	 * write a new random sequence number.  seq stays in CPU byte
			
 
				+	 * order because it is compared with le32_to_cpu(mmp->mmp_seq)
			
 
				+	 * below; only the on-disk copy is converted.
			
 
				+	 */
			
 
				+	seq = mmp_new_seq();
			
 
				+	mmp->mmp_seq = cpu_to_le32(seq);
			
 
				+
			
 
				+	retval = write_mmp_block(bh);
			
 
				+	if (retval)
			
 
				+		goto failed;
			
 
				+
			
 
				+	/*
			
 
				+	 * wait for MMP interval and check mmp_seq.
			
 
				+	 */
			
 
				+	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
			
 
				+		ext4_warning(sb, "MMP startup interrupted, failing mount\n");
			
 
				+		goto failed;
			
 
				+	}
			
 
				+
			
 
				+	retval = read_mmp_block(sb, &bh, mmp_block);
			
 
				+	if (retval)
			
 
				+		goto failed;
			
 
				+	mmp = (struct mmp_struct *)(bh->b_data);
			
 
				+	if (seq != le32_to_cpu(mmp->mmp_seq)) {
			
 
				+		dump_mmp_msg(sb, mmp,
			
 
				+			     "Device is already active on another node.");
			
 
				+		goto failed;
			
 
				+	}
			
 
				+
			
 
				+	mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
			
 
				+	if (!mmpd_data) {
			
 
				+		ext4_warning(sb, "not enough memory for mmpd_data");
			
 
				+		goto failed;
			
 
				+	}
			
 
				+	mmpd_data->sb = sb;
			
 
				+	mmpd_data->bh = bh;
			
 
				+
			
 
				+	/*
			
 
				+	 * Start a kernel thread to update the MMP block periodically.
			
 
				+	 */
			
 
				+	EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
			
 
				+					     bdevname(bh->b_bdev,
			
 
				+						      mmp->mmp_bdevname));
			
 
				+	if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
			
 
				+		EXT4_SB(sb)->s_mmp_tsk = NULL;
			
 
				+		kfree(mmpd_data);
			
 
				+		ext4_warning(sb, "Unable to create kmmpd thread for %s.",
			
 
				+			     sb->s_id);
			
 
				+		goto failed;
			
 
				+	}
			
 
				+
			
 
				+	return 0;
			
 
				+
			
 
				+failed:
			
 
				+	/* bh may still be NULL here; brelse() is called regardless. */
			
 
				+	brelse(bh);
			
 
				+	return 1;
			
 
				+}
			
 
				+
			
 
				+
			
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -876,8 +876,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
 
				 	 * It needs to call wait_on_page_writeback() to wait for the
			
 
				 	 * writeback of the page.
			
 
				 	 */
			
 
				-	if (PageWriteback(page))
			
 
				-		wait_on_page_writeback(page);
			
 
				+	wait_on_page_writeback(page);
			
 
				 
			
 
				 	/* Release old bh and drop refs */
			
 
				 	try_to_release_page(page, 0);
			
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1413,10 +1413,22 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 
				 	frame->at = entries;
			
 
				 	frame->bh = bh;
			
 
				 	bh = bh2;
			
 
				+
			
 
				+	ext4_handle_dirty_metadata(handle, dir, frame->bh);
			
 
				+	ext4_handle_dirty_metadata(handle, dir, bh);
			
 
				+
			
 
				 	de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
			
 
				-	dx_release (frames);
			
 
				-	if (!(de))
			
 
				+	if (!de) {
			
 
				+		/*
			
 
				+		 * Even if the block split failed, we have to properly write
			
 
				+		 * out all the changes we did so far. Otherwise we can end up
			
 
				+		 * with corrupted filesystem.
			
 
				+		 */
			
 
				+		ext4_mark_inode_dirty(handle, dir);
			
 
				+		dx_release(frames);
			
 
				 		return retval;
			
 
				+	}
			
 
				+	dx_release(frames);
			
 
				 
			
 
				 	retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
			
 
				 	brelse(bh);
			
@@ -2240,6 +2252,7 @@ static int ext4_symlink(struct inode *dir,
 
				 	handle_t *handle;
			
 
				 	struct inode *inode;
			
 
				 	int l, err, retries = 0;
			
 
				+	int credits;
			
 
				 
			
 
				 	l = strlen(symname)+1;
			
 
				 	if (l > dir->i_sb->s_blocksize)
			
@@ -2247,10 +2260,26 @@ static int ext4_symlink(struct inode *dir,
 
				 
			
 
				 	dquot_initialize(dir);
			
 
				 
			
 
				+	if (l > EXT4_N_BLOCKS * 4) {
			
 
				+		/*
			
 
				+		 * For non-fast symlinks, we just allocate inode and put it on
			
 
				+		 * orphan list in the first transaction => we need bitmap,
			
 
				+		 * group descriptor, sb, inode block, quota blocks.
			
 
				+		 */
			
 
				+		credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
			
 
				+	} else {
			
 
				+		/*
			
 
				+		 * Fast symlink. We have to add entry to directory
			
 
				+		 * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS),
			
 
				+		 * allocate new inode (bitmap, group descriptor, inode block,
			
 
				+		 * quota blocks, sb is already counted in previous macros).
			
 
				+		 */
			
 
				+		credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
			
 
				+			  EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
			
 
				+			  EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
			
 
				+	}
			
 
				 retry:
			
 
				-	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
			
 
				-					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
			
 
				-					EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
			
 
				+	handle = ext4_journal_start(dir, credits);
			
 
				 	if (IS_ERR(handle))
			
 
				 		return PTR_ERR(handle);
			
 
				 
			
@@ -2263,21 +2292,44 @@ retry:
 
				 	if (IS_ERR(inode))
			
 
				 		goto out_stop;
			
 
				 
			
 
				-	if (l > sizeof(EXT4_I(inode)->i_data)) {
			
 
				+	if (l > EXT4_N_BLOCKS * 4) {
			
 
				 		inode->i_op = &ext4_symlink_inode_operations;
			
 
				 		ext4_set_aops(inode);
			
 
				 		/*
			
 
				-		 * page_symlink() calls into ext4_prepare/commit_write.
			
 
				-		 * We have a transaction open.  All is sweetness.  It also sets
			
 
				-		 * i_size in generic_commit_write().
			
 
				+		 * We cannot call page_symlink() with transaction started
			
 
				+		 * because it calls into ext4_write_begin() which can wait
			
 
				+		 * for transaction commit if we are running out of space
			
 
				+		 * and thus we deadlock. So we have to stop transaction now
			
 
				+		 * and restart it when symlink contents is written.
			
 
				+		 * 
			
 
				+		 * To keep fs consistent in case of crash, we have to put inode
			
 
				+		 * to orphan list in the mean time.
			
 
				 		 */
			
 
				+		drop_nlink(inode);
			
 
				+		err = ext4_orphan_add(handle, inode);
			
 
				+		ext4_journal_stop(handle);
			
 
				+		if (err)
			
 
				+			goto err_drop_inode;
			
 
				 		err = __page_symlink(inode, symname, l, 1);
			
 
				+		if (err)
			
 
				+			goto err_drop_inode;
			
 
				+		/*
			
 
				+		 * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS
			
 
				+		 * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
			
 
				+		 */
			
 
				+		handle = ext4_journal_start(dir,
			
 
				+				EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
			
 
				+				EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
			
 
				+		if (IS_ERR(handle)) {
			
 
				+			err = PTR_ERR(handle);
			
 
				+			goto err_drop_inode;
			
 
				+		}
			
 
				+		inc_nlink(inode);
			
 
				+		err = ext4_orphan_del(handle, inode);
			
 
				 		if (err) {
			
 
				+			ext4_journal_stop(handle);
			
 
				 			clear_nlink(inode);
			
 
				-			unlock_new_inode(inode);
			
 
				-			ext4_mark_inode_dirty(handle, inode);
			
 
				-			iput(inode);
			
 
				-			goto out_stop;
			
 
				+			goto err_drop_inode;
			
 
				 		}
			
 
				 	} else {
			
 
				 		/* clear the extent format for fast symlink */
			
@@ -2293,6 +2345,10 @@ out_stop:
 
				 	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
			
 
				 		goto retry;
			
 
				 	return err;
			
 
				+err_drop_inode:
			
 
				+	unlock_new_inode(inode);
			
 
				+	iput(inode);
			
 
				+	return err;
			
 
				 }
			
 
				 
			
 
				 static int ext4_link(struct dentry *old_dentry,
			
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -203,46 +203,29 @@ static void ext4_end_bio(struct bio *bio, int error)
 
				 	for (i = 0; i < io_end->num_io_pages; i++) {
			
 
				 		struct page *page = io_end->pages[i]->p_page;
			
 
				 		struct buffer_head *bh, *head;
			
 
				-		int partial_write = 0;
			
 
				+		loff_t offset;
			
 
				+		loff_t io_end_offset;
			
 
				 
			
 
				-		head = page_buffers(page);
			
 
				-		if (error)
			
 
				+		if (error) {
			
 
				 			SetPageError(page);
			
 
				-		BUG_ON(!head);
			
 
				-		if (head->b_size != PAGE_CACHE_SIZE) {
			
 
				-			loff_t offset;
			
 
				-			loff_t io_end_offset = io_end->offset + io_end->size;
			
 
				+			set_bit(AS_EIO, &page->mapping->flags);
			
 
				+			head = page_buffers(page);
			
 
				+			BUG_ON(!head);
			
 
				+
			
 
				+			io_end_offset = io_end->offset + io_end->size;
			
 
				 
			
 
				 			offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
			
 
				 			bh = head;
			
 
				 			do {
			
 
				 				if ((offset >= io_end->offset) &&
			
 
				-				    (offset+bh->b_size <= io_end_offset)) {
			
 
				-					if (error)
			
 
				-						buffer_io_error(bh);
			
 
				-
			
 
				-				}
			
 
				-				if (buffer_delay(bh))
			
 
				-					partial_write = 1;
			
 
				-				else if (!buffer_mapped(bh))
			
 
				-					clear_buffer_dirty(bh);
			
 
				-				else if (buffer_dirty(bh))
			
 
				-					partial_write = 1;
			
 
				+				    (offset+bh->b_size <= io_end_offset))
			
 
				+					buffer_io_error(bh);
			
 
				+
			
 
				 				offset += bh->b_size;
			
 
				 				bh = bh->b_this_page;
			
 
				 			} while (bh != head);
			
 
				 		}
			
 
				 
			
 
				-		/*
			
 
				-		 * If this is a partial write which happened to make
			
 
				-		 * all buffers uptodate then we can optimize away a
			
 
				-		 * bogus readpage() for the next read(). Here we
			
 
				-		 * 'discover' whether the page went uptodate as a
			
 
				-		 * result of this (potentially partial) write.
			
 
				-		 */
			
 
				-		if (!partial_write)
			
 
				-			SetPageUptodate(page);
			
 
				-
			
 
				 		put_io_page(io_end->pages[i]);
			
 
				 	}
			
 
				 	io_end->num_io_pages = 0;
			
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -75,11 +75,27 @@ static void ext4_write_super(struct super_block *sb);
 
				 static int ext4_freeze(struct super_block *sb);
			
 
				 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
			
 
				 		       const char *dev_name, void *data);
			
 
				+static inline int ext2_feature_set_ok(struct super_block *sb);
			
 
				+static inline int ext3_feature_set_ok(struct super_block *sb);
			
 
				 static int ext4_feature_set_ok(struct super_block *sb, int readonly);
			
 
				 static void ext4_destroy_lazyinit_thread(void);
			
 
				 static void ext4_unregister_li_request(struct super_block *sb);
			
 
				 static void ext4_clear_request_list(void);
			
 
				 
			
 
				+/*
			
 
				+ * When no standalone ext2 driver is configured (neither built in nor as a
			
 
				+ * module) and CONFIG_EXT4_USE_FOR_EXT23 is set, provide an "ext2"
			
 
				+ * filesystem type that mounts through ext4_mount().  IS_EXT2_SB() is true
			
 
				+ * for a superblock whose block device holder is that type; in every other
			
 
				+ * configuration it is the constant 0.
			
 
				+ */
			
 
				+#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
			
 
				+static struct file_system_type ext2_fs_type = {
			
 
				+	.owner		= THIS_MODULE,
			
 
				+	.name		= "ext2",
			
 
				+	.mount		= ext4_mount,
			
 
				+	.kill_sb	= kill_block_super,
			
 
				+	.fs_flags	= FS_REQUIRES_DEV,
			
 
				+};
			
 
				+#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
			
 
				+#else
			
 
				+#define IS_EXT2_SB(sb) (0)
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
			
 
				 static struct file_system_type ext3_fs_type = {
			
 
				 	.owner		= THIS_MODULE,
			
@@ -806,6 +822,8 @@ static void ext4_put_super(struct super_block *sb)
 
				 		invalidate_bdev(sbi->journal_bdev);
			
 
				 		ext4_blkdev_remove(sbi);
			
 
				 	}
			
 
				+	if (sbi->s_mmp_tsk)
			
 
				+		kthread_stop(sbi->s_mmp_tsk);
			
 
				 	sb->s_fs_info = NULL;
			
 
				 	/*
			
 
				 	 * Now that we are completely done shutting down the
			
@@ -1096,7 +1114,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 
				 
			
 
				 	if (!test_opt(sb, INIT_INODE_TABLE))
			
 
				 		seq_puts(seq, ",noinit_inode_table");
			
 
				-	else if (sbi->s_li_wait_mult)
			
 
				+	else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)
			
 
				 		seq_printf(seq, ",init_inode_table=%u",
			
 
				 			   (unsigned) sbi->s_li_wait_mult);
			
 
				 
			
@@ -1187,9 +1205,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 
				 				const char *data, size_t len, loff_t off);
			
 
				 
			
 
				 static const struct dquot_operations ext4_quota_operations = {
			
 
				-#ifdef CONFIG_QUOTA
			
 
				 	.get_reserved_space = ext4_get_reserved_space,
			
 
				-#endif
			
 
				 	.write_dquot	= ext4_write_dquot,
			
 
				 	.acquire_dquot	= ext4_acquire_dquot,
			
 
				 	.release_dquot	= ext4_release_dquot,
			
@@ -1900,7 +1916,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 
				 		ext4_msg(sb, KERN_WARNING,
			
 
				 			 "warning: mounting fs with errors, "
			
 
				 			 "running e2fsck is recommended");
			
 
				-	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
			
 
				+	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
			
 
				 		 le16_to_cpu(es->s_mnt_count) >=
			
 
				 		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
			
 
				 		ext4_msg(sb, KERN_WARNING,
			
@@ -2425,6 +2441,18 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
 
				 			  EXT4_SB(sb)->s_sectors_written_start) >> 1)));
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Show handler for the extent_cache_hits attribute: formats
			
 
				+ * sbi->extent_cache_hits as a decimal number followed by a newline into
			
 
				+ * buf (at most PAGE_SIZE bytes) and returns snprintf()'s result.
			
 
				+ */
			
 
				+static ssize_t extent_cache_hits_show(struct ext4_attr *a,
			
 
				+				      struct ext4_sb_info *sbi, char *buf)
			
 
				+{
			
 
				+	return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_hits);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Show handler for the extent_cache_misses attribute: formats
			
 
				+ * sbi->extent_cache_misses as a decimal number followed by a newline into
			
 
				+ * buf (at most PAGE_SIZE bytes) and returns snprintf()'s result.
			
 
				+ */
			
 
				+static ssize_t extent_cache_misses_show(struct ext4_attr *a,
			
 
				+					struct ext4_sb_info *sbi, char *buf)
			
 
				+{
			
 
				+	return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_misses);
			
 
				+}
			
 
				+
			
 
				 static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
			
 
				 					  struct ext4_sb_info *sbi,
			
 
				 					  const char *buf, size_t count)
			
@@ -2482,6 +2510,8 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
 
				 EXT4_RO_ATTR(delayed_allocation_blocks);
			
 
				 EXT4_RO_ATTR(session_write_kbytes);
			
 
				 EXT4_RO_ATTR(lifetime_write_kbytes);
			
 
				+EXT4_RO_ATTR(extent_cache_hits);
			
 
				+EXT4_RO_ATTR(extent_cache_misses);
			
 
				 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
			
 
				 		 inode_readahead_blks_store, s_inode_readahead_blks);
			
 
				 EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
			
@@ -2497,6 +2527,8 @@ static struct attribute *ext4_attrs[] = {
 
				 	ATTR_LIST(delayed_allocation_blocks),
			
 
				 	ATTR_LIST(session_write_kbytes),
			
 
				 	ATTR_LIST(lifetime_write_kbytes),
			
 
				+	ATTR_LIST(extent_cache_hits),
			
 
				+	ATTR_LIST(extent_cache_misses),
			
 
				 	ATTR_LIST(inode_readahead_blks),
			
 
				 	ATTR_LIST(inode_goal),
			
 
				 	ATTR_LIST(mb_stats),
			
@@ -2659,12 +2691,6 @@ static void print_daily_error_info(unsigned long arg)
 
				 	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
			
 
				 }
			
 
				 
			
 
				-static void ext4_lazyinode_timeout(unsigned long data)
			
 
				-{
			
 
				-	struct task_struct *p = (struct task_struct *)data;
			
 
				-	wake_up_process(p);
			
 
				-}
			
 
				-
			
 
				 /* Find next suitable group and run ext4_init_inode_table */
			
 
				 static int ext4_run_li_request(struct ext4_li_request *elr)
			
 
				 {
			
@@ -2696,11 +2722,8 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
 
				 		ret = ext4_init_inode_table(sb, group,
			
 
				 					    elr->lr_timeout ? 0 : 1);
			
 
				 		if (elr->lr_timeout == 0) {
			
 
				-			timeout = jiffies - timeout;
			
 
				-			if (elr->lr_sbi->s_li_wait_mult)
			
 
				-				timeout *= elr->lr_sbi->s_li_wait_mult;
			
 
				-			else
			
 
				-				timeout *= 20;
			
 
				+			timeout = (jiffies - timeout) *
			
 
				+				  elr->lr_sbi->s_li_wait_mult;
			
 
				 			elr->lr_timeout = timeout;
			
 
				 		}
			
 
				 		elr->lr_next_sched = jiffies + elr->lr_timeout;
			
@@ -2712,7 +2735,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
 
				 
			
 
				 /*
			
 
				  * Remove lr_request from the list_request and free the
			
 
				- * request tructure. Should be called with li_list_mtx held
			
 
				+ * request structure. Should be called with li_list_mtx held
			
 
				  */
			
 
				 static void ext4_remove_li_request(struct ext4_li_request *elr)
			
 
				 {
			
@@ -2730,14 +2753,16 @@ static void ext4_remove_li_request(struct ext4_li_request *elr)
 
				 
			
 
				 static void ext4_unregister_li_request(struct super_block *sb)
			
 
				 {
			
 
				-	struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request;
			
 
				-
			
 
				-	if (!ext4_li_info)
			
 
				+	mutex_lock(&ext4_li_mtx);	/* ext4_li_info is freed and set to NULL under this mutex, so hold it while using ext4_li_info */
			
 
				+	if (!ext4_li_info) {	/* no lazyinit info exists: nothing to unregister */
			
 
				+		mutex_unlock(&ext4_li_mtx);
			
 
				 		return;
			
 
				+	}
			
 
				 
			
 
				 	mutex_lock(&ext4_li_info->li_list_mtx);
			
 
				-	ext4_remove_li_request(elr);
			
 
				+	ext4_remove_li_request(EXT4_SB(sb)->s_li_request);	/* drop this sb's request; li_list_mtx is held as ext4_remove_li_request() requires */
			
 
				 	mutex_unlock(&ext4_li_info->li_list_mtx);
			
 
				+	mutex_unlock(&ext4_li_mtx);	/* released last: taken before li_list_mtx, dropped after it */
			
 
				 }
			
 
				 
			
 
				 static struct task_struct *ext4_lazyinit_task;
			
@@ -2756,17 +2781,10 @@ static int ext4_lazyinit_thread(void *arg)
 
				 	struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
			
 
				 	struct list_head *pos, *n;
			
 
				 	struct ext4_li_request *elr;
			
 
				-	unsigned long next_wakeup;
			
 
				-	DEFINE_WAIT(wait);
			
 
				+	unsigned long next_wakeup, cur;
			
 
				 
			
 
				 	BUG_ON(NULL == eli);
			
 
				 
			
 
				-	eli->li_timer.data = (unsigned long)current;
			
 
				-	eli->li_timer.function = ext4_lazyinode_timeout;
			
 
				-
			
 
				-	eli->li_task = current;
			
 
				-	wake_up(&eli->li_wait_task);
			
 
				-
			
 
				 cont_thread:
			
 
				 	while (true) {
			
 
				 		next_wakeup = MAX_JIFFY_OFFSET;
			
@@ -2797,19 +2815,15 @@ cont_thread:
 
				 		if (freezing(current))
			
 
				 			refrigerator();
			
 
				 
			
 
				-		if ((time_after_eq(jiffies, next_wakeup)) ||
			
 
				+		cur = jiffies;
			
 
				+		if ((time_after_eq(cur, next_wakeup)) ||
			
 
				 		    (MAX_JIFFY_OFFSET == next_wakeup)) {
			
 
				 			cond_resched();
			
 
				 			continue;
			
 
				 		}
			
 
				 
			
 
				-		eli->li_timer.expires = next_wakeup;
			
 
				-		add_timer(&eli->li_timer);
			
 
				-		prepare_to_wait(&eli->li_wait_daemon, &wait,
			
 
				-				TASK_INTERRUPTIBLE);
			
 
				-		if (time_before(jiffies, next_wakeup))
			
 
				-			schedule();
			
 
				-		finish_wait(&eli->li_wait_daemon, &wait);
			
 
				+		schedule_timeout_interruptible(next_wakeup - cur);
			
 
				+
			
 
				 		if (kthread_should_stop()) {
			
 
				 			ext4_clear_request_list();
			
 
				 			goto exit_thread;
			
@@ -2833,12 +2847,7 @@ exit_thread:
 
				 		goto cont_thread;
			
 
				 	}
			
 
				 	mutex_unlock(&eli->li_list_mtx);
			
 
				-	del_timer_sync(&ext4_li_info->li_timer);
			
 
				-	eli->li_task = NULL;
			
 
				-	wake_up(&eli->li_wait_task);
			
 
				-
			
 
				 	kfree(ext4_li_info);
			
 
				-	ext4_lazyinit_task = NULL;
			
 
				 	ext4_li_info = NULL;
			
 
				 	mutex_unlock(&ext4_li_mtx);
			
 
				 
			
@@ -2866,7 +2875,6 @@ static int ext4_run_lazyinit_thread(void)
 
				 	if (IS_ERR(ext4_lazyinit_task)) {
			
 
				 		int err = PTR_ERR(ext4_lazyinit_task);
			
 
				 		ext4_clear_request_list();
			
 
				-		del_timer_sync(&ext4_li_info->li_timer);
			
 
				 		kfree(ext4_li_info);
			
 
				 		ext4_li_info = NULL;
			
 
				 		printk(KERN_CRIT "EXT4: error %d creating inode table "
			
@@ -2875,8 +2883,6 @@ static int ext4_run_lazyinit_thread(void)
 
				 		return err;
			
 
				 	}
			
 
				 	ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
			
 
				-
			
 
				-	wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -2911,13 +2917,9 @@ static int ext4_li_info_new(void)
 
				 	if (!eli)
			
 
				 		return -ENOMEM;
			
 
				 
			
 
				-	eli->li_task = NULL;
			
 
				 	INIT_LIST_HEAD(&eli->li_request_list);
			
 
				 	mutex_init(&eli->li_list_mtx);
			
 
				 
			
 
				-	init_waitqueue_head(&eli->li_wait_daemon);
			
 
				-	init_waitqueue_head(&eli->li_wait_task);
			
 
				-	init_timer(&eli->li_timer);
			
 
				 	eli->li_state |= EXT4_LAZYINIT_QUIT;
			
 
				 
			
 
				 	ext4_li_info = eli;
			
@@ -2960,20 +2962,19 @@ static int ext4_register_li_request(struct super_block *sb,
 
				 	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
			
 
				 	int ret = 0;
			
 
				 
			
 
				-	if (sbi->s_li_request != NULL)
			
 
				+	if (sbi->s_li_request != NULL) {
			
 
				+		/*
			
 
				+		 * Reset timeout so it can be computed again, because
			
 
				+		 * s_li_wait_mult might have changed.
			
 
				+		 */
			
 
				+		sbi->s_li_request->lr_timeout = 0;
			
 
				 		return 0;
			
 
				+	}
			
 
				 
			
 
				 	if (first_not_zeroed == ngroups ||
			
 
				 	    (sb->s_flags & MS_RDONLY) ||
			
 
				-	    !test_opt(sb, INIT_INODE_TABLE)) {
			
 
				-		sbi->s_li_request = NULL;
			
 
				+	    !test_opt(sb, INIT_INODE_TABLE))
			
 
				 		return 0;
			
 
				-	}
			
 
				-
			
 
				-	if (first_not_zeroed == ngroups) {
			
 
				-		sbi->s_li_request = NULL;
			
 
				-		return 0;
			
 
				-	}
			
 
				 
			
 
				 	elr = ext4_li_request_new(sb, first_not_zeroed);
			
 
				 	if (!elr)
			
@@ -3166,6 +3167,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
				 	    ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
			
 
				 		set_opt(sb, DELALLOC);
			
 
				 
			
 
				+	/*
			
 
				+	 * set default s_li_wait_mult for lazyinit, for the case there is
			
 
				+	 * no mount option specified.
			
 
				+	 */
			
 
				+	sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
			
 
				+
			
 
				 	if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
			
 
				 			   &journal_devnum, &journal_ioprio, NULL, 0)) {
			
 
				 		ext4_msg(sb, KERN_WARNING,
			
@@ -3187,6 +3194,28 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
				 		       "feature flags set on rev 0 fs, "
			
 
				 		       "running e2fsck is recommended");
			
 
				 
			
 
				+	if (IS_EXT2_SB(sb)) {
			
 
				+		if (ext2_feature_set_ok(sb))
			
 
				+			ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
			
 
				+				 "using the ext4 subsystem");
			
 
				+		else {
			
 
				+			ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
			
 
				+				 "to feature incompatibilities");
			
 
				+			goto failed_mount;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	if (IS_EXT3_SB(sb)) {
			
 
				+		if (ext3_feature_set_ok(sb))
			
 
				+			ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
			
 
				+				 "using the ext4 subsystem");
			
 
				+		else {
			
 
				+			ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
			
 
				+				 "to feature incompatibilities");
			
 
				+			goto failed_mount;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	/*
			
 
				 	 * Check feature flags regardless of the revision level, since we
			
 
				 	 * previously didn't change the revision level when setting the flags,
			
@@ -3459,6 +3488,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
				 			  EXT4_HAS_INCOMPAT_FEATURE(sb,
			
 
				 				    EXT4_FEATURE_INCOMPAT_RECOVER));
			
 
				 
			
 
				+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
			
 
				+	    !(sb->s_flags & MS_RDONLY))
			
 
				+		if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
			
 
				+			goto failed_mount3;
			
 
				+
			
 
				 	/*
			
 
				 	 * The first inode we look at is the journal inode.  Don't try
			
 
				 	 * root first: it may be modified in the journal!
			
@@ -3474,7 +3508,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
				 		goto failed_mount_wq;
			
 
				 	} else {
			
 
				 		clear_opt(sb, DATA_FLAGS);
			
 
				-		set_opt(sb, WRITEBACK_DATA);
			
 
				 		sbi->s_journal = NULL;
			
 
				 		needs_recovery = 0;
			
 
				 		goto no_journal;
			
@@ -3707,6 +3740,8 @@ failed_mount3:
 
				 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
			
 
				 	percpu_counter_destroy(&sbi->s_dirs_counter);
			
 
				 	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
			
 
				+	if (sbi->s_mmp_tsk)
			
 
				+		kthread_stop(sbi->s_mmp_tsk);
			
 
				 failed_mount2:
			
 
				 	for (i = 0; i < db_count; i++)
			
 
				 		brelse(sbi->s_group_desc[i]);
			
@@ -4242,7 +4277,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 
				 	int enable_quota = 0;
			
 
				 	ext4_group_t g;
			
 
				 	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
			
 
				-	int err;
			
 
				+	int err = 0;
			
 
				 #ifdef CONFIG_QUOTA
			
 
				 	int i;
			
 
				 #endif
			
@@ -4368,6 +4403,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 
				 				goto restore_opts;
			
 
				 			if (!ext4_setup_super(sb, es, 0))
			
 
				 				sb->s_flags &= ~MS_RDONLY;
			
 
				+			if (EXT4_HAS_INCOMPAT_FEATURE(sb,
			
 
				+						     EXT4_FEATURE_INCOMPAT_MMP))
			
 
				+				if (ext4_multi_mount_protect(sb,
			
 
				+						le64_to_cpu(es->s_mmp_block))) {
			
 
				+					err = -EROFS;
			
 
				+					goto restore_opts;
			
 
				+				}
			
 
				 			enable_quota = 1;
			
 
				 		}
			
 
				 	}
			
@@ -4432,6 +4474,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 
				 	struct ext4_sb_info *sbi = EXT4_SB(sb);
			
 
				 	struct ext4_super_block *es = sbi->s_es;
			
 
				 	u64 fsid;
			
 
				+	s64 bfree;
			
 
				 
			
 
				 	if (test_opt(sb, MINIX_DF)) {
			
 
				 		sbi->s_overhead_last = 0;
			
@@ -4475,8 +4518,10 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 
				 	buf->f_type = EXT4_SUPER_MAGIC;
			
 
				 	buf->f_bsize = sb->s_blocksize;
			
 
				 	buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
			
 
				-	buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
			
 
				+	bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
			
 
				 		       percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
			
 
				+	/* prevent underflow in case that few free space is available */
			
 
				+	buf->f_bfree = max_t(s64, bfree, 0);
			
 
				 	buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
			
 
				 	if (buf->f_bfree < ext4_r_blocks_count(es))
			
 
				 		buf->f_bavail = 0;
			
@@ -4652,6 +4697,9 @@ static int ext4_quota_off(struct super_block *sb, int type)
 
				 	if (test_opt(sb, DELALLOC))
			
 
				 		sync_filesystem(sb);
			
 
				 
			
 
				+	if (!inode)
			
 
				+		goto out;
			
 
				+
			
 
				 	/* Update modification times of quota files when userspace can
			
 
				 	 * start looking at them */
			
 
				 	handle = ext4_journal_start(inode, 1);
			
@@ -4772,14 +4820,6 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
 
				 }
			
 
				 
			
 
				 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
			
 
				-static struct file_system_type ext2_fs_type = {
			
 
				-	.owner		= THIS_MODULE,
			
 
				-	.name		= "ext2",
			
 
				-	.mount		= ext4_mount,
			
 
				-	.kill_sb	= kill_block_super,
			
 
				-	.fs_flags	= FS_REQUIRES_DEV,
			
 
				-};
			
 
				-
			
 
				 static inline void register_as_ext2(void)
			
 
				 {
			
 
				 	int err = register_filesystem(&ext2_fs_type);
			
@@ -4792,10 +4832,22 @@ static inline void unregister_as_ext2(void)
 
				 {
			
 
				 	unregister_filesystem(&ext2_fs_type);
			
 
				 }
			
 
				+
			
 
				+static inline int ext2_feature_set_ok(struct super_block *sb)
			
 
				+{	/* Returns 1 if sb's feature flags allow it to be mounted as ext2, 0 otherwise. */
			
 
				+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))	/* incompat bits outside the ext2-supported mask */
			
 
				+		return 0;
			
 
				+	if (sb->s_flags & MS_RDONLY)	/* read-only mount: ro_compat bits are not examined */
			
 
				+		return 1;
			
 
				+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))	/* writable mount: ro_compat bits outside the ext2-supported mask */
			
 
				+		return 0;
			
 
				+	return 1;
			
 
				+}
			
 
				 MODULE_ALIAS("ext2");
			
 
				 #else
			
 
				 static inline void register_as_ext2(void) { }
			
 
				 static inline void unregister_as_ext2(void) { }
			
 
				+static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }	/* stub for builds where ext4 does not register as ext2 (register_as_ext2() is empty too); always returns 0 */
			
 
				 #endif
			
 
				 
			
 
				 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
			
@@ -4811,10 +4863,24 @@ static inline void unregister_as_ext3(void)
 
				 {
			
 
				 	unregister_filesystem(&ext3_fs_type);
			
 
				 }
			
 
				+
			
 
				+static inline int ext3_feature_set_ok(struct super_block *sb)
			
 
				+{	/* Returns 1 if sb's feature flags allow it to be mounted as ext3, 0 otherwise. */
			
 
				+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))	/* incompat bits outside the ext3-supported mask */
			
 
				+		return 0;
			
 
				+	if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))	/* no HAS_JOURNAL flag: not accepted as ext3 */
			
 
				+		return 0;
			
 
				+	if (sb->s_flags & MS_RDONLY)	/* read-only mount: ro_compat bits are not examined */
			
 
				+		return 1;
			
 
				+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP))	/* writable mount: ro_compat bits outside the ext3-supported mask */
			
 
				+		return 0;
			
 
				+	return 1;
			
 
				+}
			
 
				 MODULE_ALIAS("ext3");
			
 
				 #else
			
 
				 static inline void register_as_ext3(void) { }
			
 
				 static inline void unregister_as_ext3(void) { }
			
 
				+static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; }	/* stub for builds where ext4 does not register as ext3 (register_as_ext3() is empty too); always returns 0 */
			
 
				 #endif
			
 
				 
			
 
				 static struct file_system_type ext4_fs_type = {
			
@@ -4898,8 +4964,8 @@ static int __init ext4_init_fs(void)
 
				 	err = init_inodecache();
			
 
				 	if (err)
			
 
				 		goto out1;
			
 
				-	register_as_ext2();
			
 
				 	register_as_ext3();
			
 
				+	register_as_ext2();
			
 
				 	err = register_filesystem(&ext4_fs_type);
			
 
				 	if (err)
			
 
				 		goto out;
			
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -820,8 +820,8 @@ inserted:
 
				 			if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
			
 
				 				goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
			
 
				 
			
 
				-			block = ext4_new_meta_blocks(handle, inode,
			
 
				-						  goal, NULL, &error);
			
 
				+			block = ext4_new_meta_blocks(handle, inode, goal, 0,
			
 
				+						     NULL, &error);
			
 
				 			if (error)
			
 
				 				goto cleanup;
			
 
				 
			
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -219,7 +219,6 @@ static int journal_submit_data_buffers(journal_t *journal,
 
				 			ret = err;
			
 
				 		spin_lock(&journal->j_list_lock);
			
 
				 		J_ASSERT(jinode->i_transaction == commit_transaction);
			
 
				-		commit_transaction->t_flushed_data_blocks = 1;
			
 
				 		clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
			
 
				 		smp_mb__after_clear_bit();
			
 
				 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
			
@@ -672,12 +671,16 @@ start_journal_io:
 
				 		err = 0;
			
 
				 	}
			
 
				 
			
 
				+	write_lock(&journal->j_state_lock);
			
 
				+	J_ASSERT(commit_transaction->t_state == T_COMMIT);
			
 
				+	commit_transaction->t_state = T_COMMIT_DFLUSH;
			
 
				+	write_unlock(&journal->j_state_lock);
			
 
				 	/* 
			
 
				 	 * If the journal is not located on the file system device,
			
 
				 	 * then we must flush the file system device before we issue
			
 
				 	 * the commit record
			
 
				 	 */
			
 
				-	if (commit_transaction->t_flushed_data_blocks &&
			
 
				+	if (commit_transaction->t_need_data_flush &&
			
 
				 	    (journal->j_fs_dev != journal->j_dev) &&
			
 
				 	    (journal->j_flags & JBD2_BARRIER))
			
 
				 		blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
			
@@ -754,8 +757,13 @@ wait_for_iobuf:
 
				                    required. */
			
 
				 		JBUFFER_TRACE(jh, "file as BJ_Forget");
			
 
				 		jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
			
 
				-		/* Wake up any transactions which were waiting for this
			
 
				-		   IO to complete */
			
 
				+		/*
			
 
				+		 * Wake up any transactions which were waiting for this IO to
			
 
				+		 * complete. The barrier must be here so that changes by
			
 
				+		 * jbd2_journal_file_buffer() take effect before wake_up_bit()
			
 
				+		 * does the waitqueue check.
			
 
				+		 */
			
 
				+		smp_mb();
			
 
				 		wake_up_bit(&bh->b_state, BH_Unshadow);
			
 
				 		JBUFFER_TRACE(jh, "brelse shadowed buffer");
			
 
				 		__brelse(bh);
			
@@ -794,6 +802,10 @@ wait_for_iobuf:
 
				 		jbd2_journal_abort(journal, err);
			
 
				 
			
 
				 	jbd_debug(3, "JBD: commit phase 5\n");
			
 
				+	write_lock(&journal->j_state_lock);
			
 
				+	J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
			
 
				+	commit_transaction->t_state = T_COMMIT_JFLUSH;
			
 
				+	write_unlock(&journal->j_state_lock);
			
 
				 
			
 
				 	if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
			
 
				 				       JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
			
@@ -949,7 +961,7 @@ restart_loop:
 
				 
			
 
				 	jbd_debug(3, "JBD: commit phase 7\n");
			
 
				 
			
 
				-	J_ASSERT(commit_transaction->t_state == T_COMMIT);
			
 
				+	J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
			
 
				 
			
 
				 	commit_transaction->t_start = jiffies;
			
 
				 	stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging,
			
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -479,9 +479,12 @@ int __jbd2_log_space_left(journal_t *journal)
 
				 int __jbd2_log_start_commit(journal_t *journal, tid_t target)
			
 
				 {
			
 
				 	/*
			
 
				-	 * Are we already doing a recent enough commit?
			
 
				+	 * The only transaction we can possibly wait upon is the
			
 
				+	 * currently running transaction (if it exists).  Otherwise,
			
 
				+	 * the target tid must be an old one.
			
 
				 	 */
			
 
				-	if (!tid_geq(journal->j_commit_request, target)) {
			
 
				+	if (journal->j_running_transaction &&
			
 
				+	    journal->j_running_transaction->t_tid == target) {
			
 
				 		/*
			
 
				 		 * We want a new commit: OK, mark the request and wakeup the
			
 
				 		 * commit thread.  We do _not_ do the commit ourselves.
			
@@ -493,7 +496,15 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
 
				 			  journal->j_commit_sequence);
			
 
				 		wake_up(&journal->j_wait_commit);
			
 
				 		return 1;
			
 
				-	}
			
 
				+	} else if (!tid_geq(journal->j_commit_request, target))
			
 
				+		/* This should never happen, but if it does, preserve
			
 
				+		   the evidence before kjournald goes into a loop and
			
 
				+		   increments j_commit_sequence beyond all recognition. */
			
 
				+		WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n",
			
 
				+			  journal->j_commit_request,
			
 
				+			  journal->j_commit_sequence,
			
 
				+			  target, journal->j_running_transaction ? 
			
 
				+			  journal->j_running_transaction->t_tid : 0);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -576,6 +587,47 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Tell the caller whether the commit of transaction @tid has yet to send
			
 
				+ * its barrier request. Used to avoid sending a barrier twice in common
			
 
				+ * cases.
			
 
				+ *
			
 
				+ * @journal: journal the transaction belongs to
			
 
				+ * @tid: id of the transaction being asked about
			
 
				+ *
			
 
				+ * Return 1 if the given transaction has not yet sent the barrier request
			
 
				+ * connected with its commit. If 0 is returned, the transaction may or may
			
 
				+ * not have sent the barrier (0 is also returned when JBD2_BARRIER is not
			
 
				+ * set in the journal flags).
			
 
				+ */
			
 
				+int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
			
 
				+{
			
 
				+	int ret = 0;
			
 
				+	transaction_t *commit_trans;
			
 
				+
			
 
				+	if (!(journal->j_flags & JBD2_BARRIER))	/* JBD2_BARRIER not set on this journal */
			
 
				+		return 0;
			
 
				+	read_lock(&journal->j_state_lock);	/* the commit code changes t_state under the write side of this lock */
			
 
				+	/* Transaction already committed? */
			
 
				+	if (tid_geq(journal->j_commit_sequence, tid))
			
 
				+		goto out;
			
 
				+	commit_trans = journal->j_committing_transaction;
			
 
				+	if (!commit_trans || commit_trans->t_tid != tid) {	/* tid is neither committed nor the one committing now */
			
 
				+		ret = 1;
			
 
				+		goto out;
			
 
				+	}
			
 
				+	/*
			
 
				+	 * The transaction is being committed: has the commit already
			
 
				+	 * proceeded to submitting a flush to the fs partition?
			
 
				+	 *
			
 
				+	 * When the journal is on a different device than the filesystem,
			
 
				+	 * the answer is yes once t_state has reached T_COMMIT_DFLUSH, or
			
 
				+	 * if no data flush is needed at all (t_need_data_flush unset).
			
 
				+	 * When both share one device, the answer is yes once t_state has
			
 
				+	 * reached T_COMMIT_JFLUSH.
			
 
				+	 */
			
 
				+	if (journal->j_fs_dev != journal->j_dev) {
			
 
				+		if (!commit_trans->t_need_data_flush ||
			
 
				+		    commit_trans->t_state >= T_COMMIT_DFLUSH)
			
 
				+			goto out;
			
 
				+	} else {
			
 
				+		if (commit_trans->t_state >= T_COMMIT_JFLUSH)
			
 
				+			goto out;
			
 
				+	}
			
 
				+	ret = 1;
			
 
				+out:
			
 
				+	read_unlock(&journal->j_state_lock);
			
 
				+	return ret;
			
 
				+}
			
 
				+EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier);
			
 
				+
			
 
				 /*
			
 
				  * Wait for a specified commit to complete.
			
 
				  * The caller may not hold the journal lock.
			
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -82,7 +82,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
 
				  */
			
 
				 
			
 
				 /*
			
 
				- * Update transiaction's maximum wait time, if debugging is enabled.
			
 
				+ * Update transaction's maximum wait time, if debugging is enabled.
			
 
				  *
			
 
				  * In order for t_max_wait to be reliable, it must be protected by a
			
 
				  * lock.  But doing so will mean that start_this_handle() can not be
			
@@ -91,11 +91,10 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
 
				  * means that maximum wait time reported by the jbd2_run_stats
			
 
				  * tracepoint will always be zero.
			
 
				  */
			
 
				-static inline void update_t_max_wait(transaction_t *transaction)
			
 
				+static inline void update_t_max_wait(transaction_t *transaction,
			
 
				+				     unsigned long ts)
			
 
				 {
			
 
				 #ifdef CONFIG_JBD2_DEBUG
			
 
				-	unsigned long ts = jiffies;
			
 
				-
			
 
				 	if (jbd2_journal_enable_debug &&
			
 
				 	    time_after(transaction->t_start, ts)) {
			
 
				 		ts = jbd2_time_diff(ts, transaction->t_start);
			
@@ -121,6 +120,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
 
				 	tid_t		tid;
			
 
				 	int		needed, need_to_start;
			
 
				 	int		nblocks = handle->h_buffer_credits;
			
 
				+	unsigned long ts = jiffies;
			
 
				 
			
 
				 	if (nblocks > journal->j_max_transaction_buffers) {
			
 
				 		printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
			
@@ -271,7 +271,7 @@ repeat:
 
				 	/* OK, account for the buffers that this operation expects to
			
 
				 	 * use and add the handle to the running transaction. 
			
 
				 	 */
			
 
				-	update_t_max_wait(transaction);
			
 
				+	update_t_max_wait(transaction, ts);
			
 
				 	handle->h_transaction = transaction;
			
 
				 	atomic_inc(&transaction->t_updates);
			
 
				 	atomic_inc(&transaction->t_handle_count);
			
@@ -316,7 +316,8 @@ static handle_t *new_handle(int nblocks)
 
				  * This function is visible to journal users (like ext3fs), so is not
			
 
				  * called with the journal already locked.
			
 
				  *
			
 
				- * Return a pointer to a newly allocated handle, or NULL on failure
			
 
				+ * Return a pointer to a newly allocated handle, or an ERR_PTR() value
			
 
				+ * on failure.
			
 
				  */
			
 
				 handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
			
 
				 {
			
@@ -921,8 +922,8 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 
				 	 */
			
 
				 	JBUFFER_TRACE(jh, "cancelling revoke");
			
 
				 	jbd2_journal_cancel_revoke(handle, jh);
			
 
				-	jbd2_journal_put_journal_head(jh);
			
 
				 out:
			
 
				+	jbd2_journal_put_journal_head(jh);
			
 
				 	return err;
			
 
				 }
			
 
				 
			
@@ -2147,6 +2148,13 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
 
				 	    jinode->i_next_transaction == transaction)
			
 
				 		goto done;
			
 
				 
			
 
				+	/*
			
 
				+	 * We only ever set this variable to 1 so the test is safe. Since
			
 
				+	 * t_need_data_flush is likely to be set, we do the test to save some
			
 
				+	 * cacheline bouncing
			
 
				+	 */
			
 
				+	if (!transaction->t_need_data_flush)
			
 
				+		transaction->t_need_data_flush = 1;
			
 
				 	/* On some different transaction's list - should be
			
 
				 	 * the committing one */
			
 
				 	if (jinode->i_transaction) {
			
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -529,9 +529,10 @@ struct transaction_s
 
				 	enum {
			
 
				 		T_RUNNING,
			
 
				 		T_LOCKED,
			
 
				-		T_RUNDOWN,
			
 
				 		T_FLUSH,
			
 
				 		T_COMMIT,
			
 
				+		T_COMMIT_DFLUSH,
			
 
				+		T_COMMIT_JFLUSH,
			
 
				 		T_FINISHED
			
 
				 	}			t_state;
			
 
				 
			
@@ -658,7 +659,9 @@ struct transaction_s
 
				 	 * waiting for it to finish.
			
 
				 	 */
			
 
				 	unsigned int t_synchronous_commit:1;
			
 
				-	unsigned int t_flushed_data_blocks:1;
			
 
				+
			
 
				+	/* Disk flush needs to be sent to fs partition [no locking] */
			
 
				+	int			t_need_data_flush;
			
 
				 
			
 
				 	/*
			
 
				 	 * For use by the filesystem to store fs-specific data
			
@@ -1228,6 +1231,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *tid);
 
				 int jbd2_journal_force_commit_nested(journal_t *journal);
			
 
				 int jbd2_log_wait_commit(journal_t *journal, tid_t tid);
			
 
				 int jbd2_log_do_checkpoint(journal_t *journal);
			
 
				+int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid);
			
 
				 
			
 
				 void __jbd2_log_wait_for_space(journal_t *journal);
			
 
				 extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);