
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Mostly performance and bug fixes, plus some cleanups.  The one new
  feature this merge window is a new ioctl EXT4_IOC_SWAP_BOOT which
  allows installation of a hidden inode designed for boot loaders."

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (50 commits)
  ext4: fix type-widening bug in inode table readahead code
  ext4: add check for inodes_count overflow in new resize ioctl
  ext4: fix Kconfig documentation for CONFIG_EXT4_DEBUG
  ext4: fix online resizing for ext3-compat file systems
  jbd2: trace when lock_buffer in do_get_write_access takes a long time
  ext4: mark metadata blocks using bh flags
  buffer: add BH_Prio and BH_Meta flags
  ext4: mark all metadata I/O with REQ_META
  ext4: fix readdir error in case inline_data+^dir_index.
  ext4: fix readdir error in the case of inline_data+dir_index
  jbd2: use kmem_cache_zalloc instead of kmem_cache_alloc/memset
  ext4: mext_insert_extents should update extent block checksum
  ext4: move quota initialization out of inode allocation transaction
  ext4: reserve xattr index for Rich ACL support
  jbd2: reduce journal_head size
  ext4: clear buffer_uninit flag when submitting IO
  ext4: use io_end for multiple bios
  ext4: make ext4_bio_write_page() use BH_Async_Write flags
  ext4: Use kstrtoul() instead of parse_strtoul()
  ext4: defragmentation code cleanup
  ...
Linus Torvalds, 12 years ago (commit 149b306089)

+ 21 - 0
Documentation/filesystems/ext4.txt

@@ -494,6 +494,17 @@ Files in /sys/fs/ext4/<devname>
  session_write_kbytes         This file is read-only and shows the number of
                               kilobytes of data that have been written to this
                               filesystem since it was mounted.
+
+ reserved_clusters            This is a read-write file containing the
+                              number of reserved clusters in the file system,
+                              used in specific situations to avoid costly
+                              zeroout, unexpected ENOSPC, or possible data
+                              loss. The default is 2% of the file system or
+                              4096 clusters, whichever is smaller. The value
+                              can be changed, but it can never exceed the
+                              number of clusters in the file system. If there
+                              is not enough space for the reserved space when
+                              mounting, the file system mount will _not_ fail.
 ..............................................................................
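
A minimal user-space sketch of reading and then rewriting this knob (the
device name "sda1" and the use of plain stdio are illustrative assumptions;
error handling is trimmed):

	/* Sketch: query and tune reserved_clusters for one filesystem. */
	#include <stdio.h>

	int main(void)
	{
		const char *path = "/sys/fs/ext4/sda1/reserved_clusters";
		unsigned long long rsv;
		FILE *f = fopen(path, "r");

		if (!f)
			return 1;
		if (fscanf(f, "%llu", &rsv) != 1) {
			fclose(f);
			return 1;
		}
		fclose(f);
		printf("reserved clusters: %llu\n", rsv);

		/* Writing works the same way; the kernel is expected to
		 * reject values larger than the cluster count of the fs. */
		f = fopen(path, "w");
		if (!f)
			return 1;
		fprintf(f, "%llu\n", rsv);
		fclose(f);
		return 0;
	}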
 
 Ioctls
@@ -587,6 +598,16 @@ Table of Ext4 specific ioctls
 			      bitmaps and inode table, the userspace tool thus
 			      just passes the new number of blocks.
 
+EXT4_IOC_SWAP_BOOT	      Swap the data blocks and associated
+			      attributes (i_blocks, i_size, i_flags, ...)
+			      of the specified inode with those of inode
+			      EXT4_BOOT_LOADER_INO (#5). This is typically
+			      used to store a boot loader in a secure part of
+			      the filesystem, where it can't be changed by a
+			      normal user by accident.
+			      The data blocks of the previous boot loader
+			      will be associated with the given inode.
+
 ..............................................................................
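
For context, a hedged sketch of driving this ioctl from user space; the
image path below is an illustrative assumption, the ioctl number simply
mirrors the EXT4_IOC_SWAP_BOOT definition added in fs/ext4/ext4.h further
down, and sufficient privileges over the inode are assumed:

	/* Sketch: swap a prepared boot loader image into inode #5. */
	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>

	#define EXT4_IOC_SWAP_BOOT	_IO('f', 17)	/* from fs/ext4/ext4.h */

	int main(void)
	{
		int fd = open("/mnt/bootloader.img", O_RDWR);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (ioctl(fd, EXT4_IOC_SWAP_BOOT) < 0) {
			perror("EXT4_IOC_SWAP_BOOT");
			close(fd);
			return 1;
		}
		close(fd);
		return 0;
	}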
 
 References

+ 5 - 0
fs/buffer.c

@@ -2987,6 +2987,11 @@ int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
 	/* Take care of bh's that straddle the end of the device */
 	guard_bh_eod(rw, bio, bh);
 
+	if (buffer_meta(bh))
+		rw |= REQ_META;
+	if (buffer_prio(bh))
+		rw |= REQ_PRIO;
+
 	bio_get(bio);
 	submit_bio(rw, bio);
 

+ 2 - 1
fs/ext4/Kconfig

@@ -71,4 +71,5 @@ config EXT4_DEBUG
 	  Enables run-time debugging support for the ext4 filesystem.
 
 	  If you select Y here, then you will be able to turn on debugging
-	  with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug"
+	  with a command such as:
+		echo 1 > /sys/module/ext4/parameters/mballoc_debug

+ 41 - 12
fs/ext4/balloc.c

@@ -29,6 +29,23 @@ static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
  * balloc.c contains the blocks allocation and deallocation routines
  */
 
+/*
+ * Calculate block group number for a given block number
+ */
+ext4_group_t ext4_get_group_number(struct super_block *sb,
+				   ext4_fsblk_t block)
+{
+	ext4_group_t group;
+
+	if (test_opt2(sb, STD_GROUP_SIZE))
+		group = (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
+			 block) >>
+			(EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3);
+	else
+		ext4_get_group_no_and_offset(sb, block, &group, NULL);
+	return group;
+}
+
 /*
  * Calculate the block group number and offset into the block/cluster
  * allocation bitmap, given a block number
@@ -49,14 +66,18 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 
 }
 
-static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
-			ext4_group_t block_group)
+/*
+ * Check whether the 'block' lives within the 'block_group'. Returns 1 if so
+ * and 0 otherwise.
+ */
+static inline int ext4_block_in_group(struct super_block *sb,
+				      ext4_fsblk_t block,
+				      ext4_group_t block_group)
 {
 	ext4_group_t actual_group;
-	ext4_get_group_no_and_offset(sb, block, &actual_group, NULL);
-	if (actual_group == block_group)
-		return 1;
-	return 0;
+
+	actual_group = ext4_get_group_number(sb, block);
+	return (actual_group == block_group) ? 1 : 0;
 }
 
 /* Return the number of clusters used for file system metadata; this
@@ -420,7 +441,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
 	trace_ext4_read_block_bitmap_load(sb, block_group);
 	bh->b_end_io = ext4_end_bitmap_read;
 	get_bh(bh);
-	submit_bh(READ, bh);
+	submit_bh(READ | REQ_META | REQ_PRIO, bh);
 	return bh;
 verify:
 	ext4_validate_block_bitmap(sb, desc, block_group, bh);
@@ -478,20 +499,22 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
 				  s64 nclusters, unsigned int flags)
 {
-	s64 free_clusters, dirty_clusters, root_clusters;
+	s64 free_clusters, dirty_clusters, rsv, resv_clusters;
 	struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
 	struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;
 
 	free_clusters  = percpu_counter_read_positive(fcc);
 	dirty_clusters = percpu_counter_read_positive(dcc);
+	resv_clusters = atomic64_read(&sbi->s_resv_clusters);
 
 	/*
 	 * r_blocks_count should always be a multiple of the cluster ratio so
 	 * we are safe to do a plain bit shift only.
 	 */
-	root_clusters = ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
+	rsv = (ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits) +
+	      resv_clusters;
 
-	if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
+	if (free_clusters - (nclusters + rsv + dirty_clusters) <
 					EXT4_FREECLUSTERS_WATERMARK) {
 		free_clusters  = percpu_counter_sum_positive(fcc);
 		dirty_clusters = percpu_counter_sum_positive(dcc);
@@ -499,15 +522,21 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
 	/* Check whether we have space after accounting for current
 	 * dirty clusters & root reserved clusters.
 	 */
-	if (free_clusters >= ((root_clusters + nclusters) + dirty_clusters))
+	if (free_clusters >= (rsv + nclusters + dirty_clusters))
 		return 1;
 
 	/* Hm, nope.  Are (enough) root reserved clusters available? */
 	if (uid_eq(sbi->s_resuid, current_fsuid()) ||
 	    (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) ||
 	    capable(CAP_SYS_RESOURCE) ||
-		(flags & EXT4_MB_USE_ROOT_BLOCKS)) {
+	    (flags & EXT4_MB_USE_ROOT_BLOCKS)) {
 
+		if (free_clusters >= (nclusters + dirty_clusters +
+				      resv_clusters))
+			return 1;
+	}
+	/* No free blocks. Let's see if we can dip into reserved pool */
+	if (flags & EXT4_MB_USE_RESERVED) {
 		if (free_clusters >= (nclusters + dirty_clusters))
 			return 1;
 	}
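
To make the STD_GROUP_SIZE fast path in ext4_get_group_number() above
concrete, here is a minimal stand-alone sketch of the same arithmetic; the
parameters (4KiB blocks, no bigalloc, s_first_data_block = 0) are
illustrative assumptions:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long block = 100000;
		unsigned int block_size_bits = 12;	/* 4KiB blocks */
		unsigned int cluster_bits = 0;		/* no bigalloc */
		unsigned long long first_data_block = 0;

		/* With a standard group size there are blocksize * 8 =
		 * 1 << (12 + 0 + 3) = 32768 blocks per group, so the
		 * group number is a single shift, no division needed. */
		unsigned long long group = (first_data_block + block) >>
			(block_size_bits + cluster_bits + 3);

		printf("block %llu is in group %llu\n", block, group); /* 3 */
		return 0;
	}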

+ 11 - 9
fs/ext4/dir.c

@@ -46,7 +46,8 @@ static int is_dx_dir(struct inode *inode)
 	if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
 		     EXT4_FEATURE_COMPAT_DIR_INDEX) &&
 	    ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
-	     ((inode->i_size >> sb->s_blocksize_bits) == 1)))
+	     ((inode->i_size >> sb->s_blocksize_bits) == 1) ||
+	     ext4_has_inline_data(inode)))
 		return 1;
 
 	return 0;
@@ -115,14 +116,6 @@ static int ext4_readdir(struct file *filp,
 	int ret = 0;
 	int dir_has_error = 0;
 
-	if (ext4_has_inline_data(inode)) {
-		int has_inline_data = 1;
-		ret = ext4_read_inline_dir(filp, dirent, filldir,
-					   &has_inline_data);
-		if (has_inline_data)
-			return ret;
-	}
-
 	if (is_dx_dir(inode)) {
 		err = ext4_dx_readdir(filp, dirent, filldir);
 		if (err != ERR_BAD_DX_DIR) {
@@ -136,6 +129,15 @@ static int ext4_readdir(struct file *filp,
 		ext4_clear_inode_flag(file_inode(filp),
 				      EXT4_INODE_INDEX);
 	}
+
+	if (ext4_has_inline_data(inode)) {
+		int has_inline_data = 1;
+		ret = ext4_read_inline_dir(filp, dirent, filldir,
+					   &has_inline_data);
+		if (has_inline_data)
+			return ret;
+	}
+
 	stored = 0;
 	offset = filp->f_pos & (sb->s_blocksize - 1);
 

+ 70 - 31
fs/ext4/ext4.h

@@ -121,6 +121,8 @@ typedef unsigned int ext4_group_t;
 #define EXT4_MB_STREAM_ALLOC		0x0800
 /* Use reserved root blocks if needed */
 #define EXT4_MB_USE_ROOT_BLOCKS		0x1000
+/* Use blocks from reserved pool */
+#define EXT4_MB_USE_RESERVED		0x2000
 
 struct ext4_allocation_request {
 	/* target inode for block we're allocating */
@@ -196,19 +198,8 @@ struct mpage_da_data {
 #define EXT4_IO_END_ERROR	0x0002
 #define EXT4_IO_END_DIRECT	0x0004
 
-struct ext4_io_page {
-	struct page	*p_page;
-	atomic_t	p_count;
-};
-
-#define MAX_IO_PAGES 128
-
 /*
  * For converting uninitialized extents on a work queue.
- *
- * 'page' is only used from the writepage() path; 'pages' is only used for
- * buffered writes; they are used to keep page references until conversion
- * takes place.  For AIO/DIO, neither field is filled in.
  */
 typedef struct ext4_io_end {
 	struct list_head	list;		/* per-file finished IO list */
@@ -218,15 +209,13 @@ typedef struct ext4_io_end {
 	ssize_t			size;		/* size of the extent */
 	struct kiocb		*iocb;		/* iocb struct for AIO */
 	int			result;		/* error value for AIO */
-	int			num_io_pages;   /* for writepages() */
-	struct ext4_io_page	*pages[MAX_IO_PAGES]; /* for writepages() */
+	atomic_t		count;		/* reference counter */
 } ext4_io_end_t;
 
 struct ext4_io_submit {
 	int			io_op;
 	struct bio		*io_bio;
 	ext4_io_end_t		*io_end;
-	struct ext4_io_page	*io_page;
 	sector_t		io_next_block;
 };
 
@@ -403,7 +392,7 @@ struct flex_groups {
 #define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */
 
 #define EXT4_FL_USER_VISIBLE		0x004BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE		0x004B80FF /* User modifiable flags */
+#define EXT4_FL_USER_MODIFIABLE		0x004380FF /* User modifiable flags */
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
@@ -557,9 +546,8 @@ enum {
 #define EXT4_GET_BLOCKS_UNINIT_EXT		0x0002
 #define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT	(EXT4_GET_BLOCKS_UNINIT_EXT|\
 						 EXT4_GET_BLOCKS_CREATE)
-	/* Caller is from the delayed allocation writeout path,
-	   so set the magic i_delalloc_reserve_flag after taking the
-	   inode allocation semaphore for */
+	/* Caller is from the delayed allocation writeout path,
+	 * finally doing the actual allocation of delayed blocks */
 #define EXT4_GET_BLOCKS_DELALLOC_RESERVE	0x0004
 	/* caller is from the direct IO path, request to create an
 	uninitialized extent if not allocated, split the uninitialized
@@ -571,8 +559,9 @@ enum {
 	/* Convert extent to initialized after IO complete */
 #define EXT4_GET_BLOCKS_IO_CONVERT_EXT		(EXT4_GET_BLOCKS_CONVERT|\
 					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
-	/* Punch out blocks of an extent */
-#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT		0x0020
+	/* Eventual metadata allocation (due to growing extent tree)
+	 * should not fail, so try to use reserved blocks for that. */
+#define EXT4_GET_BLOCKS_METADATA_NOFAIL		0x0020
 	/* Don't normalize allocation size (used for fallocate) */
 #define EXT4_GET_BLOCKS_NO_NORMALIZE		0x0040
 	/* Request will not result in inode size update (used for fallocate) */
@@ -616,6 +605,7 @@ enum {
 #define EXT4_IOC_ALLOC_DA_BLKS		_IO('f', 12)
 #define EXT4_IOC_MOVE_EXT		_IOWR('f', 15, struct move_extent)
 #define EXT4_IOC_RESIZE_FS		_IOW('f', 16, __u64)
+#define EXT4_IOC_SWAP_BOOT		_IO('f', 17)
 
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
@@ -949,7 +939,7 @@ struct ext4_inode_info {
 #define EXT2_FLAGS_TEST_FILESYS		0x0004	/* to test development code */
 
 /*
- * Mount flags
+ * Mount flags set via mount options or defaults
  */
 #define EXT4_MOUNT_GRPID		0x00004	/* Create files with directory's group */
 #define EXT4_MOUNT_DEBUG		0x00008	/* Some debugging messages */
@@ -981,8 +971,16 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_DISCARD		0x40000000 /* Issue DISCARD requests */
 #define EXT4_MOUNT_INIT_INODE_TABLE	0x80000000 /* Initialize uninitialized itables */
 
+/*
+ * Mount flags set automatically (they cannot be set via a mount option),
+ * based on a per file system feature or property, or in special cases such
+ * as distinguishing between an explicit mount option and the default.
+ */
 #define EXT4_MOUNT2_EXPLICIT_DELALLOC	0x00000001 /* User explicitly
 						      specified delalloc */
+#define EXT4_MOUNT2_STD_GROUP_SIZE	0x00000002 /* We have standard group
+						      size of blocksize * 8
+						      blocks */
 
 #define clear_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt &= \
 						~EXT4_MOUNT_##opt
@@ -1179,6 +1177,7 @@ struct ext4_sb_info {
 	unsigned int s_mount_flags;
 	unsigned int s_def_mount_opt;
 	ext4_fsblk_t s_sb_block;
+	atomic64_t s_resv_clusters;
 	kuid_t s_resuid;
 	kgid_t s_resgid;
 	unsigned short s_mount_state;
@@ -1333,6 +1332,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 	return ino == EXT4_ROOT_INO ||
 		ino == EXT4_USR_QUOTA_INO ||
 		ino == EXT4_GRP_QUOTA_INO ||
+		ino == EXT4_BOOT_LOADER_INO ||
 		ino == EXT4_JOURNAL_INO ||
 		ino == EXT4_RESIZE_INO ||
 		(ino >= EXT4_FIRST_INO(sb) &&
@@ -1374,6 +1374,7 @@ enum {
 	EXT4_STATE_DIOREAD_LOCK,	/* Disable support for dio read
 					   nolocking */
 	EXT4_STATE_MAY_INLINE_DATA,	/* may have in-inode data */
+	EXT4_STATE_ORDERED_MODE,	/* data=ordered mode */
 };
 
 #define EXT4_INODE_BIT_FNS(name, field, offset)				\
@@ -1784,9 +1785,6 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
  */
 #define ERR_BAD_DX_DIR	-75000
 
-void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
-			ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
-
 /*
  * Timeout and state flag for lazy initialization inode thread.
  */
@@ -1908,6 +1906,13 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
 				  struct buffer_head *bh);
 
 /* balloc.c */
+extern void ext4_get_group_no_and_offset(struct super_block *sb,
+					 ext4_fsblk_t blocknr,
+					 ext4_group_t *blockgrpp,
+					 ext4_grpblk_t *offsetp);
+extern ext4_group_t ext4_get_group_number(struct super_block *sb,
+					  ext4_fsblk_t block);
+
 extern void ext4_validate_block_bitmap(struct super_block *sb,
 				       struct ext4_group_desc *desc,
 				       unsigned int block_group,
@@ -2108,8 +2113,9 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
 				unsigned long nr_segs);
 extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
 extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
-extern void ext4_ind_truncate(struct inode *inode);
-extern int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length);
+extern void ext4_ind_truncate(handle_t *, struct inode *inode);
+extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
+				 ext4_lblk_t first, ext4_lblk_t stop);
 
 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
@@ -2117,6 +2123,7 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
 
 /* migrate.c */
 extern int ext4_ext_migrate(struct inode *);
+extern int ext4_ind_migrate(struct inode *inode);
 
 /* namei.c */
 extern int ext4_dirent_csum_verify(struct inode *inode,
@@ -2511,6 +2518,11 @@ extern int ext4_try_create_inline_dir(handle_t *handle,
 extern int ext4_read_inline_dir(struct file *filp,
 				void *dirent, filldir_t filldir,
 				int *has_inline_data);
+extern int htree_inlinedir_to_tree(struct file *dir_file,
+				   struct inode *dir, ext4_lblk_t block,
+				   struct dx_hash_info *hinfo,
+				   __u32 start_hash, __u32 start_minor_hash,
+				   int *has_inline_data);
 extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
 					const struct qstr *d_name,
 					struct ext4_dir_entry_2 **res_dir,
@@ -2547,6 +2559,24 @@ extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
 extern int ext4_handle_dirty_dirent_node(handle_t *handle,
 					 struct inode *inode,
 					 struct buffer_head *bh);
+#define S_SHIFT 12
+static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = {
+	[S_IFREG >> S_SHIFT]	= EXT4_FT_REG_FILE,
+	[S_IFDIR >> S_SHIFT]	= EXT4_FT_DIR,
+	[S_IFCHR >> S_SHIFT]	= EXT4_FT_CHRDEV,
+	[S_IFBLK >> S_SHIFT]	= EXT4_FT_BLKDEV,
+	[S_IFIFO >> S_SHIFT]	= EXT4_FT_FIFO,
+	[S_IFSOCK >> S_SHIFT]	= EXT4_FT_SOCK,
+	[S_IFLNK >> S_SHIFT]	= EXT4_FT_SYMLINK,
+};
+
+static inline void ext4_set_de_type(struct super_block *sb,
+				struct ext4_dir_entry_2 *de,
+				umode_t mode) {
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE))
+		de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
+}
+
 
 /* symlink.c */
 extern const struct inode_operations ext4_symlink_inode_operations;
@@ -2573,9 +2603,9 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
 				       int chunk);
 extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			       struct ext4_map_blocks *map, int flags);
-extern void ext4_ext_truncate(struct inode *);
-extern int ext4_ext_punch_hole(struct file *file, loff_t offset,
-				loff_t length);
+extern void ext4_ext_truncate(handle_t *, struct inode *);
+extern int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
+				 ext4_lblk_t end);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
 extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
@@ -2609,17 +2639,26 @@ extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 
 
 /* move_extent.c */
+extern void ext4_double_down_write_data_sem(struct inode *first,
+					    struct inode *second);
+extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
+					  struct inode *donor_inode);
+void ext4_inode_double_lock(struct inode *inode1, struct inode *inode2);
+void ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2);
 extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 			     __u64 start_orig, __u64 start_donor,
 			     __u64 len, __u64 *moved_len);
 
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
-extern void ext4_add_complete_io(ext4_io_end_t *io_end);
 extern void ext4_exit_pageio(void);
 extern void ext4_ioend_shutdown(struct inode *);
-extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
+extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
+extern int ext4_put_io_end(ext4_io_end_t *io_end);
+extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
+extern void ext4_io_submit_init(struct ext4_io_submit *io,
+				struct writeback_control *wbc);
 extern void ext4_end_io_work(struct work_struct *work);
 extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,

+ 5 - 0
fs/ext4/ext4_extents.h

@@ -270,5 +270,10 @@ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
 				     0xffff);
 }
 
+#define ext4_ext_dirty(handle, inode, path) \
+		__ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
+int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
+		     struct inode *inode, struct ext4_ext_path *path);
+
 #endif /* _EXT4_EXTENTS */
 

+ 8 - 0
fs/ext4/ext4_jbd2.c

@@ -43,6 +43,8 @@ handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
 {
 	journal_t *journal;
 
+	might_sleep();
+
 	trace_ext4_journal_start(sb, nblocks, _RET_IP_);
 	if (sb->s_flags & MS_RDONLY)
 		return ERR_PTR(-EROFS);
@@ -113,6 +115,8 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
 {
 	int err = 0;
 
+	might_sleep();
+
 	if (ext4_handle_valid(handle)) {
 		err = jbd2_journal_get_write_access(handle, bh);
 		if (err)
@@ -209,6 +213,10 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 {
 	int err = 0;
 
+	might_sleep();
+
+	set_buffer_meta(bh);
+	set_buffer_prio(bh);
 	if (ext4_handle_valid(handle)) {
 		err = jbd2_journal_dirty_metadata(handle, bh);
 		if (err) {

+ 9 - 3
fs/ext4/ext4_jbd2.h

@@ -29,11 +29,13 @@
  * block to complete the transaction.
  *
  * For extents-enabled fs we may have to allocate and modify up to
- * 5 levels of tree + root which are stored in the inode. */
+ * 5 levels of tree plus a data block (for each of these we need bitmap +
+ * group summaries), the root which is stored in the inode, and the sb
+ */
 
 #define EXT4_SINGLEDATA_TRANS_BLOCKS(sb)				\
 	(EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)   \
-	 ? 27U : 8U)
+	 ? 20U : 8U)
 
 /* Extended attribute operations touch at most two data buffers,
  * two bitmap buffers, and two group summaries, in addition to the inode
@@ -194,16 +196,20 @@ static inline void ext4_journal_callback_add(handle_t *handle,
  * ext4_journal_callback_del: delete a registered callback
  * @handle: active journal transaction handle on which callback was registered
  * @jce: registered journal callback entry to unregister
 * Return true if the object was successfully removed
  */
-static inline void ext4_journal_callback_del(handle_t *handle,
+static inline bool ext4_journal_callback_try_del(handle_t *handle,
 					     struct ext4_journal_cb_entry *jce)
 {
+	bool deleted;
 	struct ext4_sb_info *sbi =
 			EXT4_SB(handle->h_transaction->t_journal->j_private);
 
 	spin_lock(&sbi->s_md_lock);
+	deleted = !list_empty(&jce->jce_list);
 	list_del_init(&jce->jce_list);
 	spin_unlock(&sbi->s_md_lock);
+	return deleted;
 }
 
 int
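
One way to read the new 20-block estimate for extent-mapped files (this
breakdown is an interpretation of the comment above, not text from the
commit):

	5 extent tree levels + 1 data block            =  6 blocks
	bitmap + group summary for each of those 6     = 12 blocks
	extent tree root (stored in the inode itself)  =  1 block
	superblock                                     =  1 block
	                                               -----------
	                                                 20 blocks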

+ 197 - 325
fs/ext4/extents.c

@@ -157,11 +157,8 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
  *  - ENOMEM
  *  - EIO
  */
-#define ext4_ext_dirty(handle, inode, path) \
-		__ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
-static int __ext4_ext_dirty(const char *where, unsigned int line,
-			    handle_t *handle, struct inode *inode,
-			    struct ext4_ext_path *path)
+int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
+		     struct inode *inode, struct ext4_ext_path *path)
 {
 	int err;
 	if (path->p_bh) {
@@ -1813,39 +1810,101 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	}
 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
+	eh = path[depth].p_hdr;
 	if (unlikely(path[depth].p_hdr == NULL)) {
 		EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
 		return -EIO;
 	}
 
 	/* try to insert block into found extent and return */
-	if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
-		&& ext4_can_extents_be_merged(inode, ex, newext)) {
-		ext_debug("append [%d]%d block to %u:[%d]%d (from %llu)\n",
-			  ext4_ext_is_uninitialized(newext),
-			  ext4_ext_get_actual_len(newext),
-			  le32_to_cpu(ex->ee_block),
-			  ext4_ext_is_uninitialized(ex),
-			  ext4_ext_get_actual_len(ex),
-			  ext4_ext_pblock(ex));
-		err = ext4_ext_get_access(handle, inode, path + depth);
-		if (err)
-			return err;
+	if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)) {
 
 		/*
-		 * ext4_can_extents_be_merged should have checked that either
-		 * both extents are uninitialized, or both aren't. Thus we
-		 * need to check only one of them here.
+		 * Try to see whether we should rather test the extent to
+		 * the right of ex, or to the left of ex. This is because
+		 * ext4_ext_find_extent() can return either the extent on
+		 * the left or the one on the right of the searched
+		 * position. This will make merging more effective.
 		 */
-		if (ext4_ext_is_uninitialized(ex))
-			uninitialized = 1;
-		ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+		if (ex < EXT_LAST_EXTENT(eh) &&
+		    (le32_to_cpu(ex->ee_block) +
+		    ext4_ext_get_actual_len(ex) <
+		    le32_to_cpu(newext->ee_block))) {
+			ex += 1;
+			goto prepend;
+		} else if ((ex > EXT_FIRST_EXTENT(eh)) &&
+			   (le32_to_cpu(newext->ee_block) +
+			   ext4_ext_get_actual_len(newext) <
+			   le32_to_cpu(ex->ee_block)))
+			ex -= 1;
+
+		/* Try to append newex to the ex */
+		if (ext4_can_extents_be_merged(inode, ex, newext)) {
+			ext_debug("append [%d]%d block to %u:[%d]%d "
+				  "(from %llu)\n",
+				  ext4_ext_is_uninitialized(newext),
+				  ext4_ext_get_actual_len(newext),
+				  le32_to_cpu(ex->ee_block),
+				  ext4_ext_is_uninitialized(ex),
+				  ext4_ext_get_actual_len(ex),
+				  ext4_ext_pblock(ex));
+			err = ext4_ext_get_access(handle, inode,
+						  path + depth);
+			if (err)
+				return err;
+
+			/*
+			 * ext4_can_extents_be_merged should have checked
+			 * that either both extents are uninitialized, or
+			 * both aren't. Thus we need to check only one of
+			 * them here.
+			 */
+			if (ext4_ext_is_uninitialized(ex))
+				uninitialized = 1;
+			ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
 					+ ext4_ext_get_actual_len(newext));
-		if (uninitialized)
-			ext4_ext_mark_uninitialized(ex);
-		eh = path[depth].p_hdr;
-		nearex = ex;
-		goto merge;
+			if (uninitialized)
+				ext4_ext_mark_uninitialized(ex);
+			eh = path[depth].p_hdr;
+			nearex = ex;
+			goto merge;
+		}
+
+prepend:
+		/* Try to prepend newex to the ex */
+		if (ext4_can_extents_be_merged(inode, newext, ex)) {
+			ext_debug("prepend %u[%d]%d block to %u:[%d]%d "
+				  "(from %llu)\n",
+				  le32_to_cpu(newext->ee_block),
+				  ext4_ext_is_uninitialized(newext),
+				  ext4_ext_get_actual_len(newext),
+				  le32_to_cpu(ex->ee_block),
+				  ext4_ext_is_uninitialized(ex),
+				  ext4_ext_get_actual_len(ex),
+				  ext4_ext_pblock(ex));
+			err = ext4_ext_get_access(handle, inode,
+						  path + depth);
+			if (err)
+				return err;
+
+			/*
+			 * ext4_can_extents_be_merged should have checked
+			 * that either both extents are uninitialized, or
+			 * both aren't. Thus we need to check only one of
+			 * them here.
+			 */
+			if (ext4_ext_is_uninitialized(ex))
+				uninitialized = 1;
+			ex->ee_block = newext->ee_block;
+			ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
+			ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+					+ ext4_ext_get_actual_len(newext));
+			if (uninitialized)
+				ext4_ext_mark_uninitialized(ex);
+			eh = path[depth].p_hdr;
+			nearex = ex;
+			goto merge;
+		}
 	}
 
 	depth = ext_depth(inode);
@@ -1880,8 +1939,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	 * There is no free space in the found leaf.
 	 * We're gonna add a new leaf in the tree.
 	 */
-	if (flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT)
-		flags = EXT4_MB_USE_ROOT_BLOCKS;
+	if (flag & EXT4_GET_BLOCKS_METADATA_NOFAIL)
+		flags = EXT4_MB_USE_RESERVED;
 	err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext);
 	if (err)
 		goto cleanup;
@@ -2599,8 +2658,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
 	return 1;
 }
 
-static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
-				 ext4_lblk_t end)
+int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
+			  ext4_lblk_t end)
 {
 	struct super_block *sb = inode->i_sb;
 	int depth = ext_depth(inode);
@@ -2667,12 +2726,14 @@ again:
 
 			/*
 			 * Split the extent in two so that 'end' is the last
-			 * block in the first new extent
+			 * block in the first new extent. Also we should not
+			 * fail removing space due to ENOSPC, so try to use
+			 * reserved blocks if that happens.
 			 */
 			err = ext4_split_extent_at(handle, inode, path,
-						end + 1, split_flag,
-						EXT4_GET_BLOCKS_PRE_IO |
-						EXT4_GET_BLOCKS_PUNCH_OUT_EXT);
+					end + 1, split_flag,
+					EXT4_GET_BLOCKS_PRE_IO |
+					EXT4_GET_BLOCKS_METADATA_NOFAIL);
 
 			if (err < 0)
 				goto out;
@@ -3147,35 +3208,35 @@ out:
 static int ext4_ext_convert_to_initialized(handle_t *handle,
 					   struct inode *inode,
 					   struct ext4_map_blocks *map,
-					   struct ext4_ext_path *path)
+					   struct ext4_ext_path *path,
+					   int flags)
 {
 	struct ext4_sb_info *sbi;
 	struct ext4_extent_header *eh;
 	struct ext4_map_blocks split_map;
 	struct ext4_extent zero_ex;
-	struct ext4_extent *ex;
+	struct ext4_extent *ex, *abut_ex;
 	ext4_lblk_t ee_block, eof_block;
-	unsigned int ee_len, depth;
-	int allocated, max_zeroout = 0;
+	unsigned int ee_len, depth, map_len = map->m_len;
+	int allocated = 0, max_zeroout = 0;
 	int err = 0;
 	int split_flag = 0;
 
 	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
 		"block %llu, max_blocks %u\n", inode->i_ino,
-		(unsigned long long)map->m_lblk, map->m_len);
+		(unsigned long long)map->m_lblk, map_len);
 
 	sbi = EXT4_SB(inode->i_sb);
 	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
 		inode->i_sb->s_blocksize_bits;
-	if (eof_block < map->m_lblk + map->m_len)
-		eof_block = map->m_lblk + map->m_len;
+	if (eof_block < map->m_lblk + map_len)
+		eof_block = map->m_lblk + map_len;
 
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
 	ex = path[depth].p_ext;
 	ee_block = le32_to_cpu(ex->ee_block);
 	ee_len = ext4_ext_get_actual_len(ex);
-	allocated = ee_len - (map->m_lblk - ee_block);
 	zero_ex.ee_len = 0;
 
 	trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
@@ -3186,77 +3247,121 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 
 	/*
 	 * Attempt to transfer newly initialized blocks from the currently
-	 * uninitialized extent to its left neighbor. This is much cheaper
+	 * uninitialized extent to its neighbor. This is much cheaper
 	 * than an insertion followed by a merge as those involve costly
-	 * memmove() calls. This is the common case in steady state for
-	 * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append
-	 * writes.
+	 * memmove() calls. Transferring to the left is the common case in
+	 * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE)
+	 * followed by append writes.
 	 *
 	 * Limitations of the current logic:
-	 *  - L1: we only deal with writes at the start of the extent.
-	 *    The approach could be extended to writes at the end
-	 *    of the extent but this scenario was deemed less common.
-	 *  - L2: we do not deal with writes covering the whole extent.
+	 *  - L1: we do not deal with writes covering the whole extent.
 	 *    This would require removing the extent if the transfer
 	 *    is possible.
-	 *  - L3: we only attempt to merge with an extent stored in the
+	 *  - L2: we only attempt to merge with an extent stored in the
 	 *    same extent tree node.
 	 */
-	if ((map->m_lblk == ee_block) &&	/*L1*/
-		(map->m_len < ee_len) &&	/*L2*/
-		(ex > EXT_FIRST_EXTENT(eh))) {	/*L3*/
-		struct ext4_extent *prev_ex;
+	if ((map->m_lblk == ee_block) &&
+		/* See if we can merge left */
+		(map_len < ee_len) &&		/*L1*/
+		(ex > EXT_FIRST_EXTENT(eh))) {	/*L2*/
 		ext4_lblk_t prev_lblk;
 		ext4_fsblk_t prev_pblk, ee_pblk;
-		unsigned int prev_len, write_len;
+		unsigned int prev_len;
 
-		prev_ex = ex - 1;
-		prev_lblk = le32_to_cpu(prev_ex->ee_block);
-		prev_len = ext4_ext_get_actual_len(prev_ex);
-		prev_pblk = ext4_ext_pblock(prev_ex);
+		abut_ex = ex - 1;
+		prev_lblk = le32_to_cpu(abut_ex->ee_block);
+		prev_len = ext4_ext_get_actual_len(abut_ex);
+		prev_pblk = ext4_ext_pblock(abut_ex);
 		ee_pblk = ext4_ext_pblock(ex);
-		write_len = map->m_len;
 
 		/*
-		 * A transfer of blocks from 'ex' to 'prev_ex' is allowed
+		 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
 		 * upon those conditions:
-		 * - C1: prev_ex is initialized,
-		 * - C2: prev_ex is logically abutting ex,
-		 * - C3: prev_ex is physically abutting ex,
-		 * - C4: prev_ex can receive the additional blocks without
+		 * - C1: abut_ex is initialized,
+		 * - C2: abut_ex is logically abutting ex,
+		 * - C3: abut_ex is physically abutting ex,
+		 * - C4: abut_ex can receive the additional blocks without
 		 *   overflowing the (initialized) length limit.
 		 */
-		if ((!ext4_ext_is_uninitialized(prev_ex)) &&		/*C1*/
+		if ((!ext4_ext_is_uninitialized(abut_ex)) &&		/*C1*/
 			((prev_lblk + prev_len) == ee_block) &&		/*C2*/
 			((prev_pblk + prev_len) == ee_pblk) &&		/*C3*/
-			(prev_len < (EXT_INIT_MAX_LEN - write_len))) {	/*C4*/
+			(prev_len < (EXT_INIT_MAX_LEN - map_len))) {	/*C4*/
 			err = ext4_ext_get_access(handle, inode, path + depth);
 			if (err)
 				goto out;
 
 			trace_ext4_ext_convert_to_initialized_fastpath(inode,
-				map, ex, prev_ex);
+				map, ex, abut_ex);
 
-			/* Shift the start of ex by 'write_len' blocks */
-			ex->ee_block = cpu_to_le32(ee_block + write_len);
-			ext4_ext_store_pblock(ex, ee_pblk + write_len);
-			ex->ee_len = cpu_to_le16(ee_len - write_len);
+			/* Shift the start of ex by 'map_len' blocks */
+			ex->ee_block = cpu_to_le32(ee_block + map_len);
+			ext4_ext_store_pblock(ex, ee_pblk + map_len);
+			ex->ee_len = cpu_to_le16(ee_len - map_len);
 			ext4_ext_mark_uninitialized(ex); /* Restore the flag */
 
-			/* Extend prev_ex by 'write_len' blocks */
-			prev_ex->ee_len = cpu_to_le16(prev_len + write_len);
+			/* Extend abut_ex by 'map_len' blocks */
+			abut_ex->ee_len = cpu_to_le16(prev_len + map_len);
 
-			/* Mark the block containing both extents as dirty */
-			ext4_ext_dirty(handle, inode, path + depth);
+			/* Result: number of initialized blocks past m_lblk */
+			allocated = map_len;
+		}
+	} else if (((map->m_lblk + map_len) == (ee_block + ee_len)) &&
+		   (map_len < ee_len) &&	/*L1*/
+		   ex < EXT_LAST_EXTENT(eh)) {	/*L2*/
+		/* See if we can merge right */
+		ext4_lblk_t next_lblk;
+		ext4_fsblk_t next_pblk, ee_pblk;
+		unsigned int next_len;
+
+		abut_ex = ex + 1;
+		next_lblk = le32_to_cpu(abut_ex->ee_block);
+		next_len = ext4_ext_get_actual_len(abut_ex);
+		next_pblk = ext4_ext_pblock(abut_ex);
+		ee_pblk = ext4_ext_pblock(ex);
 
-			/* Update path to point to the right extent */
-			path[depth].p_ext = prev_ex;
+		/*
+		 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
+		 * upon those conditions:
+		 * - C1: abut_ex is initialized,
+		 * - C2: abut_ex is logically abutting ex,
+		 * - C3: abut_ex is physically abutting ex,
+		 * - C4: abut_ex can receive the additional blocks without
+		 *   overflowing the (initialized) length limit.
+		 */
+		if ((!ext4_ext_is_uninitialized(abut_ex)) &&		/*C1*/
+		    ((map->m_lblk + map_len) == next_lblk) &&		/*C2*/
+		    ((ee_pblk + ee_len) == next_pblk) &&		/*C3*/
+		    (next_len < (EXT_INIT_MAX_LEN - map_len))) {	/*C4*/
+			err = ext4_ext_get_access(handle, inode, path + depth);
+			if (err)
+				goto out;
+
+			trace_ext4_ext_convert_to_initialized_fastpath(inode,
+				map, ex, abut_ex);
+
+			/* Shift the start of abut_ex by 'map_len' blocks */
+			abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);
+			ext4_ext_store_pblock(abut_ex, next_pblk - map_len);
+			ex->ee_len = cpu_to_le16(ee_len - map_len);
+			ext4_ext_mark_uninitialized(ex); /* Restore the flag */
+
+			/* Extend abut_ex by 'map_len' blocks */
+			abut_ex->ee_len = cpu_to_le16(next_len + map_len);
 
 			/* Result: number of initialized blocks past m_lblk */
-			allocated = write_len;
-			goto out;
+			allocated = map_len;
 		}
 	}
+	if (allocated) {
+		/* Mark the block containing both extents as dirty */
+		ext4_ext_dirty(handle, inode, path + depth);
+
+		/* Update path to point to the right extent */
+		path[depth].p_ext = abut_ex;
+		goto out;
+	} else
+		allocated = ee_len - (map->m_lblk - ee_block);
 
 	WARN_ON(map->m_lblk < ee_block);
 	/*
@@ -3330,7 +3435,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	}
 
 	allocated = ext4_split_extent(handle, inode, path,
-				      &split_map, split_flag, 0);
+				      &split_map, split_flag, flags);
 	if (allocated < 0)
 		err = allocated;
 
@@ -3650,6 +3755,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 		  flags, allocated);
 	ext4_ext_show_leaf(inode, path);
 
+	/*
+	 * When writing into uninitialized space, we should not fail to
+	 * allocate metadata blocks for the new extent block if needed.
+	 */
+	flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL;
+
 	trace_ext4_ext_handle_uninitialized_extents(inode, map, flags,
 						    allocated, newblock);
 
@@ -3713,7 +3824,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 	}
 
 	/* buffered write, writepage time, convert*/
-	ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
+	ret = ext4_ext_convert_to_initialized(handle, inode, map, path, flags);
 	if (ret >= 0)
 		ext4_update_inode_fsync_trans(handle, inode, 1);
 out:
@@ -4257,47 +4368,12 @@ out3:
 	return err ? err : allocated;
 }
 
-void ext4_ext_truncate(struct inode *inode)
+void ext4_ext_truncate(handle_t *handle, struct inode *inode)
 {
-	struct address_space *mapping = inode->i_mapping;
 	struct super_block *sb = inode->i_sb;
 	ext4_lblk_t last_block;
-	handle_t *handle;
-	loff_t page_len;
 	int err = 0;
 
-	/*
-	 * finish any pending end_io work so we won't run the risk of
-	 * converting any truncated blocks to initialized later
-	 */
-	ext4_flush_unwritten_io(inode);
-
-	/*
-	 * probably first extent we're gonna free will be last in block
-	 */
-	err = ext4_writepage_trans_blocks(inode);
-	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, err);
-	if (IS_ERR(handle))
-		return;
-
-	if (inode->i_size % PAGE_CACHE_SIZE != 0) {
-		page_len = PAGE_CACHE_SIZE -
-			(inode->i_size & (PAGE_CACHE_SIZE - 1));
-
-		err = ext4_discard_partial_page_buffers(handle,
-			mapping, inode->i_size, page_len, 0);
-
-		if (err)
-			goto out_stop;
-	}
-
-	if (ext4_orphan_add(handle, inode))
-		goto out_stop;
-
-	down_write(&EXT4_I(inode)->i_data_sem);
-
-	ext4_discard_preallocations(inode);
-
 	/*
 	 * TODO: optimization is possible here.
 	 * Probably we need not scan at all,
@@ -4313,29 +4389,6 @@ void ext4_ext_truncate(struct inode *inode)
 	err = ext4_es_remove_extent(inode, last_block,
 				    EXT_MAX_BLOCKS - last_block);
 	err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
-
-	/* In a multi-transaction truncate, we only make the final
-	 * transaction synchronous.
-	 */
-	if (IS_SYNC(inode))
-		ext4_handle_sync(handle);
-
-	up_write(&EXT4_I(inode)->i_data_sem);
-
-out_stop:
-	/*
-	 * If this was a simple ftruncate() and the file will remain alive,
-	 * then we need to clear up the orphan record which we created above.
-	 * However, if this was a real unlink then we were called by
-	 * ext4_delete_inode(), and we allow that function to clean up the
-	 * orphan info for us.
-	 */
-	if (inode->i_nlink)
-		ext4_orphan_del(handle, inode);
-
-	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-	ext4_mark_inode_dirty(handle, inode);
-	ext4_journal_stop(handle);
 }
 
 static void ext4_falloc_update_inode(struct inode *inode,
@@ -4623,187 +4676,6 @@ static int ext4_xattr_fiemap(struct inode *inode,
 	return (error < 0 ? error : 0);
 }
 
-/*
- * ext4_ext_punch_hole
- *
- * Punches a hole of "length" bytes in a file starting
- * at byte "offset"
- *
- * @inode:  The inode of the file to punch a hole in
- * @offset: The starting byte offset of the hole
- * @length: The length of the hole
- *
- * Returns the number of blocks removed or negative on err
- */
-int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
-{
-	struct inode *inode = file_inode(file);
-	struct super_block *sb = inode->i_sb;
-	ext4_lblk_t first_block, stop_block;
-	struct address_space *mapping = inode->i_mapping;
-	handle_t *handle;
-	loff_t first_page, last_page, page_len;
-	loff_t first_page_offset, last_page_offset;
-	int credits, err = 0;
-
-	/*
-	 * Write out all dirty pages to avoid race conditions
-	 * Then release them.
-	 */
-	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-		err = filemap_write_and_wait_range(mapping,
-			offset, offset + length - 1);
-
-		if (err)
-			return err;
-	}
-
-	mutex_lock(&inode->i_mutex);
-	/* It's not possible punch hole on append only file */
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
-		err = -EPERM;
-		goto out_mutex;
-	}
-	if (IS_SWAPFILE(inode)) {
-		err = -ETXTBSY;
-		goto out_mutex;
-	}
-
-	/* No need to punch hole beyond i_size */
-	if (offset >= inode->i_size)
-		goto out_mutex;
-
-	/*
-	 * If the hole extends beyond i_size, set the hole
-	 * to end after the page that contains i_size
-	 */
-	if (offset + length > inode->i_size) {
-		length = inode->i_size +
-		   PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
-		   offset;
-	}
-
-	first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	last_page = (offset + length) >> PAGE_CACHE_SHIFT;
-
-	first_page_offset = first_page << PAGE_CACHE_SHIFT;
-	last_page_offset = last_page << PAGE_CACHE_SHIFT;
-
-	/* Now release the pages */
-	if (last_page_offset > first_page_offset) {
-		truncate_pagecache_range(inode, first_page_offset,
-					 last_page_offset - 1);
-	}
-
-	/* Wait all existing dio workers, newcomers will block on i_mutex */
-	ext4_inode_block_unlocked_dio(inode);
-	err = ext4_flush_unwritten_io(inode);
-	if (err)
-		goto out_dio;
-	inode_dio_wait(inode);
-
-	credits = ext4_writepage_trans_blocks(inode);
-	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
-	if (IS_ERR(handle)) {
-		err = PTR_ERR(handle);
-		goto out_dio;
-	}
-
-
-	/*
-	 * Now we need to zero out the non-page-aligned data in the
-	 * pages at the start and tail of the hole, and unmap the buffer
-	 * heads for the block aligned regions of the page that were
-	 * completely zeroed.
-	 */
-	if (first_page > last_page) {
-		/*
-		 * If the file space being truncated is contained within a page
-		 * just zero out and unmap the middle of that page
-		 */
-		err = ext4_discard_partial_page_buffers(handle,
-			mapping, offset, length, 0);
-
-		if (err)
-			goto out;
-	} else {
-		/*
-		 * zero out and unmap the partial page that contains
-		 * the start of the hole
-		 */
-		page_len  = first_page_offset - offset;
-		if (page_len > 0) {
-			err = ext4_discard_partial_page_buffers(handle, mapping,
-						   offset, page_len, 0);
-			if (err)
-				goto out;
-		}
-
-		/*
-		 * zero out and unmap the partial page that contains
-		 * the end of the hole
-		 */
-		page_len = offset + length - last_page_offset;
-		if (page_len > 0) {
-			err = ext4_discard_partial_page_buffers(handle, mapping,
-					last_page_offset, page_len, 0);
-			if (err)
-				goto out;
-		}
-	}
-
-	/*
-	 * If i_size is contained in the last page, we need to
-	 * unmap and zero the partial page after i_size
-	 */
-	if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
-	   inode->i_size % PAGE_CACHE_SIZE != 0) {
-
-		page_len = PAGE_CACHE_SIZE -
-			(inode->i_size & (PAGE_CACHE_SIZE - 1));
-
-		if (page_len > 0) {
-			err = ext4_discard_partial_page_buffers(handle,
-			  mapping, inode->i_size, page_len, 0);
-
-			if (err)
-				goto out;
-		}
-	}
-
-	first_block = (offset + sb->s_blocksize - 1) >>
-		EXT4_BLOCK_SIZE_BITS(sb);
-	stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
-
-	/* If there are no blocks to remove, return now */
-	if (first_block >= stop_block)
-		goto out;
-
-	down_write(&EXT4_I(inode)->i_data_sem);
-	ext4_discard_preallocations(inode);
-
-	err = ext4_es_remove_extent(inode, first_block,
-				    stop_block - first_block);
-	err = ext4_ext_remove_space(inode, first_block, stop_block - 1);
-
-	ext4_discard_preallocations(inode);
-
-	if (IS_SYNC(inode))
-		ext4_handle_sync(handle);
-
-	up_write(&EXT4_I(inode)->i_data_sem);
-
-out:
-	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-	ext4_mark_inode_dirty(handle, inode);
-	ext4_journal_stop(handle);
-out_dio:
-	ext4_inode_resume_unlocked_dio(inode);
-out_mutex:
-	mutex_unlock(&inode->i_mutex);
-	return err;
-}
-
 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		__u64 start, __u64 len)
 {

+ 1 - 2
fs/ext4/fsync.c

@@ -166,8 +166,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	if (journal->j_flags & JBD2_BARRIER &&
 	    !jbd2_trans_will_send_data_barrier(journal, commit_tid))
 		needs_barrier = true;
-	jbd2_log_start_commit(journal, commit_tid);
-	ret = jbd2_log_wait_commit(journal, commit_tid);
+	ret = jbd2_complete_transaction(journal, commit_tid);
 	if (needs_barrier) {
 		err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
 		if (!ret)

+ 50 - 38
fs/ext4/ialloc.c

@@ -166,7 +166,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 	trace_ext4_load_inode_bitmap(sb, block_group);
 	bh->b_end_io = ext4_end_bitmap_read;
 	get_bh(bh);
-	submit_bh(READ, bh);
+	submit_bh(READ | REQ_META | REQ_PRIO, bh);
 	wait_on_buffer(bh);
 	if (!buffer_uptodate(bh)) {
 		put_bh(bh);
@@ -666,6 +666,23 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
 	ei = EXT4_I(inode);
 	sbi = EXT4_SB(sb);
 
+	/*
+	 * Initialize owners and quota early so that we don't have to account
+	 * for the quota initialization worst case in the standard inode
+	 * creating transaction
+	 */
+	if (owner) {
+		inode->i_mode = mode;
+		i_uid_write(inode, owner[0]);
+		i_gid_write(inode, owner[1]);
+	} else if (test_opt(sb, GRPID)) {
+		inode->i_mode = mode;
+		inode->i_uid = current_fsuid();
+		inode->i_gid = dir->i_gid;
+	} else
+		inode_init_owner(inode, dir, mode);
+	dquot_initialize(inode);
+
 	if (!goal)
 		goal = sbi->s_inode_goal;
 
@@ -697,7 +714,7 @@ got_group:
 
 		gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
 		if (!gdp)
-			goto fail;
+			goto out;
 
 		/*
 		 * Check free inodes count before loading bitmap.
@@ -711,7 +728,7 @@ got_group:
 		brelse(inode_bitmap_bh);
 		inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
 		if (!inode_bitmap_bh)
-			goto fail;
+			goto out;
 
 repeat_in_this_group:
 		ino = ext4_find_next_zero_bit((unsigned long *)
@@ -733,13 +750,16 @@ repeat_in_this_group:
 							 handle_type, nblocks);
 			if (IS_ERR(handle)) {
 				err = PTR_ERR(handle);
-				goto fail;
+				ext4_std_error(sb, err);
+				goto out;
 			}
 		}
 		BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
 		err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
-		if (err)
-			goto fail;
+		if (err) {
+			ext4_std_error(sb, err);
+			goto out;
+		}
 		ext4_lock_group(sb, group);
 		ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
 		ext4_unlock_group(sb, group);
@@ -755,8 +775,10 @@ repeat_in_this_group:
 got:
 	BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
 	err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
-	if (err)
-		goto fail;
+	if (err) {
+		ext4_std_error(sb, err);
+		goto out;
+	}
 
 	/* We may have to initialize the block bitmap if it isn't already */
 	if (ext4_has_group_desc_csum(sb) &&
@@ -768,7 +790,8 @@ got:
 		err = ext4_journal_get_write_access(handle, block_bitmap_bh);
 		if (err) {
 			brelse(block_bitmap_bh);
-			goto fail;
+			ext4_std_error(sb, err);
+			goto out;
 		}
 
 		BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
@@ -787,14 +810,18 @@ got:
 		ext4_unlock_group(sb, group);
 		brelse(block_bitmap_bh);
 
-		if (err)
-			goto fail;
+		if (err) {
+			ext4_std_error(sb, err);
+			goto out;
+		}
 	}
 
 	BUFFER_TRACE(group_desc_bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, group_desc_bh);
-	if (err)
-		goto fail;
+	if (err) {
+		ext4_std_error(sb, err);
+		goto out;
+	}
 
 	/* Update the relevant bg descriptor fields */
 	if (ext4_has_group_desc_csum(sb)) {
@@ -840,8 +867,10 @@ got:
 
 	BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
 	err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
-	if (err)
-		goto fail;
+	if (err) {
+		ext4_std_error(sb, err);
+		goto out;
+	}
 
 	percpu_counter_dec(&sbi->s_freeinodes_counter);
 	if (S_ISDIR(mode))
@@ -851,16 +880,6 @@ got:
 		flex_group = ext4_flex_group(sbi, group);
 		atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
 	}
-	if (owner) {
-		inode->i_mode = mode;
-		i_uid_write(inode, owner[0]);
-		i_gid_write(inode, owner[1]);
-	} else if (test_opt(sb, GRPID)) {
-		inode->i_mode = mode;
-		inode->i_uid = current_fsuid();
-		inode->i_gid = dir->i_gid;
-	} else
-		inode_init_owner(inode, dir, mode);
 
 	inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
 	/* This is the optimal IO size (for stat), not the fs block size */
@@ -889,7 +908,9 @@ got:
 		 * twice.
 		 */
 		err = -EIO;
-		goto fail;
+		ext4_error(sb, "failed to insert inode %lu: doubly allocated?",
+			   inode->i_ino);
+		goto out;
 	}
 	spin_lock(&sbi->s_next_gen_lock);
 	inode->i_generation = sbi->s_next_generation++;
@@ -899,7 +920,6 @@ got:
 	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
 			EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
 		__u32 csum;
-		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 		__le32 inum = cpu_to_le32(inode->i_ino);
 		__le32 gen = cpu_to_le32(inode->i_generation);
 		csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
@@ -918,7 +938,6 @@ got:
 		ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
 
 	ret = inode;
-	dquot_initialize(inode);
 	err = dquot_alloc_inode(inode);
 	if (err)
 		goto fail_drop;
@@ -952,24 +971,17 @@ got:
 
 	ext4_debug("allocating inode %lu\n", inode->i_ino);
 	trace_ext4_allocate_inode(inode, dir, mode);
-	goto really_out;
-fail:
-	ext4_std_error(sb, err);
-out:
-	iput(inode);
-	ret = ERR_PTR(err);
-really_out:
 	brelse(inode_bitmap_bh);
 	return ret;
 
 fail_free_drop:
 	dquot_free_inode(inode);
-
 fail_drop:
-	dquot_drop(inode);
-	inode->i_flags |= S_NOQUOTA;
 	clear_nlink(inode);
 	unlock_new_inode(inode);
+out:
+	dquot_drop(inode);
+	inode->i_flags |= S_NOQUOTA;
 	iput(inode);
 	brelse(inode_bitmap_bh);
 	return ERR_PTR(err);

+ 52 - 421
fs/ext4/indirect.c

@@ -291,131 +291,6 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
 	return count;
 }
 
-/**
- *	ext4_alloc_blocks: multiple allocate blocks needed for a branch
- *	@handle: handle for this transaction
- *	@inode: inode which needs allocated blocks
- *	@iblock: the logical block to start allocated at
- *	@goal: preferred physical block of allocation
- *	@indirect_blks: the number of blocks need to allocate for indirect
- *			blocks
- *	@blks: number of desired blocks
- *	@new_blocks: on return it will store the new block numbers for
- *	the indirect blocks(if needed) and the first direct block,
- *	@err: on return it will store the error code
- *
- *	This function will return the number of blocks allocated as
- *	requested by the passed-in parameters.
- */
-static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
-			     ext4_lblk_t iblock, ext4_fsblk_t goal,
-			     int indirect_blks, int blks,
-			     ext4_fsblk_t new_blocks[4], int *err)
-{
-	struct ext4_allocation_request ar;
-	int target, i;
-	unsigned long count = 0, blk_allocated = 0;
-	int index = 0;
-	ext4_fsblk_t current_block = 0;
-	int ret = 0;
-
-	/*
-	 * Here we try to allocate the requested multiple blocks at once,
-	 * on a best-effort basis.
-	 * To build a branch, we should allocate blocks for
-	 * the indirect blocks(if not allocated yet), and at least
-	 * the first direct block of this branch.  That's the
-	 * minimum number of blocks need to allocate(required)
-	 */
-	/* first we try to allocate the indirect blocks */
-	target = indirect_blks;
-	while (target > 0) {
-		count = target;
-		/* allocating blocks for indirect blocks and direct blocks */
-		current_block = ext4_new_meta_blocks(handle, inode, goal,
-						     0, &count, err);
-		if (*err)
-			goto failed_out;
-
-		if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) {
-			EXT4_ERROR_INODE(inode,
-					 "current_block %llu + count %lu > %d!",
-					 current_block, count,
-					 EXT4_MAX_BLOCK_FILE_PHYS);
-			*err = -EIO;
-			goto failed_out;
-		}
-
-		target -= count;
-		/* allocate blocks for indirect blocks */
-		while (index < indirect_blks && count) {
-			new_blocks[index++] = current_block++;
-			count--;
-		}
-		if (count > 0) {
-			/*
-			 * save the new block number
-			 * for the first direct block
-			 */
-			new_blocks[index] = current_block;
-			WARN(1, KERN_INFO "%s returned more blocks than "
-						"requested\n", __func__);
-			break;
-		}
-	}
-
-	target = blks - count ;
-	blk_allocated = count;
-	if (!target)
-		goto allocated;
-	/* Now allocate data blocks */
-	memset(&ar, 0, sizeof(ar));
-	ar.inode = inode;
-	ar.goal = goal;
-	ar.len = target;
-	ar.logical = iblock;
-	if (S_ISREG(inode->i_mode))
-		/* enable in-core preallocation only for regular files */
-		ar.flags = EXT4_MB_HINT_DATA;
-
-	current_block = ext4_mb_new_blocks(handle, &ar, err);
-	if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) {
-		EXT4_ERROR_INODE(inode,
-				 "current_block %llu + ar.len %d > %d!",
-				 current_block, ar.len,
-				 EXT4_MAX_BLOCK_FILE_PHYS);
-		*err = -EIO;
-		goto failed_out;
-	}
-
-	if (*err && (target == blks)) {
-		/*
-		 * if the allocation failed and we didn't allocate
-		 * any blocks before
-		 */
-		goto failed_out;
-	}
-	if (!*err) {
-		if (target == blks) {
-			/*
-			 * save the new block number
-			 * for the first direct block
-			 */
-			new_blocks[index] = current_block;
-		}
-		blk_allocated += ar.len;
-	}
-allocated:
-	/* total number of blocks allocated for direct blocks */
-	ret = blk_allocated;
-	*err = 0;
-	return ret;
-failed_out:
-	for (i = 0; i < index; i++)
-		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
-	return ret;
-}
-
 /**
  *	ext4_alloc_branch - allocate and set up a chain of blocks.
  *	@handle: handle for this transaction
@@ -448,60 +323,59 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 			     int *blks, ext4_fsblk_t goal,
 			     ext4_lblk_t *offsets, Indirect *branch)
 {
-	int blocksize = inode->i_sb->s_blocksize;
-	int i, n = 0;
-	int err = 0;
-	struct buffer_head *bh;
-	int num;
-	ext4_fsblk_t new_blocks[4];
-	ext4_fsblk_t current_block;
-
-	num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
-				*blks, new_blocks, &err);
-	if (err)
-		return err;
+	struct ext4_allocation_request	ar;
+	struct buffer_head *		bh;
+	ext4_fsblk_t			b, new_blocks[4];
+	__le32				*p;
+	int				i, j, err, len = 1;
 
-	branch[0].key = cpu_to_le32(new_blocks[0]);
 	/*
-	 * metadata blocks and data blocks are allocated.
+	 * Set up for the direct block allocation
 	 */
-	for (n = 1; n <= indirect_blks;  n++) {
-		/*
-		 * Get buffer_head for parent block, zero it out
-		 * and set the pointer to new one, then send
-		 * parent to disk.
-		 */
-		bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+	memset(&ar, 0, sizeof(ar));
+	ar.inode = inode;
+	ar.len = *blks;
+	ar.logical = iblock;
+	if (S_ISREG(inode->i_mode))
+		ar.flags = EXT4_MB_HINT_DATA;
+
+	for (i = 0; i <= indirect_blks; i++) {
+		if (i == indirect_blks) {
+			ar.goal = goal;
+			new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err);
+		} else
+			goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode,
+							goal, 0, NULL, &err);
+		if (err) {
+			i--;
+			goto failed;
+		}
+		branch[i].key = cpu_to_le32(new_blocks[i]);
+		if (i == 0)
+			continue;
+
+		bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]);
 		if (unlikely(!bh)) {
 			err = -ENOMEM;
 			goto failed;
 		}
-
-		branch[n].bh = bh;
 		lock_buffer(bh);
 		BUFFER_TRACE(bh, "call get_create_access");
 		err = ext4_journal_get_create_access(handle, bh);
 		if (err) {
-			/* Don't brelse(bh) here; it's done in
-			 * ext4_journal_forget() below */
 			unlock_buffer(bh);
 			goto failed;
 		}
 
-		memset(bh->b_data, 0, blocksize);
-		branch[n].p = (__le32 *) bh->b_data + offsets[n];
-		branch[n].key = cpu_to_le32(new_blocks[n]);
-		*branch[n].p = branch[n].key;
-		if (n == indirect_blks) {
-			current_block = new_blocks[n];
-			/*
-			 * End of chain, update the last new metablock of
-			 * the chain to point to the new allocated
-			 * data blocks numbers
-			 */
-			for (i = 1; i < num; i++)
-				*(branch[n].p + i) = cpu_to_le32(++current_block);
-		}
+		memset(bh->b_data, 0, bh->b_size);
+		p = branch[i].p = (__le32 *) bh->b_data + offsets[i];
+		b = new_blocks[i];
+
+		if (i == indirect_blks)
+			len = ar.len;
+		for (j = 0; j < len; j++)
+			*p++ = cpu_to_le32(b++);
+
 		BUFFER_TRACE(bh, "marking uptodate");
 		set_buffer_uptodate(bh);
 		unlock_buffer(bh);
@@ -511,25 +385,16 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 		if (err)
 			goto failed;
 	}
-	*blks = num;
-	return err;
+	*blks = ar.len;
+	return 0;
 failed:
-	/* Allocation failed, free what we already allocated */
-	ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
-	for (i = 1; i <= n ; i++) {
-		/*
-		 * branch[i].bh is newly allocated, so there is no
-		 * need to revoke the block, which is why we don't
-		 * need to set EXT4_FREE_BLOCKS_METADATA.
-		 */
-		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
-				 EXT4_FREE_BLOCKS_FORGET);
+	for (; i >= 0; i--) {
+		if (i != indirect_blks && branch[i].bh)
+			ext4_forget(handle, 1, inode, branch[i].bh,
+				    branch[i].bh->b_blocknr);
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i],
+				 (i == indirect_blks) ? ar.len : 1, 0);
 	}
-	for (i = n+1; i < indirect_blks; i++)
-		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
-
-	ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
-
 	return err;
 }
 
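The rewritten ext4_alloc_branch() above folds the old two-function scheme into a single loop: each pass allocates one block (the indirect blocks first, the data blocks on the last pass), records it in branch[], links it into the parent buffer, and unwinds everything in reverse if any allocation fails. A minimal userspace sketch of that allocate-link-unwind shape, with toy alloc_block()/free_block() helpers standing in for the real ext4 allocators:

    /* Sketch of the allocate-link-unwind pattern; alloc_block() and
     * free_block() are toy stand-ins, not the real ext4 allocators. */
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t next_blk = 100;
    static int fail_after = 3;          /* simulate ENOSPC on the 4th call */

    static uint64_t alloc_block(int *err)
    {
        if (fail_after-- == 0) {
            *err = -28;                 /* -ENOSPC */
            return 0;
        }
        *err = 0;
        return next_blk++;
    }

    static void free_block(uint64_t blk)
    {
        printf("unwinding: freeing block %llu\n", (unsigned long long)blk);
    }

    static int alloc_branch(uint64_t *blks, int depth)
    {
        int i, err = 0;

        for (i = 0; i <= depth; i++) {
            blks[i] = alloc_block(&err);
            if (err) {
                i--;                    /* blks[i] was never allocated */
                break;
            }
            /* here ext4 links blks[i] into the buffer of blks[i-1] */
        }
        if (!err)
            return 0;
        for (; i >= 0; i--)             /* free in reverse, as in failed: */
            free_block(blks[i]);
        return err;
    }

    int main(void)
    {
        uint64_t blks[4];

        return alloc_branch(blks, 3) ? 1 : 0;
    }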
@@ -941,26 +806,9 @@ int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 * be able to restart the transaction at a convenient checkpoint to make
  * sure we don't overflow the journal.
  *
- * start_transaction gets us a new handle for a truncate transaction,
- * and extend_transaction tries to extend the existing one a bit.  If
+ * Try to extend this transaction for the purposes of truncation.  If
  * extend fails, we need to propagate the failure up and restart the
  * transaction in the top-level truncate loop. --sct
- */
-static handle_t *start_transaction(struct inode *inode)
-{
-	handle_t *result;
-
-	result = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
-				    ext4_blocks_for_truncate(inode));
-	if (!IS_ERR(result))
-		return result;
-
-	ext4_std_error(inode->i_sb, PTR_ERR(result));
-	return result;
-}
-
-/*
- * Try to extend this transaction for the purposes of truncation.
  *
  * Returns 0 if we managed to create more room.  If we can't create more
 * room and the transaction must be restarted, we return 1.
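Concretely, the surviving helper implements the pattern this comment describes: ask for more journal credits mid-truncate and, when the handle cannot be extended, restart the transaction at a safe point. A toy model of that loop (illustrative names, not the real jbd2/ext4 API):

    /* Toy model of the extend-or-restart loop described above. */
    #include <stdio.h>

    static int credits = 5;             /* room left in the handle */

    static int try_extend(int needed)
    {
        return credits < needed;        /* 1 means: caller must restart */
    }

    static void restart(void)
    {
        credits = 5;                    /* commit, start a fresh handle */
    }

    int main(void)
    {
        for (int blk = 0; blk < 12; blk++) {
            if (try_extend(1))
                restart();
            credits--;                  /* "free" one block */
            printf("freed block %d, credits left %d\n", blk, credits);
        }
        return 0;
    }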
@@ -1353,68 +1201,30 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 	}
 }
 
-void ext4_ind_truncate(struct inode *inode)
+void ext4_ind_truncate(handle_t *handle, struct inode *inode)
 {
-	handle_t *handle;
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	__le32 *i_data = ei->i_data;
 	int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
-	struct address_space *mapping = inode->i_mapping;
 	ext4_lblk_t offsets[4];
 	Indirect chain[4];
 	Indirect *partial;
 	__le32 nr = 0;
 	int n = 0;
 	ext4_lblk_t last_block, max_block;
-	loff_t page_len;
 	unsigned blocksize = inode->i_sb->s_blocksize;
-	int err;
-
-	handle = start_transaction(inode);
-	if (IS_ERR(handle))
-		return;		/* AKPM: return what? */
 
 	last_block = (inode->i_size + blocksize-1)
 					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
 	max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
 					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
 
-	if (inode->i_size % PAGE_CACHE_SIZE != 0) {
-		page_len = PAGE_CACHE_SIZE -
-			(inode->i_size & (PAGE_CACHE_SIZE - 1));
-
-		err = ext4_discard_partial_page_buffers(handle,
-			mapping, inode->i_size, page_len, 0);
-
-		if (err)
-			goto out_stop;
-	}
-
 	if (last_block != max_block) {
 		n = ext4_block_to_path(inode, last_block, offsets, NULL);
 		if (n == 0)
-			goto out_stop;	/* error */
+			return;
 	}
 
-	/*
-	 * OK.  This truncate is going to happen.  We add the inode to the
-	 * orphan list, so that if this truncate spans multiple transactions,
-	 * and we crash, we will resume the truncate when the filesystem
-	 * recovers.  It also marks the inode dirty, to catch the new size.
-	 *
-	 * Implication: the file must always be in a sane, consistent
-	 * truncatable state while each transaction commits.
-	 */
-	if (ext4_orphan_add(handle, inode))
-		goto out_stop;
-
-	/*
-	 * From here we block out all ext4_get_block() callers who want to
-	 * modify the block allocation tree.
-	 */
-	down_write(&ei->i_data_sem);
-
-	ext4_discard_preallocations(inode);
 	ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block);
 
 	/*
@@ -1431,7 +1241,7 @@ void ext4_ind_truncate(struct inode *inode)
 		 * It is unnecessary to free any data blocks if last_block is
 		 * equal to the indirect block limit.
 		 */
-		goto out_unlock;
+		return;
 	} else if (n == 1) {		/* direct blocks */
 		ext4_free_data(handle, inode, NULL, i_data+offsets[0],
 			       i_data + EXT4_NDIR_BLOCKS);
@@ -1491,31 +1301,6 @@ do_indirects:
 	case EXT4_TIND_BLOCK:
 		;
 	}
-
-out_unlock:
-	up_write(&ei->i_data_sem);
-	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-	ext4_mark_inode_dirty(handle, inode);
-
-	/*
-	 * In a multi-transaction truncate, we only make the final transaction
-	 * synchronous
-	 */
-	if (IS_SYNC(inode))
-		ext4_handle_sync(handle);
-out_stop:
-	/*
-	 * If this was a simple ftruncate(), and the file will remain alive
-	 * then we need to clear up the orphan record which we created above.
-	 * However, if this was a real unlink then we were called by
-	 * ext4_delete_inode(), and we allow that function to clean up the
-	 * orphan info for us.
-	 */
-	if (inode->i_nlink)
-		ext4_orphan_del(handle, inode);
-
-	ext4_journal_stop(handle);
-	trace_ext4_truncate_exit(inode);
 }
 
 static int free_hole_blocks(handle_t *handle, struct inode *inode,
@@ -1569,8 +1354,8 @@ err:
 	return ret;
 }
 
-static int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
-				 ext4_lblk_t first, ext4_lblk_t stop)
+int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
+			  ext4_lblk_t first, ext4_lblk_t stop)
 {
 	int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
 	int level, ret = 0;
@@ -1604,157 +1389,3 @@ err:
 	return ret;
 }
 
-int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length)
-{
-	struct inode *inode = file_inode(file);
-	struct super_block *sb = inode->i_sb;
-	ext4_lblk_t first_block, stop_block;
-	struct address_space *mapping = inode->i_mapping;
-	handle_t *handle = NULL;
-	loff_t first_page, last_page, page_len;
-	loff_t first_page_offset, last_page_offset;
-	int err = 0;
-
-	/*
-	 * Write out all dirty pages to avoid race conditions
-	 * Then release them.
-	 */
-	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-		err = filemap_write_and_wait_range(mapping,
-			offset, offset + length - 1);
-		if (err)
-			return err;
-	}
-
-	mutex_lock(&inode->i_mutex);
-	/* It's not possible punch hole on append only file */
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
-		err = -EPERM;
-		goto out_mutex;
-	}
-	if (IS_SWAPFILE(inode)) {
-		err = -ETXTBSY;
-		goto out_mutex;
-	}
-
-	/* No need to punch hole beyond i_size */
-	if (offset >= inode->i_size)
-		goto out_mutex;
-
-	/*
-	 * If the hole extents beyond i_size, set the hole
-	 * to end after the page that contains i_size
-	 */
-	if (offset + length > inode->i_size) {
-		length = inode->i_size +
-		    PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
-		    offset;
-	}
-
-	first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	last_page = (offset + length) >> PAGE_CACHE_SHIFT;
-
-	first_page_offset = first_page << PAGE_CACHE_SHIFT;
-	last_page_offset = last_page << PAGE_CACHE_SHIFT;
-
-	/* Now release the pages */
-	if (last_page_offset > first_page_offset) {
-		truncate_pagecache_range(inode, first_page_offset,
-					 last_page_offset - 1);
-	}
-
-	/* Wait all existing dio works, newcomers will block on i_mutex */
-	inode_dio_wait(inode);
-
-	handle = start_transaction(inode);
-	if (IS_ERR(handle))
-		goto out_mutex;
-
-	/*
-	 * Now we need to zero out the non-page-aligned data in the
-	 * pages at the start and tail of the hole, and unmap the buffer
-	 * heads for the block aligned regions of the page that were
-	 * completely zerod.
-	 */
-	if (first_page > last_page) {
-		/*
-		 * If the file space being truncated is contained within a page
-		 * just zero out and unmap the middle of that page
-		 */
-		err = ext4_discard_partial_page_buffers(handle,
-			mapping, offset, length, 0);
-		if (err)
-			goto out;
-	} else {
-		/*
-		 * Zero out and unmap the paritial page that contains
-		 * the start of the hole
-		 */
-		page_len = first_page_offset - offset;
-		if (page_len > 0) {
-			err = ext4_discard_partial_page_buffers(handle, mapping,
-							offset, page_len, 0);
-			if (err)
-				goto out;
-		}
-
-		/*
-		 * Zero out and unmap the partial page that contains
-		 * the end of the hole
-		 */
-		page_len = offset + length - last_page_offset;
-		if (page_len > 0) {
-			err = ext4_discard_partial_page_buffers(handle, mapping,
-						last_page_offset, page_len, 0);
-			if (err)
-				goto out;
-		}
-	}
-
-	/*
-	 * If i_size contained in the last page, we need to
-	 * unmap and zero the paritial page after i_size
-	 */
-	if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
-	    inode->i_size % PAGE_CACHE_SIZE != 0) {
-		page_len = PAGE_CACHE_SIZE -
-			(inode->i_size & (PAGE_CACHE_SIZE - 1));
-		if (page_len > 0) {
-			err = ext4_discard_partial_page_buffers(handle,
-				mapping, inode->i_size, page_len, 0);
-			if (err)
-				goto out;
-		}
-	}
-
-	first_block = (offset + sb->s_blocksize - 1) >>
-		EXT4_BLOCK_SIZE_BITS(sb);
-	stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
-
-	if (first_block >= stop_block)
-		goto out;
-
-	down_write(&EXT4_I(inode)->i_data_sem);
-	ext4_discard_preallocations(inode);
-
-	err = ext4_es_remove_extent(inode, first_block,
-				    stop_block - first_block);
-	err = ext4_free_hole_blocks(handle, inode, first_block, stop_block);
-
-	ext4_discard_preallocations(inode);
-
-	if (IS_SYNC(inode))
-		ext4_handle_sync(handle);
-
-	up_write(&EXT4_I(inode)->i_data_sem);
-
-out:
-	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-	ext4_mark_inode_dirty(handle, inode);
-	ext4_journal_stop(handle);
-
-out_mutex:
-	mutex_unlock(&inode->i_mutex);
-
-	return err;
-}

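Both the removed ext4_ind_punch_hole() above and its consolidated replacement in fs/ext4/inode.c below round the hole the same way: whole pages inside the hole are dropped from the page cache, the partial pages at each end are zeroed, and only whole filesystem blocks are actually freed. A self-contained sketch of that rounding arithmetic; the page and block sizes are illustrative assumptions, not values read from a superblock:

    #include <stdio.h>

    #define PAGE_SHIFT  12              /* 4 KiB pages, for illustration */
    #define BLK_SHIFT   10              /* 1 KiB blocks, for illustration */

    int main(void)
    {
        long long offset = 3000, length = 20000;

        /* first page fully inside the hole, and first page past it */
        long long first_page = (offset + (1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
        long long last_page = (offset + length) >> PAGE_SHIFT;

        /* first block fully inside the hole, and first block past it */
        long long first_block = (offset + (1 << BLK_SHIFT) - 1) >> BLK_SHIFT;
        long long stop_block = (offset + length) >> BLK_SHIFT;

        printf("drop pages  [%lld, %lld)\n", first_page, last_page);
        printf("free blocks [%lld, %lld)\n", first_block, stop_block);
        return 0;
    }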
+ 159 - 19
fs/ext4/inline.c

@@ -19,7 +19,8 @@
 
 #define EXT4_XATTR_SYSTEM_DATA	"data"
 #define EXT4_MIN_INLINE_DATA_SIZE	((sizeof(__le32) * EXT4_N_BLOCKS))
-#define EXT4_INLINE_DOTDOT_SIZE	4
+#define EXT4_INLINE_DOTDOT_OFFSET	2
+#define EXT4_INLINE_DOTDOT_SIZE		4
 
 int ext4_get_inline_size(struct inode *inode)
 {
@@ -1289,6 +1290,120 @@ out:
 	return ret;
 }
 
+/*
+ * This function fills a red-black tree with information from an
+ * inlined dir.  It returns the number of directory entries loaded
+ * into the tree.  If there is an error it is returned in err.
+ */
+int htree_inlinedir_to_tree(struct file *dir_file,
+			    struct inode *dir, ext4_lblk_t block,
+			    struct dx_hash_info *hinfo,
+			    __u32 start_hash, __u32 start_minor_hash,
+			    int *has_inline_data)
+{
+	int err = 0, count = 0;
+	unsigned int parent_ino;
+	int pos;
+	struct ext4_dir_entry_2 *de;
+	struct inode *inode = file_inode(dir_file);
+	int ret, inline_size = 0;
+	struct ext4_iloc iloc;
+	void *dir_buf = NULL;
+	struct ext4_dir_entry_2 fake;
+
+	ret = ext4_get_inode_loc(inode, &iloc);
+	if (ret)
+		return ret;
+
+	down_read(&EXT4_I(inode)->xattr_sem);
+	if (!ext4_has_inline_data(inode)) {
+		up_read(&EXT4_I(inode)->xattr_sem);
+		*has_inline_data = 0;
+		goto out;
+	}
+
+	inline_size = ext4_get_inline_size(inode);
+	dir_buf = kmalloc(inline_size, GFP_NOFS);
+	if (!dir_buf) {
+		ret = -ENOMEM;
+		up_read(&EXT4_I(inode)->xattr_sem);
+		goto out;
+	}
+
+	ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc);
+	up_read(&EXT4_I(inode)->xattr_sem);
+	if (ret < 0)
+		goto out;
+
+	pos = 0;
+	parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode);
+	while (pos < inline_size) {
+		/*
+		 * As inlined dir doesn't store any information about '.' and
+		 * only the inode number of '..' is stored, we have to handle
+		 * them differently.
+		 */
+		if (pos == 0) {
+			fake.inode = cpu_to_le32(inode->i_ino);
+			fake.name_len = 1;
+			strcpy(fake.name, ".");
+			fake.rec_len = ext4_rec_len_to_disk(
+						EXT4_DIR_REC_LEN(fake.name_len),
+						inline_size);
+			ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
+			de = &fake;
+			pos = EXT4_INLINE_DOTDOT_OFFSET;
+		} else if (pos == EXT4_INLINE_DOTDOT_OFFSET) {
+			fake.inode = cpu_to_le32(parent_ino);
+			fake.name_len = 2;
+			strcpy(fake.name, "..");
+			fake.rec_len = ext4_rec_len_to_disk(
+						EXT4_DIR_REC_LEN(fake.name_len),
+						inline_size);
+			ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
+			de = &fake;
+			pos = EXT4_INLINE_DOTDOT_SIZE;
+		} else {
+			de = (struct ext4_dir_entry_2 *)(dir_buf + pos);
+			pos += ext4_rec_len_from_disk(de->rec_len, inline_size);
+			if (ext4_check_dir_entry(inode, dir_file, de,
+					 iloc.bh, dir_buf,
+					 inline_size, pos)) {
+				ret = count;
+				goto out;
+			}
+		}
+
+		ext4fs_dirhash(de->name, de->name_len, hinfo);
+		if ((hinfo->hash < start_hash) ||
+		    ((hinfo->hash == start_hash) &&
+		     (hinfo->minor_hash < start_minor_hash)))
+			continue;
+		if (de->inode == 0)
+			continue;
+		err = ext4_htree_store_dirent(dir_file,
+				   hinfo->hash, hinfo->minor_hash, de);
+		if (err) {
+			count = err;
+			goto out;
+		}
+		count++;
+	}
+	ret = count;
+out:
+	kfree(dir_buf);
+	brelse(iloc.bh);
+	return ret;
+}
+
+/*
+ * So this function is called when the volume is mkfsed with
+ * dir_index disabled. In order to keep f_pos persistent
+ * after we convert from an inlined dir to a block-based one,
+ * we just pretend that we are a normal dir and return the
+ * offsets as if '.' and '..' really took up space.
+ *
+ */
 int ext4_read_inline_dir(struct file *filp,
 			 void *dirent, filldir_t filldir,
 			 int *has_inline_data)
@@ -1302,6 +1417,7 @@ int ext4_read_inline_dir(struct file *filp,
 	int ret, inline_size = 0;
 	struct ext4_iloc iloc;
 	void *dir_buf = NULL;
+	int dotdot_offset, dotdot_size, extra_offset, extra_size;
 
 	ret = ext4_get_inode_loc(inode, &iloc);
 	if (ret)
@@ -1330,8 +1446,21 @@ int ext4_read_inline_dir(struct file *filp,
 	sb = inode->i_sb;
 	stored = 0;
 	parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode);
+	offset = filp->f_pos;
 
-	while (!error && !stored && filp->f_pos < inode->i_size) {
+	/*
+	 * dotdot_offset and dotdot_size are the real offset and
+	 * size for ".." and "." if the dir is block based, while
+	 * the real size for them is only EXT4_INLINE_DOTDOT_SIZE.
+	 * So we will use extra_offset and extra_size to indicate them
+	 * during the inline dir iteration.
+	 */
+	dotdot_offset = EXT4_DIR_REC_LEN(1);
+	dotdot_size = dotdot_offset + EXT4_DIR_REC_LEN(2);
+	extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE;
+	extra_size = extra_offset + inline_size;
+
+	while (!error && !stored && filp->f_pos < extra_size) {
 revalidate:
 		/*
 		 * If the version has changed since the last call to
@@ -1340,15 +1469,23 @@ revalidate:
 		 * dir to make sure.
 		 */
 		if (filp->f_version != inode->i_version) {
-			for (i = 0;
-			     i < inode->i_size && i < offset;) {
+			for (i = 0; i < extra_size && i < offset;) {
+				/*
+				 * "." is with offset 0 and
+				 * ".." is dotdot_offset.
+				 */
 				if (!i) {
-					/* skip "." and ".." if needed. */
-					i += EXT4_INLINE_DOTDOT_SIZE;
+					i = dotdot_offset;
+					continue;
+				} else if (i == dotdot_offset) {
+					i = dotdot_size;
 					continue;
 				}
+				/* for any other entry, the real offset in
+				 * the buf has to be tuned accordingly.
+				 */
 				de = (struct ext4_dir_entry_2 *)
-					(dir_buf + i);
+					(dir_buf + i - extra_offset);
 				/* It's too expensive to do a full
 				 * dirent test each time round this
 				 * loop, but we do have to test at
@@ -1356,43 +1493,47 @@ revalidate:
 				 * failure will be detected in the
 				 * dirent test below. */
 				if (ext4_rec_len_from_disk(de->rec_len,
-					inline_size) < EXT4_DIR_REC_LEN(1))
+					extra_size) < EXT4_DIR_REC_LEN(1))
 					break;
 				i += ext4_rec_len_from_disk(de->rec_len,
-							    inline_size);
+							    extra_size);
 			}
 			offset = i;
 			filp->f_pos = offset;
 			filp->f_version = inode->i_version;
 		}
 
-		while (!error && filp->f_pos < inode->i_size) {
+		while (!error && filp->f_pos < extra_size) {
 			if (filp->f_pos == 0) {
 				error = filldir(dirent, ".", 1, 0, inode->i_ino,
 						DT_DIR);
 				if (error)
 					break;
 				stored++;
+				filp->f_pos = dotdot_offset;
+				continue;
+			}
 
-				error = filldir(dirent, "..", 2, 0, parent_ino,
-						DT_DIR);
+			if (filp->f_pos == dotdot_offset) {
+				error = filldir(dirent, "..", 2,
+						dotdot_offset,
+						parent_ino, DT_DIR);
 				if (error)
 					break;
 				stored++;
 
-				filp->f_pos = offset = EXT4_INLINE_DOTDOT_SIZE;
+				filp->f_pos = dotdot_size;
 				continue;
 			}
 
-			de = (struct ext4_dir_entry_2 *)(dir_buf + offset);
+			de = (struct ext4_dir_entry_2 *)
+				(dir_buf + filp->f_pos - extra_offset);
 			if (ext4_check_dir_entry(inode, filp, de,
 						 iloc.bh, dir_buf,
-						 inline_size, offset)) {
+						 extra_size, filp->f_pos)) {
 				ret = stored;
 				goto out;
 			}
-			offset += ext4_rec_len_from_disk(de->rec_len,
-							 inline_size);
 			if (le32_to_cpu(de->inode)) {
 				/* We might block in the next section
 				 * if the data destination is
@@ -1415,9 +1556,8 @@ revalidate:
 				stored++;
 			}
 			filp->f_pos += ext4_rec_len_from_disk(de->rec_len,
-							      inline_size);
+							      extra_size);
 		}
-		offset = 0;
 	}
 out:
 	kfree(dir_buf);

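The heart of the inline-readdir rework above is an offset translation: f_pos advances as if the directory were block based, where "." and ".." occupy EXT4_DIR_REC_LEN(1) and EXT4_DIR_REC_LEN(2) bytes, while the inline buffer reserves only 4 bytes for the parent inode number. A standalone sketch of the mapping, with the usual ext4 rec_len rounding (8-byte header plus name, rounded up to a multiple of 4) written out as an assumption:

    #include <stdio.h>

    /* Directory entry size rounding used by ext4: 8-byte header plus
     * name, rounded up to a multiple of 4 (assumption for this sketch). */
    #define DIR_REC_LEN(name_len)   (((name_len) + 8 + 3) & ~3)
    #define INLINE_DOTDOT_SIZE      4   /* inline dirs store only ..'s inode */

    int main(void)
    {
        int dotdot_offset = DIR_REC_LEN(1);                  /* 12: after "."  */
        int dotdot_size = dotdot_offset + DIR_REC_LEN(2);    /* 24: after ".." */
        int extra_offset = dotdot_size - INLINE_DOTDOT_SIZE; /* 20 */
        int f_pos = 32;     /* example position past the fake dot entries */

        /* an entry at f_pos lives at f_pos - extra_offset in the buffer */
        printf("f_pos %d -> inline buffer offset %d\n",
               f_pos, f_pos - extra_offset);
        return 0;
    }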
+ 388 - 192
fs/ext4/inode.c

@@ -55,21 +55,21 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
 	__u16 csum_hi = 0;
 	__u32 csum;
 
-	csum_lo = raw->i_checksum_lo;
+	csum_lo = le16_to_cpu(raw->i_checksum_lo);
 	raw->i_checksum_lo = 0;
 	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
 	    EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) {
-		csum_hi = raw->i_checksum_hi;
+		csum_hi = le16_to_cpu(raw->i_checksum_hi);
 		raw->i_checksum_hi = 0;
 	}
 
 	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw,
 			   EXT4_INODE_SIZE(inode->i_sb));
 
-	raw->i_checksum_lo = csum_lo;
+	raw->i_checksum_lo = cpu_to_le16(csum_lo);
 	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
 	    EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
-		raw->i_checksum_hi = csum_hi;
+		raw->i_checksum_hi = cpu_to_le16(csum_hi);
 
 	return csum;
 }
@@ -210,8 +210,7 @@ void ext4_evict_inode(struct inode *inode)
 			journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
 			tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
 
-			jbd2_log_start_commit(journal, commit_tid);
-			jbd2_log_wait_commit(journal, commit_tid);
+			jbd2_complete_transaction(journal, commit_tid);
 			filemap_write_and_wait(&inode->i_data);
 		}
 		truncate_inode_pages(&inode->i_data, 0);
@@ -1081,20 +1080,42 @@ retry_journal:
 /* For write_end() in data=journal mode */
 static int write_end_fn(handle_t *handle, struct buffer_head *bh)
 {
+	int ret;
 	if (!buffer_mapped(bh) || buffer_freed(bh))
 		return 0;
 	set_buffer_uptodate(bh);
-	return ext4_handle_dirty_metadata(handle, NULL, bh);
+	ret = ext4_handle_dirty_metadata(handle, NULL, bh);
+	clear_buffer_meta(bh);
+	clear_buffer_prio(bh);
+	return ret;
 }
 
-static int ext4_generic_write_end(struct file *file,
-				  struct address_space *mapping,
-				  loff_t pos, unsigned len, unsigned copied,
-				  struct page *page, void *fsdata)
+/*
+ * We need to pick up the new inode size which generic_commit_write gave us.
+ * `file' can be NULL - eg, when called from page_symlink().
+ *
+ * ext4 never places buffers on inode->i_mapping->private_list.  metadata
+ * buffers are managed internally.
+ */
+static int ext4_write_end(struct file *file,
+			  struct address_space *mapping,
+			  loff_t pos, unsigned len, unsigned copied,
+			  struct page *page, void *fsdata)
 {
-	int i_size_changed = 0;
-	struct inode *inode = mapping->host;
 	handle_t *handle = ext4_journal_current_handle();
+	struct inode *inode = mapping->host;
+	int ret = 0, ret2;
+	int i_size_changed = 0;
+
+	trace_ext4_write_end(inode, pos, len, copied);
+	if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) {
+		ret = ext4_jbd2_file_inode(handle, inode);
+		if (ret) {
+			unlock_page(page);
+			page_cache_release(page);
+			goto errout;
+		}
+	}
 
 	if (ext4_has_inline_data(inode))
 		copied = ext4_write_inline_data_end(inode, pos, len,
@@ -1105,7 +1126,7 @@ static int ext4_generic_write_end(struct file *file,
 
 	/*
 	 * No need to use i_size_read() here, the i_size
-	 * cannot change under us because we hold i_mutex.
	 * cannot change under us because we hold i_mutex.
 	 *
 	 * But it's important to update i_size while still holding page lock:
 	 * page writeout could otherwise come in and zero beyond i_size.
@@ -1115,10 +1136,10 @@ static int ext4_generic_write_end(struct file *file,
 		i_size_changed = 1;
 	}
 
-	if (pos + copied >  EXT4_I(inode)->i_disksize) {
+	if (pos + copied > EXT4_I(inode)->i_disksize) {
 		/* We need to mark inode dirty even if
	 * new_i_size is less than inode->i_size
-		 * bu greater than i_disksize.(hint delalloc)
+		 * but greater than i_disksize. (hint delalloc)
 		 */
 		ext4_update_i_disksize(inode, (pos + copied));
 		i_size_changed = 1;
@@ -1135,87 +1156,15 @@ static int ext4_generic_write_end(struct file *file,
 	if (i_size_changed)
 		ext4_mark_inode_dirty(handle, inode);
 
-	return copied;
-}
-
-/*
- * We need to pick up the new inode size which generic_commit_write gave us
- * `file' can be NULL - eg, when called from page_symlink().
- *
- * ext4 never places buffers on inode->i_mapping->private_list.  metadata
- * buffers are managed internally.
- */
-static int ext4_ordered_write_end(struct file *file,
-				  struct address_space *mapping,
-				  loff_t pos, unsigned len, unsigned copied,
-				  struct page *page, void *fsdata)
-{
-	handle_t *handle = ext4_journal_current_handle();
-	struct inode *inode = mapping->host;
-	int ret = 0, ret2;
-
-	trace_ext4_ordered_write_end(inode, pos, len, copied);
-	ret = ext4_jbd2_file_inode(handle, inode);
-
-	if (ret == 0) {
-		ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
-							page, fsdata);
-		copied = ret2;
-		if (pos + len > inode->i_size && ext4_can_truncate(inode))
-			/* if we have allocated more blocks and copied
-			 * less. We will have blocks allocated outside
-			 * inode->i_size. So truncate them
-			 */
-			ext4_orphan_add(handle, inode);
-		if (ret2 < 0)
-			ret = ret2;
-	} else {
-		unlock_page(page);
-		page_cache_release(page);
-	}
-
-	ret2 = ext4_journal_stop(handle);
-	if (!ret)
-		ret = ret2;
-
-	if (pos + len > inode->i_size) {
-		ext4_truncate_failed_write(inode);
-		/*
-		 * If truncate failed early the inode might still be
-		 * on the orphan list; we need to make sure the inode
-		 * is removed from the orphan list in that case.
-		 */
-		if (inode->i_nlink)
-			ext4_orphan_del(NULL, inode);
-	}
-
-
-	return ret ? ret : copied;
-}
-
-static int ext4_writeback_write_end(struct file *file,
-				    struct address_space *mapping,
-				    loff_t pos, unsigned len, unsigned copied,
-				    struct page *page, void *fsdata)
-{
-	handle_t *handle = ext4_journal_current_handle();
-	struct inode *inode = mapping->host;
-	int ret = 0, ret2;
-
-	trace_ext4_writeback_write_end(inode, pos, len, copied);
-	ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
-							page, fsdata);
-	copied = ret2;
+	if (copied < 0)
+		ret = copied;
 	if (pos + len > inode->i_size && ext4_can_truncate(inode))
 		/* if we have allocated more blocks and copied
 		 * less. We will have blocks allocated outside
 		 * inode->i_size. So truncate them
 		 */
 		ext4_orphan_add(handle, inode);
-
-	if (ret2 < 0)
-		ret = ret2;
-
+errout:
 	ret2 = ext4_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
@@ -1538,7 +1487,10 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 	struct ext4_io_submit io_submit;
 
 	BUG_ON(mpd->next_page <= mpd->first_page);
-	memset(&io_submit, 0, sizeof(io_submit));
+	ext4_io_submit_init(&io_submit, mpd->wbc);
+	io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
+	if (!io_submit.io_end)
+		return -ENOMEM;
 	/*
 	 * We need to start from the first_page to the next_page - 1
 	 * to make sure we also write the mapped dirty buffer_heads.
@@ -1626,6 +1578,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 		pagevec_release(&pvec);
 	}
 	ext4_io_submit(&io_submit);
+	/* Drop io_end reference we got from init */
+	ext4_put_io_end_defer(io_submit.io_end);
 	return ret;
 }
 
@@ -1670,22 +1624,25 @@ static void ext4_print_free_blocks(struct inode *inode)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct super_block *sb = inode->i_sb;
+	struct ext4_inode_info *ei = EXT4_I(inode);
 
 	ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld",
 	       EXT4_C2B(EXT4_SB(inode->i_sb),
-			ext4_count_free_clusters(inode->i_sb)));
+			ext4_count_free_clusters(sb)));
 	ext4_msg(sb, KERN_CRIT, "Free/Dirty block details");
 	ext4_msg(sb, KERN_CRIT, "free_blocks=%lld",
-	       (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+	       (long long) EXT4_C2B(EXT4_SB(sb),
 		percpu_counter_sum(&sbi->s_freeclusters_counter)));
 	ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld",
-	       (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+	       (long long) EXT4_C2B(EXT4_SB(sb),
 		percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
 	ext4_msg(sb, KERN_CRIT, "Block reservation details");
 	ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u",
-		 EXT4_I(inode)->i_reserved_data_blocks);
+		 ei->i_reserved_data_blocks);
 	ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u",
-	       EXT4_I(inode)->i_reserved_meta_blocks);
+	       ei->i_reserved_meta_blocks);
+	ext4_msg(sb, KERN_CRIT, "i_allocated_meta_blocks=%u",
+	       ei->i_allocated_meta_blocks);
 	return;
 }
 
@@ -1740,12 +1697,21 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 	 */
 	map.m_lblk = next;
 	map.m_len = max_blocks;
-	get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
+	/*
+	 * We're in delalloc path and it is possible that we're going to
+	 * need more metadata blocks than previously reserved. However
+	 * we must not fail because we're in writeback and there is
+	 * nothing we can do about it, so failure might result in data loss.
+	 * So use reserved blocks to allocate metadata if possible.
+	 */
+	get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
+			   EXT4_GET_BLOCKS_METADATA_NOFAIL;
 	if (ext4_should_dioread_nolock(mpd->inode))
 		get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
 	if (mpd->b_state & (1 << BH_Delay))
 		get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
 
+
 	blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
 	if (blks < 0) {
 		struct super_block *sb = mpd->inode->i_sb;
@@ -2272,9 +2238,16 @@ static int ext4_writepage(struct page *page,
 		 */
 		return __ext4_journalled_writepage(page, len);
 
-	memset(&io_submit, 0, sizeof(io_submit));
+	ext4_io_submit_init(&io_submit, wbc);
+	io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
+	if (!io_submit.io_end) {
+		redirty_page_for_writepage(wbc, page);
+		return -ENOMEM;
+	}
 	ret = ext4_bio_write_page(&io_submit, page, len, wbc);
 	ext4_io_submit(&io_submit);
+	/* Drop io_end reference we got from init */
+	ext4_put_io_end_defer(io_submit.io_end);
 	return ret;
 }
 
@@ -2661,7 +2634,7 @@ out_writepages:
 
 static int ext4_nonda_switch(struct super_block *sb)
 {
-	s64 free_blocks, dirty_blocks;
+	s64 free_clusters, dirty_clusters;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	/*
@@ -2672,17 +2645,18 @@ static int ext4_nonda_switch(struct super_block *sb)
 	 * Delalloc need an accurate free block accounting. So switch
 	 * to non delalloc when we are near to error range.
 	 */
-	free_blocks  = EXT4_C2B(sbi,
-		percpu_counter_read_positive(&sbi->s_freeclusters_counter));
-	dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
+	free_clusters =
+		percpu_counter_read_positive(&sbi->s_freeclusters_counter);
+	dirty_clusters =
+		percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
 	/*
 	 * Start pushing delalloc when 1/2 of free blocks are dirty.
 	 */
-	if (dirty_blocks && (free_blocks < 2 * dirty_blocks))
+	if (dirty_clusters && (free_clusters < 2 * dirty_clusters))
 		try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
 
-	if (2 * free_blocks < 3 * dirty_blocks ||
-		free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
+	if (2 * free_clusters < 3 * dirty_clusters ||
+	    free_clusters < (dirty_clusters + EXT4_FREECLUSTERS_WATERMARK)) {
 		/*
 		 * free block count is less than 150% of dirty blocks
 		 * or free blocks is less than watermark
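In cluster units, the rewritten ext4_nonda_switch() applies two thresholds: writeback is kicked once dirty clusters exceed half of the free ones, and the switch to non-delalloc happens once free clusters drop below 150% of dirty clusters or below the watermark. The predicate as standalone arithmetic, with a made-up value standing in for EXT4_FREECLUSTERS_WATERMARK:

    #include <stdio.h>

    #define WATERMARK 1024  /* stand-in for EXT4_FREECLUSTERS_WATERMARK */

    static int nonda_switch(long long free_clusters, long long dirty_clusters)
    {
        if (dirty_clusters && free_clusters < 2 * dirty_clusters)
            printf("start pushing delalloc writeback\n");
        /* fall back when free < 150% of dirty, or below the watermark */
        return 2 * free_clusters < 3 * dirty_clusters ||
               free_clusters < dirty_clusters + WATERMARK;
    }

    int main(void)
    {
        printf("switch=%d\n", nonda_switch(4000, 3000)); /* 8000 < 9000: 1 */
        printf("switch=%d\n", nonda_switch(9000, 3000)); /* 0 */
        return 0;
    }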
@@ -2818,18 +2792,9 @@ static int ext4_da_write_end(struct file *file,
 	unsigned long start, end;
 	int write_mode = (int)(unsigned long)fsdata;
 
-	if (write_mode == FALL_BACK_TO_NONDELALLOC) {
-		switch (ext4_inode_journal_mode(inode)) {
-		case EXT4_INODE_ORDERED_DATA_MODE:
-			return ext4_ordered_write_end(file, mapping, pos,
-					len, copied, page, fsdata);
-		case EXT4_INODE_WRITEBACK_DATA_MODE:
-			return ext4_writeback_write_end(file, mapping, pos,
-					len, copied, page, fsdata);
-		default:
-			BUG();
-		}
-	}
+	if (write_mode == FALL_BACK_TO_NONDELALLOC)
+		return ext4_write_end(file, mapping, pos,
+				      len, copied, page, fsdata);
 
 	trace_ext4_da_write_end(inode, pos, len, copied);
 	start = pos & (PAGE_CACHE_SIZE - 1);
@@ -3113,9 +3078,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 	struct inode *inode = file_inode(iocb->ki_filp);
         ext4_io_end_t *io_end = iocb->private;
 
-	/* if not async direct IO or dio with 0 bytes write, just return */
-	if (!io_end || !size)
-		goto out;
+	/* if not async direct IO just return */
+	if (!io_end) {
+		inode_dio_done(inode);
+		if (is_async)
+			aio_complete(iocb, ret, 0);
+		return;
+	}
 
 	ext_debug("ext4_end_io_dio(): io_end 0x%p "
 		  "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
@@ -3123,25 +3092,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 		  size);
 
 	iocb->private = NULL;
-
-	/* if not aio dio with unwritten extents, just free io and return */
-	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-		ext4_free_io_end(io_end);
-out:
-		inode_dio_done(inode);
-		if (is_async)
-			aio_complete(iocb, ret, 0);
-		return;
-	}
-
 	io_end->offset = offset;
 	io_end->size = size;
 	if (is_async) {
 		io_end->iocb = iocb;
 		io_end->result = ret;
 	}
-
-	ext4_add_complete_io(io_end);
+	ext4_put_io_end_defer(io_end);
 }
 
 /*
@@ -3175,6 +3132,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 	get_block_t *get_block_func = NULL;
 	int dio_flags = 0;
 	loff_t final_size = offset + count;
+	ext4_io_end_t *io_end = NULL;
 
 	/* Use the old path for reads and writes beyond i_size. */
 	if (rw != WRITE || final_size > inode->i_size)
@@ -3213,13 +3171,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 	iocb->private = NULL;
 	ext4_inode_aio_set(inode, NULL);
 	if (!is_sync_kiocb(iocb)) {
-		ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS);
+		io_end = ext4_init_io_end(inode, GFP_NOFS);
 		if (!io_end) {
 			ret = -ENOMEM;
 			goto retake_lock;
 		}
 		io_end->flag |= EXT4_IO_END_DIRECT;
-		iocb->private = io_end;
+		/*
+		 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
+		 */
+		iocb->private = ext4_get_io_end(io_end);
 		/*
 		 * we save the io structure for current async direct
 		 * IO, so that later ext4_map_blocks() could flag the
@@ -3243,26 +3204,27 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 				   NULL,
 				   dio_flags);
 
-	if (iocb->private)
-		ext4_inode_aio_set(inode, NULL);
 	/*
-	 * The io_end structure takes a reference to the inode, that
-	 * structure needs to be destroyed and the reference to the
-	 * inode need to be dropped, when IO is complete, even with 0
-	 * byte write, or failed.
-	 *
-	 * In the successful AIO DIO case, the io_end structure will
-	 * be destroyed and the reference to the inode will be dropped
-	 * after the end_io call back function is called.
-	 *
-	 * In the case there is 0 byte write, or error case, since VFS
-	 * direct IO won't invoke the end_io call back function, we
-	 * need to free the end_io structure here.
+	 * Put our reference to io_end. This can free the io_end structure e.g.
+	 * in sync IO case or in case of error. It can even perform extent
+	 * conversion if all bios we submitted finished before we got here.
+	 * Note that in that case iocb->private can be already set to NULL
+	 * here.
 	 */
-	if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
-		ext4_free_io_end(iocb->private);
-		iocb->private = NULL;
-	} else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
+	if (io_end) {
+		ext4_inode_aio_set(inode, NULL);
+		ext4_put_io_end(io_end);
+		/*
+		 * In case of error or no write ext4_end_io_dio() was not
+		 * called so we have to put iocb's reference.
+		 */
+		if (ret <= 0 && ret != -EIOCBQUEUED) {
+			WARN_ON(iocb->private != io_end);
+			ext4_put_io_end(io_end);
+			iocb->private = NULL;
+		}
+	}
+	if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
 						EXT4_STATE_DIO_UNWRITTEN)) {
 		int err;
 		/*
@@ -3334,27 +3296,12 @@ static int ext4_journalled_set_page_dirty(struct page *page)
 	return __set_page_dirty_nobuffers(page);
 }
 
-static const struct address_space_operations ext4_ordered_aops = {
+static const struct address_space_operations ext4_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
 	.writepage		= ext4_writepage,
 	.write_begin		= ext4_write_begin,
-	.write_end		= ext4_ordered_write_end,
-	.bmap			= ext4_bmap,
-	.invalidatepage		= ext4_invalidatepage,
-	.releasepage		= ext4_releasepage,
-	.direct_IO		= ext4_direct_IO,
-	.migratepage		= buffer_migrate_page,
-	.is_partially_uptodate  = block_is_partially_uptodate,
-	.error_remove_page	= generic_error_remove_page,
-};
-
-static const struct address_space_operations ext4_writeback_aops = {
-	.readpage		= ext4_readpage,
-	.readpages		= ext4_readpages,
-	.writepage		= ext4_writepage,
-	.write_begin		= ext4_write_begin,
-	.write_end		= ext4_writeback_write_end,
+	.write_end		= ext4_write_end,
 	.bmap			= ext4_bmap,
 	.invalidatepage		= ext4_invalidatepage,
 	.releasepage		= ext4_releasepage,
@@ -3399,23 +3346,21 @@ void ext4_set_aops(struct inode *inode)
 {
 	switch (ext4_inode_journal_mode(inode)) {
 	case EXT4_INODE_ORDERED_DATA_MODE:
-		if (test_opt(inode->i_sb, DELALLOC))
-			inode->i_mapping->a_ops = &ext4_da_aops;
-		else
-			inode->i_mapping->a_ops = &ext4_ordered_aops;
+		ext4_set_inode_state(inode, EXT4_STATE_ORDERED_MODE);
 		break;
 	case EXT4_INODE_WRITEBACK_DATA_MODE:
-		if (test_opt(inode->i_sb, DELALLOC))
-			inode->i_mapping->a_ops = &ext4_da_aops;
-		else
-			inode->i_mapping->a_ops = &ext4_writeback_aops;
+		ext4_clear_inode_state(inode, EXT4_STATE_ORDERED_MODE);
 		break;
 	case EXT4_INODE_JOURNAL_DATA_MODE:
 		inode->i_mapping->a_ops = &ext4_journalled_aops;
-		break;
+		return;
 	default:
 		BUG();
 	}
+	if (test_opt(inode->i_sb, DELALLOC))
+		inode->i_mapping->a_ops = &ext4_da_aops;
+	else
+		inode->i_mapping->a_ops = &ext4_aops;
 }
 
 
@@ -3646,20 +3591,190 @@ int ext4_can_truncate(struct inode *inode)
 int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 {
 	struct inode *inode = file_inode(file);
+	struct super_block *sb = inode->i_sb;
+	ext4_lblk_t first_block, stop_block;
+	struct address_space *mapping = inode->i_mapping;
+	loff_t first_page, last_page, page_len;
+	loff_t first_page_offset, last_page_offset;
+	handle_t *handle;
+	unsigned int credits;
+	int ret = 0;
+
 	if (!S_ISREG(inode->i_mode))
 		return -EOPNOTSUPP;
 
-	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		return ext4_ind_punch_hole(file, offset, length);
-
-	if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
+	if (EXT4_SB(sb)->s_cluster_ratio > 1) {
 		/* TODO: Add support for bigalloc file systems */
 		return -EOPNOTSUPP;
 	}
 
 	trace_ext4_punch_hole(inode, offset, length);
 
-	return ext4_ext_punch_hole(file, offset, length);
+	/*
+	 * Write out all dirty pages to avoid race conditions
+	 * Then release them.
+	 */
+	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+		ret = filemap_write_and_wait_range(mapping, offset,
+						   offset + length - 1);
+		if (ret)
+			return ret;
+	}
+
+	mutex_lock(&inode->i_mutex);
+	/* It's not possible to punch a hole in an append-only file */
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+		ret = -EPERM;
+		goto out_mutex;
+	}
+	if (IS_SWAPFILE(inode)) {
+		ret = -ETXTBSY;
+		goto out_mutex;
+	}
+
+	/* No need to punch hole beyond i_size */
+	if (offset >= inode->i_size)
+		goto out_mutex;
+
+	/*
+	 * If the hole extends beyond i_size, set the hole
+	 * to end after the page that contains i_size
+	 */
+	if (offset + length > inode->i_size) {
+		length = inode->i_size +
+		   PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
+		   offset;
+	}
+
+	first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	last_page = (offset + length) >> PAGE_CACHE_SHIFT;
+
+	first_page_offset = first_page << PAGE_CACHE_SHIFT;
+	last_page_offset = last_page << PAGE_CACHE_SHIFT;
+
+	/* Now release the pages */
+	if (last_page_offset > first_page_offset) {
+		truncate_pagecache_range(inode, first_page_offset,
+					 last_page_offset - 1);
+	}
+
+	/* Wait for all existing dio workers; newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	ret = ext4_flush_unwritten_io(inode);
+	if (ret)
+		goto out_dio;
+	inode_dio_wait(inode);
+
+	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+		credits = ext4_writepage_trans_blocks(inode);
+	else
+		credits = ext4_blocks_for_truncate(inode);
+	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		ext4_std_error(sb, ret);
+		goto out_dio;
+	}
+
+	/*
+	 * Now we need to zero out the non-page-aligned data in the
+	 * pages at the start and tail of the hole, and unmap the
+	 * buffer heads for the block aligned regions of the page that
+	 * were completely zeroed.
+	 */
+	if (first_page > last_page) {
+		/*
+		 * If the file space being truncated is contained
+		 * within a page just zero out and unmap the middle of
+		 * that page
+		 */
+		ret = ext4_discard_partial_page_buffers(handle,
+			mapping, offset, length, 0);
+
+		if (ret)
+			goto out_stop;
+	} else {
+		/*
+		 * zero out and unmap the partial page that contains
+		 * the start of the hole
+		 */
+		page_len = first_page_offset - offset;
+		if (page_len > 0) {
+			ret = ext4_discard_partial_page_buffers(handle, mapping,
+						offset, page_len, 0);
+			if (ret)
+				goto out_stop;
+		}
+
+		/*
+		 * zero out and unmap the partial page that contains
+		 * the end of the hole
+		 */
+		page_len = offset + length - last_page_offset;
+		if (page_len > 0) {
+			ret = ext4_discard_partial_page_buffers(handle, mapping,
+					last_page_offset, page_len, 0);
+			if (ret)
+				goto out_stop;
+		}
+	}
+
+	/*
+	 * If i_size is contained in the last page, we need to
+	 * unmap and zero the partial page after i_size
+	 */
+	if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
+	   inode->i_size % PAGE_CACHE_SIZE != 0) {
+		page_len = PAGE_CACHE_SIZE -
+			(inode->i_size & (PAGE_CACHE_SIZE - 1));
+
+		if (page_len > 0) {
+			ret = ext4_discard_partial_page_buffers(handle,
+					mapping, inode->i_size, page_len, 0);
+
+			if (ret)
+				goto out_stop;
+		}
+	}
+
+	first_block = (offset + sb->s_blocksize - 1) >>
+		EXT4_BLOCK_SIZE_BITS(sb);
+	stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
+
+	/* If there are no blocks to remove, return now */
+	if (first_block >= stop_block)
+		goto out_stop;
+
+	down_write(&EXT4_I(inode)->i_data_sem);
+	ext4_discard_preallocations(inode);
+
+	ret = ext4_es_remove_extent(inode, first_block,
+				    stop_block - first_block);
+	if (ret) {
+		up_write(&EXT4_I(inode)->i_data_sem);
+		goto out_stop;
+	}
+
+	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+		ret = ext4_ext_remove_space(inode, first_block,
+					    stop_block - 1);
+	else
+		ret = ext4_free_hole_blocks(handle, inode, first_block,
+					    stop_block);
+
+	ext4_discard_preallocations(inode);
+	up_write(&EXT4_I(inode)->i_data_sem);
+	if (IS_SYNC(inode))
+		ext4_handle_sync(handle);
+	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+	ext4_mark_inode_dirty(handle, inode);
+out_stop:
+	ext4_journal_stop(handle);
+out_dio:
+	ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+	mutex_unlock(&inode->i_mutex);
+	return ret;
 }
 
 /*
@@ -3692,6 +3807,19 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
  */
 void ext4_truncate(struct inode *inode)
 {
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	unsigned int credits;
+	handle_t *handle;
+	struct address_space *mapping = inode->i_mapping;
+	loff_t page_len;
+
+	/*
+	 * There is a possibility that we're either freeing the inode
+	 * or it's a completely new inode. In those cases we might not
+	 * have i_mutex locked because it's not necessary.
+	 */
+	if (!(inode->i_state & (I_NEW|I_FREEING)))
+		WARN_ON(!mutex_is_locked(&inode->i_mutex));
 	trace_ext4_truncate_enter(inode);
 
 	if (!ext4_can_truncate(inode))
@@ -3710,10 +3838,72 @@ void ext4_truncate(struct inode *inode)
 			return;
 	}
 
+	/*
+	 * finish any pending end_io work so we won't run the risk of
+	 * converting any truncated blocks to initialized later
+	 */
+	ext4_flush_unwritten_io(inode);
+
+	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+		credits = ext4_writepage_trans_blocks(inode);
+	else
+		credits = ext4_blocks_for_truncate(inode);
+
+	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
+	if (IS_ERR(handle)) {
+		ext4_std_error(inode->i_sb, PTR_ERR(handle));
+		return;
+	}
+
+	if (inode->i_size % PAGE_CACHE_SIZE != 0) {
+		page_len = PAGE_CACHE_SIZE -
+			(inode->i_size & (PAGE_CACHE_SIZE - 1));
+
+		if (ext4_discard_partial_page_buffers(handle,
+				mapping, inode->i_size, page_len, 0))
+			goto out_stop;
+	}
+
+	/*
+	 * We add the inode to the orphan list, so that if this
+	 * truncate spans multiple transactions, and we crash, we will
+	 * resume the truncate when the filesystem recovers.  It also
+	 * marks the inode dirty, to catch the new size.
+	 *
+	 * Implication: the file must always be in a sane, consistent
+	 * truncatable state while each transaction commits.
+	 */
+	if (ext4_orphan_add(handle, inode))
+		goto out_stop;
+
+	down_write(&EXT4_I(inode)->i_data_sem);
+
+	ext4_discard_preallocations(inode);
+
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		ext4_ext_truncate(inode);
+		ext4_ext_truncate(handle, inode);
 	else
-		ext4_ind_truncate(inode);
+		ext4_ind_truncate(handle, inode);
+
+	up_write(&ei->i_data_sem);
+
+	if (IS_SYNC(inode))
+		ext4_handle_sync(handle);
+
+out_stop:
+	/*
+	 * If this was a simple ftruncate() and the file will remain alive,
+	 * then we need to clear up the orphan record which we created above.
+	 * However, if this was a real unlink then we were called by
+	 * ext4_delete_inode(), and we allow that function to clean up the
+	 * orphan info for us.
+	 */
+	if (inode->i_nlink)
+		ext4_orphan_del(handle, inode);
+
+	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+	ext4_mark_inode_dirty(handle, inode);
+	ext4_journal_stop(handle);
 
 	trace_ext4_truncate_exit(inode);
 }
@@ -3821,13 +4011,14 @@ make_io:
 		if (EXT4_SB(sb)->s_inode_readahead_blks) {
 			ext4_fsblk_t b, end, table;
 			unsigned num;
+			__u32 ra_blks = EXT4_SB(sb)->s_inode_readahead_blks;
 
 			table = ext4_inode_table(sb, gdp);
 			/* s_inode_readahead_blks is always a power of 2 */
-			b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
+			b = block & ~((ext4_fsblk_t) ra_blks - 1);
 			if (table > b)
 				b = table;
-			end = b + EXT4_SB(sb)->s_inode_readahead_blks;
+			end = b + ra_blks;
 			num = EXT4_INODES_PER_GROUP(sb);
 			if (ext4_has_group_desc_csum(sb))
 				num -= ext4_itable_unused_count(sb, gdp);
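The readahead hunk above is a classic type-widening fix: s_inode_readahead_blks is 32-bit, so ~(ra_blks - 1) is computed in 32 bits and, once zero-extended, wipes bits 32..63 of a 64-bit block number; casting to ext4_fsblk_t before taking the complement keeps the mask wide. A standalone demonstration:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t block = 0x100000123ULL;    /* beyond the 32-bit boundary */
        uint32_t ra_blks = 32;              /* power-of-2 readahead window */

        uint64_t buggy = block & ~(ra_blks - 1);            /* ~ is 32-bit */
        uint64_t fixed = block & ~((uint64_t)ra_blks - 1);  /* ~ is 64-bit */

        printf("buggy: 0x%llx\n", (unsigned long long)buggy); /* 0x120 */
        printf("fixed: 0x%llx\n", (unsigned long long)fixed); /* 0x100000120 */
        return 0;
    }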
@@ -4024,8 +4215,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 	 * NeilBrown 1999oct15
 	 */
 	if (inode->i_nlink == 0) {
-		if (inode->i_mode == 0 ||
-		    !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
+		if ((inode->i_mode == 0 ||
+		     !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
+		    ino != EXT4_BOOT_LOADER_INO) {
 			/* this inode is deleted */
 			ret = -ESTALE;
 			goto bad_inode;
@@ -4033,7 +4225,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		/* The only unlinked inodes we let through here have
 		 * valid i_mode and are being read by the orphan
 		 * recovery code: that's fine, we're about to complete
-		 * the process of deleting those. */
+		 * the process of deleting those.
+		 * OR it is the EXT4_BOOT_LOADER_INO which is
+		 * not initialized on a new filesystem. */
 	}
 	ei->i_flags = le32_to_cpu(raw_inode->i_flags);
 	inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
@@ -4153,6 +4347,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		else
 			init_special_inode(inode, inode->i_mode,
 			   new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
+	} else if (ino == EXT4_BOOT_LOADER_INO) {
+		make_bad_inode(inode);
 	} else {
 		ret = -EIO;
 		EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);

+ 205 - 13
fs/ext4/ioctl.c

@@ -17,9 +17,201 @@
 #include <asm/uaccess.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
+#include "ext4_extents.h"
 
 #define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
 
+/**
+ * Swap memory between @a and @b for @len bytes.
+ *
+ * @a:          pointer to first memory area
+ * @b:          pointer to second memory area
+ * @len:        number of bytes to swap
+ *
+ */
+static void memswap(void *a, void *b, size_t len)
+{
+	unsigned char *ap, *bp;
+	unsigned char tmp;
+
+	ap = (unsigned char *)a;
+	bp = (unsigned char *)b;
+	while (len-- > 0) {
+		tmp = *ap;
+		*ap = *bp;
+		*bp = tmp;
+		ap++;
+		bp++;
+	}
+}
+
+/**
+ * Swap i_data and associated attributes between @inode1 and @inode2.
+ * This function is used for the primary swap between inode1 and inode2
+ * and also to revert this primary swap in case of errors.
+ *
+ * Therefore you have to make sure that calling this method twice
+ * will revert all changes.
+ *
+ * @inode1:     pointer to first inode
+ * @inode2:     pointer to second inode
+ */
+static void swap_inode_data(struct inode *inode1, struct inode *inode2)
+{
+	loff_t isize;
+	struct ext4_inode_info *ei1;
+	struct ext4_inode_info *ei2;
+
+	ei1 = EXT4_I(inode1);
+	ei2 = EXT4_I(inode2);
+
+	memswap(&inode1->i_flags, &inode2->i_flags, sizeof(inode1->i_flags));
+	memswap(&inode1->i_version, &inode2->i_version,
+		  sizeof(inode1->i_version));
+	memswap(&inode1->i_blocks, &inode2->i_blocks,
+		  sizeof(inode1->i_blocks));
+	memswap(&inode1->i_bytes, &inode2->i_bytes, sizeof(inode1->i_bytes));
+	memswap(&inode1->i_atime, &inode2->i_atime, sizeof(inode1->i_atime));
+	memswap(&inode1->i_mtime, &inode2->i_mtime, sizeof(inode1->i_mtime));
+
+	memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data));
+	memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags));
+	memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
+	memswap(&ei1->i_es_tree, &ei2->i_es_tree, sizeof(ei1->i_es_tree));
+	memswap(&ei1->i_es_lru_nr, &ei2->i_es_lru_nr, sizeof(ei1->i_es_lru_nr));
+
+	isize = i_size_read(inode1);
+	i_size_write(inode1, i_size_read(inode2));
+	i_size_write(inode2, isize);
+}
+
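The revert-on-error strategy in swap_inode_boot_loader() below works only because swapping is an involution: applying memswap() (and hence swap_inode_data()) twice restores the original state. A tiny standalone check of that property:

    #include <stdio.h>
    #include <string.h>

    /* Byte-wise swap as in memswap() above; swapping twice is the
     * identity, which is what makes the error-path revert sound. */
    static void memswap(void *a, void *b, size_t len)
    {
        unsigned char *ap = a, *bp = b;

        while (len-- > 0) {
            unsigned char tmp = *ap;
            *ap++ = *bp;
            *bp++ = tmp;
        }
    }

    int main(void)
    {
        int x = 1, y = 2;

        memswap(&x, &y, sizeof(x));     /* x == 2, y == 1 */
        memswap(&x, &y, sizeof(x));     /* reverted: x == 1, y == 2 */
        printf("%d %d\n", x, y);
        return 0;
    }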
+/**
+ * Swap the information from the given @inode and the inode
+ * EXT4_BOOT_LOADER_INO. It will basically swap i_data and all other
+ * important fields of the inodes.
+ *
+ * @sb:         the super block of the filesystem
+ * @inode:      the inode to swap with EXT4_BOOT_LOADER_INO
+ *
+ */
+static long swap_inode_boot_loader(struct super_block *sb,
+				struct inode *inode)
+{
+	handle_t *handle;
+	int err;
+	struct inode *inode_bl;
+	struct ext4_inode_info *ei;
+	struct ext4_inode_info *ei_bl;
+	struct ext4_sb_info *sbi;
+
+	if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) {
+		err = -EINVAL;
+		goto swap_boot_out;
+	}
+
+	if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) {
+		err = -EPERM;
+		goto swap_boot_out;
+	}
+
+	sbi = EXT4_SB(sb);
+	ei = EXT4_I(inode);
+
+	inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO);
+	if (IS_ERR(inode_bl)) {
+		err = PTR_ERR(inode_bl);
+		goto swap_boot_out;
+	}
+	ei_bl = EXT4_I(inode_bl);
+
+	filemap_flush(inode->i_mapping);
+	filemap_flush(inode_bl->i_mapping);
+
+	/* Protect the original inodes against truncate and make sure
+	 * that only one swap_inode_boot_loader is running. */
+	ext4_inode_double_lock(inode, inode_bl);
+
+	truncate_inode_pages(&inode->i_data, 0);
+	truncate_inode_pages(&inode_bl->i_data, 0);
+
+	/* Wait for all existing dio workers */
+	ext4_inode_block_unlocked_dio(inode);
+	ext4_inode_block_unlocked_dio(inode_bl);
+	inode_dio_wait(inode);
+	inode_dio_wait(inode_bl);
+
+	handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2);
+	if (IS_ERR(handle)) {
+		err = -EINVAL;
+		goto swap_boot_out;
+	}
+
+	/* Protect extent tree against block allocations via delalloc */
+	ext4_double_down_write_data_sem(inode, inode_bl);
+
+	if (inode_bl->i_nlink == 0) {
+		/* this inode has never been used as a BOOT_LOADER */
+		set_nlink(inode_bl, 1);
+		i_uid_write(inode_bl, 0);
+		i_gid_write(inode_bl, 0);
+		inode_bl->i_flags = 0;
+		ei_bl->i_flags = 0;
+		inode_bl->i_version = 1;
+		i_size_write(inode_bl, 0);
+		inode_bl->i_mode = S_IFREG;
+		if (EXT4_HAS_INCOMPAT_FEATURE(sb,
+					      EXT4_FEATURE_INCOMPAT_EXTENTS)) {
+			ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS);
+			ext4_ext_tree_init(handle, inode_bl);
+		} else
+			memset(ei_bl->i_data, 0, sizeof(ei_bl->i_data));
+	}
+
+	swap_inode_data(inode, inode_bl);
+
+	inode->i_ctime = inode_bl->i_ctime = ext4_current_time(inode);
+
+	spin_lock(&sbi->s_next_gen_lock);
+	inode->i_generation = sbi->s_next_generation++;
+	inode_bl->i_generation = sbi->s_next_generation++;
+	spin_unlock(&sbi->s_next_gen_lock);
+
+	ext4_discard_preallocations(inode);
+
+	err = ext4_mark_inode_dirty(handle, inode);
+	if (err < 0) {
+		ext4_warning(inode->i_sb,
+			"couldn't mark inode #%lu dirty (err %d)",
+			inode->i_ino, err);
+		/* Revert all changes: */
+		swap_inode_data(inode, inode_bl);
+	} else {
+		err = ext4_mark_inode_dirty(handle, inode_bl);
+		if (err < 0) {
+			ext4_warning(inode_bl->i_sb,
+				"couldn't mark inode #%lu dirty (err %d)",
+				inode_bl->i_ino, err);
+			/* Revert all changes: */
+			swap_inode_data(inode, inode_bl);
+			ext4_mark_inode_dirty(handle, inode);
+		}
+	}
+
+	ext4_journal_stop(handle);
+
+	ext4_double_up_write_data_sem(inode, inode_bl);
+
+	ext4_inode_resume_unlocked_dio(inode);
+	ext4_inode_resume_unlocked_dio(inode_bl);
+
+	ext4_inode_double_unlock(inode, inode_bl);
+
+	iput(inode_bl);
+
+swap_boot_out:
+	return err;
+}
+
 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
@@ -83,17 +275,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			if (!capable(CAP_SYS_RESOURCE))
 				goto flags_out;
 		}
-		if (oldflags & EXT4_EXTENTS_FL) {
-			/* We don't support clearning extent flags */
-			if (!(flags & EXT4_EXTENTS_FL)) {
-				err = -EOPNOTSUPP;
-				goto flags_out;
-			}
-		} else if (flags & EXT4_EXTENTS_FL) {
-			/* migrate the file */
+		if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
 			migrate = 1;
-			flags &= ~EXT4_EXTENTS_FL;
-		}
 
 		if (flags & EXT4_EOFBLOCKS_FL) {
 			/* we don't support adding EOFBLOCKS flag */
@@ -137,8 +320,13 @@ flags_err:
 			err = ext4_change_inode_journal_flag(inode, jflag);
 		if (err)
 			goto flags_out;
-		if (migrate)
-			err = ext4_ext_migrate(inode);
+		if (migrate) {
+			if (flags & EXT4_EXTENTS_FL)
+				err = ext4_ext_migrate(inode);
+			else
+				err = ext4_ind_migrate(inode);
+		}
+
 flags_out:
 		mutex_unlock(&inode->i_mutex);
 		mnt_drop_write_file(filp);
@@ -357,9 +545,13 @@ group_add_out:
 		return err;
 	}
 
+	case EXT4_IOC_SWAP_BOOT:
+		if (!(filp->f_mode & FMODE_WRITE))
+			return -EBADF;
+		return swap_inode_boot_loader(sb, inode);
+
 	case EXT4_IOC_RESIZE_FS: {
 		ext4_fsblk_t n_blocks_count;
-		struct super_block *sb = inode->i_sb;
 		int err = 0, err2 = 0;
 		ext4_group_t o_group = EXT4_SB(sb)->s_groups_count;
 

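For completeness, a hedged sketch of how userspace would invoke the new ioctl on a prepared boot-loader image file; the ioctl number is an assumption taken from the ext4 headers of this era and should be verified against your tree:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>

    /* Assumption: matches fs/ext4/ext4.h at the time of this merge. */
    #define EXT4_IOC_SWAP_BOOT      _IO('f', 17)

    int main(int argc, char **argv)
    {
        int fd;

        if (argc != 2)
            return 1;
        fd = open(argv[1], O_WRONLY);   /* the ioctl requires FMODE_WRITE */
        if (fd < 0 || ioctl(fd, EXT4_IOC_SWAP_BOOT) < 0) {
            perror("EXT4_IOC_SWAP_BOOT");
            return 1;
        }
        close(fd);
        return 0;
    }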
+ 183 - 70
fs/ext4/mballoc.c

@@ -405,6 +405,12 @@ static inline void mb_clear_bit(int bit, void *addr)
 	ext4_clear_bit(bit, addr);
 }
 
+static inline int mb_test_and_clear_bit(int bit, void *addr)
+{
+	addr = mb_correct_addr_and_bit(&bit, addr);
+	return ext4_test_and_clear_bit(bit, addr);
+}
+
 static inline int mb_find_next_zero_bit(void *addr, int max, int start)
 {
 	int fix = 0, ret, tmpmax;
@@ -764,6 +770,24 @@ void ext4_mb_generate_buddy(struct super_block *sb,
 	spin_unlock(&EXT4_SB(sb)->s_bal_lock);
 }
 
+static void mb_regenerate_buddy(struct ext4_buddy *e4b)
+{
+	int count;
+	int order = 1;
+	void *buddy;
+
+	while ((buddy = mb_find_buddy(e4b, order++, &count))) {
+		ext4_set_bits(buddy, 0, count);
+	}
+	e4b->bd_info->bb_fragments = 0;
+	memset(e4b->bd_info->bb_counters, 0,
+		sizeof(*e4b->bd_info->bb_counters) *
+		(e4b->bd_sb->s_blocksize_bits + 2));
+
+	ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
+		e4b->bd_bitmap, e4b->bd_group);
+}
+
 /* The buddy information is attached to the buddy cache inode
  * for convenience. The information regarding each group
  * is loaded via ext4_mb_load_buddy. The information involve
@@ -860,8 +884,6 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 
 	first_block = page->index * blocks_per_page;
 	for (i = 0; i < blocks_per_page; i++) {
-		int group;
-
 		group = (first_block + i) >> 1;
 		if (group >= ngroups)
 			break;
@@ -1011,6 +1033,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 	struct page *page;
 	int ret = 0;
 
+	might_sleep();
 	mb_debug(1, "init group %u\n", group);
 	this_grp = ext4_get_group_info(sb, group);
 	/*
@@ -1082,6 +1105,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct inode *inode = sbi->s_buddy_cache;
 
+	might_sleep();
 	mb_debug(1, "load group %u\n", group);
 
 	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
@@ -1244,6 +1268,33 @@ static void mb_clear_bits(void *bm, int cur, int len)
 	}
 }
 
+/* Clear bits in the given range. Returns the first bit found already
+ * zero, if any, or -1 otherwise.
+ */
+static int mb_test_and_clear_bits(void *bm, int cur, int len)
+{
+	__u32 *addr;
+	int zero_bit = -1;
+
+	len = cur + len;
+	while (cur < len) {
+		if ((cur & 31) == 0 && (len - cur) >= 32) {
+			/* fast path: clear whole word at once */
+			addr = bm + (cur >> 3);
+			if (*addr != (__u32)(-1) && zero_bit == -1)
+				zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
+			*addr = 0;
+			cur += 32;
+			continue;
+		}
+		if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
+			zero_bit = cur;
+		cur++;
+	}
+
+	return zero_bit;
+}
+
 void ext4_set_bits(void *bm, int cur, int len)
 {
 	__u32 *addr;
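mb_test_and_clear_bits() above clears [cur, cur+len) and reports the first bit that was already zero, taking a whole-word shortcut for aligned 32-bit spans. A standalone rendition over a plain uint32_t array to make the fast path concrete; bit numbering is little-endian within each word, and __builtin_ctz (GCC/Clang) stands in for mb_find_next_zero_bit():

    #include <stdio.h>
    #include <stdint.h>

    static int test_and_clear_bits(uint32_t *bm, int cur, int len)
    {
        int zero_bit = -1;

        for (len += cur; cur < len; ) {
            if ((cur & 31) == 0 && len - cur >= 32) {
                /* fast path: clear a whole aligned word at once */
                uint32_t *addr = bm + (cur >> 5);
                if (*addr != UINT32_MAX && zero_bit == -1)
                    zero_bit = cur + __builtin_ctz(~*addr);
                *addr = 0;
                cur += 32;
                continue;
            }
            if (!((bm[cur >> 5] >> (cur & 31)) & 1) && zero_bit == -1)
                zero_bit = cur;     /* already free: corruption sign */
            bm[cur >> 5] &= ~(1u << (cur & 31));
            cur++;
        }
        return zero_bit;
    }

    int main(void)
    {
        uint32_t bm[2] = { 0xffffffff, 0xffffffef };    /* bit 36 is 0 */

        printf("first zero: %d\n", test_and_clear_bits(bm, 0, 64)); /* 36 */
        return 0;
    }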
@@ -1262,17 +1313,90 @@ void ext4_set_bits(void *bm, int cur, int len)
 	}
 }
 
+/*
+ * _________________________________________________________________ */
+
+static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
+{
+	if (mb_test_bit(*bit + side, bitmap)) {
+		mb_clear_bit(*bit, bitmap);
+		(*bit) -= side;
+		return 1;
+	}
+	else {
+		(*bit) += side;
+		mb_set_bit(*bit, bitmap);
+		return -1;
+	}
+}
+
+static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
+{
+	int max;
+	int order = 1;
+	void *buddy = mb_find_buddy(e4b, order, &max);
+
+	while (buddy) {
+		void *buddy2;
+
+		/* Bits in range [first; last] are known to be set since
+		 * corresponding blocks were allocated. Bits in range
+		 * (first; last) will stay set because they form buddies on
+		 * upper layer. We just deal with borders if they don't
+		 * align with upper layer and then go up.
+		 * Releasing entire group is all about clearing
+		 * single bit of highest order buddy.
+		 */
+
+		/* Example:
+		 * ---------------------------------
+		 * |   1   |   1   |   1   |   1   |
+		 * ---------------------------------
+		 * | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
+		 * ---------------------------------
+		 *   0   1   2   3   4   5   6   7
+		 *      \_____________________/
+		 *
+		 * Neither [1] nor [6] is aligned to above layer.
+		 * Left neighbour [0] is free, so mark it busy,
+		 * decrease bb_counters and extend range to
+		 * [0; 6]
+		 * Right neighbour [7] is busy. It can't be coalesced with [6], so
+		 * mark [6] free, increase bb_counters and shrink range to
+		 * [0; 5].
+		 * Then shift range to [0; 2], go up and do the same.
+		 */
+
+
+		if (first & 1)
+			e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
+		if (!(last & 1))
+			e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
+		if (first > last)
+			break;
+		order++;
+
+		if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
+			mb_clear_bits(buddy, first, last - first + 1);
+			e4b->bd_info->bb_counters[order - 1] += last - first + 1;
+			break;
+		}
+		first >>= 1;
+		last >>= 1;
+		buddy = buddy2;
+	}
+}
+
 static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
-			  int first, int count)
+			   int first, int count)
 {
-	int block = 0;
-	int max = 0;
-	int order;
-	void *buddy;
-	void *buddy2;
+	int left_is_free = 0;
+	int right_is_free = 0;
+	int block;
+	int last = first + count - 1;
 	struct super_block *sb = e4b->bd_sb;
 
-	BUG_ON(first + count > (sb->s_blocksize << 3));
+	BUG_ON(last >= (sb->s_blocksize << 3));
 	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
 	mb_check_buddy(e4b);
 	mb_free_blocks_double(inode, e4b, first, count);
@@ -1281,67 +1405,54 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
 	if (first < e4b->bd_info->bb_first_free)
 		e4b->bd_info->bb_first_free = first;
 
-	/* let's maintain fragments counter */
+	/* Access memory sequentially: check the left neighbour,
+	 * clear the range, and then check the right neighbour
+	 */
 	if (first != 0)
-		block = !mb_test_bit(first - 1, e4b->bd_bitmap);
-	if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
-		max = !mb_test_bit(first + count, e4b->bd_bitmap);
-	if (block && max)
-		e4b->bd_info->bb_fragments--;
-	else if (!block && !max)
-		e4b->bd_info->bb_fragments++;
+		left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
+	block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
+	if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
+		right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
 
-	/* let's maintain buddy itself */
-	while (count-- > 0) {
-		block = first++;
-		order = 0;
+	if (unlikely(block != -1)) {
+		ext4_fsblk_t blocknr;
 
-		if (!mb_test_bit(block, e4b->bd_bitmap)) {
-			ext4_fsblk_t blocknr;
-
-			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
-			blocknr += EXT4_C2B(EXT4_SB(sb), block);
-			ext4_grp_locked_error(sb, e4b->bd_group,
-					      inode ? inode->i_ino : 0,
-					      blocknr,
-					      "freeing already freed block "
-					      "(bit %u)", block);
-		}
-		mb_clear_bit(block, e4b->bd_bitmap);
-		e4b->bd_info->bb_counters[order]++;
-
-		/* start of the buddy */
-		buddy = mb_find_buddy(e4b, order, &max);
-
-		do {
-			block &= ~1UL;
-			if (mb_test_bit(block, buddy) ||
-					mb_test_bit(block + 1, buddy))
-				break;
-
-			/* both the buddies are free, try to coalesce them */
-			buddy2 = mb_find_buddy(e4b, order + 1, &max);
+		blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
+		blocknr += EXT4_C2B(EXT4_SB(sb), block);
+		ext4_grp_locked_error(sb, e4b->bd_group,
+				      inode ? inode->i_ino : 0,
+				      blocknr,
+				      "freeing already freed block "
+				      "(bit %u)", block);
+		mb_regenerate_buddy(e4b);
+		goto done;
+	}
 
-			if (!buddy2)
-				break;
+	/* let's maintain fragments counter */
+	if (left_is_free && right_is_free)
+		e4b->bd_info->bb_fragments--;
+	else if (!left_is_free && !right_is_free)
+		e4b->bd_info->bb_fragments++;
 
-			if (order > 0) {
-				/* for special purposes, we don't set
-				 * free bits in bitmap */
-				mb_set_bit(block, buddy);
-				mb_set_bit(block + 1, buddy);
-			}
-			e4b->bd_info->bb_counters[order]--;
-			e4b->bd_info->bb_counters[order]--;
+	/* buddy[0] == bd_bitmap is a special case, so handle
+	 * it right away and let mb_buddy_mark_free stay free of
+	 * zero-order checks.
+	 * Check if neighbours are to be coalesced, and
+	 * adjust the bitmap, bb_counters and borders appropriately.
+	 */
+	if (first & 1) {
+		first += !left_is_free;
+		e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
+	}
+	if (!(last & 1)) {
+		last -= !right_is_free;
+		e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
+	}
 
-			block = block >> 1;
-			order++;
-			e4b->bd_info->bb_counters[order]++;
+	if (first <= last)
+		mb_buddy_mark_free(e4b, first >> 1, last >> 1);
 
-			mb_clear_bit(block, buddy2);
-			buddy = buddy2;
-		} while (1);
-	}
+done:
 	mb_set_largest_free_order(sb, e4b->bd_info);
 	mb_check_buddy(e4b);
 }
@@ -3342,7 +3453,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
 	if (pa->pa_type == MB_GROUP_PA)
 		grp_blk--;
 
-	ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
+	grp = ext4_get_group_number(sb, grp_blk);
 
 	/*
 	 * possible race:
@@ -3807,7 +3918,7 @@ repeat:
 
 	list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
 		BUG_ON(pa->pa_type != MB_INODE_PA);
-		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
+		group = ext4_get_group_number(sb, pa->pa_pstart);
 
 		err = ext4_mb_load_buddy(sb, group, &e4b);
 		if (err) {
@@ -4069,7 +4180,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
 
 	list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
 
-		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
+		group = ext4_get_group_number(sb, pa->pa_pstart);
 		if (ext4_mb_load_buddy(sb, group, &e4b)) {
 			ext4_error(sb, "Error loading buddy information for %u",
 					group);
@@ -4217,6 +4328,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 	unsigned int inquota = 0;
 	unsigned int reserv_clstrs = 0;
 
+	might_sleep();
 	sb = ar->inode->i_sb;
 	sbi = EXT4_SB(sb);
 
@@ -4420,11 +4532,11 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 	node = rb_prev(new_node);
 	if (node) {
 		entry = rb_entry(node, struct ext4_free_data, efd_node);
-		if (can_merge(entry, new_entry)) {
+		if (can_merge(entry, new_entry) &&
+		    ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
 			new_entry->efd_start_cluster = entry->efd_start_cluster;
 			new_entry->efd_count += entry->efd_count;
 			rb_erase(node, &(db->bb_free_root));
-			ext4_journal_callback_del(handle, &entry->efd_jce);
 			kmem_cache_free(ext4_free_data_cachep, entry);
 		}
 	}
@@ -4432,10 +4544,10 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 	node = rb_next(new_node);
 	if (node) {
 		entry = rb_entry(node, struct ext4_free_data, efd_node);
-		if (can_merge(new_entry, entry)) {
+		if (can_merge(new_entry, entry) &&
+		    ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
 			new_entry->efd_count += entry->efd_count;
 			rb_erase(node, &(db->bb_free_root));
-			ext4_journal_callback_del(handle, &entry->efd_jce);
 			kmem_cache_free(ext4_free_data_cachep, entry);
 		}
 	}
@@ -4470,6 +4582,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 	int err = 0;
 	int ret;
 
+	might_sleep();
 	if (bh) {
 		if (block)
 			BUG_ON(block != bh->b_blocknr);

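The mb_test_and_clear_bits() hunk above clears a whole range in one pass while remembering the first bit that was already zero, which is how the reworked mb_free_blocks() detects a double free without a separate scan. A minimal userspace sketch of the same word-at-a-time idea (first_zero_bit() is a stand-in for the kernel's mb_find_next_zero_bit()):

    #include <stdint.h>
    #include <stdio.h>

    /* Return the first zero bit in a 32-bit word, or 32 if none. */
    static int first_zero_bit(uint32_t w)
    {
        int i;

        for (i = 0; i < 32; i++)
            if (!(w & (1U << i)))
                return i;
        return 32;
    }

    /* Clear bits [cur, cur + len); return the first bit that was
     * already zero, or -1 if every bit in the range was set. */
    static int test_and_clear_bits(uint32_t *bm, int cur, int len)
    {
        int zero_bit = -1;

        len = cur + len;
        while (cur < len) {
            if ((cur & 31) == 0 && (len - cur) >= 32) {
                /* fast path: test and clear a whole word at once */
                uint32_t *addr = bm + (cur >> 5);

                if (*addr != UINT32_MAX && zero_bit == -1)
                    zero_bit = cur + first_zero_bit(*addr);
                *addr = 0;
                cur += 32;
                continue;
            }
            /* slow path: single bits at the unaligned edges */
            if (!(bm[cur >> 5] & (1U << (cur & 31))) && zero_bit == -1)
                zero_bit = cur;
            bm[cur >> 5] &= ~(1U << (cur & 31));
            cur++;
        }
        return zero_bit;
    }

    int main(void)
    {
        uint32_t bm[2] = { UINT32_MAX, UINT32_MAX };

        bm[1] &= ~(1U << 3);                    /* bit 35 already free */
        printf("%d\n", test_and_clear_bits(bm, 0, 64));   /* prints 35 */
        return 0;
    }
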
+ 61 - 1
fs/ext4/migrate.c

@@ -426,7 +426,6 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
 			return retval;
 	}
 	return retval;
-
 }
 
 int ext4_ext_migrate(struct inode *inode)
@@ -606,3 +605,64 @@ out:
 
 	return retval;
 }
+
+/*
+ * Migrate a simple extent-based inode to use the i_blocks[] array
+ */
+int ext4_ind_migrate(struct inode *inode)
+{
+	struct ext4_extent_header	*eh;
+	struct ext4_super_block		*es = EXT4_SB(inode->i_sb)->s_es;
+	struct ext4_inode_info		*ei = EXT4_I(inode);
+	struct ext4_extent		*ex;
+	unsigned int			i, len;
+	ext4_fsblk_t			blk;
+	handle_t			*handle;
+	int				ret;
+
+	if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
+				       EXT4_FEATURE_INCOMPAT_EXTENTS) ||
+	    (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
+		return -EINVAL;
+
+	if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+				       EXT4_FEATURE_RO_COMPAT_BIGALLOC))
+		return -EOPNOTSUPP;
+
+	handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	down_write(&EXT4_I(inode)->i_data_sem);
+	ret = ext4_ext_check_inode(inode);
+	if (ret)
+		goto errout;
+
+	eh = ext_inode_hdr(inode);
+	ex  = EXT_FIRST_EXTENT(eh);
+	if (ext4_blocks_count(es) > EXT4_MAX_BLOCK_FILE_PHYS ||
+	    eh->eh_depth != 0 || le16_to_cpu(eh->eh_entries) > 1) {
+		ret = -EOPNOTSUPP;
+		goto errout;
+	}
+	if (eh->eh_entries == 0)
+		blk = len = 0;
+	else {
+		len = le16_to_cpu(ex->ee_len);
+		blk = ext4_ext_pblock(ex);
+		if (len > EXT4_NDIR_BLOCKS) {
+			ret = -EOPNOTSUPP;
+			goto errout;
+		}
+	}
+
+	ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
+	memset(ei->i_data, 0, sizeof(ei->i_data));
+	for (i = 0; i < len; i++)
+		ei->i_data[i] = cpu_to_le32(blk++);
+	ext4_mark_inode_dirty(handle, inode);
+errout:
+	ext4_journal_stop(handle);
+	up_write(&EXT4_I(inode)->i_data_sem);
+	return ret;
+}

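The new ext4_ind_migrate() only accepts the degenerate layout, an extent tree of depth zero holding at most one extent no longer than the twelve direct slots, because that is the only shape an indirect block map can express without allocating index blocks. The conversion then amounts to unrolling one (start, length) pair into consecutive array slots; a hypothetical standalone sketch (NDIR_BLOCKS mirrors EXT4_NDIR_BLOCKS):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define NDIR_BLOCKS 12  /* direct slots in the i_data[] array */

    /* Expand one extent (logical block 0, physical start blk, length
     * len) into a direct-block map; -1 if it cannot be represented. */
    static int extent_to_direct(uint32_t *i_data, uint64_t blk,
                                unsigned int len)
    {
        unsigned int i;

        if (len > NDIR_BLOCKS || blk + len > 0xFFFFFFFFull)
            return -1;      /* needs indirect blocks or >32-bit addrs */

        memset(i_data, 0, NDIR_BLOCKS * sizeof(*i_data));
        for (i = 0; i < len; i++)
            i_data[i] = (uint32_t)(blk + i);
        return 0;
    }

    int main(void)
    {
        uint32_t i_data[NDIR_BLOCKS];

        if (extent_to_direct(i_data, 1000, 4) == 0)
            printf("logical 2 -> physical %u\n", i_data[2]);  /* 1002 */
        return 0;
    }
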
+ 3 - 3
fs/ext4/mmp.c

@@ -7,7 +7,7 @@
 #include "ext4.h"
 
 /* Checksumming functions */
-static __u32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
+static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	int offset = offsetof(struct mmp_struct, mmp_checksum);
@@ -54,7 +54,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
 	lock_buffer(bh);
 	bh->b_end_io = end_buffer_write_sync;
 	get_bh(bh);
-	submit_bh(WRITE_SYNC, bh);
+	submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
 	wait_on_buffer(bh);
 	sb_end_write(sb);
 	if (unlikely(!buffer_uptodate(bh)))
@@ -86,7 +86,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
 		get_bh(*bh);
 		lock_buffer(*bh);
 		(*bh)->b_end_io = end_buffer_read_sync;
-		submit_bh(READ_SYNC, *bh);
+		submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
 		wait_on_buffer(*bh);
 		if (!buffer_uptodate(*bh)) {
 			brelse(*bh);

+ 35 - 38
fs/ext4/move_extent.c

@@ -144,12 +144,13 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
 }
 
 /**
- * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
+ * ext4_double_down_write_data_sem - Acquire two inodes' write lock
+ *                                   of i_data_sem
  *
  * Acquire write lock of i_data_sem of the two inodes
  */
-static void
-double_down_write_data_sem(struct inode *first, struct inode *second)
+void
+ext4_double_down_write_data_sem(struct inode *first, struct inode *second)
 {
 	if (first < second) {
 		down_write(&EXT4_I(first)->i_data_sem);
@@ -162,14 +163,15 @@ double_down_write_data_sem(struct inode *first, struct inode *second)
 }
 
 /**
- * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
+ * ext4_double_up_write_data_sem - Release two inodes' write lock of i_data_sem
  *
  * @orig_inode:		original inode structure to be released its lock first
  * @donor_inode:	donor inode structure to be released its lock second
  * Release write lock of i_data_sem of two inodes (orig and donor).
  */
-static void
-double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
+void
+ext4_double_up_write_data_sem(struct inode *orig_inode,
+			      struct inode *donor_inode)
 {
 	up_write(&EXT4_I(orig_inode)->i_data_sem);
 	up_write(&EXT4_I(donor_inode)->i_data_sem);
@@ -407,18 +409,7 @@ mext_insert_extents(handle_t *handle, struct inode *orig_inode,
 		mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
 						end_ext, eh, range_to_move);
 
-	if (depth) {
-		ret = ext4_handle_dirty_metadata(handle, orig_inode,
-						 orig_path->p_bh);
-		if (ret)
-			return ret;
-	} else {
-		ret = ext4_mark_inode_dirty(handle, orig_inode);
-		if (ret < 0)
-			return ret;
-	}
-
-	return 0;
+	return ext4_ext_dirty(handle, orig_inode, orig_path);
 }
 
 /**
@@ -737,6 +728,7 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
 		donor_off += dext_alen;
 		orig_off += dext_alen;
 
+		BUG_ON(replaced_count > count);
 		/* Already moved the expected blocks */
 		if (replaced_count >= count)
 			break;
@@ -814,7 +806,13 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2,
 		page_cache_release(page[0]);
 		return -ENOMEM;
 	}
-
+	/*
+	 * grab_cache_page_write_begin() may not wait on the page's writeback
+	 * if the BDI does not demand it. But it is reasonable to be very
+	 * conservative here and explicitly wait on the page's writeback.
+	 */
+	wait_on_page_writeback(page[0]);
+	wait_on_page_writeback(page[1]);
 	if (inode1 > inode2) {
 		struct page *tmp;
 		tmp = page[0];
@@ -856,7 +854,6 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
 		if (buffer_uptodate(bh))
 			continue;
 		if (!buffer_mapped(bh)) {
-			int err = 0;
 			err = ext4_get_block(inode, block, bh, 0);
 			if (err) {
 				SetPageError(page);
@@ -976,7 +973,7 @@ again:
 	 * necessary, just swap data blocks between orig and donor.
 	 */
 	if (uninit) {
-		double_down_write_data_sem(orig_inode, donor_inode);
+		ext4_double_down_write_data_sem(orig_inode, donor_inode);
 		/* If any of extents in range became initialized we have to
 		 * fallback to data copying */
 		uninit = mext_check_coverage(orig_inode, orig_blk_offset,
@@ -990,7 +987,7 @@ again:
 			goto drop_data_sem;
 
 		if (!uninit) {
-			double_up_write_data_sem(orig_inode, donor_inode);
+			ext4_double_up_write_data_sem(orig_inode, donor_inode);
 			goto data_copy;
 		}
 		if ((page_has_private(pagep[0]) &&
@@ -1004,7 +1001,7 @@ again:
 						donor_inode, orig_blk_offset,
 						block_len_in_page, err);
 	drop_data_sem:
-		double_up_write_data_sem(orig_inode, donor_inode);
+		ext4_double_up_write_data_sem(orig_inode, donor_inode);
 		goto unlock_pages;
 	}
 data_copy:
@@ -1033,7 +1030,7 @@ data_copy:
 	}
 	/* Perform all necessary steps similar to write_begin()/write_end()
 	 * but keeping in mind that i_size will not change */
-	*err = __block_write_begin(pagep[0], from, from + replaced_size,
+	*err = __block_write_begin(pagep[0], from, replaced_size,
 				   ext4_get_block);
 	if (!*err)
 		*err = block_commit_write(pagep[0], from, from + replaced_size);
@@ -1065,11 +1062,11 @@ repair_branches:
 	 * Extents are swapped already, but we are not able to copy data.
 	 * Try to swap extents back to their original places
 	 */
-	double_down_write_data_sem(orig_inode, donor_inode);
+	ext4_double_down_write_data_sem(orig_inode, donor_inode);
 	replaced_count = mext_replace_branches(handle, donor_inode, orig_inode,
 					       orig_blk_offset,
 					       block_len_in_page, &err2);
-	double_up_write_data_sem(orig_inode, donor_inode);
+	ext4_double_up_write_data_sem(orig_inode, donor_inode);
 	if (replaced_count != block_len_in_page) {
 		EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
 				       "Unable to copy data block,"
@@ -1209,15 +1206,15 @@ mext_check_arguments(struct inode *orig_inode,
 }
 
 /**
- * mext_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
+ * ext4_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
  *
  * @inode1:	the inode structure
  * @inode2:	the inode structure
  *
  * Lock two inodes' i_mutex
  */
-static void
-mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
+void
+ext4_inode_double_lock(struct inode *inode1, struct inode *inode2)
 {
 	BUG_ON(inode1 == inode2);
 	if (inode1 < inode2) {
@@ -1230,15 +1227,15 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
 }
 
 /**
- * mext_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
+ * ext4_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
  *
  * @inode1:     the inode that is released first
  * @inode2:     the inode that is released second
  *
  */
 
-static void
-mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
+void
+ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2)
 {
 	mutex_unlock(&inode1->i_mutex);
 	mutex_unlock(&inode2->i_mutex);
@@ -1333,7 +1330,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 		return -EINVAL;
 	}
 	/* Protect orig and donor inodes against a truncate */
-	mext_inode_double_lock(orig_inode, donor_inode);
+	ext4_inode_double_lock(orig_inode, donor_inode);
 
 	/* Wait for all existing dio workers */
 	ext4_inode_block_unlocked_dio(orig_inode);
@@ -1342,7 +1339,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 	inode_dio_wait(donor_inode);
 
 	/* Protect extent tree against block allocations via delalloc */
-	double_down_write_data_sem(orig_inode, donor_inode);
+	ext4_double_down_write_data_sem(orig_inode, donor_inode);
 	/* Check the filesystem environment whether move_extent can be done */
 	ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
 				    donor_start, &len);
@@ -1466,7 +1463,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 		 * b. racing with ->readpage, ->write_begin, and ext4_get_block
 		 *    in move_extent_per_page
 		 */
-		double_up_write_data_sem(orig_inode, donor_inode);
+		ext4_double_up_write_data_sem(orig_inode, donor_inode);
 
 		while (orig_page_offset <= seq_end_page) {
 
@@ -1500,7 +1497,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 				block_len_in_page = rest_blocks;
 		}
 
-		double_down_write_data_sem(orig_inode, donor_inode);
+		ext4_double_down_write_data_sem(orig_inode, donor_inode);
 		if (ret < 0)
 			break;
 
@@ -1538,10 +1535,10 @@ out:
 		ext4_ext_drop_refs(holecheck_path);
 		kfree(holecheck_path);
 	}
-	double_up_write_data_sem(orig_inode, donor_inode);
+	ext4_double_up_write_data_sem(orig_inode, donor_inode);
 	ext4_inode_resume_unlocked_dio(orig_inode);
 	ext4_inode_resume_unlocked_dio(donor_inode);
-	mext_inode_double_unlock(orig_inode, donor_inode);
+	ext4_inode_double_unlock(orig_inode, donor_inode);
 
 	return ret;
 }

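Both of the renamed helpers, ext4_inode_double_lock() and ext4_double_down_write_data_sem(), encode the classic deadlock-avoidance rule for taking two locks at once: acquire them in a globally consistent order (here, by comparing pointer addresses), so two tasks locking the same pair from opposite directions can never each hold one lock while waiting for the other. A small pthread sketch of the pattern; lock_pair() is an illustrative name, not an ext4 API:

    #include <pthread.h>
    #include <stdio.h>

    struct object {
        pthread_mutex_t lock;
    };

    /* Acquire both locks in address order so the locking order is
     * the same no matter how the arguments are passed. */
    static void lock_pair(struct object *a, struct object *b)
    {
        if (a < b) {
            pthread_mutex_lock(&a->lock);
            pthread_mutex_lock(&b->lock);
        } else {
            pthread_mutex_lock(&b->lock);
            pthread_mutex_lock(&a->lock);
        }
    }

    static void unlock_pair(struct object *a, struct object *b)
    {
        pthread_mutex_unlock(&a->lock);
        pthread_mutex_unlock(&b->lock);
    }

    int main(void)
    {
        struct object x = { PTHREAD_MUTEX_INITIALIZER };
        struct object y = { PTHREAD_MUTEX_INITIALIZER };

        lock_pair(&x, &y);      /* same order as lock_pair(&y, &x) */
        puts("both locked");
        unlock_pair(&x, &y);
        return 0;
    }
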
+ 19 - 29
fs/ext4/namei.c

@@ -416,15 +416,16 @@ static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct ext4_inode_info *ei = EXT4_I(inode);
-	__u32 csum, old_csum;
+	__u32 csum;
+	__le32 save_csum;
 	int size;
 
 	size = count_offset + (count * sizeof(struct dx_entry));
-	old_csum = t->dt_checksum;
+	save_csum = t->dt_checksum;
 	t->dt_checksum = 0;
 	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
 	csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail));
-	t->dt_checksum = old_csum;
+	t->dt_checksum = save_csum;
 
 	return cpu_to_le32(csum);
 }
@@ -971,6 +972,17 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 			hinfo.hash_version +=
 				EXT4_SB(dir->i_sb)->s_hash_unsigned;
 		hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
+		if (ext4_has_inline_data(dir)) {
+			int has_inline_data = 1;
+			count = htree_inlinedir_to_tree(dir_file, dir, 0,
+							&hinfo, start_hash,
+							start_minor_hash,
+							&has_inline_data);
+			if (has_inline_data) {
+				*next_hash = ~0;
+				return count;
+			}
+		}
 		count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
 					       start_hash, start_minor_hash);
 		*next_hash = ~0;
@@ -1455,24 +1467,6 @@ struct dentry *ext4_get_parent(struct dentry *child)
 	return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
 }
 
-#define S_SHIFT 12
-static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = {
-	[S_IFREG >> S_SHIFT]	= EXT4_FT_REG_FILE,
-	[S_IFDIR >> S_SHIFT]	= EXT4_FT_DIR,
-	[S_IFCHR >> S_SHIFT]	= EXT4_FT_CHRDEV,
-	[S_IFBLK >> S_SHIFT]	= EXT4_FT_BLKDEV,
-	[S_IFIFO >> S_SHIFT]	= EXT4_FT_FIFO,
-	[S_IFSOCK >> S_SHIFT]	= EXT4_FT_SOCK,
-	[S_IFLNK >> S_SHIFT]	= EXT4_FT_SYMLINK,
-};
-
-static inline void ext4_set_de_type(struct super_block *sb,
-				struct ext4_dir_entry_2 *de,
-				umode_t mode) {
-	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE))
-		de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
-}
-
 /*
  * Move count entries from end of map between two memory locations.
  * Returns pointer to last entry moved.
@@ -2251,8 +2245,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 	dquot_initialize(dir);
 
 	credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
-		   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-		   EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+		   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
 retry:
 	inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
 					    NULL, EXT4_HT_DIR, credits);
@@ -2286,8 +2279,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
 	dquot_initialize(dir);
 
 	credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
-		   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-		   EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+		   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
 retry:
 	inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
 					    NULL, EXT4_HT_DIR, credits);
@@ -2396,8 +2388,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	dquot_initialize(dir);
 
 	credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
-		   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-		   EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+		   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
 retry:
 	inode = ext4_new_inode_start_handle(dir, S_IFDIR | mode,
 					    &dentry->d_name,
@@ -2826,8 +2817,7 @@ static int ext4_symlink(struct inode *dir,
 		 * quota blocks, sb is already counted in previous macros).
 		 */
 		credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
-			  EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-			  EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+			  EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
 	}
 retry:
 	inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO,

+ 150 - 130
fs/ext4/page-io.c

@@ -29,25 +29,19 @@
 #include "xattr.h"
 #include "acl.h"
 
-static struct kmem_cache *io_page_cachep, *io_end_cachep;
+static struct kmem_cache *io_end_cachep;
 
 int __init ext4_init_pageio(void)
 {
-	io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
-	if (io_page_cachep == NULL)
-		return -ENOMEM;
 	io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
-	if (io_end_cachep == NULL) {
-		kmem_cache_destroy(io_page_cachep);
+	if (io_end_cachep == NULL)
 		return -ENOMEM;
-	}
 	return 0;
 }
 
 void ext4_exit_pageio(void)
 {
 	kmem_cache_destroy(io_end_cachep);
-	kmem_cache_destroy(io_page_cachep);
 }
 
 /*
@@ -67,29 +61,28 @@ void ext4_ioend_shutdown(struct inode *inode)
 		cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
 }
 
-static void put_io_page(struct ext4_io_page *io_page)
+static void ext4_release_io_end(ext4_io_end_t *io_end)
 {
-	if (atomic_dec_and_test(&io_page->p_count)) {
-		end_page_writeback(io_page->p_page);
-		put_page(io_page->p_page);
-		kmem_cache_free(io_page_cachep, io_page);
-	}
+	BUG_ON(!list_empty(&io_end->list));
+	BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
+
+	if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
+		wake_up_all(ext4_ioend_wq(io_end->inode));
+	if (io_end->flag & EXT4_IO_END_DIRECT)
+		inode_dio_done(io_end->inode);
+	if (io_end->iocb)
+		aio_complete(io_end->iocb, io_end->result, 0);
+	kmem_cache_free(io_end_cachep, io_end);
 }
 
-void ext4_free_io_end(ext4_io_end_t *io)
+static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
 {
-	int i;
-
-	BUG_ON(!io);
-	BUG_ON(!list_empty(&io->list));
-	BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
+	struct inode *inode = io_end->inode;
 
-	for (i = 0; i < io->num_io_pages; i++)
-		put_io_page(io->pages[i]);
-	io->num_io_pages = 0;
-	if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
-		wake_up_all(ext4_ioend_wq(io->inode));
-	kmem_cache_free(io_end_cachep, io);
+	io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
+	/* Wake up anyone waiting on unwritten extent conversion */
+	if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
+		wake_up_all(ext4_ioend_wq(inode));
 }
 
 /* check a range of space and convert unwritten extents to written. */
@@ -112,13 +105,8 @@ static int ext4_end_io(ext4_io_end_t *io)
 			 "(inode %lu, offset %llu, size %zd, error %d)",
 			 inode->i_ino, offset, size, ret);
 	}
-	/* Wake up anyone waiting on unwritten extent conversion */
-	if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
-		wake_up_all(ext4_ioend_wq(inode));
-	if (io->flag & EXT4_IO_END_DIRECT)
-		inode_dio_done(inode);
-	if (io->iocb)
-		aio_complete(io->iocb, io->result, 0);
+	ext4_clear_io_unwritten_flag(io);
+	ext4_release_io_end(io);
 	return ret;
 }
 
@@ -149,7 +137,7 @@ static void dump_completed_IO(struct inode *inode)
 }
 
 /* Add the io_end to per-inode completed end_io list. */
-void ext4_add_complete_io(ext4_io_end_t *io_end)
+static void ext4_add_complete_io(ext4_io_end_t *io_end)
 {
 	struct ext4_inode_info *ei = EXT4_I(io_end->inode);
 	struct workqueue_struct *wq;
@@ -186,8 +174,6 @@ static int ext4_do_flush_completed_IO(struct inode *inode)
 		err = ext4_end_io(io);
 		if (unlikely(!ret && err))
 			ret = err;
-		io->flag &= ~EXT4_IO_END_UNWRITTEN;
-		ext4_free_io_end(io);
 	}
 	return ret;
 }
@@ -219,10 +205,43 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
 		atomic_inc(&EXT4_I(inode)->i_ioend_count);
 		io->inode = inode;
 		INIT_LIST_HEAD(&io->list);
+		atomic_set(&io->count, 1);
 	}
 	return io;
 }
 
+void ext4_put_io_end_defer(ext4_io_end_t *io_end)
+{
+	if (atomic_dec_and_test(&io_end->count)) {
+		if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
+			ext4_release_io_end(io_end);
+			return;
+		}
+		ext4_add_complete_io(io_end);
+	}
+}
+
+int ext4_put_io_end(ext4_io_end_t *io_end)
+{
+	int err = 0;
+
+	if (atomic_dec_and_test(&io_end->count)) {
+		if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
+			err = ext4_convert_unwritten_extents(io_end->inode,
+						io_end->offset, io_end->size);
+			ext4_clear_io_unwritten_flag(io_end);
+		}
+		ext4_release_io_end(io_end);
+	}
+	return err;
+}
+
+ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
+{
+	atomic_inc(&io_end->count);
+	return io_end;
+}
+
 /*
  * Print a buffer I/O error compatible with fs/buffer.c.  This
  * provides compatibility with dmesg scrapers that look for a specific
@@ -243,45 +262,56 @@ static void ext4_end_bio(struct bio *bio, int error)
 	ext4_io_end_t *io_end = bio->bi_private;
 	struct inode *inode;
 	int i;
+	int blocksize;
 	sector_t bi_sector = bio->bi_sector;
 
 	BUG_ON(!io_end);
+	inode = io_end->inode;
+	blocksize = 1 << inode->i_blkbits;
 	bio->bi_private = NULL;
 	bio->bi_end_io = NULL;
 	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
 		error = 0;
-	bio_put(bio);
-
-	for (i = 0; i < io_end->num_io_pages; i++) {
-		struct page *page = io_end->pages[i]->p_page;
+	for (i = 0; i < bio->bi_vcnt; i++) {
+		struct bio_vec *bvec = &bio->bi_io_vec[i];
+		struct page *page = bvec->bv_page;
 		struct buffer_head *bh, *head;
-		loff_t offset;
-		loff_t io_end_offset;
+		unsigned bio_start = bvec->bv_offset;
+		unsigned bio_end = bio_start + bvec->bv_len;
+		unsigned under_io = 0;
+		unsigned long flags;
+
+		if (!page)
+			continue;
 
 		if (error) {
 			SetPageError(page);
 			set_bit(AS_EIO, &page->mapping->flags);
-			head = page_buffers(page);
-			BUG_ON(!head);
-
-			io_end_offset = io_end->offset + io_end->size;
-
-			offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
-			bh = head;
-			do {
-				if ((offset >= io_end->offset) &&
-				    (offset+bh->b_size <= io_end_offset))
-					buffer_io_error(bh);
-
-				offset += bh->b_size;
-				bh = bh->b_this_page;
-			} while (bh != head);
 		}
-
-		put_io_page(io_end->pages[i]);
+		bh = head = page_buffers(page);
+		/*
+		 * We check all buffers in the page under BH_Uptodate_Lock
+		 * to avoid races with other end_io handlers clearing async_write flags
+		 */
+		local_irq_save(flags);
+		bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
+		do {
+			if (bh_offset(bh) < bio_start ||
+			    bh_offset(bh) + blocksize > bio_end) {
+				if (buffer_async_write(bh))
+					under_io++;
+				continue;
+			}
+			clear_buffer_async_write(bh);
+			if (error)
+				buffer_io_error(bh);
+		} while ((bh = bh->b_this_page) != head);
+		bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
+		local_irq_restore(flags);
+		if (!under_io)
+			end_page_writeback(page);
 	}
-	io_end->num_io_pages = 0;
-	inode = io_end->inode;
+	bio_put(bio);
 
 	if (error) {
 		io_end->flag |= EXT4_IO_END_ERROR;
@@ -294,12 +324,7 @@ static void ext4_end_bio(struct bio *bio, int error)
 			     bi_sector >> (inode->i_blkbits - 9));
 	}
 
-	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-		ext4_free_io_end(io_end);
-		return;
-	}
-
-	ext4_add_complete_io(io_end);
+	ext4_put_io_end_defer(io_end);
 }
 
 void ext4_io_submit(struct ext4_io_submit *io)
@@ -313,76 +338,59 @@ void ext4_io_submit(struct ext4_io_submit *io)
 		bio_put(io->io_bio);
 	}
 	io->io_bio = NULL;
-	io->io_op = 0;
+}
+
+void ext4_io_submit_init(struct ext4_io_submit *io,
+			 struct writeback_control *wbc)
+{
+	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);
+	io->io_bio = NULL;
 	io->io_end = NULL;
 }
 
-static int io_submit_init(struct ext4_io_submit *io,
-			  struct inode *inode,
-			  struct writeback_control *wbc,
-			  struct buffer_head *bh)
+static int io_submit_init_bio(struct ext4_io_submit *io,
+			      struct buffer_head *bh)
 {
-	ext4_io_end_t *io_end;
-	struct page *page = bh->b_page;
 	int nvecs = bio_get_nr_vecs(bh->b_bdev);
 	struct bio *bio;
 
-	io_end = ext4_init_io_end(inode, GFP_NOFS);
-	if (!io_end)
-		return -ENOMEM;
 	bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
 	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_bdev = bh->b_bdev;
-	bio->bi_private = io->io_end = io_end;
 	bio->bi_end_io = ext4_end_bio;
-
-	io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
-
+	bio->bi_private = ext4_get_io_end(io->io_end);
+	if (!io->io_end->size)
+		io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT)
+				     + bh_offset(bh);
 	io->io_bio = bio;
-	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);
 	io->io_next_block = bh->b_blocknr;
 	return 0;
 }
 
 static int io_submit_add_bh(struct ext4_io_submit *io,
-			    struct ext4_io_page *io_page,
 			    struct inode *inode,
-			    struct writeback_control *wbc,
 			    struct buffer_head *bh)
 {
 	ext4_io_end_t *io_end;
 	int ret;
 
-	if (buffer_new(bh)) {
-		clear_buffer_new(bh);
-		unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
-	}
-
 	if (io->io_bio && bh->b_blocknr != io->io_next_block) {
 submit_and_retry:
 		ext4_io_submit(io);
 	}
 	if (io->io_bio == NULL) {
-		ret = io_submit_init(io, inode, wbc, bh);
+		ret = io_submit_init_bio(io, bh);
 		if (ret)
 			return ret;
 	}
-	io_end = io->io_end;
-	if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
-	    (io_end->pages[io_end->num_io_pages-1] != io_page))
-		goto submit_and_retry;
-	if (buffer_uninit(bh))
-		ext4_set_io_unwritten_flag(inode, io_end);
-	io->io_end->size += bh->b_size;
-	io->io_next_block++;
 	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
 	if (ret != bh->b_size)
 		goto submit_and_retry;
-	if ((io_end->num_io_pages == 0) ||
-	    (io_end->pages[io_end->num_io_pages-1] != io_page)) {
-		io_end->pages[io_end->num_io_pages++] = io_page;
-		atomic_inc(&io_page->p_count);
-	}
+	io_end = io->io_end;
+	if (test_clear_buffer_uninit(bh))
+		ext4_set_io_unwritten_flag(inode, io_end);
+	io_end->size += bh->b_size;
+	io->io_next_block++;
 	return 0;
 }
 
@@ -392,33 +400,29 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 			struct writeback_control *wbc)
 {
 	struct inode *inode = page->mapping->host;
-	unsigned block_start, block_end, blocksize;
-	struct ext4_io_page *io_page;
+	unsigned block_start, blocksize;
 	struct buffer_head *bh, *head;
 	int ret = 0;
+	int nr_submitted = 0;
 
 	blocksize = 1 << inode->i_blkbits;
 
 	BUG_ON(!PageLocked(page));
 	BUG_ON(PageWriteback(page));
 
-	io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
-	if (!io_page) {
-		redirty_page_for_writepage(wbc, page);
-		unlock_page(page);
-		return -ENOMEM;
-	}
-	io_page->p_page = page;
-	atomic_set(&io_page->p_count, 1);
-	get_page(page);
 	set_page_writeback(page);
 	ClearPageError(page);
 
-	for (bh = head = page_buffers(page), block_start = 0;
-	     bh != head || !block_start;
-	     block_start = block_end, bh = bh->b_this_page) {
-
-		block_end = block_start + blocksize;
+	/*
+	 * In the first loop we prepare and mark buffers to submit. We have to
+	 * mark all buffers in the page before submitting so that
+	 * end_page_writeback() cannot be called from ext4_end_bio() when IO
+	 * on the first buffer finishes and we are still working on submitting
+	 * the second buffer.
+	 */
+	bh = head = page_buffers(page);
+	do {
+		block_start = bh_offset(bh);
 		if (block_start >= len) {
 			/*
 			 * Comments copied from block_write_full_page_endio:
@@ -431,7 +435,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 			 * mapped, and writes to that region are not written
 			 * out to the file."
 			 */
-			zero_user_segment(page, block_start, block_end);
+			zero_user_segment(page, block_start,
+					  block_start + blocksize);
 			clear_buffer_dirty(bh);
 			set_buffer_uptodate(bh);
 			continue;
@@ -445,7 +450,19 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 				ext4_io_submit(io);
 			continue;
 		}
-		ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
+		if (buffer_new(bh)) {
+			clear_buffer_new(bh);
+			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+		}
+		set_buffer_async_write(bh);
+	} while ((bh = bh->b_this_page) != head);
+
+	/* Now submit buffers to write */
+	bh = head = page_buffers(page);
+	do {
+		if (!buffer_async_write(bh))
+			continue;
+		ret = io_submit_add_bh(io, inode, bh);
 		if (ret) {
 			/*
 			 * We only get here on ENOMEM.  Not much else
@@ -455,17 +472,20 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 			redirty_page_for_writepage(wbc, page);
 			break;
 		}
+		nr_submitted++;
 		clear_buffer_dirty(bh);
+	} while ((bh = bh->b_this_page) != head);
+
+	/* Error stopped previous loop? Clean up buffers... */
+	if (ret) {
+		do {
+			clear_buffer_async_write(bh);
+			bh = bh->b_this_page;
+		} while (bh != head);
 	}
 	unlock_page(page);
-	/*
-	 * If the page was truncated before we could do the writeback,
-	 * or we had a memory allocation error while trying to write
-	 * the first buffer head, we won't have submitted any pages for
-	 * I/O.  In that case we need to make sure we've cleared the
-	 * PageWriteback bit from the page to prevent the system from
-	 * wedging later on.
-	 */
-	put_io_page(io_page);
+	/* Nothing submitted - we have to end page writeback */
+	if (!nr_submitted)
+		end_page_writeback(page);
 	return ret;
 }

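The page-io.c rework drops the per-page ext4_io_page tracking in favour of a reference count on the io_end itself: the submitter holds one reference and every in-flight bio takes another via ext4_get_io_end(), so whichever ext4_put_io_end*() call drops the count to zero runs the completion exactly once. The core of that pattern is an atomic get/put pair, sketched here in userspace C with stdatomic standing in for the kernel's atomic_t (error handling elided):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct io_end {
        atomic_int count;
        /* offset, size, flags ... elided */
    };

    static struct io_end *io_end_alloc(void)
    {
        struct io_end *io = malloc(sizeof(*io));

        atomic_init(&io->count, 1);     /* the submitter's reference */
        return io;
    }

    static struct io_end *io_end_get(struct io_end *io)
    {
        atomic_fetch_add(&io->count, 1);        /* one per bio */
        return io;
    }

    static void io_end_put(struct io_end *io)
    {
        /* The last put, from a bio completion or the submitter,
         * runs finalization exactly once. */
        if (atomic_fetch_sub(&io->count, 1) == 1) {
            puts("finalize io_end");
            free(io);
        }
    }

    int main(void)
    {
        struct io_end *io = io_end_alloc();

        io_end_get(io);         /* bio 1 submitted */
        io_end_get(io);         /* bio 2 submitted */
        io_end_put(io);         /* bio 1 completes */
        io_end_put(io);         /* bio 2 completes */
        io_end_put(io);         /* submitter drops its reference */
        return 0;
    }
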
+ 11 - 5
fs/ext4/resize.c

@@ -272,7 +272,7 @@ next_group:
 		if (start_blk >= last_blk)
 			goto next_group;
 		group_data[bb_index].block_bitmap = start_blk++;
-		ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
+		group = ext4_get_group_number(sb, start_blk - 1);
 		group -= group_data[0].group;
 		group_data[group].free_blocks_count--;
 		if (flexbg_size > 1)
@@ -284,7 +284,7 @@ next_group:
 		if (start_blk >= last_blk)
 			goto next_group;
 		group_data[ib_index].inode_bitmap = start_blk++;
-		ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
+		group = ext4_get_group_number(sb, start_blk - 1);
 		group -= group_data[0].group;
 		group_data[group].free_blocks_count--;
 		if (flexbg_size > 1)
@@ -296,7 +296,7 @@ next_group:
 		if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk)
 			goto next_group;
 		group_data[it_index].inode_table = start_blk;
-		ext4_get_group_no_and_offset(sb, start_blk, &group, NULL);
+		group = ext4_get_group_number(sb, start_blk);
 		group -= group_data[0].group;
 		group_data[group].free_blocks_count -=
 					EXT4_SB(sb)->s_itb_per_group;
@@ -392,7 +392,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
 		ext4_group_t group;
 		int err;
 
-		ext4_get_group_no_and_offset(sb, block, &group, NULL);
+		group = ext4_get_group_number(sb, block);
 		start = ext4_group_first_block_no(sb, group);
 		group -= flex_gd->groups[0].group;
 
@@ -1341,6 +1341,8 @@ static void ext4_update_super(struct super_block *sb,
 
 	/* Update the global fs size fields */
 	sbi->s_groups_count += flex_gd->count;
+	sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
+			(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
 
 	/* Update the reserved block counts only once the new group is
 	 * active. */
@@ -1879,7 +1881,11 @@ retry:
 		/* Nothing needs to be done */
 		return 0;
 
-	ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset);
+	n_group = ext4_get_group_number(sb, n_blocks_count - 1);
+	if (n_group > (0xFFFFFFFFUL / EXT4_INODES_PER_GROUP(sb))) {
+		ext4_warning(sb, "resize would cause inodes_count overflow");
+		return -EINVAL;
+	}
 	ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset);
 
 	n_desc_blocks = num_desc_blocks(sb, n_group + 1);

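The new inodes_count guard avoids a 32-bit overflow by dividing instead of multiplying: growing to n_group + 1 groups would set s_inodes_count to (n_group + 1) * EXT4_INODES_PER_GROUP(sb), so the resize is refused once n_group exceeds 0xFFFFFFFF / inodes_per_group. The same overflow-safe idiom in isolation (illustrative names):

    #include <stdint.h>
    #include <stdio.h>

    /* Would ngroups * inodes_per_group overflow a 32-bit counter?
     * Dividing first keeps the test itself overflow-free. */
    static int inode_count_overflows(uint32_t ngroups,
                                     uint32_t inodes_per_group)
    {
        return ngroups > UINT32_MAX / inodes_per_group;
    }

    int main(void)
    {
        /* 524287 * 8192 still fits; 524288 * 8192 is exactly 2^32 */
        printf("%d\n", inode_count_overflows(524287, 8192));  /* 0 */
        printf("%d\n", inode_count_overflows(524288, 8192));  /* 1 */
        return 0;
    }
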
+ 105 - 26
fs/ext4/super.c

@@ -81,6 +81,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly);
 static void ext4_destroy_lazyinit_thread(void);
 static void ext4_unregister_li_request(struct super_block *sb);
 static void ext4_clear_request_list(void);
+static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t);
 
 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
 static struct file_system_type ext2_fs_type = {
@@ -353,10 +354,13 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
 	struct super_block		*sb = journal->j_private;
 	struct ext4_sb_info		*sbi = EXT4_SB(sb);
 	int				error = is_journal_aborted(journal);
-	struct ext4_journal_cb_entry	*jce, *tmp;
+	struct ext4_journal_cb_entry	*jce;
 
+	BUG_ON(txn->t_state == T_FINISHED);
 	spin_lock(&sbi->s_md_lock);
-	list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
+	while (!list_empty(&txn->t_private_list)) {
+		jce = list_entry(txn->t_private_list.next,
+				 struct ext4_journal_cb_entry, jce_list);
 		list_del_init(&jce->jce_list);
 		spin_unlock(&sbi->s_md_lock);
 		jce->jce_func(sb, jce, error);
@@ -1948,16 +1952,16 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
 	if ((sbi->s_es->s_feature_ro_compat &
 	     cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) {
 		/* Use new metadata_csum algorithm */
-		__u16 old_csum;
+		__le16 save_csum;
 		__u32 csum32;
 
-		old_csum = gdp->bg_checksum;
+		save_csum = gdp->bg_checksum;
 		gdp->bg_checksum = 0;
 		csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
 				     sizeof(le_group));
 		csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp,
 				     sbi->s_desc_size);
-		gdp->bg_checksum = old_csum;
+		gdp->bg_checksum = save_csum;
 
 		crc = csum32 & 0xFFFF;
 		goto out;
@@ -2379,17 +2383,15 @@ struct ext4_attr {
 	int offset;
 };
 
-static int parse_strtoul(const char *buf,
-		unsigned long max, unsigned long *value)
+static int parse_strtoull(const char *buf,
+		unsigned long long max, unsigned long long *value)
 {
-	char *endp;
-
-	*value = simple_strtoul(skip_spaces(buf), &endp, 0);
-	endp = skip_spaces(endp);
-	if (*endp || *value > max)
-		return -EINVAL;
+	int ret;
 
-	return 0;
+	ret = kstrtoull(skip_spaces(buf), 0, value);
+	if (!ret && *value > max)
+		ret = -EINVAL;
+	return ret;
 }
 
 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
@@ -2431,11 +2433,13 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
 					  const char *buf, size_t count)
 {
 	unsigned long t;
+	int ret;
 
-	if (parse_strtoul(buf, 0x40000000, &t))
-		return -EINVAL;
+	ret = kstrtoul(skip_spaces(buf), 0, &t);
+	if (ret)
+		return ret;
 
-	if (t && !is_power_of_2(t))
+	if (t && (!is_power_of_2(t) || t > 0x40000000))
 		return -EINVAL;
 
 	sbi->s_inode_readahead_blks = t;
@@ -2456,13 +2460,36 @@ static ssize_t sbi_ui_store(struct ext4_attr *a,
 {
 	unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
 	unsigned long t;
+	int ret;
 
-	if (parse_strtoul(buf, 0xffffffff, &t))
-		return -EINVAL;
+	ret = kstrtoul(skip_spaces(buf), 0, &t);
+	if (ret)
+		return ret;
 	*ui = t;
 	return count;
 }
 
+static ssize_t reserved_clusters_show(struct ext4_attr *a,
+				  struct ext4_sb_info *sbi, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%llu\n",
+		(unsigned long long) atomic64_read(&sbi->s_resv_clusters));
+}
+
+static ssize_t reserved_clusters_store(struct ext4_attr *a,
+				   struct ext4_sb_info *sbi,
+				   const char *buf, size_t count)
+{
+	unsigned long long val;
+	int ret;
+
+	if (parse_strtoull(buf, -1ULL, &val))
+		return -EINVAL;
+	ret = ext4_reserve_clusters(sbi, val);
+
+	return ret ? ret : count;
+}
+
 static ssize_t trigger_test_error(struct ext4_attr *a,
 				  struct ext4_sb_info *sbi,
 				  const char *buf, size_t count)
@@ -2500,6 +2527,7 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
 EXT4_RO_ATTR(delayed_allocation_blocks);
 EXT4_RO_ATTR(session_write_kbytes);
 EXT4_RO_ATTR(lifetime_write_kbytes);
+EXT4_RW_ATTR(reserved_clusters);
 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
 		 inode_readahead_blks_store, s_inode_readahead_blks);
 EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
@@ -2517,6 +2545,7 @@ static struct attribute *ext4_attrs[] = {
 	ATTR_LIST(delayed_allocation_blocks),
 	ATTR_LIST(session_write_kbytes),
 	ATTR_LIST(lifetime_write_kbytes),
+	ATTR_LIST(reserved_clusters),
 	ATTR_LIST(inode_readahead_blks),
 	ATTR_LIST(inode_goal),
 	ATTR_LIST(mb_stats),
@@ -3192,6 +3221,40 @@ int ext4_calculate_overhead(struct super_block *sb)
 	return 0;
 }
 
+
+static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
+{
+	ext4_fsblk_t resv_clusters;
+
+	/*
+	 * By default we reserve 2% or 4096 clusters, whichever is smaller.
+	 * This should cover the situations where we can not afford to run
+	 * This should cover the situations where we cannot afford to run
+	 * out of space, like for example a punch hole, or converting
+	 * uninitialized extents in the delalloc path. In most cases such
+	 * an allocation would require only 1 or 2 blocks; higher numbers
+	 * are very rare.
+	resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
+
+	do_div(resv_clusters, 50);
+	resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
+
+	return resv_clusters;
+}
+
+
+static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count)
+{
+	ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >>
+				sbi->s_cluster_bits;
+
+	if (count >= clusters)
+		return -EINVAL;
+
+	atomic64_set(&sbi->s_resv_clusters, count);
+	return 0;
+}
+
 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 {
 	char *orig_data = kstrdup(data, GFP_KERNEL);
@@ -3526,6 +3589,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
 	sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
 
+	/* Do we have the standard group size of blocksize * 8 blocks? */
+	if (sbi->s_blocks_per_group == blocksize << 3)
+		set_opt2(sb, STD_GROUP_SIZE);
+
 	for (i = 0; i < 4; i++)
 		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
 	sbi->s_def_hash_version = es->s_def_hash_version;
@@ -3698,6 +3765,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_err_report.function = print_daily_error_info;
 	sbi->s_err_report.data = (unsigned long) sb;
 
+	/* Register extent status tree shrinker */
+	ext4_es_register_shrinker(sb);
+
 	err = percpu_counter_init(&sbi->s_freeclusters_counter,
 			ext4_count_free_clusters(sb));
 	if (!err) {
@@ -3723,9 +3793,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_max_writeback_mb_bump = 128;
 	sbi->s_extent_max_zeroout_kb = 32;
 
-	/* Register extent status tree shrinker */
-	ext4_es_register_shrinker(sb);
-
 	/*
 	 * set up enough so that it can read an inode
 	 */
@@ -3911,6 +3978,13 @@ no_journal:
 			 "available");
 	}
 
+	err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi));
+	if (err) {
+		ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
+			 "reserved pool", ext4_calculate_resv_clusters(sbi));
+		goto failed_mount4a;
+	}
+
 	err = ext4_setup_system_zone(sb);
 	if (err) {
 		ext4_msg(sb, KERN_ERR, "failed to initialize system "
@@ -4010,6 +4084,7 @@ failed_mount_wq:
 		sbi->s_journal = NULL;
 	}
 failed_mount3:
+	ext4_es_unregister_shrinker(sb);
 	del_timer(&sbi->s_err_report);
 	if (sbi->s_flex_groups)
 		ext4_kvfree(sbi->s_flex_groups);
@@ -4177,7 +4252,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 		goto out_bdev;
 	}
 	journal->j_private = sb;
-	ll_rw_block(READ, 1, &journal->j_sb_buffer);
+	ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
 	wait_on_buffer(journal->j_sb_buffer);
 	if (!buffer_uptodate(journal->j_sb_buffer)) {
 		ext4_msg(sb, KERN_ERR, "I/O error on journal device");
@@ -4742,9 +4817,10 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 	struct super_block *sb = dentry->d_sb;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_super_block *es = sbi->s_es;
-	ext4_fsblk_t overhead = 0;
+	ext4_fsblk_t overhead = 0, resv_blocks;
 	u64 fsid;
 	s64 bfree;
+	resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
 
 	if (!test_opt(sb, MINIX_DF))
 		overhead = sbi->s_overhead;
@@ -4756,8 +4832,9 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 		percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
 	/* prevent underflow in case little free space is available */
 	buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
-	buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
-	if (buf->f_bfree < ext4_r_blocks_count(es))
+	buf->f_bavail = buf->f_bfree -
+			(ext4_r_blocks_count(es) + resv_blocks);
+	if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
 		buf->f_bavail = 0;
 	buf->f_files = le32_to_cpu(es->s_inodes_count);
 	buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
@@ -4945,6 +5022,8 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
 		return PTR_ERR(qf_inode);
 	}
 
+	/* Don't account quota for quota files to avoid recursion */
+	qf_inode->i_flags |= S_NOQUOTA;
 	err = dquot_enable(qf_inode, type, format_id, flags);
 	iput(qf_inode);
 

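ext4_calculate_resv_clusters() above is the code behind the documented default of "2% or 4096 clusters, whichever is smaller": divide the cluster count by 50, then clamp at 4096. The same arithmetic as a standalone sketch:

    #include <stdint.h>
    #include <stdio.h>

    /* Default reservation: 2% of all clusters, capped at 4096. */
    static uint64_t calc_resv_clusters(uint64_t total_clusters)
    {
        uint64_t resv = total_clusters / 50;    /* 2% */

        return resv < 4096 ? resv : 4096;
    }

    int main(void)
    {
        /* small fs: 2% wins; large fs: the 4096 cap wins */
        printf("%llu\n", (unsigned long long)calc_resv_clusters(100000));
        printf("%llu\n", (unsigned long long)calc_resv_clusters(10000000));
        return 0;
    }
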
+ 7 - 6
fs/ext4/xattr.c

@@ -122,17 +122,18 @@ static __le32 ext4_xattr_block_csum(struct inode *inode,
 				    struct ext4_xattr_header *hdr)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	__u32 csum, old;
+	__u32 csum;
+	__le32 save_csum;
+	__le64 dsk_block_nr = cpu_to_le64(block_nr);
 
-	old = hdr->h_checksum;
+	save_csum = hdr->h_checksum;
 	hdr->h_checksum = 0;
-	block_nr = cpu_to_le64(block_nr);
-	csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&block_nr,
-			   sizeof(block_nr));
+	csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&dsk_block_nr,
+			   sizeof(dsk_block_nr));
 	csum = ext4_chksum(sbi, csum, (__u8 *)hdr,
 			   EXT4_BLOCK_SIZE(inode->i_sb));
 
-	hdr->h_checksum = old;
+	hdr->h_checksum = save_csum;
 	return cpu_to_le32(csum);
 }
 

+ 1 - 0
fs/ext4/xattr.h

@@ -22,6 +22,7 @@
 #define	EXT4_XATTR_INDEX_LUSTRE			5
 #define EXT4_XATTR_INDEX_SECURITY	        6
 #define EXT4_XATTR_INDEX_SYSTEM			7
+#define EXT4_XATTR_INDEX_RICHACL		8
 
 struct ext4_xattr_header {
 	__le32	h_magic;	/* magic number for identification */

+ 28 - 22
fs/jbd2/commit.c

@@ -382,7 +382,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	int space_left = 0;
 	int first_tag = 0;
 	int tag_flag;
-	int i, to_free = 0;
+	int i;
 	int tag_bytes = journal_tag_bytes(journal);
 	struct buffer_head *cbh = NULL; /* For transactional checksums */
 	__u32 crc32_sum = ~0;
@@ -1134,7 +1134,7 @@ restart_loop:
 	journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged;
 	spin_unlock(&journal->j_history_lock);
 
-	commit_transaction->t_state = T_FINISHED;
+	commit_transaction->t_state = T_COMMIT_CALLBACK;
 	J_ASSERT(commit_transaction == journal->j_committing_transaction);
 	journal->j_commit_sequence = commit_transaction->t_tid;
 	journal->j_committing_transaction = NULL;
@@ -1149,38 +1149,44 @@ restart_loop:
 				journal->j_average_commit_time*3) / 4;
 	else
 		journal->j_average_commit_time = commit_time;
+
 	write_unlock(&journal->j_state_lock);
 
-	if (commit_transaction->t_checkpoint_list == NULL &&
-	    commit_transaction->t_checkpoint_io_list == NULL) {
-		__jbd2_journal_drop_transaction(journal, commit_transaction);
-		to_free = 1;
+	if (journal->j_checkpoint_transactions == NULL) {
+		journal->j_checkpoint_transactions = commit_transaction;
+		commit_transaction->t_cpnext = commit_transaction;
+		commit_transaction->t_cpprev = commit_transaction;
 	} else {
-		if (journal->j_checkpoint_transactions == NULL) {
-			journal->j_checkpoint_transactions = commit_transaction;
-			commit_transaction->t_cpnext = commit_transaction;
-			commit_transaction->t_cpprev = commit_transaction;
-		} else {
-			commit_transaction->t_cpnext =
-				journal->j_checkpoint_transactions;
-			commit_transaction->t_cpprev =
-				commit_transaction->t_cpnext->t_cpprev;
-			commit_transaction->t_cpnext->t_cpprev =
-				commit_transaction;
-			commit_transaction->t_cpprev->t_cpnext =
+		commit_transaction->t_cpnext =
+			journal->j_checkpoint_transactions;
+		commit_transaction->t_cpprev =
+			commit_transaction->t_cpnext->t_cpprev;
+		commit_transaction->t_cpnext->t_cpprev =
+			commit_transaction;
+		commit_transaction->t_cpprev->t_cpnext =
 				commit_transaction;
-		}
 	}
 	spin_unlock(&journal->j_list_lock);
-
+	/* Drop all spin_locks because the commit_callback may block.
+	 * __journal_remove_checkpoint() cannot destroy the transaction
+	 * under us because it is not yet marked as T_FINISHED. */
 	if (journal->j_commit_callback)
 		journal->j_commit_callback(journal, commit_transaction);
 
 	trace_jbd2_end_commit(journal, commit_transaction);
 	jbd_debug(1, "JBD2: commit %d complete, head %d\n",
 		  journal->j_commit_sequence, journal->j_tail_sequence);
-	if (to_free)
-		jbd2_journal_free_transaction(commit_transaction);
 
+	write_lock(&journal->j_state_lock);
+	spin_lock(&journal->j_list_lock);
+	commit_transaction->t_state = T_FINISHED;
+	/* Recheck checkpoint lists after j_list_lock was dropped */
+	if (commit_transaction->t_checkpoint_list == NULL &&
+	    commit_transaction->t_checkpoint_io_list == NULL) {
+		__jbd2_journal_drop_transaction(journal, commit_transaction);
+		jbd2_journal_free_transaction(commit_transaction);
+	}
+	spin_unlock(&journal->j_list_lock);
+	write_unlock(&journal->j_state_lock);
 	wake_up(&journal->j_wait_done_commit);
 }

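The reworked commit path now always links the finished transaction into journal->j_checkpoint_transactions, and the two branches are a standard insertion into a circular doubly-linked list addressed by a single head pointer: an empty list points the node at itself, otherwise the node is spliced in just before the head. Stripped of the jbd2 types (hypothetical names):

    #include <stdio.h>

    struct node {
        struct node *next, *prev;
        int id;
    };

    /* Splice n into the circular list before *head, or make it the
     * sole element when the list is empty. */
    static void clist_add(struct node **head, struct node *n)
    {
        if (*head == NULL) {
            *head = n;
            n->next = n;
            n->prev = n;
        } else {
            n->next = *head;
            n->prev = (*head)->prev;
            n->next->prev = n;
            n->prev->next = n;
        }
    }

    int main(void)
    {
        struct node *head = NULL;
        struct node a = { .id = 1 }, b = { .id = 2 };

        clist_add(&head, &a);
        clist_add(&head, &b);
        printf("%d -> %d -> %d\n", head->id, head->next->id,
               head->next->next->id);          /* 1 -> 2 -> 1 */
        return 0;
    }
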
+ 31 - 0
fs/jbd2/journal.c

@@ -707,6 +707,37 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
 	return err;
 }
 
+/*
+ * When this function returns, the transaction corresponding to tid
+ * will have completed.  If the transaction is currently running, start
+ * committing that transaction before waiting for it to complete.  If
+ * the transaction id is stale, it is by definition already completed,
+ * so just return success.
+ */
+int jbd2_complete_transaction(journal_t *journal, tid_t tid)
+{
+	int	need_to_wait = 1;
+
+	read_lock(&journal->j_state_lock);
+	if (journal->j_running_transaction &&
+	    journal->j_running_transaction->t_tid == tid) {
+		if (journal->j_commit_request != tid) {
+			/* transaction not yet started, so request it */
+			read_unlock(&journal->j_state_lock);
+			jbd2_log_start_commit(journal, tid);
+			goto wait_commit;
+		}
+	} else if (!(journal->j_committing_transaction &&
+		     journal->j_committing_transaction->t_tid == tid))
+		need_to_wait = 0;
+	read_unlock(&journal->j_state_lock);
+	if (!need_to_wait)
+		return 0;
+wait_commit:
+	return jbd2_log_wait_commit(journal, tid);
+}
+EXPORT_SYMBOL(jbd2_complete_transaction);
+
 /*
  * Log buffer allocation routines:
  */

+ 8 - 1
fs/jbd2/transaction.c

@@ -332,7 +332,6 @@ static handle_t *new_handle(int nblocks)
 	handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
 	if (!handle)
 		return NULL;
-	memset(handle, 0, sizeof(*handle));
 	handle->h_buffer_credits = nblocks;
 	handle->h_ref = 1;
 
@@ -640,6 +639,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
 	int error;
 	char *frozen_buffer = NULL;
 	int need_copy = 0;
+	unsigned long start_lock, time_lock;
 
 	if (is_handle_aborted(handle))
 		return -EROFS;
@@ -655,9 +655,16 @@ repeat:
 
 	/* @@@ Need to check for errors here at some point. */
 
+	start_lock = jiffies;
 	lock_buffer(bh);
 	jbd_lock_bh_state(bh);
 
+	/* If it takes too long to lock the buffer, trace it */
+	time_lock = jbd2_time_diff(start_lock, jiffies);
+	if (time_lock > HZ/10)
+		trace_jbd2_lock_buffer_stall(bh->b_bdev->bd_dev,
+			jiffies_to_msecs(time_lock));
+
 	/* We now hold the buffer lock so it is safe to query the buffer
 	 * state.  Is the buffer dirty?
 	 *

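The do_get_write_access() hunk samples jiffies before lock_buffer() and fires the new jbd2_lock_buffer_stall tracepoint when taking the lock needed more than HZ/10 (100 ms). The same stall-detection pattern outside the kernel, with CLOCK_MONOTONIC in place of jiffies and fprintf() standing in for the tracepoint:

    #include <pthread.h>
    #include <stdio.h>
    #include <time.h>

    static long elapsed_ms(const struct timespec *a,
                           const struct timespec *b)
    {
        return (b->tv_sec - a->tv_sec) * 1000 +
               (b->tv_nsec - a->tv_nsec) / 1000000;
    }

    /* Take the lock, but report if we had to wait over 100 ms. */
    static void lock_with_stall_trace(pthread_mutex_t *m)
    {
        struct timespec start, end;
        long waited;

        clock_gettime(CLOCK_MONOTONIC, &start);
        pthread_mutex_lock(m);
        clock_gettime(CLOCK_MONOTONIC, &end);

        waited = elapsed_ms(&start, &end);
        if (waited > 100)
            fprintf(stderr, "lock stall: %ld ms\n", waited);
    }

    int main(void)
    {
        pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

        lock_with_stall_trace(&m);      /* uncontended: no report */
        pthread_mutex_unlock(&m);
        return 0;
    }
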
+ 4 - 0
include/linux/buffer_head.h

@@ -34,6 +34,8 @@ enum bh_state_bits {
 	BH_Write_EIO,	/* I/O error on write */
 	BH_Unwritten,	/* Buffer is allocated on disk but not written */
 	BH_Quiet,	/* Buffer Error Printks to be quiet */
+	BH_Meta,	/* Buffer contains metadata */
+	BH_Prio,	/* Buffer should be submitted with REQ_PRIO */
 
 	BH_PrivateStart,/* not a state bit, but the first bit available
 			 * for private allocation by other entities
@@ -124,6 +126,8 @@ BUFFER_FNS(Delay, delay)
 BUFFER_FNS(Boundary, boundary)
 BUFFER_FNS(Write_EIO, write_io_error)
 BUFFER_FNS(Unwritten, unwritten)
+BUFFER_FNS(Meta, meta)
+BUFFER_FNS(Prio, prio)
 
 #define bh_offset(bh)		((unsigned long)(bh)->b_data & ~PAGE_MASK)
 

+ 3 - 1
include/linux/jbd2.h

@@ -480,6 +480,7 @@ struct transaction_s
 		T_COMMIT,
 		T_COMMIT_DFLUSH,
 		T_COMMIT_JFLUSH,
+		T_COMMIT_CALLBACK,
 		T_FINISHED
 	}			t_state;
 
@@ -1144,7 +1145,7 @@ extern struct kmem_cache *jbd2_handle_cache;
 
 static inline handle_t *jbd2_alloc_handle(gfp_t gfp_flags)
 {
-	return kmem_cache_alloc(jbd2_handle_cache, gfp_flags);
+	return kmem_cache_zalloc(jbd2_handle_cache, gfp_flags);
 }
 
 static inline void jbd2_free_handle(handle_t *handle)
@@ -1200,6 +1201,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t tid);
 int jbd2_journal_start_commit(journal_t *journal, tid_t *tid);
 int jbd2_journal_force_commit_nested(journal_t *journal);
 int jbd2_log_wait_commit(journal_t *journal, tid_t tid);
+int jbd2_complete_transaction(journal_t *journal, tid_t tid);
 int jbd2_log_do_checkpoint(journal_t *journal);
 int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid);
 

+ 2 - 9
include/linux/journal-head.h

@@ -31,21 +31,14 @@ struct journal_head {
 	/*
 	 * Journalling list for this buffer [jbd_lock_bh_state()]
 	 */
-	unsigned b_jlist;
+	unsigned b_jlist:4;
 
 	/*
 	 * This flag signals the buffer has been modified by
 	 * the currently running transaction
 	 * [jbd_lock_bh_state()]
 	 */
-	unsigned b_modified;
-
-	/*
-	 * This feild tracks the last transaction id in which this buffer
-	 * has been cowed
-	 * [jbd_lock_bh_state()]
-	 */
-	tid_t b_cow_tid;
+	unsigned b_modified:1;
 
 	/*
 	 * Copy of the buffer data frozen for writing to the log.

+ 4 - 12
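The journal_head shrink comes from two changes: deleting the unused b_cow_tid field and turning b_jlist and b_modified into 4-bit and 1-bit bitfields that the compiler packs into a single word. The packing effect is easy to demonstrate in isolation; exact sizes are ABI-dependent:

    #include <stdio.h>

    struct before {
        unsigned b_jlist;
        unsigned b_modified;
    };

    struct after {
        unsigned b_jlist:4;     /* only a handful of list states */
        unsigned b_modified:1;
    };

    int main(void)
    {
        /* typically 8 bytes vs 4 on common ABIs */
        printf("before: %zu, after: %zu\n",
               sizeof(struct before), sizeof(struct after));
        return 0;
    }
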
include/trace/events/ext4.h

@@ -257,15 +257,7 @@ DECLARE_EVENT_CLASS(ext4__write_end,
 		  __entry->pos, __entry->len, __entry->copied)
 );
 
-DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end,
-
-	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
-		 unsigned int copied),
-
-	TP_ARGS(inode, pos, len, copied)
-);
-
-DEFINE_EVENT(ext4__write_end, ext4_writeback_write_end,
+DEFINE_EVENT(ext4__write_end, ext4_write_end,
 
 	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
 		 unsigned int copied),
@@ -1956,7 +1948,7 @@ TRACE_EVENT(ext4_remove_blocks,
 		__entry->to		= to;
 		__entry->partial	= partial_cluster;
 		__entry->ee_pblk	= ext4_ext_pblock(ex);
-		__entry->ee_lblk	= cpu_to_le32(ex->ee_block);
+		__entry->ee_lblk	= le32_to_cpu(ex->ee_block);
 		__entry->ee_len		= ext4_ext_get_actual_len(ex);
 	),
 
@@ -2060,7 +2052,7 @@ TRACE_EVENT(ext4_ext_remove_space,
 
 TRACE_EVENT(ext4_ext_remove_space_done,
 	TP_PROTO(struct inode *inode, ext4_lblk_t start, int depth,
-		ext4_lblk_t partial, unsigned short eh_entries),
+		ext4_lblk_t partial, __le16 eh_entries),
 
 	TP_ARGS(inode, start, depth, partial, eh_entries),
 
@@ -2079,7 +2071,7 @@ TRACE_EVENT(ext4_ext_remove_space_done,
 		__entry->start		= start;
 		__entry->depth		= depth;
 		__entry->partial	= partial;
-		__entry->eh_entries	= eh_entries;
+		__entry->eh_entries	= le16_to_cpu(eh_entries);
 	),
 
 	TP_printk("dev %d,%d ino %lu since %u depth %d partial %u "

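The two endianness fixes above flip the conversion direction: ee_block and eh_entries are little-endian on-disk values, so they must pass through le32_to_cpu()/le16_to_cpu() before being stored in host-order trace fields; on big-endian machines cpu_to_le32() would swap the bytes the wrong way. What le32_to_cpu() guarantees can be written portably as plain byte assembly, as in this sketch:

    #include <stdint.h>
    #include <stdio.h>

    /* Read a 4-byte little-endian on-disk field in CPU order,
     * regardless of the host's byte order. */
    static uint32_t le32_to_cpu_portable(const uint8_t b[4])
    {
        return (uint32_t)b[0]       | (uint32_t)b[1] << 8 |
               (uint32_t)b[2] << 16 | (uint32_t)b[3] << 24;
    }

    int main(void)
    {
        uint8_t disk[4] = { 0x78, 0x56, 0x34, 0x12 };   /* LE 0x12345678 */

        printf("0x%08x\n", le32_to_cpu_portable(disk)); /* 0x12345678 */
        return 0;
    }
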
+ 21 - 0
include/trace/events/jbd2.h

@@ -358,6 +358,27 @@ TRACE_EVENT(jbd2_write_superblock,
 		  MINOR(__entry->dev), __entry->write_op)
 );
 
+TRACE_EVENT(jbd2_lock_buffer_stall,
+
+	TP_PROTO(dev_t dev, unsigned long stall_ms),
+
+	TP_ARGS(dev, stall_ms),
+
+	TP_STRUCT__entry(
+		__field(        dev_t, dev	)
+		__field(unsigned long, stall_ms	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= dev;
+		__entry->stall_ms	= stall_ms;
+	),
+
+	TP_printk("dev %d,%d stall_ms %lu",
+		MAJOR(__entry->dev), MINOR(__entry->dev),
+		__entry->stall_ms)
+);
+
 #endif /* _TRACE_JBD2_H */
 
 /* This part must be outside protection */