@@ -21,6 +21,7 @@
  * mballoc.c contains the multiblocks allocation routines
  */

+#include "ext4_jbd2.h"
 #include "mballoc.h"
 #include <linux/debugfs.h>
 #include <linux/slab.h>
@@ -339,7 +340,7 @@
  */
 static struct kmem_cache *ext4_pspace_cachep;
 static struct kmem_cache *ext4_ac_cachep;
-static struct kmem_cache *ext4_free_ext_cachep;
+static struct kmem_cache *ext4_free_data_cachep;

 /* We create slab caches for groupinfo data structures based on the
  * superblock block size. There will be one per mounted filesystem for
@@ -357,7 +358,8 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
                                                ext4_group_t group);
 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
                                                ext4_group_t group);
-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
+static void ext4_free_data_callback(struct super_block *sb,
+                                    struct ext4_journal_cb_entry *jce, int rc);

 static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
 {
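The new callback takes a struct ext4_journal_cb_entry instead of the raw transaction. The struct behind it lives in mballoc.h rather than in this file, but its shape can be reconstructed from the efd_* accesses later in this patch; a sketch (layout assumed, field comments mine):

struct ext4_free_data {
        /* MUST be the first member, so the jce pointer handed to
         * ext4_free_data_callback() can be cast straight back */
        struct ext4_journal_cb_entry    efd_jce;

        /* free extents are kept in a per-group rb tree */
        struct rb_node                  efd_node;

        /* group which the free extent belongs to */
        ext4_group_t                    efd_group;

        /* free extent: first cluster and length */
        ext4_grpblk_t                   efd_start_cluster;
        ext4_grpblk_t                   efd_count;

        /* transaction which freed this extent */
        tid_t                           efd_tid;
};

The first-member requirement is what makes the (struct ext4_free_data *)jce cast in the callback below legal.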
@@ -425,7 +427,7 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
 {
        char *bb;

-       BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
+       BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
        BUG_ON(max == NULL);

        if (order > e4b->bd_blkbits + 1) {
@@ -436,10 +438,10 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
        /* at order 0 we see each particular block */
        if (order == 0) {
                *max = 1 << (e4b->bd_blkbits + 3);
-               return EXT4_MB_BITMAP(e4b);
+               return e4b->bd_bitmap;
        }

-       bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
+       bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
        *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];

        return bb;
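The EXT4_MB_BITMAP()/EXT4_MB_BUDDY() macros that this and the following hunks open-code are, presumably, nothing more than trivial field accessors along these lines, so the substitution is purely cosmetic:

#define EXT4_MB_BITMAP(e4b)     ((e4b)->bd_bitmap)
#define EXT4_MB_BUDDY(e4b)      ((e4b)->bd_buddy)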
@@ -588,7 +590,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
                        for (j = 0; j < (1 << order); j++) {
                                k = (i * (1 << order)) + j;
                                MB_CHECK_ASSERT(
-                                       !mb_test_bit(k, EXT4_MB_BITMAP(e4b)));
+                                       !mb_test_bit(k, e4b->bd_bitmap));
                        }
                        count++;
                }
@@ -782,7 +784,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
        int groups_per_page;
        int err = 0;
        int i;
-       ext4_group_t first_group;
+       ext4_group_t first_group, group;
        int first_block;
        struct super_block *sb;
        struct buffer_head *bhs;
@@ -806,24 +808,23 @@ static int ext4_mb_init_cache(struct page *page, char *incore)

        /* allocate buffer_heads to read bitmaps */
        if (groups_per_page > 1) {
-               err = -ENOMEM;
                i = sizeof(struct buffer_head *) * groups_per_page;
                bh = kzalloc(i, GFP_NOFS);
-               if (bh == NULL)
+               if (bh == NULL) {
+                       err = -ENOMEM;
                        goto out;
+               }
        } else
                bh = &bhs;

        first_group = page->index * blocks_per_page / 2;

        /* read all groups the page covers into the cache */
-       for (i = 0; i < groups_per_page; i++) {
-               struct ext4_group_desc *desc;
-
-               if (first_group + i >= ngroups)
+       for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
+               if (group >= ngroups)
                        break;

-               grinfo = ext4_get_group_info(sb, first_group + i);
+               grinfo = ext4_get_group_info(sb, group);
                /*
                 * If page is uptodate then we came here after online resize
                 * which added some new uninitialized group info structs, so
@@ -834,69 +835,21 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                        bh[i] = NULL;
                        continue;
                }
-
-               err = -EIO;
-               desc = ext4_get_group_desc(sb, first_group + i, NULL);
-               if (desc == NULL)
-                       goto out;
-
-               err = -ENOMEM;
-               bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc));
-               if (bh[i] == NULL)
+               if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) {
+                       err = -ENOMEM;
                        goto out;
-
-               if (bitmap_uptodate(bh[i]))
-                       continue;
-
-               lock_buffer(bh[i]);
-               if (bitmap_uptodate(bh[i])) {
-                       unlock_buffer(bh[i]);
-                       continue;
-               }
-               ext4_lock_group(sb, first_group + i);
-               if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-                       ext4_init_block_bitmap(sb, bh[i],
-                                               first_group + i, desc);
-                       set_bitmap_uptodate(bh[i]);
-                       set_buffer_uptodate(bh[i]);
-                       ext4_unlock_group(sb, first_group + i);
-                       unlock_buffer(bh[i]);
-                       continue;
                }
-               ext4_unlock_group(sb, first_group + i);
-               if (buffer_uptodate(bh[i])) {
-                       /*
-                        * if not uninit if bh is uptodate,
-                        * bitmap is also uptodate
-                        */
-                       set_bitmap_uptodate(bh[i]);
-                       unlock_buffer(bh[i]);
-                       continue;
-               }
-               get_bh(bh[i]);
-               /*
-                * submit the buffer_head for read. We can
-                * safely mark the bitmap as uptodate now.
-                * We do it here so the bitmap uptodate bit
-                * get set with buffer lock held.
-                */
-               set_bitmap_uptodate(bh[i]);
-               bh[i]->b_end_io = end_buffer_read_sync;
-               submit_bh(READ, bh[i]);
-               mb_debug(1, "read bitmap for group %u\n", first_group + i);
+               mb_debug(1, "read bitmap for group %u\n", group);
        }

        /* wait for I/O completion */
-       for (i = 0; i < groups_per_page; i++)
-               if (bh[i])
-                       wait_on_buffer(bh[i]);
-
-       err = -EIO;
-       for (i = 0; i < groups_per_page; i++)
-               if (bh[i] && !buffer_uptodate(bh[i]))
+       for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
+               if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) {
+                       err = -EIO;
                        goto out;
+               }
+       }

-       err = 0;
        first_block = page->index * blocks_per_page;
        for (i = 0; i < blocks_per_page; i++) {
                int group;
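The rewrite above replaces the open-coded sb_getblk()/uninit-bitmap/submit_bh() sequence with two helpers whose definitions live outside this patch. Judging from the call sites, the split presumably looks like this:

/* Start reading the bitmap for @group (or initialize it in place for a
 * BLOCK_UNINIT group) and return the buffer_head without waiting; NULL
 * signals an allocation failure, hence the -ENOMEM above. */
struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
                                                  ext4_group_t group);

/* Wait for a read started by the _nowait variant; returns non-zero when
 * the buffer still isn't uptodate, hence the -EIO above. */
int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t group,
                           struct buffer_head *bh);

The two-phase structure keeps the old behavior of submitting every group's read before waiting on any of them, so the I/O for all groups covered by the page still overlaps.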
@@ -1250,10 +1203,10 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
        int order = 1;
        void *bb;

-       BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
+       BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
        BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));

-       bb = EXT4_MB_BUDDY(e4b);
+       bb = e4b->bd_buddy;
        while (order <= e4b->bd_blkbits + 1) {
                block = block >> 1;
                if (!mb_test_bit(block, bb)) {
@@ -1323,9 +1276,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,

        /* let's maintain fragments counter */
        if (first != 0)
-               block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b));
+               block = !mb_test_bit(first - 1, e4b->bd_bitmap);
        if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
-               max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b));
+               max = !mb_test_bit(first + count, e4b->bd_bitmap);
        if (block && max)
                e4b->bd_info->bb_fragments--;
        else if (!block && !max)
@@ -1336,7 +1289,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                block = first++;
                order = 0;

-               if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) {
+               if (!mb_test_bit(block, e4b->bd_bitmap)) {
                        ext4_fsblk_t blocknr;

                        blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
@@ -1347,7 +1300,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                                      "freeing already freed block "
                                      "(bit %u)", block);
                }
-               mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
+               mb_clear_bit(block, e4b->bd_bitmap);
                e4b->bd_info->bb_counters[order]++;

                /* start of the buddy */
@@ -1429,7 +1382,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
                        break;

                next = (block + 1) * (1 << order);
-               if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
+               if (mb_test_bit(next, e4b->bd_bitmap))
                        break;

                order = mb_find_order_for_block(e4b, next);
@@ -1466,9 +1419,9 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)

        /* let's maintain fragments counter */
        if (start != 0)
-               mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b));
+               mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
        if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
-               max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b));
+               max = !mb_test_bit(start + len, e4b->bd_bitmap);
        if (mlen && max)
                e4b->bd_info->bb_fragments++;
        else if (!mlen && !max)
@@ -1511,7 +1464,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
        }
        mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);

-       ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
+       ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
        mb_check_buddy(e4b);

        return ret;
@@ -1810,7 +1763,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
                                        struct ext4_buddy *e4b)
 {
        struct super_block *sb = ac->ac_sb;
-       void *bitmap = EXT4_MB_BITMAP(e4b);
+       void *bitmap = e4b->bd_bitmap;
        struct ext4_free_extent ex;
        int i;
        int free;
@@ -1870,7 +1823,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
 {
        struct super_block *sb = ac->ac_sb;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-       void *bitmap = EXT4_MB_BITMAP(e4b);
+       void *bitmap = e4b->bd_bitmap;
        struct ext4_free_extent ex;
        ext4_fsblk_t first_group_block;
        ext4_fsblk_t a;
@@ -2224,7 +2177,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
                        EXT4_DESC_PER_BLOCK_BITS(sb);
                meta_group_info = kmalloc(metalen, GFP_KERNEL);
                if (meta_group_info == NULL) {
-                       ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem "
+                       ext4_msg(sb, KERN_ERR, "can't allocate mem "
                                 "for a buddy group");
                        goto exit_meta_group_info;
                }
@@ -2238,7 +2191,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,

        meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
        if (meta_group_info[i] == NULL) {
-               ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem");
+               ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
                goto exit_group_info;
        }
        memset(meta_group_info[i], 0, kmem_cache_size(cachep));
@@ -2522,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
                proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
                                 &ext4_mb_seq_groups_fops, sb);

-       if (sbi->s_journal)
-               sbi->s_journal->j_commit_callback = release_blocks_on_commit;
-
        return 0;

 out_free_locality_groups:
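mballoc no longer claims journal->j_commit_callback for itself. With the per-transaction callback list there is presumably a single dispatcher, registered once at mount time (in super.c, outside this file), that walks t_private_list after each commit and fires every entry. A sketch, assuming the jce_list/jce_func fields used by this patch:

static void ext4_journal_commit_callback(journal_t *journal,
                                         transaction_t *txn)
{
        struct super_block *sb = journal->j_private;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        int error = is_journal_aborted(journal);
        struct ext4_journal_cb_entry *jce, *tmp;

        spin_lock(&sbi->s_md_lock);
        list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
                list_del_init(&jce->jce_list);
                spin_unlock(&sbi->s_md_lock);
                /* for mballoc entries this is ext4_free_data_callback() */
                jce->jce_func(sb, jce, error);
                spin_lock(&sbi->s_md_lock);
        }
        spin_unlock(&sbi->s_md_lock);
}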
@@ -2637,58 +2587,55 @@ static inline int ext4_issue_discard(struct super_block *sb,
  * This function is called by the jbd2 layer once the commit has finished,
  * so we know we can free the blocks that were released with that commit.
  */
-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
+static void ext4_free_data_callback(struct super_block *sb,
+                                    struct ext4_journal_cb_entry *jce,
+                                    int rc)
 {
-       struct super_block *sb = journal->j_private;
+       struct ext4_free_data *entry = (struct ext4_free_data *)jce;
        struct ext4_buddy e4b;
        struct ext4_group_info *db;
        int err, count = 0, count2 = 0;
-       struct ext4_free_data *entry;
-       struct list_head *l, *ltmp;

-       list_for_each_safe(l, ltmp, &txn->t_private_list) {
-               entry = list_entry(l, struct ext4_free_data, list);
+       mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
+                entry->efd_count, entry->efd_group, entry);

-               mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
-                        entry->count, entry->group, entry);
+       if (test_opt(sb, DISCARD))
+               ext4_issue_discard(sb, entry->efd_group,
+                                  entry->efd_start_cluster, entry->efd_count);

-               if (test_opt(sb, DISCARD))
-                       ext4_issue_discard(sb, entry->group,
-                                          entry->start_cluster, entry->count);
+       err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
+       /* we expect to find existing buddy because it's pinned */
+       BUG_ON(err != 0);

-               err = ext4_mb_load_buddy(sb, entry->group, &e4b);
-               /* we expect to find existing buddy because it's pinned */
-               BUG_ON(err != 0);

-               db = e4b.bd_info;
-               /* there are blocks to put in buddy to make them really free */
-               count += entry->count;
-               count2++;
-               ext4_lock_group(sb, entry->group);
-               /* Take it out of per group rb tree */
-               rb_erase(&entry->node, &(db->bb_free_root));
-               mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count);
+       db = e4b.bd_info;
+       /* there are blocks to put in buddy to make them really free */
+       count += entry->efd_count;
+       count2++;
+       ext4_lock_group(sb, entry->efd_group);
+       /* Take it out of per group rb tree */
+       rb_erase(&entry->efd_node, &(db->bb_free_root));
+       mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);

-               /*
-                * Clear the trimmed flag for the group so that the next
-                * ext4_trim_fs can trim it.
-                * If the volume is mounted with -o discard, online discard
-                * is supported and the free blocks will be trimmed online.
-                */
-               if (!test_opt(sb, DISCARD))
-                       EXT4_MB_GRP_CLEAR_TRIMMED(db);
+       /*
+        * Clear the trimmed flag for the group so that the next
+        * ext4_trim_fs can trim it.
+        * If the volume is mounted with -o discard, online discard
+        * is supported and the free blocks will be trimmed online.
+        */
+       if (!test_opt(sb, DISCARD))
+               EXT4_MB_GRP_CLEAR_TRIMMED(db);

-               if (!db->bb_free_root.rb_node) {
-                       /* No more items in the per group rb tree
-                        * balance refcounts from ext4_mb_free_metadata()
-                        */
-                       page_cache_release(e4b.bd_buddy_page);
-                       page_cache_release(e4b.bd_bitmap_page);
-               }
-               ext4_unlock_group(sb, entry->group);
-               kmem_cache_free(ext4_free_ext_cachep, entry);
-               ext4_mb_unload_buddy(&e4b);
+       if (!db->bb_free_root.rb_node) {
+               /* No more items in the per group rb tree
+                * balance refcounts from ext4_mb_free_metadata()
+                */
+               page_cache_release(e4b.bd_buddy_page);
+               page_cache_release(e4b.bd_bitmap_page);
        }
+       ext4_unlock_group(sb, entry->efd_group);
+       kmem_cache_free(ext4_free_data_cachep, entry);
+       ext4_mb_unload_buddy(&e4b);

        mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
 }
@@ -2741,9 +2688,9 @@ int __init ext4_init_mballoc(void)
                return -ENOMEM;
        }

-       ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
-                                         SLAB_RECLAIM_ACCOUNT);
-       if (ext4_free_ext_cachep == NULL) {
+       ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
+                                          SLAB_RECLAIM_ACCOUNT);
+       if (ext4_free_data_cachep == NULL) {
                kmem_cache_destroy(ext4_pspace_cachep);
                kmem_cache_destroy(ext4_ac_cachep);
                return -ENOMEM;
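Only the C identifier changes here; KMEM_CACHE() names the slab after the struct, which was already ext4_free_data. For reference, the macro expands roughly to:

#define KMEM_CACHE(__struct, __flags)                                   \
        kmem_cache_create(#__struct, sizeof(struct __struct),           \
                          __alignof__(struct __struct), (__flags), NULL)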
@@ -2761,7 +2708,7 @@ void ext4_exit_mballoc(void)
        rcu_barrier();
        kmem_cache_destroy(ext4_pspace_cachep);
        kmem_cache_destroy(ext4_ac_cachep);
-       kmem_cache_destroy(ext4_free_ext_cachep);
+       kmem_cache_destroy(ext4_free_data_cachep);
        ext4_groupinfo_destroy_slabs();
        ext4_remove_debugfs_entry();
 }
@@ -2815,7 +2762,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
        if (!ext4_data_block_valid(sbi, block, len)) {
                ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
-                          "fs metadata\n", block, block+len);
+                          "fs metadata", block, block+len);
                /* File system mounted not to panic on error
                 * Fix the bitmap and repeat the block allocation
                 * We leak some of the blocks here.
@@ -2911,7 +2858,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
        struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
        int bsbits, max;
        ext4_lblk_t end;
-       loff_t size, orig_size, start_off;
+       loff_t size, start_off;
+       loff_t orig_size __maybe_unused;
        ext4_lblk_t start;
        struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
        struct ext4_prealloc_space *pa;
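orig_size is consumed only by mb_debug(), which compiles to nothing unless ext4 debugging is enabled, so splitting the declaration and tagging it __maybe_unused silences the resulting set-but-unused warning without an #ifdef. Roughly:

/* with debugging disabled, mb_debug() is (approximately) defined away: */
#define mb_debug(n, fmt, a...)          /* no-op */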
@@ -3321,8 +3269,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
        n = rb_first(&(grp->bb_free_root));

        while (n) {
-               entry = rb_entry(n, struct ext4_free_data, node);
-               ext4_set_bits(bitmap, entry->start_cluster, entry->count);
+               entry = rb_entry(n, struct ext4_free_data, efd_node);
+               ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
                n = rb_next(n);
        }
        return;
@@ -3916,11 +3864,11 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
                        (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
                return;

-       ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:"
+       ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
                        " Allocation context details:");
-       ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d",
+       ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
                        ac->ac_status, ac->ac_flags);
-       ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, "
+       ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
                        "goal %lu/%lu/%lu@%lu, "
                        "best %lu/%lu/%lu@%lu cr %d",
                        (unsigned long)ac->ac_o_ex.fe_group,
@@ -3936,9 +3884,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
                        (unsigned long)ac->ac_b_ex.fe_len,
                        (unsigned long)ac->ac_b_ex.fe_logical,
                        (int)ac->ac_criteria);
-       ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found",
+       ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found",
                 ac->ac_ex_scanned, ac->ac_found);
-       ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: ");
+       ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
        ngroups = ext4_get_groups_count(sb);
        for (i = 0; i < ngroups; i++) {
                struct ext4_group_info *grp = ext4_get_group_info(sb, i);
@@ -4428,9 +4376,9 @@ out:
 static int can_merge(struct ext4_free_data *entry1,
                     struct ext4_free_data *entry2)
 {
-       if ((entry1->t_tid == entry2->t_tid) &&
-           (entry1->group == entry2->group) &&
-           ((entry1->start_cluster + entry1->count) == entry2->start_cluster))
+       if ((entry1->efd_tid == entry2->efd_tid) &&
+           (entry1->efd_group == entry2->efd_group) &&
+           ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster))
                return 1;
        return 0;
 }
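A quick worked example of the predicate, with illustrative values:

/* entry1 = { .efd_tid = 42, .efd_group = 7,
 *            .efd_start_cluster = 128, .efd_count = 64 };
 * entry2 = { .efd_tid = 42, .efd_group = 7,
 *            .efd_start_cluster = 192, .efd_count = 16 };
 * 128 + 64 == 192, so can_merge(entry1, entry2) returns 1.  A one-cluster
 * gap (193), a different group, or a different tid all return 0, since
 * extents freed by different transactions must stay on their own
 * transaction's callback list. */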
@@ -4452,8 +4400,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
        BUG_ON(e4b->bd_bitmap_page == NULL);
        BUG_ON(e4b->bd_buddy_page == NULL);

-       new_node = &new_entry->node;
-       cluster = new_entry->start_cluster;
+       new_node = &new_entry->efd_node;
+       cluster = new_entry->efd_start_cluster;

        if (!*n) {
                /* first free block extent. We need to
|
|
}
|
|
|
while (*n) {
|
|
|
parent = *n;
|
|
|
- entry = rb_entry(parent, struct ext4_free_data, node);
|
|
|
- if (cluster < entry->start_cluster)
|
|
|
+ entry = rb_entry(parent, struct ext4_free_data, efd_node);
|
|
|
+ if (cluster < entry->efd_start_cluster)
|
|
|
n = &(*n)->rb_left;
|
|
|
- else if (cluster >= (entry->start_cluster + entry->count))
|
|
|
+ else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
|
|
|
n = &(*n)->rb_right;
|
|
|
else {
|
|
|
ext4_grp_locked_error(sb, group, 0,
|
|
@@ -4486,34 +4434,29 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
        /* Now try to see the extent can be merged to left and right */
        node = rb_prev(new_node);
        if (node) {
-               entry = rb_entry(node, struct ext4_free_data, node);
+               entry = rb_entry(node, struct ext4_free_data, efd_node);
                if (can_merge(entry, new_entry)) {
-                       new_entry->start_cluster = entry->start_cluster;
-                       new_entry->count += entry->count;
+                       new_entry->efd_start_cluster = entry->efd_start_cluster;
+                       new_entry->efd_count += entry->efd_count;
                        rb_erase(node, &(db->bb_free_root));
-                       spin_lock(&sbi->s_md_lock);
-                       list_del(&entry->list);
-                       spin_unlock(&sbi->s_md_lock);
-                       kmem_cache_free(ext4_free_ext_cachep, entry);
+                       ext4_journal_callback_del(handle, &entry->efd_jce);
+                       kmem_cache_free(ext4_free_data_cachep, entry);
                }
        }

        node = rb_next(new_node);
        if (node) {
-               entry = rb_entry(node, struct ext4_free_data, node);
+               entry = rb_entry(node, struct ext4_free_data, efd_node);
                if (can_merge(new_entry, entry)) {
-                       new_entry->count += entry->count;
+                       new_entry->efd_count += entry->efd_count;
                        rb_erase(node, &(db->bb_free_root));
-                       spin_lock(&sbi->s_md_lock);
-                       list_del(&entry->list);
-                       spin_unlock(&sbi->s_md_lock);
-                       kmem_cache_free(ext4_free_ext_cachep, entry);
+                       ext4_journal_callback_del(handle, &entry->efd_jce);
+                       kmem_cache_free(ext4_free_data_cachep, entry);
                }
        }
        /* Add the extent to transaction's private list */
-       spin_lock(&sbi->s_md_lock);
-       list_add(&new_entry->list, &handle->h_transaction->t_private_list);
-       spin_unlock(&sbi->s_md_lock);
+       ext4_journal_callback_add(handle, ext4_free_data_callback,
+                                 &new_entry->efd_jce);
        return 0;
 }
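ext4_journal_callback_add()/ext4_journal_callback_del() are new ext4_jbd2.h helpers not shown in this patch; judging from the lines they replace here, they presumably wrap the same s_md_lock-protected list manipulation that used to be done by hand:

static inline void ext4_journal_callback_add(handle_t *handle,
                        void (*func)(struct super_block *sb,
                                     struct ext4_journal_cb_entry *jce,
                                     int rc),
                        struct ext4_journal_cb_entry *jce)
{
        struct ext4_sb_info *sbi =
                        EXT4_SB(handle->h_transaction->t_journal->j_private);

        /* remember the callback and queue it on the running transaction */
        jce->jce_func = func;
        spin_lock(&sbi->s_md_lock);
        list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
        spin_unlock(&sbi->s_md_lock);
}

static inline void ext4_journal_callback_del(handle_t *handle,
                                             struct ext4_journal_cb_entry *jce)
{
        struct ext4_sb_info *sbi =
                        EXT4_SB(handle->h_transaction->t_journal->j_private);

        spin_lock(&sbi->s_md_lock);
        list_del_init(&jce->jce_list);
        spin_unlock(&sbi->s_md_lock);
}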
@@ -4691,15 +4634,15 @@ do_more:
                 * blocks being freed are metadata. these blocks shouldn't
                 * be used until this transaction is committed
                 */
-               new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+               new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
                if (!new_entry) {
                        err = -ENOMEM;
                        goto error_return;
                }
-               new_entry->start_cluster = bit;
-               new_entry->group = block_group;
-               new_entry->count = count_clusters;
-               new_entry->t_tid = handle->h_transaction->t_tid;
+               new_entry->efd_start_cluster = bit;
+               new_entry->efd_group = block_group;
+               new_entry->efd_count = count_clusters;
+               new_entry->efd_tid = handle->h_transaction->t_tid;

                ext4_lock_group(sb, block_group);
                mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
@@ -4971,11 +4914,11 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
        start = (e4b.bd_info->bb_first_free > start) ?
                e4b.bd_info->bb_first_free : start;

-       while (start < max) {
-               start = mb_find_next_zero_bit(bitmap, max, start);
-               if (start >= max)
+       while (start <= max) {
+               start = mb_find_next_zero_bit(bitmap, max + 1, start);
+               if (start > max)
                        break;
-               next = mb_find_next_bit(bitmap, max, start);
+               next = mb_find_next_bit(bitmap, max + 1, start);

                if ((next - start) >= minblocks) {
                        ext4_trim_extent(sb, start,
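max is now the last cluster to examine rather than a one-past-the-end bound. The bit-search helpers take an array size, so scanning up to and including bit max means passing max + 1:

/* Illustrative: with max = 255 (last cluster of the requested range),
 *   start = mb_find_next_zero_bit(bitmap, 256, start);
 * can legitimately return 255, and only a result > max means "nothing
 * left to trim".  The old `start < max` / size-`max` form could never
 * return the final cluster, silently skipping it. */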
@@ -5027,37 +4970,36 @@ out:
 int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 {
        struct ext4_group_info *grp;
-       ext4_group_t first_group, last_group;
-       ext4_group_t group, ngroups = ext4_get_groups_count(sb);
+       ext4_group_t group, first_group, last_group;
        ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
-       uint64_t start, len, minlen, trimmed = 0;
+       uint64_t start, end, minlen, trimmed = 0;
        ext4_fsblk_t first_data_blk =
                        le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+       ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
        int ret = 0;

        start = range->start >> sb->s_blocksize_bits;
-       len = range->len >> sb->s_blocksize_bits;
+       end = start + (range->len >> sb->s_blocksize_bits) - 1;
        minlen = range->minlen >> sb->s_blocksize_bits;

-       if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)))
+       if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) ||
+           unlikely(start >= max_blks))
                return -EINVAL;
-       if (start + len <= first_data_blk)
+       if (end >= max_blks)
+               end = max_blks - 1;
+       if (end <= first_data_blk)
                goto out;
-       if (start < first_data_blk) {
-               len -= first_data_blk - start;
+       if (start < first_data_blk)
                start = first_data_blk;
-       }

-       /* Determine first and last group to examine based on start and len */
+       /* Determine first and last group to examine based on start and end */
        ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
                                     &first_group, &first_cluster);
-       ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
+       ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
                                     &last_group, &last_cluster);
-       last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
-       last_cluster = EXT4_CLUSTERS_PER_GROUP(sb);

-       if (first_group > last_group)
-               return -EINVAL;
+       /* end now represents the last cluster to discard in this group */
+       end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;

        for (group = first_group; group <= last_group; group++) {
                grp = ext4_get_group_info(sb, group);
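The new arithmetic works on inclusive block numbers throughout, which removes the old running `len` bookkeeping. A quick example with illustrative numbers:

/* blocksize 4096  =>  s_blocksize_bits == 12
 * range->start = 0, range->len = 1 GiB:
 *   start = 0 >> 12                  = 0
 *   end   = 0 + (1 GiB >> 12) - 1    = 262143   (last block, inclusive)
 * If the device has only 2^17 blocks, end is clamped to max_blks - 1 =
 * 131071, so an oversized FITRIM range is trimmed to the filesystem
 * rather than rejected. */

Each group is then trimmed from first_cluster through end, with end reset to the group's last cluster for every group except the last, as the following hunk shows.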
@@ -5069,31 +5011,35 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
                }

                /*
-                * For all the groups except the last one, last block will
-                * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to
-                * change it for the last group in which case start +
-                * len < EXT4_BLOCKS_PER_GROUP(sb).
+                * For all the groups except the last one, last cluster will
+                * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to
+                * change it for the last group, note that last_cluster is
+                * already computed earlier by ext4_get_group_no_and_offset()
                 */
-               if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb))
-                       last_cluster = first_cluster + len;
-               len -= last_cluster - first_cluster;
+               if (group == last_group)
+                       end = last_cluster;

                if (grp->bb_free >= minlen) {
                        cnt = ext4_trim_all_free(sb, group, first_cluster,
-                                               last_cluster, minlen);
+                                               end, minlen);
                        if (cnt < 0) {
                                ret = cnt;
                                break;
                        }
+                       trimmed += cnt;
                }
-               trimmed += cnt;
+
+               /*
+                * For every group except the first one, we are sure
+                * that the first cluster to discard will be cluster #0.
+                */
                first_cluster = 0;
        }
-       range->len = trimmed * sb->s_blocksize;

        if (!ret)
                atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);

 out:
+       range->len = trimmed * sb->s_blocksize;
        return ret;
 }