
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (49 commits)
  ext4: Avoid corrupting the uninitialized bit in the extent during truncate
  ext4: Don't treat a truncation of a zero-length file as replace-via-truncate
  ext4: fix dx_map_entry to support 256k directory blocks
  ext4: truncate the file properly if we fail to copy data from userspace
  ext4: Avoid leaking blocks after a block allocation failure
  ext4: Change all super.c messages to print the device
  ext4: Get rid of EXTEND_DISKSIZE flag of ext4_get_blocks_handle()
  ext4: super.c whitespace cleanup
  jbd2: Fix minor typos in comments in fs/jbd2/journal.c
  ext4: Clean up calls to ext4_get_group_desc()
  ext4: remove unused function __ext4_write_dirty_metadata
  ext2: Fix memory leak in ext2_fill_super() in case of a failed mount
  ext3: Fix memory leak in ext3_fill_super() in case of a failed mount
  ext4: Fix memory leak in ext4_fill_super() in case of a failed mount
  ext4: down i_data_sem only for read when walking tree for fiemap
  ext4: Add a comprehensive block validity check to ext4_get_blocks()
  ext4: Clean up ext4_get_blocks() so it does not depend on bh_result->b_state
  ext4: Merge ext4_da_get_block_write() into mpage_da_map_blocks()
  ext4: Add BUG_ON debugging checks to noalloc_get_block_write()
  ext4: Add documentation to the ext4_*get_block* functions
  ...
Linus Torvalds 16 years ago
parent
commit
e893123c73
23 changed files with 1357 additions and 996 deletions
  1. fs/buffer.c (+2, -0)
  2. fs/ext2/super.c (+1, -0)
  3. fs/ext3/super.c (+1, -0)
  4. fs/ext4/Makefile (+2, -2)
  5. fs/ext4/balloc.c (+13, -15)
  6. fs/ext4/block_validity.c (+244, -0)
  7. fs/ext4/dir.c (+1, -2)
  8. fs/ext4/ext4.h (+325, -29)
  9. fs/ext4/ext4_i.h (+0, -140)
  10. fs/ext4/ext4_sb.h (+0, -161)
  11. fs/ext4/extents.c (+37, -48)
  12. fs/ext4/group.h (+0, -29)
  13. fs/ext4/ialloc.c (+34, -39)
  14. fs/ext4/inode.c (+364, -227)
  15. fs/ext4/mballoc.c (+67, -99)
  16. fs/ext4/mballoc.h (+0, -1)
  17. fs/ext4/namei.c (+15, -12)
  18. fs/ext4/namei.h (+0, -8)
  19. fs/ext4/resize.c (+18, -18)
  20. fs/ext4/super.c (+215, -156)
  21. fs/ioctl.c (+10, -4)
  22. fs/jbd2/journal.c (+4, -4)
  23. fs/mpage.c (+4, -2)

+ 2 - 0
fs/buffer.c

@@ -2935,6 +2935,8 @@ int submit_bh(int rw, struct buffer_head * bh)
 	BUG_ON(!buffer_locked(bh));
 	BUG_ON(!buffer_mapped(bh));
 	BUG_ON(!bh->b_end_io);
+	BUG_ON(buffer_delay(bh));
+	BUG_ON(buffer_unwritten(bh));
 
 	/*
 	 * Mask in barrier bit for a write (could be either a WRITE or a
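The two new assertions make submit_bh() reject buffers that have no real disk block behind them: BH_Delay marks a delayed-allocation buffer whose block has not been allocated yet, and BH_Unwritten marks an allocated but uninitialized extent whose reads must return zeroes. A minimal sketch of the invariant the checks enforce, using only the standard buffer_head state helpers (bh_ready_for_io is a hypothetical name):

	#include <linux/buffer_head.h>

	/* A buffer may be submitted for raw I/O only once it is locked,
	 * mapped to an on-disk block, has a completion handler, and is
	 * in neither of the two "no real data here yet" states. */
	static int bh_ready_for_io(struct buffer_head *bh)
	{
		if (buffer_delay(bh) || buffer_unwritten(bh))
			return 0;
		return buffer_locked(bh) && buffer_mapped(bh) &&
		       bh->b_end_io != NULL;
	}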

+ 1 - 0
fs/ext2/super.c

@@ -1093,6 +1093,7 @@ failed_mount:
 	brelse(bh);
 failed_sbi:
 	sb->s_fs_info = NULL;
+	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 	return ret;
 }

+ 1 - 0
fs/ext3/super.c

@@ -2021,6 +2021,7 @@ failed_mount:
 	brelse(bh);
 out_fail:
 	sb->s_fs_info = NULL;
+	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 	lock_kernel();
 	return ret;
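The ext2 and ext3 hunks (and a matching ext4 one in super.c, further down this commit) plug the same leak: *_fill_super() allocates sbi->s_blockgroup_lock separately from sbi, but the failure path freed only sbi. The new kfree() dereferences the local sbi pointer after sb->s_fs_info has been cleared, which is safe because sbi itself is freed only on the following line. A hedged sketch of the paired allocation and the fixed error path; the allocation site and do_mount_work() are assumptions, not part of this diff:

	/* Hypothetical shape of the fill_super allocation/error pairing */
	static int example_fill_super(struct super_block *sb)
	{
		struct ext2_sb_info *sbi;

		sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
		if (!sbi)
			return -ENOMEM;
		sbi->s_blockgroup_lock =
			kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
		if (!sbi->s_blockgroup_lock) {
			kfree(sbi);
			return -ENOMEM;
		}
		sb->s_fs_info = sbi;

		if (do_mount_work(sb)) {	/* stand-in for the real body */
			sb->s_fs_info = NULL;
			kfree(sbi->s_blockgroup_lock);	/* the leak fix */
			kfree(sbi);
			return -EINVAL;
		}
		return 0;
	}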

+ 2 - 2
fs/ext4/Makefile

@@ -5,8 +5,8 @@
 obj-$(CONFIG_EXT4_FS) += ext4.o
 
 ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-		   ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-		   ext4_jbd2.o migrate.o mballoc.o
+		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
+		ext4_jbd2.o migrate.o mballoc.o block_validity.o
 
 ext4-$(CONFIG_EXT4_FS_XATTR)		+= xattr.o xattr_user.o xattr_trusted.o
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o

+ 13 - 15
fs/ext4/balloc.c

@@ -19,7 +19,6 @@
 #include <linux/buffer_head.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
-#include "group.h"
 #include "mballoc.h"
 
 /*
@@ -88,6 +87,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
 		 ext4_group_t block_group, struct ext4_group_desc *gdp)
 {
 	int bit, bit_max;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	unsigned free_blocks, group_blocks;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
@@ -123,7 +123,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
 		bit_max += ext4_bg_num_gdb(sb, block_group);
 	}
 
-	if (block_group == sbi->s_groups_count - 1) {
+	if (block_group == ngroups - 1) {
 		/*
 		 * Even though mke2fs always initialize first and last group
 		 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
@@ -131,7 +131,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
 		 */
 		group_blocks = ext4_blocks_count(sbi->s_es) -
 			le32_to_cpu(sbi->s_es->s_first_data_block) -
-			(EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1));
+			(EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1));
 	} else {
 		group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
 	}
@@ -205,18 +205,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
 {
 	unsigned int group_desc;
 	unsigned int offset;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	struct ext4_group_desc *desc;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-	if (block_group >= sbi->s_groups_count) {
+	if (block_group >= ngroups) {
 		ext4_error(sb, "ext4_get_group_desc",
 			   "block_group >= groups_count - "
 			   "block_group = %u, groups_count = %u",
-			   block_group, sbi->s_groups_count);
+			   block_group, ngroups);
 
 		return NULL;
 	}
-	smp_rmb();
 
 	group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
 	offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
@@ -326,16 +326,16 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 		unlock_buffer(bh);
 		return bh;
 	}
-	spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+	ext4_lock_group(sb, block_group);
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 		ext4_init_block_bitmap(sb, bh, block_group, desc);
 		set_bitmap_uptodate(bh);
 		set_buffer_uptodate(bh);
-		spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+		ext4_unlock_group(sb, block_group);
 		unlock_buffer(bh);
 		return bh;
 	}
-	spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+	ext4_unlock_group(sb, block_group);
 	if (buffer_uptodate(bh)) {
 		/*
 		 * if not uninit if bh is uptodate,
@@ -451,7 +451,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
 	down_write(&grp->alloc_sem);
 	for (i = 0, blocks_freed = 0; i < count; i++) {
 		BUFFER_TRACE(bitmap_bh, "clear bit");
-		if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+		if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
 						bit + i, bitmap_bh->b_data)) {
 			ext4_error(sb, __func__,
 				   "bit already cleared for block %llu",
@@ -461,11 +461,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
 			blocks_freed++;
 		}
 	}
-	spin_lock(sb_bgl_lock(sbi, block_group));
+	ext4_lock_group(sb, block_group);
 	blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
 	ext4_free_blks_set(sb, desc, blk_free_count);
 	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
-	spin_unlock(sb_bgl_lock(sbi, block_group));
+	ext4_unlock_group(sb, block_group);
 	percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
 
 	if (sbi->s_log_groups_per_flex) {
@@ -665,7 +665,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 	ext4_fsblk_t desc_count;
 	struct ext4_group_desc *gdp;
 	ext4_group_t i;
-	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 #ifdef EXT4FS_DEBUG
 	struct ext4_super_block *es;
 	ext4_fsblk_t bitmap_count;
@@ -677,7 +677,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 	bitmap_count = 0;
 	gdp = NULL;
 
-	smp_rmb();
 	for (i = 0; i < ngroups; i++) {
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
@@ -700,7 +699,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 	return bitmap_count;
 #else
 	desc_count = 0;
-	smp_rmb();
 	for (i = 0; i < ngroups; i++) {
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
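Two mechanical conversions run through this file: reads of sbi->s_groups_count followed by a bare smp_rmb() become ext4_get_groups_count(), which folds the barrier into the accessor (see the ext4.h hunk below), and the raw sb_bgl_lock() spinlock calls become the ext4_lock_group()/ext4_unlock_group() wrappers. A minimal sketch of the resulting idiom, assuming the ext4 internals declared elsewhere in this commit (the function name is hypothetical):

	/* Snapshot the group count once (the accessor includes the
	 * smp_rmb() needed against a concurrent online resize), then
	 * do per-group state updates under the wrapper lock. */
	static void for_each_group_example(struct super_block *sb)
	{
		ext4_group_t i, ngroups = ext4_get_groups_count(sb);

		for (i = 0; i < ngroups; i++) {
			struct ext4_group_desc *gdp =
				ext4_get_group_desc(sb, i, NULL);

			if (!gdp)
				continue;
			ext4_lock_group(sb, i);
			/* ... read or update per-group fields ... */
			ext4_unlock_group(sb, i);
		}
	}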

+ 244 - 0
fs/ext4/block_validity.c

@@ -0,0 +1,244 @@
+/*
+ *  linux/fs/ext4/block_validity.c
+ *
+ * Copyright (C) 2009
+ * Theodore Ts'o (tytso@mit.edu)
+ *
+ * Track which blocks in the filesystem are metadata blocks that
+ * should never be used as data blocks by files or directories.
+ */
+
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
+#include <linux/module.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/mutex.h>
+#include "ext4.h"
+
+struct ext4_system_zone {
+	struct rb_node	node;
+	ext4_fsblk_t	start_blk;
+	unsigned int	count;
+};
+
+static struct kmem_cache *ext4_system_zone_cachep;
+
+int __init init_ext4_system_zone(void)
+{
+	ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
+					     SLAB_RECLAIM_ACCOUNT);
+	if (ext4_system_zone_cachep == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+void exit_ext4_system_zone(void)
+{
+	kmem_cache_destroy(ext4_system_zone_cachep);
+}
+
+static inline int can_merge(struct ext4_system_zone *entry1,
+		     struct ext4_system_zone *entry2)
+{
+	if ((entry1->start_blk + entry1->count) == entry2->start_blk)
+		return 1;
+	return 0;
+}
+
+/*
+ * Mark a range of blocks as belonging to the "system zone" --- that
+ * is, filesystem metadata blocks which should never be used by
+ * inodes.
+ */
+static int add_system_zone(struct ext4_sb_info *sbi,
+			   ext4_fsblk_t start_blk,
+			   unsigned int count)
+{
+	struct ext4_system_zone *new_entry = NULL, *entry;
+	struct rb_node **n = &sbi->system_blks.rb_node, *node;
+	struct rb_node *parent = NULL, *new_node = NULL;
+
+	while (*n) {
+		parent = *n;
+		entry = rb_entry(parent, struct ext4_system_zone, node);
+		if (start_blk < entry->start_blk)
+			n = &(*n)->rb_left;
+		else if (start_blk >= (entry->start_blk + entry->count))
+			n = &(*n)->rb_right;
+		else {
+			if (start_blk + count > (entry->start_blk + 
+						 entry->count))
+				entry->count = (start_blk + count - 
+						entry->start_blk);
+			new_node = *n;
+			new_entry = rb_entry(new_node, struct ext4_system_zone,
+					     node);
+			break;
+		}
+	}
+
+	if (!new_entry) {
+		new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
+					     GFP_KERNEL);
+		if (!new_entry)
+			return -ENOMEM;
+		new_entry->start_blk = start_blk;
+		new_entry->count = count;
+		new_node = &new_entry->node;
+
+		rb_link_node(new_node, parent, n);
+		rb_insert_color(new_node, &sbi->system_blks);
+	}
+
+	/* Can we merge to the left? */
+	node = rb_prev(new_node);
+	if (node) {
+		entry = rb_entry(node, struct ext4_system_zone, node);
+		if (can_merge(entry, new_entry)) {
+			new_entry->start_blk = entry->start_blk;
+			new_entry->count += entry->count;
+			rb_erase(node, &sbi->system_blks);
+			kmem_cache_free(ext4_system_zone_cachep, entry);
+		}
+	}
+
+	/* Can we merge to the right? */
+	node = rb_next(new_node);
+	if (node) {
+		entry = rb_entry(node, struct ext4_system_zone, node);
+		if (can_merge(new_entry, entry)) {
+			new_entry->count += entry->count;
+			rb_erase(node, &sbi->system_blks);
+			kmem_cache_free(ext4_system_zone_cachep, entry);
+		}
+	}
+	return 0;
+}
+
+static void debug_print_tree(struct ext4_sb_info *sbi)
+{
+	struct rb_node *node;
+	struct ext4_system_zone *entry;
+	int first = 1;
+
+	printk(KERN_INFO "System zones: ");
+	node = rb_first(&sbi->system_blks);
+	while (node) {
+		entry = rb_entry(node, struct ext4_system_zone, node);
+		printk("%s%llu-%llu", first ? "" : ", ",
+		       entry->start_blk, entry->start_blk + entry->count - 1);
+		first = 0;
+		node = rb_next(node);
+	}
+	printk("\n");
+}
+
+int ext4_setup_system_zone(struct super_block *sb)
+{
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_desc *gdp;
+	ext4_group_t i;
+	int flex_size = ext4_flex_bg_size(sbi);
+	int ret;
+
+	if (!test_opt(sb, BLOCK_VALIDITY)) {
+		if (EXT4_SB(sb)->system_blks.rb_node)
+			ext4_release_system_zone(sb);
+		return 0;
+	}
+	if (EXT4_SB(sb)->system_blks.rb_node)
+		return 0;
+
+	for (i=0; i < ngroups; i++) {
+		if (ext4_bg_has_super(sb, i) &&
+		    ((i < 5) || ((i % flex_size) == 0)))
+			add_system_zone(sbi, ext4_group_first_block_no(sb, i),
+					sbi->s_gdb_count + 1);
+		gdp = ext4_get_group_desc(sb, i, NULL);
+		ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
+		if (ret)
+			return ret;
+		ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
+		if (ret)
+			return ret;
+		ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
+				sbi->s_itb_per_group);
+		if (ret)
+			return ret;
+	}
+
+	if (test_opt(sb, DEBUG))
+		debug_print_tree(EXT4_SB(sb));
+	return 0;
+}
+
+/* Called when the filesystem is unmounted */
+void ext4_release_system_zone(struct super_block *sb)
+{
+	struct rb_node	*n = EXT4_SB(sb)->system_blks.rb_node;
+	struct rb_node	*parent;
+	struct ext4_system_zone	*entry;
+
+	while (n) {
+		/* Do the node's children first */
+		if (n->rb_left) {
+			n = n->rb_left;
+			continue;
+		}
+		if (n->rb_right) {
+			n = n->rb_right;
+			continue;
+		}
+		/*
+		 * The node has no children; free it, and then zero
+		 * out parent's link to it.  Finally go to the
+		 * beginning of the loop and try to free the parent
+		 * node.
+		 */
+		parent = rb_parent(n);
+		entry = rb_entry(n, struct ext4_system_zone, node);
+		kmem_cache_free(ext4_system_zone_cachep, entry);
+		if (!parent)
+			EXT4_SB(sb)->system_blks.rb_node = NULL;
+		else if (parent->rb_left == n)
+			parent->rb_left = NULL;
+		else if (parent->rb_right == n)
+			parent->rb_right = NULL;
+		n = parent;
+	}
+	EXT4_SB(sb)->system_blks.rb_node = NULL;
+}
+
+/*
+ * Returns 1 if the passed-in block region (start_blk,
+ * start_blk+count) is valid; 0 if some part of the block region
+ * overlaps with filesystem metadata blocks.
+ */
+int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
+			  unsigned int count)
+{
+	struct ext4_system_zone *entry;
+	struct rb_node *n = sbi->system_blks.rb_node;
+
+	if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
+	    (start_blk + count > ext4_blocks_count(sbi->s_es)))
+		return 0;
+	while (n) {
+		entry = rb_entry(n, struct ext4_system_zone, node);
+		if (start_blk + count - 1 < entry->start_blk)
+			n = n->rb_left;
+		else if (start_blk >= (entry->start_blk + entry->count))
+			n = n->rb_right;
+		else
+			return 0;
+	}
+	return 1;
+}
+
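The new file tracks metadata ("system zone") ranges in an rb-tree keyed by start block, merging adjacent or overlapping ranges at insert time: adding 100-149 and then 150-199, for example, leaves a single node covering 100-199, so ext4_data_block_valid() stays a plain O(log n) interval lookup. A minimal usage sketch, with extent_is_safe() as a hypothetical caller:

	/* Reject a candidate data extent that overlaps filesystem
	 * metadata: ext4_data_block_valid() returns 1 for a clean range
	 * and 0 when [start, start + len) touches a system zone or lies
	 * outside the filesystem. */
	static int extent_is_safe(struct super_block *sb,
				  ext4_fsblk_t start, unsigned int len)
	{
		return ext4_data_block_valid(EXT4_SB(sb), start, len);
	}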

+ 1 - 2
fs/ext4/dir.c

@@ -131,8 +131,7 @@ static int ext4_readdir(struct file *filp,
 		struct buffer_head *bh = NULL;
 
 		map_bh.b_state = 0;
-		err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh,
-						0, 0, 0);
+		err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0);
 		if (err > 0) {
 			pgoff_t index = map_bh.b_blocknr >>
 					(PAGE_CACHE_SHIFT - inode->i_blkbits);

+ 325 - 29
fs/ext4/ext4.h

@@ -21,7 +21,14 @@
 #include <linux/magic.h>
 #include <linux/jbd2.h>
 #include <linux/quota.h>
-#include "ext4_i.h"
+#include <linux/rwsem.h>
+#include <linux/rbtree.h>
+#include <linux/seqlock.h>
+#include <linux/mutex.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+#include <linux/blockgroup_lock.h>
+#include <linux/percpu_counter.h>
 
 /*
  * The fourth extended filesystem constants/structures
@@ -46,6 +53,19 @@
 #define ext4_debug(f, a...)	do {} while (0)
 #endif
 
+/* data type for block offset of block group */
+typedef int ext4_grpblk_t;
+
+/* data type for filesystem-wide blocks number */
+typedef unsigned long long ext4_fsblk_t;
+
+/* data type for file logical block number */
+typedef __u32 ext4_lblk_t;
+
+/* data type for block group number */
+typedef unsigned int ext4_group_t;
+
+
 /* prefer goal again. length */
 #define EXT4_MB_HINT_MERGE		1
 /* blocks already reserved */
@@ -179,9 +199,6 @@ struct flex_groups {
 #define EXT4_BG_BLOCK_UNINIT	0x0002 /* Block bitmap not in use */
 #define EXT4_BG_INODE_ZEROED	0x0004 /* On-disk itable initialized to zero */
 
-#ifdef __KERNEL__
-#include "ext4_sb.h"
-#endif
 /*
  * Macro-instructions used to manage group descriptors
  */
@@ -297,10 +314,23 @@ struct ext4_new_group_data {
 };
 
 /*
- * Following is used by preallocation code to tell get_blocks() that we
- * want uninitialzed extents.
+ * Flags used by ext4_get_blocks()
  */
-#define EXT4_CREATE_UNINITIALIZED_EXT		2
+	/* Allocate any needed blocks and/or convert an uninitialized
+	   extent to be an initialized extent */
+#define EXT4_GET_BLOCKS_CREATE			0x0001
+	/* Request the creation of an uninitialized extent */
+#define EXT4_GET_BLOCKS_UNINIT_EXT		0x0002
+#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT	(EXT4_GET_BLOCKS_UNINIT_EXT|\
+						 EXT4_GET_BLOCKS_CREATE)
+	/* Caller is from the delayed allocation writeout path,
+	   so set the magic i_delalloc_reserve_flag after taking the
+	   inode allocation semaphore */
+#define EXT4_GET_BLOCKS_DELALLOC_RESERVE	0x0004
+	/* Call ext4_da_update_reserve_space() after successfully 
+	   allocating the blocks */
+#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE	0x0008
+
 
 /*
  * ioctl commands
@@ -515,6 +545,110 @@ do {									       \
 
 #endif /* defined(__KERNEL__) || defined(__linux__) */
 
+/*
+ * storage for cached extent
+ */
+struct ext4_ext_cache {
+	ext4_fsblk_t	ec_start;
+	ext4_lblk_t	ec_block;
+	__u32		ec_len; /* must be 32bit to return holes */
+	__u32		ec_type;
+};
+
+/*
+ * fourth extended file system inode data in memory
+ */
+struct ext4_inode_info {
+	__le32	i_data[15];	/* unconverted */
+	__u32	i_flags;
+	ext4_fsblk_t	i_file_acl;
+	__u32	i_dtime;
+
+	/*
+	 * i_block_group is the number of the block group which contains
+	 * this file's inode.  Constant across the lifetime of the inode,
+	 * it is used for making block allocation decisions - we try to
+	 * place a file's data blocks near its inode block, and new inodes
+	 * near to their parent directory's inode.
+	 */
+	ext4_group_t	i_block_group;
+	__u32	i_state;		/* Dynamic state flags for ext4 */
+
+	ext4_lblk_t		i_dir_start_lookup;
+#ifdef CONFIG_EXT4_FS_XATTR
+	/*
+	 * Extended attributes can be read independently of the main file
+	 * data. Taking i_mutex even when reading would cause contention
+	 * between readers of EAs and writers of regular file data, so
+	 * instead we synchronize on xattr_sem when reading or changing
+	 * EAs.
+	 */
+	struct rw_semaphore xattr_sem;
+#endif
+#ifdef CONFIG_EXT4_FS_POSIX_ACL
+	struct posix_acl	*i_acl;
+	struct posix_acl	*i_default_acl;
+#endif
+
+	struct list_head i_orphan;	/* unlinked but open inodes */
+
+	/*
+	 * i_disksize keeps track of what the inode size is ON DISK, not
+	 * in memory.  During truncate, i_size is set to the new size by
+	 * the VFS prior to calling ext4_truncate(), but the filesystem won't
+	 * set i_disksize to 0 until the truncate is actually under way.
+	 *
+	 * The intent is that i_disksize always represents the blocks which
+	 * are used by this file.  This allows recovery to restart truncate
+	 * on orphans if we crash during truncate.  We actually write i_disksize
+	 * into the on-disk inode when writing inodes out, instead of i_size.
+	 *
+	 * The only time when i_disksize and i_size may be different is when
+	 * a truncate is in progress.  The only things which change i_disksize
+	 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
+	 */
+	loff_t	i_disksize;
+
+	/*
+	 * i_data_sem is for serialising ext4_truncate() against
+	 * ext4_getblock().  In the 2.4 ext2 design, great chunks of inode's
+	 * data tree are chopped off during truncate. We can't do that in
+	 * ext4 because whenever we perform intermediate commits during
+	 * truncate, the inode and all the metadata blocks *must* be in a
+	 * consistent state which allows truncation of the orphans to restart
+	 * during recovery.  Hence we must fix the get_block-vs-truncate race
+	 * by other means, so we have i_data_sem.
+	 */
+	struct rw_semaphore i_data_sem;
+	struct inode vfs_inode;
+	struct jbd2_inode jinode;
+
+	struct ext4_ext_cache i_cached_extent;
+	/*
+	 * File creation time. Its function is same as that of
+	 * struct timespec i_{a,c,m}time in the generic inode.
+	 */
+	struct timespec i_crtime;
+
+	/* mballoc */
+	struct list_head i_prealloc_list;
+	spinlock_t i_prealloc_lock;
+
+	/* ialloc */
+	ext4_group_t	i_last_alloc_group;
+
+	/* allocation reservation info for delalloc */
+	unsigned int i_reserved_data_blocks;
+	unsigned int i_reserved_meta_blocks;
+	unsigned int i_allocated_meta_blocks;
+	unsigned short i_delalloc_reserved_flag;
+
+	/* on-disk additional length */
+	__u16 i_extra_isize;
+
+	spinlock_t i_block_reservation_lock;
+};
+
 /*
  * File system states
  */
@@ -560,6 +694,7 @@ do {									       \
 #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
 #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
+#define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
 
 /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
@@ -689,6 +824,137 @@ struct ext4_super_block {
 };
 
 #ifdef __KERNEL__
+/*
+ * fourth extended-fs super-block data in memory
+ */
+struct ext4_sb_info {
+	unsigned long s_desc_size;	/* Size of a group descriptor in bytes */
+	unsigned long s_inodes_per_block;/* Number of inodes per block */
+	unsigned long s_blocks_per_group;/* Number of blocks in a group */
+	unsigned long s_inodes_per_group;/* Number of inodes in a group */
+	unsigned long s_itb_per_group;	/* Number of inode table blocks per group */
+	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
+	unsigned long s_desc_per_block;	/* Number of group descriptors per block */
+	ext4_group_t s_groups_count;	/* Number of groups in the fs */
+	unsigned long s_overhead_last;  /* Last calculated overhead */
+	unsigned long s_blocks_last;    /* Last seen block count */
+	loff_t s_bitmap_maxbytes;	/* max bytes for bitmap files */
+	struct buffer_head * s_sbh;	/* Buffer containing the super block */
+	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
+	struct buffer_head **s_group_desc;
+	unsigned long  s_mount_opt;
+	ext4_fsblk_t s_sb_block;
+	uid_t s_resuid;
+	gid_t s_resgid;
+	unsigned short s_mount_state;
+	unsigned short s_pad;
+	int s_addr_per_block_bits;
+	int s_desc_per_block_bits;
+	int s_inode_size;
+	int s_first_ino;
+	unsigned int s_inode_readahead_blks;
+	spinlock_t s_next_gen_lock;
+	u32 s_next_generation;
+	u32 s_hash_seed[4];
+	int s_def_hash_version;
+	int s_hash_unsigned;	/* 3 if hash should be signed, 0 if not */
+	struct percpu_counter s_freeblocks_counter;
+	struct percpu_counter s_freeinodes_counter;
+	struct percpu_counter s_dirs_counter;
+	struct percpu_counter s_dirtyblocks_counter;
+	struct blockgroup_lock *s_blockgroup_lock;
+	struct proc_dir_entry *s_proc;
+	struct kobject s_kobj;
+	struct completion s_kobj_unregister;
+
+	/* Journaling */
+	struct inode *s_journal_inode;
+	struct journal_s *s_journal;
+	struct list_head s_orphan;
+	struct mutex s_orphan_lock;
+	struct mutex s_resize_lock;
+	unsigned long s_commit_interval;
+	u32 s_max_batch_time;
+	u32 s_min_batch_time;
+	struct block_device *journal_bdev;
+#ifdef CONFIG_JBD2_DEBUG
+	struct timer_list turn_ro_timer;	/* For turning read-only (crash simulation) */
+	wait_queue_head_t ro_wait_queue;	/* For people waiting for the fs to go read-only */
+#endif
+#ifdef CONFIG_QUOTA
+	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
+	int s_jquota_fmt;			/* Format of quota to use */
+#endif
+	unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
+	struct rb_root system_blks;
+
+#ifdef EXTENTS_STATS
+	/* ext4 extents stats */
+	unsigned long s_ext_min;
+	unsigned long s_ext_max;
+	unsigned long s_depth_max;
+	spinlock_t s_ext_stats_lock;
+	unsigned long s_ext_blocks;
+	unsigned long s_ext_extents;
+#endif
+
+	/* for buddy allocator */
+	struct ext4_group_info ***s_group_info;
+	struct inode *s_buddy_cache;
+	long s_blocks_reserved;
+	spinlock_t s_reserve_lock;
+	spinlock_t s_md_lock;
+	tid_t s_last_transaction;
+	unsigned short *s_mb_offsets;
+	unsigned int *s_mb_maxs;
+
+	/* tunables */
+	unsigned long s_stripe;
+	unsigned int s_mb_stream_request;
+	unsigned int s_mb_max_to_scan;
+	unsigned int s_mb_min_to_scan;
+	unsigned int s_mb_stats;
+	unsigned int s_mb_order2_reqs;
+	unsigned int s_mb_group_prealloc;
+	/* where last allocation was done - for stream allocation */
+	unsigned long s_mb_last_group;
+	unsigned long s_mb_last_start;
+
+	/* history to debug policy */
+	struct ext4_mb_history *s_mb_history;
+	int s_mb_history_cur;
+	int s_mb_history_max;
+	int s_mb_history_num;
+	spinlock_t s_mb_history_lock;
+	int s_mb_history_filter;
+
+	/* stats for buddy allocator */
+	spinlock_t s_mb_pa_lock;
+	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */
+	atomic_t s_bal_success;	/* we found long enough chunks */
+	atomic_t s_bal_allocated;	/* in blocks */
+	atomic_t s_bal_ex_scanned;	/* total extents scanned */
+	atomic_t s_bal_goals;	/* goal hits */
+	atomic_t s_bal_breaks;	/* too long searches */
+	atomic_t s_bal_2orders;	/* 2^order hits */
+	spinlock_t s_bal_lock;
+	unsigned long s_mb_buddies_generated;
+	unsigned long long s_mb_generation_time;
+	atomic_t s_mb_lost_chunks;
+	atomic_t s_mb_preallocated;
+	atomic_t s_mb_discarded;
+
+	/* locality groups */
+	struct ext4_locality_group *s_locality_groups;
+
+	/* for write statistics */
+	unsigned long s_sectors_written_start;
+	u64 s_kbytes_written;
+
+	unsigned int s_log_groups_per_flex;
+	struct flex_groups *s_flex_groups;
+};
+
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
 {
 	return sb->s_fs_info;
@@ -704,7 +970,6 @@ static inline struct timespec ext4_current_time(struct inode *inode)
 		current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
 }
 
-
 static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 {
 	return ino == EXT4_ROOT_INO ||
@@ -1014,6 +1279,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
 						    ext4_group_t block_group,
 						    struct buffer_head ** bh);
 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
+struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
+				      ext4_group_t block_group);
+extern unsigned ext4_init_block_bitmap(struct super_block *sb,
+				       struct buffer_head *bh,
+				       ext4_group_t group,
+				       struct ext4_group_desc *desc);
+#define ext4_free_blocks_after_init(sb, group, desc)			\
+		ext4_init_block_bitmap(sb, NULL, group, desc)
 
 /* dir.c */
 extern int ext4_check_dir_entry(const char *, struct inode *,
@@ -1038,6 +1311,11 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
 extern unsigned long ext4_count_dirs(struct super_block *);
 extern void ext4_check_inodes_bitmap(struct super_block *);
+extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
+				       struct buffer_head *bh,
+				       ext4_group_t group,
+				       struct ext4_group_desc *desc);
+extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
 
 /* mballoc.c */
 extern long ext4_mb_stats;
@@ -1123,6 +1401,8 @@ extern void ext4_abort(struct super_block *, const char *, const char *, ...)
 	__attribute__ ((format (printf, 3, 4)));
 extern void ext4_warning(struct super_block *, const char *, const char *, ...)
 	__attribute__ ((format (printf, 3, 4)));
+extern void ext4_msg(struct super_block *, const char *, const char *, ...)
+	__attribute__ ((format (printf, 3, 4)));
 extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
 				const char *, const char *, ...)
 	__attribute__ ((format (printf, 4, 5)));
@@ -1161,6 +1441,10 @@ extern void ext4_used_dirs_set(struct super_block *sb,
 				struct ext4_group_desc *bg, __u32 count);
 extern void ext4_itable_unused_set(struct super_block *sb,
 				   struct ext4_group_desc *bg, __u32 count);
+extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
+				   struct ext4_group_desc *gdp);
+extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
+				       struct ext4_group_desc *gdp);
 
 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
 {
@@ -1228,6 +1512,18 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
 	 return grp_info[indexv][indexh];
 }
 
+/*
+ * Reading s_groups_count requires using smp_rmb() afterwards.  See
+ * the locking protocol documented in the comments of ext4_group_add()
+ * in resize.c
+ */
+static inline ext4_group_t ext4_get_groups_count(struct super_block *sb)
+{
+	ext4_group_t	ngroups = EXT4_SB(sb)->s_groups_count;
+
+	smp_rmb();
+	return ngroups;
+}
 
 static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi,
 					     ext4_group_t block_group)
@@ -1283,33 +1579,25 @@ struct ext4_group_info {
 };
 
 #define EXT4_GROUP_INFO_NEED_INIT_BIT	0
-#define EXT4_GROUP_INFO_LOCKED_BIT	1
 
 #define EXT4_MB_GRP_NEED_INIT(grp)	\
 	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
 
-static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
+static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
+					      ext4_group_t group)
 {
-	struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-	bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
+	return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group);
 }
 
-static inline void ext4_unlock_group(struct super_block *sb,
-					ext4_group_t group)
+static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
 {
-	struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-	bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
+	spin_lock(ext4_group_lock_ptr(sb, group));
 }
 
-static inline int ext4_is_group_locked(struct super_block *sb,
+static inline void ext4_unlock_group(struct super_block *sb,
 					ext4_group_t group)
 {
-	struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-	return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
-						&(grinfo->bb_state));
+	spin_unlock(ext4_group_lock_ptr(sb, group));
 }
 
 /*
@@ -1326,11 +1614,21 @@ extern const struct file_operations ext4_file_operations;
 /* namei.c */
 extern const struct inode_operations ext4_dir_inode_operations;
 extern const struct inode_operations ext4_special_inode_operations;
+extern struct dentry *ext4_get_parent(struct dentry *child);
 
 /* symlink.c */
 extern const struct inode_operations ext4_symlink_inode_operations;
 extern const struct inode_operations ext4_fast_symlink_inode_operations;
 
+/* block_validity */
+extern void ext4_release_system_zone(struct super_block *sb);
+extern int ext4_setup_system_zone(struct super_block *sb);
+extern int __init init_ext4_system_zone(void);
+extern void exit_ext4_system_zone(void);
+extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
+				 ext4_fsblk_t start_blk,
+				 unsigned int count);
+
 /* extents.c */
 extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
 extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
@@ -1338,17 +1636,15 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
 				       int chunk);
 extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 			       ext4_lblk_t iblock, unsigned int max_blocks,
-			       struct buffer_head *bh_result,
-			       int create, int extend_disksize);
+			       struct buffer_head *bh_result, int flags);
 extern void ext4_ext_truncate(struct inode *);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
 extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
 			  loff_t len);
-extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
-			sector_t block, unsigned int max_blocks,
-			struct buffer_head *bh, int create,
-			int extend_disksize, int flag);
+extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
+			   sector_t block, unsigned int max_blocks,
+			   struct buffer_head *bh, int flags);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			__u64 start, __u64 len);
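The most visible interface change in this header: the create/extend_disksize/flag argument triple of the old ext4_get_blocks_wrap() collapses into a single flags bitmask on ext4_get_blocks(). A sketch of how the old call sites map onto the new constants, assuming the surrounding variables of a typical caller (compare the dir.c hunk above and the extents.c hunks below):

	/* old: ext4_get_blocks_wrap(NULL, inode, blk, 1, &bh, 0, 0, 0) */
	err = ext4_get_blocks(NULL, inode, blk, 1, &bh, 0);

	/* old: create = 1, allocate blocks as needed */
	err = ext4_get_blocks(handle, inode, blk, 1, &bh,
			      EXT4_GET_BLOCKS_CREATE);

	/* old: create = EXT4_CREATE_UNINITIALIZED_EXT (fallocate path) */
	err = ext4_get_blocks(handle, inode, blk, max_blocks, &bh,
			      EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);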
 

+ 0 - 140
fs/ext4/ext4_i.h

@@ -1,140 +0,0 @@
-/*
- *  ext4_i.h
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- *  from
- *
- *  linux/include/linux/minix_fs_i.h
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- */
-
-#ifndef _EXT4_I
-#define _EXT4_I
-
-#include <linux/rwsem.h>
-#include <linux/rbtree.h>
-#include <linux/seqlock.h>
-#include <linux/mutex.h>
-
-/* data type for block offset of block group */
-typedef int ext4_grpblk_t;
-
-/* data type for filesystem-wide blocks number */
-typedef unsigned long long ext4_fsblk_t;
-
-/* data type for file logical block number */
-typedef __u32 ext4_lblk_t;
-
-/* data type for block group number */
-typedef unsigned int ext4_group_t;
-
-/*
- * storage for cached extent
- */
-struct ext4_ext_cache {
-	ext4_fsblk_t	ec_start;
-	ext4_lblk_t	ec_block;
-	__u32		ec_len; /* must be 32bit to return holes */
-	__u32		ec_type;
-};
-
-/*
- * fourth extended file system inode data in memory
- */
-struct ext4_inode_info {
-	__le32	i_data[15];	/* unconverted */
-	__u32	i_flags;
-	ext4_fsblk_t	i_file_acl;
-	__u32	i_dtime;
-
-	/*
-	 * i_block_group is the number of the block group which contains
-	 * this file's inode.  Constant across the lifetime of the inode,
-	 * it is ued for making block allocation decisions - we try to
-	 * place a file's data blocks near its inode block, and new inodes
-	 * near to their parent directory's inode.
-	 */
-	ext4_group_t	i_block_group;
-	__u32	i_state;		/* Dynamic state flags for ext4 */
-
-	ext4_lblk_t		i_dir_start_lookup;
-#ifdef CONFIG_EXT4_FS_XATTR
-	/*
-	 * Extended attributes can be read independently of the main file
-	 * data. Taking i_mutex even when reading would cause contention
-	 * between readers of EAs and writers of regular file data, so
-	 * instead we synchronize on xattr_sem when reading or changing
-	 * EAs.
-	 */
-	struct rw_semaphore xattr_sem;
-#endif
-#ifdef CONFIG_EXT4_FS_POSIX_ACL
-	struct posix_acl	*i_acl;
-	struct posix_acl	*i_default_acl;
-#endif
-
-	struct list_head i_orphan;	/* unlinked but open inodes */
-
-	/*
-	 * i_disksize keeps track of what the inode size is ON DISK, not
-	 * in memory.  During truncate, i_size is set to the new size by
-	 * the VFS prior to calling ext4_truncate(), but the filesystem won't
-	 * set i_disksize to 0 until the truncate is actually under way.
-	 *
-	 * The intent is that i_disksize always represents the blocks which
-	 * are used by this file.  This allows recovery to restart truncate
-	 * on orphans if we crash during truncate.  We actually write i_disksize
-	 * into the on-disk inode when writing inodes out, instead of i_size.
-	 *
-	 * The only time when i_disksize and i_size may be different is when
-	 * a truncate is in progress.  The only things which change i_disksize
-	 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
-	 */
-	loff_t	i_disksize;
-
-	/*
-	 * i_data_sem is for serialising ext4_truncate() against
-	 * ext4_getblock().  In the 2.4 ext2 design, great chunks of inode's
-	 * data tree are chopped off during truncate. We can't do that in
-	 * ext4 because whenever we perform intermediate commits during
-	 * truncate, the inode and all the metadata blocks *must* be in a
-	 * consistent state which allows truncation of the orphans to restart
-	 * during recovery.  Hence we must fix the get_block-vs-truncate race
-	 * by other means, so we have i_data_sem.
-	 */
-	struct rw_semaphore i_data_sem;
-	struct inode vfs_inode;
-	struct jbd2_inode jinode;
-
-	struct ext4_ext_cache i_cached_extent;
-	/*
-	 * File creation time. Its function is same as that of
-	 * struct timespec i_{a,c,m}time in the generic inode.
-	 */
-	struct timespec i_crtime;
-
-	/* mballoc */
-	struct list_head i_prealloc_list;
-	spinlock_t i_prealloc_lock;
-
-	/* ialloc */
-	ext4_group_t	i_last_alloc_group;
-
-	/* allocation reservation info for delalloc */
-	unsigned int i_reserved_data_blocks;
-	unsigned int i_reserved_meta_blocks;
-	unsigned int i_allocated_meta_blocks;
-	unsigned short i_delalloc_reserved_flag;
-
-	/* on-disk additional length */
-	__u16 i_extra_isize;
-
-	spinlock_t i_block_reservation_lock;
-};
-
-#endif	/* _EXT4_I */

+ 0 - 161
fs/ext4/ext4_sb.h

@@ -1,161 +0,0 @@
-/*
- *  ext4_sb.h
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- *  from
- *
- *  linux/include/linux/minix_fs_sb.h
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- */
-
-#ifndef _EXT4_SB
-#define _EXT4_SB
-
-#ifdef __KERNEL__
-#include <linux/timer.h>
-#include <linux/wait.h>
-#include <linux/blockgroup_lock.h>
-#include <linux/percpu_counter.h>
-#endif
-#include <linux/rbtree.h>
-
-/*
- * fourth extended-fs super-block data in memory
- */
-struct ext4_sb_info {
-	unsigned long s_desc_size;	/* Size of a group descriptor in bytes */
-	unsigned long s_inodes_per_block;/* Number of inodes per block */
-	unsigned long s_blocks_per_group;/* Number of blocks in a group */
-	unsigned long s_inodes_per_group;/* Number of inodes in a group */
-	unsigned long s_itb_per_group;	/* Number of inode table blocks per group */
-	unsigned long s_gdb_count;	/* Number of group descriptor blocks */
-	unsigned long s_desc_per_block;	/* Number of group descriptors per block */
-	ext4_group_t s_groups_count;	/* Number of groups in the fs */
-	unsigned long s_overhead_last;  /* Last calculated overhead */
-	unsigned long s_blocks_last;    /* Last seen block count */
-	loff_t s_bitmap_maxbytes;	/* max bytes for bitmap files */
-	struct buffer_head * s_sbh;	/* Buffer containing the super block */
-	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
-	struct buffer_head **s_group_desc;
-	unsigned long  s_mount_opt;
-	ext4_fsblk_t s_sb_block;
-	uid_t s_resuid;
-	gid_t s_resgid;
-	unsigned short s_mount_state;
-	unsigned short s_pad;
-	int s_addr_per_block_bits;
-	int s_desc_per_block_bits;
-	int s_inode_size;
-	int s_first_ino;
-	unsigned int s_inode_readahead_blks;
-	spinlock_t s_next_gen_lock;
-	u32 s_next_generation;
-	u32 s_hash_seed[4];
-	int s_def_hash_version;
-	int s_hash_unsigned;	/* 3 if hash should be signed, 0 if not */
-	struct percpu_counter s_freeblocks_counter;
-	struct percpu_counter s_freeinodes_counter;
-	struct percpu_counter s_dirs_counter;
-	struct percpu_counter s_dirtyblocks_counter;
-	struct blockgroup_lock *s_blockgroup_lock;
-	struct proc_dir_entry *s_proc;
-	struct kobject s_kobj;
-	struct completion s_kobj_unregister;
-
-	/* Journaling */
-	struct inode *s_journal_inode;
-	struct journal_s *s_journal;
-	struct list_head s_orphan;
-	unsigned long s_commit_interval;
-	u32 s_max_batch_time;
-	u32 s_min_batch_time;
-	struct block_device *journal_bdev;
-#ifdef CONFIG_JBD2_DEBUG
-	struct timer_list turn_ro_timer;	/* For turning read-only (crash simulation) */
-	wait_queue_head_t ro_wait_queue;	/* For people waiting for the fs to go read-only */
-#endif
-#ifdef CONFIG_QUOTA
-	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
-	int s_jquota_fmt;			/* Format of quota to use */
-#endif
-	unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
-
-#ifdef EXTENTS_STATS
-	/* ext4 extents stats */
-	unsigned long s_ext_min;
-	unsigned long s_ext_max;
-	unsigned long s_depth_max;
-	spinlock_t s_ext_stats_lock;
-	unsigned long s_ext_blocks;
-	unsigned long s_ext_extents;
-#endif
-
-	/* for buddy allocator */
-	struct ext4_group_info ***s_group_info;
-	struct inode *s_buddy_cache;
-	long s_blocks_reserved;
-	spinlock_t s_reserve_lock;
-	spinlock_t s_md_lock;
-	tid_t s_last_transaction;
-	unsigned short *s_mb_offsets;
-	unsigned int *s_mb_maxs;
-
-	/* tunables */
-	unsigned long s_stripe;
-	unsigned int s_mb_stream_request;
-	unsigned int s_mb_max_to_scan;
-	unsigned int s_mb_min_to_scan;
-	unsigned int s_mb_stats;
-	unsigned int s_mb_order2_reqs;
-	unsigned int s_mb_group_prealloc;
-	/* where last allocation was done - for stream allocation */
-	unsigned long s_mb_last_group;
-	unsigned long s_mb_last_start;
-
-	/* history to debug policy */
-	struct ext4_mb_history *s_mb_history;
-	int s_mb_history_cur;
-	int s_mb_history_max;
-	int s_mb_history_num;
-	spinlock_t s_mb_history_lock;
-	int s_mb_history_filter;
-
-	/* stats for buddy allocator */
-	spinlock_t s_mb_pa_lock;
-	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */
-	atomic_t s_bal_success;	/* we found long enough chunks */
-	atomic_t s_bal_allocated;	/* in blocks */
-	atomic_t s_bal_ex_scanned;	/* total extents scanned */
-	atomic_t s_bal_goals;	/* goal hits */
-	atomic_t s_bal_breaks;	/* too long searches */
-	atomic_t s_bal_2orders;	/* 2^order hits */
-	spinlock_t s_bal_lock;
-	unsigned long s_mb_buddies_generated;
-	unsigned long long s_mb_generation_time;
-	atomic_t s_mb_lost_chunks;
-	atomic_t s_mb_preallocated;
-	atomic_t s_mb_discarded;
-
-	/* locality groups */
-	struct ext4_locality_group *s_locality_groups;
-
-	/* for write statistics */
-	unsigned long s_sectors_written_start;
-	u64 s_kbytes_written;
-
-	unsigned int s_log_groups_per_flex;
-	struct flex_groups *s_flex_groups;
-};
-
-static inline spinlock_t *
-sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
-{
-	return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
-}
-
-#endif	/* _EXT4_SB */

+ 37 - 48
fs/ext4/extents.c

@@ -326,32 +326,18 @@ ext4_ext_max_entries(struct inode *inode, int depth)
 
 static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
 {
-	ext4_fsblk_t block = ext_pblock(ext), valid_block;
+	ext4_fsblk_t block = ext_pblock(ext);
 	int len = ext4_ext_get_actual_len(ext);
-	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
 
-	valid_block = le32_to_cpu(es->s_first_data_block) +
-		EXT4_SB(inode->i_sb)->s_gdb_count;
-	if (unlikely(block <= valid_block ||
-		     ((block + len) > ext4_blocks_count(es))))
-		return 0;
-	else
-		return 1;
+	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
 }
 
 static int ext4_valid_extent_idx(struct inode *inode,
 				struct ext4_extent_idx *ext_idx)
 {
-	ext4_fsblk_t block = idx_pblock(ext_idx), valid_block;
-	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+	ext4_fsblk_t block = idx_pblock(ext_idx);
 
-	valid_block = le32_to_cpu(es->s_first_data_block) +
-		EXT4_SB(inode->i_sb)->s_gdb_count;
-	if (unlikely(block <= valid_block ||
-		     (block >= ext4_blocks_count(es))))
-		return 0;
-	else
-		return 1;
+	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
 }
 
 static int ext4_valid_extent_entries(struct inode *inode,
@@ -2097,12 +2083,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	ex = EXT_LAST_EXTENT(eh);
 
 	ex_ee_block = le32_to_cpu(ex->ee_block);
-	if (ext4_ext_is_uninitialized(ex))
-		uninitialized = 1;
 	ex_ee_len = ext4_ext_get_actual_len(ex);
 
 	while (ex >= EXT_FIRST_EXTENT(eh) &&
 			ex_ee_block + ex_ee_len > start) {
+
+		if (ext4_ext_is_uninitialized(ex))
+			uninitialized = 1;
+		else
+			uninitialized = 0;
+
 		ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len);
 		path[depth].p_ext = ex;
 
@@ -2784,7 +2774,7 @@ fix_extent_len:
 int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 			ext4_lblk_t iblock,
 			unsigned int max_blocks, struct buffer_head *bh_result,
-			int create, int extend_disksize)
+			int flags)
 {
 	struct ext4_ext_path *path = NULL;
 	struct ext4_extent_header *eh;
@@ -2793,7 +2783,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	int err = 0, depth, ret, cache_type;
 	unsigned int allocated = 0;
 	struct ext4_allocation_request ar;
-	loff_t disksize;
 
 	__clear_bit(BH_New, &bh_result->b_state);
 	ext_debug("blocks %u/%u requested for inode %u\n",
@@ -2803,7 +2792,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	cache_type = ext4_ext_in_cache(inode, iblock, &newex);
 	if (cache_type) {
 		if (cache_type == EXT4_EXT_CACHE_GAP) {
-			if (!create) {
+			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
 				/*
 				 * block isn't allocated yet and
 				 * user doesn't want to allocate it
@@ -2869,9 +2858,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 							EXT4_EXT_CACHE_EXTENT);
 				goto out;
 			}
-			if (create == EXT4_CREATE_UNINITIALIZED_EXT)
+			if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
 				goto out;
-			if (!create) {
+			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
+				if (allocated > max_blocks)
+					allocated = max_blocks;
 				/*
 				 * We have blocks reserved already.  We
 				 * return allocated blocks so that delalloc
@@ -2879,8 +2870,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 				 * the buffer head will be unmapped so that
 				 * a read from the block returns 0s.
 				 */
-				if (allocated > max_blocks)
-					allocated = max_blocks;
 				set_buffer_unwritten(bh_result);
 				bh_result->b_bdev = inode->i_sb->s_bdev;
 				bh_result->b_blocknr = newblock;
@@ -2903,7 +2892,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	 * requested block isn't allocated yet;
 	 * we couldn't try to create block if create flag is zero
 	 */
-	if (!create) {
+	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
 		/*
 		 * put just found gap into cache to speed up
 		 * subsequent requests
@@ -2932,10 +2921,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	 * EXT_UNINIT_MAX_LEN.
 	 */
 	if (max_blocks > EXT_INIT_MAX_LEN &&
-	    create != EXT4_CREATE_UNINITIALIZED_EXT)
+	    !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
 		max_blocks = EXT_INIT_MAX_LEN;
 	else if (max_blocks > EXT_UNINIT_MAX_LEN &&
-		 create == EXT4_CREATE_UNINITIALIZED_EXT)
+		 (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
 		max_blocks = EXT_UNINIT_MAX_LEN;
 
 	/* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
@@ -2966,7 +2955,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	/* try to insert new extent into found leaf and return */
 	ext4_ext_store_pblock(&newex, newblock);
 	newex.ee_len = cpu_to_le16(ar.len);
-	if (create == EXT4_CREATE_UNINITIALIZED_EXT)  /* Mark uninitialized */
+	if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)  /* Mark uninitialized */
 		ext4_ext_mark_uninitialized(&newex);
 	err = ext4_ext_insert_extent(handle, inode, path, &newex);
 	if (err) {
@@ -2983,18 +2972,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	newblock = ext_pblock(&newex);
 	allocated = ext4_ext_get_actual_len(&newex);
 outnew:
-	if (extend_disksize) {
-		disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
-		if (disksize > i_size_read(inode))
-			disksize = i_size_read(inode);
-		if (disksize > EXT4_I(inode)->i_disksize)
-			EXT4_I(inode)->i_disksize = disksize;
-	}
-
 	set_buffer_new(bh_result);
 
 	/* Cache only when it is _not_ an uninitialized extent */
-	if (create != EXT4_CREATE_UNINITIALIZED_EXT)
+	if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
 		ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
 						EXT4_EXT_CACHE_EXTENT);
 out:
@@ -3150,9 +3131,10 @@ retry:
 			ret = PTR_ERR(handle);
 			break;
 		}
-		ret = ext4_get_blocks_wrap(handle, inode, block,
-					  max_blocks, &map_bh,
-					  EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
+		map_bh.b_state = 0;
+		ret = ext4_get_blocks(handle, inode, block,
+				      max_blocks, &map_bh,
+				      EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);
 		if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
 			WARN_ON(ret <= 0);
@@ -3195,7 +3177,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
 		       void *data)
 {
 	struct fiemap_extent_info *fieinfo = data;
-	unsigned long blksize_bits = inode->i_sb->s_blocksize_bits;
+	unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
 	__u64	logical;
 	__u64	physical;
 	__u64	length;
@@ -3242,9 +3224,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
 	 *
 	 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
 	 */
-	if (logical + length - 1 == EXT_MAX_BLOCK ||
-	    ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK)
+	if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
+	    newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
+		loff_t size = i_size_read(inode);
+		loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
+
 		flags |= FIEMAP_EXTENT_LAST;
+		if ((flags & FIEMAP_EXTENT_DELALLOC) &&
+		    logical+length > size)
+			length = (size - logical + bs - 1) & ~(bs-1);
+	}
 
 	error = fiemap_fill_next_extent(fieinfo, logical, physical,
 					length, flags);
@@ -3318,10 +3307,10 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		 * Walk the extent tree gathering extent information.
 		 * ext4_ext_fiemap_cb will push extents back to user.
 		 */
-		down_write(&EXT4_I(inode)->i_data_sem);
+		down_read(&EXT4_I(inode)->i_data_sem);
 		error = ext4_ext_walk_space(inode, start_blk, len_blks,
 					  ext4_ext_fiemap_cb, fieinfo);
-		up_write(&EXT4_I(inode)->i_data_sem);
+		up_read(&EXT4_I(inode)->i_data_sem);
 	}
 
 	return error;
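Three separate fixes land in extents.c: extent and index validation now delegate to the new ext4_data_block_valid() helper; ext4_ext_rm_leaf() recomputes the uninitialized flag for every extent inside the loop, where it was previously sampled once from the last extent and could corrupt the bit on the others during truncate (the first commit in the merge list above); and ext4_fiemap() takes i_data_sem for read rather than write, since walking the extent tree never modifies it. A minimal sketch of the loop-state pattern behind the truncate fix:

	/* Per-element state must be derived inside the loop, not once
	 * before it, when the loop visits many elements. */
	ex = EXT_LAST_EXTENT(eh);
	while (ex >= EXT_FIRST_EXTENT(eh)) {
		int uninitialized = ext4_ext_is_uninitialized(ex);

		/* ... remove this extent's blocks, honoring the flag ... */
		ex--;
	}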

+ 0 - 29
fs/ext4/group.h

@@ -1,29 +0,0 @@
-/*
- *  linux/fs/ext4/group.h
- *
- * Copyright (C) 2007 Cluster File Systems, Inc
- *
- * Author: Andreas Dilger <adilger@clusterfs.com>
- */
-
-#ifndef _LINUX_EXT4_GROUP_H
-#define _LINUX_EXT4_GROUP_H
-
-extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
-				   struct ext4_group_desc *gdp);
-extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
-				       struct ext4_group_desc *gdp);
-struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
-				      ext4_group_t block_group);
-extern unsigned ext4_init_block_bitmap(struct super_block *sb,
-				       struct buffer_head *bh,
-				       ext4_group_t group,
-				       struct ext4_group_desc *desc);
-#define ext4_free_blocks_after_init(sb, group, desc)			\
-		ext4_init_block_bitmap(sb, NULL, group, desc)
-extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
-				       struct buffer_head *bh,
-				       ext4_group_t group,
-				       struct ext4_group_desc *desc);
-extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
-#endif /* _LINUX_EXT4_GROUP_H */

+ 34 - 39
fs/ext4/ialloc.c

@@ -27,7 +27,6 @@
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
-#include "group.h"
 
 /*
  * ialloc.c contains the inodes allocation and deallocation routines
@@ -123,16 +122,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 		unlock_buffer(bh);
 		return bh;
 	}
-	spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+	ext4_lock_group(sb, block_group);
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
 		ext4_init_inode_bitmap(sb, bh, block_group, desc);
 		set_bitmap_uptodate(bh);
 		set_buffer_uptodate(bh);
-		spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+		ext4_unlock_group(sb, block_group);
 		unlock_buffer(bh);
 		return bh;
 	}
-	spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+	ext4_unlock_group(sb, block_group);
 	if (buffer_uptodate(bh)) {
 		/*
 		 * if not uninit if bh is uptodate,
@@ -247,9 +246,8 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 		goto error_return;
 
 	/* Ok, now we can actually update the inode bitmaps.. */
-	spin_lock(sb_bgl_lock(sbi, block_group));
-	cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
-	spin_unlock(sb_bgl_lock(sbi, block_group));
+	cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
+					bit, bitmap_bh->b_data);
 	if (!cleared)
 		ext4_error(sb, "ext4_free_inode",
 			   "bit already cleared for inode %lu", ino);
@@ -261,7 +259,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 		if (fatal) goto error_return;
 
 		if (gdp) {
-			spin_lock(sb_bgl_lock(sbi, block_group));
+			ext4_lock_group(sb, block_group);
 			count = ext4_free_inodes_count(sb, gdp) + 1;
 			ext4_free_inodes_set(sb, gdp, count);
 			if (is_directory) {
@@ -277,7 +275,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 			}
 			gdp->bg_checksum = ext4_group_desc_csum(sbi,
 							block_group, gdp);
-			spin_unlock(sb_bgl_lock(sbi, block_group));
+			ext4_unlock_group(sb, block_group);
 			percpu_counter_inc(&sbi->s_freeinodes_counter);
 			if (is_directory)
 				percpu_counter_dec(&sbi->s_dirs_counter);
@@ -316,7 +314,7 @@ error_return:
 static int find_group_dir(struct super_block *sb, struct inode *parent,
 				ext4_group_t *best_group)
 {
-	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	unsigned int freei, avefreei;
 	struct ext4_group_desc *desc, *best_desc = NULL;
 	ext4_group_t group;
@@ -349,11 +347,10 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_group_desc *desc;
-	struct buffer_head *bh;
 	struct flex_groups *flex_group = sbi->s_flex_groups;
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
 	ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
-	ext4_group_t ngroups = sbi->s_groups_count;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	int flex_size = ext4_flex_bg_size(sbi);
 	ext4_group_t best_flex = parent_fbg_group;
 	int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
@@ -362,7 +359,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
 	ext4_group_t n_fbg_groups;
 	ext4_group_t i;
 
-	n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >>
+	n_fbg_groups = (ngroups + flex_size - 1) >>
 		sbi->s_log_groups_per_flex;
 
 find_close_to_parent:
@@ -404,7 +401,7 @@ find_close_to_parent:
 found_flexbg:
 	for (i = best_flex * flex_size; i < ngroups &&
 		     i < (best_flex + 1) * flex_size; i++) {
-		desc = ext4_get_group_desc(sb, i, &bh);
+		desc = ext4_get_group_desc(sb, i, NULL);
 		if (ext4_free_inodes_count(sb, desc)) {
 			*best_group = i;
 			goto out;
@@ -478,20 +475,21 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 {
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	ext4_group_t ngroups = sbi->s_groups_count;
+	ext4_group_t real_ngroups = ext4_get_groups_count(sb);
 	int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
 	unsigned int freei, avefreei;
 	ext4_fsblk_t freeb, avefreeb;
 	unsigned int ndirs;
 	int max_dirs, min_inodes;
 	ext4_grpblk_t min_blocks;
-	ext4_group_t i, grp, g;
+	ext4_group_t i, grp, g, ngroups;
 	struct ext4_group_desc *desc;
 	struct orlov_stats stats;
 	int flex_size = ext4_flex_bg_size(sbi);
 
+	ngroups = real_ngroups;
 	if (flex_size > 1) {
-		ngroups = (ngroups + flex_size - 1) >>
+		ngroups = (real_ngroups + flex_size - 1) >>
 			sbi->s_log_groups_per_flex;
 		parent_group >>= sbi->s_log_groups_per_flex;
 	}
@@ -543,7 +541,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 		 */
 		grp *= flex_size;
 		for (i = 0; i < flex_size; i++) {
-			if (grp+i >= sbi->s_groups_count)
+			if (grp+i >= real_ngroups)
 				break;
 			desc = ext4_get_group_desc(sb, grp+i, NULL);
 			if (desc && ext4_free_inodes_count(sb, desc)) {
@@ -583,7 +581,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 	}
 
 fallback:
-	ngroups = sbi->s_groups_count;
+	ngroups = real_ngroups;
 	avefreei = freei / ngroups;
 fallback_retry:
 	parent_group = EXT4_I(parent)->i_block_group;
@@ -613,9 +611,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 			    ext4_group_t *group, int mode)
 {
 	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
-	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+	ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
 	struct ext4_group_desc *desc;
-	ext4_group_t i, last;
 	int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
 
 	/*
@@ -708,10 +705,10 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 
 /*
  * claim the inode from the inode bitmap. If the group
- * is uninit we need to take the groups's sb_bgl_lock
+ * is uninit we need to take the group's ext4_group_lock
  * and clear the uninit flag. The inode bitmap update
  * and group desc uninit flag clear should be done
- * after holding sb_bgl_lock so that ext4_read_inode_bitmap
+ * after holding ext4_group_lock so that ext4_read_inode_bitmap
  * doesn't race with the ext4_claim_inode
  */
 static int ext4_claim_inode(struct super_block *sb,
@@ -722,7 +719,7 @@ static int ext4_claim_inode(struct super_block *sb,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
 
-	spin_lock(sb_bgl_lock(sbi, group));
+	ext4_lock_group(sb, group);
 	if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
 		/* not a free inode */
 		retval = 1;
@@ -731,7 +728,7 @@ static int ext4_claim_inode(struct super_block *sb,
 	ino++;
 	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
 			ino > EXT4_INODES_PER_GROUP(sb)) {
-		spin_unlock(sb_bgl_lock(sbi, group));
+		ext4_unlock_group(sb, group);
 		ext4_error(sb, __func__,
 			   "reserved inode or inode > inodes count - "
 			   "block_group = %u, inode=%lu", group,
@@ -780,7 +777,7 @@ static int ext4_claim_inode(struct super_block *sb,
 	}
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
 err_ret:
-	spin_unlock(sb_bgl_lock(sbi, group));
+	ext4_unlock_group(sb, group);
 	return retval;
 }
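
The ext4_lock_group()/ext4_unlock_group() calls above replace direct spin_lock(sb_bgl_lock(...)) usage. As a rough sketch, assuming the helpers live in ext4.h and reuse the generic hashed locks from <linux/blockgroup_lock.h> (the exact location is an assumption, not something this hunk shows):

	static inline spinlock_t *
	ext4_group_lock_ptr(struct super_block *sb, ext4_group_t group)
	{
		/* hashed per-group spinlock, shared by a few groups */
		return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group);
	}

	static inline void ext4_lock_group(struct super_block *sb,
					   ext4_group_t group)
	{
		spin_lock(ext4_group_lock_ptr(sb, group));
	}

	static inline void ext4_unlock_group(struct super_block *sb,
					     ext4_group_t group)
	{
		spin_unlock(ext4_group_lock_ptr(sb, group));
	}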
 
@@ -799,11 +796,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 	struct super_block *sb;
 	struct buffer_head *inode_bitmap_bh = NULL;
 	struct buffer_head *group_desc_bh;
-	ext4_group_t group = 0;
+	ext4_group_t ngroups, group = 0;
 	unsigned long ino = 0;
 	struct inode *inode;
 	struct ext4_group_desc *gdp = NULL;
-	struct ext4_super_block *es;
 	struct ext4_inode_info *ei;
 	struct ext4_sb_info *sbi;
 	int ret2, err = 0;
@@ -818,15 +814,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 		return ERR_PTR(-EPERM);
 
 	sb = dir->i_sb;
+	ngroups = ext4_get_groups_count(sb);
 	trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
 		   dir->i_ino, mode);
 	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	ei = EXT4_I(inode);
-
 	sbi = EXT4_SB(sb);
-	es = sbi->s_es;
 
 	if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
 		ret2 = find_group_flex(sb, dir, &group);
@@ -856,7 +851,7 @@ got_group:
 	if (ret2 == -1)
 		goto out;
 
-	for (i = 0; i < sbi->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		err = -EIO;
 
 		gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@@ -917,7 +912,7 @@ repeat_in_this_group:
 		 * group descriptor metadata has not yet been updated.
 		 * So we just go onto the next blockgroup.
 		 */
-		if (++group == sbi->s_groups_count)
+		if (++group == ngroups)
 			group = 0;
 	}
 	err = -ENOSPC;
@@ -938,7 +933,7 @@ got:
 		}
 
 		free = 0;
-		spin_lock(sb_bgl_lock(sbi, group));
+		ext4_lock_group(sb, group);
 		/* recheck and clear flag under lock if we still need to */
 		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 			free = ext4_free_blocks_after_init(sb, group, gdp);
@@ -947,7 +942,7 @@ got:
 			gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
 								gdp);
 		}
-		spin_unlock(sb_bgl_lock(sbi, group));
+		ext4_unlock_group(sb, group);
 
 		/* Don't need to dirty bitmap block if we didn't change it */
 		if (free) {
@@ -1158,7 +1153,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
 {
 	unsigned long desc_count;
 	struct ext4_group_desc *gdp;
-	ext4_group_t i;
+	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 #ifdef EXT4FS_DEBUG
 	struct ext4_super_block *es;
 	unsigned long bitmap_count, x;
@@ -1168,7 +1163,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
 	desc_count = 0;
 	bitmap_count = 0;
 	gdp = NULL;
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
@@ -1190,7 +1185,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
 	return desc_count;
 #else
 	desc_count = 0;
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
@@ -1205,9 +1200,9 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
 unsigned long ext4_count_dirs(struct super_block * sb)
 {
 	unsigned long count = 0;
-	ext4_group_t i;
+	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
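
Every open-coded read of sbi->s_groups_count above is funneled through ext4_get_groups_count() so that allocators see a consistent value while an online resize is in flight. A plausible sketch of the accessor, assuming it simply samples the count once and orders the read against the resizer's smp_wmb() (see the locking rules quoted in the resize.c hunks below):

	static inline ext4_group_t ext4_get_groups_count(struct super_block *sb)
	{
		ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;

		/* pairs with the smp_wmb() done when s_groups_count grows */
		smp_rmb();
		return ngroups;
	}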

This diff is too large to display
+ 364 - 227
fs/ext4/inode.c


+ 67 - 99
fs/ext4/mballoc.c

@@ -372,24 +372,12 @@ static inline void mb_set_bit(int bit, void *addr)
 	ext4_set_bit(bit, addr);
 }
 
-static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
-{
-	addr = mb_correct_addr_and_bit(&bit, addr);
-	ext4_set_bit_atomic(lock, bit, addr);
-}
-
 static inline void mb_clear_bit(int bit, void *addr)
 {
 	addr = mb_correct_addr_and_bit(&bit, addr);
 	ext4_clear_bit(bit, addr);
 }
 
-static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
-{
-	addr = mb_correct_addr_and_bit(&bit, addr);
-	ext4_clear_bit_atomic(lock, bit, addr);
-}
-
 static inline int mb_find_next_zero_bit(void *addr, int max, int start)
 {
 	int fix = 0, ret, tmpmax;
@@ -448,7 +436,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
 
 	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
 		return;
-	BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
 	for (i = 0; i < count; i++) {
 		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
 			ext4_fsblk_t blocknr;
@@ -472,7 +460,7 @@ static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
 
 	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
 		return;
-	BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
 	for (i = 0; i < count; i++) {
 		BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
 		mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
@@ -739,6 +727,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
 
 static int ext4_mb_init_cache(struct page *page, char *incore)
 {
+	ext4_group_t ngroups;
 	int blocksize;
 	int blocks_per_page;
 	int groups_per_page;
@@ -757,6 +746,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 
 	inode = page->mapping->host;
 	sb = inode->i_sb;
+	ngroups = ext4_get_groups_count(sb);
 	blocksize = 1 << inode->i_blkbits;
 	blocks_per_page = PAGE_CACHE_SIZE / blocksize;
 
@@ -780,7 +770,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 	for (i = 0; i < groups_per_page; i++) {
 		struct ext4_group_desc *desc;
 
-		if (first_group + i >= EXT4_SB(sb)->s_groups_count)
+		if (first_group + i >= ngroups)
 			break;
 
 		err = -EIO;
@@ -801,17 +791,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 			unlock_buffer(bh[i]);
 			continue;
 		}
-		spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+		ext4_lock_group(sb, first_group + i);
 		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 			ext4_init_block_bitmap(sb, bh[i],
 						first_group + i, desc);
 			set_bitmap_uptodate(bh[i]);
 			set_buffer_uptodate(bh[i]);
-			spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+			ext4_unlock_group(sb, first_group + i);
 			unlock_buffer(bh[i]);
 			continue;
 		}
-		spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+		ext4_unlock_group(sb, first_group + i);
 		if (buffer_uptodate(bh[i])) {
 			/*
 			 * if not uninit if bh is uptodate,
@@ -852,7 +842,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 		struct ext4_group_info *grinfo;
 
 		group = (first_block + i) >> 1;
-		if (group >= EXT4_SB(sb)->s_groups_count)
+		if (group >= ngroups)
 			break;
 
 		/*
@@ -1078,7 +1068,7 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
 	return 0;
 }
 
-static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
+static void mb_clear_bits(void *bm, int cur, int len)
 {
 	__u32 *addr;
 
@@ -1091,15 +1081,12 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
 			cur += 32;
 			continue;
 		}
-		if (lock)
-			mb_clear_bit_atomic(lock, cur, bm);
-		else
-			mb_clear_bit(cur, bm);
+		mb_clear_bit(cur, bm);
 		cur++;
 	}
 }
 
-static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
+static void mb_set_bits(void *bm, int cur, int len)
 {
 	__u32 *addr;
 
@@ -1112,10 +1099,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
 			cur += 32;
 			continue;
 		}
-		if (lock)
-			mb_set_bit_atomic(lock, cur, bm);
-		else
-			mb_set_bit(cur, bm);
+		mb_set_bit(cur, bm);
 		cur++;
 	}
 }
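
With the atomic variants gone, mb_set_bits()/mb_clear_bits() are only safe because every caller now holds the group's spinlock instead of taking a bit-level lock per operation. The calling pattern used throughout the rest of this patch:

	ext4_lock_group(sb, group);
	mb_set_bits(bitmap, start, len);	/* plain, non-atomic bit ops */
	ext4_unlock_group(sb, group);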
@@ -1131,7 +1115,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
 	struct super_block *sb = e4b->bd_sb;
 
 	BUG_ON(first + count > (sb->s_blocksize << 3));
-	BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
 	mb_check_buddy(e4b);
 	mb_free_blocks_double(inode, e4b, first, count);
 
@@ -1212,7 +1196,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
 	int ord;
 	void *buddy;
 
-	BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
 	BUG_ON(ex == NULL);
 
 	buddy = mb_find_buddy(e4b, order, &max);
@@ -1276,7 +1260,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
 
 	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
 	BUG_ON(e4b->bd_group != ex->fe_group);
-	BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
+	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
 	mb_check_buddy(e4b);
 	mb_mark_used_double(e4b, start, len);
 
@@ -1330,8 +1314,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
 		e4b->bd_info->bb_counters[ord]++;
 	}
 
-	mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group),
-			EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
+	mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
 	mb_check_buddy(e4b);
 
 	return ret;
@@ -1726,7 +1709,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 	unsigned free, fragments;
 	unsigned i, bits;
 	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
-	struct ext4_group_desc *desc;
 	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
 
 	BUG_ON(cr < 0 || cr >= 4);
@@ -1742,10 +1724,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 	switch (cr) {
 	case 0:
 		BUG_ON(ac->ac_2order == 0);
-		/* If this group is uninitialized, skip it initially */
-		desc = ext4_get_group_desc(ac->ac_sb, group, NULL);
-		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
-			return 0;
 
 		/* Avoid using the first bg of a flexgroup for data files */
 		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
@@ -1788,6 +1766,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
 	int block, pnum;
 	int blocks_per_page;
 	int groups_per_page;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	ext4_group_t first_group;
 	struct ext4_group_info *grp;
 
@@ -1807,7 +1786,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
 	/* read all groups the page covers into the cache */
 	for (i = 0; i < groups_per_page; i++) {
 
-		if ((first_group + i) >= EXT4_SB(sb)->s_groups_count)
+		if ((first_group + i) >= ngroups)
 			break;
 		grp = ext4_get_group_info(sb, first_group + i);
 		/* take all groups write allocation
@@ -1945,8 +1924,7 @@ err:
 static noinline_for_stack int
 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
-	ext4_group_t group;
-	ext4_group_t i;
+	ext4_group_t ngroups, group, i;
 	int cr;
 	int err = 0;
 	int bsbits;
@@ -1957,6 +1935,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 
 	sb = ac->ac_sb;
 	sbi = EXT4_SB(sb);
+	ngroups = ext4_get_groups_count(sb);
 	BUG_ON(ac->ac_status == AC_STATUS_FOUND);
 
 	/* first, try the goal */
@@ -2017,11 +1996,11 @@ repeat:
 		 */
 		group = ac->ac_g_ex.fe_group;
 
-		for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) {
+		for (i = 0; i < ngroups; group++, i++) {
 			struct ext4_group_info *grp;
 			struct ext4_group_desc *desc;
 
-			if (group == EXT4_SB(sb)->s_groups_count)
+			if (group == ngroups)
 				group = 0;
 
 			/* quick check to skip empty groups */
@@ -2064,9 +2043,7 @@ repeat:
 
 			ac->ac_groups_scanned++;
 			desc = ext4_get_group_desc(sb, group, NULL);
-			if (cr == 0 || (desc->bg_flags &
-					cpu_to_le16(EXT4_BG_BLOCK_UNINIT) &&
-					ac->ac_2order != 0))
+			if (cr == 0)
 				ext4_mb_simple_scan_group(ac, &e4b);
 			else if (cr == 1 &&
 					ac->ac_g_ex.fe_len == sbi->s_stripe)
@@ -2315,12 +2292,10 @@ static struct file_operations ext4_mb_seq_history_fops = {
 static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
 {
 	struct super_block *sb = seq->private;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_group_t group;
 
-	if (*pos < 0 || *pos >= sbi->s_groups_count)
+	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
 		return NULL;
-
 	group = *pos + 1;
 	return (void *) ((unsigned long) group);
 }
@@ -2328,11 +2303,10 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
 static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct super_block *sb = seq->private;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_group_t group;
 
 	++*pos;
-	if (*pos < 0 || *pos >= sbi->s_groups_count)
+	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
 		return NULL;
 	group = *pos + 1;
 	return (void *) ((unsigned long) group);
@@ -2420,7 +2394,8 @@ static void ext4_mb_history_release(struct super_block *sb)
 
 	if (sbi->s_proc != NULL) {
 		remove_proc_entry("mb_groups", sbi->s_proc);
-		remove_proc_entry("mb_history", sbi->s_proc);
+		if (sbi->s_mb_history_max)
+			remove_proc_entry("mb_history", sbi->s_proc);
 	}
 	kfree(sbi->s_mb_history);
 }
@@ -2431,17 +2406,17 @@ static void ext4_mb_history_init(struct super_block *sb)
 	int i;
 
 	if (sbi->s_proc != NULL) {
-		proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
-				 &ext4_mb_seq_history_fops, sb);
+		if (sbi->s_mb_history_max)
+			proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
+					 &ext4_mb_seq_history_fops, sb);
 		proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
 				 &ext4_mb_seq_groups_fops, sb);
 	}
 
-	sbi->s_mb_history_max = 1000;
 	sbi->s_mb_history_cur = 0;
 	spin_lock_init(&sbi->s_mb_history_lock);
 	i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
-	sbi->s_mb_history = kzalloc(i, GFP_KERNEL);
+	sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL;
 	/* if we can't allocate history, then we simply won't use it */
 }
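
With the hard-coded depth of 1000 gone, s_mb_history_max must now be set before ext4_mb_history_init() runs (presumably at mount time via a tunable); a value of zero disables mb_history entirely, which is why both the proc entry and the buffer allocation became conditional, and why ext4_mb_store_history() in the next hunk tolerates a NULL buffer.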
 
@@ -2451,7 +2426,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	struct ext4_mb_history h;
 
-	if (unlikely(sbi->s_mb_history == NULL))
+	if (sbi->s_mb_history == NULL)
 		return;
 
 	if (!(ac->ac_op & sbi->s_mb_history_filter))
@@ -2587,6 +2562,7 @@ void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
 
 static int ext4_mb_init_backend(struct super_block *sb)
 {
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	ext4_group_t i;
 	int metalen;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2598,7 +2574,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
 	struct ext4_group_desc *desc;
 
 	/* This is the number of blocks used by GDT */
-	num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) -
+	num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
 				1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
 
 	/*
@@ -2644,7 +2620,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
 	for (i = 0; i < num_meta_group_infos; i++) {
 		if ((i + 1) == num_meta_group_infos)
 			metalen = sizeof(*meta_group_info) *
-				(sbi->s_groups_count -
+				(ngroups -
 					(i << EXT4_DESC_PER_BLOCK_BITS(sb)));
 		meta_group_info = kmalloc(metalen, GFP_KERNEL);
 		if (meta_group_info == NULL) {
@@ -2655,7 +2631,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
 		sbi->s_group_info[i] = meta_group_info;
 	}
 
-	for (i = 0; i < sbi->s_groups_count; i++) {
+	for (i = 0; i < ngroups; i++) {
 		desc = ext4_get_group_desc(sb, i, NULL);
 		if (desc == NULL) {
 			printk(KERN_ERR
@@ -2761,7 +2737,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	return 0;
 }
 
-/* need to called with ext4 group lock (ext4_lock_group) */
+/* needs to be called with the ext4 group lock held */
 static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
 {
 	struct ext4_prealloc_space *pa;
@@ -2781,13 +2757,14 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
 
 int ext4_mb_release(struct super_block *sb)
 {
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	ext4_group_t i;
 	int num_meta_group_infos;
 	struct ext4_group_info *grinfo;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	if (sbi->s_group_info) {
-		for (i = 0; i < sbi->s_groups_count; i++) {
+		for (i = 0; i < ngroups; i++) {
 			grinfo = ext4_get_group_info(sb, i);
 #ifdef DOUBLE_CHECK
 			kfree(grinfo->bb_bitmap);
@@ -2797,7 +2774,7 @@ int ext4_mb_release(struct super_block *sb)
 			ext4_unlock_group(sb, i);
 			kfree(grinfo);
 		}
-		num_meta_group_infos = (sbi->s_groups_count +
+		num_meta_group_infos = (ngroups +
 				EXT4_DESC_PER_BLOCK(sb) - 1) >>
 			EXT4_DESC_PER_BLOCK_BITS(sb);
 		for (i = 0; i < num_meta_group_infos; i++)
@@ -2984,27 +2961,25 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 		+ le32_to_cpu(es->s_first_data_block);
 
 	len = ac->ac_b_ex.fe_len;
-	if (in_range(ext4_block_bitmap(sb, gdp), block, len) ||
-	    in_range(ext4_inode_bitmap(sb, gdp), block, len) ||
-	    in_range(block, ext4_inode_table(sb, gdp),
-		     EXT4_SB(sb)->s_itb_per_group) ||
-	    in_range(block + len - 1, ext4_inode_table(sb, gdp),
-		     EXT4_SB(sb)->s_itb_per_group)) {
+	if (!ext4_data_block_valid(sbi, block, len)) {
 		ext4_error(sb, __func__,
-			   "Allocating block %llu in system zone of %d group\n",
-			   block, ac->ac_b_ex.fe_group);
+			   "Allocating blocks %llu-%llu which overlap "
+			   "fs metadata\n", block, block+len);
 		/* File system mounted not to panic on error
 		 * Fix the bitmap and repeat the block allocation
 		 * We leak some of the blocks here.
 		 */
-		mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
-				bitmap_bh->b_data, ac->ac_b_ex.fe_start,
-				ac->ac_b_ex.fe_len);
+		ext4_lock_group(sb, ac->ac_b_ex.fe_group);
+		mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+			    ac->ac_b_ex.fe_len);
+		ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
 		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
 		if (!err)
 			err = -EAGAIN;
 		goto out_err;
 	}
+
+	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
 #ifdef AGGRESSIVE_CHECK
 	{
 		int i;
@@ -3014,9 +2989,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 		}
 	}
 #endif
-	spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-	mb_set_bits(NULL, bitmap_bh->b_data,
-				ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
+	mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+		    ac->ac_b_ex.fe_len);
 	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
 		ext4_free_blks_set(sb, gdp,
@@ -3026,7 +2999,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 	len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
 	ext4_free_blks_set(sb, gdp, len);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
-	spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
+
+	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
 	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
 	/*
 	 * Now reduce the dirty block count also. Should not go negative
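
The open-coded in_range() tests above only caught collisions with this group's own bitmaps and inode table; ext4_data_block_valid() checks the allocation against all known metadata regions. A rough sketch of the shape of that helper, presumably backed by the new block_validity.c (the body here is an assumption, only the call signature appears in this patch):

	int ext4_data_block_valid(struct ext4_sb_info *sbi,
				  ext4_fsblk_t start_blk, unsigned int count)
	{
		if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
		    (start_blk + count > ext4_blocks_count(sbi->s_es)))
			return 0;
		/* ... then reject overlap with any cached system-zone extent ... */
		return 1;
	}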
@@ -3459,7 +3433,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 * the function goes through all blocks freed in the group
 * but not yet committed and marks them used in the in-core bitmap.
  * buddy must be generated from this bitmap
- * Need to be called with ext4 group lock (ext4_lock_group)
+ * Needs to be called with the ext4 group lock held
  */
 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 						ext4_group_t group)
@@ -3473,9 +3447,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 
 	while (n) {
 		entry = rb_entry(n, struct ext4_free_data, node);
-		mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
-				bitmap, entry->start_blk,
-				entry->count);
+		mb_set_bits(bitmap, entry->start_blk, entry->count);
 		n = rb_next(n);
 	}
 	return;
@@ -3484,7 +3456,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 /*
 * the function goes through all preallocations in this group and marks them
 * used in the in-core bitmap. buddy must be generated from this bitmap
- * Need to be called with ext4 group lock (ext4_lock_group)
+ * Needs to be called with the ext4 group lock held
  */
 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 					ext4_group_t group)
@@ -3516,8 +3488,7 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 		if (unlikely(len == 0))
 			continue;
 		BUG_ON(groupnr != group);
-		mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
-						bitmap, start, len);
+		mb_set_bits(bitmap, start, len);
 		preallocated += len;
 		count++;
 	}
@@ -4121,7 +4092,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode,
 static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 {
 	struct super_block *sb = ac->ac_sb;
-	ext4_group_t i;
+	ext4_group_t ngroups, i;
 
 	printk(KERN_ERR "EXT4-fs: Can't allocate:"
 			" Allocation context details:\n");
@@ -4145,7 +4116,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 	printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
 		ac->ac_found);
 	printk(KERN_ERR "EXT4-fs: groups: \n");
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
+	ngroups = ext4_get_groups_count(sb);
+	for (i = 0; i < ngroups; i++) {
 		struct ext4_group_info *grp = ext4_get_group_info(sb, i);
 		struct ext4_prealloc_space *pa;
 		ext4_grpblk_t start;
@@ -4469,13 +4441,13 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
 
 static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
 {
-	ext4_group_t i;
+	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 	int ret;
 	int freed = 0;
 
 	trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d",
 		   sb->s_id, needed);
-	for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) {
+	for (i = 0; i < ngroups && needed > 0; i++) {
 		ret = ext4_mb_discard_group_preallocations(sb, i, needed);
 		freed += ret;
 		needed -= ret;
@@ -4859,29 +4831,25 @@ do_more:
 		new_entry->group  = block_group;
 		new_entry->count = count;
 		new_entry->t_tid = handle->h_transaction->t_tid;
+
 		ext4_lock_group(sb, block_group);
-		mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
-				bit, count);
+		mb_clear_bits(bitmap_bh->b_data, bit, count);
 		ext4_mb_free_metadata(handle, &e4b, new_entry);
-		ext4_unlock_group(sb, block_group);
 	} else {
-		ext4_lock_group(sb, block_group);
 		/* need to update group_info->bb_free and bitmap
		 * with the group lock held. generate_buddy looks at
		 * them with the group lock held
 		 */
-		mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
-				bit, count);
+		ext4_lock_group(sb, block_group);
+		mb_clear_bits(bitmap_bh->b_data, bit, count);
 		mb_free_blocks(inode, &e4b, bit, count);
 		ext4_mb_return_to_preallocation(inode, &e4b, block, count);
-		ext4_unlock_group(sb, block_group);
 	}
 
-	spin_lock(sb_bgl_lock(sbi, block_group));
 	ret = ext4_free_blks_count(sb, gdp) + count;
 	ext4_free_blks_set(sb, gdp, ret);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
-	spin_unlock(sb_bgl_lock(sbi, block_group));
+	ext4_unlock_group(sb, block_group);
 	percpu_counter_add(&sbi->s_freeblocks_counter, count);
 
 	if (sbi->s_log_groups_per_flex) {

+ 0 - 1
fs/ext4/mballoc.h

@@ -23,7 +23,6 @@
 #include <linux/mutex.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
-#include "group.h"
 
 /*
  * with AGGRESSIVE_CHECK allocator runs consistency checks over

+ 15 - 12
fs/ext4/namei.c

@@ -37,7 +37,6 @@
 #include "ext4.h"
 #include "ext4_jbd2.h"
 
-#include "namei.h"
 #include "xattr.h"
 #include "acl.h"
 
@@ -750,7 +749,7 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
 			ext4fs_dirhash(de->name, de->name_len, &h);
 			map_tail--;
 			map_tail->hash = h.hash;
-			map_tail->offs = (u16) ((char *) de - base);
+			map_tail->offs = ((char *) de - base) >> 2;
 			map_tail->size = le16_to_cpu(de->rec_len);
 			count++;
 			cond_resched();
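
Storing the offset pre-shifted is what extends dx_map_entry to larger directory blocks: a u16 offs tops out at 65535, while byte offsets in a 256 KiB block run up to 262143 and need 18 bits. Directory entries are 4-byte aligned, so the low two bits are always zero and (offs >> 2) is at most 262143 >> 2 = 65535, which fits a u16 exactly. dx_move_dirents() in the next hunk reverses the encoding with offs << 2.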
@@ -1148,7 +1147,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
 	unsigned rec_len = 0;
 
 	while (count--) {
-		struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs);
+		struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
+						(from + (map->offs << 2));
 		rec_len = EXT4_DIR_REC_LEN(de->name_len);
 		memcpy (to, de, rec_len);
 		((struct ext4_dir_entry_2 *) to)->rec_len =
@@ -1997,7 +1997,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 	if (!ext4_handle_valid(handle))
 		return 0;
 
-	lock_super(sb);
+	mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
 	if (!list_empty(&EXT4_I(inode)->i_orphan))
 		goto out_unlock;
 
@@ -2006,9 +2006,13 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 
 	/* @@@ FIXME: Observation from aviro:
 	 * I think I can trigger J_ASSERT in ext4_orphan_add().  We block
-	 * here (on lock_super()), so race with ext4_link() which might bump
+	 * here (on s_orphan_lock), so race with ext4_link() which might bump
 	 * ->i_nlink. For, say it, character device. Not a regular file,
 	 * not a directory, not a symlink and ->i_nlink > 0.
+	 *
+	 * tytso, 4/25/2009: I'm not sure how that could happen;
+	 * shouldn't the fs core protect us from these sort of
+	 * unlink()/link() races?
 	 */
 	J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 		  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -2045,7 +2049,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 	jbd_debug(4, "orphan inode %lu will point to %d\n",
 			inode->i_ino, NEXT_ORPHAN(inode));
 out_unlock:
-	unlock_super(sb);
+	mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
 	ext4_std_error(inode->i_sb, err);
 	return err;
 }
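
The coarse lock_super()/unlock_super() serialization is replaced by a mutex dedicated to the orphan list. The sketch below assumes (this hunk does not show it) that the series adds the field to struct ext4_sb_info and initializes it during mount:

	/* in struct ext4_sb_info (assumed) */
	struct mutex s_orphan_lock;

	/* in ext4_fill_super(), before the orphan list is processed (assumed) */
	mutex_init(&sbi->s_orphan_lock);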
@@ -2066,11 +2070,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
 	if (!ext4_handle_valid(handle))
 		return 0;
 
-	lock_super(inode->i_sb);
-	if (list_empty(&ei->i_orphan)) {
-		unlock_super(inode->i_sb);
-		return 0;
-	}
+	mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
+	if (list_empty(&ei->i_orphan))
+		goto out;
 
 	ino_next = NEXT_ORPHAN(inode);
 	prev = ei->i_orphan.prev;
@@ -2120,7 +2122,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
 out_err:
 	ext4_std_error(inode->i_sb, err);
 out:
-	unlock_super(inode->i_sb);
+	mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
 	return err;
 
 out_brelse:
@@ -2533,6 +2535,7 @@ const struct inode_operations ext4_dir_inode_operations = {
 	.removexattr	= generic_removexattr,
 #endif
 	.permission	= ext4_permission,
+	.fiemap         = ext4_fiemap,
 };
 
 const struct inode_operations ext4_special_inode_operations = {

+ 0 - 8
fs/ext4/namei.h

@@ -1,8 +0,0 @@
-/*  linux/fs/ext4/namei.h
- *
- * Copyright (C) 2005 Simtec Electronics
- *	Ben Dooks <ben@simtec.co.uk>
- *
-*/
-
-extern struct dentry *ext4_get_parent(struct dentry *child);
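
(Presumably this one-declaration header goes the same way as the group.h include dropped from mballoc.h above: folded into ext4.h as part of this series' header consolidation, with ext4_get_parent() itself unchanged.)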

+ 18 - 18
fs/ext4/resize.c

@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 
 #include "ext4_jbd2.h"
-#include "group.h"
 
 #define outside(b, first, last)	((b) < (first) || (b) >= (last))
 #define inside(b, first, last)	((b) >= (first) && (b) < (last))
@@ -193,7 +192,7 @@ static int setup_new_group_blocks(struct super_block *sb,
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
-	lock_super(sb);
+	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
 		err = -EBUSY;
 		goto exit_journal;
@@ -302,7 +301,7 @@ exit_bh:
 	brelse(bh);
 
 exit_journal:
-	unlock_super(sb);
+	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext4_journal_stop(handle)) && !err)
 		err = err2;
 
@@ -643,11 +642,12 @@ exit_free:
  * important part is that the new block and inode counts are in the backup
  * superblocks, and the location of the new group metadata in the GDT backups.
  *
- * We do not need lock_super() for this, because these blocks are not
- * otherwise touched by the filesystem code when it is mounted.  We don't
- * need to worry about last changing from sbi->s_groups_count, because the
- * worst that can happen is that we do not copy the full number of backups
- * at this time.  The resize which changed s_groups_count will backup again.
+ * We do not need to take the s_resize_lock for this, because these
+ * blocks are not otherwise touched by the filesystem code when it is
+ * mounted.  We don't need to worry about last changing from
+ * sbi->s_groups_count, because the worst that can happen is that we
+ * do not copy the full number of backups at this time.  The resize
+ * which changed s_groups_count will backup again.
  */
 static void update_backups(struct super_block *sb,
 			   int blk_off, char *data, int size)
@@ -809,7 +809,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 		goto exit_put;
 	}
 
-	lock_super(sb);
+	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
 		ext4_warning(sb, __func__,
 			     "multiple resizers run on filesystem!");
@@ -840,7 +840,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
         /*
          * OK, now we've set up the new group.  Time to make it active.
          *
-         * Current kernels don't lock all allocations via lock_super(),
+         * We do not lock all allocations via s_resize_lock,
          * so we have to be safe wrt. concurrent accesses to the group
          * data.  So we need to be careful to set all of the relevant
          * group descriptor data etc. *before* we enable the group.
@@ -900,12 +900,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	 *
 	 * The precise rules we use are:
 	 *
-	 * * Writers of s_groups_count *must* hold lock_super
+	 * * Writers of s_groups_count *must* hold s_resize_lock
 	 * AND
 	 * * Writers must perform a smp_wmb() after updating all dependent
 	 *   data and before modifying the groups count
 	 *
-	 * * Readers must hold lock_super() over the access
+	 * * Readers must hold s_resize_lock over the access
 	 * OR
 	 * * Readers must perform an smp_rmb() after reading the groups count
 	 *   and before reading any dependent data.
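
Taken together, the writer side of these rules comes out to the following shape (a sketch of the ordering only, not of the full ext4_group_add() logic):

	mutex_lock(&sbi->s_resize_lock);
	/* ... install descriptors, bitmaps, backup superblocks ... */
	smp_wmb();		/* publish the dependent data first */
	sbi->s_groups_count++;	/* then let lock-free readers see it */
	mutex_unlock(&sbi->s_resize_lock);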
@@ -948,7 +948,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	sb->s_dirt = 1;
 
 exit_journal:
-	unlock_super(sb);
+	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext4_journal_stop(handle)) && !err)
 		err = err2;
 	if (!err) {
@@ -986,7 +986,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 
 	/* We don't need to worry about locking wrt other resizers just
 	 * yet: we're going to revalidate es->s_blocks_count after
-	 * taking lock_super() below. */
+	 * taking the s_resize_lock below. */
 	o_blocks_count = ext4_blocks_count(es);
 	o_groups_count = EXT4_SB(sb)->s_groups_count;
 
@@ -1056,11 +1056,11 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 		goto exit_put;
 	}
 
-	lock_super(sb);
+	mutex_lock(&EXT4_SB(sb)->s_resize_lock);
 	if (o_blocks_count != ext4_blocks_count(es)) {
 		ext4_warning(sb, __func__,
 			     "multiple resizers run on filesystem!");
-		unlock_super(sb);
+		mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
 		ext4_journal_stop(handle);
 		err = -EBUSY;
 		goto exit_put;
@@ -1070,14 +1070,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 						 EXT4_SB(sb)->s_sbh))) {
 		ext4_warning(sb, __func__,
 			     "error %d on journal write access", err);
-		unlock_super(sb);
+		mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
 		ext4_journal_stop(handle);
 		goto exit_put;
 	}
 	ext4_blocks_count_set(es, o_blocks_count + add);
 	ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
 	sb->s_dirt = 1;
-	unlock_super(sb);
+	mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
 	ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
 		   o_blocks_count + add);
 	/* We add the blocks to the bitmap and set the group need init bit */

This diff is too large to display
+ 215 - 156
fs/ext4/super.c


+ 10 - 4
fs/ioctl.c

@@ -414,10 +414,6 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
 	switch (cmd) {
 	case FIBMAP:
 		return ioctl_fibmap(filp, p);
-	case FS_IOC_FIEMAP:
-		return ioctl_fiemap(filp, arg);
-	case FIGETBSZ:
-		return put_user(inode->i_sb->s_blocksize, p);
 	case FIONREAD:
 		return put_user(i_size_read(inode) - filp->f_pos, p);
 	}
@@ -557,6 +553,16 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
 		error = ioctl_fsthaw(filp);
 		break;
 
+	case FS_IOC_FIEMAP:
+		return ioctl_fiemap(filp, arg);
+
+	case FIGETBSZ:
+	{
+		struct inode *inode = filp->f_path.dentry->d_inode;
+		int __user *p = (int __user *)arg;
+		return put_user(inode->i_sb->s_blocksize, p);
+	}
+
 	default:
 		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
 			error = file_ioctl(filp, cmd, arg);
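
Because file_ioctl() is only reached for regular files, hoisting FS_IOC_FIEMAP and FIGETBSZ into do_vfs_ioctl() makes them work on directories too, which the .fiemap hook added to ext4_dir_inode_operations earlier in this patch relies on. A hedged userspace sketch (path and error handling are illustrative only; needs <fcntl.h>, <stdio.h>, <sys/ioctl.h>, <linux/fs.h> and <linux/fiemap.h>):

	int fd = open("/mnt/somedir", O_RDONLY | O_DIRECTORY);
	struct fiemap fm = {
		.fm_length	 = FIEMAP_MAX_OFFSET,
		.fm_extent_count = 0,		/* just count the extents */
	};

	if (fd >= 0 && ioctl(fd, FS_IOC_FIEMAP, &fm) == 0)
		printf("%u extents\n", fm.fm_mapped_extents);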

+ 4 - 4
fs/jbd2/journal.c

@@ -1781,7 +1781,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
  * Journal abort has very specific semantics, which we describe
  * for journal abort.
  *
- * Two internal function, which provide abort to te jbd layer
+ * Two internal functions, which provide abort to the jbd layer
  * itself are here.
  */
 
@@ -1879,7 +1879,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
  * int jbd2_journal_errno () - returns the journal's error state.
  * @journal: journal to examine.
  *
- * This is the errno numbet set with jbd2_journal_abort(), the last
+ * This is the errno number set with jbd2_journal_abort(), the last
  * time the journal was mounted - if the journal was stopped
  * without calling abort this will be 0.
  *
@@ -1903,7 +1903,7 @@ int jbd2_journal_errno(journal_t *journal)
  * int jbd2_journal_clear_err () - clears the journal's error state
  * @journal: journal to act on.
  *
- * An error must be cleared or Acked to take a FS out of readonly
+ * An error must be cleared or acked to take a FS out of readonly
  * mode.
  */
 int jbd2_journal_clear_err(journal_t *journal)
@@ -1923,7 +1923,7 @@ int jbd2_journal_clear_err(journal_t *journal)
  * void jbd2_journal_ack_err() - Ack journal err.
  * @journal: journal to act on.
  *
- * An error must be cleared or Acked to take a FS out of readonly
+ * An error must be cleared or acked to take a FS out of readonly
  * mode.
  */
 void jbd2_journal_ack_err(journal_t *journal)

+ 4 - 2
fs/mpage.c

@@ -379,7 +379,8 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 	struct buffer_head map_bh;
 	unsigned long first_logical_block = 0;
 
-	clear_buffer_mapped(&map_bh);
+	map_bh.b_state = 0;
+	map_bh.b_size = 0;
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = list_entry(pages->prev, struct page, lru);
 
@@ -412,7 +413,8 @@ int mpage_readpage(struct page *page, get_block_t get_block)
 	struct buffer_head map_bh;
 	unsigned long first_logical_block = 0;
 
-	clear_buffer_mapped(&map_bh);
+	map_bh.b_state = 0;
+	map_bh.b_size = 0;
 	bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
 			&map_bh, &first_logical_block, get_block);
 	if (bio)
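
clear_buffer_mapped() flips a single bit, but map_bh lives on the stack, so its remaining state bits and b_size start out as garbage; zeroing both fields explicitly is what do_mpage_readpage() actually depends on. An equivalent, slightly terser idiom (an alternative style, not what the patch uses) relies on C zero-filling the remaining members of a partially initialized aggregate:

	struct buffer_head map_bh = {
		.b_state = 0,	/* every other member is zeroed implicitly */
		.b_size  = 0,
	};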

Some files were not shown because too many files changed in this diff