15 years ago · 03e62303cf
--- a/Documentation/filesystems/ocfs2.txt
+++ b/Documentation/filesystems/ocfs2.txt
@@ -80,3 +80,10 @@ user_xattr	(*)	Enables Extended User Attributes.
 
				 nouser_xattr		Disables Extended User Attributes.
			
 
				 acl			Enables POSIX Access Control Lists support.
			
 
				 noacl		(*)	Disables POSIX Access Control Lists support.
			
 
				+resv_level=2	(*)	Set how aggressive allocation reservations will be.
			
 
				+			Valid values are between 0 (reservations off) and 8
			
 
				+			(maximum space for reservations).
			
 
				+dir_resv_level=	(*)	By default, directory reservations will scale with file
			
 
				+			reservations - users should rarely need to change this
			
 
				+			value. If allocation reservations are turned off, this
			
 
				+			option will have no effect.
			
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -29,6 +29,7 @@ ocfs2-objs := \
 
				 	mmap.o 			\
			
 
				 	namei.o 		\
			
 
				 	refcounttree.o		\
			
 
				+	reservations.o		\
			
 
				 	resize.o		\
			
 
				 	slot_map.o 		\
			
 
				 	suballoc.o 		\
			
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -140,8 +140,9 @@ int ocfs2_remove_extent(handle_t *handle, struct ocfs2_extent_tree *et,
 
				 			struct ocfs2_cached_dealloc_ctxt *dealloc);
			
 
				 int ocfs2_remove_btree_range(struct inode *inode,
			
 
				 			     struct ocfs2_extent_tree *et,
			
 
				-			     u32 cpos, u32 phys_cpos, u32 len,
			
 
				-			     struct ocfs2_cached_dealloc_ctxt *dealloc);
			
 
				+			     u32 cpos, u32 phys_cpos, u32 len, int flags,
			
 
				+			     struct ocfs2_cached_dealloc_ctxt *dealloc,
			
 
				+			     u64 refcount_loc);
			
 
				 
			
 
				 int ocfs2_num_free_extents(struct ocfs2_super *osb,
			
 
				 			   struct ocfs2_extent_tree *et);
			
@@ -209,7 +210,7 @@ static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
 
				 int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
			
 
				 				u64 blkno, unsigned int bit);
			
 
				 int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
			
 
				-			      int type, int slot, u64 blkno,
			
 
				+			      int type, int slot, u64 suballoc, u64 blkno,
			
 
				 			      unsigned int bit);
			
 
				 static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c)
			
 
				 {
			
@@ -233,8 +234,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
 
				 			   struct ocfs2_truncate_context **tc);
			
 
				 int ocfs2_commit_truncate(struct ocfs2_super *osb,
			
 
				 			  struct inode *inode,
			
 
				-			  struct buffer_head *fe_bh,
			
 
				-			  struct ocfs2_truncate_context *tc);
			
 
				+			  struct buffer_head *di_bh);
			
 
				 int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
			
 
				 			  unsigned int start, unsigned int end, int trunc);
			
 
				 
			
@@ -319,6 +319,8 @@ int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
 
				 			      struct ocfs2_path *path);
			
 
				 int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
			
 
				 				   struct ocfs2_path *path, u32 *cpos);
			
 
				+int ocfs2_find_cpos_for_left_leaf(struct super_block *sb,
			
 
				+				  struct ocfs2_path *path, u32 *cpos);
			
 
				 int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
			
 
				 			    struct ocfs2_path *left,
			
 
				 			    struct ocfs2_path *right);
			
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1735,6 +1735,9 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 
				 			goto out;
			
 
				 		}
			
 
				 
			
 
				+		if (data_ac)
			
 
				+			data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
			
 
				+
			
 
				 		credits = ocfs2_calc_extend_credits(inode->i_sb,
			
 
				 						    &di->id2.i_list,
			
 
				 						    clusters_to_alloc);
			
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -116,6 +116,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
 
				 	define_mask(ERROR),
			
 
				 	define_mask(NOTICE),
			
 
				 	define_mask(KTHREAD),
			
 
				+	define_mask(RESERVATIONS),
			
 
				 };
			
 
				 
			
 
				 static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
			
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -119,6 +119,7 @@
 
				 #define ML_ERROR	0x0000000100000000ULL /* sent to KERN_ERR */
			
 
				 #define ML_NOTICE	0x0000000200000000ULL /* setn to KERN_NOTICE */
			
 
				 #define ML_KTHREAD	0x0000000400000000ULL /* kernel thread activity */
			
 
				+#define	ML_RESERVATIONS	0x0000000800000000ULL /* ocfs2 alloc reservations */
			
 
				 
			
 
				 #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
			
 
				 #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
			
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -583,6 +583,9 @@ static void o2net_state_change(struct sock *sk)
 
				 			o2net_sc_queue_work(sc, &sc->sc_connect_work);
			
 
				 			break;
			
 
				 		default:
			
 
				+			printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT
			
 
				+			      " shutdown, state %d\n",
			
 
				+			      SC_NODEF_ARGS(sc), sk->sk_state);
			
 
				 			o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
			
 
				 			break;
			
 
				 	}
			
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1194,7 +1194,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
 
				 			else
			
 
				 				de->inode = 0;
			
 
				 			dir->i_version++;
			
 
				-			status = ocfs2_journal_dirty(handle, bh);
			
 
				+			ocfs2_journal_dirty(handle, bh);
			
 
				 			goto bail;
			
 
				 		}
			
 
				 		i += le16_to_cpu(de->rec_len);
			
@@ -1752,7 +1752,7 @@ int __ocfs2_add_entry(handle_t *handle,
 
				 				ocfs2_recalc_free_list(dir, handle, lookup);
			
 
				 
			
 
				 			dir->i_version++;
			
 
				-			status = ocfs2_journal_dirty(handle, insert_bh);
			
 
				+			ocfs2_journal_dirty(handle, insert_bh);
			
 
				 			retval = 0;
			
 
				 			goto bail;
			
 
				 		}
			
@@ -2297,12 +2297,7 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
 
				 	}
			
 
				 
			
 
				 	ocfs2_fill_initial_dirents(inode, parent, data->id_data, size);
			
 
				-
			
 
				 	ocfs2_journal_dirty(handle, di_bh);
			
 
				-	if (ret) {
			
 
				-		mlog_errno(ret);
			
 
				-		goto out;
			
 
				-	}
			
 
				 
			
 
				 	i_size_write(inode, size);
			
 
				 	inode->i_nlink = 2;
			
@@ -2366,11 +2361,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
 
				 		ocfs2_init_dir_trailer(inode, new_bh, size);
			
 
				 	}
			
 
				 
			
 
				-	status = ocfs2_journal_dirty(handle, new_bh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto bail;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, new_bh);
			
 
				 
			
 
				 	i_size_write(inode, inode->i_sb->s_blocksize);
			
 
				 	inode->i_nlink = 2;
			
@@ -2404,15 +2395,15 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
 
				 	int ret;
			
 
				 	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
			
 
				 	u16 dr_suballoc_bit;
			
 
				-	u64 dr_blkno;
			
 
				+	u64 suballoc_loc, dr_blkno;
			
 
				 	unsigned int num_bits;
			
 
				 	struct buffer_head *dx_root_bh = NULL;
			
 
				 	struct ocfs2_dx_root_block *dx_root;
			
 
				 	struct ocfs2_dir_block_trailer *trailer =
			
 
				 		ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb);
			
 
				 
			
 
				-	ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, &dr_suballoc_bit,
			
 
				-				   &num_bits, &dr_blkno);
			
 
				+	ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
			
 
				+				   &dr_suballoc_bit, &num_bits, &dr_blkno);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
 
				 		goto out;
			
@@ -2440,6 +2431,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
 
				 	memset(dx_root, 0, osb->sb->s_blocksize);
			
 
				 	strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE);
			
 
				 	dx_root->dr_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
			
 
				+	dx_root->dr_suballoc_loc = cpu_to_le64(suballoc_loc);
			
 
				 	dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit);
			
 
				 	dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
			
 
				 	dx_root->dr_blkno = cpu_to_le64(dr_blkno);
			
@@ -2458,10 +2450,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
 
				 		dx_root->dr_list.l_count =
			
 
				 			cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
			
 
				 	}
			
 
				-
			
 
				-	ret = ocfs2_journal_dirty(handle, dx_root_bh);
			
 
				-	if (ret)
			
 
				-		mlog_errno(ret);
			
 
				+	ocfs2_journal_dirty(handle, dx_root_bh);
			
 
				 
			
 
				 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
			
 
				 				      OCFS2_JOURNAL_ACCESS_CREATE);
			
@@ -2475,9 +2464,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
 
				 	OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
			
 
				 	di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
			
 
				 
			
 
				-	ret = ocfs2_journal_dirty(handle, di_bh);
			
 
				-	if (ret)
			
 
				-		mlog_errno(ret);
			
 
				+	ocfs2_journal_dirty(handle, di_bh);
			
 
				 
			
 
				 	*ret_dx_root_bh = dx_root_bh;
			
 
				 	dx_root_bh = NULL;
			
@@ -2558,7 +2545,7 @@ static int __ocfs2_dx_dir_new_cluster(struct inode *dir,
 
				 	 * chance of contiguousness as the directory grows in number
			
 
				 	 * of entries.
			
 
				 	 */
			
 
				-	ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1, 1, &phys, &num);
			
 
				+	ret = __ocfs2_claim_clusters(handle, data_ac, 1, 1, &phys, &num);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
 
				 		goto out;
			
@@ -2991,7 +2978,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 
				 	 * if we only get one now, that's enough to continue. The rest
			
 
				 	 * will be claimed after the conversion to extents.
			
 
				 	 */
			
 
				-	ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
			
 
				+	if (ocfs2_dir_resv_allowed(osb))
			
 
				+		data_ac->ac_resv = &oi->ip_la_data_resv;
			
 
				+	ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off, &len);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
 
				 		goto out_commit;
			
@@ -3034,11 +3023,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 
				 		ocfs2_init_dir_trailer(dir, dirdata_bh, i);
			
 
				 	}
			
 
				 
			
 
				-	ret = ocfs2_journal_dirty(handle, dirdata_bh);
			
 
				-	if (ret) {
			
 
				-		mlog_errno(ret);
			
 
				-		goto out_commit;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, dirdata_bh);
			
 
				 
			
 
				 	if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
			
 
				 		/*
			
@@ -3104,11 +3089,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 
				 	 */
			
 
				 	dir->i_blocks = ocfs2_inode_sector_count(dir);
			
 
				 
			
 
				-	ret = ocfs2_journal_dirty(handle, di_bh);
			
 
				-	if (ret) {
			
 
				-		mlog_errno(ret);
			
 
				-		goto out_commit;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, di_bh);
			
 
				 
			
 
				 	if (ocfs2_supports_indexed_dirs(osb)) {
			
 
				 		ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh,
			
@@ -3138,7 +3119,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 
				 	 * pass. Claim the 2nd cluster as a separate extent.
			
 
				 	 */
			
 
				 	if (alloc > len) {
			
 
				-		ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
			
 
				+		ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off,
			
 
				 					   &len);
			
 
				 		if (ret) {
			
 
				 			mlog_errno(ret);
			
@@ -3369,6 +3350,9 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 
				 			goto bail;
			
 
				 		}
			
 
				 
			
 
				+		if (ocfs2_dir_resv_allowed(osb))
			
 
				+			data_ac->ac_resv = &OCFS2_I(dir)->ip_la_data_resv;
			
 
				+
			
 
				 		credits = ocfs2_calc_extend_credits(sb, el, 1);
			
 
				 	} else {
			
 
				 		spin_unlock(&OCFS2_I(dir)->ip_lock);
			
@@ -3423,11 +3407,7 @@ do_extend:
 
				 	} else {
			
 
				 		de->rec_len = cpu_to_le16(sb->s_blocksize);
			
 
				 	}
			
 
				-	status = ocfs2_journal_dirty(handle, new_bh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto bail;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, new_bh);
			
 
				 
			
 
				 	dir_i_size += dir->i_sb->s_blocksize;
			
 
				 	i_size_write(dir, dir_i_size);
			
@@ -3906,11 +3886,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
 
				 	     sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp,
			
 
				 	     dx_leaf_sort_swap);
			
 
				 
			
 
				-	ret = ocfs2_journal_dirty(handle, dx_leaf_bh);
			
 
				-	if (ret) {
			
 
				-		mlog_errno(ret);
			
 
				-		goto out_commit;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, dx_leaf_bh);
			
 
				 
			
 
				 	ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, insert_hash,
			
 
				 					   &split_hash);
			
@@ -4490,7 +4466,10 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
 
				 
			
 
				 	blk = le64_to_cpu(dx_root->dr_blkno);
			
 
				 	bit = le16_to_cpu(dx_root->dr_suballoc_bit);
			
 
				-	bg_blkno = ocfs2_which_suballoc_group(blk, bit);
			
 
				+	if (dx_root->dr_suballoc_loc)
			
 
				+		bg_blkno = le64_to_cpu(dx_root->dr_suballoc_loc);
			
 
				+	else
			
 
				+		bg_blkno = ocfs2_which_suballoc_group(blk, bit);
			
 
				 	ret = ocfs2_free_suballoc_bits(handle, dx_alloc_inode, dx_alloc_bh,
			
 
				 				       bit, bg_blkno, 1);
			
 
				 	if (ret)
			
@@ -4551,8 +4530,8 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
 
				 
			
 
				 		p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
			
 
				 
			
 
				-		ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen,
			
 
				-					       &dealloc);
			
 
				+		ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
			
 
				+					       &dealloc, 0);
			
 
				 		if (ret) {
			
 
				 			mlog_errno(ret);
			
 
				 			goto out;
			
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -88,7 +88,7 @@ static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
			
 
				+void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
			
 
				 {
			
 
				 	mlog_entry_void();
			
 
				 
			
@@ -145,7 +145,7 @@ void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 
				 }
			
 
				 
			
 
				 
			
 
				-static void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
			
 
				+void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
			
 
				 {
			
 
				 	mlog_entry_void();
			
 
				 
			
@@ -451,7 +451,9 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 
				 	ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen,
			
 
				 				     lock->ml.node, &status);
			
 
				 	if (ret < 0)
			
 
				-		mlog_errno(ret);
			
 
				+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
			
 
				+		     "node %u\n", ret, DLM_PROXY_AST_MSG, dlm->key,
			
 
				+		     lock->ml.node);
			
 
				 	else {
			
 
				 		if (status == DLM_RECOVERING) {
			
 
				 			mlog(ML_ERROR, "sent AST to node %u, it thinks this "
			
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -37,7 +37,7 @@
 
				 #define DLM_THREAD_SHUFFLE_INTERVAL    5     // flush everything every 5 passes
			
 
				 #define DLM_THREAD_MS                  200   // flush at least every 200 ms
			
 
				 
			
 
				-#define DLM_HASH_SIZE_DEFAULT	(1 << 14)
			
 
				+#define DLM_HASH_SIZE_DEFAULT	(1 << 17)
			
 
				 #if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE
			
 
				 # define DLM_HASH_PAGES		1
			
 
				 #else
			
@@ -904,6 +904,8 @@ void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
 
				 
			
 
				 void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
			
 
				 void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
			
 
				+void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
			
 
				+void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
			
 
				 void dlm_do_local_ast(struct dlm_ctxt *dlm,
			
 
				 		      struct dlm_lock_resource *res,
			
 
				 		      struct dlm_lock *lock);
			
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -390,7 +390,9 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
 
				 		} else if (ret != DLM_NORMAL && ret != DLM_NOTQUEUED)
			
 
				 			dlm_error(ret);
			
 
				 	} else {
			
 
				-		mlog_errno(tmpret);
			
 
				+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
			
 
				+		     "node %u\n", tmpret, DLM_CONVERT_LOCK_MSG, dlm->key,
			
 
				+		     res->owner);
			
 
				 		if (dlm_is_host_down(tmpret)) {
			
 
				 			/* instead of logging the same network error over
			
 
				 			 * and over, sleep here and wait for the heartbeat
			
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -511,7 +511,7 @@ static void __dlm_print_nodes(struct dlm_ctxt *dlm)
 
				 
			
 
				 	assert_spin_locked(&dlm->spinlock);
			
 
				 
			
 
				-	printk(KERN_INFO "ocfs2_dlm: Nodes in domain (\"%s\"): ", dlm->name);
			
 
				+	printk(KERN_NOTICE "o2dlm: Nodes in domain %s: ", dlm->name);
			
 
				 
			
 
				 	while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES,
			
 
				 				     node + 1)) < O2NM_MAX_NODES) {
			
@@ -534,7 +534,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
 
				 
			
 
				 	node = exit_msg->node_idx;
			
 
				 
			
 
				-	printk(KERN_INFO "ocfs2_dlm: Node %u leaves domain %s\n", node, dlm->name);
			
 
				+	printk(KERN_NOTICE "o2dlm: Node %u leaves domain %s\n", node, dlm->name);
			
 
				 
			
 
				 	spin_lock(&dlm->spinlock);
			
 
				 	clear_bit(node, dlm->domain_map);
			
@@ -565,7 +565,9 @@ static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm,
 
				 	status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key,
			
 
				 				    &leave_msg, sizeof(leave_msg), node,
			
 
				 				    NULL);
			
 
				-
			
 
				+	if (status < 0)
			
 
				+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
			
 
				+		     "node %u\n", status, DLM_EXIT_DOMAIN_MSG, dlm->key, node);
			
 
				 	mlog(0, "status return %d from o2net_send_message\n", status);
			
 
				 
			
 
				 	return status;
			
@@ -904,7 +906,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
 
				 		set_bit(assert->node_idx, dlm->domain_map);
			
 
				 		__dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
			
 
				 
			
 
				-		printk(KERN_INFO "ocfs2_dlm: Node %u joins domain %s\n",
			
 
				+		printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n",
			
 
				 		       assert->node_idx, dlm->name);
			
 
				 		__dlm_print_nodes(dlm);
			
 
				 
			
@@ -962,7 +964,9 @@ static int dlm_send_one_join_cancel(struct dlm_ctxt *dlm,
 
				 				    &cancel_msg, sizeof(cancel_msg), node,
			
 
				 				    NULL);
			
 
				 	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
			
 
				+		     "node %u\n", status, DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY,
			
 
				+		     node);
			
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
@@ -1029,10 +1033,11 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
 
				 	byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES);
			
 
				 
			
 
				 	status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
			
 
				-				    sizeof(join_msg), node,
			
 
				-				    &join_resp);
			
 
				+				    sizeof(join_msg), node, &join_resp);
			
 
				 	if (status < 0 && status != -ENOPROTOOPT) {
			
 
				-		mlog_errno(status);
			
 
				+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
			
 
				+		     "node %u\n", status, DLM_QUERY_JOIN_MSG, DLM_MOD_KEY,
			
 
				+		     node);
			
 
				 		goto bail;
			
 
				 	}
			
 
				 	dlm_query_join_wire_to_packet(join_resp, &packet);
			
@@ -1103,7 +1108,9 @@ static int dlm_send_one_join_assert(struct dlm_ctxt *dlm,
 
				 				    &assert_msg, sizeof(assert_msg), node,
			
 
				 				    NULL);
			
 
				 	if (status < 0)
			
 
				-		mlog_errno(status);
			
 
				+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
			
 
				+		     "node %u\n", status, DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY,
			
 
				+		     node);
			
 
				 
			
 
				 	return status;
			
 
				 }
			
@@ -1516,7 +1523,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 
				 		goto leave;
			
 
				 	}
			
 
				 
			
 
				-	dlm->name = kmalloc(strlen(domain) + 1, GFP_KERNEL);
			
 
				+	dlm->name = kstrdup(domain, GFP_KERNEL);
			
 
				 	if (dlm->name == NULL) {
			
 
				 		mlog_errno(-ENOMEM);
			
 
				 		kfree(dlm);
			
@@ -1550,7 +1557,6 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
 
				 	for (i = 0; i < DLM_HASH_BUCKETS; i++)
			
 
				 		INIT_HLIST_HEAD(dlm_master_hash(dlm, i));
			
 
				 
			
 
				-	strcpy(dlm->name, domain);
			
 
				 	dlm->key = key;
			
 
				 	dlm->node_num = o2nm_this_node();
			
 
				 
			
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -329,7 +329,9 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
 
				 			BUG();
			
 
				 		}
			
 
				 	} else {
			
 
				-		mlog_errno(tmpret);
			
 
				+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
			
 
				+		     "node %u\n", tmpret, DLM_CREATE_LOCK_MSG, dlm->key,
			
 
				+		     res->owner);
			
 
				 		if (dlm_is_host_down(tmpret)) {
			
 
				 			ret = DLM_RECOVERING;
			
 
				 			mlog(0, "node %u died so returning DLM_RECOVERING "
			
@@ -429,7 +431,7 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
 
				 	struct dlm_lock *lock;
			
 
				 	int kernel_allocated = 0;
			
 
				 
			
 
				-	lock = (struct dlm_lock *) kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS);
			
 
				+	lock = kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS);
			
 
				 	if (!lock)
			
 
				 		return NULL;
			
 
				 
			
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -617,13 +617,11 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
 
				 {
			
 
				 	struct dlm_lock_resource *res = NULL;
			
 
				 
			
 
				-	res = (struct dlm_lock_resource *)
			
 
				-				kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS);
			
 
				+	res = kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS);
			
 
				 	if (!res)
			
 
				 		goto error;
			
 
				 
			
 
				-	res->lockname.name = (char *)
			
 
				-				kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS);
			
 
				+	res->lockname.name = kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS);
			
 
				 	if (!res->lockname.name)
			
 
				 		goto error;
			
 
				 
			
@@ -757,8 +755,7 @@ lookup:
 
				 		spin_unlock(&dlm->spinlock);
			
 
				 		mlog(0, "allocating a new resource\n");
			
 
				 		/* nothing found and we need to allocate one. */
			
 
				-		alloc_mle = (struct dlm_master_list_entry *)
			
 
				-			kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
			
 
				+		alloc_mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
			
 
				 		if (!alloc_mle)
			
 
				 			goto leave;
			
 
				 		res = dlm_new_lockres(dlm, lockid, namelen);
			
@@ -1542,8 +1539,7 @@ way_up_top:
 
				 			spin_unlock(&dlm->master_lock);
			
 
				 			spin_unlock(&dlm->spinlock);
			
 
				 
			
 
				-			mle = (struct dlm_master_list_entry *)
			
 
				-				kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
			
 
				+			mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
			
 
				 			if (!mle) {
			
 
				 				response = DLM_MASTER_RESP_ERROR;
			
 
				 				mlog_errno(-ENOMEM);
			
@@ -1666,7 +1662,9 @@ again:
 
				 		tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key,
			
 
				 					    &assert, sizeof(assert), to, &r);
			
 
				 		if (tmpret < 0) {
			
 
				-			mlog(0, "assert_master returned %d!\n", tmpret);
			
 
				+			mlog(ML_ERROR, "Error %d when sending message %u (key "
			
 
				+			     "0x%x) to node %u\n", tmpret,
			
 
				+			     DLM_ASSERT_MASTER_MSG, dlm->key, to);
			
 
				 			if (!dlm_is_host_down(tmpret)) {
			
 
				 				mlog(ML_ERROR, "unhandled error=%d!\n", tmpret);
			
 
				 				BUG();
			
@@ -2205,7 +2203,9 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 
				 	ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key,
			
 
				 				 &deref, sizeof(deref), res->owner, &r);
			
 
				 	if (ret < 0)
			
 
				-		mlog_errno(ret);
			
 
				+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
			
 
				+		     "node %u\n", ret, DLM_DEREF_LOCKRES_MSG, dlm->key,
			
 
				+		     res->owner);
			
 
				 	else if (r < 0) {
			
 
				 		/* BAD.  other node says I did not have a ref. */
			
 
				 		mlog(ML_ERROR,"while dropping ref on %s:%.*s "
			
@@ -2452,8 +2452,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
 
				 		goto leave;
			
 
				 	}
			
 
				 
			
 
				-	mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache,
			
 
				-								GFP_NOFS);
			
 
				+	mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
			
 
				 	if (!mle) {
			
 
				 		mlog_errno(ret);
			
 
				 		goto leave;
			
@@ -2975,7 +2974,9 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
 
				 					 &migrate, sizeof(migrate), nodenum,
			
 
				 					 &status);
			
 
				 		if (ret < 0) {
			
 
				-			mlog(0, "migrate_request returned %d!\n", ret);
			
 
				+			mlog(ML_ERROR, "Error %d when sending message %u (key "
			
 
				+			     "0x%x) to node %u\n", ret, DLM_MIGRATE_REQUEST_MSG,
			
 
				+			     dlm->key, nodenum);
			
 
				 			if (!dlm_is_host_down(ret)) {
			
 
				 				mlog(ML_ERROR, "unhandled error=%d!\n", ret);
			
 
				 				BUG();
			
@@ -3033,8 +3034,7 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
 
				 	hash = dlm_lockid_hash(name, namelen);
			
 
				 
			
 
				 	/* preallocate.. if this fails, abort */
			
 
				-	mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache,
			
 
				-							 GFP_NOFS);
			
 
				+	mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
			
 
				 
			
 
				 	if (!mle) {
			
 
				 		ret = -ENOMEM;
			
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -803,7 +803,9 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
 
				 
			
 
				 	/* negative status is handled by caller */
			
 
				 	if (ret < 0)
			
 
				-		mlog_errno(ret);
			
 
				+		mlog(ML_ERROR, "Error %d when sending message %u (key "
			
 
				+		     "0x%x) to node %u\n", ret, DLM_LOCK_REQUEST_MSG,
			
 
				+		     dlm->key, request_from);
			
 
				 
			
 
				 	// return from here, then
			
 
				 	// sleep until all received or error
			
@@ -955,10 +957,10 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
 
				 	ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
			
 
				 				 sizeof(done_msg), send_to, &tmpret);
			
 
				 	if (ret < 0) {
			
 
				+		mlog(ML_ERROR, "Error %d when sending message %u (key "
			
 
				+		     "0x%x) to node %u\n", ret, DLM_RECO_DATA_DONE_MSG,
			
 
				+		     dlm->key, send_to);
			
 
				 		if (!dlm_is_host_down(ret)) {
			
 
				-			mlog_errno(ret);
			
 
				-			mlog(ML_ERROR, "%s: unknown error sending data-done "
			
 
				-			     "to %u\n", dlm->name, send_to);
			
 
				 			BUG();
			
 
				 		}
			
 
				 	} else
			
@@ -1126,7 +1128,9 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
 
				 	if (ret < 0) {
			
 
				 		/* XXX: negative status is not handled.
			
 
				 		 * this will end up killing this node. */
			
 
				-		mlog_errno(ret);
			
 
				+		mlog(ML_ERROR, "Error %d when sending message %u (key "
			
 
				+		     "0x%x) to node %u\n", ret, DLM_MIG_LOCKRES_MSG,
			
 
				+		     dlm->key, send_to);
			
 
				 	} else {
			
 
				 		/* might get an -ENOMEM back here */
			
 
				 		ret = status;
			
@@ -1642,7 +1646,9 @@ int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 
				 				 &req, sizeof(req), nodenum, &status);
			
 
				 	/* XXX: negative status not handled properly here. */
			
 
				 	if (ret < 0)
			
 
				-		mlog_errno(ret);
			
 
				+		mlog(ML_ERROR, "Error %d when sending message %u (key "
			
 
				+		     "0x%x) to node %u\n", ret, DLM_MASTER_REQUERY_MSG,
			
 
				+		     dlm->key, nodenum);
			
 
				 	else {
			
 
				 		BUG_ON(status < 0);
			
 
				 		BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN);
			
@@ -2640,7 +2646,7 @@ retry:
 
				 		if (dlm_is_host_down(ret)) {
			
 
				 			/* node is down.  not involved in recovery
			
 
				 			 * so just keep going */
			
 
				-			mlog(0, "%s: node %u was down when sending "
			
 
				+			mlog(ML_NOTICE, "%s: node %u was down when sending "
			
 
				 			     "begin reco msg (%d)\n", dlm->name, nodenum, ret);
			
 
				 			ret = 0;
			
 
				 		}
			
@@ -2660,11 +2666,12 @@ retry:
 
				 		}
			
 
				 		if (ret < 0) {
			
 
				 			struct dlm_lock_resource *res;
			
 
				+
			
 
				 			/* this is now a serious problem, possibly ENOMEM
			
 
				 			 * in the network stack.  must retry */
			
 
				 			mlog_errno(ret);
			
 
				 			mlog(ML_ERROR, "begin reco of dlm %s to node %u "
			
 
				-			    " returned %d\n", dlm->name, nodenum, ret);
			
 
				+			     "returned %d\n", dlm->name, nodenum, ret);
			
 
				 			res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME,
			
 
				 						 DLM_RECOVERY_LOCK_NAME_LEN);
			
 
				 			if (res) {
			
@@ -2789,7 +2796,9 @@ stage2:
 
				 		if (ret >= 0)
			
 
				 			ret = status;
			
 
				 		if (ret < 0) {
			
 
				-			mlog_errno(ret);
			
 
				+			mlog(ML_ERROR, "Error %d when sending message %u (key "
			
 
				+			     "0x%x) to node %u\n", ret, DLM_FINALIZE_RECO_MSG,
			
 
				+			     dlm->key, nodenum);
			
 
				 			if (dlm_is_host_down(ret)) {
			
 
				 				/* this has no effect on this recovery
			
 
				 				 * session, so set the status to zero to
			
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -309,6 +309,7 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
 
				 	 * spinlock, and because we know that it is not migrating/
			
 
				 	 * recovering/in-progress, it is fine to reserve asts and
			
 
				 	 * basts right before queueing them all throughout */
			
 
				+	assert_spin_locked(&dlm->ast_lock);
			
 
				 	assert_spin_locked(&res->spinlock);
			
 
				 	BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING|
			
 
				 			      DLM_LOCK_RES_RECOVERING|
			
@@ -337,7 +338,7 @@ converting:
 
				 			/* queue the BAST if not already */
			
 
				 			if (lock->ml.highest_blocked == LKM_IVMODE) {
			
 
				 				__dlm_lockres_reserve_ast(res);
			
 
				-				dlm_queue_bast(dlm, lock);
			
 
				+				__dlm_queue_bast(dlm, lock);
			
 
				 			}
			
 
				 			/* update the highest_blocked if needed */
			
 
				 			if (lock->ml.highest_blocked < target->ml.convert_type)
			
@@ -355,7 +356,7 @@ converting:
 
				 			can_grant = 0;
			
 
				 			if (lock->ml.highest_blocked == LKM_IVMODE) {
			
 
				 				__dlm_lockres_reserve_ast(res);
			
 
				-				dlm_queue_bast(dlm, lock);
			
 
				+				__dlm_queue_bast(dlm, lock);
			
 
				 			}
			
 
				 			if (lock->ml.highest_blocked < target->ml.convert_type)
			
 
				 				lock->ml.highest_blocked =
			
@@ -383,7 +384,7 @@ converting:
 
				 		spin_unlock(&target->spinlock);
			
 
				 
			
 
				 		__dlm_lockres_reserve_ast(res);
			
 
				-		dlm_queue_ast(dlm, target);
			
 
				+		__dlm_queue_ast(dlm, target);
			
 
				 		/* go back and check for more */
			
 
				 		goto converting;
			
 
				 	}
			
@@ -402,7 +403,7 @@ blocked:
 
				 			can_grant = 0;
			
 
				 			if (lock->ml.highest_blocked == LKM_IVMODE) {
			
 
				 				__dlm_lockres_reserve_ast(res);
			
 
				-				dlm_queue_bast(dlm, lock);
			
 
				+				__dlm_queue_bast(dlm, lock);
			
 
				 			}
			
 
				 			if (lock->ml.highest_blocked < target->ml.type)
			
 
				 				lock->ml.highest_blocked = target->ml.type;
			
@@ -418,7 +419,7 @@ blocked:
 
				 			can_grant = 0;
			
 
				 			if (lock->ml.highest_blocked == LKM_IVMODE) {
			
 
				 				__dlm_lockres_reserve_ast(res);
			
 
				-				dlm_queue_bast(dlm, lock);
			
 
				+				__dlm_queue_bast(dlm, lock);
			
 
				 			}
			
 
				 			if (lock->ml.highest_blocked < target->ml.type)
			
 
				 				lock->ml.highest_blocked = target->ml.type;
			
@@ -444,7 +445,7 @@ blocked:
 
				 		spin_unlock(&target->spinlock);
			
 
				 
			
 
				 		__dlm_lockres_reserve_ast(res);
			
 
				-		dlm_queue_ast(dlm, target);
			
 
				+		__dlm_queue_ast(dlm, target);
			
 
				 		/* go back and check for more */
			
 
				 		goto converting;
			
 
				 	}
			
@@ -674,6 +675,7 @@ static int dlm_thread(void *data)
 
				 		 	/* lockres can be re-dirtied/re-added to the
			
 
				 			 * dirty_list in this gap, but that is ok */
			
 
				 
			
 
				+			spin_lock(&dlm->ast_lock);
			
 
				 			spin_lock(&res->spinlock);
			
 
				 			if (res->owner != dlm->node_num) {
			
 
				 				__dlm_print_one_lock_resource(res);
			
@@ -694,6 +696,7 @@ static int dlm_thread(void *data)
 
				 				/* move it to the tail and keep going */
			
 
				 				res->state &= ~DLM_LOCK_RES_DIRTY;
			
 
				 				spin_unlock(&res->spinlock);
			
 
				+				spin_unlock(&dlm->ast_lock);
			
 
				 				mlog(0, "delaying list shuffling for in-"
			
 
				 				     "progress lockres %.*s, state=%d\n",
			
 
				 				     res->lockname.len, res->lockname.name,
			
@@ -715,6 +718,7 @@ static int dlm_thread(void *data)
 
				 			dlm_shuffle_lists(dlm, res);
			
 
				 			res->state &= ~DLM_LOCK_RES_DIRTY;
			
 
				 			spin_unlock(&res->spinlock);
			
 
				+			spin_unlock(&dlm->ast_lock);
			
 
				 
			
 
				 			dlm_lockres_calc_usage(dlm, res);
			
 
				 
			
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -354,7 +354,8 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
 
				 			mlog(0, "master was in-progress.  retry\n");
			
 
				 		ret = status;
			
 
				 	} else {
			
 
				-		mlog_errno(tmpret);
			
 
				+		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
			
 
				+		     "node %u\n", tmpret, DLM_UNLOCK_LOCK_MSG, dlm->key, owner);
			
 
				 		if (dlm_is_host_down(tmpret)) {
			
 
				 			/* NOTE: this seems strange, but it is what we want.
			
 
				 			 * when the master goes down during a cancel or
			
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -278,10 +278,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
 
				 	inode->i_atime = CURRENT_TIME;
			
 
				 	di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
			
 
				 	di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
			
 
				-
			
 
				-	ret = ocfs2_journal_dirty(handle, bh);
			
 
				-	if (ret < 0)
			
 
				-		mlog_errno(ret);
			
 
				+	ocfs2_journal_dirty(handle, bh);
			
 
				 
			
 
				 out_commit:
			
 
				 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
			
@@ -430,9 +427,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
 
				 	di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
			
 
				 	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
			
 
				 
			
 
				-	status = ocfs2_journal_dirty(handle, fe_bh);
			
 
				-	if (status < 0)
			
 
				-		mlog_errno(status);
			
 
				+	ocfs2_journal_dirty(handle, fe_bh);
			
 
				 
			
 
				 out_commit:
			
 
				 	ocfs2_commit_trans(osb, handle);
			
@@ -449,7 +444,6 @@ static int ocfs2_truncate_file(struct inode *inode,
 
				 	int status = 0;
			
 
				 	struct ocfs2_dinode *fe = NULL;
			
 
				 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				-	struct ocfs2_truncate_context *tc = NULL;
			
 
				 
			
 
				 	mlog_entry("(inode = %llu, new_i_size = %llu\n",
			
 
				 		   (unsigned long long)OCFS2_I(inode)->ip_blkno,
			
@@ -488,6 +482,9 @@ static int ocfs2_truncate_file(struct inode *inode,
 
				 
			
 
				 	down_write(&OCFS2_I(inode)->ip_alloc_sem);
			
 
				 
			
 
				+	ocfs2_resv_discard(&osb->osb_la_resmap,
			
 
				+			   &OCFS2_I(inode)->ip_la_data_resv);
			
 
				+
			
 
				 	/*
			
 
				 	 * The inode lock forced other nodes to sync and drop their
			
 
				 	 * pages, which (correctly) happens even if we have a truncate
			
@@ -517,13 +514,7 @@ static int ocfs2_truncate_file(struct inode *inode,
 
				 		goto bail_unlock_sem;
			
 
				 	}
			
 
				 
			
 
				-	status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto bail_unlock_sem;
			
 
				-	}
			
 
				-
			
 
				-	status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
			
 
				+	status = ocfs2_commit_truncate(osb, inode, di_bh);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
 
				 		goto bail_unlock_sem;
			
@@ -666,11 +657,7 @@ restarted_transaction:
 
				 		goto leave;
			
 
				 	}
			
 
				 
			
 
				-	status = ocfs2_journal_dirty(handle, bh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto leave;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, bh);
			
 
				 
			
 
				 	spin_lock(&OCFS2_I(inode)->ip_lock);
			
 
				 	clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
			
@@ -1195,9 +1182,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
 
				 	di = (struct ocfs2_dinode *) bh->b_data;
			
 
				 	di->i_mode = cpu_to_le16(inode->i_mode);
			
 
				 
			
 
				-	ret = ocfs2_journal_dirty(handle, bh);
			
 
				-	if (ret < 0)
			
 
				-		mlog_errno(ret);
			
 
				+	ocfs2_journal_dirty(handle, bh);
			
 
				 
			
 
				 out_trans:
			
 
				 	ocfs2_commit_trans(osb, handle);
			
@@ -1434,16 +1419,90 @@ out:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+static int ocfs2_find_rec(struct ocfs2_extent_list *el, u32 pos)
			
 
				+{
			
 
				+	int i;
			
 
				+	struct ocfs2_extent_rec *rec = NULL;
			
 
				+
			
 
				+	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
			
 
				+
			
 
				+		rec = &el->l_recs[i];
			
 
				+
			
 
				+		if (le32_to_cpu(rec->e_cpos) < pos)
			
 
				+			break;
			
 
				+	}
			
 
				+
			
 
				+	return i;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Helper to calculate the punching pos and length in one run; we handle the
			
 
				+ * following three cases in order:
			
 
				+ *
			
 
				+ * - remove the entire record
			
 
				+ * - remove a partial record
			
 
				+ * - no record needs to be removed (hole-punching completed)
			
 
				+*/
			
 
				+static void ocfs2_calc_trunc_pos(struct inode *inode,
			
 
				+				 struct ocfs2_extent_list *el,
			
 
				+				 struct ocfs2_extent_rec *rec,
			
 
				+				 u32 trunc_start, u32 *trunc_cpos,
			
 
				+				 u32 *trunc_len, u32 *trunc_end,
			
 
				+				 u64 *blkno, int *done)
			
 
				+{
			
 
				+	int ret = 0;
			
 
				+	u32 coff, range;
			
 
				+
			
 
				+	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
			
 
				+
			
 
				+	if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
			
 
				+		*trunc_cpos = le32_to_cpu(rec->e_cpos);
			
 
				+		/*
			
 
				+		 * Skip holes if any.
			
 
				+		 */
			
 
				+		if (range < *trunc_end)
			
 
				+			*trunc_end = range;
			
 
				+		*trunc_len = *trunc_end - le32_to_cpu(rec->e_cpos);
			
 
				+		*blkno = le64_to_cpu(rec->e_blkno);
			
 
				+		*trunc_end = le32_to_cpu(rec->e_cpos);
			
 
				+	} else if (range > trunc_start) {
			
 
				+		*trunc_cpos = trunc_start;
			
 
				+		*trunc_len = *trunc_end - trunc_start;
			
 
				+		coff = trunc_start - le32_to_cpu(rec->e_cpos);
			
 
				+		*blkno = le64_to_cpu(rec->e_blkno) +
			
 
				+				ocfs2_clusters_to_blocks(inode->i_sb, coff);
			
 
				+		*trunc_end = trunc_start;
			
 
				+	} else {
			
 
				+		/*
			
 
				+		 * There are two possibilities:
			
 
				+		 *
			
 
				+		 * - last record has been removed
			
 
				+		 * - trunc_start was within a hole
			
 
				+		 *
			
 
				+		 * Both cases mean that hole punching is complete.
			
 
				+		 */
			
 
				+		ret = 1;
			
 
				+	}
			
 
				+
			
 
				+	*done = ret;
			
 
				+}
			
 
				+
			
 
				 static int ocfs2_remove_inode_range(struct inode *inode,
			
 
				 				    struct buffer_head *di_bh, u64 byte_start,
			
 
				 				    u64 byte_len)
			
 
				 {
			
 
				-	int ret = 0;
			
 
				-	u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size;
			
 
				+	int ret = 0, flags = 0, done = 0, i;
			
 
				+	u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos;
			
 
				+	u32 cluster_in_el;
			
 
				 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				 	struct ocfs2_cached_dealloc_ctxt dealloc;
			
 
				 	struct address_space *mapping = inode->i_mapping;
			
 
				 	struct ocfs2_extent_tree et;
			
 
				+	struct ocfs2_path *path = NULL;
			
 
				+	struct ocfs2_extent_list *el = NULL;
			
 
				+	struct ocfs2_extent_rec *rec = NULL;
			
 
				+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
			
 
				+	u64 blkno, refcount_loc = le64_to_cpu(di->i_refcount_loc);
			
 
				 
			
 
				 	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
			
 
				 	ocfs2_init_dealloc_ctxt(&dealloc);
			
@@ -1469,17 +1528,35 @@ static int ocfs2_remove_inode_range(struct inode *inode,
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				+	/*
			
 
				+	 * For reflinks, we may need to CoW 2 clusters which might be
			
 
				+	 * partially zero'd later, if hole's start and end offset were
			
 
				+	 * within one cluster (i.e. not exactly aligned to clustersize).
			
 
				+	 */
			
 
				+
			
 
				+	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
			
 
				+
			
 
				+		ret = ocfs2_cow_file_pos(inode, di_bh, byte_start);
			
 
				+		if (ret) {
			
 
				+			mlog_errno(ret);
			
 
				+			goto out;
			
 
				+		}
			
 
				+
			
 
				+		ret = ocfs2_cow_file_pos(inode, di_bh, byte_start + byte_len);
			
 
				+		if (ret) {
			
 
				+			mlog_errno(ret);
			
 
				+			goto out;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
			
 
				-	trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits;
			
 
				-	if (trunc_len >= trunc_start)
			
 
				-		trunc_len -= trunc_start;
			
 
				-	else
			
 
				-		trunc_len = 0;
			
 
				+	trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits;
			
 
				+	cluster_in_el = trunc_end;
			
 
				 
			
 
				-	mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n",
			
 
				+	mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, cend: %u\n",
			
 
				 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
			
 
				 	     (unsigned long long)byte_start,
			
 
				-	     (unsigned long long)byte_len, trunc_start, trunc_len);
			
 
				+	     (unsigned long long)byte_len, trunc_start, trunc_end);
			
 
				 
			
 
				 	ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
			
 
				 	if (ret) {
			
@@ -1487,31 +1564,79 @@ static int ocfs2_remove_inode_range(struct inode *inode,
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	cpos = trunc_start;
			
 
				-	while (trunc_len) {
			
 
				-		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
			
 
				-					 &alloc_size, NULL);
			
 
				+	path = ocfs2_new_path_from_et(&et);
			
 
				+	if (!path) {
			
 
				+		ret = -ENOMEM;
			
 
				+		mlog_errno(ret);
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	while (trunc_end > trunc_start) {
			
 
				+
			
 
				+		ret = ocfs2_find_path(INODE_CACHE(inode), path,
			
 
				+				      cluster_in_el);
			
 
				 		if (ret) {
			
 
				 			mlog_errno(ret);
			
 
				 			goto out;
			
 
				 		}
			
 
				 
			
 
				-		if (alloc_size > trunc_len)
			
 
				-			alloc_size = trunc_len;
			
 
				+		el = path_leaf_el(path);
			
 
				 
			
 
				-		/* Only do work for non-holes */
			
 
				-		if (phys_cpos != 0) {
			
 
				-			ret = ocfs2_remove_btree_range(inode, &et, cpos,
			
 
				-						       phys_cpos, alloc_size,
			
 
				-						       &dealloc);
			
 
				+		i = ocfs2_find_rec(el, trunc_end);
			
 
				+		/*
			
 
				+		 * Need to go to previous extent block.
			
 
				+		 */
			
 
				+		if (i < 0) {
			
 
				+			if (path->p_tree_depth == 0)
			
 
				+				break;
			
 
				+
			
 
				+			ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
			
 
				+							    path,
			
 
				+							    &cluster_in_el);
			
 
				 			if (ret) {
			
 
				 				mlog_errno(ret);
			
 
				 				goto out;
			
 
				 			}
			
 
				+
			
 
				+			/*
			
 
				+			 * We've reached the leftmost extent block,
			
 
				+			 * it's safe to leave.
			
 
				+			 */
			
 
				+			if (cluster_in_el == 0)
			
 
				+				break;
			
 
				+
			
 
				+			/*
			
 
				+			 * The 'pos' searched for previous extent block is
			
 
				+			 * always one cluster less than actual trunc_end.
			
 
				+			 */
			
 
				+			trunc_end = cluster_in_el + 1;
			
 
				+
			
 
				+			ocfs2_reinit_path(path, 1);
			
 
				+
			
 
				+			continue;
			
 
				+
			
 
				+		} else
			
 
				+			rec = &el->l_recs[i];
			
 
				+
			
 
				+		ocfs2_calc_trunc_pos(inode, el, rec, trunc_start, &trunc_cpos,
			
 
				+				     &trunc_len, &trunc_end, &blkno, &done);
			
 
				+		if (done)
			
 
				+			break;
			
 
				+
			
 
				+		flags = rec->e_flags;
			
 
				+		phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
			
 
				+
			
 
				+		ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
			
 
				+					       phys_cpos, trunc_len, flags,
			
 
				+					       &dealloc, refcount_loc);
			
 
				+		if (ret < 0) {
			
 
				+			mlog_errno(ret);
			
 
				+			goto out;
			
 
				 		}
			
 
				 
			
 
				-		cpos += alloc_size;
			
 
				-		trunc_len -= alloc_size;
			
 
				+		cluster_in_el = trunc_end;
			
 
				+
			
 
				+		ocfs2_reinit_path(path, 1);
			
 
				 	}
			
 
				 
			
 
				 	ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);
			
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -376,6 +376,10 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 
				 
			
 
				 	OCFS2_I(inode)->ip_last_used_slot = 0;
			
 
				 	OCFS2_I(inode)->ip_last_used_group = 0;
			
 
				+
			
 
				+	if (S_ISDIR(inode->i_mode))
			
 
				+		ocfs2_resv_set_type(&OCFS2_I(inode)->ip_la_data_resv,
			
 
				+				    OCFS2_RESV_FLAG_DIR);
			
 
				 	mlog_exit_void();
			
 
				 }
			
 
				 
			
@@ -539,7 +543,6 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
 
				 				     struct buffer_head *fe_bh)
			
 
				 {
			
 
				 	int status = 0;
			
 
				-	struct ocfs2_truncate_context *tc = NULL;
			
 
				 	struct ocfs2_dinode *fe;
			
 
				 	handle_t *handle = NULL;
			
 
				 
			
@@ -582,13 +585,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
 
				 		ocfs2_commit_trans(osb, handle);
			
 
				 		handle = NULL;
			
 
				 
			
 
				-		status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc);
			
 
				-		if (status < 0) {
			
 
				-			mlog_errno(status);
			
 
				-			goto out;
			
 
				-		}
			
 
				-
			
 
				-		status = ocfs2_commit_truncate(osb, inode, fe_bh, tc);
			
 
				+		status = ocfs2_commit_truncate(osb, inode, fe_bh);
			
 
				 		if (status < 0) {
			
 
				 			mlog_errno(status);
			
 
				 			goto out;
			
@@ -659,12 +656,7 @@ static int ocfs2_remove_inode(struct inode *inode,
 
				 
			
 
				 	di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec);
			
 
				 	di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
			
 
				-
			
 
				-	status = ocfs2_journal_dirty(handle, di_bh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto bail_commit;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, di_bh);
			
 
				 
			
 
				 	ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh);
			
 
				 	dquot_free_inode(inode);
			
@@ -980,7 +972,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode,
 
				 void ocfs2_delete_inode(struct inode *inode)
			
 
				 {
			
 
				 	int wipe, status;
			
 
				-	sigset_t blocked, oldset;
			
 
				+	sigset_t oldset;
			
 
				 	struct buffer_head *di_bh = NULL;
			
 
				 
			
 
				 	mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
			
@@ -1007,13 +999,7 @@ void ocfs2_delete_inode(struct inode *inode)
 
				 	 * messaging paths may return us -ERESTARTSYS. Which would
			
 
				 	 * cause us to exit early, resulting in inodes being orphaned
			
 
				 	 * forever. */
			
 
				-	sigfillset(&blocked);
			
 
				-	status = sigprocmask(SIG_BLOCK, &blocked, &oldset);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		ocfs2_cleanup_delete_inode(inode, 1);
			
 
				-		goto bail;
			
 
				-	}
			
 
				+	ocfs2_block_signals(&oldset);
			
 
				 
			
 
				 	/*
			
 
				 	 * Synchronize us against ocfs2_get_dentry. We take this in
			
@@ -1087,9 +1073,7 @@ bail_unlock_nfs_sync:
 
				 	ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0);
			
 
				 
			
 
				 bail_unblock:
			
 
				-	status = sigprocmask(SIG_SETMASK, &oldset, NULL);
			
 
				-	if (status < 0)
			
 
				-		mlog_errno(status);
			
 
				+	ocfs2_unblock_signals(&oldset);
			
 
				 bail:
			
 
				 	clear_inode(inode);
			
 
				 	mlog_exit_void();
			
@@ -1123,6 +1107,10 @@ void ocfs2_clear_inode(struct inode *inode)
 
				 	ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres);
			
 
				 	ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);
			
 
				 
			
 
				+	ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap,
			
 
				+			   &oi->ip_la_data_resv);
			
 
				+	ocfs2_resv_init_once(&oi->ip_la_data_resv);
			
 
				+
			
 
				 	/* We very well may get a clear_inode before all an inodes
			
 
				 	 * metadata has hit disk. Of course, we can't drop any cluster
			
 
				 	 * locks until the journal has finished with it. The only
			
@@ -1298,13 +1286,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
 
				 	fe->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
			
 
				 	fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
			
 
				 
			
 
				-	status = ocfs2_journal_dirty(handle, bh);
			
 
				-	if (status < 0)
			
 
				-		mlog_errno(status);
			
 
				-
			
 
				-	status = 0;
			
 
				+	ocfs2_journal_dirty(handle, bh);
			
 
				 leave:
			
 
				-
			
 
				 	mlog_exit(status);
			
 
				 	return status;
			
 
				 }
			
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -70,6 +70,8 @@ struct ocfs2_inode_info
 
				 	/* Only valid if the inode is the dir. */
			
 
				 	u32				ip_last_used_slot;
			
 
				 	u64				ip_last_used_group;
			
 
				+
			
 
				+	struct ocfs2_alloc_reservation	ip_la_data_resv;
			
 
				 };
			
 
				 
			
 
				 /*
			
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -402,9 +402,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * 'nblocks' is what you want to add to the current
			
 
				- * transaction. extend_trans will either extend the current handle by
			
 
				- * nblocks, or commit it and start a new one with nblocks credits.
			
 
				+ * 'nblocks' is what you want to add to the current transaction.
			
 
				  *
			
 
				  * This might call jbd2_journal_restart() which will commit dirty buffers
			
 
				  * and then restart the transaction. Before calling
			
@@ -422,11 +420,15 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
 
				  */
			
 
				 int ocfs2_extend_trans(handle_t *handle, int nblocks)
			
 
				 {
			
 
				-	int status;
			
 
				+	int status, old_nblocks;
			
 
				 
			
 
				 	BUG_ON(!handle);
			
 
				-	BUG_ON(!nblocks);
			
 
				+	BUG_ON(nblocks < 0);
			
 
				+
			
 
				+	if (!nblocks)
			
 
				+		return 0;
			
 
				 
			
 
				+	old_nblocks = handle->h_buffer_credits;
			
 
				 	mlog_entry_void();
			
 
				 
			
 
				 	mlog(0, "Trying to extend transaction by %d blocks\n", nblocks);
			
@@ -445,7 +447,8 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
 
				 		mlog(0,
			
 
				 		     "jbd2_journal_extend failed, trying "
			
 
				 		     "jbd2_journal_restart\n");
			
 
				-		status = jbd2_journal_restart(handle, nblocks);
			
 
				+		status = jbd2_journal_restart(handle,
			
 
				+					      old_nblocks + nblocks);
			
 
				 		if (status < 0) {
			
 
				 			mlog_errno(status);
			
 
				 			goto bail;
			
@@ -734,8 +737,7 @@ int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
 
				 	return __ocfs2_journal_access(handle, ci, bh, NULL, type);
			
 
				 }
			
 
				 
			
 
				-int ocfs2_journal_dirty(handle_t *handle,
			
 
				-			struct buffer_head *bh)
			
 
				+void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
			
 
				 {
			
 
				 	int status;
			
 
				 
			
@@ -743,13 +745,9 @@ int ocfs2_journal_dirty(handle_t *handle,
 
				 		   (unsigned long long)bh->b_blocknr);
			
 
				 
			
 
				 	status = jbd2_journal_dirty_metadata(handle, bh);
			
 
				-	if (status < 0)
			
 
				-		mlog(ML_ERROR, "Could not dirty metadata buffer. "
			
 
				-		     "(bh->b_blocknr=%llu)\n",
			
 
				-		     (unsigned long long)bh->b_blocknr);
			
 
				+	BUG_ON(status);
			
 
				 
			
 
				-	mlog_exit(status);
			
 
				-	return status;
			
 
				+	mlog_exit_void();
			
 
				 }
			
 
				 
			
 
				 #define OCFS2_DEFAULT_COMMIT_INTERVAL	(HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
			
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -325,8 +325,7 @@ int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
 
				  *	<modify the bh>
			
 
				  * 	ocfs2_journal_dirty(handle, bh);
			
 
				  */
			
 
				-int                  ocfs2_journal_dirty(handle_t *handle,
			
 
				-					 struct buffer_head *bh);
			
 
				+void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh);
			
 
				 
			
 
				 /*
			
 
				  *  Credit Macros:
			
@@ -562,6 +561,18 @@ static inline int ocfs2_calc_group_alloc_credits(struct super_block *sb,
 
				 	return blocks;
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Allocating a discontiguous block group requires the credits from
			
 
				+ * ocfs2_calc_group_alloc_credits() as well as enough credits to fill
			
 
				+ * the group descriptor's extent list.  The caller already has started
			
 
				+ * the transaction with ocfs2_calc_group_alloc_credits().  They extend
			
 
				+ * it with these credits.
			
 
				+ */
			
 
				+static inline int ocfs2_calc_bg_discontig_credits(struct super_block *sb)
			
 
				+{
			
 
				+	return ocfs2_extent_recs_per_gd(sb);
			
 
				+}
			
 
				+
			
 
				 static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
			
 
				 						unsigned int clusters_to_del,
			
 
				 						struct ocfs2_dinode *fe,
			
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -52,7 +52,8 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
 
				 
			
 
				 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
			
 
				 					     struct ocfs2_dinode *alloc,
			
 
				-					     u32 numbits);
			
 
				+					     u32 *numbits,
			
 
				+					     struct ocfs2_alloc_reservation *resv);
			
 
				 
			
 
				 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
			
 
				 
			
@@ -74,6 +75,144 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
 
				 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
			
 
				 					  struct inode *local_alloc_inode);
			
 
				 
			
 
				+/*
			
 
				+ * ocfs2_la_default_mb() - determine a default size, in megabytes, of
			
 
				+ * the local alloc.
			
 
				+ *
			
 
				+ * Generally, we'd like to pick as large a local alloc as
			
 
				+ * possible. Performance on large workloads tends to scale
			
 
				+ * proportionally to la size. In addition to that, the reservations
			
 
				+ * code functions more efficiently as it can reserve more windows for
			
 
				+ * write.
			
 
				+ *
			
 
				+ * Some things work against us when trying to choose a large local alloc:
			
 
				+ *
			
 
				+ * - We need to ensure our sizing is picked to leave enough space in
			
 
				+ *   group descriptors for other allocations (such as block groups,
			
 
				+ *   etc). Picking default sizes which are a multiple of 4 could help
			
 
				+ *   - block groups are allocated in 2mb and 4mb chunks.
			
 
				+ *
			
 
				+ * - Likewise, we don't want to starve other nodes of bits on small
			
 
				+ *   file systems. This can easily be taken care of by limiting our
			
 
				+ *   default to a reasonable size (256M) on larger cluster sizes.
			
 
				+ *
			
 
				+ * - Some file systems can't support very large sizes - 4k and 8k in
			
 
				+ *   particular are limited to less than 128 and 256 megabytes respectively.
			
 
				+ *
			
 
				+ * The following reference table shows group descriptor and local
			
 
				+ * alloc maximums at various cluster sizes (4k blocksize)
			
 
				+ *
			
 
				+ * csize: 4K	group: 126M	la: 121M
			
 
				+ * csize: 8K	group: 252M	la: 243M
			
 
				+ * csize: 16K	group: 504M	la: 486M
			
 
				+ * csize: 32K	group: 1008M	la: 972M
			
 
				+ * csize: 64K	group: 2016M	la: 1944M
			
 
				+ * csize: 128K	group: 4032M	la: 3888M
			
 
				+ * csize: 256K	group: 8064M	la: 7776M
			
 
				+ * csize: 512K	group: 16128M	la: 15552M
			
 
				+ * csize: 1024K	group: 32256M	la: 31104M
			
 
				+ */
			
 
				+#define	OCFS2_LA_MAX_DEFAULT_MB	256
			
 
				+#define	OCFS2_LA_OLD_DEFAULT	8
			
 
				+unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
			
 
				+{
			
 
				+	unsigned int la_mb;
			
 
				+	unsigned int gd_mb;
			
 
				+	unsigned int megs_per_slot;
			
 
				+	struct super_block *sb = osb->sb;
			
 
				+
			
 
				+	gd_mb = ocfs2_clusters_to_megabytes(osb->sb,
			
 
				+		8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat));
			
 
				+
			
 
				+	/*
			
 
				+	 * This takes care of file systems with very small group
			
 
				+	 * descriptors - 512 byte blocksize at cluster sizes lower
			
 
				+	 * than 16K and also 1k blocksize with 4k cluster size.
			
 
				+	 */
			
 
				+	if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192)
			
 
				+	    || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096))
			
 
				+		return OCFS2_LA_OLD_DEFAULT;
			
 
				+
			
 
				+	/*
			
 
				+	 * Leave enough room for some block groups and make the final
			
 
				+	 * value we work from a multiple of 4.
			
 
				+	 */
			
 
				+	gd_mb -= 16;
			
 
				+	gd_mb &= 0xFFFFFFFB;
			
 
				+
			
 
				+	la_mb = gd_mb;
			
 
				+
			
 
				+	/*
			
 
				+	 * Keep window sizes down to a reasonable default
			
 
				+	 */
			
 
				+	if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) {
			
 
				+		/*
			
 
				+		 * Some clustersize / blocksize combinations will have
			
 
				+		 * given us a larger than OCFS2_LA_MAX_DEFAULT_MB
			
 
				+		 * default size, but get poor distribution when
			
 
				+		 * limited to exactly 256 megabytes.
			
 
				+		 *
			
 
				+		 * As an example, 16K clustersize at 4K blocksize
			
 
				+		 * gives us a cluster group size of 504M. Paring the
			
 
				+		 * local alloc size down to 256 however, would give us
			
 
				+		 * only one window and around 200MB left in the
			
 
				+		 * cluster group. Instead, find the first size below
			
 
				+		 * 256 which would give us an even distribution.
			
 
				+		 *
			
 
				+		 * Larger cluster group sizes actually work out pretty
			
 
				+		 * well when pared to 256, so we don't have to do this
			
 
				+		 * for any group that fits more than two
			
 
				+		 * OCFS2_LA_MAX_DEFAULT_MB windows.
			
 
				+		 */
			
 
				+		if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB))
			
 
				+			la_mb = 256;
			
 
				+		else {
			
 
				+			unsigned int gd_mult = gd_mb;
			
 
				+
			
 
				+			while (gd_mult > 256)
			
 
				+				gd_mult = gd_mult >> 1;
			
 
				+
			
 
				+			la_mb = gd_mult;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots;
			
 
				+	megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot);
			
 
				+	/* Too many nodes, too few disk clusters. */
			
 
				+	if (megs_per_slot < la_mb)
			
 
				+		la_mb = megs_per_slot;
			
 
				+
			
 
				+	return la_mb;
			
 
				+}
			
 
				+
			
 
				+void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb)
			
 
				+{
			
 
				+	struct super_block *sb = osb->sb;
			
 
				+	unsigned int la_default_mb = ocfs2_la_default_mb(osb);
			
 
				+	unsigned int la_max_mb;
			
 
				+
			
 
				+	la_max_mb = ocfs2_clusters_to_megabytes(sb,
			
 
				+						ocfs2_local_alloc_size(sb) * 8);
			
 
				+
			
 
				+	mlog(0, "requested: %dM, max: %uM, default: %uM\n",
			
 
				+	     requested_mb, la_max_mb, la_default_mb);
			
 
				+
			
 
				+	if (requested_mb == -1) {
			
 
				+		/* No user request - use defaults */
			
 
				+		osb->local_alloc_default_bits =
			
 
				+			ocfs2_megabytes_to_clusters(sb, la_default_mb);
			
 
				+	} else if (requested_mb > la_max_mb) {
			
 
				+		/* Request is too big, we give the maximum available */
			
 
				+		osb->local_alloc_default_bits =
			
 
				+			ocfs2_megabytes_to_clusters(sb, la_max_mb);
			
 
				+	} else {
			
 
				+		osb->local_alloc_default_bits =
			
 
				+			ocfs2_megabytes_to_clusters(sb, requested_mb);
			
 
				+	}
			
 
				+
			
 
				+	osb->local_alloc_bits = osb->local_alloc_default_bits;
			
 
				+}
			
 
				+
			
 
				 static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
			
 
				 {
			
 
				 	return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
			
@@ -156,7 +295,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
 
				 		     osb->local_alloc_bits, (osb->bitmap_cpg - 1));
			
 
				 		osb->local_alloc_bits =
			
 
				 			ocfs2_megabytes_to_clusters(osb->sb,
			
 
				-						    OCFS2_DEFAULT_LOCAL_ALLOC_SIZE);
			
 
				+						    ocfs2_la_default_mb(osb));
			
 
				 	}
			
 
				 
			
 
				 	/* read the alloc off disk */
			
@@ -262,6 +401,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 
				 
			
 
				 	osb->local_alloc_state = OCFS2_LA_DISABLED;
			
 
				 
			
 
				+	ocfs2_resmap_uninit(&osb->osb_la_resmap);
			
 
				+
			
 
				 	main_bm_inode = ocfs2_get_system_file_inode(osb,
			
 
				 						    GLOBAL_BITMAP_SYSTEM_INODE,
			
 
				 						    OCFS2_INVALID_SLOT);
			
@@ -305,12 +446,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 
				 	}
			
 
				 
			
 
				 	ocfs2_clear_local_alloc(alloc);
			
 
				-
			
 
				-	status = ocfs2_journal_dirty(handle, bh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto out_commit;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, bh);
			
 
				 
			
 
				 	brelse(bh);
			
 
				 	osb->local_alloc_bh = NULL;
			
@@ -481,46 +617,6 @@ out:
 
				 	return status;
			
 
				 }
			
 
				 
			
 
				-/* Check to see if the local alloc window is within ac->ac_max_block */
			
 
				-static int ocfs2_local_alloc_in_range(struct inode *inode,
			
 
				-				      struct ocfs2_alloc_context *ac,
			
 
				-				      u32 bits_wanted)
			
 
				-{
			
 
				-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				-	struct ocfs2_dinode *alloc;
			
 
				-	struct ocfs2_local_alloc *la;
			
 
				-	int start;
			
 
				-	u64 block_off;
			
 
				-
			
 
				-	if (!ac->ac_max_block)
			
 
				-		return 1;
			
 
				-
			
 
				-	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
			
 
				-	la = OCFS2_LOCAL_ALLOC(alloc);
			
 
				-
			
 
				-	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
			
 
				-	if (start == -1) {
			
 
				-		mlog_errno(-ENOSPC);
			
 
				-		return 0;
			
 
				-	}
			
 
				-
			
 
				-	/*
			
 
				-	 * Converting (bm_off + start + bits_wanted) to blocks gives us
			
 
				-	 * the blkno just past our actual allocation.  This is perfect
			
 
				-	 * to compare with ac_max_block.
			
 
				-	 */
			
 
				-	block_off = ocfs2_clusters_to_blocks(inode->i_sb,
			
 
				-					     le32_to_cpu(la->la_bm_off) +
			
 
				-					     start + bits_wanted);
			
 
				-	mlog(0, "Checking %llu against %llu\n",
			
 
				-	     (unsigned long long)block_off,
			
 
				-	     (unsigned long long)ac->ac_max_block);
			
 
				-	if (block_off > ac->ac_max_block)
			
 
				-		return 0;
			
 
				-
			
 
				-	return 1;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * make sure we've got at least bits_wanted contiguous bits in the
			
 
				  * local alloc. You lose them when you drop i_mutex.
			
@@ -613,17 +709,6 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 
				 		mlog(0, "Calling in_range for max block %llu\n",
			
 
				 		     (unsigned long long)ac->ac_max_block);
			
 
				 
			
 
				-	if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
			
 
				-					bits_wanted)) {
			
 
				-		/*
			
 
				-		 * The window is outside ac->ac_max_block.
			
 
				-		 * This errno tells the caller to keep localalloc enabled
			
 
				-		 * but to get the allocation from the main bitmap.
			
 
				-		 */
			
 
				-		status = -EFBIG;
			
 
				-		goto bail;
			
 
				-	}
			
 
				-
			
 
				 	ac->ac_inode = local_alloc_inode;
			
 
				 	/* We should never use localalloc from another slot */
			
 
				 	ac->ac_alloc_slot = osb->slot_num;
			
@@ -664,7 +749,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 
				 	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
			
 
				 	la = OCFS2_LOCAL_ALLOC(alloc);
			
 
				 
			
 
				-	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
			
 
				+	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted,
			
 
				+						  ac->ac_resv);
			
 
				 	if (start == -1) {
			
 
				 		/* TODO: Shouldn't we just BUG here? */
			
 
				 		status = -ENOSPC;
			
@@ -674,8 +760,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 
				 
			
 
				 	bitmap = la->la_bitmap;
			
 
				 	*bit_off = le32_to_cpu(la->la_bm_off) + start;
			
 
				-	/* local alloc is always contiguous by nature -- we never
			
 
				-	 * delete bits from it! */
			
 
				 	*num_bits = bits_wanted;
			
 
				 
			
 
				 	status = ocfs2_journal_access_di(handle,
			
@@ -687,18 +771,15 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				+	ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start,
			
 
				+				  bits_wanted);
			
 
				+
			
 
				 	while(bits_wanted--)
			
 
				 		ocfs2_set_bit(start++, bitmap);
			
 
				 
			
 
				 	le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
			
 
				+	ocfs2_journal_dirty(handle, osb->local_alloc_bh);
			
 
				 
			
 
				-	status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto bail;
			
 
				-	}
			
 
				-
			
 
				-	status = 0;
			
 
				 bail:
			
 
				 	mlog_exit(status);
			
 
				 	return status;
			
@@ -722,13 +803,17 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
 
				 }
			
 
				 
			
 
				 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
			
 
				-					     struct ocfs2_dinode *alloc,
			
 
				-					     u32 numbits)
			
 
				+				     struct ocfs2_dinode *alloc,
			
 
				+				     u32 *numbits,
			
 
				+				     struct ocfs2_alloc_reservation *resv)
			
 
				 {
			
 
				 	int numfound, bitoff, left, startoff, lastzero;
			
 
				+	int local_resv = 0;
			
 
				+	struct ocfs2_alloc_reservation r;
			
 
				 	void *bitmap = NULL;
			
 
				+	struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;
			
 
				 
			
 
				-	mlog_entry("(numbits wanted = %u)\n", numbits);
			
 
				+	mlog_entry("(numbits wanted = %u)\n", *numbits);
			
 
				 
			
 
				 	if (!alloc->id1.bitmap1.i_total) {
			
 
				 		mlog(0, "No bits in my window!\n");
			
@@ -736,6 +821,30 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				+	if (!resv) {
			
 
				+		local_resv = 1;
			
 
				+		ocfs2_resv_init_once(&r);
			
 
				+		ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP);
			
 
				+		resv = &r;
			
 
				+	}
			
 
				+
			
 
				+	numfound = *numbits;
			
 
				+	if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) {
			
 
				+		if (numfound < *numbits)
			
 
				+			*numbits = numfound;
			
 
				+		goto bail;
			
 
				+	}
			
 
				+
			
 
				+	/*
			
 
				+	 * Code error. While reservations are enabled, local
			
 
				+	 * allocation should _always_ go through them.
			
 
				+	 */
			
 
				+	BUG_ON(osb->osb_resv_level != 0);
			
 
				+
			
 
				+	/*
			
 
				+	 * Reservations are disabled. Handle this the old way.
			
 
				+	 */
			
 
				+
			
 
				 	bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
			
 
				 
			
 
				 	numfound = bitoff = startoff = 0;
			
@@ -761,7 +870,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
 
				 			startoff = bitoff+1;
			
 
				 		}
			
 
				 		/* we got everything we needed */
			
 
				-		if (numfound == numbits) {
			
 
				+		if (numfound == *numbits) {
			
 
				 			/* mlog(0, "Found it all!\n"); */
			
 
				 			break;
			
 
				 		}
			
@@ -770,12 +879,15 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
 
				 	mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
			
 
				 	     numfound);
			
 
				 
			
 
				-	if (numfound == numbits)
			
 
				+	if (numfound == *numbits)
			
 
				 		bitoff = startoff - numfound;
			
 
				 	else
			
 
				 		bitoff = -1;
			
 
				 
			
 
				 bail:
			
 
				+	if (local_resv)
			
 
				+		ocfs2_resv_discard(resmap, resv);
			
 
				+
			
 
				 	mlog_exit(bitoff);
			
 
				 	return bitoff;
			
 
				 }
			
@@ -1049,7 +1161,7 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
 
				 	/* we used the generic suballoc reserve function, but we set
			
 
				 	 * everything up nicely, so there's no reason why we can't use
			
 
				 	 * the more specific cluster api to claim bits. */
			
 
				-	status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
			
 
				+	status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits,
			
 
				 				      &cluster_off, &cluster_count);
			
 
				 	if (status == -ENOSPC) {
			
 
				 retry_enospc:
			
@@ -1063,7 +1175,7 @@ retry_enospc:
 
				 			goto bail;
			
 
				 
			
 
				 		ac->ac_bits_wanted = osb->local_alloc_default_bits;
			
 
				-		status = ocfs2_claim_clusters(osb, handle, ac,
			
 
				+		status = ocfs2_claim_clusters(handle, ac,
			
 
				 					      osb->local_alloc_bits,
			
 
				 					      &cluster_off,
			
 
				 					      &cluster_count);
			
@@ -1098,6 +1210,9 @@ retry_enospc:
 
				 	memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
			
 
				 	       le16_to_cpu(la->la_size));
			
 
				 
			
 
				+	ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count,
			
 
				+			     OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);
			
 
				+
			
 
				 	mlog(0, "New window allocated:\n");
			
 
				 	mlog(0, "window la_bm_off = %u\n",
			
 
				 	     OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
			
@@ -1169,12 +1284,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 
				 	}
			
 
				 
			
 
				 	ocfs2_clear_local_alloc(alloc);
			
 
				-
			
 
				-	status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto bail;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, osb->local_alloc_bh);
			
 
				 
			
 
				 	status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
			
 
				 					  main_bm_inode, main_bm_bh);
			
@@ -1192,7 +1302,6 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 
				 
			
 
				 	atomic_inc(&osb->alloc_stats.moves);
			
 
				 
			
 
				-	status = 0;
			
 
				 bail:
			
 
				 	if (handle)
			
 
				 		ocfs2_commit_trans(osb, handle);
			
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -30,6 +30,9 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb);
 
				 
			
 
				 void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb);
			
 
				 
			
 
				+void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb);
			
 
				+unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb);
			
 
				+
			
 
				 int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
			
 
				 				     int node_num,
			
 
				 				     struct ocfs2_dinode **alloc_copy);
			
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -41,44 +41,20 @@
 
				 #include "file.h"
			
 
				 #include "inode.h"
			
 
				 #include "mmap.h"
			
 
				+#include "super.h"
			
 
				 
			
 
				-static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset)
			
 
				-{
			
 
				-	/* The best way to deal with signals in the vm path is
			
 
				-	 * to block them upfront, rather than allowing the
			
 
				-	 * locking paths to return -ERESTARTSYS. */
			
 
				-	sigfillset(blocked);
			
 
				-
			
 
				-	/* We should technically never get a bad return value
			
 
				-	 * from sigprocmask */
			
 
				-	return sigprocmask(SIG_BLOCK, blocked, oldset);
			
 
				-}
			
 
				-
			
 
				-static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
			
 
				-{
			
 
				-	return sigprocmask(SIG_SETMASK, oldset, NULL);
			
 
				-}
			
 
				 
			
 
				 static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
			
 
				 {
			
 
				-	sigset_t blocked, oldset;
			
 
				-	int error, ret;
			
 
				+	sigset_t oldset;
			
 
				+	int ret;
			
 
				 
			
 
				 	mlog_entry("(area=%p, page offset=%lu)\n", area, vmf->pgoff);
			
 
				 
			
 
				-	error = ocfs2_vm_op_block_sigs(&blocked, &oldset);
			
 
				-	if (error < 0) {
			
 
				-		mlog_errno(error);
			
 
				-		ret = VM_FAULT_SIGBUS;
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				+	ocfs2_block_signals(&oldset);
			
 
				 	ret = filemap_fault(area, vmf);
			
 
				+	ocfs2_unblock_signals(&oldset);
			
 
				 
			
 
				-	error = ocfs2_vm_op_unblock_sigs(&oldset);
			
 
				-	if (error < 0)
			
 
				-		mlog_errno(error);
			
 
				-out:
			
 
				 	mlog_exit_ptr(vmf->page);
			
 
				 	return ret;
			
 
				 }
			
@@ -158,14 +134,10 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
				 	struct page *page = vmf->page;
			
 
				 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
			
 
				 	struct buffer_head *di_bh = NULL;
			
 
				-	sigset_t blocked, oldset;
			
 
				-	int ret, ret2;
			
 
				+	sigset_t oldset;
			
 
				+	int ret;
			
 
				 
			
 
				-	ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
			
 
				-	if (ret < 0) {
			
 
				-		mlog_errno(ret);
			
 
				-		return ret;
			
 
				-	}
			
 
				+	ocfs2_block_signals(&oldset);
			
 
				 
			
 
				 	/*
			
 
				 	 * The cluster locks taken will block a truncate from another
			
@@ -193,9 +165,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
				 	ocfs2_inode_unlock(inode, 1);
			
 
				 
			
 
				 out:
			
 
				-	ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
			
 
				-	if (ret2 < 0)
			
 
				-		mlog_errno(ret2);
			
 
				+	ocfs2_unblock_signals(&oldset);
			
 
				 	if (ret)
			
 
				 		ret = VM_FAULT_SIGBUS;
			
 
				 	return ret;
			
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -239,6 +239,8 @@ static int ocfs2_mknod(struct inode *dir,
 
				 	};
			
 
				 	int did_quota_inode = 0;
			
 
				 	struct ocfs2_dir_lookup_result lookup = { NULL, };
			
 
				+	sigset_t oldset;
			
 
				+	int did_block_signals = 0;
			
 
				 
			
 
				 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
			
 
				 		   (unsigned long)dev, dentry->d_name.len,
			
@@ -350,6 +352,10 @@ static int ocfs2_mknod(struct inode *dir,
 
				 		goto leave;
			
 
				 	}
			
 
				 
			
 
				+	/* Starting to change things, restart is no longer possible. */
			
 
				+	ocfs2_block_signals(&oldset);
			
 
				+	did_block_signals = 1;
			
 
				+
			
 
				 	status = dquot_alloc_inode(inode);
			
 
				 	if (status)
			
 
				 		goto leave;
			
@@ -384,11 +390,7 @@ static int ocfs2_mknod(struct inode *dir,
 
				 			goto leave;
			
 
				 		}
			
 
				 		ocfs2_add_links_count(dirfe, 1);
			
 
				-		status = ocfs2_journal_dirty(handle, parent_fe_bh);
			
 
				-		if (status < 0) {
			
 
				-			mlog_errno(status);
			
 
				-			goto leave;
			
 
				-		}
			
 
				+		ocfs2_journal_dirty(handle, parent_fe_bh);
			
 
				 		inc_nlink(dir);
			
 
				 	}
			
 
				 
			
@@ -439,6 +441,8 @@ leave:
 
				 		ocfs2_commit_trans(osb, handle);
			
 
				 
			
 
				 	ocfs2_inode_unlock(dir, 1);
			
 
				+	if (did_block_signals)
			
 
				+		ocfs2_unblock_signals(&oldset);
			
 
				 
			
 
				 	if (status == -ENOSPC)
			
 
				 		mlog(0, "Disk is full\n");
			
@@ -487,14 +491,15 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 
				 	int status = 0;
			
 
				 	struct ocfs2_dinode *fe = NULL;
			
 
				 	struct ocfs2_extent_list *fel;
			
 
				-	u64 fe_blkno = 0;
			
 
				+	u64 suballoc_loc, fe_blkno = 0;
			
 
				 	u16 suballoc_bit;
			
 
				 	u16 feat;
			
 
				 
			
 
				 	*new_fe_bh = NULL;
			
 
				 
			
 
				-	status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh,
			
 
				-				       inode_ac, &suballoc_bit, &fe_blkno);
			
 
				+	status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh,
			
 
				+				       inode_ac, &suballoc_loc,
			
 
				+				       &suballoc_bit, &fe_blkno);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
 
				 		goto leave;
			
@@ -531,6 +536,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 
				 	fe->i_generation = cpu_to_le32(inode->i_generation);
			
 
				 	fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
			
 
				 	fe->i_blkno = cpu_to_le64(fe_blkno);
			
 
				+	fe->i_suballoc_loc = cpu_to_le64(suballoc_loc);
			
 
				 	fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
			
 
				 	fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
			
 
				 	fe->i_uid = cpu_to_le32(inode->i_uid);
			
@@ -567,11 +573,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 
				 		fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
			
 
				 	}
			
 
				 
			
 
				-	status = ocfs2_journal_dirty(handle, *new_fe_bh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto leave;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, *new_fe_bh);
			
 
				 
			
 
				 	ocfs2_populate_inode(inode, fe, 1);
			
 
				 	ocfs2_ci_set_new(osb, INODE_CACHE(inode));
			
@@ -637,6 +639,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 
				 	struct ocfs2_dinode *fe = NULL;
			
 
				 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
			
 
				 	struct ocfs2_dir_lookup_result lookup = { NULL, };
			
 
				+	sigset_t oldset;
			
 
				 
			
 
				 	mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino,
			
 
				 		   old_dentry->d_name.len, old_dentry->d_name.name,
			
@@ -693,6 +696,9 @@ static int ocfs2_link(struct dentry *old_dentry,
 
				 		goto out_unlock_inode;
			
 
				 	}
			
 
				 
			
 
				+	/* Starting to change things, restart is no longer possible. */
			
 
				+	ocfs2_block_signals(&oldset);
			
 
				+
			
 
				 	err = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
			
 
				 				      OCFS2_JOURNAL_ACCESS_WRITE);
			
 
				 	if (err < 0) {
			
@@ -705,14 +711,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 
				 	ocfs2_set_links_count(fe, inode->i_nlink);
			
 
				 	fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
			
 
				 	fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
			
 
				-
			
 
				-	err = ocfs2_journal_dirty(handle, fe_bh);
			
 
				-	if (err < 0) {
			
 
				-		ocfs2_add_links_count(fe, -1);
			
 
				-		drop_nlink(inode);
			
 
				-		mlog_errno(err);
			
 
				-		goto out_commit;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, fe_bh);
			
 
				 
			
 
				 	err = ocfs2_add_entry(handle, dentry, inode,
			
 
				 			      OCFS2_I(inode)->ip_blkno,
			
@@ -736,6 +735,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 
				 
			
 
				 out_commit:
			
 
				 	ocfs2_commit_trans(osb, handle);
			
 
				+	ocfs2_unblock_signals(&oldset);
			
 
				 out_unlock_inode:
			
 
				 	ocfs2_inode_unlock(inode, 1);
			
 
				 
			
@@ -909,12 +909,7 @@ static int ocfs2_unlink(struct inode *dir,
 
				 		drop_nlink(inode);
			
 
				 	drop_nlink(inode);
			
 
				 	ocfs2_set_links_count(fe, inode->i_nlink);
			
 
				-
			
 
				-	status = ocfs2_journal_dirty(handle, fe_bh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto leave;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, fe_bh);
			
 
				 
			
 
				 	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
			
 
				 	if (S_ISDIR(inode->i_mode))
			
@@ -1332,12 +1327,7 @@ static int ocfs2_rename(struct inode *old_dir,
 
				 			ocfs2_set_links_count(newfe, 0);
			
 
				 		else
			
 
				 			ocfs2_add_links_count(newfe, -1);
			
 
				-
			
 
				-		status = ocfs2_journal_dirty(handle, newfe_bh);
			
 
				-		if (status < 0) {
			
 
				-			mlog_errno(status);
			
 
				-			goto bail;
			
 
				-		}
			
 
				+		ocfs2_journal_dirty(handle, newfe_bh);
			
 
				 	} else {
			
 
				 		/* if the name was not found in new_dir, add it now */
			
 
				 		status = ocfs2_add_entry(handle, new_dentry, old_inode,
			
@@ -1356,10 +1346,7 @@ static int ocfs2_rename(struct inode *old_dir,
 
				 
			
 
				 		old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec);
			
 
				 		old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec);
			
 
				-
			
 
				-		status = ocfs2_journal_dirty(handle, old_inode_bh);
			
 
				-		if (status < 0)
			
 
				-			mlog_errno(status);
			
 
				+		ocfs2_journal_dirty(handle, old_inode_bh);
			
 
				 	} else
			
 
				 		mlog_errno(status);
			
 
				 
			
@@ -1431,7 +1418,7 @@ static int ocfs2_rename(struct inode *old_dir,
 
				 							 OCFS2_JOURNAL_ACCESS_WRITE);
			
 
				 			fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
			
 
				 			ocfs2_set_links_count(fe, old_dir->i_nlink);
			
 
				-			status = ocfs2_journal_dirty(handle, old_dir_bh);
			
 
				+			ocfs2_journal_dirty(handle, old_dir_bh);
			
 
				 		}
			
 
				 	}
			
 
				 	ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
			
@@ -1563,11 +1550,7 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
 
				 		       (bytes_left > sb->s_blocksize) ? sb->s_blocksize :
			
 
				 		       bytes_left);
			
 
				 
			
 
				-		status = ocfs2_journal_dirty(handle, bhs[virtual]);
			
 
				-		if (status < 0) {
			
 
				-			mlog_errno(status);
			
 
				-			goto bail;
			
 
				-		}
			
 
				+		ocfs2_journal_dirty(handle, bhs[virtual]);
			
 
				 
			
 
				 		virtual++;
			
 
				 		p_blkno++;
			
@@ -1611,6 +1594,8 @@ static int ocfs2_symlink(struct inode *dir,
 
				 	};
			
 
				 	int did_quota = 0, did_quota_inode = 0;
			
 
				 	struct ocfs2_dir_lookup_result lookup = { NULL, };
			
 
				+	sigset_t oldset;
			
 
				+	int did_block_signals = 0;
			
 
				 
			
 
				 	mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
			
 
				 		   dentry, symname, dentry->d_name.len, dentry->d_name.name);
			
@@ -1706,6 +1691,10 @@ static int ocfs2_symlink(struct inode *dir,
 
				 		goto bail;
			
 
				 	}
			
 
				 
			
 
				+	/* Starting to change things, restart is no longer possible. */
			
 
				+	ocfs2_block_signals(&oldset);
			
 
				+	did_block_signals = 1;
			
 
				+
			
 
				 	status = dquot_alloc_inode(inode);
			
 
				 	if (status)
			
 
				 		goto bail;
			
@@ -1814,6 +1803,8 @@ bail:
 
				 		ocfs2_commit_trans(osb, handle);
			
 
				 
			
 
				 	ocfs2_inode_unlock(dir, 1);
			
 
				+	if (did_block_signals)
			
 
				+		ocfs2_unblock_signals(&oldset);
			
 
				 
			
 
				 	brelse(new_fe_bh);
			
 
				 	brelse(parent_fe_bh);
			
@@ -1961,12 +1952,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 
				 	if (S_ISDIR(inode->i_mode))
			
 
				 		ocfs2_add_links_count(orphan_fe, 1);
			
 
				 	orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe);
			
 
				-
			
 
				-	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto leave;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, orphan_dir_bh);
			
 
				 
			
 
				 	status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
			
 
				 				   OCFS2_ORPHAN_NAMELEN, inode,
			
@@ -2065,12 +2051,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
 
				 	if (S_ISDIR(inode->i_mode))
			
 
				 		ocfs2_add_links_count(orphan_fe, -1);
			
 
				 	orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe);
			
 
				-
			
 
				-	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto leave;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, orphan_dir_bh);
			
 
				 
			
 
				 leave:
			
 
				 	ocfs2_free_dir_lookup_result(&lookup);
			
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -47,6 +47,7 @@
 
				 /* For struct ocfs2_blockcheck_stats */
			
 
				 #include "blockcheck.h"
			
 
				 
			
 
				+#include "reservations.h"
			
 
				 
			
 
				 /* Caching of metadata buffers */
			
 
				 
			
@@ -341,6 +342,9 @@ struct ocfs2_super
 
				 	 */
			
 
				 	unsigned int local_alloc_bits;
			
 
				 	unsigned int local_alloc_default_bits;
			
 
				+	/* osb_clusters_at_boot can become stale! Do not trust it to
			
 
				+	 * be up to date. */
			
 
				+	unsigned int osb_clusters_at_boot;
			
 
				 
			
 
				 	enum ocfs2_local_alloc_state local_alloc_state; /* protected
			
 
				 							 * by osb_lock */
			
@@ -349,6 +353,11 @@ struct ocfs2_super
 
				 
			
 
				 	u64 la_last_gd;
			
 
				 
			
 
				+	struct ocfs2_reservation_map	osb_la_resmap;
			
 
				+
			
 
				+	unsigned int	osb_resv_level;
			
 
				+	unsigned int	osb_dir_resv_level;
			
 
				+
			
 
				 	/* Next three fields are for local node slot recovery during
			
 
				 	 * mount. */
			
 
				 	int dirty;
			
@@ -482,6 +491,13 @@ static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb)
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				+static inline int ocfs2_supports_discontig_bg(struct ocfs2_super *osb)
			
 
				+{
			
 
				+	if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG)
			
 
				+		return 1;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				 static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb)
			
 
				 {
			
 
				 	if (ocfs2_supports_indexed_dirs(osb))
			
@@ -763,6 +779,12 @@ static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb,
 
				 	return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits);
			
 
				 }
			
 
				 
			
 
				+static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb,
			
 
				+						       unsigned int clusters)
			
 
				+{
			
 
				+	return clusters >> (20 - OCFS2_SB(sb)->s_clustersize_bits);
			
 
				+}
			
 
				+
			
 
				 static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap)
			
 
				 {
			
 
				 	ext2_set_bit(bit, bitmap);
			
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -100,7 +100,8 @@
 
				 					 | OCFS2_FEATURE_INCOMPAT_XATTR \
			
 
				 					 | OCFS2_FEATURE_INCOMPAT_META_ECC \
			
 
				 					 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
			
 
				-					 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE)
			
 
				+					 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \
			
 
				+					 | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG)
			
 
				 #define OCFS2_FEATURE_RO_COMPAT_SUPP	(OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
			
 
				 					 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
			
 
				 					 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
			
@@ -165,6 +166,9 @@
 
				 /* Refcount tree support */
			
 
				 #define OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE	0x1000
			
 
				 
			
 
				+/* Discontiguous block groups */
			
 
				+#define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG	0x2000
			
 
				+
			
 
				 /*
			
 
				  * backup superblock flag is used to indicate that this volume
			
 
				  * has backup superblocks.
			
@@ -282,14 +286,6 @@
 
				 /* Journal limits (in bytes) */
			
 
				 #define OCFS2_MIN_JOURNAL_SIZE		(4 * 1024 * 1024)
			
 
				 
			
 
				-/*
			
 
				- * Default local alloc size (in megabytes)
			
 
				- *
			
 
				- * The value chosen should be such that most allocations, including new
			
 
				- * block groups, use local alloc.
			
 
				- */
			
 
				-#define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE	8
			
 
				-
			
 
				 /*
			
 
				  * Inline extended attribute size (in bytes)
			
 
				  * The value chosen should be aligned to 16 byte boundaries.
			
@@ -512,7 +508,10 @@ struct ocfs2_extent_block
 
				 					   block group */
			
 
				 	__le32 h_fs_generation;		/* Must match super block */
			
 
				 	__le64 h_blkno;			/* Offset on disk, in blocks */
			
 
				-/*20*/	__le64 h_reserved3;
			
 
				+/*20*/	__le64 h_suballoc_loc;		/* Suballocator block group this
			
 
				+					   eb belongs to.  Only valid
			
 
				+					   if allocated from a
			
 
				+					   discontiguous block group */
			
 
				 	__le64 h_next_leaf_blk;		/* Offset on disk, in blocks,
			
 
				 					   of next leaf header pointing
			
 
				 					   to data */
			
@@ -679,7 +678,11 @@ struct ocfs2_dinode {
 
				 /*80*/	struct ocfs2_block_check i_check;	/* Error checking */
			
 
				 /*88*/	__le64 i_dx_root;		/* Pointer to dir index root block */
			
 
				 /*90*/	__le64 i_refcount_loc;
			
 
				-	__le64 i_reserved2[4];
			
 
				+	__le64 i_suballoc_loc;		/* Suballocator block group this
			
 
				+					   inode belongs to.  Only valid
			
 
				+					   if allocated from a
			
 
				+					   discontiguous block group */
			
 
				+/*A0*/	__le64 i_reserved2[3];
			
 
				 /*B8*/	union {
			
 
				 		__le64 i_pad1;		/* Generic way to refer to this
			
 
				 					   64bit union */
			
@@ -814,7 +817,12 @@ struct ocfs2_dx_root_block {
 
				 	__le32		dr_reserved2;
			
 
				 	__le64		dr_free_blk;		/* Pointer to head of free
			
 
				 						 * unindexed block list. */
			
 
				-	__le64		dr_reserved3[15];
			
 
				+	__le64		dr_suballoc_loc;	/* Suballocator block group
			
 
				+						   this root belongs to.
			
 
				+						   Only valid if allocated
			
 
				+						   from a discontiguous
			
 
				+						   block group */
			
 
				+	__le64		dr_reserved3[14];
			
 
				 	union {
			
 
				 		struct ocfs2_extent_list dr_list; /* Keep this aligned to 128
			
 
				 						   * bits for maximum space
			
@@ -839,6 +847,13 @@ struct ocfs2_dx_leaf {
 
				 	struct ocfs2_dx_entry_list	dl_list;
			
 
				 };
			
 
				 
			
 
				+/*
			
 
				+ * Largest bitmap for a block (suballocator) group in bytes.  This limit
			
 
				+ * does not affect cluster groups (global allocator).  Cluster group
			
 
				+ * bitmaps run to the end of the block.
			
 
				+ */
			
 
				+#define OCFS2_MAX_BG_BITMAP_SIZE	256
			
 
				+
			
 
				 /*
			
 
				  * On disk allocator group structure for OCFS2
			
 
				  */
			
@@ -860,7 +875,29 @@ struct ocfs2_group_desc
 
				 	__le64   bg_blkno;               /* Offset on disk, in blocks */
			
 
				 /*30*/	struct ocfs2_block_check bg_check;	/* Error checking */
			
 
				 	__le64   bg_reserved2;
			
 
				-/*40*/	__u8    bg_bitmap[0];
			
 
				+/*40*/	union {
			
 
				+		__u8    bg_bitmap[0];
			
 
				+		struct {
			
 
				+			/*
			
 
				+			 * Block groups may be discontiguous when
			
 
				+			 * OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG is set.
			
 
				+			 * The extents of a discontiguous block group are
			
 
				+			 * stored in bg_list.  It is a flat list.
			
 
				+			 * l_tree_depth must always be zero.  A
			
 
				+			 * discontiguous group is signified by a non-zero
			
 
				+			 * bg_list->l_next_free_rec.  Only block groups
			
 
				+			 * can be discontiguous; Cluster groups cannot.
			
 
				+			 * We've never made a block group with more than
			
 
				+			 * 2048 blocks (256 bytes of bg_bitmap).  This
			
 
				+			 * codifies that limit so that we can fit bg_list.
			
 
				+			 * bg_size of a discontiguous block group will
			
 
				+			 * be 256 to match bg_bitmap_filler.
			
 
				+			 */
			
 
				+			__u8 bg_bitmap_filler[OCFS2_MAX_BG_BITMAP_SIZE];
			
 
				+/*140*/			struct ocfs2_extent_list bg_list;
			
 
				+		};
			
 
				+	};
			
 
				+/* Actual on-disk size is one block */
			
 
				 };
			
 
				 
			
 
				 struct ocfs2_refcount_rec {
			
@@ -905,7 +942,11 @@ struct ocfs2_refcount_block {
 
				 /*40*/	__le32 rf_generation;		/* generation number. all be the same
			
 
				 					 * for the same refcount tree. */
			
 
				 	__le32 rf_reserved0;
			
 
				-	__le64 rf_reserved1[7];
			
 
				+	__le64 rf_suballoc_loc;		/* Suballocator block group this
			
 
				+					   refcount block belongs to. Only
			
 
				+					   valid if allocated from a
			
 
				+					   discontiguous block group */
			
 
				+/*50*/	__le64 rf_reserved1[6];
			
 
				 /*80*/	union {
			
 
				 		struct ocfs2_refcount_list rf_records;  /* List of refcount
			
 
				 							  records */
			
@@ -1017,7 +1058,10 @@ struct ocfs2_xattr_block {
 
				 					real xattr or a xattr tree. */
			
 
				 	__le16	xb_reserved0;
			
 
				 	__le32  xb_reserved1;
			
 
				-	__le64	xb_reserved2;
			
 
				+	__le64	xb_suballoc_loc;	/* Suballocator block group this
			
 
				+					   xattr block belongs to. Only
			
 
				+					   valid if allocated from a
			
 
				+					   discontiguous block group */
			
 
				 /*30*/	union {
			
 
				 		struct ocfs2_xattr_header xb_header; /* xattr header if this
			
 
				 							block contains xattr */
			
@@ -1254,6 +1298,16 @@ static inline u16 ocfs2_extent_recs_per_eb(struct super_block *sb)
 
				 	return size / sizeof(struct ocfs2_extent_rec);
			
 
				 }
			
 
				 
			
 
				+static inline u16 ocfs2_extent_recs_per_gd(struct super_block *sb)
			
 
				+{
			
 
				+	int size;
			
 
				+
			
 
				+	size = sb->s_blocksize -
			
 
				+		offsetof(struct ocfs2_group_desc, bg_list.l_recs);
			
 
				+
			
 
				+	return size / sizeof(struct ocfs2_extent_rec);
			
 
				+}
			
 
				+
			
 
				 static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb)
			
 
				 {
			
 
				 	int size;
			
@@ -1284,13 +1338,23 @@ static inline u16 ocfs2_local_alloc_size(struct super_block *sb)
 
				 	return size;
			
 
				 }
			
 
				 
			
 
				-static inline int ocfs2_group_bitmap_size(struct super_block *sb)
			
 
				+static inline int ocfs2_group_bitmap_size(struct super_block *sb,
			
 
				+					  int suballocator,
			
 
				+					  u32 feature_incompat)
			
 
				 {
			
 
				-	int size;
			
 
				-
			
 
				-	size = sb->s_blocksize -
			
 
				+	int size = sb->s_blocksize -
			
 
				 		offsetof(struct ocfs2_group_desc, bg_bitmap);
			
 
				 
			
 
				+	/*
			
 
				+	 * The cluster allocator uses the entire block.  Suballocators have
			
 
				+	 * never used more than OCFS2_MAX_BG_BITMAP_SIZE.  Unfortunately, older
			
 
				+	 * code expects bg_size set to the maximum.  Thus we must keep
			
 
				+	 * bg_size as-is unless discontig_bg is enabled.
			
 
				+	 */
			
 
				+	if (suballocator &&
			
 
				+	    (feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG))
			
 
				+		size = OCFS2_MAX_BG_BITMAP_SIZE;
			
 
				+
			
 
				 	return size;
			
 
				 }
			
 
				 
			
@@ -1402,23 +1466,43 @@ static inline int ocfs2_extent_recs_per_eb(int blocksize)
 
				 	return size / sizeof(struct ocfs2_extent_rec);
			
 
				 }
			
 
				 
			
 
				-static inline int ocfs2_local_alloc_size(int blocksize)
			
 
				+static inline int ocfs2_extent_recs_per_gd(int blocksize)
			
 
				 {
			
 
				 	int size;
			
 
				 
			
 
				 	size = blocksize -
			
 
				-		offsetof(struct ocfs2_dinode, id2.i_lab.la_bitmap);
			
 
				+		offsetof(struct ocfs2_group_desc, bg_list.l_recs);
			
 
				 
			
 
				-	return size;
			
 
				+	return size / sizeof(struct ocfs2_extent_rec);
			
 
				 }
			
 
				 
			
 
				-static inline int ocfs2_group_bitmap_size(int blocksize)
			
 
				+static inline int ocfs2_local_alloc_size(int blocksize)
			
 
				 {
			
 
				 	int size;
			
 
				 
			
 
				 	size = blocksize -
			
 
				+		offsetof(struct ocfs2_dinode, id2.i_lab.la_bitmap);
			
 
				+
			
 
				+	return size;
			
 
				+}
			
 
				+
			
 
				+static inline int ocfs2_group_bitmap_size(int blocksize,
			
 
				+					  int suballocator,
			
 
				+					  uint32_t feature_incompat)
			
 
				+{
			
 
				+	int size = blocksize -
			
 
				 		offsetof(struct ocfs2_group_desc, bg_bitmap);
			
 
				 
			
 
				+	/*
			
 
				+	 * The cluster allocator uses the entire block.  Suballocators have
			
 
				+	 * never used more than OCFS2_MAX_BG_BITMAP_SIZE.  Unfortunately, older
			
 
				+	 * code expects bg_size set to the maximum.  Thus we must keep
			
 
				+	 * bg_size as-is unless discontig_bg is enabled.
			
 
				+	 */
			
 
				+	if (suballocator &&
			
 
				+	    (feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG))
			
 
				+		size = OCFS2_MAX_BG_BITMAP_SIZE;
			
 
				+
			
 
				 	return size;
			
 
				 }
			
 
				 
			
@@ -1491,5 +1575,19 @@ static inline void ocfs2_set_de_type(struct ocfs2_dir_entry *de,
 
				 	de->file_type = ocfs2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
			
 
				 }
			
 
				 
			
 
				+static inline int ocfs2_gd_is_discontig(struct ocfs2_group_desc *gd)
			
 
				+{
			
 
				+	if ((offsetof(struct ocfs2_group_desc, bg_bitmap) +
			
 
				+	     le16_to_cpu(gd->bg_size)) !=
			
 
				+	    offsetof(struct ocfs2_group_desc, bg_list))
			
 
				+		return 0;
			
 
				+	/*
			
 
				+	 * Only valid to check l_next_free_rec if
			
 
				+	 * bg_bitmap + bg_size == bg_list.
			
 
				+	 */
			
 
				+	if (!gd->bg_list.l_next_free_rec)
			
 
				+		return 0;
			
 
				+	return 1;
			
 
				+}
			
 
				 #endif  /* _OCFS2_FS_H */
			
 
				 
			
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -261,10 +261,8 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
 
				 		brelse(bh);
			
 
				 		goto out;
			
 
				 	}
			
 
				-	err = ocfs2_journal_dirty(handle, bh);
			
 
				+	ocfs2_journal_dirty(handle, bh);
			
 
				 	brelse(bh);
			
 
				-	if (err < 0)
			
 
				-		goto out;
			
 
				 out:
			
 
				 	if (err) {
			
 
				 		mutex_unlock(&gqinode->i_mutex);
			
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -119,12 +119,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
 
				 	lock_buffer(bh);
			
 
				 	modify(bh, private);
			
 
				 	unlock_buffer(bh);
			
 
				-	status = ocfs2_journal_dirty(handle, bh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		ocfs2_commit_trans(OCFS2_SB(sb), handle);
			
 
				-		return status;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, bh);
			
 
				+
			
 
				 	status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
			
 
				 	if (status < 0) {
			
 
				 		mlog_errno(status);
			
@@ -523,9 +519,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
 
				 			ocfs2_clear_bit(bit, dchunk->dqc_bitmap);
			
 
				 			le32_add_cpu(&dchunk->dqc_free, 1);
			
 
				 			unlock_buffer(qbh);
			
 
				-			status = ocfs2_journal_dirty(handle, qbh);
			
 
				-			if (status < 0)
			
 
				-				mlog_errno(status);
			
 
				+			ocfs2_journal_dirty(handle, qbh);
			
 
				 out_commit:
			
 
				 			mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
			
 
				 			ocfs2_commit_trans(OCFS2_SB(sb), handle);
			
@@ -631,9 +625,7 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
 
				 		lock_buffer(bh);
			
 
				 		ldinfo->dqi_flags = cpu_to_le32(flags | OLQF_CLEAN);
			
 
				 		unlock_buffer(bh);
			
 
				-		status = ocfs2_journal_dirty(handle, bh);
			
 
				-		if (status < 0)
			
 
				-			mlog_errno(status);
			
 
				+		ocfs2_journal_dirty(handle, bh);
			
 
				 out_trans:
			
 
				 		ocfs2_commit_trans(osb, handle);
			
 
				 out_bh:
			
@@ -1009,11 +1001,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
 
				 	       sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
			
 
				 	       OCFS2_QBLK_RESERVED_SPACE);
			
 
				 	unlock_buffer(bh);
			
 
				-	status = ocfs2_journal_dirty(handle, bh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto out_trans;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, bh);
			
 
				 
			
 
				 	/* Initialize new block with structures */
			
 
				 	down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
			
@@ -1040,11 +1028,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
 
				 	lock_buffer(dbh);
			
 
				 	memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE);
			
 
				 	unlock_buffer(dbh);
			
 
				-	status = ocfs2_journal_dirty(handle, dbh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto out_trans;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, dbh);
			
 
				 
			
 
				 	/* Update local quotafile info */
			
 
				 	oinfo->dqi_blocks += 2;
			
@@ -1155,11 +1139,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
 
				 	lock_buffer(bh);
			
 
				 	memset(bh->b_data, 0, sb->s_blocksize);
			
 
				 	unlock_buffer(bh);
			
 
				-	status = ocfs2_journal_dirty(handle, bh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto out_trans;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, bh);
			
 
				+
			
 
				 	/* Update chunk header */
			
 
				 	status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode),
			
 
				 					 chunk->qc_headerbh,
			
@@ -1173,11 +1154,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
 
				 	lock_buffer(chunk->qc_headerbh);
			
 
				 	le32_add_cpu(&dchunk->dqc_free, ol_quota_entries_per_block(sb));
			
 
				 	unlock_buffer(chunk->qc_headerbh);
			
 
				-	status = ocfs2_journal_dirty(handle, chunk->qc_headerbh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto out_trans;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, chunk->qc_headerbh);
			
 
				+
			
 
				 	/* Update file header */
			
 
				 	oinfo->dqi_blocks++;
			
 
				 	status = ocfs2_local_write_info(sb, type);
			
@@ -1312,12 +1290,8 @@ static int ocfs2_local_release_dquot(struct dquot *dquot)
 
				 	ocfs2_clear_bit(offset, dchunk->dqc_bitmap);
			
 
				 	le32_add_cpu(&dchunk->dqc_free, 1);
			
 
				 	unlock_buffer(od->dq_chunk->qc_headerbh);
			
 
				-	status = ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto out;
			
 
				-	}
			
 
				-	status = 0;
			
 
				+	ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
			
 
				+
			
 
				 out:
			
 
				 	/* Clear the read bit so that next time someone uses this
			
 
				 	 * dquot he reads fresh info from disk and allocates local
			
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -570,7 +570,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
 
				 	struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL;
			
 
				 	u16 suballoc_bit_start;
			
 
				 	u32 num_got;
			
 
				-	u64 first_blkno;
			
 
				+	u64 suballoc_loc, first_blkno;
			
 
				 
			
 
				 	BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
			
 
				 
			
@@ -596,7 +596,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
 
				 		goto out_commit;
			
 
				 	}
			
 
				 
			
 
				-	ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
			
 
				+	ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
			
 
				 				   &suballoc_bit_start, &num_got,
			
 
				 				   &first_blkno);
			
 
				 	if (ret) {
			
@@ -626,6 +626,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
 
				 	memset(rb, 0, inode->i_sb->s_blocksize);
			
 
				 	strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
			
 
				 	rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
			
 
				+	rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
			
 
				 	rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
			
 
				 	rb->rf_fs_generation = cpu_to_le32(osb->fs_generation);
			
 
				 	rb->rf_blkno = cpu_to_le64(first_blkno);
			
@@ -790,7 +791,10 @@ int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
 
				 	if (le32_to_cpu(rb->rf_count) == 1) {
			
 
				 		blk = le64_to_cpu(rb->rf_blkno);
			
 
				 		bit = le16_to_cpu(rb->rf_suballoc_bit);
			
 
				-		bg_blkno = ocfs2_which_suballoc_group(blk, bit);
			
 
				+		if (rb->rf_suballoc_loc)
			
 
				+			bg_blkno = le64_to_cpu(rb->rf_suballoc_loc);
			
 
				+		else
			
 
				+			bg_blkno = ocfs2_which_suballoc_group(blk, bit);
			
 
				 
			
 
				 		alloc_inode = ocfs2_get_system_file_inode(osb,
			
 
				 					EXTENT_ALLOC_SYSTEM_INODE,
			
@@ -1268,9 +1272,7 @@ static int ocfs2_change_refcount_rec(handle_t *handle,
 
				 	} else if (merge)
			
 
				 		ocfs2_refcount_rec_merge(rb, index);
			
 
				 
			
 
				-	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
			
 
				-	if (ret)
			
 
				-		mlog_errno(ret);
			
 
				+	ocfs2_journal_dirty(handle, ref_leaf_bh);
			
 
				 out:
			
 
				 	return ret;
			
 
				 }
			
@@ -1284,7 +1286,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
 
				 	int ret;
			
 
				 	u16 suballoc_bit_start;
			
 
				 	u32 num_got;
			
 
				-	u64 blkno;
			
 
				+	u64 suballoc_loc, blkno;
			
 
				 	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
			
 
				 	struct buffer_head *new_bh = NULL;
			
 
				 	struct ocfs2_refcount_block *new_rb;
			
@@ -1298,7 +1300,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
			
 
				+	ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
			
 
				 				   &suballoc_bit_start, &num_got,
			
 
				 				   &blkno);
			
 
				 	if (ret) {
			
@@ -1330,6 +1332,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
 
				 
			
 
				 	new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
			
 
				 	new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
			
 
				+	new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
			
 
				 	new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
			
 
				 	new_rb->rf_blkno = cpu_to_le64(blkno);
			
 
				 	new_rb->rf_cpos = cpu_to_le32(0);
			
@@ -1524,7 +1527,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
 
				 	int ret;
			
 
				 	u16 suballoc_bit_start;
			
 
				 	u32 num_got, new_cpos;
			
 
				-	u64 blkno;
			
 
				+	u64 suballoc_loc, blkno;
			
 
				 	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
			
 
				 	struct ocfs2_refcount_block *root_rb =
			
 
				 			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
			
@@ -1548,7 +1551,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
			
 
				+	ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
			
 
				 				   &suballoc_bit_start, &num_got,
			
 
				 				   &blkno);
			
 
				 	if (ret) {
			
@@ -1576,6 +1579,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
 
				 	memset(new_rb, 0, sb->s_blocksize);
			
 
				 	strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
			
 
				 	new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
			
 
				+	new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
			
 
				 	new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
			
 
				 	new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
			
 
				 	new_rb->rf_blkno = cpu_to_le64(blkno);
			
@@ -1694,7 +1698,7 @@ static int ocfs2_adjust_refcount_rec(handle_t *handle,
 
				 	 * 2 more credits, one for the leaf refcount block, one for
			
 
				 	 * the extent block contains the extent rec.
			
 
				 	 */
			
 
				-	ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2);
			
 
				+	ret = ocfs2_extend_trans(handle, 2);
			
 
				 	if (ret < 0) {
			
 
				 		mlog_errno(ret);
			
 
				 		goto out;
			
@@ -1802,11 +1806,7 @@ static int ocfs2_insert_refcount_rec(handle_t *handle,
 
				 	if (merge)
			
 
				 		ocfs2_refcount_rec_merge(rb, index);
			
 
				 
			
 
				-	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
			
 
				-	if (ret) {
			
 
				-		mlog_errno(ret);
			
 
				-		goto out;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, ref_leaf_bh);
			
 
				 
			
 
				 	if (index == 0) {
			
 
				 		ret = ocfs2_adjust_refcount_rec(handle, ci,
			
@@ -1977,9 +1977,7 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
 
				 			ocfs2_refcount_rec_merge(rb, index);
			
 
				 	}
			
 
				 
			
 
				-	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
			
 
				-	if (ret)
			
 
				-		mlog_errno(ret);
			
 
				+	ocfs2_journal_dirty(handle, ref_leaf_bh);
			
 
				 
			
 
				 out:
			
 
				 	brelse(new_bh);
			
@@ -2112,6 +2110,7 @@ static int ocfs2_remove_refcount_extent(handle_t *handle,
 
				 	 */
			
 
				 	ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE,
			
 
				 					le16_to_cpu(rb->rf_suballoc_slot),
			
 
				+					le64_to_cpu(rb->rf_suballoc_loc),
			
 
				 					le64_to_cpu(rb->rf_blkno),
			
 
				 					le16_to_cpu(rb->rf_suballoc_bit));
			
 
				 	if (ret) {
			
@@ -2516,20 +2515,19 @@ out:
 
				  *
			
 
				  * Normally the refcount blocks store these refcount should be
			
 
				  * contiguous also, so that we can get the number easily.
			
 
				- * As for meta_ac, we will at most add split 2 refcount record and
			
 
				- * 2 more refcount block, so just check it in a rough way.
			
 
				+ * We will at most add split 2 refcount records and 2 more
			
 
				+ * refcount blocks, so just check it in a rough way.
			
 
				  *
			
 
				  * Caller must hold refcount tree lock.
			
 
				  */
			
 
				 int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
			
 
				-					  struct buffer_head *di_bh,
			
 
				+					  u64 refcount_loc,
			
 
				 					  u64 phys_blkno,
			
 
				 					  u32 clusters,
			
 
				 					  int *credits,
			
 
				-					  struct ocfs2_alloc_context **meta_ac)
			
 
				+					  int *ref_blocks)
			
 
				 {
			
 
				-	int ret, ref_blocks = 0;
			
 
				-	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
			
 
				+	int ret;
			
 
				 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
			
 
				 	struct buffer_head *ref_root_bh = NULL;
			
 
				 	struct ocfs2_refcount_tree *tree;
			
@@ -2546,14 +2544,13 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
 
				 	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
			
 
				 
			
 
				 	ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
			
 
				-				      le64_to_cpu(di->i_refcount_loc), &tree);
			
 
				+				      refcount_loc, &tree);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	ret = ocfs2_read_refcount_block(&tree->rf_ci,
			
 
				-					le64_to_cpu(di->i_refcount_loc),
			
 
				+	ret = ocfs2_read_refcount_block(&tree->rf_ci, refcount_loc,
			
 
				 					&ref_root_bh);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
@@ -2564,21 +2561,14 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
 
				 					       &tree->rf_ci,
			
 
				 					       ref_root_bh,
			
 
				 					       start_cpos, clusters,
			
 
				-					       &ref_blocks, credits);
			
 
				+					       ref_blocks, credits);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	mlog(0, "reserve new metadata %d, credits = %d\n",
			
 
				-	     ref_blocks, *credits);
			
 
				-
			
 
				-	if (ref_blocks) {
			
 
				-		ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
			
 
				-							ref_blocks, meta_ac);
			
 
				-		if (ret)
			
 
				-			mlog_errno(ret);
			
 
				-	}
			
 
				+	mlog(0, "reserve new metadata %d blocks, credits = %d\n",
			
 
				+	     *ref_blocks, *credits);
			
 
				 
			
 
				 out:
			
 
				 	brelse(ref_root_bh);
			
@@ -3040,11 +3030,7 @@ static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
 
				 		}
			
 
				 
			
 
				 		memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize);
			
 
				-		ret = ocfs2_journal_dirty(handle, new_bh);
			
 
				-		if (ret) {
			
 
				-			mlog_errno(ret);
			
 
				-			break;
			
 
				-		}
			
 
				+		ocfs2_journal_dirty(handle, new_bh);
			
 
				 
			
 
				 		brelse(new_bh);
			
 
				 		brelse(old_bh);
			
@@ -3282,7 +3268,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
 
				 		} else {
			
 
				 			delete = 1;
			
 
				 
			
 
				-			ret = __ocfs2_claim_clusters(osb, handle,
			
 
				+			ret = __ocfs2_claim_clusters(handle,
			
 
				 						     context->data_ac,
			
 
				 						     1, set_len,
			
 
				 						     &new_bit, &new_len);
			
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -47,11 +47,11 @@ int ocfs2_decrease_refcount(struct inode *inode,
 
				 			    struct ocfs2_cached_dealloc_ctxt *dealloc,
			
 
				 			    int delete);
			
 
				 int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
			
 
				-					  struct buffer_head *di_bh,
			
 
				+					  u64 refcount_loc,
			
 
				 					  u64 phys_blkno,
			
 
				 					  u32 clusters,
			
 
				 					  int *credits,
			
 
				-					  struct ocfs2_alloc_context **meta_ac);
			
 
				+					  int *ref_blocks);
			
 
				 int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
			
 
				 		       u32 cpos, u32 write_len, u32 max_cpos);
			
 
				 
			
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -0,0 +1,847 @@
 
				+/* -*- mode: c; c-basic-offset: 8; -*-
			
 
				+ * vim: noexpandtab sw=8 ts=8 sts=0:
			
 
				+ *
			
 
				+ * reservations.c
			
 
				+ *
			
 
				+ * Allocation reservations implementation
			
 
				+ *
			
 
				+ * Some code borrowed from fs/ext3/balloc.c and is:
			
 
				+ *
			
 
				+ * Copyright (C) 1992, 1993, 1994, 1995
			
 
				+ * Remy Card (card@masi.ibp.fr)
			
 
				+ * Laboratoire MASI - Institut Blaise Pascal
			
 
				+ * Universite Pierre et Marie Curie (Paris VI)
			
 
				+ *
			
 
				+ * The rest is copyright (C) 2010 Novell.  All rights reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public
			
 
				+ * License version 2 as published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				+ * General Public License for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <linux/fs.h>
			
 
				+#include <linux/types.h>
			
 
				+#include <linux/slab.h>
			
 
				+#include <linux/highmem.h>
			
 
				+#include <linux/bitops.h>
			
 
				+#include <linux/list.h>
			
 
				+
			
 
				+#define MLOG_MASK_PREFIX ML_RESERVATIONS
			
 
				+#include <cluster/masklog.h>
			
 
				+
			
 
				+#include "ocfs2.h"
			
 
				+
			
 
				+#ifdef CONFIG_OCFS2_DEBUG_FS
			
 
				+#define OCFS2_CHECK_RESERVATIONS
			
 
				+#endif
			
 
				+
			
 
				+DEFINE_SPINLOCK(resv_lock);
			
 
				+
			
 
				+#define	OCFS2_MIN_RESV_WINDOW_BITS	8
			
 
				+#define	OCFS2_MAX_RESV_WINDOW_BITS	1024
			
 
				+
			
 
				+int ocfs2_dir_resv_allowed(struct ocfs2_super *osb)
			
 
				+{
			
 
				+	return (osb->osb_resv_level && osb->osb_dir_resv_level);
			
 
				+}
			
 
				+
			
 
				+static unsigned int ocfs2_resv_window_bits(struct ocfs2_reservation_map *resmap,
			
 
				+					   struct ocfs2_alloc_reservation *resv)
			
 
				+{
			
 
				+	struct ocfs2_super *osb = resmap->m_osb;
			
 
				+	unsigned int bits;
			
 
				+
			
 
				+	if (!(resv->r_flags & OCFS2_RESV_FLAG_DIR)) {
			
 
				+		/* 8, 16, 32, 64, 128, 256, 512, 1024 */
			
 
				+		bits = 4 << osb->osb_resv_level;
			
 
				+	} else {
			
 
				+		bits = 4 << osb->osb_dir_resv_level;
			
 
				+	}
			
 
				+	return bits;
			
 
				+}
			
 
				+
			
 
				+static inline unsigned int ocfs2_resv_end(struct ocfs2_alloc_reservation *resv)
			
 
				+{
			
 
				+	if (resv->r_len)
			
 
				+		return resv->r_start + resv->r_len - 1;
			
 
				+	return resv->r_start;
			
 
				+}
			
 
				+
			
 
				+static inline int ocfs2_resv_empty(struct ocfs2_alloc_reservation *resv)
			
 
				+{
			
 
				+	return !!(resv->r_len == 0);
			
 
				+}
			
 
				+
			
 
				+static inline int ocfs2_resmap_disabled(struct ocfs2_reservation_map *resmap)
			
 
				+{
			
 
				+	if (resmap->m_osb->osb_resv_level == 0)
			
 
				+		return 1;
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static void ocfs2_dump_resv(struct ocfs2_reservation_map *resmap)
			
 
				+{
			
 
				+	struct ocfs2_super *osb = resmap->m_osb;
			
 
				+	struct rb_node *node;
			
 
				+	struct ocfs2_alloc_reservation *resv;
			
 
				+	int i = 0;
			
 
				+
			
 
				+	mlog(ML_NOTICE, "Dumping resmap for device %s. Bitmap length: %u\n",
			
 
				+	     osb->dev_str, resmap->m_bitmap_len);
			
 
				+
			
 
				+	node = rb_first(&resmap->m_reservations);
			
 
				+	while (node) {
			
 
				+		resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
			
 
				+
			
 
				+		mlog(ML_NOTICE, "start: %u\tend: %u\tlen: %u\tlast_start: %u"
			
 
				+		     "\tlast_len: %u\n", resv->r_start,
			
 
				+		     ocfs2_resv_end(resv), resv->r_len, resv->r_last_start,
			
 
				+		     resv->r_last_len);
			
 
				+
			
 
				+		node = rb_next(node);
			
 
				+		i++;
			
 
				+	}
			
 
				+
			
 
				+	mlog(ML_NOTICE, "%d reservations found. LRU follows\n", i);
			
 
				+
			
 
				+	i = 0;
			
 
				+	list_for_each_entry(resv, &resmap->m_lru, r_lru) {
			
 
				+		mlog(ML_NOTICE, "LRU(%d) start: %u\tend: %u\tlen: %u\t"
			
 
				+		     "last_start: %u\tlast_len: %u\n", i, resv->r_start,
			
 
				+		     ocfs2_resv_end(resv), resv->r_len, resv->r_last_start,
			
 
				+		     resv->r_last_len);
			
 
				+
			
 
				+		i++;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+#ifdef OCFS2_CHECK_RESERVATIONS
			
 
				+static int ocfs2_validate_resmap_bits(struct ocfs2_reservation_map *resmap,
			
 
				+				      int i,
			
 
				+				      struct ocfs2_alloc_reservation *resv)
			
 
				+{
			
 
				+	char *disk_bitmap = resmap->m_disk_bitmap;
			
 
				+	unsigned int start = resv->r_start;
			
 
				+	unsigned int end = ocfs2_resv_end(resv);
			
 
				+
			
 
				+	while (start <= end) {
			
 
				+		if (ocfs2_test_bit(start, disk_bitmap)) {
			
 
				+			mlog(ML_ERROR,
			
 
				+			     "reservation %d covers an allocated area "
			
 
				+			     "starting at bit %u!\n", i, start);
			
 
				+			return 1;
			
 
				+		}
			
 
				+
			
 
				+		start++;
			
 
				+	}
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static void ocfs2_check_resmap(struct ocfs2_reservation_map *resmap)
			
 
				+{
			
 
				+	unsigned int off = 0;
			
 
				+	int i = 0;
			
 
				+	struct rb_node *node;
			
 
				+	struct ocfs2_alloc_reservation *resv;
			
 
				+
			
 
				+	node = rb_first(&resmap->m_reservations);
			
 
				+	while (node) {
			
 
				+		resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
			
 
				+
			
 
				+		if (i > 0 && resv->r_start <= off) {
			
 
				+			mlog(ML_ERROR, "reservation %d has bad start off!\n",
			
 
				+			     i);
			
 
				+			goto bad;
			
 
				+		}
			
 
				+
			
 
				+		if (resv->r_len == 0) {
			
 
				+			mlog(ML_ERROR, "reservation %d has no length!\n",
			
 
				+			     i);
			
 
				+			goto bad;
			
 
				+		}
			
 
				+
			
 
				+		if (resv->r_start > ocfs2_resv_end(resv)) {
			
 
				+			mlog(ML_ERROR, "reservation %d has invalid range!\n",
			
 
				+			     i);
			
 
				+			goto bad;
			
 
				+		}
			
 
				+
			
 
				+		if (ocfs2_resv_end(resv) >= resmap->m_bitmap_len) {
			
 
				+			mlog(ML_ERROR, "reservation %d extends past bitmap!\n",
			
 
				+			     i);
			
 
				+			goto bad;
			
 
				+		}
			
 
				+
			
 
				+		if (ocfs2_validate_resmap_bits(resmap, i, resv))
			
 
				+			goto bad;
			
 
				+
			
 
				+		off = ocfs2_resv_end(resv);
			
 
				+		node = rb_next(node);
			
 
				+
			
 
				+		i++;
			
 
				+	}
			
 
				+	return;
			
 
				+
			
 
				+bad:
			
 
				+	ocfs2_dump_resv(resmap);
			
 
				+	BUG();
			
 
				+}
			
 
				+#else
			
 
				+static inline void ocfs2_check_resmap(struct ocfs2_reservation_map *resmap)
			
 
				+{
			
 
				+
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv)
			
 
				+{
			
 
				+	memset(resv, 0, sizeof(*resv));
			
 
				+	INIT_LIST_HEAD(&resv->r_lru);
			
 
				+}
			
 
				+
			
 
				+void ocfs2_resv_set_type(struct ocfs2_alloc_reservation *resv,
			
 
				+			 unsigned int flags)
			
 
				+{
			
 
				+	BUG_ON(flags & ~OCFS2_RESV_TYPES);
			
 
				+
			
 
				+	resv->r_flags |= flags;
			
 
				+}
			
 
				+
			
 
				+int ocfs2_resmap_init(struct ocfs2_super *osb,
			
 
				+		      struct ocfs2_reservation_map *resmap)
			
 
				+{
			
 
				+	memset(resmap, 0, sizeof(*resmap));
			
 
				+
			
 
				+	resmap->m_osb = osb;
			
 
				+	resmap->m_reservations = RB_ROOT;
			
 
				+	/* m_bitmap_len is initialized to zero by the above memset. */
			
 
				+	INIT_LIST_HEAD(&resmap->m_lru);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static void ocfs2_resv_mark_lru(struct ocfs2_reservation_map *resmap,
			
 
				+				struct ocfs2_alloc_reservation *resv)
			
 
				+{
			
 
				+	assert_spin_locked(&resv_lock);
			
 
				+
			
 
				+	if (!list_empty(&resv->r_lru))
			
 
				+		list_del_init(&resv->r_lru);
			
 
				+
			
 
				+	list_add_tail(&resv->r_lru, &resmap->m_lru);
			
 
				+}
			
 
				+
			
 
				+static void __ocfs2_resv_trunc(struct ocfs2_alloc_reservation *resv)
			
 
				+{
			
 
				+	resv->r_len = 0;
			
 
				+	resv->r_start = 0;
			
 
				+}
			
 
				+
			
 
				+static void ocfs2_resv_remove(struct ocfs2_reservation_map *resmap,
			
 
				+			      struct ocfs2_alloc_reservation *resv)
			
 
				+{
			
 
				+	if (resv->r_flags & OCFS2_RESV_FLAG_INUSE) {
			
 
				+		list_del_init(&resv->r_lru);
			
 
				+		rb_erase(&resv->r_node, &resmap->m_reservations);
			
 
				+		resv->r_flags &= ~OCFS2_RESV_FLAG_INUSE;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static void __ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
			
 
				+				 struct ocfs2_alloc_reservation *resv)
			
 
				+{
			
 
				+	assert_spin_locked(&resv_lock);
			
 
				+
			
 
				+	__ocfs2_resv_trunc(resv);
			
 
				+	/*
			
 
				+	 * last_len and last_start no longer make sense if
			
 
				+	 * we're changing the range of our allocations.
			
 
				+	 */
			
 
				+	resv->r_last_len = resv->r_last_start = 0;
			
 
				+
			
 
				+	ocfs2_resv_remove(resmap, resv);
			
 
				+}
			
 
				+
			
 
				+/* does nothing if 'resv' is null */
			
 
				+void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
			
 
				+			struct ocfs2_alloc_reservation *resv)
			
 
				+{
			
 
				+	if (resv) {
			
 
				+		spin_lock(&resv_lock);
			
 
				+		__ocfs2_resv_discard(resmap, resv);
			
 
				+		spin_unlock(&resv_lock);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static void ocfs2_resmap_clear_all_resv(struct ocfs2_reservation_map *resmap)
			
 
				+{
			
 
				+	struct rb_node *node;
			
 
				+	struct ocfs2_alloc_reservation *resv;
			
 
				+
			
 
				+	assert_spin_locked(&resv_lock);
			
 
				+
			
 
				+	while ((node = rb_last(&resmap->m_reservations)) != NULL) {
			
 
				+		resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
			
 
				+
			
 
				+		__ocfs2_resv_discard(resmap, resv);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap,
			
 
				+			  unsigned int clen, char *disk_bitmap)
			
 
				+{
			
 
				+	if (ocfs2_resmap_disabled(resmap))
			
 
				+		return;
			
 
				+
			
 
				+	spin_lock(&resv_lock);
			
 
				+
			
 
				+	ocfs2_resmap_clear_all_resv(resmap);
			
 
				+	resmap->m_bitmap_len = clen;
			
 
				+	resmap->m_disk_bitmap = disk_bitmap;
			
 
				+
			
 
				+	spin_unlock(&resv_lock);
			
 
				+}
			
 
				+
			
 
				+void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap)
			
 
				+{
			
 
				+	/* Does nothing for now. Keep this around for API symmetry */
			
 
				+}
			
 
				+
			
 
				+static void ocfs2_resv_insert(struct ocfs2_reservation_map *resmap,
			
 
				+			      struct ocfs2_alloc_reservation *new)
			
 
				+{
			
 
				+	struct rb_root *root = &resmap->m_reservations;
			
 
				+	struct rb_node *parent = NULL;
			
 
				+	struct rb_node **p = &root->rb_node;
			
 
				+	struct ocfs2_alloc_reservation *tmp;
			
 
				+
			
 
				+	assert_spin_locked(&resv_lock);
			
 
				+
			
 
				+	mlog(0, "Insert reservation start: %u len: %u\n", new->r_start,
			
 
				+	     new->r_len);
			
 
				+
			
 
				+	while (*p) {
			
 
				+		parent = *p;
			
 
				+
			
 
				+		tmp = rb_entry(parent, struct ocfs2_alloc_reservation, r_node);
			
 
				+
			
 
				+		if (new->r_start < tmp->r_start) {
			
 
				+			p = &(*p)->rb_left;
			
 
				+
			
 
				+			/*
			
 
				+			 * This is a good place to check for
			
 
				+			 * overlapping reservations.
			
 
				+			 */
			
 
				+			BUG_ON(ocfs2_resv_end(new) >= tmp->r_start);
			
 
				+		} else if (new->r_start > ocfs2_resv_end(tmp)) {
			
 
				+			p = &(*p)->rb_right;
			
 
				+		} else {
			
 
				+			/* This should never happen! */
			
 
				+			mlog(ML_ERROR, "Duplicate reservation window!\n");
			
 
				+			BUG();
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	rb_link_node(&new->r_node, parent, p);
			
 
				+	rb_insert_color(&new->r_node, root);
			
 
				+	new->r_flags |= OCFS2_RESV_FLAG_INUSE;
			
 
				+
			
 
				+	ocfs2_resv_mark_lru(resmap, new);
			
 
				+
			
 
				+	ocfs2_check_resmap(resmap);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * ocfs2_find_resv_lhs() - find the window which contains goal
			
 
				+ * @resmap: reservation map to search
			
 
				+ * @goal: which bit to search for
			
 
				+ *
			
 
				+ * If a window containing that goal is not found, we return the window
			
 
				+ * which comes before goal. Returns NULL on empty rbtree or no window
			
 
				+ * before goal.
			
 
				+ */
			
 
				+static struct ocfs2_alloc_reservation *
			
 
				+ocfs2_find_resv_lhs(struct ocfs2_reservation_map *resmap, unsigned int goal)
			
 
				+{
			
 
				+	struct ocfs2_alloc_reservation *resv = NULL;
			
 
				+	struct ocfs2_alloc_reservation *prev_resv = NULL;
			
 
				+	struct rb_node *node = resmap->m_reservations.rb_node;
			
 
				+
			
 
				+	assert_spin_locked(&resv_lock);
			
 
				+
			
 
				+	if (!node)
			
 
				+		return NULL;
			
 
				+
			
 
				+	node = rb_first(&resmap->m_reservations);
			
 
				+	while (node) {
			
 
				+		resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
			
 
				+
			
 
				+		if (resv->r_start <= goal && ocfs2_resv_end(resv) >= goal)
			
 
				+			break;
			
 
				+
			
 
				+		/* Check if we overshot the reservation just before goal? */
			
 
				+		if (resv->r_start > goal) {
			
 
				+			resv = prev_resv;
			
 
				+			break;
			
 
				+		}
			
 
				+
			
 
				+		prev_resv = resv;
			
 
				+		node = rb_next(node);
			
 
				+	}
			
 
				+
			
 
				+	return resv;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * We are given a range within the bitmap, which corresponds to a gap
			
 
				+ * inside the reservations tree (search_start, search_len). The range
			
 
				+ * can be anything from the whole bitmap, to a gap between
			
 
				+ * reservations.
			
 
				+ *
			
 
				+ * The start value of *rstart is insignificant.
			
 
				+ *
			
 
				+ * This function searches the bitmap range starting at search_start
			
 
				+ * with length search_len for a set of contiguous free bits. We try
			
 
				+ * to find up to 'wanted' bits, but can sometimes return less.
			
 
				+ *
			
 
				+ * Returns the length of allocation, 0 if no free bits are found.
			
 
				+ *
			
 
				+ * *rstart and *rlen will also be populated with the result.
			
 
				+ */
			
 
				+static int ocfs2_resmap_find_free_bits(struct ocfs2_reservation_map *resmap,
			
 
				+				       unsigned int wanted,
			
 
				+				       unsigned int search_start,
			
 
				+				       unsigned int search_len,
			
 
				+				       unsigned int *rstart,
			
 
				+				       unsigned int *rlen)
			
 
				+{
			
 
				+	void *bitmap = resmap->m_disk_bitmap;
			
 
				+	unsigned int best_start, best_len = 0;
			
 
				+	int offset, start, found;
			
 
				+
			
 
				+	mlog(0, "Find %u bits within range (%u, len %u) resmap len: %u\n",
			
 
				+	     wanted, search_start, search_len, resmap->m_bitmap_len);
			
 
				+
			
 
				+	found = best_start = best_len = 0;
			
 
				+
			
 
				+	start = search_start;
			
 
				+	while ((offset = ocfs2_find_next_zero_bit(bitmap, resmap->m_bitmap_len,
			
 
				+						 start)) != -1) {
			
 
				+		/* Search reached end of the region */
			
 
				+		if (offset >= (search_start + search_len))
			
 
				+			break;
			
 
				+
			
 
				+		if (offset == start) {
			
 
				+			/* we found a zero */
			
 
				+			found++;
			
 
				+			/* move start to the next bit to test */
			
 
				+			start++;
			
 
				+		} else {
			
 
				+			/* got a zero after some ones */
			
 
				+			found = 1;
			
 
				+			start = offset + 1;
			
 
				+		}
			
 
				+		if (found > best_len) {
			
 
				+			best_len = found;
			
 
				+			best_start = start - found;
			
 
				+		}
			
 
				+
			
 
				+		if (found >= wanted)
			
 
				+			break;
			
 
				+	}
			
 
				+
			
 
				+	if (best_len == 0)
			
 
				+		return 0;
			
 
				+
			
 
				+	if (best_len >= wanted)
			
 
				+		best_len = wanted;
			
 
				+
			
 
				+	*rlen = best_len;
			
 
				+	*rstart = best_start;
			
 
				+
			
 
				+	mlog(0, "Found start: %u len: %u\n", best_start, best_len);
			
 
				+
			
 
				+	return *rlen;
			
 
				+}
			
 
				+
			
 
				+static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
			
 
				+				     struct ocfs2_alloc_reservation *resv,
			
 
				+				     unsigned int goal, unsigned int wanted)
			
 
				+{
			
 
				+	struct rb_root *root = &resmap->m_reservations;
			
 
				+	unsigned int gap_start, gap_end, gap_len;
			
 
				+	struct ocfs2_alloc_reservation *prev_resv, *next_resv;
			
 
				+	struct rb_node *prev, *next;
			
 
				+	unsigned int cstart, clen;
			
 
				+	unsigned int best_start = 0, best_len = 0;
			
 
				+
			
 
				+	/*
			
 
				+	 * Nasty cases to consider:
			
 
				+	 *
			
 
				+	 * - rbtree is empty
			
 
				+	 * - our window should be first in all reservations
			
 
				+	 * - our window should be last in all reservations
			
 
				+	 * - need to make sure we don't go past end of bitmap
			
 
				+	 */
			
 
				+
			
 
				+	mlog(0, "resv start: %u resv end: %u goal: %u wanted: %u\n",
			
 
				+	     resv->r_start, ocfs2_resv_end(resv), goal, wanted);
			
 
				+
			
 
				+	assert_spin_locked(&resv_lock);
			
 
				+
			
 
				+	if (RB_EMPTY_ROOT(root)) {
			
 
				+		/*
			
 
				+		 * Easiest case - empty tree. We can just take
			
 
				+		 * whatever window of free bits we want.
			
 
				+		 */
			
 
				+
			
 
				+		mlog(0, "Empty root\n");
			
 
				+
			
 
				+		clen = ocfs2_resmap_find_free_bits(resmap, wanted, goal,
			
 
				+						   resmap->m_bitmap_len - goal,
			
 
				+						   &cstart, &clen);
			
 
				+
			
 
				+		/*
			
 
				+		 * This should never happen - the local alloc window
			
 
				+		 * will always have free bits when we're called.
			
 
				+		 */
			
 
				+		BUG_ON(goal == 0 && clen == 0);
			
 
				+
			
 
				+		if (clen == 0)
			
 
				+			return;
			
 
				+
			
 
				+		resv->r_start = cstart;
			
 
				+		resv->r_len = clen;
			
 
				+
			
 
				+		ocfs2_resv_insert(resmap, resv);
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	prev_resv = ocfs2_find_resv_lhs(resmap, goal);
			
 
				+
			
 
				+	if (prev_resv == NULL) {
			
 
				+		mlog(0, "Goal on LHS of leftmost window\n");
			
 
				+
			
 
				+		/*
			
 
				+		 * A NULL here means that the search code couldn't
			
 
				+		 * find a window that starts before goal.
			
 
				+		 *
			
 
				+		 * However, we can take the first window after goal,
			
 
				+		 * which is also by definition, the leftmost window in
			
 
				+		 * the entire tree. If we can find free bits in the
			
 
				+		 * gap between goal and the LHS window, then the
			
 
				+		 * reservation can safely be placed there.
			
 
				+		 *
			
 
				+		 * Otherwise we fall back to a linear search, checking
			
 
				+		 * the gaps in between windows for a place to
			
 
				+		 * allocate.
			
 
				+		 */
			
 
				+
			
 
				+		next = rb_first(root);
			
 
				+		next_resv = rb_entry(next, struct ocfs2_alloc_reservation,
			
 
				+				     r_node);
			
 
				+
			
 
				+		/*
			
 
				+		 * The search should never return such a window. (see
			
 
				+		 * comment above)
			
 
				+		 */
			
 
				+		if (next_resv->r_start <= goal) {
			
 
				+			mlog(ML_ERROR, "goal: %u next_resv: start %u len %u\n",
			
 
				+			     goal, next_resv->r_start, next_resv->r_len);
			
 
				+			ocfs2_dump_resv(resmap);
			
 
				+			BUG();
			
 
				+		}
			
 
				+
			
 
				+		clen = ocfs2_resmap_find_free_bits(resmap, wanted, goal,
			
 
				+						   next_resv->r_start - goal,
			
 
				+						   &cstart, &clen);
			
 
				+		if (clen) {
			
 
				+			best_len = clen;
			
 
				+			best_start = cstart;
			
 
				+			if (best_len == wanted)
			
 
				+				goto out_insert;
			
 
				+		}
			
 
				+
			
 
				+		prev_resv = next_resv;
			
 
				+		next_resv = NULL;
			
 
				+	}
			
 
				+
			
 
				+	prev = &prev_resv->r_node;
			
 
				+
			
 
				+	/* Now we do a linear search for a window, starting at 'prev_resv' */
			
 
				+	while (1) {
			
 
				+		next = rb_next(prev);
			
 
				+		if (next) {
			
 
				+			mlog(0, "One more resv found in linear search\n");
			
 
				+			next_resv = rb_entry(next,
			
 
				+					     struct ocfs2_alloc_reservation,
			
 
				+					     r_node);
			
 
				+
			
 
				+			gap_start = ocfs2_resv_end(prev_resv) + 1;
			
 
				+			gap_end = next_resv->r_start - 1;
			
 
				+			gap_len = gap_end - gap_start + 1;
			
 
				+		} else {
			
 
				+			mlog(0, "No next node\n");
			
 
				+			/*
			
 
				+			 * We're at the rightmost edge of the
			
 
				+			 * tree. See if a reservation between this
			
 
				+			 * window and the end of the bitmap will work.
			
 
				+			 */
			
 
				+			gap_start = ocfs2_resv_end(prev_resv) + 1;
			
 
				+			gap_len = resmap->m_bitmap_len - gap_start;
			
 
				+			gap_end = resmap->m_bitmap_len - 1;
			
 
				+		}
			
 
				+
			
 
				+		/*
			
 
				+		 * No need to check this gap if we have already found
			
 
				+		 * a larger region of free bits.
			
 
				+		 */
			
 
				+		if (gap_len <= best_len)
			
 
				+			goto next_resv;
			
 
				+
			
 
				+		clen = ocfs2_resmap_find_free_bits(resmap, wanted, gap_start,
			
 
				+						   gap_len, &cstart, &clen);
			
 
				+		if (clen == wanted) {
			
 
				+			best_len = clen;
			
 
				+			best_start = cstart;
			
 
				+			goto out_insert;
			
 
				+		} else if (clen > best_len) {
			
 
				+			best_len = clen;
			
 
				+			best_start = cstart;
			
 
				+		}
			
 
				+
			
 
				+next_resv:
			
 
				+		if (!next)
			
 
				+			break;
			
 
				+
			
 
				+		prev = next;
			
 
				+		prev_resv = rb_entry(prev, struct ocfs2_alloc_reservation,
			
 
				+				     r_node);
			
 
				+	}
			
 
				+
			
 
				+out_insert:
			
 
				+	if (best_len) {
			
 
				+		resv->r_start = best_start;
			
 
				+		resv->r_len = best_len;
			
 
				+		ocfs2_resv_insert(resmap, resv);
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+static void ocfs2_cannibalize_resv(struct ocfs2_reservation_map *resmap,
			
 
				+				   struct ocfs2_alloc_reservation *resv,
			
 
				+				   unsigned int wanted)
			
 
				+{
			
 
				+	struct ocfs2_alloc_reservation *lru_resv;
			
 
				+	int tmpwindow = !!(resv->r_flags & OCFS2_RESV_FLAG_TMP);
			
 
				+	unsigned int min_bits;
			
 
				+
			
 
				+	if (!tmpwindow)
			
 
				+		min_bits = ocfs2_resv_window_bits(resmap, resv) >> 1;
			
 
				+	else
			
 
				+		min_bits = wanted; /* We know the temp window will use all
			
 
				+				    * of these bits */
			
 
				+
			
 
				+	/*
			
 
				+	 * Take the first reservation off the LRU as our 'target'. We
			
 
				+	 * don't try to be smart about it. There might be a case for
			
 
				+	 * searching based on size but I don't have enough data to be
			
 
				+	 * sure. --Mark (3/16/2010)
			
 
				+	 */
			
 
				+	lru_resv = list_first_entry(&resmap->m_lru,
			
 
				+				    struct ocfs2_alloc_reservation, r_lru);
			
 
				+
			
 
				+	mlog(0, "lru resv: start: %u len: %u end: %u\n", lru_resv->r_start,
			
 
				+	     lru_resv->r_len, ocfs2_resv_end(lru_resv));
			
 
				+
			
 
				+	/*
			
 
				+	 * Cannibalize (some or all) of the target reservation and
			
 
				+	 * feed it to the current window.
			
 
				+	 */
			
 
				+	if (lru_resv->r_len <= min_bits) {
			
 
				+		/*
			
 
				+		 * Discard completely if size is less than or equal to a
			
 
				+		 * reasonable threshold - 50% of window bits for non temporary
			
 
				+		 * windows.
			
 
				+		 */
			
 
				+		resv->r_start = lru_resv->r_start;
			
 
				+		resv->r_len = lru_resv->r_len;
			
 
				+
			
 
				+		__ocfs2_resv_discard(resmap, lru_resv);
			
 
				+	} else {
			
 
				+		unsigned int shrink;
			
 
				+		if (tmpwindow)
			
 
				+			shrink = min_bits;
			
 
				+		else
			
 
				+			shrink = lru_resv->r_len / 2;
			
 
				+
			
 
				+		lru_resv->r_len -= shrink;
			
 
				+
			
 
				+		resv->r_start = ocfs2_resv_end(lru_resv) + 1;
			
 
				+		resv->r_len = shrink;
			
 
				+	}
			
 
				+
			
 
				+	mlog(0, "Reservation now looks like: r_start: %u r_end: %u "
			
 
				+	     "r_len: %u r_last_start: %u r_last_len: %u\n",
			
 
				+	     resv->r_start, ocfs2_resv_end(resv), resv->r_len,
			
 
				+	     resv->r_last_start, resv->r_last_len);
			
 
				+
			
 
				+	ocfs2_resv_insert(resmap, resv);
			
 
				+}
			
 
				+
			
 
				+static void ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
			
 
				+				   struct ocfs2_alloc_reservation *resv,
			
 
				+				   unsigned int wanted)
			
 
				+{
			
 
				+	unsigned int goal = 0;
			
 
				+
			
 
				+	BUG_ON(!ocfs2_resv_empty(resv));
			
 
				+
			
 
				+	/*
			
 
				+	 * Begin by trying to get a window as close to the previous
			
 
				+	 * one as possible. Using the most recent allocation as a
			
 
				+	 * start goal makes sense.
			
 
				+	 */
			
 
				+	if (resv->r_last_len) {
			
 
				+		goal = resv->r_last_start + resv->r_last_len;
			
 
				+		if (goal >= resmap->m_bitmap_len)
			
 
				+			goal = 0;
			
 
				+	}
			
 
				+
			
 
				+	__ocfs2_resv_find_window(resmap, resv, goal, wanted);
			
 
				+
			
 
				+	/* Search from last alloc didn't work, try once more from beginning. */
			
 
				+	if (ocfs2_resv_empty(resv) && goal != 0)
			
 
				+		__ocfs2_resv_find_window(resmap, resv, 0, wanted);
			
 
				+
			
 
				+	if (ocfs2_resv_empty(resv)) {
			
 
				+		/*
			
 
				+		 * Still empty? Pull oldest one off the LRU, remove it from
			
 
				+		 * tree, put this one in its place.
			
 
				+		 */
			
 
				+		ocfs2_cannibalize_resv(resmap, resv, wanted);
			
 
				+	}
			
 
				+
			
 
				+	BUG_ON(ocfs2_resv_empty(resv));
			
 
				+}
			
 
				+
			
 
				+int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
			
 
				+			   struct ocfs2_alloc_reservation *resv,
			
 
				+			   int *cstart, int *clen)
			
 
				+{
			
 
				+	unsigned int wanted = *clen;
			
 
				+
			
 
				+	if (resv == NULL || ocfs2_resmap_disabled(resmap))
			
 
				+		return -ENOSPC;
			
 
				+
			
 
				+	spin_lock(&resv_lock);
			
 
				+
			
 
				+	/*
			
 
				+	 * We don't want to over-allocate for temporary
			
 
				+	 * windows. Otherwise, we run the risk of fragmenting the
			
 
				+	 * allocation space.
			
 
				+	 */
			
 
				+	wanted = ocfs2_resv_window_bits(resmap, resv);
			
 
				+	if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
			
 
				+		wanted = *clen;
			
 
				+
			
 
				+	if (ocfs2_resv_empty(resv)) {
			
 
				+		mlog(0, "empty reservation, find new window\n");
			
 
				+
			
 
				+		/*
			
 
				+		 * Try to get a window here. If it works, we must fall
			
 
				+		 * through and test the bitmap. This avoids some
			
 
				+		 * ping-ponging of windows due to non-reserved space
			
 
				+		 * being allocated before we initialize a window for
			
 
				+		 * that inode.
			
 
				+		 */
			
 
				+		ocfs2_resv_find_window(resmap, resv, wanted);
			
 
				+	}
			
 
				+
			
 
				+	BUG_ON(ocfs2_resv_empty(resv));
			
 
				+
			
 
				+	*cstart = resv->r_start;
			
 
				+	*clen = resv->r_len;
			
 
				+
			
 
				+	spin_unlock(&resv_lock);
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static void
			
 
				+	ocfs2_adjust_resv_from_alloc(struct ocfs2_reservation_map *resmap,
			
 
				+				     struct ocfs2_alloc_reservation *resv,
			
 
				+				     unsigned int start, unsigned int end)
			
 
				+{
			
 
				+	unsigned int rhs = 0;
			
 
				+	unsigned int old_end = ocfs2_resv_end(resv);
			
 
				+
			
 
				+	BUG_ON(start != resv->r_start || old_end < end);
			
 
				+
			
 
				+	/*
			
 
				+	 * Completely used? We can remove it then.
			
 
				+	 */
			
 
				+	if (old_end == end) {
			
 
				+		__ocfs2_resv_discard(resmap, resv);
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	rhs = old_end - end;
			
 
				+
			
 
				+	/*
			
 
				+	 * This should have been trapped above.
			
 
				+	 */
			
 
				+	BUG_ON(rhs == 0);
			
 
				+
			
 
				+	resv->r_start = end + 1;
			
 
				+	resv->r_len = old_end - resv->r_start + 1;
			
 
				+}
			
 
				+
			
 
				+void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
			
 
				+			       struct ocfs2_alloc_reservation *resv,
			
 
				+			       u32 cstart, u32 clen)
			
 
				+{
			
 
				+	unsigned int cend = cstart + clen - 1;
			
 
				+
			
 
				+	if (resmap == NULL || ocfs2_resmap_disabled(resmap))
			
 
				+		return;
			
 
				+
			
 
				+	if (resv == NULL)
			
 
				+		return;
			
 
				+
			
 
				+	BUG_ON(cstart != resv->r_start);
			
 
				+
			
 
				+	spin_lock(&resv_lock);
			
 
				+
			
 
				+	mlog(0, "claim bits: cstart: %u cend: %u clen: %u r_start: %u "
			
 
				+	     "r_end: %u r_len: %u, r_last_start: %u r_last_len: %u\n",
			
 
				+	     cstart, cend, clen, resv->r_start, ocfs2_resv_end(resv),
			
 
				+	     resv->r_len, resv->r_last_start, resv->r_last_len);
			
 
				+
			
 
				+	BUG_ON(cstart < resv->r_start);
			
 
				+	BUG_ON(cstart > ocfs2_resv_end(resv));
			
 
				+	BUG_ON(cend > ocfs2_resv_end(resv));
			
 
				+
			
 
				+	ocfs2_adjust_resv_from_alloc(resmap, resv, cstart, cend);
			
 
				+	resv->r_last_start = cstart;
			
 
				+	resv->r_last_len = clen;
			
 
				+
			
 
				+	/*
			
 
				+	 * May have been discarded above from
			
 
				+	 * ocfs2_adjust_resv_from_alloc().
			
 
				+	 */
			
 
				+	if (!ocfs2_resv_empty(resv))
			
 
				+		ocfs2_resv_mark_lru(resmap, resv);
			
 
				+
			
 
				+	mlog(0, "Reservation now looks like: r_start: %u r_end: %u "
			
 
				+	     "r_len: %u r_last_start: %u r_last_len: %u\n",
			
 
				+	     resv->r_start, ocfs2_resv_end(resv), resv->r_len,
			
 
				+	     resv->r_last_start, resv->r_last_len);
			
 
				+
			
 
				+	ocfs2_check_resmap(resmap);
			
 
				+
			
 
				+	spin_unlock(&resv_lock);
			
 
				+}
			
--- a/fs/ocfs2/reservations.h
+++ b/fs/ocfs2/reservations.h
@@ -0,0 +1,159 @@
 
				+/* -*- mode: c; c-basic-offset: 8; -*-
			
 
				+ * vim: noexpandtab sw=8 ts=8 sts=0:
			
 
				+ *
			
 
				+ * reservations.h
			
 
				+ *
			
 
				+ * Allocation reservations function prototypes and structures.
			
 
				+ *
			
 
				+ * Copyright (C) 2010 Novell.  All rights reserved.
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public
			
 
				+ * License version 2 as published by the Free Software Foundation.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				+ * General Public License for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef	OCFS2_RESERVATIONS_H
			
 
				+#define	OCFS2_RESERVATIONS_H
			
 
				+
			
 
				+#include <linux/rbtree.h>
			
 
				+
			
 
				+#define OCFS2_DEFAULT_RESV_LEVEL	2
			
 
				+#define OCFS2_MAX_RESV_LEVEL	9
			
 
				+#define OCFS2_MIN_RESV_LEVEL	0
			
 
				+
			
 
				+struct ocfs2_alloc_reservation {
			
 
				+	struct rb_node	r_node;
			
 
				+
			
 
				+	unsigned int	r_start;	/* Beginning of current window */
			
 
				+	unsigned int	r_len;		/* Length of the window */
			
 
				+
			
 
				+	unsigned int	r_last_len;	/* Length of most recent alloc */
			
 
				+	unsigned int	r_last_start;	/* Start of most recent alloc */
			
 
				+	struct list_head	r_lru;	/* LRU list head */
			
 
				+
			
 
				+	unsigned int	r_flags;
			
 
				+};
			
 
				+
			
 
				+#define	OCFS2_RESV_FLAG_INUSE	0x01	/* Set when r_node is part of a btree */
			
 
				+#define	OCFS2_RESV_FLAG_TMP	0x02	/* Temporary reservation, will be
			
 
				+					 * destroyed immediately after use */
			
 
				+#define	OCFS2_RESV_FLAG_DIR	0x04	/* Reservation is for an unindexed
			
 
				+					 * directory btree */
			
 
				+
			
 
				+struct ocfs2_reservation_map {
			
 
				+	struct rb_root		m_reservations;
			
 
				+	char			*m_disk_bitmap;
			
 
				+
			
 
				+	struct ocfs2_super	*m_osb;
			
 
				+
			
 
				+	/* The following are not initialized to meaningful values until a disk
			
 
				+	 * bitmap is provided. */
			
 
				+	u32			m_bitmap_len;	/* Number of valid
			
 
				+						 * bits available */
			
 
				+
			
 
				+	struct list_head	m_lru;		/* LRU of reservations
			
 
				+						 * structures. */
			
 
				+
			
 
				+};
			
 
				+
			
 
				+void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv);
			
 
				+
			
 
				+#define OCFS2_RESV_TYPES	(OCFS2_RESV_FLAG_TMP|OCFS2_RESV_FLAG_DIR)
			
 
				+void ocfs2_resv_set_type(struct ocfs2_alloc_reservation *resv,
			
 
				+			 unsigned int flags);
			
 
				+
			
 
				+int ocfs2_dir_resv_allowed(struct ocfs2_super *osb);
			
 
				+
			
 
				+/**
			
 
				+ * ocfs2_resv_discard() - truncate a reservation
			
 
				+ * @resmap:
			
 
				+ * @resv: the reservation to truncate.
			
 
				+ *
			
 
				+ * After this function is called, the reservation will be empty, and
			
 
				+ * unlinked from the rbtree.
			
 
				+ */
			
 
				+void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
			
 
				+			struct ocfs2_alloc_reservation *resv);
			
 
				+
			
 
				+
			
 
				+/**
			
 
				+ * ocfs2_resmap_init() - Initialize fields of a reservations bitmap
			
 
				+ * @resmap: struct ocfs2_reservation_map to initialize
			
 
				+ * @obj: unused for now
			
 
				+ * @ops: unused for now
			
 
				+ * @max_bitmap_bytes: Maximum size of the bitmap (typically blocksize)
			
 
				+ *
			
 
				+ * Only possible return value other than '0' is -ENOMEM for failure to
			
 
				+ * allocate the mirror bitmap.
			
 
				+ */
			
 
				+int ocfs2_resmap_init(struct ocfs2_super *osb,
			
 
				+		      struct ocfs2_reservation_map *resmap);
			
 
				+
			
 
				+/**
			
 
				+ * ocfs2_resmap_restart() - "restart" a reservation bitmap
			
 
				+ * @resmap: reservations bitmap
			
 
				+ * @clen: Number of valid bits in the bitmap
			
 
				+ * @disk_bitmap: the disk bitmap this resmap should refer to.
			
 
				+ *
			
 
				+ * Re-initialize the parameters of a reservation bitmap. This is
			
 
				+ * useful for local alloc window slides.
			
 
				+ *
			
 
				+ * This function will call ocfs2_trunc_resv against all existing
			
 
				+ * reservations. A future version will recalculate existing
			
 
				+ * reservations based on the new bitmap.
			
 
				+ */
			
 
				+void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap,
			
 
				+			  unsigned int clen, char *disk_bitmap);
			
 
				+
			
 
				+/**
			
 
				+ * ocfs2_resmap_uninit() - uninitialize a reservation bitmap structure
			
 
				+ * @resmap: the struct ocfs2_reservation_map to uninitialize
			
 
				+ */
			
 
				+void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap);
			
 
				+
			
 
				+/**
			
 
				+ * ocfs2_resmap_resv_bits() - Return still-valid reservation bits
			
 
				+ * @resmap: reservations bitmap
			
 
				+ * @resv: reservation to base search from
			
 
				+ * @cstart: start of proposed allocation
			
 
				+ * @clen: length (in clusters) of proposed allocation
			
 
				+ *
			
 
				+ * Using the reservation data from resv, this function will compare
			
 
				+ * resmap and resmap->m_disk_bitmap to determine what part (if any) of
			
 
				+ * the reservation window is still clear to use. If resv is empty,
			
 
				+ * this function will try to allocate a window for it.
			
 
				+ *
			
 
				+ * On success, zero is returned and the valid allocation area is set in cstart
			
 
				+ * and clen.
			
 
				+ *
			
 
				+ * Returns -ENOSPC if reservations are disabled.
			
 
				+ */
			
 
				+int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
			
 
				+			   struct ocfs2_alloc_reservation *resv,
			
 
				+			   int *cstart, int *clen);
			
 
				+
			
 
				+/**
			
 
				+ * ocfs2_resmap_claimed_bits() - Tell the reservation code that bits were used.
			
 
				+ * @resmap: reservations bitmap
			
 
				+ * @resv: optional reservation to recalculate based on new bitmap
			
 
				+ * @cstart: start of allocation in clusters
			
 
				+ * @clen: length of allocation in clusters.
			
 
				+ *
			
 
				+ * Tell the reservation code that bits were used to fulfill allocation in
			
 
				+ * resmap. The bits don't have to have been part of any existing
			
 
				+ * reservation. But we must always call this function when bits are claimed.
			
 
				+ * Internally, the reservations code will use this information to mark the
			
 
				+ * reservations bitmap. If resv is passed, its next allocation window will be
			
 
				+ * calculated. It also expects that 'cstart' is the same as we passed back
			
 
				+ * from ocfs2_resmap_resv_bits().
			
 
				+ */
			
 
				+void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
			
 
				+			       struct ocfs2_alloc_reservation *resv,
			
 
				+			       u32 cstart, u32 clen);
			
 
				+
			
 
				+#endif	/* OCFS2_RESERVATIONS_H */
			
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -134,11 +134,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
 
				 		le16_add_cpu(&group->bg_free_bits_count, -1 * backups);
			
 
				 	}
			
 
				 
			
 
				-	ret = ocfs2_journal_dirty(handle, group_bh);
			
 
				-	if (ret < 0) {
			
 
				-		mlog_errno(ret);
			
 
				-		goto out_rollback;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, group_bh);
			
 
				 
			
 
				 	/* update the inode accordingly. */
			
 
				 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(bm_inode), bm_bh,
			
@@ -319,7 +315,8 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
 
				 	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
			
 
				 
			
 
				 	if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
			
 
				-				 ocfs2_group_bitmap_size(osb->sb) * 8) {
			
 
				+		ocfs2_group_bitmap_size(osb->sb, 0,
			
 
				+					osb->s_feature_incompat) * 8) {
			
 
				 		mlog(ML_ERROR, "The disk is too old and small. "
			
 
				 		     "Force to do offline resize.");
			
 
				 		ret = -EINVAL;
			
@@ -500,7 +497,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
 
				 	fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
			
 
				 
			
 
				 	if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
			
 
				-				 ocfs2_group_bitmap_size(osb->sb) * 8) {
			
 
				+		ocfs2_group_bitmap_size(osb->sb, 0,
			
 
				+					osb->s_feature_incompat) * 8) {
			
 
				 		mlog(ML_ERROR, "The disk is too old and small."
			
 
				 		     " Force to do offline resize.");
			
 
				 		ret = -EINVAL;
			
@@ -545,12 +543,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
 
				 
			
 
				 	group = (struct ocfs2_group_desc *)group_bh->b_data;
			
 
				 	group->bg_next_group = cr->c_blkno;
			
 
				-
			
 
				-	ret = ocfs2_journal_dirty(handle, group_bh);
			
 
				-	if (ret < 0) {
			
 
				-		mlog_errno(ret);
			
 
				-		goto out_commit;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, group_bh);
			
 
				 
			
 
				 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode),
			
 
				 				      main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE);
			
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -26,13 +26,14 @@
 
				 #ifndef _CHAINALLOC_H_
			
 
				 #define _CHAINALLOC_H_
			
 
				 
			
 
				+struct ocfs2_suballoc_result;
			
 
				 typedef int (group_search_t)(struct inode *,
			
 
				 			     struct buffer_head *,
			
 
				 			     u32,			/* bits_wanted */
			
 
				 			     u32,			/* min_bits */
			
 
				 			     u64,			/* max_block */
			
 
				-			     u16 *,			/* *bit_off */
			
 
				-			     u16 *);			/* *bits_found */
			
 
				+			     struct ocfs2_suballoc_result *);
			
 
				+							/* found bits */
			
 
				 
			
 
				 struct ocfs2_alloc_context {
			
 
				 	struct inode *ac_inode;    /* which bitmap are we allocating from? */
			
@@ -54,6 +55,8 @@ struct ocfs2_alloc_context {
 
				 	u64    ac_last_group;
			
 
				 	u64    ac_max_block;  /* Highest block number to allocate. 0 is
			
 
				 				 is the same as ~0 - unlimited */
			
 
				+
			
 
				+	struct ocfs2_alloc_reservation	*ac_resv;
			
 
				 };
			
 
				 
			
 
				 void ocfs2_init_steal_slots(struct ocfs2_super *osb);
			
@@ -80,22 +83,21 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
 
				 			   u32 bits_wanted,
			
 
				 			   struct ocfs2_alloc_context **ac);
			
 
				 
			
 
				-int ocfs2_claim_metadata(struct ocfs2_super *osb,
			
 
				-			 handle_t *handle,
			
 
				+int ocfs2_claim_metadata(handle_t *handle,
			
 
				 			 struct ocfs2_alloc_context *ac,
			
 
				 			 u32 bits_wanted,
			
 
				+			 u64 *suballoc_loc,
			
 
				 			 u16 *suballoc_bit_start,
			
 
				 			 u32 *num_bits,
			
 
				 			 u64 *blkno_start);
			
 
				-int ocfs2_claim_new_inode(struct ocfs2_super *osb,
			
 
				-			  handle_t *handle,
			
 
				+int ocfs2_claim_new_inode(handle_t *handle,
			
 
				 			  struct inode *dir,
			
 
				 			  struct buffer_head *parent_fe_bh,
			
 
				 			  struct ocfs2_alloc_context *ac,
			
 
				+			  u64 *suballoc_loc,
			
 
				 			  u16 *suballoc_bit,
			
 
				 			  u64 *fe_blkno);
			
 
				-int ocfs2_claim_clusters(struct ocfs2_super *osb,
			
 
				-			 handle_t *handle,
			
 
				+int ocfs2_claim_clusters(handle_t *handle,
			
 
				 			 struct ocfs2_alloc_context *ac,
			
 
				 			 u32 min_clusters,
			
 
				 			 u32 *cluster_start,
			
@@ -104,8 +106,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
 
				  * Use this variant of ocfs2_claim_clusters to specify a maxiumum
			
 
				  * number of clusters smaller than the allocation reserved.
			
 
				  */
			
 
				-int __ocfs2_claim_clusters(struct ocfs2_super *osb,
			
 
				-			   handle_t *handle,
			
 
				+int __ocfs2_claim_clusters(handle_t *handle,
			
 
				 			   struct ocfs2_alloc_context *ac,
			
 
				 			   u32 min_clusters,
			
 
				 			   u32 max_clusters,
			
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -94,7 +94,9 @@ struct mount_options
 
				 	unsigned long	mount_opt;
			
 
				 	unsigned int	atime_quantum;
			
 
				 	signed short	slot;
			
 
				-	unsigned int	localalloc_opt;
			
 
				+	int		localalloc_opt;
			
 
				+	unsigned int	resv_level;
			
 
				+	int		dir_resv_level;
			
 
				 	char		cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
			
 
				 };
			
 
				 
			
@@ -176,6 +178,8 @@ enum {
 
				 	Opt_noacl,
			
 
				 	Opt_usrquota,
			
 
				 	Opt_grpquota,
			
 
				+	Opt_resv_level,
			
 
				+	Opt_dir_resv_level,
			
 
				 	Opt_err,
			
 
				 };
			
 
				 
			
@@ -202,6 +206,8 @@ static const match_table_t tokens = {
 
				 	{Opt_noacl, "noacl"},
			
 
				 	{Opt_usrquota, "usrquota"},
			
 
				 	{Opt_grpquota, "grpquota"},
			
 
				+	{Opt_resv_level, "resv_level=%u"},
			
 
				+	{Opt_dir_resv_level, "dir_resv_level=%u"},
			
 
				 	{Opt_err, NULL}
			
 
				 };
			
 
				 
			
@@ -1028,8 +1034,14 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 
				 	osb->s_atime_quantum = parsed_options.atime_quantum;
			
 
				 	osb->preferred_slot = parsed_options.slot;
			
 
				 	osb->osb_commit_interval = parsed_options.commit_interval;
			
 
				-	osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
			
 
				-	osb->local_alloc_bits = osb->local_alloc_default_bits;
			
 
				+
			
 
				+	ocfs2_la_set_sizes(osb, parsed_options.localalloc_opt);
			
 
				+	osb->osb_resv_level = parsed_options.resv_level;
			
 
				+	osb->osb_dir_resv_level = parsed_options.resv_level;
			
 
				+	if (parsed_options.dir_resv_level == -1)
			
 
				+		osb->osb_dir_resv_level = parsed_options.resv_level;
			
 
				+	else
			
 
				+		osb->osb_dir_resv_level = parsed_options.dir_resv_level;
			
 
				 
			
 
				 	status = ocfs2_verify_userspace_stack(osb, &parsed_options);
			
 
				 	if (status)
			
@@ -1285,11 +1297,13 @@ static int ocfs2_parse_options(struct super_block *sb,
 
				 		   options ? options : "(none)");
			
 
				 
			
 
				 	mopt->commit_interval = 0;
			
 
				-	mopt->mount_opt = 0;
			
 
				+	mopt->mount_opt = OCFS2_MOUNT_NOINTR;
			
 
				 	mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
			
 
				 	mopt->slot = OCFS2_INVALID_SLOT;
			
 
				-	mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE;
			
 
				+	mopt->localalloc_opt = -1;
			
 
				 	mopt->cluster_stack[0] = '\0';
			
 
				+	mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL;
			
 
				+	mopt->dir_resv_level = -1;
			
 
				 
			
 
				 	if (!options) {
			
 
				 		status = 1;
			
@@ -1380,7 +1394,7 @@ static int ocfs2_parse_options(struct super_block *sb,
 
				 				status = 0;
			
 
				 				goto bail;
			
 
				 			}
			
 
				-			if (option >= 0 && (option <= ocfs2_local_alloc_size(sb) * 8))
			
 
				+			if (option >= 0)
			
 
				 				mopt->localalloc_opt = option;
			
 
				 			break;
			
 
				 		case Opt_localflocks:
			
@@ -1433,6 +1447,28 @@ static int ocfs2_parse_options(struct super_block *sb,
 
				 			mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
			
 
				 			mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
			
 
				 			break;
			
 
				+		case Opt_resv_level:
			
 
				+			if (is_remount)
			
 
				+				break;
			
 
				+			if (match_int(&args[0], &option)) {
			
 
				+				status = 0;
			
 
				+				goto bail;
			
 
				+			}
			
 
				+			if (option >= OCFS2_MIN_RESV_LEVEL &&
			
 
				+			    option < OCFS2_MAX_RESV_LEVEL)
			
 
				+				mopt->resv_level = option;
			
 
				+			break;
			
 
				+		case Opt_dir_resv_level:
			
 
				+			if (is_remount)
			
 
				+				break;
			
 
				+			if (match_int(&args[0], &option)) {
			
 
				+				status = 0;
			
 
				+				goto bail;
			
 
				+			}
			
 
				+			if (option >= OCFS2_MIN_RESV_LEVEL &&
			
 
				+			    option < OCFS2_MAX_RESV_LEVEL)
			
 
				+				mopt->dir_resv_level = option;
			
 
				+			break;
			
 
				 		default:
			
 
				 			mlog(ML_ERROR,
			
 
				 			     "Unrecognized mount option \"%s\" "
			
@@ -1487,7 +1523,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
 
				 			   (unsigned) (osb->osb_commit_interval / HZ));
			
 
				 
			
 
				 	local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits);
			
 
				-	if (local_alloc_megs != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE)
			
 
				+	if (local_alloc_megs != ocfs2_la_default_mb(osb))
			
 
				 		seq_printf(s, ",localalloc=%d", local_alloc_megs);
			
 
				 
			
 
				 	if (opts & OCFS2_MOUNT_LOCALFLOCKS)
			
@@ -1514,6 +1550,12 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
 
				 	else
			
 
				 		seq_printf(s, ",noacl");
			
 
				 
			
 
				+	if (osb->osb_resv_level != OCFS2_DEFAULT_RESV_LEVEL)
			
 
				+		seq_printf(s, ",resv_level=%d", osb->osb_resv_level);
			
 
				+
			
 
				+	if (osb->osb_dir_resv_level != osb->osb_resv_level)
			
 
				+		seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level);
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -1688,6 +1730,8 @@ static void ocfs2_inode_init_once(void *data)
 
				 	oi->ip_blkno = 0ULL;
			
 
				 	oi->ip_clusters = 0;
			
 
				 
			
 
				+	ocfs2_resv_init_once(&oi->ip_la_data_resv);
			
 
				+
			
 
				 	ocfs2_lock_res_init_once(&oi->ip_rw_lockres);
			
 
				 	ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
			
 
				 	ocfs2_lock_res_init_once(&oi->ip_open_lockres);
			
@@ -2042,6 +2086,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
 
				 
			
 
				 	init_waitqueue_head(&osb->osb_mount_event);
			
 
				 
			
 
				+	status = ocfs2_resmap_init(osb, &osb->osb_la_resmap);
			
 
				+	if (status) {
			
 
				+		mlog_errno(status);
			
 
				+		goto bail;
			
 
				+	}
			
 
				+
			
 
				 	osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL);
			
 
				 	if (!osb->vol_label) {
			
 
				 		mlog(ML_ERROR, "unable to alloc vol label\n");
			
@@ -2224,9 +2274,11 @@ static int ocfs2_initialize_super(struct super_block *sb,
 
				 	}
			
 
				 
			
 
				 	osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
			
 
				+	osb->osb_clusters_at_boot = OCFS2_I(inode)->ip_clusters;
			
 
				 	iput(inode);
			
 
				 
			
 
				-	osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8;
			
 
				+	osb->bitmap_cpg = ocfs2_group_bitmap_size(sb, 0,
			
 
				+				 osb->s_feature_incompat) * 8;
			
 
				 
			
 
				 	status = ocfs2_init_slot_info(osb);
			
 
				 	if (status < 0) {
			
@@ -2509,5 +2561,25 @@ void __ocfs2_abort(struct super_block* sb,
 
				 	ocfs2_handle_error(sb);
			
 
				 }
			
 
				 
			
 
				+/*
			
 
				+ * Void signal blockers, because in-kernel sigprocmask() only fails
			
 
				+ * when SIG_* is wrong.
			
 
				+ */
			
 
				+void ocfs2_block_signals(sigset_t *oldset)
			
 
				+{
			
 
				+	int rc;
			
 
				+	sigset_t blocked;
			
 
				+
			
 
				+	sigfillset(&blocked);
			
 
				+	rc = sigprocmask(SIG_BLOCK, &blocked, oldset);
			
 
				+	BUG_ON(rc);
			
 
				+}
			
 
				+
			
 
				+void ocfs2_unblock_signals(sigset_t *oldset)
			
 
				+{
			
 
				+	int rc = sigprocmask(SIG_SETMASK, oldset, NULL);
			
 
				+	BUG_ON(rc);
			
 
				+}
			
 
				+
			
 
				 module_init(ocfs2_init);
			
 
				 module_exit(ocfs2_exit);
			
--- a/fs/ocfs2/super.h
+++ b/fs/ocfs2/super.h
@@ -45,4 +45,11 @@ void __ocfs2_abort(struct super_block *sb,
 
				 
			
 
				 #define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
			
 
				 
			
 
				+/*
			
 
				+ * Void signal blockers, because in-kernel sigprocmask() only fails
			
 
				+ * when SIG_* is wrong.
			
 
				+ */
			
 
				+void ocfs2_block_signals(sigset_t *oldset);
			
 
				+void ocfs2_unblock_signals(sigset_t *oldset);
			
 
				+
			
 
				 #endif /* OCFS2_SUPER_H */
			
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -79,6 +79,7 @@ struct ocfs2_xattr_set_ctxt {
 
				 	struct ocfs2_alloc_context *meta_ac;
			
 
				 	struct ocfs2_alloc_context *data_ac;
			
 
				 	struct ocfs2_cached_dealloc_ctxt dealloc;
			
 
				+	int set_abort;
			
 
				 };
			
 
				 
			
 
				 #define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
			
@@ -739,11 +740,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 
				 		goto leave;
			
 
				 	}
			
 
				 
			
 
				-	status = ocfs2_journal_dirty(handle, vb->vb_bh);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto leave;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, vb->vb_bh);
			
 
				 
			
 
				 	clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
			
 
				 
			
@@ -786,12 +783,7 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
 
				 	}
			
 
				 
			
 
				 	le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
			
 
				-
			
 
				-	ret = ocfs2_journal_dirty(handle, vb->vb_bh);
			
 
				-	if (ret) {
			
 
				-		mlog_errno(ret);
			
 
				-		goto out;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, vb->vb_bh);
			
 
				 
			
 
				 	if (ext_flags & OCFS2_EXT_REFCOUNTED)
			
 
				 		ret = ocfs2_decrease_refcount(inode, handle,
			
@@ -1374,11 +1366,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
 
				 				memset(bh->b_data + cp_len, 0,
			
 
				 				       blocksize - cp_len);
			
 
				 
			
 
				-			ret = ocfs2_journal_dirty(handle, bh);
			
 
				-			if (ret < 0) {
			
 
				-				mlog_errno(ret);
			
 
				-				goto out;
			
 
				-			}
			
 
				+			ocfs2_journal_dirty(handle, bh);
			
 
				 			brelse(bh);
			
 
				 			bh = NULL;
			
 
				 
			
@@ -2148,15 +2136,19 @@ alloc_value:
 
				 		orig_clusters = ocfs2_xa_value_clusters(loc);
			
 
				 		rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
			
 
				 		if (rc < 0) {
			
 
				-			/*
			
 
				-			 * If we tried to grow an existing external value,
			
 
				-			 * ocfs2_xa_cleanuP-value_truncate() is going to
			
 
				-			 * let it stand.  We have to restore its original
			
 
				-			 * value size.
			
 
				-			 */
			
 
				-			loc->xl_entry->xe_value_size = orig_value_size;
			
 
				+			ctxt->set_abort = 1;
			
 
				 			ocfs2_xa_cleanup_value_truncate(loc, "growing",
			
 
				 							orig_clusters);
			
 
				+			/*
			
 
				+			 * If we were growing an existing value,
			
 
				+			 * ocfs2_xa_cleanup_value_truncate() won't remove
			
 
				+			 * the entry. We need to restore the original value
			
 
				+			 * size.
			
 
				+			 */
			
 
				+			if (loc->xl_entry) {
			
 
				+				BUG_ON(!orig_value_size);
			
 
				+				loc->xl_entry->xe_value_size = orig_value_size;
			
 
				+			}
			
 
				 			mlog_errno(rc);
			
 
				 		}
			
 
				 	}
			
@@ -2479,7 +2471,10 @@ static int ocfs2_xattr_free_block(struct inode *inode,
 
				 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
			
 
				 	blk = le64_to_cpu(xb->xb_blkno);
			
 
				 	bit = le16_to_cpu(xb->xb_suballoc_bit);
			
 
				-	bg_blkno = ocfs2_which_suballoc_group(blk, bit);
			
 
				+	if (xb->xb_suballoc_loc)
			
 
				+		bg_blkno = le64_to_cpu(xb->xb_suballoc_loc);
			
 
				+	else
			
 
				+		bg_blkno = ocfs2_which_suballoc_group(blk, bit);
			
 
				 
			
 
				 	xb_alloc_inode = ocfs2_get_system_file_inode(osb,
			
 
				 				EXTENT_ALLOC_SYSTEM_INODE,
			
@@ -2594,9 +2589,7 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
 
				 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
			
 
				 	spin_unlock(&oi->ip_lock);
			
 
				 
			
 
				-	ret = ocfs2_journal_dirty(handle, di_bh);
			
 
				-	if (ret < 0)
			
 
				-		mlog_errno(ret);
			
 
				+	ocfs2_journal_dirty(handle, di_bh);
			
 
				 out_commit:
			
 
				 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
			
 
				 out:
			
@@ -2724,9 +2717,7 @@ static int ocfs2_xattr_ibody_init(struct inode *inode,
 
				 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
			
 
				 	spin_unlock(&oi->ip_lock);
			
 
				 
			
 
				-	ret = ocfs2_journal_dirty(ctxt->handle, di_bh);
			
 
				-	if (ret < 0)
			
 
				-		mlog_errno(ret);
			
 
				+	ocfs2_journal_dirty(ctxt->handle, di_bh);
			
 
				 
			
 
				 out:
			
 
				 	return ret;
			
@@ -2846,9 +2837,8 @@ static int ocfs2_create_xattr_block(struct inode *inode,
 
				 	int ret;
			
 
				 	u16 suballoc_bit_start;
			
 
				 	u32 num_got;
			
 
				-	u64 first_blkno;
			
 
				+	u64 suballoc_loc, first_blkno;
			
 
				 	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
			
 
				-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				 	struct buffer_head *new_bh = NULL;
			
 
				 	struct ocfs2_xattr_block *xblk;
			
 
				 
			
@@ -2859,9 +2849,9 @@ static int ocfs2_create_xattr_block(struct inode *inode,
 
				 		goto end;
			
 
				 	}
			
 
				 
			
 
				-	ret = ocfs2_claim_metadata(osb, ctxt->handle, ctxt->meta_ac, 1,
			
 
				-				   &suballoc_bit_start, &num_got,
			
 
				-				   &first_blkno);
			
 
				+	ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
			
 
				+				   &suballoc_loc, &suballoc_bit_start,
			
 
				+				   &num_got, &first_blkno);
			
 
				 	if (ret < 0) {
			
 
				 		mlog_errno(ret);
			
 
				 		goto end;
			
@@ -2883,8 +2873,10 @@ static int ocfs2_create_xattr_block(struct inode *inode,
 
				 	memset(xblk, 0, inode->i_sb->s_blocksize);
			
 
				 	strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
			
 
				 	xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
			
 
				+	xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
			
 
				 	xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
			
 
				-	xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
			
 
				+	xblk->xb_fs_generation =
			
 
				+		cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
			
 
				 	xblk->xb_blkno = cpu_to_le64(first_blkno);
			
 
				 	if (indexed) {
			
 
				 		struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
			
@@ -2956,7 +2948,7 @@ static int ocfs2_xattr_block_set(struct inode *inode,
 
				 		ret = ocfs2_xa_set(&loc, xi, ctxt);
			
 
				 		if (!ret)
			
 
				 			xs->here = loc.xl_entry;
			
 
				-		else if (ret != -ENOSPC)
			
 
				+		else if ((ret != -ENOSPC) || ctxt->set_abort)
			
 
				 			goto end;
			
 
				 		else {
			
 
				 			ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
			
@@ -3312,14 +3304,13 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
 
				 				goto out;
			
 
				 			}
			
 
				 
			
 
				-			ret = ocfs2_extend_trans(ctxt->handle, credits +
			
 
				-					ctxt->handle->h_buffer_credits);
			
 
				+			ret = ocfs2_extend_trans(ctxt->handle, credits);
			
 
				 			if (ret) {
			
 
				 				mlog_errno(ret);
			
 
				 				goto out;
			
 
				 			}
			
 
				 			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
			
 
				-		} else if (ret == -ENOSPC) {
			
 
				+		} else if ((ret == -ENOSPC) && !ctxt->set_abort) {
			
 
				 			if (di->i_xattr_loc && !xbs->xattr_bh) {
			
 
				 				ret = ocfs2_xattr_block_find(inode,
			
 
				 							     xi->xi_name_index,
			
@@ -3343,8 +3334,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
 
				 					goto out;
			
 
				 				}
			
 
				 
			
 
				-				ret = ocfs2_extend_trans(ctxt->handle, credits +
			
 
				-					ctxt->handle->h_buffer_credits);
			
 
				+				ret = ocfs2_extend_trans(ctxt->handle, credits);
			
 
				 				if (ret) {
			
 
				 					mlog_errno(ret);
			
 
				 					goto out;
			
@@ -3378,8 +3368,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
 
				 					goto out;
			
 
				 				}
			
 
				 
			
 
				-				ret = ocfs2_extend_trans(ctxt->handle, credits +
			
 
				-						ctxt->handle->h_buffer_credits);
			
 
				+				ret = ocfs2_extend_trans(ctxt->handle, credits);
			
 
				 				if (ret) {
			
 
				 					mlog_errno(ret);
			
 
				 					goto out;
			
@@ -4249,7 +4238,6 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
 
				 	u32 bit_off, len;
			
 
				 	u64 blkno;
			
 
				 	handle_t *handle = ctxt->handle;
			
 
				-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
			
 
				 	struct buffer_head *xb_bh = xs->xattr_bh;
			
 
				 	struct ocfs2_xattr_block *xb =
			
@@ -4277,7 +4265,7 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
			
 
				+	ret = __ocfs2_claim_clusters(handle, ctxt->data_ac,
			
 
				 				     1, 1, &bit_off, &len);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
@@ -4887,8 +4875,7 @@ static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
 
				 	 * We need to update the first bucket of the old extent and all
			
 
				 	 * the buckets going to the new extent.
			
 
				 	 */
			
 
				-	credits = ((num_buckets + 1) * blks_per_bucket) +
			
 
				-		handle->h_buffer_credits;
			
 
				+	credits = ((num_buckets + 1) * blks_per_bucket);
			
 
				 	ret = ocfs2_extend_trans(handle, credits);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
@@ -4958,7 +4945,7 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode,
 
				 				      u32 *first_hash)
			
 
				 {
			
 
				 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
			
 
				-	int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
			
 
				+	int ret, credits = 2 * blk_per_bucket;
			
 
				 
			
 
				 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
			
 
				 
			
@@ -5099,7 +5086,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 
				 		goto leave;
			
 
				 	}
			
 
				 
			
 
				-	ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
			
 
				+	ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1,
			
 
				 				     clusters_to_add, &bit_off, &num_bits);
			
 
				 	if (ret < 0) {
			
 
				 		if (ret != -ENOSPC)
			
@@ -5153,9 +5140,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
 
				 		goto leave;
			
 
				 	}
			
 
				 
			
 
				-	ret = ocfs2_journal_dirty(handle, root_bh);
			
 
				-	if (ret < 0)
			
 
				-		mlog_errno(ret);
			
 
				+	ocfs2_journal_dirty(handle, root_bh);
			
 
				 
			
 
				 leave:
			
 
				 	return ret;
			
@@ -5200,8 +5185,7 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
 
				 	 * existing bucket.  Then we add the last existing bucket, the
			
 
				 	 * new bucket, and the first bucket (3 * blk_per_bucket).
			
 
				 	 */
			
 
				-	credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
			
 
				-		  handle->h_buffer_credits;
			
 
				+	credits = (end_blk - target_blk) + (3 * blk_per_bucket);
			
 
				 	ret = ocfs2_extend_trans(handle, credits);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
@@ -5477,12 +5461,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
 
				 	}
			
 
				 
			
 
				 	le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
			
 
				-
			
 
				-	ret = ocfs2_journal_dirty(handle, root_bh);
			
 
				-	if (ret) {
			
 
				-		mlog_errno(ret);
			
 
				-		goto out_commit;
			
 
				-	}
			
 
				+	ocfs2_journal_dirty(handle, root_bh);
			
 
				 
			
 
				 	ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
			
 
				 	if (ret)
			
@@ -6935,7 +6914,7 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	ret = ocfs2_claim_clusters(osb, handle, data_ac,
			
 
				+	ret = ocfs2_claim_clusters(handle, data_ac,
			
 
				 				   len, &p_cluster, &num_clusters);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);