15 years ago · bea9a6d239
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -297,7 +297,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 
				 	struct page *new_page;
			
 
				 	unsigned int new_offset;
			
 
				 	struct buffer_head *bh_in = jh2bh(jh_in);
			
 
				-	struct jbd2_buffer_trigger_type *triggers;
			
 
				 	journal_t *journal = transaction->t_journal;
			
 
				 
			
 
				 	/*
			
@@ -328,21 +327,21 @@ repeat:
 
				 		done_copy_out = 1;
			
 
				 		new_page = virt_to_page(jh_in->b_frozen_data);
			
 
				 		new_offset = offset_in_page(jh_in->b_frozen_data);
			
 
				-		triggers = jh_in->b_frozen_triggers;
			
 
				 	} else {
			
 
				 		new_page = jh2bh(jh_in)->b_page;
			
 
				 		new_offset = offset_in_page(jh2bh(jh_in)->b_data);
			
 
				-		triggers = jh_in->b_triggers;
			
 
				 	}
			
 
				 
			
 
				 	mapped_data = kmap_atomic(new_page, KM_USER0);
			
 
				 	/*
			
 
				-	 * Fire any commit trigger.  Do this before checking for escaping,
			
 
				-	 * as the trigger may modify the magic offset.  If a copy-out
			
 
				-	 * happens afterwards, it will have the correct data in the buffer.
			
 
				+	 * Fire data frozen trigger if data wasn't already frozen.  Do this
			
 
				+	 * before checking for escaping, as the trigger may modify the magic
			
 
				+	 * offset.  If a copy-out happens afterwards, it will have the correct
			
 
				+	 * data in the buffer.
			
 
				 	 */
			
 
				-	jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
			
 
				-				   triggers);
			
 
				+	if (!done_copy_out)
			
 
				+		jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
			
 
				+					   jh_in->b_triggers);
			
 
				 
			
 
				 	/*
			
 
				 	 * Check for escaping
			
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -725,6 +725,9 @@ done:
 
				 		page = jh2bh(jh)->b_page;
			
 
				 		offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
			
 
				 		source = kmap_atomic(page, KM_USER0);
			
 
				+		/* Fire data frozen trigger just before we copy the data */
			
 
				+		jbd2_buffer_frozen_trigger(jh, source + offset,
			
 
				+					   jh->b_triggers);
			
 
				 		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
			
 
				 		kunmap_atomic(source, KM_USER0);
			
 
				 
			
@@ -963,15 +966,15 @@ void jbd2_journal_set_triggers(struct buffer_head *bh,
 
				 	jh->b_triggers = type;
			
 
				 }
			
 
				 
			
 
				-void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
			
 
				+void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
			
 
				 				struct jbd2_buffer_trigger_type *triggers)
			
 
				 {
			
 
				 	struct buffer_head *bh = jh2bh(jh);
			
 
				 
			
 
				-	if (!triggers || !triggers->t_commit)
			
 
				+	if (!triggers || !triggers->t_frozen)
			
 
				 		return;
			
 
				 
			
 
				-	triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
			
 
				+	triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
			
 
				 }
			
 
				 
			
 
				 void jbd2_buffer_abort_trigger(struct journal_head *jh,
			
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -196,15 +196,14 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
 
				 			dump_stack();
			
 
				 			goto bail;
			
 
				 		}
			
 
				-
			
 
				-		past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
			
 
				-		mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
			
 
				-		     (unsigned long long)past_eof);
			
 
				-
			
 
				-		if (create && (iblock >= past_eof))
			
 
				-			set_buffer_new(bh_result);
			
 
				 	}
			
 
				 
			
 
				+	past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
			
 
				+	mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
			
 
				+	     (unsigned long long)past_eof);
			
 
				+	if (create && (iblock >= past_eof))
			
 
				+		set_buffer_new(bh_result);
			
 
				+
			
 
				 bail:
			
 
				 	if (err < 0)
			
 
				 		err = -EIO;
			
@@ -459,36 +458,6 @@ int walk_page_buffers(	handle_t *handle,
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
			
 
				-							 struct page *page,
			
 
				-							 unsigned from,
			
 
				-							 unsigned to)
			
 
				-{
			
 
				-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				-	handle_t *handle;
			
 
				-	int ret = 0;
			
 
				-
			
 
				-	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
			
 
				-	if (IS_ERR(handle)) {
			
 
				-		ret = -ENOMEM;
			
 
				-		mlog_errno(ret);
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				-	if (ocfs2_should_order_data(inode)) {
			
 
				-		ret = ocfs2_jbd2_file_inode(handle, inode);
			
 
				-		if (ret < 0)
			
 
				-			mlog_errno(ret);
			
 
				-	}
			
 
				-out:
			
 
				-	if (ret) {
			
 
				-		if (!IS_ERR(handle))
			
 
				-			ocfs2_commit_trans(osb, handle);
			
 
				-		handle = ERR_PTR(ret);
			
 
				-	}
			
 
				-	return handle;
			
 
				-}
			
 
				-
			
 
				 static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
			
 
				 {
			
 
				 	sector_t status;
			
@@ -1131,23 +1100,37 @@ out:
 
				  */
			
 
				 static int ocfs2_grab_pages_for_write(struct address_space *mapping,
			
 
				 				      struct ocfs2_write_ctxt *wc,
			
 
				-				      u32 cpos, loff_t user_pos, int new,
			
 
				+				      u32 cpos, loff_t user_pos,
			
 
				+				      unsigned user_len, int new,
			
 
				 				      struct page *mmap_page)
			
 
				 {
			
 
				 	int ret = 0, i;
			
 
				-	unsigned long start, target_index, index;
			
 
				+	unsigned long start, target_index, end_index, index;
			
 
				 	struct inode *inode = mapping->host;
			
 
				+	loff_t last_byte;
			
 
				 
			
 
				 	target_index = user_pos >> PAGE_CACHE_SHIFT;
			
 
				 
			
 
				 	/*
			
 
				 	 * Figure out how many pages we'll be manipulating here. For
			
 
				 	 * non allocating write, we just change the one
			
 
				-	 * page. Otherwise, we'll need a whole clusters worth.
			
 
				+	 * page. Otherwise, we'll need a whole clusters worth.  If we're
			
 
				+	 * writing past i_size, we only need enough pages to cover the
			
 
				+	 * last page of the write.
			
 
				 	 */
			
 
				 	if (new) {
			
 
				 		wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
			
 
				 		start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
			
 
				+		/*
			
 
				+		 * We need the index *past* the last page we could possibly
			
 
				+		 * touch.  This is the page past the end of the write or
			
 
				+		 * i_size, whichever is greater.
			
 
				+		 */
			
 
				+		last_byte = max(user_pos + user_len, i_size_read(inode));
			
 
				+		BUG_ON(last_byte < 1);
			
 
				+		end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1;
			
 
				+		if ((start + wc->w_num_pages) > end_index)
			
 
				+			wc->w_num_pages = end_index - start;
			
 
				 	} else {
			
 
				 		wc->w_num_pages = 1;
			
 
				 		start = target_index;
			
@@ -1620,21 +1603,20 @@ out:
 
				  * write path can treat it as an non-allocating write, which has no
			
 
				  * special case code for sparse/nonsparse files.
			
 
				  */
			
 
				-static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
			
 
				-					unsigned len,
			
 
				+static int ocfs2_expand_nonsparse_inode(struct inode *inode,
			
 
				+					struct buffer_head *di_bh,
			
 
				+					loff_t pos, unsigned len,
			
 
				 					struct ocfs2_write_ctxt *wc)
			
 
				 {
			
 
				 	int ret;
			
 
				-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
			
 
				 	loff_t newsize = pos + len;
			
 
				 
			
 
				-	if (ocfs2_sparse_alloc(osb))
			
 
				-		return 0;
			
 
				+	BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
			
 
				 
			
 
				 	if (newsize <= i_size_read(inode))
			
 
				 		return 0;
			
 
				 
			
 
				-	ret = ocfs2_extend_no_holes(inode, newsize, pos);
			
 
				+	ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos);
			
 
				 	if (ret)
			
 
				 		mlog_errno(ret);
			
 
				 
			
@@ -1644,6 +1626,18 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
 	return ret;
 }
 
+static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
+			   loff_t pos)
+{
+	BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
+
+	/* Nothing to zero unless the write starts beyond i_size. */
+	if (pos <= i_size_read(inode))
+		return 0;
+
+	return ocfs2_zero_extend(inode, di_bh, pos);
+}
+
 int ocfs2_write_begin_nolock(struct address_space *mapping,
 			     loff_t pos, unsigned len, unsigned flags,
 			     struct page **pagep, void **fsdata,
@@ -1679,7 +1673,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc);
			
 
				+	if (ocfs2_sparse_alloc(osb))
			
 
				+		ret = ocfs2_zero_tail(inode, di_bh, pos);
			
 
				+	else
			
 
				+		ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len,
			
 
				+						   wc);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
 
				 		goto out;
			
@@ -1789,7 +1787,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 
				 	 * that we can zero and flush if we error after adding the
			
 
				 	 * extent.
			
 
				 	 */
			
 
				-	ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
			
 
				+	ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
			
 
				 					 cluster_of_pages, mmap_page);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1671,7 +1671,7 @@ struct dlm_ctxt * dlm_register_domain(const char *domain,
 
				 	struct dlm_ctxt *dlm = NULL;
			
 
				 	struct dlm_ctxt *new_ctxt = NULL;
			
 
				 
			
 
				-	if (strlen(domain) > O2NM_MAX_NAME_LEN) {
			
 
				+	if (strlen(domain) >= O2NM_MAX_NAME_LEN) {
			
 
				 		ret = -ENAMETOOLONG;
			
 
				 		mlog(ML_ERROR, "domain name length too long\n");
			
 
				 		goto leave;
			
@@ -1709,6 +1709,7 @@ retry:
 
				 		}
			
 
				 
			
 
				 		if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) {
			
 
				+			spin_unlock(&dlm_domain_lock);
			
 
				 			mlog(ML_ERROR,
			
 
				 			     "Requested locking protocol version is not "
			
 
				 			     "compatible with already registered domain "
			
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2808,14 +2808,8 @@ again:
 
				 		mlog(0, "trying again...\n");
			
 
				 		goto again;
			
 
				 	}
			
 
				-	/* now that we are sure the MIGRATING state is there, drop
			
 
				-	 * the unneded state which blocked threads trying to DIRTY */
			
 
				-	spin_lock(&res->spinlock);
			
 
				-	BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
			
 
				-	BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
			
 
				-	res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
			
 
				-	spin_unlock(&res->spinlock);
			
 
				 
			
 
				+	ret = 0;
			
 
				 	/* did the target go down or die? */
			
 
				 	spin_lock(&dlm->spinlock);
			
 
				 	if (!test_bit(target, dlm->domain_map)) {
			
@@ -2825,10 +2819,22 @@ again:
 
				 	}
			
 
				 	spin_unlock(&dlm->spinlock);
			
 
				 
			
 
				+	/*
			
 
				+	 * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for
			
 
				+	 * another try; otherwise, we are sure the MIGRATING state is there,
			
 
				+	 * drop the unneeded state which blocked threads trying to DIRTY
			
 
				+	 */
			
 
				+	spin_lock(&res->spinlock);
			
 
				+	BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
			
 
				+	res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
			
 
				+	if (!ret)
			
 
				+		BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
			
 
				+	spin_unlock(&res->spinlock);
			
 
				+
			
 
				 	/*
			
 
				 	 * at this point:
			
 
				 	 *
			
 
				-	 *   o the DLM_LOCK_RES_MIGRATING flag is set
			
 
				+	 *   o the DLM_LOCK_RES_MIGRATING flag is set if target not down
			
 
				 	 *   o there are no pending asts on this lockres
			
 
				 	 *   o all processes trying to reserve an ast on this
			
 
				 	 *     lockres must wait for the MIGRATING flag to clear
			
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -463,7 +463,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
 
				 	if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
			
 
				 		int bit;
			
 
				 
			
 
				-		bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0);
			
 
				+		bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0);
			
 
				 		if (bit >= O2NM_MAX_NODES || bit < 0)
			
 
				 			dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
			
 
				 		else
			
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -724,28 +724,55 @@ leave:
 	return status;
 }
 
+/*
+ * While a write will already be ordering the data, a truncate will not.
+ * Thus, we need to explicitly order the zeroed pages.
+ */
+static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
+{
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	handle_t *handle = NULL;
+	int ret = 0;
+
+	if (!ocfs2_should_order_data(inode))
+		goto out;
+
+	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+	if (IS_ERR(handle)) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_jbd2_file_inode(handle, inode);
+	if (ret < 0)
+		mlog_errno(ret);
+
+out:
+	if (ret) {
+		if (!IS_ERR(handle))
+			ocfs2_commit_trans(osb, handle);
+		handle = ERR_PTR(ret);
+	}
+	return handle;
+}
+
 /* Some parts of this taken from generic_cont_expand, which turned out
  * to be too fragile to do exactly what we need without us having to
  * worry about recursive locking in ->write_begin() and ->write_end(). */
-static int ocfs2_write_zero_page(struct inode *inode,
-				 u64 size)
+static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
+				 u64 abs_to)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
-	unsigned long index;
-	unsigned int offset;
+	unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
 	handle_t *handle = NULL;
-	int ret;
+	int ret = 0;
+	unsigned zero_from, zero_to, block_start, block_end;
 
-	offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
-	/* ugh.  in prepare/commit_write, if from==to==start of block, we
-	** skip the prepare.  make sure we never send an offset for the start
-	** of a block
-	*/
-	if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
-		offset++;
-	}
-	index = size >> PAGE_CACHE_SHIFT;
+	BUG_ON(abs_from >= abs_to);
+	BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
+	BUG_ON(abs_from & ((1 << inode->i_blkbits) - 1)); /* block-aligned */
 
 	page = grab_cache_page(mapping, index);
 	if (!page) {
@@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode,
 		goto out;
 	}
 
-	ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
-	if (ret < 0) {
-		mlog_errno(ret);
-		goto out_unlock;
-	}
+	/* Get the offsets within the page that we want to zero */
+	zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
+	zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
+	if (!zero_to)
+		zero_to = PAGE_CACHE_SIZE;
 
-	if (ocfs2_should_order_data(inode)) {
-		handle = ocfs2_start_walk_page_trans(inode, page, offset,
-						     offset);
-		if (IS_ERR(handle)) {
-			ret = PTR_ERR(handle);
-			handle = NULL;
+	mlog(0,
+	     "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
+	     (unsigned long long)abs_from, (unsigned long long)abs_to,
+	     index, zero_from, zero_to);
+
+	/* We know that zero_from is block aligned */
+	for (block_start = zero_from; block_start < zero_to;
+	     block_start = block_end) {
+		block_end = block_start + (1 << inode->i_blkbits);
+
+		/*
+		 * block_start is block-aligned.  Bump it by one to
+		 * force ocfs2_{prepare,commit}_write() to zero the
+		 * whole block.
+		 */
+		ret = ocfs2_prepare_write_nolock(inode, page,
+						 block_start + 1,
+						 block_start + 1);
+		if (ret < 0) {
+			mlog_errno(ret);
-			goto out_unlock;
+			break;	/* still commit an open handle */
 		}
-	}
 
-	/* must not update i_size! */
-	ret = block_commit_write(page, offset, offset);
-	if (ret < 0)
-		mlog_errno(ret);
-	else
-		ret = 0;
+		if (!handle) {
+			handle = ocfs2_zero_start_ordered_transaction(inode);
+			if (IS_ERR(handle)) {
+				ret = PTR_ERR(handle);
+				handle = NULL;
+				goto out_unlock; /* no handle to commit */
+			}
+		}
+
+		/* must not update i_size! */
+		ret = block_commit_write(page, block_start + 1,
+					 block_start + 1);
+		if (ret < 0)
+			mlog_errno(ret);
+		else
+			ret = 0;
+	}
 
 	if (handle)
 		ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+
 out_unlock:
 	unlock_page(page);
 	page_cache_release(page);
@@ -786,22 +838,114 @@ out:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static int ocfs2_zero_extend(struct inode *inode,
			
 
				-			     u64 zero_to_size)
			
 
				+/*
			
 
				+ * Find the next range to zero.  We do this in terms of bytes because
			
 
				+ * that's what ocfs2_zero_extend() wants, and it is dealing with the
			
 
				+ * pagecache.  We may return multiple extents.
			
 
				+ *
			
 
				+ * zero_start and zero_end are ocfs2_zero_extend()s current idea of what
			
 
				+ * needs to be zeroed.  range_start and range_end return the next zeroing
			
 
				+ * range.  A subsequent call should pass the previous range_end as its
			
 
				+ * zero_start.  If range_end is 0, there's nothing to do.
			
 
				+ *
			
 
				+ * Unwritten extents are skipped over.  Refcounted extents are CoWd.
			
 
				+ */
			
 
				+static int ocfs2_zero_extend_get_range(struct inode *inode,
			
 
				+				       struct buffer_head *di_bh,
			
 
				+				       u64 zero_start, u64 zero_end,
			
 
				+				       u64 *range_start, u64 *range_end)
			
 
				 {
			
 
				-	int ret = 0;
			
 
				-	u64 start_off;
			
 
				-	struct super_block *sb = inode->i_sb;
			
 
				+	int rc = 0, needs_cow = 0;
			
 
				+	u32 p_cpos, zero_clusters = 0;
			
 
				+	u32 zero_cpos =
			
 
				+		zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
			
 
				+	u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
			
 
				+	unsigned int num_clusters = 0;
			
 
				+	unsigned int ext_flags = 0;
			
 
				 
			
 
				-	start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
			
 
				-	while (start_off < zero_to_size) {
			
 
				-		ret = ocfs2_write_zero_page(inode, start_off);
			
 
				-		if (ret < 0) {
			
 
				-			mlog_errno(ret);
			
 
				+	while (zero_cpos < last_cpos) {
			
 
				+		rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
			
 
				+					&num_clusters, &ext_flags);
			
 
				+		if (rc) {
			
 
				+			mlog_errno(rc);
			
 
				+			goto out;
			
 
				+		}
			
 
				+
			
 
				+		if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
			
 
				+			zero_clusters = num_clusters;
			
 
				+			if (ext_flags & OCFS2_EXT_REFCOUNTED)
			
 
				+				needs_cow = 1;
			
 
				+			break;
			
 
				+		}
			
 
				+
			
 
				+		zero_cpos += num_clusters;
			
 
				+	}
			
 
				+	if (!zero_clusters) {
			
 
				+		*range_end = 0;
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				+	while ((zero_cpos + zero_clusters) < last_cpos) {
			
 
				+		rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
			
 
				+					&p_cpos, &num_clusters,
			
 
				+					&ext_flags);
			
 
				+		if (rc) {
			
 
				+			mlog_errno(rc);
			
 
				 			goto out;
			
 
				 		}
			
 
				 
			
 
				-		start_off += sb->s_blocksize;
			
 
				+		if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
			
 
				+			break;
			
 
				+		if (ext_flags & OCFS2_EXT_REFCOUNTED)
			
 
				+			needs_cow = 1;
			
 
				+		zero_clusters += num_clusters;
			
 
				+	}
			
 
				+	if ((zero_cpos + zero_clusters) > last_cpos)
			
 
				+		zero_clusters = last_cpos - zero_cpos;
			
 
				+
			
 
				+	if (needs_cow) {
			
 
				+		rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
			
 
				+					UINT_MAX);
			
 
				+		if (rc) {
			
 
				+			mlog_errno(rc);
			
 
				+			goto out;
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	*range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
			
 
				+	*range_end = ocfs2_clusters_to_bytes(inode->i_sb,
			
 
				+					     zero_cpos + zero_clusters);
			
 
				+
			
 
				+out:
			
 
				+	return rc;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Zero one range returned from ocfs2_zero_extend_get_range().  The caller
			
 
				+ * has made sure that the entire range needs zeroing.
			
 
				+ */
			
 
				+static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
			
 
				+				   u64 range_end)
			
 
				+{
			
 
				+	int rc = 0;
			
 
				+	u64 next_pos;
			
 
				+	u64 zero_pos = range_start;
			
 
				+
			
 
				+	mlog(0, "range_start = %llu, range_end = %llu\n",
			
 
				+	     (unsigned long long)range_start,
			
 
				+	     (unsigned long long)range_end);
			
 
				+	BUG_ON(range_start >= range_end);
			
 
				+
			
 
				+	while (zero_pos < range_end) {
			
 
				+		next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
			
 
				+		if (next_pos > range_end)
			
 
				+			next_pos = range_end;
			
 
				+		rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
			
 
				+		if (rc < 0) {
			
 
				+			mlog_errno(rc);
			
 
				+			break;
			
 
				+		}
			
 
				+		zero_pos = next_pos;
			
 
				 
			
 
				 		/*
			
 
				 		 * Very large extends have the potential to lock up
			
@@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode,
 
				 		cond_resched();
			
 
				 	}
			
 
				 
			
 
				-out:
			
 
				+	return rc;
			
 
				+}
			
 
				+
			
 
				+int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
			
 
				+		      loff_t zero_to_size)
			
 
				+{
			
 
				+	int ret = 0;
			
 
				+	u64 zero_start, range_start = 0, range_end = 0;
			
 
				+	struct super_block *sb = inode->i_sb;
			
 
				+
			
 
				+	zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
			
 
				+	mlog(0, "zero_start %llu for i_size %llu\n",
			
 
				+	     (unsigned long long)zero_start,
			
 
				+	     (unsigned long long)i_size_read(inode));
			
 
				+	while (zero_start < zero_to_size) {
			
 
				+		ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
			
 
				+						  zero_to_size,
			
 
				+						  &range_start,
			
 
				+						  &range_end);
			
 
				+		if (ret) {
			
 
				+			mlog_errno(ret);
			
 
				+			break;
			
 
				+		}
			
 
				+		if (!range_end)
			
 
				+			break;
			
 
				+		/* Trim the ends */
			
 
				+		if (range_start < zero_start)
			
 
				+			range_start = zero_start;
			
 
				+		if (range_end > zero_to_size)
			
 
				+			range_end = zero_to_size;
			
 
				+
			
 
				+		ret = ocfs2_zero_extend_range(inode, range_start,
			
 
				+					      range_end);
			
 
				+		if (ret) {
			
 
				+			mlog_errno(ret);
			
 
				+			break;
			
 
				+		}
			
 
				+		zero_start = range_end;
			
 
				+	}
			
 
				+
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
			
 
				+int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
			
 
				+			  u64 new_i_size, u64 zero_to)
			
 
				 {
			
 
				 	int ret;
			
 
				 	u32 clusters_to_add;
			
 
				 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
			
 
				 
			
 
				+	/*
			
 
				+	 * Only quota files call this without a bh, and they can't be
			
 
				+	 * refcounted.
			
 
				+	 */
			
 
				+	BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
			
 
				+	BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
			
 
				+
			
 
				 	clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
			
 
				 	if (clusters_to_add < oi->ip_clusters)
			
 
				 		clusters_to_add = 0;
			
@@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
 
				 	 * still need to zero the area between the old i_size and the
			
 
				 	 * new i_size.
			
 
				 	 */
			
 
				-	ret = ocfs2_zero_extend(inode, zero_to);
			
 
				+	ret = ocfs2_zero_extend(inode, di_bh, zero_to);
			
 
				 	if (ret < 0)
			
 
				 		mlog_errno(ret);
			
 
				 
			
@@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode,
 
				 		goto out;
			
 
				 
			
 
				 	if (i_size_read(inode) == new_i_size)
			
 
				-  		goto out;
			
 
				+		goto out;
			
 
				 	BUG_ON(new_i_size < i_size_read(inode));
			
 
				 
			
 
				-	/*
			
 
				-	 * Fall through for converting inline data, even if the fs
			
 
				-	 * supports sparse files.
			
 
				-	 *
			
 
				-	 * The check for inline data here is legal - nobody can add
			
 
				-	 * the feature since we have i_mutex. We must check it again
			
 
				-	 * after acquiring ip_alloc_sem though, as paths like mmap
			
 
				-	 * might have raced us to converting the inode to extents.
			
 
				-	 */
			
 
				-	if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
			
 
				-	    && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
			
 
				-		goto out_update_size;
			
 
				-
			
 
				 	/*
			
 
				 	 * The alloc sem blocks people in read/write from reading our
			
 
				 	 * allocation until we're done changing it. We depend on
			
 
				 	 * i_mutex to block other extend/truncate calls while we're
			
 
				-	 * here.
			
 
				+	 * here.  We even have to hold it for sparse files because there
			
 
				+	 * might be some tail zeroing.
			
 
				 	 */
			
 
				 	down_write(&oi->ip_alloc_sem);
			
 
				 
			
@@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode,
 
				 		ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
			
 
				 		if (ret) {
			
 
				 			up_write(&oi->ip_alloc_sem);
			
 
				-
			
 
				 			mlog_errno(ret);
			
 
				 			goto out;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
			
 
				-		ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);
			
 
				+	if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
			
 
				+		ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
			
 
				+	else
			
 
				+		ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
			
 
				+					    new_i_size);
			
 
				 
			
 
				 	up_write(&oi->ip_alloc_sem);
			
 
				 
			
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -54,8 +54,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
 
				 int ocfs2_simple_size_update(struct inode *inode,
			
 
				 			     struct buffer_head *di_bh,
			
 
				 			     u64 new_i_size);
			
 
				-int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
			
 
				-			  u64 zero_to);
			
 
				+int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
			
 
				+			  u64 new_i_size, u64 zero_to);
			
 
				+int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
			
 
				+		      loff_t zero_to);
			
 
				 int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
			
 
				 int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
			
 
				 		  struct kstat *stat);
			
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -472,7 +472,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger
 
				 	return container_of(triggers, struct ocfs2_triggers, ot_triggers);
			
 
				 }
			
 
				 
			
 
				-static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
			
 
				+static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
			
 
				 				 struct buffer_head *bh,
			
 
				 				 void *data, size_t size)
			
 
				 {
			
@@ -491,7 +491,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
 
				  * Quota blocks have their own trigger because the struct ocfs2_block_check
			
 
				  * offset depends on the blocksize.
			
 
				  */
			
 
				-static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
			
 
				+static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
			
 
				 				 struct buffer_head *bh,
			
 
				 				 void *data, size_t size)
			
 
				 {
			
@@ -511,7 +511,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
 
				  * Directory blocks also have their own trigger because the
			
 
				  * struct ocfs2_block_check offset depends on the blocksize.
			
 
				  */
			
 
				-static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
			
 
				+static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
			
 
				 				 struct buffer_head *bh,
			
 
				 				 void *data, size_t size)
			
 
				 {
			
@@ -544,7 +544,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
 
				 
			
 
				 static struct ocfs2_triggers di_triggers = {
			
 
				 	.ot_triggers = {
			
 
				-		.t_commit = ocfs2_commit_trigger,
			
 
				+		.t_frozen = ocfs2_frozen_trigger,
			
 
				 		.t_abort = ocfs2_abort_trigger,
			
 
				 	},
			
 
				 	.ot_offset	= offsetof(struct ocfs2_dinode, i_check),
			
@@ -552,7 +552,7 @@ static struct ocfs2_triggers di_triggers = {
 
				 
			
 
				 static struct ocfs2_triggers eb_triggers = {
			
 
				 	.ot_triggers = {
			
 
				-		.t_commit = ocfs2_commit_trigger,
			
 
				+		.t_frozen = ocfs2_frozen_trigger,
			
 
				 		.t_abort = ocfs2_abort_trigger,
			
 
				 	},
			
 
				 	.ot_offset	= offsetof(struct ocfs2_extent_block, h_check),
			
@@ -560,7 +560,7 @@ static struct ocfs2_triggers eb_triggers = {
 
				 
			
 
				 static struct ocfs2_triggers rb_triggers = {
			
 
				 	.ot_triggers = {
			
 
				-		.t_commit = ocfs2_commit_trigger,
			
 
				+		.t_frozen = ocfs2_frozen_trigger,
			
 
				 		.t_abort = ocfs2_abort_trigger,
			
 
				 	},
			
 
				 	.ot_offset	= offsetof(struct ocfs2_refcount_block, rf_check),
			
@@ -568,7 +568,7 @@ static struct ocfs2_triggers rb_triggers = {
 
				 
			
 
				 static struct ocfs2_triggers gd_triggers = {
			
 
				 	.ot_triggers = {
			
 
				-		.t_commit = ocfs2_commit_trigger,
			
 
				+		.t_frozen = ocfs2_frozen_trigger,
			
 
				 		.t_abort = ocfs2_abort_trigger,
			
 
				 	},
			
 
				 	.ot_offset	= offsetof(struct ocfs2_group_desc, bg_check),
			
@@ -576,14 +576,14 @@ static struct ocfs2_triggers gd_triggers = {
 
				 
			
 
				 static struct ocfs2_triggers db_triggers = {
			
 
				 	.ot_triggers = {
			
 
				-		.t_commit = ocfs2_db_commit_trigger,
			
 
				+		.t_frozen = ocfs2_db_frozen_trigger,
			
 
				 		.t_abort = ocfs2_abort_trigger,
			
 
				 	},
			
 
				 };
			
 
				 
			
 
				 static struct ocfs2_triggers xb_triggers = {
			
 
				 	.ot_triggers = {
			
 
				-		.t_commit = ocfs2_commit_trigger,
			
 
				+		.t_frozen = ocfs2_frozen_trigger,
			
 
				 		.t_abort = ocfs2_abort_trigger,
			
 
				 	},
			
 
				 	.ot_offset	= offsetof(struct ocfs2_xattr_block, xb_check),
			
@@ -591,14 +591,14 @@ static struct ocfs2_triggers xb_triggers = {
 
				 
			
 
				 static struct ocfs2_triggers dq_triggers = {
			
 
				 	.ot_triggers = {
			
 
				-		.t_commit = ocfs2_dq_commit_trigger,
			
 
				+		.t_frozen = ocfs2_dq_frozen_trigger,
			
 
				 		.t_abort = ocfs2_abort_trigger,
			
 
				 	},
			
 
				 };
			
 
				 
			
 
				 static struct ocfs2_triggers dr_triggers = {
			
 
				 	.ot_triggers = {
			
 
				-		.t_commit = ocfs2_commit_trigger,
			
 
				+		.t_frozen = ocfs2_frozen_trigger,
			
 
				 		.t_abort = ocfs2_abort_trigger,
			
 
				 	},
			
 
				 	.ot_offset	= offsetof(struct ocfs2_dx_root_block, dr_check),
			
@@ -606,7 +606,7 @@ static struct ocfs2_triggers dr_triggers = {
 
				 
			
 
				 static struct ocfs2_triggers dl_triggers = {
			
 
				 	.ot_triggers = {
			
 
				-		.t_commit = ocfs2_commit_trigger,
			
 
				+		.t_frozen = ocfs2_frozen_trigger,
			
 
				 		.t_abort = ocfs2_abort_trigger,
			
 
				 	},
			
 
				 	.ot_offset	= offsetof(struct ocfs2_dx_leaf, dl_check),
			
@@ -1936,7 +1936,7 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
 
				 	mutex_lock(&os->os_lock);
			
 
				 	ocfs2_queue_orphan_scan(osb);
			
 
				 	if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
			
 
				-		schedule_delayed_work(&os->os_orphan_scan_work,
			
 
				+		queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
			
 
				 				      ocfs2_orphan_scan_timeout());
			
 
				 	mutex_unlock(&os->os_lock);
			
 
				 }
			
@@ -1976,8 +1976,8 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
 
				 		atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
			
 
				 	else {
			
 
				 		atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
			
 
				-		schedule_delayed_work(&os->os_orphan_scan_work,
			
 
				-				      ocfs2_orphan_scan_timeout());
			
 
				+		queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
			
 
				+				   ocfs2_orphan_scan_timeout());
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -118,6 +118,7 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
 
				 {
			
 
				 	unsigned int la_mb;
			
 
				 	unsigned int gd_mb;
			
 
				+	unsigned int la_max_mb;
			
 
				 	unsigned int megs_per_slot;
			
 
				 	struct super_block *sb = osb->sb;
			
 
				 
			
@@ -182,6 +183,12 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
 
				 	if (megs_per_slot < la_mb)
			
 
				 		la_mb = megs_per_slot;
			
 
				 
			
 
				+	/* We can't store more bits than we can in a block. */
			
 
				+	la_max_mb = ocfs2_clusters_to_megabytes(osb->sb,
			
 
				+						ocfs2_local_alloc_size(sb) * 8);
			
 
				+	if (la_mb > la_max_mb)
			
 
				+		la_mb = la_max_mb;
			
 
				+
			
 
				 	return la_mb;
			
 
				 }
			
 
				 
			
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -775,7 +775,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
 
				 		 * locking allocators ranks above a transaction start
			
 
				 		 */
			
 
				 		WARN_ON(journal_current_handle());
			
 
				-		status = ocfs2_extend_no_holes(gqinode,
			
 
				+		status = ocfs2_extend_no_holes(gqinode, NULL,
			
 
				 			gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
			
 
				 			gqinode->i_size);
			
 
				 		if (status < 0)
			
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -971,7 +971,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
 
				 	u64 p_blkno;
			
 
				 
			
 
				 	/* We are protected by dqio_sem so no locking needed */
			
 
				-	status = ocfs2_extend_no_holes(lqinode,
			
 
				+	status = ocfs2_extend_no_holes(lqinode, NULL,
			
 
				 				       lqinode->i_size + 2 * sb->s_blocksize,
			
 
				 				       lqinode->i_size);
			
 
				 	if (status < 0) {
			
@@ -1114,7 +1114,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
 
				 		return ocfs2_local_quota_add_chunk(sb, type, offset);
			
 
				 
			
 
				 	/* We are protected by dqio_sem so no locking needed */
			
 
				-	status = ocfs2_extend_no_holes(lqinode,
			
 
				+	status = ocfs2_extend_no_holes(lqinode, NULL,
			
 
				 				       lqinode->i_size + sb->s_blocksize,
			
 
				 				       lqinode->i_size);
			
 
				 	if (status < 0) {
			
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2931,6 +2931,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
 
				 
			
 
				 	offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
			
 
				 	end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
			
 
				+	/*
			
 
				+	 * We only duplicate pages up to the page containing i_size - 1.
			
 
				+	 * So trim 'end' to i_size.
			
 
				+	 */
			
 
				+	if (end > i_size_read(context->inode))
			
 
				+		end = i_size_read(context->inode);
			
 
				 
			
 
				 	while (offset < end) {
			
 
				 		page_index = offset >> PAGE_CACHE_SHIFT;
			
@@ -4166,6 +4172,12 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
 
				 	struct inode *inode = old_dentry->d_inode;
			
 
				 	struct buffer_head *new_bh = NULL;
			
 
				 
			
 
				+	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
			
 
				+		ret = -EINVAL;
			
 
				+		mlog_errno(ret);
			
 
				+		goto out;
			
 
				+	}
			
 
				+
			
 
				 	ret = filemap_fdatawrite(inode->i_mapping);
			
 
				 	if (ret) {
			
 
				 		mlog_errno(ret);
			
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -741,7 +741,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 
				 		     le16_to_cpu(bg->bg_free_bits_count));
			
 
				 	le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
			
 
				 		     le16_to_cpu(bg->bg_bits));
			
 
				-	cl->cl_recs[alloc_rec].c_blkno  = cpu_to_le64(bg->bg_blkno);
			
 
				+	cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno;
			
 
				 	if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
			
 
				 		le16_add_cpu(&cl->cl_next_free_rec, 1);
			
 
				 
			
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -709,7 +709,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 
				 					 struct ocfs2_xattr_value_buf *vb,
			
 
				 					 struct ocfs2_xattr_set_ctxt *ctxt)
			
 
				 {
			
 
				-	int status = 0;
			
 
				+	int status = 0, credits;
			
 
				 	handle_t *handle = ctxt->handle;
			
 
				 	enum ocfs2_alloc_restarted why;
			
 
				 	u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
			
@@ -719,38 +719,54 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 
				 
			
 
				 	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
			
 
				 
			
 
				-	status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
			
 
				-			      OCFS2_JOURNAL_ACCESS_WRITE);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto leave;
			
 
				-	}
			
 
				+	while (clusters_to_add) {
			
 
				+		status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
			
 
				+				       OCFS2_JOURNAL_ACCESS_WRITE);
			
 
				+		if (status < 0) {
			
 
				+			mlog_errno(status);
			
 
				+			break;
			
 
				+		}
			
 
				 
			
 
				-	prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
			
 
				-	status = ocfs2_add_clusters_in_btree(handle,
			
 
				-					     &et,
			
 
				-					     &logical_start,
			
 
				-					     clusters_to_add,
			
 
				-					     0,
			
 
				-					     ctxt->data_ac,
			
 
				-					     ctxt->meta_ac,
			
 
				-					     &why);
			
 
				-	if (status < 0) {
			
 
				-		mlog_errno(status);
			
 
				-		goto leave;
			
 
				-	}
			
 
				+		prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
			
 
				+		status = ocfs2_add_clusters_in_btree(handle,
			
 
				+						     &et,
			
 
				+						     &logical_start,
			
 
				+						     clusters_to_add,
			
 
				+						     0,
			
 
				+						     ctxt->data_ac,
			
 
				+						     ctxt->meta_ac,
			
 
				+						     &why);
			
 
				+		if ((status < 0) && (status != -EAGAIN)) {
			
 
				+			if (status != -ENOSPC)
			
 
				+				mlog_errno(status);
			
 
				+			break;
			
 
				+		}
			
 
				 
			
 
				-	ocfs2_journal_dirty(handle, vb->vb_bh);
			
 
				+		ocfs2_journal_dirty(handle, vb->vb_bh);
			
 
				 
			
 
				-	clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
			
 
				+		clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
			
 
				+					 prev_clusters;
			
 
				 
			
 
				-	/*
			
 
				-	 * We should have already allocated enough space before the transaction,
			
 
				-	 * so no need to restart.
			
 
				-	 */
			
 
				-	BUG_ON(why != RESTART_NONE || clusters_to_add);
			
 
				-
			
 
				-leave:
			
 
				+		if (why != RESTART_NONE && clusters_to_add) {
			
 
				+			/*
			
 
				+			 * We can only fail in case the alloc file doesn't give
			
 
				+			 * up enough clusters.
			
 
				+			 */
			
 
				+			BUG_ON(why == RESTART_META);
			
 
				+
			
 
				+			mlog(0, "restarting xattr value extension for %u"
			
 
				+			     " clusters,.\n", clusters_to_add);
			
 
				+			credits = ocfs2_calc_extend_credits(inode->i_sb,
			
 
				+							    &vb->vb_xv->xr_list,
			
 
				+							    clusters_to_add);
			
 
				+			status = ocfs2_extend_trans(handle, credits);
			
 
				+			if (status < 0) {
			
 
				+				status = -ENOMEM;
			
 
				+				mlog_errno(status);
			
 
				+				break;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				 
			
 
				 	return status;
			
 
				 }
			
@@ -6788,16 +6804,15 @@ out:
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-static int ocfs2_reflink_xattr_buckets(handle_t *handle,
			
 
				+static int ocfs2_reflink_xattr_bucket(handle_t *handle,
			
 
				 				u64 blkno, u64 new_blkno, u32 clusters,
			
 
				+				u32 *cpos, int num_buckets,
			
 
				 				struct ocfs2_alloc_context *meta_ac,
			
 
				 				struct ocfs2_alloc_context *data_ac,
			
 
				 				struct ocfs2_reflink_xattr_tree_args *args)
			
 
				 {
			
 
				 	int i, j, ret = 0;
			
 
				 	struct super_block *sb = args->reflink->old_inode->i_sb;
			
 
				-	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
			
 
				-	u32 num_buckets = clusters * bpc;
			
 
				 	int bpb = args->old_bucket->bu_blocks;
			
 
				 	struct ocfs2_xattr_value_buf vb = {
			
 
				 		.vb_access = ocfs2_journal_access,
			
@@ -6816,14 +6831,6 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
 
				 			break;
			
 
				 		}
			
 
				 
			
 
				-		/*
			
 
				-		 * The real bucket num in this series of blocks is stored
			
 
				-		 * in the 1st bucket.
			
 
				-		 */
			
 
				-		if (i == 0)
			
 
				-			num_buckets = le16_to_cpu(
			
 
				-				bucket_xh(args->old_bucket)->xh_num_buckets);
			
 
				-
			
 
				 		ret = ocfs2_xattr_bucket_journal_access(handle,
			
 
				 						args->new_bucket,
			
 
				 						OCFS2_JOURNAL_ACCESS_CREATE);
			
@@ -6837,6 +6844,18 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
 
				 			       bucket_block(args->old_bucket, j),
			
 
				 			       sb->s_blocksize);
			
 
				 
			
 
				+		/*
			
 
				+		 * Record the start cpos so that we can use it to initialize
			
 
				+		 * our xattr tree.  We also set xh_num_buckets for the new
			
 
				+		 * bucket.
			
 
				+		 */
			
 
				+		if (i == 0) {
			
 
				+			*cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
			
 
				+					    xh_entries[0].xe_name_hash);
			
 
				+			bucket_xh(args->new_bucket)->xh_num_buckets =
			
 
				+				cpu_to_le16(num_buckets);
			
 
				+		}
			
 
				+
			
 
				 		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
			
 
				 
			
 
				 		ret = ocfs2_reflink_xattr_header(handle, args->reflink,
			
@@ -6866,6 +6885,7 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
 
				 		}
			
 
				 
			
 
				 		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
			
 
				+
			
 
				 		ocfs2_xattr_bucket_relse(args->old_bucket);
			
 
				 		ocfs2_xattr_bucket_relse(args->new_bucket);
			
 
				 	}
			
@@ -6874,6 +6894,75 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
 
				 	ocfs2_xattr_bucket_relse(args->new_bucket);
			
 
				 	return ret;
			
 
				 }
			
 
				+
			
 
				+static int ocfs2_reflink_xattr_buckets(handle_t *handle,
			
 
				+				struct inode *inode,
			
 
				+				struct ocfs2_reflink_xattr_tree_args *args,
			
 
				+				struct ocfs2_extent_tree *et,
			
 
				+				struct ocfs2_alloc_context *meta_ac,
			
 
				+				struct ocfs2_alloc_context *data_ac,
			
 
				+				u64 blkno, u32 cpos, u32 len)
			
 
				+{
			
 
				+	int ret, first_inserted = 0;
			
 
				+	u32 p_cluster, num_clusters, reflink_cpos = 0;
			
 
				+	u64 new_blkno;
			
 
				+	unsigned int num_buckets, reflink_buckets;
			
 
				+	unsigned int bpc =
			
 
				+		ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
			
 
				+
			
 
				+	ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
			
 
				+	if (ret) {
			
 
				+		mlog_errno(ret);
			
 
				+		goto out;
			
 
				+	}
			
 
				+	num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
			
 
				+	ocfs2_xattr_bucket_relse(args->old_bucket);
			
 
				+
			
 
				+	while (len && num_buckets) {
			
 
				+		ret = ocfs2_claim_clusters(handle, data_ac,
			
 
				+					   1, &p_cluster, &num_clusters);
			
 
				+		if (ret) {
			
 
				+			mlog_errno(ret);
			
 
				+			goto out;
			
 
				+		}
			
 
				+
			
 
				+		new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
			
 
				+		reflink_buckets = min(num_buckets, bpc * num_clusters);
			
 
				+
			
 
				+		ret = ocfs2_reflink_xattr_bucket(handle, blkno,
			
 
				+						 new_blkno, num_clusters,
			
 
				+						 &reflink_cpos, reflink_buckets,
			
 
				+						 meta_ac, data_ac, args);
			
 
				+		if (ret) {
			
 
				+			mlog_errno(ret);
			
 
				+			goto out;
			
 
				+		}
			
 
				+
			
 
				+		/*
			
 
				+		 * For the 1st allocated cluster, we make it use the same cpos
			
 
				+		 * so that the xattr tree looks the same as the original one
			
 
				+		 * in most cases.
			
 
				+		 */
			
 
				+		if (!first_inserted) {
			
 
				+			reflink_cpos = cpos;
			
 
				+			first_inserted = 1;
			
 
				+		}
			
 
				+		ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
			
 
				+					  num_clusters, 0, meta_ac);
			
 
				+		if (ret)
			
 
				+			mlog_errno(ret);
			
 
				+
			
 
				+		mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
			
 
				+		     (unsigned long long)new_blkno, num_clusters, reflink_cpos);
			
 
				+
			
 
				+		len -= num_clusters;
			
 
				+		blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
			
 
				+		num_buckets -= reflink_buckets;
			
 
				+	}
			
 
				+out:
			
 
				+	return ret;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				  * Create the same xattr extent record in the new inode's xattr tree.
			
 
				  */
			
@@ -6885,8 +6974,6 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
 
				 				   void *para)
			
 
				 {
			
 
				 	int ret, credits = 0;
			
 
				-	u32 p_cluster, num_clusters;
			
 
				-	u64 new_blkno;
			
 
				 	handle_t *handle;
			
 
				 	struct ocfs2_reflink_xattr_tree_args *args =
			
 
				 			(struct ocfs2_reflink_xattr_tree_args *)para;
			
@@ -6895,6 +6982,9 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
 
				 	struct ocfs2_alloc_context *data_ac = NULL;
			
 
				 	struct ocfs2_extent_tree et;
			
 
				 
			
 
				+	mlog(0, "reflink xattr buckets %llu len %u\n",
			
 
				+	     (unsigned long long)blkno, len);
			
 
				+
			
 
				 	ocfs2_init_xattr_tree_extent_tree(&et,
			
 
				 					  INODE_CACHE(args->reflink->new_inode),
			
 
				 					  args->new_blk_bh);
			
@@ -6914,32 +7004,12 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
 
				 		goto out;
			
 
				 	}
			
 
				 
			
 
				-	ret = ocfs2_claim_clusters(handle, data_ac,
			
 
				-				   len, &p_cluster, &num_clusters);
			
 
				-	if (ret) {
			
 
				-		mlog_errno(ret);
			
 
				-		goto out_commit;
			
 
				-	}
			
 
				-
			
 
				-	new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
			
 
				-
			
 
				-	mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
			
 
				-	     (unsigned long long)blkno, (unsigned long long)new_blkno, len);
			
 
				-	ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
			
 
				-					  meta_ac, data_ac, args);
			
 
				-	if (ret) {
			
 
				-		mlog_errno(ret);
			
 
				-		goto out_commit;
			
 
				-	}
			
 
				-
			
 
				-	mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
			
 
				-	     (unsigned long long)new_blkno, len, cpos);
			
 
				-	ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
			
 
				-				  len, 0, meta_ac);
			
 
				+	ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
			
 
				+					  meta_ac, data_ac,
			
 
				+					  blkno, cpos, len);
			
 
				 	if (ret)
			
 
				 		mlog_errno(ret);
			
 
				 
			
 
				-out_commit:
			
 
				 	ocfs2_commit_trans(osb, handle);
			
 
				 
			
 
				 out:
			
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1026,11 +1026,12 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
				 
			
 
				 struct jbd2_buffer_trigger_type {
			
 
				 	/*
			
 
				-	 * Fired just before a buffer is written to the journal.
			
 
				-	 * mapped_data is a mapped buffer that is the frozen data for
			
 
				-	 * commit.
			
 
				+	 * Fired at the moment data to write to the journal are known to be
			
 
				+	 * stable - so either at the moment b_frozen_data is created or just
			
 
				+	 * before a buffer is written to the journal.  mapped_data is a mapped
			
 
				+	 * buffer that is the frozen data for commit.
			
 
				 	 */
			
 
				-	void (*t_commit)(struct jbd2_buffer_trigger_type *type,
			
 
				+	void (*t_frozen)(struct jbd2_buffer_trigger_type *type,
			
 
				 			 struct buffer_head *bh, void *mapped_data,
			
 
				 			 size_t size);
			
 
				 
			
@@ -1042,7 +1043,7 @@ struct jbd2_buffer_trigger_type {
 
				 			struct buffer_head *bh);
			
 
				 };
			
 
				 
			
 
				-extern void jbd2_buffer_commit_trigger(struct journal_head *jh,
			
 
				+extern void jbd2_buffer_frozen_trigger(struct journal_head *jh,
			
 
				 				       void *mapped_data,
			
 
				 				       struct jbd2_buffer_trigger_type *triggers);
			
 
				 extern void jbd2_buffer_abort_trigger(struct journal_head *jh,