浏览代码

ocfs2: sparse b-tree support

Introduce tree rotations into the b-tree code. This will allow ocfs2 to
support sparse files. Much of the added code is designed to be generic (in
the ocfs2 sense) so that it can later be re-used to implement large
extended attributes.

This patch only adds the rotation code and does minimal updates to callers
of the extent api.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Mark Fasheh 18 年之前
父节点
当前提交
dcd0538ff4
共有 8 个文件被更改，包括 1814 次插入和 311 次删除
  1. 1775 297
      fs/ocfs2/alloc.c
  2. 2 1
      fs/ocfs2/alloc.h
  3. 4 2
      fs/ocfs2/dir.c
  4. 15 8
      fs/ocfs2/file.c
  5. 1 0
      fs/ocfs2/file.h
  6. 4 1
      fs/ocfs2/namei.c
  7. 7 0
      fs/ocfs2/ocfs2.h
  8. 6 2
      fs/ocfs2/ocfs2_fs.h

文件差异内容过多而无法显示
+ 1775 - 297
fs/ocfs2/alloc.c


+ 2 - 1
fs/ocfs2/alloc.h

@@ -31,7 +31,8 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 			handle_t *handle,
 			handle_t *handle,
 			struct inode *inode,
 			struct inode *inode,
 			struct buffer_head *fe_bh,
 			struct buffer_head *fe_bh,
-			u64 blkno,
+			u32 cpos,
+			u64 start_blk,
 			u32 new_clusters,
 			u32 new_clusters,
 			struct ocfs2_alloc_context *meta_ac);
 			struct ocfs2_alloc_context *meta_ac);
 int ocfs2_num_free_extents(struct ocfs2_super *osb,
 int ocfs2_num_free_extents(struct ocfs2_super *osb,

+ 4 - 2
fs/ocfs2/dir.c

@@ -365,8 +365,10 @@ int ocfs2_do_extend_dir(struct super_block *sb,
 	spin_unlock(&OCFS2_I(dir)->ip_lock);
 	spin_unlock(&OCFS2_I(dir)->ip_lock);
 
 
 	if (extend) {
 	if (extend) {
-		status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, 1,
-						    parent_fe_bh, handle,
+		u32 offset = OCFS2_I(dir)->ip_clusters;
+
+		status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset,
+						    1, parent_fe_bh, handle,
 						    data_ac, meta_ac, NULL);
 						    data_ac, meta_ac, NULL);
 		BUG_ON(status == -EAGAIN);
 		BUG_ON(status == -EAGAIN);
 		if (status < 0) {
 		if (status < 0) {

+ 15 - 8
fs/ocfs2/file.c

@@ -397,6 +397,7 @@ bail:
  */
  */
 int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
 int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
 			       struct inode *inode,
 			       struct inode *inode,
+			       u32 *logical_offset,
 			       u32 clusters_to_add,
 			       u32 clusters_to_add,
 			       struct buffer_head *fe_bh,
 			       struct buffer_head *fe_bh,
 			       handle_t *handle,
 			       handle_t *handle,
@@ -460,18 +461,14 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
 	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
 	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
 	mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
 	mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
 	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
 	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
-	status = ocfs2_insert_extent(osb, handle, inode, fe_bh, block,
-				     num_bits, meta_ac);
+	status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
+				     *logical_offset, block, num_bits,
+				     meta_ac);
 	if (status < 0) {
 	if (status < 0) {
 		mlog_errno(status);
 		mlog_errno(status);
 		goto leave;
 		goto leave;
 	}
 	}
 
 
-	le32_add_cpu(&fe->i_clusters, num_bits);
-	spin_lock(&OCFS2_I(inode)->ip_lock);
-	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
-	spin_unlock(&OCFS2_I(inode)->ip_lock);
-
 	status = ocfs2_journal_dirty(handle, fe_bh);
 	status = ocfs2_journal_dirty(handle, fe_bh);
 	if (status < 0) {
 	if (status < 0) {
 		mlog_errno(status);
 		mlog_errno(status);
@@ -479,6 +476,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
 	}
 	}
 
 
 	clusters_to_add -= num_bits;
 	clusters_to_add -= num_bits;
+	*logical_offset += num_bits;
 
 
 	if (clusters_to_add) {
 	if (clusters_to_add) {
 		mlog(0, "need to alloc once more, clusters = %u, wanted = "
 		mlog(0, "need to alloc once more, clusters = %u, wanted = "
@@ -501,7 +499,7 @@ static int ocfs2_extend_allocation(struct inode *inode,
 	int restart_func = 0;
 	int restart_func = 0;
 	int drop_alloc_sem = 0;
 	int drop_alloc_sem = 0;
 	int credits, num_free_extents;
 	int credits, num_free_extents;
-	u32 prev_clusters;
+	u32 prev_clusters, logical_start;
 	struct buffer_head *bh = NULL;
 	struct buffer_head *bh = NULL;
 	struct ocfs2_dinode *fe = NULL;
 	struct ocfs2_dinode *fe = NULL;
 	handle_t *handle = NULL;
 	handle_t *handle = NULL;
@@ -512,6 +510,12 @@ static int ocfs2_extend_allocation(struct inode *inode,
 
 
 	mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);
 	mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);
 
 
+	/*
+	 * This function only exists for file systems which don't
+	 * support holes.
+	 */
+	BUG_ON(ocfs2_sparse_alloc(osb));
+
 	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
 	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
 				  OCFS2_BH_CACHED, inode);
 				  OCFS2_BH_CACHED, inode);
 	if (status < 0) {
 	if (status < 0) {
@@ -526,6 +530,8 @@ static int ocfs2_extend_allocation(struct inode *inode,
 		goto leave;
 		goto leave;
 	}
 	}
 
 
+	logical_start = OCFS2_I(inode)->ip_clusters;
+
 restart_all:
 restart_all:
 	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
 	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
 
 
@@ -590,6 +596,7 @@ restarted_transaction:
 
 
 	status = ocfs2_do_extend_allocation(osb,
 	status = ocfs2_do_extend_allocation(osb,
 					    inode,
 					    inode,
+					    &logical_start,
 					    clusters_to_add,
 					    clusters_to_add,
 					    bh,
 					    bh,
 					    handle,
 					    handle,

+ 1 - 0
fs/ocfs2/file.h

@@ -39,6 +39,7 @@ enum ocfs2_alloc_restarted {
 };
 };
 int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
 int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
 			       struct inode *inode,
 			       struct inode *inode,
+			       u32 *cluster_start,
 			       u32 clusters_to_add,
 			       u32 clusters_to_add,
 			       struct buffer_head *fe_bh,
 			       struct buffer_head *fe_bh,
 			       handle_t *handle,
 			       handle_t *handle,

+ 4 - 1
fs/ocfs2/namei.c

@@ -1671,8 +1671,11 @@ static int ocfs2_symlink(struct inode *dir,
 	inode->i_rdev = 0;
 	inode->i_rdev = 0;
 	newsize = l - 1;
 	newsize = l - 1;
 	if (l > ocfs2_fast_symlink_chars(sb)) {
 	if (l > ocfs2_fast_symlink_chars(sb)) {
+		u32 offset = 0;
+
 		inode->i_op = &ocfs2_symlink_inode_operations;
 		inode->i_op = &ocfs2_symlink_inode_operations;
-		status = ocfs2_do_extend_allocation(osb, inode, 1, new_fe_bh,
+		status = ocfs2_do_extend_allocation(osb, inode, &offset, 1,
+						    new_fe_bh,
 						    handle, data_ac, NULL,
 						    handle, data_ac, NULL,
 						    NULL);
 						    NULL);
 		if (status < 0) {
 		if (status < 0) {

+ 7 - 0
fs/ocfs2/ocfs2.h

@@ -303,6 +303,13 @@ static inline int ocfs2_should_order_data(struct inode *inode)
 	return 1;
 	return 1;
 }
 }
 
 
+static inline int ocfs2_sparse_alloc(struct ocfs2_super *osb)
+{
+	if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC)
+		return 1;
+	return 0;
+}
+
 /* set / clear functions because cluster events can make these happen
 /* set / clear functions because cluster events can make these happen
  * in parallel so we want the transitions to be atomic. this also
  * in parallel so we want the transitions to be atomic. this also
  * means that any future flags osb_flags must be protected by spinlock
  * means that any future flags osb_flags must be protected by spinlock

+ 6 - 2
fs/ocfs2/ocfs2_fs.h

@@ -86,7 +86,8 @@
 	OCFS2_SB(sb)->s_feature_incompat &= ~(mask)
 	OCFS2_SB(sb)->s_feature_incompat &= ~(mask)
 
 
 #define OCFS2_FEATURE_COMPAT_SUPP	OCFS2_FEATURE_COMPAT_BACKUP_SB
 #define OCFS2_FEATURE_COMPAT_SUPP	OCFS2_FEATURE_COMPAT_BACKUP_SB
-#define OCFS2_FEATURE_INCOMPAT_SUPP	OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT
+#define OCFS2_FEATURE_INCOMPAT_SUPP	(OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \
+					 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC)
 #define OCFS2_FEATURE_RO_COMPAT_SUPP	0
 #define OCFS2_FEATURE_RO_COMPAT_SUPP	0
 
 
 /*
 /*
@@ -311,7 +312,10 @@ struct ocfs2_extent_list {
 /*00*/	__le16 l_tree_depth;		/* Extent tree depth from this
 /*00*/	__le16 l_tree_depth;		/* Extent tree depth from this
 					   point.  0 means data extents
 					   point.  0 means data extents
 					   hang directly off this
 					   hang directly off this
-					   header (a leaf) */
+					   header (a leaf)
+					   NOTE: The high 8 bits cannot be
+					   used - tree_depth is never that big.
+					*/
 	__le16 l_count;			/* Number of extent records */
 	__le16 l_count;			/* Number of extent records */
 	__le16 l_next_free_rec;		/* Next unused extent slot */
 	__le16 l_next_free_rec;		/* Next unused extent slot */
 	__le16 l_reserved1;
 	__le16 l_reserved1;

部分文件因为文件数量过多而无法显示