13 years ago · 119c0d4460
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -592,94 +592,6 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 
				 	return -1;
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				- * claim the inode from the inode bitmap. If the group
			
 
				- * is uninit we need to take the groups's ext4_group_lock
			
 
				- * and clear the uninit flag. The inode bitmap update
			
 
				- * and group desc uninit flag clear should be done
			
 
				- * after holding ext4_group_lock so that ext4_read_inode_bitmap
			
 
				- * doesn't race with the ext4_claim_inode
			
 
				- */
			
 
				-static int ext4_claim_inode(struct super_block *sb,
			
 
				-			struct buffer_head *inode_bitmap_bh,
			
 
				-			unsigned long ino, ext4_group_t group, umode_t mode)
			
 
				-{
			
 
				-	int free = 0, retval = 0, count;
			
 
				-	struct ext4_sb_info *sbi = EXT4_SB(sb);
			
 
				-	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
			
 
				-	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
			
 
				-
			
 
				-	/*
			
 
				-	 * We have to be sure that new inode allocation does not race with
			
 
				-	 * inode table initialization, because otherwise we may end up
			
 
				-	 * allocating and writing new inode right before sb_issue_zeroout
			
 
				-	 * takes place and overwriting our new inode with zeroes. So we
			
 
				-	 * take alloc_sem to prevent it.
			
 
				-	 */
			
 
				-	down_read(&grp->alloc_sem);
			
 
				-	ext4_lock_group(sb, group);
			
 
				-	if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) {
			
 
				-		/* not a free inode */
			
 
				-		retval = 1;
			
 
				-		goto err_ret;
			
 
				-	}
			
 
				-	ino++;
			
 
				-	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
			
 
				-			ino > EXT4_INODES_PER_GROUP(sb)) {
			
 
				-		ext4_unlock_group(sb, group);
			
 
				-		up_read(&grp->alloc_sem);
			
 
				-		ext4_error(sb, "reserved inode or inode > inodes count - "
			
 
				-			   "block_group = %u, inode=%lu", group,
			
 
				-			   ino + group * EXT4_INODES_PER_GROUP(sb));
			
 
				-		return 1;
			
 
				-	}
			
 
				-	/* If we didn't allocate from within the initialized part of the inode
			
 
				-	 * table then we need to initialize up to this inode. */
			
 
				-	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
			
 
				-
			
 
				-		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
			
 
				-			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
			
 
				-			/* When marking the block group with
			
 
				-			 * ~EXT4_BG_INODE_UNINIT we don't want to depend
			
 
				-			 * on the value of bg_itable_unused even though
			
 
				-			 * mke2fs could have initialized the same for us.
			
 
				-			 * Instead we calculated the value below
			
 
				-			 */
			
 
				-
			
 
				-			free = 0;
			
 
				-		} else {
			
 
				-			free = EXT4_INODES_PER_GROUP(sb) -
			
 
				-				ext4_itable_unused_count(sb, gdp);
			
 
				-		}
			
 
				-
			
 
				-		/*
			
 
				-		 * Check the relative inode number against the last used
			
 
				-		 * relative inode number in this group. if it is greater
			
 
				-		 * we need to  update the bg_itable_unused count
			
 
				-		 *
			
 
				-		 */
			
 
				-		if (ino > free)
			
 
				-			ext4_itable_unused_set(sb, gdp,
			
 
				-					(EXT4_INODES_PER_GROUP(sb) - ino));
			
 
				-	}
			
 
				-	count = ext4_free_inodes_count(sb, gdp) - 1;
			
 
				-	ext4_free_inodes_set(sb, gdp, count);
			
 
				-	if (S_ISDIR(mode)) {
			
 
				-		count = ext4_used_dirs_count(sb, gdp) + 1;
			
 
				-		ext4_used_dirs_set(sb, gdp, count);
			
 
				-		if (sbi->s_log_groups_per_flex) {
			
 
				-			ext4_group_t f = ext4_flex_group(sbi, group);
			
 
				-
			
 
				-			atomic_inc(&sbi->s_flex_groups[f].used_dirs);
			
 
				-		}
			
 
				-	}
			
 
				-	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
			
 
				-err_ret:
			
 
				-	ext4_unlock_group(sb, group);
			
 
				-	up_read(&grp->alloc_sem);
			
 
				-	return retval;
			
 
				-}
			
 
				-
			
 
				 /*
			
 
				  * There are two policies for allocating an inode.  If the new inode is
			
 
				  * a directory, then a forward search is made for a block group with both
			
@@ -741,6 +653,11 @@ got_group:
 
				 	if (ret2 == -1)
			
 
				 		goto out;
			
 
				 
			
 
				+	/*
			
 
				+	 * Normally we will only go through one pass of this loop,
			
 
				+	 * unless we get unlucky and it turns out the group we selected
			
 
				+	 * had its last inode grabbed by someone else.
			
 
				+	 */
			
 
				 	for (i = 0; i < ngroups; i++, ino = 0) {
			
 
				 		err = -EIO;
			
 
				 
			
@@ -757,51 +674,24 @@ repeat_in_this_group:
 
				 		ino = ext4_find_next_zero_bit((unsigned long *)
			
 
				 					      inode_bitmap_bh->b_data,
			
 
				 					      EXT4_INODES_PER_GROUP(sb), ino);
			
 
				-
			
 
				-		if (ino < EXT4_INODES_PER_GROUP(sb)) {
			
 
				-
			
 
				-			BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
			
 
				-			err = ext4_journal_get_write_access(handle,
			
 
				-							    inode_bitmap_bh);
			
 
				-			if (err)
			
 
				-				goto fail;
			
 
				-
			
 
				-			BUFFER_TRACE(group_desc_bh, "get_write_access");
			
 
				-			err = ext4_journal_get_write_access(handle,
			
 
				-								group_desc_bh);
			
 
				-			if (err)
			
 
				-				goto fail;
			
 
				-			if (!ext4_claim_inode(sb, inode_bitmap_bh,
			
 
				-						ino, group, mode)) {
			
 
				-				/* we won it */
			
 
				-				BUFFER_TRACE(inode_bitmap_bh,
			
 
				-					"call ext4_handle_dirty_metadata");
			
 
				-				err = ext4_handle_dirty_metadata(handle,
			
 
				-								 NULL,
			
 
				-							inode_bitmap_bh);
			
 
				-				if (err)
			
 
				-					goto fail;
			
 
				-				/* zero bit is inode number 1*/
			
 
				-				ino++;
			
 
				-				goto got;
			
 
				-			}
			
 
				-			/* we lost it */
			
 
				-			ext4_handle_release_buffer(handle, inode_bitmap_bh);
			
 
				-			ext4_handle_release_buffer(handle, group_desc_bh);
			
 
				-
			
 
				-			if (++ino < EXT4_INODES_PER_GROUP(sb))
			
 
				-				goto repeat_in_this_group;
			
 
				+		if (ino >= EXT4_INODES_PER_GROUP(sb)) {
			
 
				+			if (++group == ngroups)
			
 
				+				group = 0;
			
 
				+			continue;
			
 
				 		}
			
 
				-
			
 
				-		/*
			
 
				-		 * This case is possible in concurrent environment.  It is very
			
 
				-		 * rare.  We cannot repeat the find_group_xxx() call because
			
 
				-		 * that will simply return the same blockgroup, because the
			
 
				-		 * group descriptor metadata has not yet been updated.
			
 
				-		 * So we just go onto the next blockgroup.
			
 
				-		 */
			
 
				-		if (++group == ngroups)
			
 
				-			group = 0;
			
 
				+		if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
			
 
				+			ext4_error(sb, "reserved inode found cleared - "
			
 
				+				   "inode=%lu", ino + 1);
			
 
				+			continue;
			
 
				+		}
			
 
				+		ext4_lock_group(sb, group);
			
 
				+		ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
			
 
				+		ext4_unlock_group(sb, group);
			
 
				+		ino++;		/* the inode bitmap is zero-based */
			
 
				+		if (!ret2)
			
 
				+			goto got; /* we grabbed the inode! */
			
 
				+		if (ino < EXT4_INODES_PER_GROUP(sb))
			
 
				+			goto repeat_in_this_group;
			
 
				 	}
			
 
				 	err = -ENOSPC;
			
 
				 	goto out;
			
@@ -838,6 +728,59 @@ got:
 
				 		if (err)
			
 
				 			goto fail;
			
 
				 	}
			
 
				+
			
 
				+	BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
			
 
				+	err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
			
 
				+	if (err)
			
 
				+		goto fail;
			
 
				+
			
 
				+	BUFFER_TRACE(group_desc_bh, "get_write_access");
			
 
				+	err = ext4_journal_get_write_access(handle, group_desc_bh);
			
 
				+	if (err)
			
 
				+		goto fail;
			
 
				+
			
 
				+	/* Update the relevant bg descriptor fields */
			
 
				+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
			
 
				+		int free;
			
 
				+		struct ext4_group_info *grp = ext4_get_group_info(sb, group);
			
 
				+
			
 
				+		down_read(&grp->alloc_sem); /* protect vs itable lazyinit */
			
 
				+		ext4_lock_group(sb, group); /* while we modify the bg desc */
			
 
				+		free = EXT4_INODES_PER_GROUP(sb) -
			
 
				+			ext4_itable_unused_count(sb, gdp);
			
 
				+		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
			
 
				+			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
			
 
				+			free = 0;
			
 
				+		}
			
 
				+		/*
			
 
				+		 * Check the relative inode number against the last used
			
 
				+		 * relative inode number in this group. if it is greater
			
 
				+		 * we need to update the bg_itable_unused count
			
 
				+		 */
			
 
				+		if (ino > free)
			
 
				+			ext4_itable_unused_set(sb, gdp,
			
 
				+					(EXT4_INODES_PER_GROUP(sb) - ino));
			
 
				+		up_read(&grp->alloc_sem);
			
 
				+	}
			
 
				+	ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
			
 
				+	if (S_ISDIR(mode)) {
			
 
				+		ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1);
			
 
				+		if (sbi->s_log_groups_per_flex) {
			
 
				+			ext4_group_t f = ext4_flex_group(sbi, group);
			
 
				+
			
 
				+			atomic_inc(&sbi->s_flex_groups[f].used_dirs);
			
 
				+		}
			
 
				+	}
			
 
				+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
			
 
				+		gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
			
 
				+		ext4_unlock_group(sb, group);
			
 
				+	}
			
 
				+
			
 
				+	BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
			
 
				+	err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
			
 
				+	if (err)
			
 
				+		goto fail;
			
 
				+
			
 
				 	BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
			
 
				 	err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
			
 
				 	if (err)
			
@@ -1101,7 +1044,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
 
				  * where it is called from on active part of filesystem is ext4lazyinit
			
 
				  * thread, so we do not need any special locks, however we have to prevent
			
 
				  * inode allocation from the current group, so we take alloc_sem lock, to
			
 
				- * block ext4_claim_inode until we are finished.
			
 
				+ * block ext4_new_inode() until we are finished.
			
 
				  */
			
 
				 int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
			
 
				 				 int barrier)