|
@@ -410,6 +410,43 @@ out:
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+struct orlov_stats {
|
|
|
+ __u32 free_inodes;
|
|
|
+ __u32 free_blocks;
|
|
|
+ __u32 used_dirs;
|
|
|
+};
|
|
|
+
|
|
|
+/*
|
|
|
+ * Helper function for Orlov's allocator; returns critical information
|
|
|
+ * for a particular block group or flex_bg. If flex_size is 1, then g
|
|
|
+ * is a block group number; otherwise it is flex_bg number.
|
|
|
+ */
|
|
|
+void get_orlov_stats(struct super_block *sb, ext4_group_t g,
|
|
|
+ int flex_size, struct orlov_stats *stats)
|
|
|
+{
|
|
|
+ struct ext4_group_desc *desc;
|
|
|
+ ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
|
|
|
+ int i;
|
|
|
+
|
|
|
+ stats->free_inodes = 0;
|
|
|
+ stats->free_blocks = 0;
|
|
|
+ stats->used_dirs = 0;
|
|
|
+
|
|
|
+ g *= flex_size;
|
|
|
+
|
|
|
+ for (i = 0; i < flex_size; i++) {
|
|
|
+ if (g >= ngroups)
|
|
|
+ break;
|
|
|
+ desc = ext4_get_group_desc(sb, g++, NULL);
|
|
|
+ if (!desc)
|
|
|
+ continue;
|
|
|
+
|
|
|
+ stats->free_inodes += ext4_free_inodes_count(sb, desc);
|
|
|
+ stats->free_blocks += ext4_free_blks_count(sb, desc);
|
|
|
+ stats->used_dirs += ext4_used_dirs_count(sb, desc);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Orlov's allocator for directories.
|
|
|
*
|
|
@@ -425,35 +462,34 @@ out:
|
|
|
* it has too many directories already (max_dirs) or
|
|
|
* it has too few free inodes left (min_inodes) or
|
|
|
* it has too few free blocks left (min_blocks) or
|
|
|
- * it's already running too large debt (max_debt).
|
|
|
* Parent's group is preferred, if it doesn't satisfy these
|
|
|
* conditions we search cyclically through the rest. If none
|
|
|
* of the groups look good we just look for a group with more
|
|
|
* free inodes than average (starting at parent's group).
|
|
|
- *
|
|
|
- * Debt is incremented each time we allocate a directory and decremented
|
|
|
- * when we allocate an inode, within 0--255.
|
|
|
*/
|
|
|
|
|
|
-#define INODE_COST 64
|
|
|
-#define BLOCK_COST 256
|
|
|
-
|
|
|
static int find_group_orlov(struct super_block *sb, struct inode *parent,
|
|
|
- ext4_group_t *group)
|
|
|
+ ext4_group_t *group, int mode)
|
|
|
{
|
|
|
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
|
|
|
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
|
|
- struct ext4_super_block *es = sbi->s_es;
|
|
|
ext4_group_t ngroups = sbi->s_groups_count;
|
|
|
int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
|
|
|
unsigned int freei, avefreei;
|
|
|
ext4_fsblk_t freeb, avefreeb;
|
|
|
- ext4_fsblk_t blocks_per_dir;
|
|
|
unsigned int ndirs;
|
|
|
- int max_debt, max_dirs, min_inodes;
|
|
|
+ int max_dirs, min_inodes;
|
|
|
ext4_grpblk_t min_blocks;
|
|
|
- ext4_group_t i;
|
|
|
+ ext4_group_t i, grp, g;
|
|
|
struct ext4_group_desc *desc;
|
|
|
+ struct orlov_stats stats;
|
|
|
+ int flex_size = ext4_flex_bg_size(sbi);
|
|
|
+
|
|
|
+ if (flex_size > 1) {
|
|
|
+ ngroups = (ngroups + flex_size - 1) >>
|
|
|
+ sbi->s_log_groups_per_flex;
|
|
|
+ parent_group >>= sbi->s_log_groups_per_flex;
|
|
|
+ }
|
|
|
|
|
|
freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
|
|
|
avefreei = freei / ngroups;
|
|
@@ -462,71 +498,97 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
|
|
|
do_div(avefreeb, ngroups);
|
|
|
ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
|
|
|
|
|
|
- if ((parent == sb->s_root->d_inode) ||
|
|
|
- (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) {
|
|
|
+ if (S_ISDIR(mode) &&
|
|
|
+ ((parent == sb->s_root->d_inode) ||
|
|
|
+ (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) {
|
|
|
int best_ndir = inodes_per_group;
|
|
|
- ext4_group_t grp;
|
|
|
int ret = -1;
|
|
|
|
|
|
get_random_bytes(&grp, sizeof(grp));
|
|
|
parent_group = (unsigned)grp % ngroups;
|
|
|
for (i = 0; i < ngroups; i++) {
|
|
|
- grp = (parent_group + i) % ngroups;
|
|
|
- desc = ext4_get_group_desc(sb, grp, NULL);
|
|
|
- if (!desc || !ext4_free_inodes_count(sb, desc))
|
|
|
+ g = (parent_group + i) % ngroups;
|
|
|
+ get_orlov_stats(sb, g, flex_size, &stats);
|
|
|
+ if (!stats.free_inodes)
|
|
|
continue;
|
|
|
- if (ext4_used_dirs_count(sb, desc) >= best_ndir)
|
|
|
+ if (stats.used_dirs >= best_ndir)
|
|
|
continue;
|
|
|
- if (ext4_free_inodes_count(sb, desc) < avefreei)
|
|
|
+ if (stats.free_inodes < avefreei)
|
|
|
continue;
|
|
|
- if (ext4_free_blks_count(sb, desc) < avefreeb)
|
|
|
+ if (stats.free_blocks < avefreeb)
|
|
|
continue;
|
|
|
- *group = grp;
|
|
|
+ grp = g;
|
|
|
ret = 0;
|
|
|
- best_ndir = ext4_used_dirs_count(sb, desc);
|
|
|
+ best_ndir = stats.used_dirs;
|
|
|
+ }
|
|
|
+ if (ret)
|
|
|
+ goto fallback;
|
|
|
+ found_flex_bg:
|
|
|
+ if (flex_size == 1) {
|
|
|
+ *group = grp;
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * We pack inodes at the beginning of the flexgroup's
|
|
|
+ * inode tables. Block allocation decisions will do
|
|
|
+ * something similar, although regular files will
|
|
|
+ * start at 2nd block group of the flexgroup. See
|
|
|
+ * ext4_ext_find_goal() and ext4_find_near().
|
|
|
+ */
|
|
|
+ grp *= flex_size;
|
|
|
+ for (i = 0; i < flex_size; i++) {
|
|
|
+ if (grp+i >= sbi->s_groups_count)
|
|
|
+ break;
|
|
|
+ desc = ext4_get_group_desc(sb, grp+i, NULL);
|
|
|
+ if (desc && ext4_free_inodes_count(sb, desc)) {
|
|
|
+ *group = grp+i;
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
}
|
|
|
- if (ret == 0)
|
|
|
- return ret;
|
|
|
goto fallback;
|
|
|
}
|
|
|
|
|
|
- blocks_per_dir = ext4_blocks_count(es) - freeb;
|
|
|
- do_div(blocks_per_dir, ndirs);
|
|
|
-
|
|
|
max_dirs = ndirs / ngroups + inodes_per_group / 16;
|
|
|
- min_inodes = avefreei - inodes_per_group / 4;
|
|
|
- min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4;
|
|
|
-
|
|
|
- max_debt = EXT4_BLOCKS_PER_GROUP(sb);
|
|
|
- max_debt /= max_t(int, blocks_per_dir, BLOCK_COST);
|
|
|
- if (max_debt * INODE_COST > inodes_per_group)
|
|
|
- max_debt = inodes_per_group / INODE_COST;
|
|
|
- if (max_debt > 255)
|
|
|
- max_debt = 255;
|
|
|
- if (max_debt == 0)
|
|
|
- max_debt = 1;
|
|
|
+ min_inodes = avefreei - inodes_per_group*flex_size / 4;
|
|
|
+ if (min_inodes < 1)
|
|
|
+ min_inodes = 1;
|
|
|
+ min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Start looking in the flex group where we last allocated an
|
|
|
+ * inode for this parent directory
|
|
|
+ */
|
|
|
+ if (EXT4_I(parent)->i_last_alloc_group != ~0) {
|
|
|
+ parent_group = EXT4_I(parent)->i_last_alloc_group;
|
|
|
+ if (flex_size > 1)
|
|
|
+ parent_group >>= sbi->s_log_groups_per_flex;
|
|
|
+ }
|
|
|
|
|
|
for (i = 0; i < ngroups; i++) {
|
|
|
- *group = (parent_group + i) % ngroups;
|
|
|
- desc = ext4_get_group_desc(sb, *group, NULL);
|
|
|
- if (!desc || !ext4_free_inodes_count(sb, desc))
|
|
|
- continue;
|
|
|
- if (ext4_used_dirs_count(sb, desc) >= max_dirs)
|
|
|
+ grp = (parent_group + i) % ngroups;
|
|
|
+ get_orlov_stats(sb, grp, flex_size, &stats);
|
|
|
+ if (stats.used_dirs >= max_dirs)
|
|
|
continue;
|
|
|
- if (ext4_free_inodes_count(sb, desc) < min_inodes)
|
|
|
+ if (stats.free_inodes < min_inodes)
|
|
|
continue;
|
|
|
- if (ext4_free_blks_count(sb, desc) < min_blocks)
|
|
|
+ if (stats.free_blocks < min_blocks)
|
|
|
continue;
|
|
|
- return 0;
|
|
|
+ goto found_flex_bg;
|
|
|
}
|
|
|
|
|
|
fallback:
|
|
|
+ ngroups = sbi->s_groups_count;
|
|
|
+ avefreei = freei / ngroups;
|
|
|
+ parent_group = EXT4_I(parent)->i_block_group;
|
|
|
for (i = 0; i < ngroups; i++) {
|
|
|
- *group = (parent_group + i) % ngroups;
|
|
|
- desc = ext4_get_group_desc(sb, *group, NULL);
|
|
|
+ grp = (parent_group + i) % ngroups;
|
|
|
+ desc = ext4_get_group_desc(sb, grp, NULL);
|
|
|
if (desc && ext4_free_inodes_count(sb, desc) &&
|
|
|
- ext4_free_inodes_count(sb, desc) >= avefreei)
|
|
|
+ ext4_free_inodes_count(sb, desc) >= avefreei) {
|
|
|
+ *group = grp;
|
|
|
return 0;
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
if (avefreei) {
|
|
@@ -542,12 +604,51 @@ fallback:
|
|
|
}
|
|
|
|
|
|
static int find_group_other(struct super_block *sb, struct inode *parent,
|
|
|
- ext4_group_t *group)
|
|
|
+ ext4_group_t *group, int mode)
|
|
|
{
|
|
|
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
|
|
|
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
|
|
|
struct ext4_group_desc *desc;
|
|
|
- ext4_group_t i;
|
|
|
+ ext4_group_t i, last;
|
|
|
+ int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Try to place the inode is the same flex group as its
|
|
|
+ * parent. If we can't find space, use the Orlov algorithm to
|
|
|
+ * find another flex group, and store that information in the
|
|
|
+ * parent directory's inode information so that use that flex
|
|
|
+ * group for future allocations.
|
|
|
+ */
|
|
|
+ if (flex_size > 1) {
|
|
|
+ int retry = 0;
|
|
|
+
|
|
|
+ try_again:
|
|
|
+ parent_group &= ~(flex_size-1);
|
|
|
+ last = parent_group + flex_size;
|
|
|
+ if (last > ngroups)
|
|
|
+ last = ngroups;
|
|
|
+ for (i = parent_group; i < last; i++) {
|
|
|
+ desc = ext4_get_group_desc(sb, i, NULL);
|
|
|
+ if (desc && ext4_free_inodes_count(sb, desc)) {
|
|
|
+ *group = i;
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) {
|
|
|
+ retry = 1;
|
|
|
+ parent_group = EXT4_I(parent)->i_last_alloc_group;
|
|
|
+ goto try_again;
|
|
|
+ }
|
|
|
+ /*
|
|
|
+ * If this didn't work, use the Orlov search algorithm
|
|
|
+ * to find a new flex group; we pass in the mode to
|
|
|
+ * avoid the topdir algorithms.
|
|
|
+ */
|
|
|
+ *group = parent_group + flex_size;
|
|
|
+ if (*group > ngroups)
|
|
|
+ *group = 0;
|
|
|
+ return find_group_orlov(sb, parent, group, mode);
|
|
|
+ }
|
|
|
|
|
|
/*
|
|
|
* Try to place the inode in its parent directory
|
|
@@ -716,10 +817,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
|
|
|
sbi = EXT4_SB(sb);
|
|
|
es = sbi->s_es;
|
|
|
|
|
|
- if (sbi->s_log_groups_per_flex) {
|
|
|
+ if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
|
|
|
ret2 = find_group_flex(sb, dir, &group);
|
|
|
if (ret2 == -1) {
|
|
|
- ret2 = find_group_other(sb, dir, &group);
|
|
|
+ ret2 = find_group_other(sb, dir, &group, mode);
|
|
|
if (ret2 == 0 && once)
|
|
|
once = 0;
|
|
|
printk(KERN_NOTICE "ext4: find_group_flex "
|
|
@@ -733,11 +834,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
|
|
|
if (test_opt(sb, OLDALLOC))
|
|
|
ret2 = find_group_dir(sb, dir, &group);
|
|
|
else
|
|
|
- ret2 = find_group_orlov(sb, dir, &group);
|
|
|
+ ret2 = find_group_orlov(sb, dir, &group, mode);
|
|
|
} else
|
|
|
- ret2 = find_group_other(sb, dir, &group);
|
|
|
+ ret2 = find_group_other(sb, dir, &group, mode);
|
|
|
|
|
|
got_group:
|
|
|
+ EXT4_I(dir)->i_last_alloc_group = group;
|
|
|
err = -ENOSPC;
|
|
|
if (ret2 == -1)
|
|
|
goto out;
|
|
@@ -894,6 +996,7 @@ got:
|
|
|
ei->i_file_acl = 0;
|
|
|
ei->i_dtime = 0;
|
|
|
ei->i_block_group = group;
|
|
|
+ ei->i_last_alloc_group = ~0;
|
|
|
|
|
|
ext4_set_inode_flags(inode);
|
|
|
if (IS_DIRSYNC(inode))
|