@@ -3153,29 +3153,28 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	struct ext4_extent_header *eh;
 	struct ext4_map_blocks split_map;
 	struct ext4_extent zero_ex;
-	struct ext4_extent *ex;
+	struct ext4_extent *ex, *abut_ex;
 	ext4_lblk_t ee_block, eof_block;
-	unsigned int ee_len, depth;
-	int allocated, max_zeroout = 0;
+	unsigned int ee_len, depth, map_len = map->m_len;
+	int allocated = 0, max_zeroout = 0;
 	int err = 0;
 	int split_flag = 0;
 
 	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
 		"block %llu, max_blocks %u\n", inode->i_ino,
-		(unsigned long long)map->m_lblk, map->m_len);
+		(unsigned long long)map->m_lblk, map_len);
 
 	sbi = EXT4_SB(inode->i_sb);
 	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
 		inode->i_sb->s_blocksize_bits;
-	if (eof_block < map->m_lblk + map->m_len)
-		eof_block = map->m_lblk + map->m_len;
+	if (eof_block < map->m_lblk + map_len)
+		eof_block = map->m_lblk + map_len;
 
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
 	ex = path[depth].p_ext;
 	ee_block = le32_to_cpu(ex->ee_block);
 	ee_len = ext4_ext_get_actual_len(ex);
-	allocated = ee_len - (map->m_lblk - ee_block);
 	zero_ex.ee_len = 0;
 
 	trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
@@ -3186,77 +3185,121 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 
 	/*
 	 * Attempt to transfer newly initialized blocks from the currently
-	 * uninitialized extent to its left neighbor. This is much cheaper
+	 * uninitialized extent to its neighbor. This is much cheaper
 	 * than an insertion followed by a merge as those involve costly
-	 * memmove() calls. This is the common case in steady state for
-	 * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append
-	 * writes.
+	 * memmove() calls. Transferring to the left is the common case in
+	 * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE)
+	 * followed by append writes.
 	 *
 	 * Limitations of the current logic:
-	 *  - L1: we only deal with writes at the start of the extent.
-	 *    The approach could be extended to writes at the end
-	 *    of the extent but this scenario was deemed less common.
-	 *  - L2: we do not deal with writes covering the whole extent.
+	 *  - L1: we do not deal with writes covering the whole extent.
 	 *    This would require removing the extent if the transfer
 	 *    is possible.
-	 *  - L3: we only attempt to merge with an extent stored in the
+	 *  - L2: we only attempt to merge with an extent stored in the
 	 *    same extent tree node.
 	 */
-	if ((map->m_lblk == ee_block) &&	/*L1*/
-		(map->m_len < ee_len) &&	/*L2*/
-		(ex > EXT_FIRST_EXTENT(eh))) {	/*L3*/
-		struct ext4_extent *prev_ex;
+	if ((map->m_lblk == ee_block) &&
+		/* See if we can merge left */
+		(map_len < ee_len) &&		/*L1*/
+		(ex > EXT_FIRST_EXTENT(eh))) {	/*L2*/
 		ext4_lblk_t prev_lblk;
 		ext4_fsblk_t prev_pblk, ee_pblk;
-		unsigned int prev_len, write_len;
+		unsigned int prev_len;
 
-		prev_ex = ex - 1;
-		prev_lblk = le32_to_cpu(prev_ex->ee_block);
-		prev_len = ext4_ext_get_actual_len(prev_ex);
-		prev_pblk = ext4_ext_pblock(prev_ex);
+		abut_ex = ex - 1;
+		prev_lblk = le32_to_cpu(abut_ex->ee_block);
+		prev_len = ext4_ext_get_actual_len(abut_ex);
+		prev_pblk = ext4_ext_pblock(abut_ex);
 		ee_pblk = ext4_ext_pblock(ex);
-		write_len = map->m_len;
 
 		/*
-		 * A transfer of blocks from 'ex' to 'prev_ex' is allowed
+		 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
		 * upon those conditions:
-		 *   - C1: prev_ex is initialized,
-		 *   - C2: prev_ex is logically abutting ex,
-		 *   - C3: prev_ex is physically abutting ex,
-		 *   - C4: prev_ex can receive the additional blocks without
+		 *   - C1: abut_ex is initialized,
+		 *   - C2: abut_ex is logically abutting ex,
+		 *   - C3: abut_ex is physically abutting ex,
+		 *   - C4: abut_ex can receive the additional blocks without
 		 *     overflowing the (initialized) length limit.
 		 */
-		if ((!ext4_ext_is_uninitialized(prev_ex)) &&		/*C1*/
+		if ((!ext4_ext_is_uninitialized(abut_ex)) &&		/*C1*/
 			((prev_lblk + prev_len) == ee_block) &&		/*C2*/
 			((prev_pblk + prev_len) == ee_pblk) &&		/*C3*/
-			(prev_len < (EXT_INIT_MAX_LEN - write_len))) {	/*C4*/
+			(prev_len < (EXT_INIT_MAX_LEN - map_len))) {	/*C4*/
 			err = ext4_ext_get_access(handle, inode, path + depth);
 			if (err)
 				goto out;
 
 			trace_ext4_ext_convert_to_initialized_fastpath(inode,
-				map, ex, prev_ex);
+				map, ex, abut_ex);
 
-			/* Shift the start of ex by 'write_len' blocks */
-			ex->ee_block = cpu_to_le32(ee_block + write_len);
-			ext4_ext_store_pblock(ex, ee_pblk + write_len);
-			ex->ee_len = cpu_to_le16(ee_len - write_len);
+			/* Shift the start of ex by 'map_len' blocks */
+			ex->ee_block = cpu_to_le32(ee_block + map_len);
+			ext4_ext_store_pblock(ex, ee_pblk + map_len);
+			ex->ee_len = cpu_to_le16(ee_len - map_len);
 			ext4_ext_mark_uninitialized(ex); /* Restore the flag */
 
-			/* Extend prev_ex by 'write_len' blocks */
-			prev_ex->ee_len = cpu_to_le16(prev_len + write_len);
+			/* Extend abut_ex by 'map_len' blocks */
+			abut_ex->ee_len = cpu_to_le16(prev_len + map_len);
 
-			/* Mark the block containing both extents as dirty */
-			ext4_ext_dirty(handle, inode, path + depth);
+			/* Result: number of initialized blocks past m_lblk */
+			allocated = map_len;
+		}
+	} else if (((map->m_lblk + map_len) == (ee_block + ee_len)) &&
+		   (map_len < ee_len) &&	/*L1*/
+		   ex < EXT_LAST_EXTENT(eh)) {	/*L2*/
+		/* See if we can merge right */
+		ext4_lblk_t next_lblk;
+		ext4_fsblk_t next_pblk, ee_pblk;
+		unsigned int next_len;
+
+		abut_ex = ex + 1;
+		next_lblk = le32_to_cpu(abut_ex->ee_block);
+		next_len = ext4_ext_get_actual_len(abut_ex);
+		next_pblk = ext4_ext_pblock(abut_ex);
+		ee_pblk = ext4_ext_pblock(ex);
 
-			/* Update path to point to the right extent */
-			path[depth].p_ext = prev_ex;
+		/*
+		 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
+		 * upon those conditions:
+		 *   - C1: abut_ex is initialized,
+		 *   - C2: abut_ex is logically abutting ex,
+		 *   - C3: abut_ex is physically abutting ex,
+		 *   - C4: abut_ex can receive the additional blocks without
+		 *     overflowing the (initialized) length limit.
+		 */
+		if ((!ext4_ext_is_uninitialized(abut_ex)) &&		/*C1*/
+		    ((map->m_lblk + map_len) == next_lblk) &&		/*C2*/
+		    ((ee_pblk + ee_len) == next_pblk) &&		/*C3*/
+		    (next_len < (EXT_INIT_MAX_LEN - map_len))) {	/*C4*/
+			err = ext4_ext_get_access(handle, inode, path + depth);
+			if (err)
+				goto out;
+
+			trace_ext4_ext_convert_to_initialized_fastpath(inode,
+				map, ex, abut_ex);
+
+			/* Shift the start of abut_ex by 'map_len' blocks */
+			abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);
+			ext4_ext_store_pblock(abut_ex, next_pblk - map_len);
+			ex->ee_len = cpu_to_le16(ee_len - map_len);
+			ext4_ext_mark_uninitialized(ex); /* Restore the flag */
+
+			/* Extend abut_ex by 'map_len' blocks */
+			abut_ex->ee_len = cpu_to_le16(next_len + map_len);
 
 			/* Result: number of initialized blocks past m_lblk */
-			allocated = write_len;
-			goto out;
+			allocated = map_len;
 		}
 	}
+	if (allocated) {
+		/* Mark the block containing both extents as dirty */
+		ext4_ext_dirty(handle, inode, path + depth);
+
+		/* Update path to point to the right extent */
+		path[depth].p_ext = abut_ex;
+		goto out;
+	} else
+		allocated = ee_len - (map->m_lblk - ee_block);
 
 	WARN_ON(map->m_lblk < ee_block);
 	/*
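
As context for the comment in the patch about the targeted workload, below is a minimal userspace sketch (illustration only, not part of the patch) of the fallocate(FALLOC_FL_KEEP_SIZE) followed by append writes pattern that the merge-left fast path optimizes; the file name, sizes, and loop count are arbitrary.

/* Illustrative only: preallocate with KEEP_SIZE, then append sequentially. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	int fd = open("testfile", O_WRONLY | O_CREAT | O_APPEND, 0644);

	if (fd < 0)
		return 1;

	/* Preallocate 1 MiB of uninitialized extents without changing i_size. */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) < 0)
		return 1;

	memset(buf, 'a', sizeof(buf));

	/*
	 * After the first append initializes blocks at the front of the
	 * preallocated range, each further append converts blocks at the
	 * start of the remaining uninitialized extent, which the kernel can
	 * transfer to the initialized extent on its left instead of doing a
	 * split followed by a merge.
	 */
	for (int i = 0; i < 16; i++)
		if (write(fd, buf, sizeof(buf)) != sizeof(buf))
			return 1;

	close(fd);
	return 0;
}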