@@ -886,18 +886,20 @@ static noinline_for_stack int
 ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
					struct ext4_buddy *e4b)
 {
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct inode *inode = sbi->s_buddy_cache;
 	int blocks_per_page;
 	int block;
 	int pnum;
 	int poff;
 	struct page *page;
 	int ret;
+	struct ext4_group_info *grp;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct inode *inode = sbi->s_buddy_cache;
 
 	mb_debug("load group %u\n", group);
 
 	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+	grp = ext4_get_group_info(sb, group);
 
 	e4b->bd_blkbits = sb->s_blocksize_bits;
 	e4b->bd_info = ext4_get_group_info(sb, group);
@@ -905,6 +907,15 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 	e4b->bd_group = group;
 	e4b->bd_buddy_page = NULL;
 	e4b->bd_bitmap_page = NULL;
+	e4b->alloc_semp = &grp->alloc_sem;
+
+	/* Take the read lock on the group alloc
+	 * sem. This makes sure that a parallel
+	 * ext4_mb_init_group happening on other
+	 * groups mapped by the page is blocked
+	 * until we are done with the allocation
+	 */
+	down_read(e4b->alloc_semp);
 
 	/*
 	 * the buddy cache inode stores the block bitmap
@@ -920,6 +931,14 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 	page = find_get_page(inode->i_mapping, pnum);
 	if (page == NULL || !PageUptodate(page)) {
 		if (page)
+			/*
+			 * drop the page reference and try
+			 * to get the page with lock. If we
+			 * are not uptodate that implies
+			 * somebody just created the page but
+			 * has not yet initialized it. So
+			 * wait for it to be initialized.
+			 */
 			page_cache_release(page);
 		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
 		if (page) {
@@ -985,6 +1004,9 @@ err:
 		page_cache_release(e4b->bd_buddy_page);
 	e4b->bd_buddy = NULL;
 	e4b->bd_bitmap = NULL;
+
+	/* Done with the buddy cache */
+	up_read(e4b->alloc_semp);
 	return ret;
 }
 
@@ -994,6 +1016,8 @@ static void ext4_mb_release_desc(struct ext4_buddy *e4b)
 		page_cache_release(e4b->bd_bitmap_page);
 	if (e4b->bd_buddy_page)
 		page_cache_release(e4b->bd_buddy_page);
+	/* Done with the buddy cache */
+	up_read(e4b->alloc_semp);
 }
 
 
@@ -1696,6 +1720,173 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 	return 0;
 }
 
+/*
+ * lock the group_info alloc_sem of all the groups
+ * belonging to the same buddy cache page. This
+ * makes sure no other parallel operation on the
+ * buddy cache can happen while we are holding the
+ * buddy cache lock
+ */
+int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
+{
+	int i;
+	int block, pnum;
+	int blocks_per_page;
+	int groups_per_page;
+	ext4_group_t first_group;
+	struct ext4_group_info *grp;
+
+	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+	/*
+	 * the buddy cache inode stores the block bitmap
+	 * and buddy information in consecutive blocks.
+	 * So for each group we need two blocks.
+	 */
+	block = group * 2;
+	pnum = block / blocks_per_page;
+	first_group = pnum * blocks_per_page / 2;
+
+	groups_per_page = blocks_per_page >> 1;
+	if (groups_per_page == 0)
+		groups_per_page = 1;
+	/* read all groups the page covers into the cache */
+	for (i = 0; i < groups_per_page; i++) {
+
+		if ((first_group + i) >= EXT4_SB(sb)->s_groups_count)
+			break;
+		grp = ext4_get_group_info(sb, first_group + i);
+		/* take each group's write allocation
+		 * semaphore. This makes sure there is
+		 * no block allocation going on in any
+		 * of those groups
+		 */
+		down_write(&grp->alloc_sem);
+	}
+	return i;
+}
+
+void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
+					ext4_group_t group, int locked_group)
+{
+	int i;
+	int block, pnum;
+	int blocks_per_page;
+	ext4_group_t first_group;
+	struct ext4_group_info *grp;
+
+	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+	/*
+	 * the buddy cache inode stores the block bitmap
+	 * and buddy information in consecutive blocks.
+	 * So for each group we need two blocks.
+	 */
+	block = group * 2;
+	pnum = block / blocks_per_page;
+	first_group = pnum * blocks_per_page / 2;
+	/* release locks on all the groups */
+	for (i = 0; i < locked_group; i++) {
+
+		grp = ext4_get_group_info(sb, first_group + i);
+		/* release each group's write allocation
+		 * semaphore, so that block allocation
+		 * can proceed again in any of
+		 * those groups
+		 */
+		up_write(&grp->alloc_sem);
+	}
+
+}
+
+static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+{
+
+	int ret;
+	void *bitmap;
+	int blocks_per_page;
+	int block, pnum, poff;
+	int num_grp_locked = 0;
+	struct ext4_group_info *this_grp;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct inode *inode = sbi->s_buddy_cache;
+	struct page *page = NULL, *bitmap_page = NULL;
+
+	mb_debug("init group %lu\n", group);
+	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+	this_grp = ext4_get_group_info(sb, group);
+	/*
+	 * This ensures we don't add group
+	 * to this buddy cache via resize
+	 */
+	num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group);
+	if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
+		/*
+		 * somebody initialized the group;
+		 * return without doing anything
+		 */
+		ret = 0;
+		goto err;
+	}
+	/*
+	 * the buddy cache inode stores the block bitmap
+	 * and buddy information in consecutive blocks.
+	 * So for each group we need two blocks.
+	 */
+	block = group * 2;
+	pnum = block / blocks_per_page;
+	poff = block % blocks_per_page;
+	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+	if (page) {
+		BUG_ON(page->mapping != inode->i_mapping);
+		ret = ext4_mb_init_cache(page, NULL);
+		if (ret) {
+			unlock_page(page);
+			goto err;
+		}
+		unlock_page(page);
+	}
+	if (page == NULL || !PageUptodate(page)) {
+		ret = -EIO;
+		goto err;
+	}
+	mark_page_accessed(page);
+	bitmap_page = page;
+	bitmap = page_address(page) + (poff * sb->s_blocksize);
+
+	/* init buddy cache */
+	block++;
+	pnum = block / blocks_per_page;
+	poff = block % blocks_per_page;
+	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+	if (page == bitmap_page) {
+		/*
+		 * If both the bitmap and buddy are in
+		 * the same page we don't need to force
+		 * init the buddy
+		 */
+		unlock_page(page);
+	} else if (page) {
+		BUG_ON(page->mapping != inode->i_mapping);
+		ret = ext4_mb_init_cache(page, bitmap);
+		if (ret) {
+			unlock_page(page);
+			goto err;
+		}
+		unlock_page(page);
+	}
+	if (page == NULL || !PageUptodate(page)) {
+		ret = -EIO;
+		goto err;
+	}
+	mark_page_accessed(page);
+err:
+	ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
+	if (bitmap_page)
+		page_cache_release(bitmap_page);
+	if (page)
+		page_cache_release(page);
+	return ret;
+}
+
 static noinline_for_stack int
 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
@@ -1779,7 +1970,7 @@ repeat:
 				group = 0;
 
 			/* quick check to skip empty groups */
-			grp = ext4_get_group_info(ac->ac_sb, group);
+			grp = ext4_get_group_info(sb, group);
 			if (grp->bb_free == 0)
 				continue;
 
@@ -1792,10 +1983,9 @@ repeat:
 				 * we need full data about the group
 				 * to make a good selection
 				 */
-				err = ext4_mb_load_buddy(sb, group, &e4b);
+				err = ext4_mb_init_group(sb, group);
 				if (err)
 					goto out;
-				ext4_mb_release_desc(&e4b);
 			}
 
 			/*
@@ -2246,7 +2436,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
 
 
 /* Create and initialize ext4_group_info data for the given group. */
-static int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
+int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 			struct ext4_group_desc *desc)
 {
 	int i, len;
@@ -2304,6 +2494,7 @@ static int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 	}
 
 	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
+	init_rwsem(&meta_group_info[i]->alloc_sem);
 	meta_group_info[i]->bb_free_root.rb_node = NULL;;
 
 #ifdef DOUBLE_CHECK
@@ -2330,54 +2521,6 @@ exit_meta_group_info:
 	return -ENOMEM;
 } /* ext4_mb_add_groupinfo */
 
-/*
- * Add a group to the existing groups.
- * This function is used for online resize
- */
-int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group,
-			       struct ext4_group_desc *desc)
-{
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct inode *inode = sbi->s_buddy_cache;
-	int blocks_per_page;
-	int block;
-	int pnum;
-	struct page *page;
-	int err;
-
-	/* Add group based on group descriptor*/
-	err = ext4_mb_add_groupinfo(sb, group, desc);
-	if (err)
-		return err;
-
-	/*
-	 * Cache pages containing dynamic mb_alloc datas (buddy and bitmap
-	 * datas) are set not up to date so that they will be re-initilaized
-	 * during the next call to ext4_mb_load_buddy
-	 */
-
-	/* Set buddy page as not up to date */
-	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
-	block = group * 2;
-	pnum = block / blocks_per_page;
-	page = find_get_page(inode->i_mapping, pnum);
-	if (page != NULL) {
-		ClearPageUptodate(page);
-		page_cache_release(page);
-	}
-
-	/* Set bitmap page as not up to date */
-	block++;
-	pnum = block / blocks_per_page;
-	page = find_get_page(inode->i_mapping, pnum);
-	if (page != NULL) {
-		ClearPageUptodate(page);
-		page_cache_release(page);
-	}
-
-	return 0;
-}
-
 /*
  * Update an existing group.
  * This function is used for online resize
@@ -4588,11 +4731,6 @@ do_more:
 	err = ext4_journal_get_write_access(handle, gd_bh);
 	if (err)
 		goto error_return;
-
-	err = ext4_mb_load_buddy(sb, block_group, &e4b);
-	if (err)
-		goto error_return;
-
 #ifdef AGGRESSIVE_CHECK
 	{
 		int i;
@@ -4606,6 +4744,8 @@ do_more:
 	/* We dirtied the bitmap block */
 	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
 	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
+	if (err)
+		goto error_return;
 
 	if (ac) {
 		ac->ac_b_ex.fe_group = block_group;
@@ -4614,6 +4754,9 @@ do_more:
 		ext4_mb_store_history(ac);
 	}
 
+	err = ext4_mb_load_buddy(sb, block_group, &e4b);
+	if (err)
+		goto error_return;
 	if (metadata && ext4_handle_valid(handle)) {
 		/* blocks being freed are metadata. these blocks shouldn't
 		 * be used until this transaction is committed */