13 years ago · 18aadd47f8
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -104,6 +104,78 @@
 
				 #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
			
 
				 #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
			
 
				 
			
 
				+/**
			
 
				+ *   struct ext4_journal_cb_entry - Base structure for callback information.
			
 
				+ *
			
 
				+ *   This struct is a 'seed' structure for a using with your own callback
			
 
				+ *   structs. If you are using callbacks you must allocate one of these
			
 
				+ *   or another struct of your own definition which has this struct
			
 
				+ *   as it's first element and pass it to ext4_journal_callback_add().
			
 
				+ */
			
 
				+struct ext4_journal_cb_entry {
			
 
				+	/* list information for other callbacks attached to the same handle */
			
 
				+	struct list_head jce_list;
			
 
				+
			
 
				+	/*  Function to call with this callback structure */
			
 
				+	void (*jce_func)(struct super_block *sb,
			
 
				+			 struct ext4_journal_cb_entry *jce, int error);
			
 
				+
			
 
				+	/* user data goes here */
			
 
				+};
			
 
				+
			
 
				+/**
			
 
				+ * ext4_journal_callback_add: add a function to call after transaction commit
			
 
				+ * @handle: active journal transaction handle to register callback on
			
 
				+ * @func: callback function to call after the transaction has committed:
			
 
				+ *        @sb: superblock of current filesystem for transaction
			
 
				+ *        @jce: returned journal callback data
			
 
				+ *        @rc: journal state at commit (0 = transaction committed properly)
			
 
				+ * @jce: journal callback data (internal and function private data struct)
			
 
				+ *
			
 
				+ * The registered function will be called in the context of the journal thread
			
 
				+ * after the transaction for which the handle was created has completed.
			
 
				+ *
			
 
				+ * No locks are held when the callback function is called, so it is safe to
			
 
				+ * call blocking functions from within the callback, but the callback should
			
 
				+ * not block or run for too long, or the filesystem will be blocked waiting for
			
 
				+ * the next transaction to commit. No journaling functions can be used, or
			
 
				+ * there is a risk of deadlock.
			
 
				+ *
			
 
				+ * There is no guaranteed calling order of multiple registered callbacks on
			
 
				+ * the same transaction.
			
 
				+ */
			
 
				+static inline void ext4_journal_callback_add(handle_t *handle,
			
 
				+			void (*func)(struct super_block *sb,
			
 
				+				     struct ext4_journal_cb_entry *jce,
			
 
				+				     int rc),
			
 
				+			struct ext4_journal_cb_entry *jce)
			
 
				+{
			
 
				+	struct ext4_sb_info *sbi =
			
 
				+			EXT4_SB(handle->h_transaction->t_journal->j_private);
			
 
				+
			
 
				+	/* Add the jce to transaction's private list */
			
 
				+	jce->jce_func = func;
			
 
				+	spin_lock(&sbi->s_md_lock);
			
 
				+	list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
			
 
				+	spin_unlock(&sbi->s_md_lock);
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * ext4_journal_callback_del: delete a registered callback
			
 
				+ * @handle: active journal transaction handle on which callback was registered
			
 
				+ * @jce: registered journal callback entry to unregister
			
 
				+ */
			
 
				+static inline void ext4_journal_callback_del(handle_t *handle,
			
 
				+					     struct ext4_journal_cb_entry *jce)
			
 
				+{
			
 
				+	struct ext4_sb_info *sbi =
			
 
				+			EXT4_SB(handle->h_transaction->t_journal->j_private);
			
 
				+
			
 
				+	spin_lock(&sbi->s_md_lock);
			
 
				+	list_del_init(&jce->jce_list);
			
 
				+	spin_unlock(&sbi->s_md_lock);
			
 
				+}
			
 
				+
			
 
				 int
			
 
				 ext4_mark_iloc_dirty(handle_t *handle,
			
 
				 		     struct inode *inode,
			
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -21,6 +21,7 @@
 
				  * mballoc.c contains the multiblocks allocation routines
			
 
				  */
			
 
				 
			
 
				+#include "ext4_jbd2.h"
			
 
				 #include "mballoc.h"
			
 
				 #include <linux/debugfs.h>
			
 
				 #include <linux/slab.h>
			
@@ -339,7 +340,7 @@
 
				  */
			
 
				 static struct kmem_cache *ext4_pspace_cachep;
			
 
				 static struct kmem_cache *ext4_ac_cachep;
			
 
				-static struct kmem_cache *ext4_free_ext_cachep;
			
 
				+static struct kmem_cache *ext4_free_data_cachep;
			
 
				 
			
 
				 /* We create slab caches for groupinfo data structures based on the
			
 
				  * superblock block size.  There will be one per mounted filesystem for
			
@@ -357,7 +358,8 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 
				 					ext4_group_t group);
			
 
				 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
			
 
				 						ext4_group_t group);
			
 
				-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
			
 
				+static void ext4_free_data_callback(struct super_block *sb,
			
 
				+				struct ext4_journal_cb_entry *jce, int rc);
			
 
				 
			
 
				 static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
			
 
				 {
			
@@ -2473,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 
				 		proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
			
 
				 				 &ext4_mb_seq_groups_fops, sb);
			
 
				 
			
 
				-	if (sbi->s_journal)
			
 
				-		sbi->s_journal->j_commit_callback = release_blocks_on_commit;
			
 
				-
			
 
				 	return 0;
			
 
				 
			
 
				 out_free_locality_groups:
			
@@ -2588,58 +2587,55 @@ static inline int ext4_issue_discard(struct super_block *sb,
 
				  * This function is called by the jbd2 layer once the commit has finished,
			
 
				  * so we know we can free the blocks that were released with that commit.
			
 
				  */
			
 
				-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
			
 
				+static void ext4_free_data_callback(struct super_block *sb,
			
 
				+				    struct ext4_journal_cb_entry *jce,
			
 
				+				    int rc)
			
 
				 {
			
 
				-	struct super_block *sb = journal->j_private;
			
 
				+	struct ext4_free_data *entry = (struct ext4_free_data *)jce;
			
 
				 	struct ext4_buddy e4b;
			
 
				 	struct ext4_group_info *db;
			
 
				 	int err, count = 0, count2 = 0;
			
 
				-	struct ext4_free_data *entry;
			
 
				-	struct list_head *l, *ltmp;
			
 
				 
			
 
				-	list_for_each_safe(l, ltmp, &txn->t_private_list) {
			
 
				-		entry = list_entry(l, struct ext4_free_data, list);
			
 
				+	mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
			
 
				+		 entry->efd_count, entry->efd_group, entry);
			
 
				 
			
 
				-		mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
			
 
				-			 entry->count, entry->group, entry);
			
 
				+	if (test_opt(sb, DISCARD))
			
 
				+		ext4_issue_discard(sb, entry->efd_group,
			
 
				+				   entry->efd_start_cluster, entry->efd_count);
			
 
				 
			
 
				-		if (test_opt(sb, DISCARD))
			
 
				-			ext4_issue_discard(sb, entry->group,
			
 
				-					   entry->start_cluster, entry->count);
			
 
				+	err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
			
 
				+	/* we expect to find existing buddy because it's pinned */
			
 
				+	BUG_ON(err != 0);
			
 
				 
			
 
				-		err = ext4_mb_load_buddy(sb, entry->group, &e4b);
			
 
				-		/* we expect to find existing buddy because it's pinned */
			
 
				-		BUG_ON(err != 0);
			
 
				 
			
 
				-		db = e4b.bd_info;
			
 
				-		/* there are blocks to put in buddy to make them really free */
			
 
				-		count += entry->count;
			
 
				-		count2++;
			
 
				-		ext4_lock_group(sb, entry->group);
			
 
				-		/* Take it out of per group rb tree */
			
 
				-		rb_erase(&entry->node, &(db->bb_free_root));
			
 
				-		mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count);
			
 
				+	db = e4b.bd_info;
			
 
				+	/* there are blocks to put in buddy to make them really free */
			
 
				+	count += entry->efd_count;
			
 
				+	count2++;
			
 
				+	ext4_lock_group(sb, entry->efd_group);
			
 
				+	/* Take it out of per group rb tree */
			
 
				+	rb_erase(&entry->efd_node, &(db->bb_free_root));
			
 
				+	mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
			
 
				 
			
 
				-		/*
			
 
				-		 * Clear the trimmed flag for the group so that the next
			
 
				-		 * ext4_trim_fs can trim it.
			
 
				-		 * If the volume is mounted with -o discard, online discard
			
 
				-		 * is supported and the free blocks will be trimmed online.
			
 
				-		 */
			
 
				-		if (!test_opt(sb, DISCARD))
			
 
				-			EXT4_MB_GRP_CLEAR_TRIMMED(db);
			
 
				+	/*
			
 
				+	 * Clear the trimmed flag for the group so that the next
			
 
				+	 * ext4_trim_fs can trim it.
			
 
				+	 * If the volume is mounted with -o discard, online discard
			
 
				+	 * is supported and the free blocks will be trimmed online.
			
 
				+	 */
			
 
				+	if (!test_opt(sb, DISCARD))
			
 
				+		EXT4_MB_GRP_CLEAR_TRIMMED(db);
			
 
				 
			
 
				-		if (!db->bb_free_root.rb_node) {
			
 
				-			/* No more items in the per group rb tree
			
 
				-			 * balance refcounts from ext4_mb_free_metadata()
			
 
				-			 */
			
 
				-			page_cache_release(e4b.bd_buddy_page);
			
 
				-			page_cache_release(e4b.bd_bitmap_page);
			
 
				-		}
			
 
				-		ext4_unlock_group(sb, entry->group);
			
 
				-		kmem_cache_free(ext4_free_ext_cachep, entry);
			
 
				-		ext4_mb_unload_buddy(&e4b);
			
 
				+	if (!db->bb_free_root.rb_node) {
			
 
				+		/* No more items in the per group rb tree
			
 
				+		 * balance refcounts from ext4_mb_free_metadata()
			
 
				+		 */
			
 
				+		page_cache_release(e4b.bd_buddy_page);
			
 
				+		page_cache_release(e4b.bd_bitmap_page);
			
 
				 	}
			
 
				+	ext4_unlock_group(sb, entry->efd_group);
			
 
				+	kmem_cache_free(ext4_free_data_cachep, entry);
			
 
				+	ext4_mb_unload_buddy(&e4b);
			
 
				 
			
 
				 	mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
			
 
				 }
			
@@ -2692,9 +2688,9 @@ int __init ext4_init_mballoc(void)
 
				 		return -ENOMEM;
			
 
				 	}
			
 
				 
			
 
				-	ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
			
 
				-					  SLAB_RECLAIM_ACCOUNT);
			
 
				-	if (ext4_free_ext_cachep == NULL) {
			
 
				+	ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
			
 
				+					   SLAB_RECLAIM_ACCOUNT);
			
 
				+	if (ext4_free_data_cachep == NULL) {
			
 
				 		kmem_cache_destroy(ext4_pspace_cachep);
			
 
				 		kmem_cache_destroy(ext4_ac_cachep);
			
 
				 		return -ENOMEM;
			
@@ -2712,7 +2708,7 @@ void ext4_exit_mballoc(void)
 
				 	rcu_barrier();
			
 
				 	kmem_cache_destroy(ext4_pspace_cachep);
			
 
				 	kmem_cache_destroy(ext4_ac_cachep);
			
 
				-	kmem_cache_destroy(ext4_free_ext_cachep);
			
 
				+	kmem_cache_destroy(ext4_free_data_cachep);
			
 
				 	ext4_groupinfo_destroy_slabs();
			
 
				 	ext4_remove_debugfs_entry();
			
 
				 }
			
@@ -3272,8 +3268,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 
				 	n = rb_first(&(grp->bb_free_root));
			
 
				 
			
 
				 	while (n) {
			
 
				-		entry = rb_entry(n, struct ext4_free_data, node);
			
 
				-		ext4_set_bits(bitmap, entry->start_cluster, entry->count);
			
 
				+		entry = rb_entry(n, struct ext4_free_data, efd_node);
			
 
				+		ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
			
 
				 		n = rb_next(n);
			
 
				 	}
			
 
				 	return;
			
@@ -4379,9 +4375,9 @@ out:
 
				 static int can_merge(struct ext4_free_data *entry1,
			
 
				 			struct ext4_free_data *entry2)
			
 
				 {
			
 
				-	if ((entry1->t_tid == entry2->t_tid) &&
			
 
				-	    (entry1->group == entry2->group) &&
			
 
				-	    ((entry1->start_cluster + entry1->count) == entry2->start_cluster))
			
 
				+	if ((entry1->efd_tid == entry2->efd_tid) &&
			
 
				+	    (entry1->efd_group == entry2->efd_group) &&
			
 
				+	    ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster))
			
 
				 		return 1;
			
 
				 	return 0;
			
 
				 }
			
@@ -4403,8 +4399,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 
				 	BUG_ON(e4b->bd_bitmap_page == NULL);
			
 
				 	BUG_ON(e4b->bd_buddy_page == NULL);
			
 
				 
			
 
				-	new_node = &new_entry->node;
			
 
				-	cluster = new_entry->start_cluster;
			
 
				+	new_node = &new_entry->efd_node;
			
 
				+	cluster = new_entry->efd_start_cluster;
			
 
				 
			
 
				 	if (!*n) {
			
 
				 		/* first free block exent. We need to
			
@@ -4417,10 +4413,10 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 
				 	}
			
 
				 	while (*n) {
			
 
				 		parent = *n;
			
 
				-		entry = rb_entry(parent, struct ext4_free_data, node);
			
 
				-		if (cluster < entry->start_cluster)
			
 
				+		entry = rb_entry(parent, struct ext4_free_data, efd_node);
			
 
				+		if (cluster < entry->efd_start_cluster)
			
 
				 			n = &(*n)->rb_left;
			
 
				-		else if (cluster >= (entry->start_cluster + entry->count))
			
 
				+		else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
			
 
				 			n = &(*n)->rb_right;
			
 
				 		else {
			
 
				 			ext4_grp_locked_error(sb, group, 0,
			
@@ -4437,34 +4433,29 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 
				 	/* Now try to see the extent can be merged to left and right */
			
 
				 	node = rb_prev(new_node);
			
 
				 	if (node) {
			
 
				-		entry = rb_entry(node, struct ext4_free_data, node);
			
 
				+		entry = rb_entry(node, struct ext4_free_data, efd_node);
			
 
				 		if (can_merge(entry, new_entry)) {
			
 
				-			new_entry->start_cluster = entry->start_cluster;
			
 
				-			new_entry->count += entry->count;
			
 
				+			new_entry->efd_start_cluster = entry->efd_start_cluster;
			
 
				+			new_entry->efd_count += entry->efd_count;
			
 
				 			rb_erase(node, &(db->bb_free_root));
			
 
				-			spin_lock(&sbi->s_md_lock);
			
 
				-			list_del(&entry->list);
			
 
				-			spin_unlock(&sbi->s_md_lock);
			
 
				-			kmem_cache_free(ext4_free_ext_cachep, entry);
			
 
				+			ext4_journal_callback_del(handle, &entry->efd_jce);
			
 
				+			kmem_cache_free(ext4_free_data_cachep, entry);
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				 	node = rb_next(new_node);
			
 
				 	if (node) {
			
 
				-		entry = rb_entry(node, struct ext4_free_data, node);
			
 
				+		entry = rb_entry(node, struct ext4_free_data, efd_node);
			
 
				 		if (can_merge(new_entry, entry)) {
			
 
				-			new_entry->count += entry->count;
			
 
				+			new_entry->efd_count += entry->efd_count;
			
 
				 			rb_erase(node, &(db->bb_free_root));
			
 
				-			spin_lock(&sbi->s_md_lock);
			
 
				-			list_del(&entry->list);
			
 
				-			spin_unlock(&sbi->s_md_lock);
			
 
				-			kmem_cache_free(ext4_free_ext_cachep, entry);
			
 
				+			ext4_journal_callback_del(handle, &entry->efd_jce);
			
 
				+			kmem_cache_free(ext4_free_data_cachep, entry);
			
 
				 		}
			
 
				 	}
			
 
				 	/* Add the extent to transaction's private list */
			
 
				-	spin_lock(&sbi->s_md_lock);
			
 
				-	list_add(&new_entry->list, &handle->h_transaction->t_private_list);
			
 
				-	spin_unlock(&sbi->s_md_lock);
			
 
				+	ext4_journal_callback_add(handle, ext4_free_data_callback,
			
 
				+				  &new_entry->efd_jce);
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -4642,15 +4633,15 @@ do_more:
 
				 		 * blocks being freed are metadata. these blocks shouldn't
			
 
				 		 * be used until this transaction is committed
			
 
				 		 */
			
 
				-		new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
			
 
				+		new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
			
 
				 		if (!new_entry) {
			
 
				 			err = -ENOMEM;
			
 
				 			goto error_return;
			
 
				 		}
			
 
				-		new_entry->start_cluster = bit;
			
 
				-		new_entry->group  = block_group;
			
 
				-		new_entry->count = count_clusters;
			
 
				-		new_entry->t_tid = handle->h_transaction->t_tid;
			
 
				+		new_entry->efd_start_cluster = bit;
			
 
				+		new_entry->efd_group = block_group;
			
 
				+		new_entry->efd_count = count_clusters;
			
 
				+		new_entry->efd_tid = handle->h_transaction->t_tid;
			
 
				 
			
 
				 		ext4_lock_group(sb, block_group);
			
 
				 		mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
			
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -96,21 +96,23 @@ extern u8 mb_enable_debug;
 
				 
			
 
				 
			
 
				 struct ext4_free_data {
			
 
				-	/* this links the free block information from group_info */
			
 
				-	struct rb_node node;
			
 
				+	/* MUST be the first member */
			
 
				+	struct ext4_journal_cb_entry	efd_jce;
			
 
				+
			
 
				+	/* ext4_free_data private data starts from here */
			
 
				 
			
 
				-	/* this links the free block information from ext4_sb_info */
			
 
				-	struct list_head list;
			
 
				+	/* this links the free block information from group_info */
			
 
				+	struct rb_node			efd_node;
			
 
				 
			
 
				 	/* group which free block extent belongs */
			
 
				-	ext4_group_t group;
			
 
				+	ext4_group_t			efd_group;
			
 
				 
			
 
				 	/* free block extent */
			
 
				-	ext4_grpblk_t start_cluster;
			
 
				-	ext4_grpblk_t count;
			
 
				+	ext4_grpblk_t			efd_start_cluster;
			
 
				+	ext4_grpblk_t			efd_count;
			
 
				 
			
 
				 	/* transaction which freed this extent */
			
 
				-	tid_t	t_tid;
			
 
				+	tid_t				efd_tid;
			
 
				 };
			
 
				 
			
 
				 struct ext4_prealloc_space {
			
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -431,6 +431,22 @@ static int block_device_ejected(struct super_block *sb)
 
				 	return bdi->dev == NULL;
			
 
				 }
			
 
				 
			
 
				+static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
			
 
				+{
			
 
				+	struct super_block		*sb = journal->j_private;
			
 
				+	struct ext4_sb_info		*sbi = EXT4_SB(sb);
			
 
				+	int				error = is_journal_aborted(journal);
			
 
				+	struct ext4_journal_cb_entry	*jce, *tmp;
			
 
				+
			
 
				+	spin_lock(&sbi->s_md_lock);
			
 
				+	list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
			
 
				+		list_del_init(&jce->jce_list);
			
 
				+		spin_unlock(&sbi->s_md_lock);
			
 
				+		jce->jce_func(sb, jce, error);
			
 
				+		spin_lock(&sbi->s_md_lock);
			
 
				+	}
			
 
				+	spin_unlock(&sbi->s_md_lock);
			
 
				+}
			
 
				 
			
 
				 /* Deal with the reporting of failure conditions on a filesystem such as
			
 
				  * inconsistencies detected or read IO failures.
			
@@ -3694,6 +3710,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
				 	}
			
 
				 	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
			
 
				 
			
 
				+	sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
			
 
				+
			
 
				 	/*
			
 
				 	 * The journal may have updated the bg summary counts, so we
			
 
				 	 * need to update the global counters.