|
@@ -27,6 +27,7 @@
|
|
|
#include "buffer_head_io.h"
|
|
|
#include "blockcheck.h"
|
|
|
#include "refcounttree.h"
|
|
|
+#include "dlmglue.h"
|
|
|
|
|
|
static inline struct ocfs2_refcount_tree *
|
|
|
cache_info_to_refcount(struct ocfs2_caching_info *ci)
|
|
@@ -156,3 +157,361 @@ static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
|
|
|
.co_io_lock = ocfs2_refcount_cache_io_lock,
|
|
|
.co_io_unlock = ocfs2_refcount_cache_io_unlock,
|
|
|
};
|
|
|
+
|
|
|
+static struct ocfs2_refcount_tree *
|
|
|
+ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno)
|
|
|
+{
|
|
|
+ struct rb_node *n = osb->osb_rf_lock_tree.rb_node;
|
|
|
+ struct ocfs2_refcount_tree *tree = NULL;
|
|
|
+
|
|
|
+ while (n) {
|
|
|
+ tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node);
|
|
|
+
|
|
|
+ if (blkno < tree->rf_blkno)
|
|
|
+ n = n->rb_left;
|
|
|
+ else if (blkno > tree->rf_blkno)
|
|
|
+ n = n->rb_right;
|
|
|
+ else
|
|
|
+ return tree;
|
|
|
+ }
|
|
|
+
|
|
|
+ return NULL;
|
|
|
+}
|
|
|
+
|
|
|
+/* osb_lock is already locked. */
|
|
|
+static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb,
|
|
|
+ struct ocfs2_refcount_tree *new)
|
|
|
+{
|
|
|
+ u64 rf_blkno = new->rf_blkno;
|
|
|
+ struct rb_node *parent = NULL;
|
|
|
+ struct rb_node **p = &osb->osb_rf_lock_tree.rb_node;
|
|
|
+ struct ocfs2_refcount_tree *tmp;
|
|
|
+
|
|
|
+ while (*p) {
|
|
|
+ parent = *p;
|
|
|
+
|
|
|
+ tmp = rb_entry(parent, struct ocfs2_refcount_tree,
|
|
|
+ rf_node);
|
|
|
+
|
|
|
+ if (rf_blkno < tmp->rf_blkno)
|
|
|
+ p = &(*p)->rb_left;
|
|
|
+ else if (rf_blkno > tmp->rf_blkno)
|
|
|
+ p = &(*p)->rb_right;
|
|
|
+ else {
|
|
|
+ /* This should never happen! */
|
|
|
+ mlog(ML_ERROR, "Duplicate refcount block %llu found!\n",
|
|
|
+ (unsigned long long)rf_blkno);
|
|
|
+ BUG();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ rb_link_node(&new->rf_node, parent, p);
|
|
|
+ rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree);
|
|
|
+}
|
|
|
+
|
|
|
+static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree)
|
|
|
+{
|
|
|
+ ocfs2_metadata_cache_exit(&tree->rf_ci);
|
|
|
+ ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres);
|
|
|
+ ocfs2_lock_res_free(&tree->rf_lockres);
|
|
|
+ kfree(tree);
|
|
|
+}
|
|
|
+
|
|
|
+static inline void
|
|
|
+ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb,
|
|
|
+ struct ocfs2_refcount_tree *tree)
|
|
|
+{
|
|
|
+ rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree);
|
|
|
+ if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree)
|
|
|
+ osb->osb_ref_tree_lru = NULL;
|
|
|
+}
|
|
|
+
|
|
|
+static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
|
|
|
+ struct ocfs2_refcount_tree *tree)
|
|
|
+{
|
|
|
+ spin_lock(&osb->osb_lock);
|
|
|
+ ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
|
|
|
+ spin_unlock(&osb->osb_lock);
|
|
|
+}
|
|
|
+
|
|
|
+void ocfs2_kref_remove_refcount_tree(struct kref *kref)
|
|
|
+{
|
|
|
+ struct ocfs2_refcount_tree *tree =
|
|
|
+ container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);
|
|
|
+
|
|
|
+ ocfs2_free_refcount_tree(tree);
|
|
|
+}
|
|
|
+
|
|
|
+static inline void
|
|
|
+ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree)
|
|
|
+{
|
|
|
+ kref_get(&tree->rf_getcnt);
|
|
|
+}
|
|
|
+
|
|
|
+static inline void
|
|
|
+ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree)
|
|
|
+{
|
|
|
+ kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree);
|
|
|
+}
|
|
|
+
|
|
|
+static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new,
|
|
|
+ struct super_block *sb)
|
|
|
+{
|
|
|
+ ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops);
|
|
|
+ mutex_init(&new->rf_io_mutex);
|
|
|
+ new->rf_sb = sb;
|
|
|
+ spin_lock_init(&new->rf_lock);
|
|
|
+}
|
|
|
+
|
|
|
+static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
|
|
|
+ struct ocfs2_refcount_tree *new,
|
|
|
+ u64 rf_blkno, u32 generation)
|
|
|
+{
|
|
|
+ init_rwsem(&new->rf_sem);
|
|
|
+ ocfs2_refcount_lock_res_init(&new->rf_lockres, osb,
|
|
|
+ rf_blkno, generation);
|
|
|
+}
|
|
|
+
|
|
|
+static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
|
|
|
+ struct ocfs2_refcount_tree **ret_tree)
|
|
|
+{
|
|
|
+ int ret = 0;
|
|
|
+ struct ocfs2_refcount_tree *tree, *new = NULL;
|
|
|
+ struct buffer_head *ref_root_bh = NULL;
|
|
|
+ struct ocfs2_refcount_block *ref_rb;
|
|
|
+
|
|
|
+ spin_lock(&osb->osb_lock);
|
|
|
+ if (osb->osb_ref_tree_lru &&
|
|
|
+ osb->osb_ref_tree_lru->rf_blkno == rf_blkno)
|
|
|
+ tree = osb->osb_ref_tree_lru;
|
|
|
+ else
|
|
|
+ tree = ocfs2_find_refcount_tree(osb, rf_blkno);
|
|
|
+ if (tree)
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ spin_unlock(&osb->osb_lock);
|
|
|
+
|
|
|
+ new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
|
|
|
+ if (!new) {
|
|
|
+ ret = -ENOMEM;
|
|
|
+ return ret;
|
|
|
+ }
|
|
|
+
|
|
|
+ new->rf_blkno = rf_blkno;
|
|
|
+ kref_init(&new->rf_getcnt);
|
|
|
+ ocfs2_init_refcount_tree_ci(new, osb->sb);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * We need the generation to create the refcount tree lock and since
|
|
|
+ * it isn't changed during the tree modification, we are safe here to
|
|
|
+ * read without protection.
|
|
|
+ * We also have to purge the cache after we create the lock since the
|
|
|
+ * refcount block may have the stale data. It can only be trusted when
|
|
|
+ * we hold the refcount lock.
|
|
|
+ */
|
|
|
+ ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh);
|
|
|
+ if (ret) {
|
|
|
+ mlog_errno(ret);
|
|
|
+ ocfs2_metadata_cache_exit(&new->rf_ci);
|
|
|
+ kfree(new);
|
|
|
+ return ret;
|
|
|
+ }
|
|
|
+
|
|
|
+ ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
|
|
|
+ new->rf_generation = le32_to_cpu(ref_rb->rf_generation);
|
|
|
+ ocfs2_init_refcount_tree_lock(osb, new, rf_blkno,
|
|
|
+ new->rf_generation);
|
|
|
+ ocfs2_metadata_cache_purge(&new->rf_ci);
|
|
|
+
|
|
|
+ spin_lock(&osb->osb_lock);
|
|
|
+ tree = ocfs2_find_refcount_tree(osb, rf_blkno);
|
|
|
+ if (tree)
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ ocfs2_insert_refcount_tree(osb, new);
|
|
|
+
|
|
|
+ tree = new;
|
|
|
+ new = NULL;
|
|
|
+
|
|
|
+out:
|
|
|
+ *ret_tree = tree;
|
|
|
+
|
|
|
+ osb->osb_ref_tree_lru = tree;
|
|
|
+
|
|
|
+ spin_unlock(&osb->osb_lock);
|
|
|
+
|
|
|
+ if (new)
|
|
|
+ ocfs2_free_refcount_tree(new);
|
|
|
+
|
|
|
+ brelse(ref_root_bh);
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno)
|
|
|
+{
|
|
|
+ int ret;
|
|
|
+ struct buffer_head *di_bh = NULL;
|
|
|
+ struct ocfs2_dinode *di;
|
|
|
+
|
|
|
+ ret = ocfs2_read_inode_block(inode, &di_bh);
|
|
|
+ if (ret) {
|
|
|
+ mlog_errno(ret);
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
+ BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
|
|
|
+
|
|
|
+ di = (struct ocfs2_dinode *)di_bh->b_data;
|
|
|
+ *ref_blkno = le64_to_cpu(di->i_refcount_loc);
|
|
|
+ brelse(di_bh);
|
|
|
+out:
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
|
|
|
+ struct ocfs2_refcount_tree *tree, int rw)
|
|
|
+{
|
|
|
+ int ret;
|
|
|
+
|
|
|
+ ret = ocfs2_refcount_lock(tree, rw);
|
|
|
+ if (ret) {
|
|
|
+ mlog_errno(ret);
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (rw)
|
|
|
+ down_write(&tree->rf_sem);
|
|
|
+ else
|
|
|
+ down_read(&tree->rf_sem);
|
|
|
+
|
|
|
+out:
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Lock the refcount tree pointed by ref_blkno and return the tree.
|
|
|
+ * In most case, we lock the tree and read the refcount block.
|
|
|
+ * So read it here if the caller really needs it.
|
|
|
+ *
|
|
|
+ * If the tree has been re-created by other node, it will free the
|
|
|
+ * old one and re-create it.
|
|
|
+ */
|
|
|
+int ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
|
|
|
+ u64 ref_blkno, int rw,
|
|
|
+ struct ocfs2_refcount_tree **ret_tree,
|
|
|
+ struct buffer_head **ref_bh)
|
|
|
+{
|
|
|
+ int ret, delete_tree = 0;
|
|
|
+ struct ocfs2_refcount_tree *tree = NULL;
|
|
|
+ struct buffer_head *ref_root_bh = NULL;
|
|
|
+ struct ocfs2_refcount_block *rb;
|
|
|
+
|
|
|
+again:
|
|
|
+ ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree);
|
|
|
+ if (ret) {
|
|
|
+ mlog_errno(ret);
|
|
|
+ return ret;
|
|
|
+ }
|
|
|
+
|
|
|
+ ocfs2_refcount_tree_get(tree);
|
|
|
+
|
|
|
+ ret = __ocfs2_lock_refcount_tree(osb, tree, rw);
|
|
|
+ if (ret) {
|
|
|
+ mlog_errno(ret);
|
|
|
+ ocfs2_refcount_tree_put(tree);
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
+ ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
|
|
|
+ &ref_root_bh);
|
|
|
+ if (ret) {
|
|
|
+ mlog_errno(ret);
|
|
|
+ ocfs2_unlock_refcount_tree(osb, tree, rw);
|
|
|
+ ocfs2_refcount_tree_put(tree);
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
+ rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
|
|
|
+ /*
|
|
|
+ * If the refcount block has been freed and re-created, we may need
|
|
|
+ * to recreate the refcount tree also.
|
|
|
+ *
|
|
|
+ * Here we just remove the tree from the rb-tree, and the last
|
|
|
+ * kref holder will unlock and delete this refcount_tree.
|
|
|
+ * Then we goto "again" and ocfs2_get_refcount_tree will create
|
|
|
+ * the new refcount tree for us.
|
|
|
+ */
|
|
|
+ if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) {
|
|
|
+ if (!tree->rf_removed) {
|
|
|
+ ocfs2_erase_refcount_tree_from_list(osb, tree);
|
|
|
+ tree->rf_removed = 1;
|
|
|
+ delete_tree = 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ ocfs2_unlock_refcount_tree(osb, tree, rw);
|
|
|
+ /*
|
|
|
+ * We get an extra reference when we create the refcount
|
|
|
+ * tree, so another put will destroy it.
|
|
|
+ */
|
|
|
+ if (delete_tree)
|
|
|
+ ocfs2_refcount_tree_put(tree);
|
|
|
+ brelse(ref_root_bh);
|
|
|
+ ref_root_bh = NULL;
|
|
|
+ goto again;
|
|
|
+ }
|
|
|
+
|
|
|
+ *ret_tree = tree;
|
|
|
+ if (ref_bh) {
|
|
|
+ *ref_bh = ref_root_bh;
|
|
|
+ ref_root_bh = NULL;
|
|
|
+ }
|
|
|
+out:
|
|
|
+ brelse(ref_root_bh);
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw,
|
|
|
+ struct ocfs2_refcount_tree **ret_tree,
|
|
|
+ struct buffer_head **ref_bh)
|
|
|
+{
|
|
|
+ int ret;
|
|
|
+ u64 ref_blkno;
|
|
|
+
|
|
|
+ ret = ocfs2_get_refcount_block(inode, &ref_blkno);
|
|
|
+ if (ret) {
|
|
|
+ mlog_errno(ret);
|
|
|
+ return ret;
|
|
|
+ }
|
|
|
+
|
|
|
+ return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno,
|
|
|
+ rw, ret_tree, ref_bh);
|
|
|
+}
|
|
|
+
|
|
|
+void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
|
|
|
+ struct ocfs2_refcount_tree *tree, int rw)
|
|
|
+{
|
|
|
+ if (rw)
|
|
|
+ up_write(&tree->rf_sem);
|
|
|
+ else
|
|
|
+ up_read(&tree->rf_sem);
|
|
|
+
|
|
|
+ ocfs2_refcount_unlock(tree, rw);
|
|
|
+ ocfs2_refcount_tree_put(tree);
|
|
|
+}
|
|
|
+
|
|
|
+void ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
|
|
|
+{
|
|
|
+ struct rb_node *node;
|
|
|
+ struct ocfs2_refcount_tree *tree;
|
|
|
+ struct rb_root *root = &osb->osb_rf_lock_tree;
|
|
|
+
|
|
|
+ while ((node = rb_last(root)) != NULL) {
|
|
|
+ tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node);
|
|
|
+
|
|
|
+ mlog(0, "Purge tree %llu\n",
|
|
|
+ (unsigned long long) tree->rf_blkno);
|
|
|
+
|
|
|
+ rb_erase(&tree->rf_node, root);
|
|
|
+ ocfs2_free_refcount_tree(tree);
|
|
|
+ }
|
|
|
+}
|