@@ -18,6 +18,7 @@
 
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/rbtree.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -288,6 +289,412 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
+enum mod_log_op {
+	MOD_LOG_KEY_REPLACE,
+	MOD_LOG_KEY_ADD,
+	MOD_LOG_KEY_REMOVE,
+	MOD_LOG_KEY_REMOVE_WHILE_FREEING,
+	MOD_LOG_KEY_REMOVE_WHILE_MOVING,
+	MOD_LOG_MOVE_KEYS,
+	MOD_LOG_ROOT_REPLACE,
+};
+
+struct tree_mod_move {
+	int dst_slot;
+	int nr_items;
+};
+
+struct tree_mod_root {
+	u64 logical;
+	u8 level;
+};
+
+struct tree_mod_elem {
+	struct rb_node node;
+	u64 index;		/* shifted logical */
+	struct seq_list elem;
+	enum mod_log_op op;
+
+	/* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */
+	int slot;
+
+	/* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */
+	u64 generation;
+
+	/* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */
+	struct btrfs_disk_key key;
+	u64 blockptr;
+
+	/* this is used for op == MOD_LOG_MOVE_KEYS */
+	struct tree_mod_move move;
+
+	/* this is used for op == MOD_LOG_ROOT_REPLACE */
+	struct tree_mod_root old_root;
+};
+
+static inline void
+__get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem)
+{
+	elem->seq = atomic_inc_return(&fs_info->tree_mod_seq);
+	list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
+}
+
+void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
+			    struct seq_list *elem)
+{
+	elem->flags = 1;
+	spin_lock(&fs_info->tree_mod_seq_lock);
+	__get_tree_mod_seq(fs_info, elem);
+	spin_unlock(&fs_info->tree_mod_seq_lock);
+}
+
+void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
+			    struct seq_list *elem)
+{
+	struct rb_root *tm_root;
+	struct rb_node *node;
+	struct rb_node *next;
+	struct seq_list *cur_elem;
+	struct tree_mod_elem *tm;
+	u64 min_seq = (u64)-1;
+	u64 seq_putting = elem->seq;
+
+	if (!seq_putting)
+		return;
+
+	BUG_ON(!(elem->flags & 1));
+	spin_lock(&fs_info->tree_mod_seq_lock);
+	list_del(&elem->list);
+
+	list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) {
+		if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) {
+			if (seq_putting > cur_elem->seq) {
+				/*
+				 * blocker with lower sequence number exists, we
+				 * cannot remove anything from the log
+				 */
+				goto out;
+			}
+			min_seq = cur_elem->seq;
+		}
+	}
+
+	/*
+	 * anything that's lower than the lowest existing (read: blocked)
+	 * sequence number can be removed from the tree.
+	 */
+	write_lock(&fs_info->tree_mod_log_lock);
+	tm_root = &fs_info->tree_mod_log;
+	for (node = rb_first(tm_root); node; node = next) {
+		next = rb_next(node);
+		tm = container_of(node, struct tree_mod_elem, node);
+		if (tm->elem.seq > min_seq)
+			continue;
+		rb_erase(node, tm_root);
+		list_del(&tm->elem.list);
+		kfree(tm);
+	}
+	write_unlock(&fs_info->tree_mod_log_lock);
+out:
+	spin_unlock(&fs_info->tree_mod_seq_lock);
+}
+
+/*
+ * key order of the log:
+ *  index -> sequence
+ *
+ * the index is the shifted logical of the *new* root node for root replace
+ * operations, or the shifted logical of the affected block for all other
+ * operations.
+ */
+static noinline int
+__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
+{
+	struct rb_root *tm_root;
+	struct rb_node **new;
+	struct rb_node *parent = NULL;
+	struct tree_mod_elem *cur;
+	int ret = 0;
+
+	BUG_ON(!tm || !tm->elem.seq);
+
+	write_lock(&fs_info->tree_mod_log_lock);
+	tm_root = &fs_info->tree_mod_log;
+	new = &tm_root->rb_node;
+	while (*new) {
+		cur = container_of(*new, struct tree_mod_elem, node);
+		parent = *new;
+		if (cur->index < tm->index)
+			new = &((*new)->rb_left);
+		else if (cur->index > tm->index)
+			new = &((*new)->rb_right);
+		else if (cur->elem.seq < tm->elem.seq)
+			new = &((*new)->rb_left);
+		else if (cur->elem.seq > tm->elem.seq)
+			new = &((*new)->rb_right);
+		else {
+			kfree(tm);
+			ret = -EEXIST;
+			goto unlock;
+		}
+	}
+
+	rb_link_node(&tm->node, parent, new);
+	rb_insert_color(&tm->node, tm_root);
+unlock:
+	write_unlock(&fs_info->tree_mod_log_lock);
+	return ret;
+}
+
+int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
+		   struct tree_mod_elem **tm_ret)
+{
+	struct tree_mod_elem *tm;
+	u64 seq = 0;
+
+	smp_mb();
+	if (list_empty(&fs_info->tree_mod_seq_list))
+		return 0;
+
+	tm = *tm_ret = kzalloc(sizeof(*tm), flags);
+	if (!tm)
+		return -ENOMEM;
+
+	__get_tree_mod_seq(fs_info, &tm->elem);
+	seq = tm->elem.seq;
+	tm->elem.flags = 0;
+
+	return seq;
+}
+
+static noinline int
+tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
+			     struct extent_buffer *eb, int slot,
+			     enum mod_log_op op, gfp_t flags)
+{
+	struct tree_mod_elem *tm;
+	int ret;
+
+	ret = tree_mod_alloc(fs_info, flags, &tm);
+	if (ret <= 0)
+		return ret;
+
+	tm->index = eb->start >> PAGE_CACHE_SHIFT;
+	if (op != MOD_LOG_KEY_ADD) {
+		btrfs_node_key(eb, &tm->key, slot);
+		tm->blockptr = btrfs_node_blockptr(eb, slot);
+	}
+	tm->op = op;
+	tm->slot = slot;
+	tm->generation = btrfs_node_ptr_generation(eb, slot);
+
+	return __tree_mod_log_insert(fs_info, tm);
+}
+
+static noinline int
+tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
+			int slot, enum mod_log_op op)
+{
+	return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS);
+}
+
+static noinline int
+tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
+			 struct extent_buffer *eb, int dst_slot, int src_slot,
+			 int nr_items, gfp_t flags)
+{
+	struct tree_mod_elem *tm;
+	int ret;
+	int i;
+
+	ret = tree_mod_alloc(fs_info, flags, &tm);
+	if (ret <= 0)
+		return ret;
+
+	for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
+		ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
+					      MOD_LOG_KEY_REMOVE_WHILE_MOVING);
+		BUG_ON(ret < 0);
+	}
+
+	tm->index = eb->start >> PAGE_CACHE_SHIFT;
+	tm->slot = src_slot;
+	tm->move.dst_slot = dst_slot;
+	tm->move.nr_items = nr_items;
+	tm->op = MOD_LOG_MOVE_KEYS;
+
+	return __tree_mod_log_insert(fs_info, tm);
+}
+
+static noinline int
+tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
+			 struct extent_buffer *old_root,
+			 struct extent_buffer *new_root, gfp_t flags)
+{
+	struct tree_mod_elem *tm;
+	int ret;
+
+	ret = tree_mod_alloc(fs_info, flags, &tm);
+	if (ret <= 0)
+		return ret;
+
+	tm->index = new_root->start >> PAGE_CACHE_SHIFT;
+	tm->old_root.logical = old_root->start;
+	tm->old_root.level = btrfs_header_level(old_root);
+	tm->generation = btrfs_header_generation(old_root);
+	tm->op = MOD_LOG_ROOT_REPLACE;
+
+	return __tree_mod_log_insert(fs_info, tm);
+}
+
+static struct tree_mod_elem *
+__tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
+		      int smallest)
+{
+	struct rb_root *tm_root;
+	struct rb_node *node;
+	struct tree_mod_elem *cur = NULL;
+	struct tree_mod_elem *found = NULL;
+	u64 index = start >> PAGE_CACHE_SHIFT;
+
+	read_lock(&fs_info->tree_mod_log_lock);
+	tm_root = &fs_info->tree_mod_log;
+	node = tm_root->rb_node;
+	while (node) {
+		cur = container_of(node, struct tree_mod_elem, node);
+		if (cur->index < index) {
+			node = node->rb_left;
+		} else if (cur->index > index) {
+			node = node->rb_right;
+		} else if (cur->elem.seq < min_seq) {
+			node = node->rb_left;
+		} else if (!smallest) {
+			/* we want the node with the highest seq */
+			if (found)
+				BUG_ON(found->elem.seq > cur->elem.seq);
+			found = cur;
+			node = node->rb_left;
+		} else if (cur->elem.seq > min_seq) {
+			/* we want the node with the smallest seq */
+			if (found)
+				BUG_ON(found->elem.seq < cur->elem.seq);
+			found = cur;
+			node = node->rb_right;
+		} else {
+			found = cur;
+			break;
+		}
+	}
+	read_unlock(&fs_info->tree_mod_log_lock);
+
+	return found;
+}
+
+/*
+ * this returns the element from the log with the smallest time sequence
+ * value that's in the log (the oldest log item). any element with a time
+ * sequence lower than min_seq will be ignored.
+ */
+static struct tree_mod_elem *
+tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start,
+			   u64 min_seq)
+{
+	return __tree_mod_log_search(fs_info, start, min_seq, 1);
+}
+
+/*
+ * this returns the element from the log with the largest time sequence
+ * value that's in the log (the most recent log item). any element with
+ * a time sequence lower than min_seq will be ignored.
+ */
+static struct tree_mod_elem *
+tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
+{
+	return __tree_mod_log_search(fs_info, start, min_seq, 0);
+}
+
+static inline void
+tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
+		     struct extent_buffer *src, unsigned long dst_offset,
+		     unsigned long src_offset, int nr_items)
+{
+	int ret;
+	int i;
+
+	smp_mb();
+	if (list_empty(&fs_info->tree_mod_seq_list))
+		return;
+
+	if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
+		return;
+
+	/* speed this up by single seq for all operations? */
+	for (i = 0; i < nr_items; i++) {
+		ret = tree_mod_log_insert_key(fs_info, src, i + src_offset,
+					      MOD_LOG_KEY_REMOVE);
+		BUG_ON(ret < 0);
+		ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset,
+					      MOD_LOG_KEY_ADD);
+		BUG_ON(ret < 0);
+	}
+}
+
+static inline void
+tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
+		     int dst_offset, int src_offset, int nr_items)
+{
+	int ret;
+	ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset,
+				       nr_items, GFP_NOFS);
+	BUG_ON(ret < 0);
+}
+
+static inline void
+tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
+			  struct extent_buffer *eb,
+			  struct btrfs_disk_key *disk_key, int slot, int atomic)
+{
+	int ret;
+
+	ret = tree_mod_log_insert_key_mask(fs_info, eb, slot,
+					   MOD_LOG_KEY_REPLACE,
+					   atomic ? GFP_ATOMIC : GFP_NOFS);
+	BUG_ON(ret < 0);
+}
+
+static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+				 struct extent_buffer *eb)
+{
+	int i;
+	int ret;
+	u32 nritems;
+
+	smp_mb();
+	if (list_empty(&fs_info->tree_mod_seq_list))
+		return;
+
+	if (btrfs_header_level(eb) == 0)
+		return;
+
+	nritems = btrfs_header_nritems(eb);
+	for (i = nritems - 1; i >= 0; i--) {
+		ret = tree_mod_log_insert_key(fs_info, eb, i,
+					      MOD_LOG_KEY_REMOVE_WHILE_FREEING);
+		BUG_ON(ret < 0);
+	}
+}
+
+static inline void
+tree_mod_log_set_root_pointer(struct btrfs_root *root,
+			      struct extent_buffer *new_root_node)
+{
+	int ret;
+	tree_mod_log_free_eb(root->fs_info, root->node);
+	ret = tree_mod_log_insert_root(root->fs_info, root->node,
+				       new_root_node, GFP_NOFS);
+	BUG_ON(ret < 0);
+}
+
 /*
  * check if the tree block can be shared by multiple trees
  */
@@ -2271,7 +2678,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 			    (unsigned long)btrfs_header_chunk_tree_uuid(split),
 			    BTRFS_UUID_SIZE);
 
-
 	copy_extent_buffer(split, c,
 			   btrfs_node_key_ptr_offset(0),
 			   btrfs_node_key_ptr_offset(mid),