@@ -1871,20 +1871,24 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset)
+ u64 root_objectid, u64 owner, u64 offset, int for_cow)
{
int ret;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+
BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
root_objectid == BTRFS_TREE_LOG_OBJECTID);

if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
+ ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
+ num_bytes,
parent, root_objectid, (int)owner,
- BTRFS_ADD_DELAYED_REF, NULL);
+ BTRFS_ADD_DELAYED_REF, NULL, for_cow);
} else {
- ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
+ ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
+ num_bytes,
parent, root_objectid, owner, offset,
- BTRFS_ADD_DELAYED_REF, NULL);
+ BTRFS_ADD_DELAYED_REF, NULL, for_cow);
}
return ret;
}
@@ -2231,6 +2235,28 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
}
}

+ /*
+ * locked_ref is the head node, so we have to go one
+ * node back for any delayed ref updates
+ */
+ ref = select_delayed_ref(locked_ref);
+
+ if (ref && ref->seq &&
+ btrfs_check_delayed_seq(delayed_refs, ref->seq)) {
+ /*
+ * there are still refs with lower seq numbers in the
+ * process of being added. Don't run this ref yet.
+ */
+ list_del_init(&locked_ref->cluster);
+ mutex_unlock(&locked_ref->mutex);
+ locked_ref = NULL;
+ delayed_refs->num_heads_ready++;
+ spin_unlock(&delayed_refs->lock);
+ cond_resched();
+ spin_lock(&delayed_refs->lock);
+ continue;
+ }
+
/*
* record the must insert reserved flag before we
* drop the spin lock.
@@ -2241,11 +2267,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
extent_op = locked_ref->extent_op;
locked_ref->extent_op = NULL;

- /*
- * locked_ref is the head node, so we have to go one
- * node back for any delayed ref updates
- */
- ref = select_delayed_ref(locked_ref);
if (!ref) {
/* All delayed refs have been processed, Go ahead
* and send the head node to run_one_delayed_ref,
@@ -2276,7 +2297,12 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
ref->in_tree = 0;
rb_erase(&ref->rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
-
+ /*
+ * we modified num_entries, but as we're currently running
+ * delayed refs, skip
+ * wake_up(&delayed_refs->seq_wait);
+ * here.
+ */
spin_unlock(&delayed_refs->lock);

ret = run_one_delayed_ref(trans, root, ref, extent_op,
@@ -2297,6 +2323,23 @@ next:
return count;
}

+
+static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
+ unsigned long num_refs)
+{
+ struct list_head *first_seq = delayed_refs->seq_head.next;
+
+ spin_unlock(&delayed_refs->lock);
+ pr_debug("waiting for more refs (num %ld, first %p)\n",
+ num_refs, first_seq);
+ wait_event(delayed_refs->seq_wait,
+ num_refs != delayed_refs->num_entries ||
+ delayed_refs->seq_head.next != first_seq);
+ pr_debug("done waiting for more refs (num %ld, first %p)\n",
+ delayed_refs->num_entries, delayed_refs->seq_head.next);
+ spin_lock(&delayed_refs->lock);
+}
+
/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
@@ -2312,8 +2355,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref;
struct list_head cluster;
int ret;
+ u64 delayed_start;
int run_all = count == (unsigned long)-1;
int run_most = 0;
+ unsigned long num_refs = 0;
+ int consider_waiting;

if (root == root->fs_info->extent_root)
root = root->fs_info->tree_root;
@@ -2325,6 +2371,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
delayed_refs = &trans->transaction->delayed_refs;
INIT_LIST_HEAD(&cluster);
again:
+ consider_waiting = 0;
spin_lock(&delayed_refs->lock);
if (count == 0) {
count = delayed_refs->num_entries * 2;
@@ -2341,11 +2388,35 @@ again:
* of refs to process starting at the first one we are able to
* lock
*/
+ delayed_start = delayed_refs->run_delayed_start;
ret = btrfs_find_ref_cluster(trans, &cluster,
delayed_refs->run_delayed_start);
if (ret)
break;

+ if (delayed_start >= delayed_refs->run_delayed_start) {
+ if (consider_waiting == 0) {
+ /*
+ * btrfs_find_ref_cluster looped. let's do one
+ * more cycle. if we don't run any delayed ref
+ * during that cycle (because we can't because
+ * all of them are blocked) and if the number of
+ * refs doesn't change, we avoid busy waiting.
+ */
+ consider_waiting = 1;
+ num_refs = delayed_refs->num_entries;
+ } else {
+ wait_for_more_refs(delayed_refs, num_refs);
+ /*
+ * after waiting, things have changed. we
+ * dropped the lock and someone else might have
+ * run some refs, built new clusters and so on.
+ * therefore, we restart staleness detection.
+ */
+ consider_waiting = 0;
+ }
+ }
+
ret = run_clustered_refs(trans, root, &cluster);
BUG_ON(ret < 0);
@@ -2353,6 +2424,11 @@ again:

if (count == 0)
break;
+
+ if (ret || delayed_refs->run_delayed_start == 0) {
+ /* refs were run, let's reset staleness detection */
+ consider_waiting = 0;
+ }
}

if (run_all) {
@@ -2410,7 +2486,8 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
extent_op->update_key = 0;
extent_op->is_data = is_data ? 1 : 0;

- ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
+ ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
+ num_bytes, extent_op);
if (ret)
kfree(extent_op);
return ret;
@@ -2595,7 +2672,7 @@ out:
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
- int full_backref, int inc)
+ int full_backref, int inc, int for_cow)
{
u64 bytenr;
u64 num_bytes;
@@ -2608,7 +2685,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
int level;
int ret = 0;
int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
- u64, u64, u64, u64, u64, u64);
+ u64, u64, u64, u64, u64, u64, int);

ref_root = btrfs_header_owner(buf);
nritems = btrfs_header_nritems(buf);
@@ -2645,14 +2722,15 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
key.offset -= btrfs_file_extent_offset(buf, fi);
ret = process_func(trans, root, bytenr, num_bytes,
parent, ref_root, key.objectid,
- key.offset);
+ key.offset, for_cow);
if (ret)
goto fail;
} else {
bytenr = btrfs_node_blockptr(buf, i);
num_bytes = btrfs_level_size(root, level - 1);
ret = process_func(trans, root, bytenr, num_bytes,
- parent, ref_root, level - 1, 0);
+ parent, ref_root, level - 1, 0,
+ for_cow);
if (ret)
goto fail;
}
@@ -2664,15 +2742,15 @@ fail:
}

int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref)
+ struct extent_buffer *buf, int full_backref, int for_cow)
{
- return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
+ return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow);
}

int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref)
+ struct extent_buffer *buf, int full_backref, int for_cow)
{
- return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
+ return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow);
}

static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -4954,6 +5032,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
rb_erase(&head->node.rb_node, &delayed_refs->root);

delayed_refs->num_entries--;
+ if (waitqueue_active(&delayed_refs->seq_wait))
+ wake_up(&delayed_refs->seq_wait);

/*
* we don't take a ref on the node because we're removing it from the
@@ -4981,16 +5061,17 @@ out:
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
- u64 parent, int last_ref)
+ u64 parent, int last_ref, int for_cow)
{
struct btrfs_block_group_cache *cache = NULL;
int ret;

if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len,
- parent, root->root_key.objectid,
- btrfs_header_level(buf),
- BTRFS_DROP_DELAYED_REF, NULL);
+ ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
+ buf->start, buf->len,
+ parent, root->root_key.objectid,
+ btrfs_header_level(buf),
+ BTRFS_DROP_DELAYED_REF, NULL, for_cow);
BUG_ON(ret);
}
@@ -5025,12 +5106,12 @@ out:
btrfs_put_block_group(cache);
}

-int btrfs_free_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset)
+int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
+ u64 owner, u64 offset, int for_cow)
{
int ret;
+ struct btrfs_fs_info *fs_info = root->fs_info;

/*
* tree log blocks never actually go into the extent allocation
@@ -5042,14 +5123,17 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_pin_extent(root, bytenr, num_bytes, 1);
ret = 0;
} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
+ ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
+ num_bytes,
parent, root_objectid, (int)owner,
- BTRFS_DROP_DELAYED_REF, NULL);
+ BTRFS_DROP_DELAYED_REF, NULL, for_cow);
BUG_ON(ret);
} else {
- ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
- parent, root_objectid, owner,
- offset, BTRFS_DROP_DELAYED_REF, NULL);
+ ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
+ num_bytes,
+ parent, root_objectid, owner,
+ offset, BTRFS_DROP_DELAYED_REF,
+ NULL, for_cow);
BUG_ON(ret);
}
return ret;
@@ -5877,9 +5961,10 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,

BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);

- ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset,
- 0, root_objectid, owner, offset,
- BTRFS_ADD_DELAYED_EXTENT, NULL);
+ ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
+ ins->offset, 0,
+ root_objectid, owner, offset,
+ BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
return ret;
}
@@ -6049,7 +6134,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
- u64 hint, u64 empty_size)
+ u64 hint, u64 empty_size, int for_cow)
{
struct btrfs_key ins;
struct btrfs_block_rsv *block_rsv;
@@ -6093,10 +6178,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
extent_op->update_flags = 1;
extent_op->is_data = 0;

- ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
+ ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
+ ins.objectid,
ins.offset, parent, root_objectid,
level, BTRFS_ADD_DELAYED_EXTENT,
- extent_op);
+ extent_op, for_cow);
BUG_ON(ret);
}
return buf;
@@ -6113,6 +6199,7 @@ struct walk_control {
int keep_locks;
int reada_slot;
int reada_count;
+ int for_reloc;
};

#define DROP_REFERENCE 1
@@ -6251,9 +6338,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
/* wc->stage == UPDATE_BACKREF */
if (!(wc->flags[level] & flag)) {
BUG_ON(!path->locks[level]);
- ret = btrfs_inc_ref(trans, root, eb, 1);
+ ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);
BUG_ON(ret);
- ret = btrfs_dec_ref(trans, root, eb, 0);
+ ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
BUG_ON(ret);
ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
eb->len, flag, 0);
@@ -6397,7 +6484,7 @@ skip:
}

ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
- root->root_key.objectid, level - 1, 0);
+ root->root_key.objectid, level - 1, 0, 0);
BUG_ON(ret);
}
btrfs_tree_unlock(next);
@@ -6471,9 +6558,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
if (wc->refs[level] == 1) {
if (level == 0) {
if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
- ret = btrfs_dec_ref(trans, root, eb, 1);
+ ret = btrfs_dec_ref(trans, root, eb, 1,
+ wc->for_reloc);
else
- ret = btrfs_dec_ref(trans, root, eb, 0);
+ ret = btrfs_dec_ref(trans, root, eb, 0,
+ wc->for_reloc);
BUG_ON(ret);
}
/* make block locked assertion in clean_tree_block happy */
@@ -6500,7 +6589,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
btrfs_header_owner(path->nodes[level + 1]));
}

- btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
+ btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0);
out:
wc->refs[level] = 0;
wc->flags[level] = 0;
@@ -6584,7 +6673,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
* blocks are properly updated.
*/
void btrfs_drop_snapshot(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv, int update_ref)
+ struct btrfs_block_rsv *block_rsv, int update_ref,
+ int for_reloc)
{
struct btrfs_path *path;
struct btrfs_trans_handle *trans;
@@ -6672,6 +6762,7 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
wc->stage = DROP_REFERENCE;
wc->update_ref = update_ref;
wc->keep_locks = 0;
+ wc->for_reloc = for_reloc;
wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);

while (1) {
@@ -6756,6 +6847,7 @@ out:
* drop subtree rooted at tree block 'node'.
*
* NOTE: this function will unlock and release tree block 'node'
+ * only used by relocation code
*/
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -6800,6 +6892,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
wc->stage = DROP_REFERENCE;
wc->update_ref = 0;
wc->keep_locks = 1;
+ wc->for_reloc = 1;
wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);

while (1) {