123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668 |
- /*
- * Copyright (C) 2009 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
- #include <linux/sched.h>
- #include <linux/sort.h>
- #include "ctree.h"
- #include "delayed-ref.h"
- #include "transaction.h"
- /*
- * delayed back reference update tracking. For subvolume trees
- * we queue up extent allocations and backref maintenance for
- * delayed processing. This avoids deep call chains where we
- * add extents in the middle of btrfs_search_slot, and it allows
- * us to buffer up frequently modified backrefs in an rb tree instead
- * of hammering updates on the extent allocation tree.
- *
- * Right now this code is only used for reference counted trees, but
- * the long term goal is to get rid of the similar code for delayed
- * extent tree modifications.
- */
- /*
- * entries in the rb tree are ordered by the byte number of the extent
- * and by the byte number of the parent block.
- */
- static int comp_entry(struct btrfs_delayed_ref_node *ref,
- u64 bytenr, u64 parent)
- {
- if (bytenr < ref->bytenr)
- return -1;
- if (bytenr > ref->bytenr)
- return 1;
- if (parent < ref->parent)
- return -1;
- if (parent > ref->parent)
- return 1;
- return 0;
- }
- /*
- * insert a new ref into the rbtree. This returns any existing refs
- * for the same (bytenr,parent) tuple, or NULL if the new node was properly
- * inserted.
- */
- static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
- u64 bytenr, u64 parent,
- struct rb_node *node)
- {
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent_node = NULL;
- struct btrfs_delayed_ref_node *entry;
- int cmp;
- while (*p) {
- parent_node = *p;
- entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
- rb_node);
- cmp = comp_entry(entry, bytenr, parent);
- if (cmp < 0)
- p = &(*p)->rb_left;
- else if (cmp > 0)
- p = &(*p)->rb_right;
- else
- return entry;
- }
- entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- rb_link_node(node, parent_node, p);
- rb_insert_color(node, root);
- return NULL;
- }
- /*
- * find an entry based on (bytenr,parent). This returns the delayed
- * ref if it was able to find one, or NULL if nothing was in that spot
- */
- static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
- u64 bytenr, u64 parent,
- struct btrfs_delayed_ref_node **last)
- {
- struct rb_node *n = root->rb_node;
- struct btrfs_delayed_ref_node *entry;
- int cmp;
- while (n) {
- entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
- WARN_ON(!entry->in_tree);
- if (last)
- *last = entry;
- cmp = comp_entry(entry, bytenr, parent);
- if (cmp < 0)
- n = n->rb_left;
- else if (cmp > 0)
- n = n->rb_right;
- else
- return entry;
- }
- return NULL;
- }
- int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_head *head)
- {
- struct btrfs_delayed_ref_root *delayed_refs;
- delayed_refs = &trans->transaction->delayed_refs;
- assert_spin_locked(&delayed_refs->lock);
- if (mutex_trylock(&head->mutex))
- return 0;
- atomic_inc(&head->node.refs);
- spin_unlock(&delayed_refs->lock);
- mutex_lock(&head->mutex);
- spin_lock(&delayed_refs->lock);
- if (!head->node.in_tree) {
- mutex_unlock(&head->mutex);
- btrfs_put_delayed_ref(&head->node);
- return -EAGAIN;
- }
- btrfs_put_delayed_ref(&head->node);
- return 0;
- }
- int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
- struct list_head *cluster, u64 start)
- {
- int count = 0;
- struct btrfs_delayed_ref_root *delayed_refs;
- struct rb_node *node;
- struct btrfs_delayed_ref_node *ref;
- struct btrfs_delayed_ref_head *head;
- delayed_refs = &trans->transaction->delayed_refs;
- if (start == 0) {
- node = rb_first(&delayed_refs->root);
- } else {
- ref = NULL;
- tree_search(&delayed_refs->root, start, (u64)-1, &ref);
- if (ref) {
- struct btrfs_delayed_ref_node *tmp;
- node = rb_prev(&ref->rb_node);
- while (node) {
- tmp = rb_entry(node,
- struct btrfs_delayed_ref_node,
- rb_node);
- if (tmp->bytenr < start)
- break;
- ref = tmp;
- node = rb_prev(&ref->rb_node);
- }
- node = &ref->rb_node;
- } else
- node = rb_first(&delayed_refs->root);
- }
- again:
- while (node && count < 32) {
- ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- if (btrfs_delayed_ref_is_head(ref)) {
- head = btrfs_delayed_node_to_head(ref);
- if (list_empty(&head->cluster)) {
- list_add_tail(&head->cluster, cluster);
- delayed_refs->run_delayed_start =
- head->node.bytenr;
- count++;
- WARN_ON(delayed_refs->num_heads_ready == 0);
- delayed_refs->num_heads_ready--;
- } else if (count) {
- /* the goal of the clustering is to find extents
- * that are likely to end up in the same extent
- * leaf on disk. So, we don't want them spread
- * all over the tree. Stop now if we've hit
- * a head that was already in use
- */
- break;
- }
- }
- node = rb_next(node);
- }
- if (count) {
- return 0;
- } else if (start) {
- /*
- * we've gone to the end of the rbtree without finding any
- * clusters. start from the beginning and try again
- */
- start = 0;
- node = rb_first(&delayed_refs->root);
- goto again;
- }
- return 1;
- }
- /*
- * This checks to see if there are any delayed refs in the
- * btree for a given bytenr. It returns one if it finds any
- * and zero otherwise.
- *
- * If it only finds a head node, it returns 0.
- *
- * The idea is to use this when deciding if you can safely delete an
- * extent from the extent allocation tree. There may be a pending
- * ref in the rbtree that adds or removes references, so as long as this
- * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent
- * allocation tree.
- */
- int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
- {
- struct btrfs_delayed_ref_node *ref;
- struct btrfs_delayed_ref_root *delayed_refs;
- struct rb_node *prev_node;
- int ret = 0;
- delayed_refs = &trans->transaction->delayed_refs;
- spin_lock(&delayed_refs->lock);
- ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
- if (ref) {
- prev_node = rb_prev(&ref->rb_node);
- if (!prev_node)
- goto out;
- ref = rb_entry(prev_node, struct btrfs_delayed_ref_node,
- rb_node);
- if (ref->bytenr == bytenr)
- ret = 1;
- }
- out:
- spin_unlock(&delayed_refs->lock);
- return ret;
- }
- /*
- * helper function to lookup reference count
- *
- * the head node for delayed ref is used to store the sum of all the
- * reference count modifications queued up in the rbtree. This way you
- * can check to see what the reference count would be if all of the
- * delayed refs are processed.
- */
- int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u32 *refs)
- {
- struct btrfs_delayed_ref_node *ref;
- struct btrfs_delayed_ref_head *head;
- struct btrfs_delayed_ref_root *delayed_refs;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_extent_item *ei;
- struct btrfs_key key;
- u32 num_refs;
- int ret;
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- key.objectid = bytenr;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = num_bytes;
- delayed_refs = &trans->transaction->delayed_refs;
- again:
- ret = btrfs_search_slot(trans, root->fs_info->extent_root,
- &key, path, 0, 0);
- if (ret < 0)
- goto out;
- if (ret == 0) {
- leaf = path->nodes[0];
- ei = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_item);
- num_refs = btrfs_extent_refs(leaf, ei);
- } else {
- num_refs = 0;
- ret = 0;
- }
- spin_lock(&delayed_refs->lock);
- ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
- if (ref) {
- head = btrfs_delayed_node_to_head(ref);
- if (mutex_trylock(&head->mutex)) {
- num_refs += ref->ref_mod;
- mutex_unlock(&head->mutex);
- *refs = num_refs;
- goto out;
- }
- atomic_inc(&ref->refs);
- spin_unlock(&delayed_refs->lock);
- btrfs_release_path(root->fs_info->extent_root, path);
- mutex_lock(&head->mutex);
- mutex_unlock(&head->mutex);
- btrfs_put_delayed_ref(ref);
- goto again;
- } else {
- *refs = num_refs;
- }
- out:
- spin_unlock(&delayed_refs->lock);
- btrfs_free_path(path);
- return ret;
- }
- /*
- * helper function to update an extent delayed ref in the
- * rbtree. existing and update must both have the same
- * bytenr and parent
- *
- * This may free existing if the update cancels out whatever
- * operation it was doing.
- */
- static noinline void
- update_existing_ref(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_root *delayed_refs,
- struct btrfs_delayed_ref_node *existing,
- struct btrfs_delayed_ref_node *update)
- {
- struct btrfs_delayed_ref *existing_ref;
- struct btrfs_delayed_ref *ref;
- existing_ref = btrfs_delayed_node_to_ref(existing);
- ref = btrfs_delayed_node_to_ref(update);
- if (ref->pin)
- existing_ref->pin = 1;
- if (ref->action != existing_ref->action) {
- /*
- * this is effectively undoing either an add or a
- * drop. We decrement the ref_mod, and if it goes
- * down to zero we just delete the entry without
- * every changing the extent allocation tree.
- */
- existing->ref_mod--;
- if (existing->ref_mod == 0) {
- rb_erase(&existing->rb_node,
- &delayed_refs->root);
- existing->in_tree = 0;
- btrfs_put_delayed_ref(existing);
- delayed_refs->num_entries--;
- if (trans->delayed_ref_updates)
- trans->delayed_ref_updates--;
- }
- } else {
- if (existing_ref->action == BTRFS_ADD_DELAYED_REF) {
- /* if we're adding refs, make sure all the
- * details match up. The extent could
- * have been totally freed and reallocated
- * by a different owner before the delayed
- * ref entries were removed.
- */
- existing_ref->owner_objectid = ref->owner_objectid;
- existing_ref->generation = ref->generation;
- existing_ref->root = ref->root;
- existing->num_bytes = update->num_bytes;
- }
- /*
- * the action on the existing ref matches
- * the action on the ref we're trying to add.
- * Bump the ref_mod by one so the backref that
- * is eventually added/removed has the correct
- * reference count
- */
- existing->ref_mod += update->ref_mod;
- }
- }
- /*
- * helper function to update the accounting in the head ref
- * existing and update must have the same bytenr
- */
- static noinline void
- update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
- struct btrfs_delayed_ref_node *update)
- {
- struct btrfs_delayed_ref_head *existing_ref;
- struct btrfs_delayed_ref_head *ref;
- existing_ref = btrfs_delayed_node_to_head(existing);
- ref = btrfs_delayed_node_to_head(update);
- if (ref->must_insert_reserved) {
- /* if the extent was freed and then
- * reallocated before the delayed ref
- * entries were processed, we can end up
- * with an existing head ref without
- * the must_insert_reserved flag set.
- * Set it again here
- */
- existing_ref->must_insert_reserved = ref->must_insert_reserved;
- /*
- * update the num_bytes so we make sure the accounting
- * is done correctly
- */
- existing->num_bytes = update->num_bytes;
- }
- /*
- * update the reference mod on the head to reflect this new operation
- */
- existing->ref_mod += update->ref_mod;
- }
- /*
- * helper function to actually insert a delayed ref into the rbtree.
- * this does all the dirty work in terms of maintaining the correct
- * overall modification count in the head node and properly dealing
- * with updating existing nodes as new modifications are queued.
- */
- static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_node *ref,
- u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
- u64 ref_generation, u64 owner_objectid, int action,
- int pin)
- {
- struct btrfs_delayed_ref_node *existing;
- struct btrfs_delayed_ref *full_ref;
- struct btrfs_delayed_ref_head *head_ref = NULL;
- struct btrfs_delayed_ref_root *delayed_refs;
- int count_mod = 1;
- int must_insert_reserved = 0;
- /*
- * the head node stores the sum of all the mods, so dropping a ref
- * should drop the sum in the head node by one.
- */
- if (parent == (u64)-1) {
- if (action == BTRFS_DROP_DELAYED_REF)
- count_mod = -1;
- else if (action == BTRFS_UPDATE_DELAYED_HEAD)
- count_mod = 0;
- }
- /*
- * BTRFS_ADD_DELAYED_EXTENT means that we need to update
- * the reserved accounting when the extent is finally added, or
- * if a later modification deletes the delayed ref without ever
- * inserting the extent into the extent allocation tree.
- * ref->must_insert_reserved is the flag used to record
- * that accounting mods are required.
- *
- * Once we record must_insert_reserved, switch the action to
- * BTRFS_ADD_DELAYED_REF because other special casing is not required.
- */
- if (action == BTRFS_ADD_DELAYED_EXTENT) {
- must_insert_reserved = 1;
- action = BTRFS_ADD_DELAYED_REF;
- } else {
- must_insert_reserved = 0;
- }
- delayed_refs = &trans->transaction->delayed_refs;
- /* first set the basic ref node struct up */
- atomic_set(&ref->refs, 1);
- ref->bytenr = bytenr;
- ref->parent = parent;
- ref->ref_mod = count_mod;
- ref->in_tree = 1;
- ref->num_bytes = num_bytes;
- if (btrfs_delayed_ref_is_head(ref)) {
- head_ref = btrfs_delayed_node_to_head(ref);
- head_ref->must_insert_reserved = must_insert_reserved;
- INIT_LIST_HEAD(&head_ref->cluster);
- mutex_init(&head_ref->mutex);
- } else {
- full_ref = btrfs_delayed_node_to_ref(ref);
- full_ref->root = ref_root;
- full_ref->generation = ref_generation;
- full_ref->owner_objectid = owner_objectid;
- full_ref->pin = pin;
- full_ref->action = action;
- }
- existing = tree_insert(&delayed_refs->root, bytenr,
- parent, &ref->rb_node);
- if (existing) {
- if (btrfs_delayed_ref_is_head(ref))
- update_existing_head_ref(existing, ref);
- else
- update_existing_ref(trans, delayed_refs, existing, ref);
- /*
- * we've updated the existing ref, free the newly
- * allocated ref
- */
- kfree(ref);
- } else {
- if (btrfs_delayed_ref_is_head(ref)) {
- delayed_refs->num_heads++;
- delayed_refs->num_heads_ready++;
- }
- delayed_refs->num_entries++;
- trans->delayed_ref_updates++;
- }
- return 0;
- }
- /*
- * add a delayed ref to the tree. This does all of the accounting required
- * to make sure the delayed ref is eventually processed before this
- * transaction commits.
- */
- int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
- u64 ref_generation, u64 owner_objectid, int action,
- int pin)
- {
- struct btrfs_delayed_ref *ref;
- struct btrfs_delayed_ref_head *head_ref;
- struct btrfs_delayed_ref_root *delayed_refs;
- int ret;
- ref = kmalloc(sizeof(*ref), GFP_NOFS);
- if (!ref)
- return -ENOMEM;
- /*
- * the parent = 0 case comes from cases where we don't actually
- * know the parent yet. It will get updated later via a add/drop
- * pair.
- */
- if (parent == 0)
- parent = bytenr;
- head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
- if (!head_ref) {
- kfree(ref);
- return -ENOMEM;
- }
- delayed_refs = &trans->transaction->delayed_refs;
- spin_lock(&delayed_refs->lock);
- /*
- * insert both the head node and the new ref without dropping
- * the spin lock
- */
- ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
- (u64)-1, 0, 0, 0, action, pin);
- BUG_ON(ret);
- ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
- parent, ref_root, ref_generation,
- owner_objectid, action, pin);
- BUG_ON(ret);
- spin_unlock(&delayed_refs->lock);
- return 0;
- }
- /*
- * this does a simple search for the head node for a given extent.
- * It must be called with the delayed ref spinlock held, and it returns
- * the head node if any where found, or NULL if not.
- */
- struct btrfs_delayed_ref_head *
- btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
- {
- struct btrfs_delayed_ref_node *ref;
- struct btrfs_delayed_ref_root *delayed_refs;
- delayed_refs = &trans->transaction->delayed_refs;
- ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
- if (ref)
- return btrfs_delayed_node_to_head(ref);
- return NULL;
- }
- /*
- * add a delayed ref to the tree. This does all of the accounting required
- * to make sure the delayed ref is eventually processed before this
- * transaction commits.
- *
- * The main point of this call is to add and remove a backreference in a single
- * shot, taking the lock only once, and only searching for the head node once.
- *
- * It is the same as doing a ref add and delete in two separate calls.
- */
- int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes, u64 orig_parent,
- u64 parent, u64 orig_ref_root, u64 ref_root,
- u64 orig_ref_generation, u64 ref_generation,
- u64 owner_objectid, int pin)
- {
- struct btrfs_delayed_ref *ref;
- struct btrfs_delayed_ref *old_ref;
- struct btrfs_delayed_ref_head *head_ref;
- struct btrfs_delayed_ref_root *delayed_refs;
- int ret;
- ref = kmalloc(sizeof(*ref), GFP_NOFS);
- if (!ref)
- return -ENOMEM;
- old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS);
- if (!old_ref) {
- kfree(ref);
- return -ENOMEM;
- }
- /*
- * the parent = 0 case comes from cases where we don't actually
- * know the parent yet. It will get updated later via a add/drop
- * pair.
- */
- if (parent == 0)
- parent = bytenr;
- if (orig_parent == 0)
- orig_parent = bytenr;
- head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
- if (!head_ref) {
- kfree(ref);
- kfree(old_ref);
- return -ENOMEM;
- }
- delayed_refs = &trans->transaction->delayed_refs;
- spin_lock(&delayed_refs->lock);
- /*
- * insert both the head node and the new ref without dropping
- * the spin lock
- */
- ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
- (u64)-1, 0, 0, 0,
- BTRFS_UPDATE_DELAYED_HEAD, 0);
- BUG_ON(ret);
- ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
- parent, ref_root, ref_generation,
- owner_objectid, BTRFS_ADD_DELAYED_REF, 0);
- BUG_ON(ret);
- ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes,
- orig_parent, orig_ref_root,
- orig_ref_generation, owner_objectid,
- BTRFS_DROP_DELAYED_REF, pin);
- BUG_ON(ret);
- spin_unlock(&delayed_refs->lock);
- return 0;
- }
|