delayed-ref.c

/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
#include <linux/sched.h>
#include <linux/sort.h>
#include <linux/ftrace.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"

/*
 * delayed back reference update tracking.  For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing.  This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 *
 * Right now this code is only used for reference counted trees, but
 * the long term goal is to get rid of the similar code for delayed
 * extent tree modifications.
 */

/*
 * entries in the rb tree are ordered by the byte number of the extent
 * and by the byte number of the parent block.
 */
static int comp_entry(struct btrfs_delayed_ref_node *ref,
                      u64 bytenr, u64 parent)
{
        if (bytenr < ref->bytenr)
                return -1;
        if (bytenr > ref->bytenr)
                return 1;
        if (parent < ref->parent)
                return -1;
        if (parent > ref->parent)
                return 1;
        return 0;
}
/*
 * insert a new ref into the rbtree.  This returns any existing refs
 * for the same (bytenr, parent) tuple, or NULL if the new node was properly
 * inserted.
 */
static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
                                                  u64 bytenr, u64 parent,
                                                  struct rb_node *node)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent_node = NULL;
        struct btrfs_delayed_ref_node *entry;
        int cmp;

        while (*p) {
                parent_node = *p;
                entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
                                 rb_node);

                cmp = comp_entry(entry, bytenr, parent);
                if (cmp < 0)
                        p = &(*p)->rb_left;
                else if (cmp > 0)
                        p = &(*p)->rb_right;
                else
                        return entry;
        }

        entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
        rb_link_node(node, parent_node, p);
        rb_insert_color(node, root);
        return NULL;
}
/*
 * find an entry based on (bytenr, parent).  This returns the delayed
 * ref if it was able to find one, or NULL if nothing was in that spot
 */
static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
                                                  u64 bytenr, u64 parent)
{
        struct rb_node *n = root->rb_node;
        struct btrfs_delayed_ref_node *entry;
        int cmp;

        while (n) {
                entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
                WARN_ON(!entry->in_tree);

                cmp = comp_entry(entry, bytenr, parent);
                if (cmp < 0)
                        n = n->rb_left;
                else if (cmp > 0)
                        n = n->rb_right;
                else
                        return entry;
        }
        return NULL;
}
/*
 * Locking on delayed refs is done by taking a lock on the head node,
 * which has the (impossible) parent id of (u64)-1.  Once a lock is held
 * on the head node, you're allowed (and required) to process all the
 * delayed refs for a given byte number in the tree.
 *
 * This will walk forward in the rbtree until it finds a head node it
 * is able to lock.  It might not lock the delayed ref you asked for,
 * and so it will return the one it did lock in next_ret and return 0.
 *
 * If no locks are taken, next_ret is set to NULL and 1 is returned.  This
 * means there are no more unlocked head nodes in the rbtree.
 */
int btrfs_lock_delayed_ref(struct btrfs_trans_handle *trans,
                           struct btrfs_delayed_ref_node *ref,
                           struct btrfs_delayed_ref_head **next_ret)
{
        struct rb_node *node;
        struct btrfs_delayed_ref_head *head;
        int ret = 0;

        while (1) {
                if (btrfs_delayed_ref_is_head(ref)) {
                        head = btrfs_delayed_node_to_head(ref);
                        if (mutex_trylock(&head->mutex)) {
                                *next_ret = head;
                                ret = 0;
                                break;
                        }
                }
                node = rb_next(&ref->rb_node);
                if (!node) {
                        ret = 1;
                        *next_ret = NULL;
                        break;
                }
                ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
        }
        return ret;
}
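/*
 * Illustrative usage sketch, not part of the original file: a caller is
 * expected to lock some head node (not necessarily the one it started
 * from), run every delayed ref queued for that bytenr, then drop the
 * mutex.  run_refs_for_head() is a hypothetical helper standing in for
 * that processing.
 */
static void example_process_one_head(struct btrfs_trans_handle *trans,
                                     struct btrfs_delayed_ref_node *ref)
{
        struct btrfs_delayed_ref_head *head;

        /* walks forward in the rbtree; may lock a different head */
        if (btrfs_lock_delayed_ref(trans, ref, &head))
                return; /* 1 means no unlocked head nodes remain */

        run_refs_for_head(trans, head); /* hypothetical helper */
        mutex_unlock(&head->mutex);
}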
/*
 * This checks to see if there are any delayed refs in the
 * btree for a given bytenr.  It returns one if it finds any
 * and zero otherwise.
 *
 * If it only finds a head node, it returns 0.
 *
 * The idea is to use this when deciding if you can safely delete an
 * extent from the extent allocation tree.  There may be a pending
 * ref in the rbtree that adds or removes references, so as long as this
 * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent
 * allocation tree.
 */
int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
{
        struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct rb_node *prev_node;
        int ret = 0;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);
        /*
         * the head node sorts after every real ref for this bytenr because
         * it carries the (impossible) parent of (u64)-1.  So if we find the
         * head, any pending refs sit immediately before it in the tree.
         */
        ref = tree_search(&delayed_refs->root, bytenr, (u64)-1);
        if (ref) {
                prev_node = rb_prev(&ref->rb_node);
                if (!prev_node)
                        goto out;
                ref = rb_entry(prev_node, struct btrfs_delayed_ref_node,
                               rb_node);
                if (ref->bytenr == bytenr)
                        ret = 1;
        }
out:
        spin_unlock(&delayed_refs->lock);
        return ret;
}
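/*
 * Illustrative sketch, not in the original file: how a caller might use
 * the pending check when freeing an extent.  free_extent_item() is a
 * hypothetical helper.
 */
static int example_try_free_extent(struct btrfs_trans_handle *trans,
                                   u64 bytenr)
{
        if (btrfs_delayed_ref_pending(trans, bytenr))
                return -EAGAIN; /* leave the BTRFS_EXTENT_ITEM in place */
        return free_extent_item(trans, bytenr); /* hypothetical */
}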
/*
 * helper function to lookup reference count
 *
 * the head node for delayed ref is used to store the sum of all the
 * reference count modifications queued up in the rbtree.  This way you
 * can check to see what the reference count would be if all of the
 * delayed refs are processed.
 */
int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root, u64 bytenr,
                            u64 num_bytes, u32 *refs)
{
        struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_ref_head *head;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_extent_item *ei;
        struct btrfs_key key;
        u32 num_refs;
        int ret;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = num_bytes;
        delayed_refs = &trans->transaction->delayed_refs;
again:
        ret = btrfs_search_slot(trans, root->fs_info->extent_root,
                                &key, path, 0, 0);
        if (ret < 0)
                goto out_free;

        if (ret == 0) {
                leaf = path->nodes[0];
                ei = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_extent_item);
                num_refs = btrfs_extent_refs(leaf, ei);
        } else {
                num_refs = 0;
                ret = 0;
        }

        spin_lock(&delayed_refs->lock);
        ref = tree_search(&delayed_refs->root, bytenr, (u64)-1);
        if (ref) {
                head = btrfs_delayed_node_to_head(ref);
                if (mutex_trylock(&head->mutex)) {
                        num_refs += ref->ref_mod;
                        mutex_unlock(&head->mutex);
                        *refs = num_refs;
                        goto out;
                }

                /*
                 * someone else holds the head and may be changing the
                 * counts.  Wait for the mutex to cycle, drop our reference
                 * and retry the whole lookup from the top.
                 */
                atomic_inc(&ref->refs);
                spin_unlock(&delayed_refs->lock);

                btrfs_release_path(root->fs_info->extent_root, path);

                mutex_lock(&head->mutex);
                mutex_unlock(&head->mutex);
                btrfs_put_delayed_ref(ref);
                goto again;
        } else {
                *refs = num_refs;
        }
out:
        spin_unlock(&delayed_refs->lock);
out_free:
        btrfs_free_path(path);
        return ret;
}
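/*
 * Illustrative sketch, not in the original file: the refcount returned by
 * the helper above already folds in every queued delayed-ref modification,
 * so a caller can test whether an extent would be unused once the queue
 * drains.
 */
static int example_extent_unused(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 u64 bytenr, u64 num_bytes)
{
        u32 refs;
        int ret;

        ret = btrfs_lookup_extent_ref(trans, root, bytenr, num_bytes, &refs);
        if (ret)
                return ret;
        return refs == 0;
}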
/*
 * helper function to update an extent delayed ref in the
 * rbtree.  existing and update must both have the same
 * bytenr and parent
 *
 * This may free existing if the update cancels out whatever
 * operation it was doing.
 */
static noinline void
update_existing_ref(struct btrfs_trans_handle *trans,
                    struct btrfs_delayed_ref_root *delayed_refs,
                    struct btrfs_delayed_ref_node *existing,
                    struct btrfs_delayed_ref_node *update)
{
        struct btrfs_delayed_ref *existing_ref;
        struct btrfs_delayed_ref *ref;

        existing_ref = btrfs_delayed_node_to_ref(existing);
        ref = btrfs_delayed_node_to_ref(update);

        if (ref->pin)
                existing_ref->pin = 1;

        if (ref->action != existing_ref->action) {
                /*
                 * this is effectively undoing either an add or a
                 * drop.  We decrement the ref_mod, and if it goes
                 * down to zero we just delete the entry without
                 * ever changing the extent allocation tree.
                 */
                existing->ref_mod--;
                if (existing->ref_mod == 0) {
                        rb_erase(&existing->rb_node,
                                 &delayed_refs->root);
                        existing->in_tree = 0;
                        btrfs_put_delayed_ref(existing);
                        delayed_refs->num_entries--;
                        if (trans->delayed_ref_updates)
                                trans->delayed_ref_updates--;
                }
        } else {
                if (existing_ref->action == BTRFS_ADD_DELAYED_REF) {
                        /* if we're adding refs, make sure all the
                         * details match up.  The extent could
                         * have been totally freed and reallocated
                         * by a different owner before the delayed
                         * ref entries were removed.
                         */
                        existing_ref->owner_objectid = ref->owner_objectid;
                        existing_ref->generation = ref->generation;
                        existing_ref->root = ref->root;
                        existing->num_bytes = update->num_bytes;
                }
                /*
                 * the action on the existing ref matches
                 * the action on the ref we're trying to add.
                 * Bump the ref_mod by one so the backref that
                 * is eventually added/removed has the correct
                 * reference count
                 */
                existing->ref_mod += update->ref_mod;
        }
}
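/*
 * Worked example (illustrative, not part of the original file): queue a
 * BTRFS_ADD_DELAYED_REF and then a BTRFS_DROP_DELAYED_REF for the same
 * (bytenr, parent) tuple before either is processed.  The second insert
 * lands in the cancellation branch above: ref_mod goes 1 -> 0, the rbtree
 * entry is erased and freed, and the extent allocation tree is never
 * touched for that pair.
 */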
/*
 * helper function to update the accounting in the head ref
 * existing and update must have the same bytenr
 */
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
                         struct btrfs_delayed_ref_node *update)
{
        struct btrfs_delayed_ref_head *existing_ref;
        struct btrfs_delayed_ref_head *ref;

        existing_ref = btrfs_delayed_node_to_head(existing);
        ref = btrfs_delayed_node_to_head(update);

        if (ref->must_insert_reserved) {
                /* if the extent was freed and then
                 * reallocated before the delayed ref
                 * entries were processed, we can end up
                 * with an existing head ref without
                 * the must_insert_reserved flag set.
                 * Set it again here
                 */
                existing_ref->must_insert_reserved = ref->must_insert_reserved;

                /*
                 * update the num_bytes so we make sure the accounting
                 * is done correctly
                 */
                existing->num_bytes = update->num_bytes;
        }

        /*
         * update the reference mod on the head to reflect this new operation
         */
        existing->ref_mod += update->ref_mod;
}
/*
 * helper function to actually insert a delayed ref into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count in the head node and properly dealing
 * with updating existing nodes as new modifications are queued.
 */
static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
                          struct btrfs_delayed_ref_node *ref,
                          u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
                          u64 ref_generation, u64 owner_objectid, int action,
                          int pin)
{
        struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_ref *full_ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        int count_mod = 1;
        int must_insert_reserved = 0;

        /*
         * the head node stores the sum of all the mods, so dropping a ref
         * should drop the sum in the head node by one.
         */
        if (parent == (u64)-1 && action == BTRFS_DROP_DELAYED_REF)
                count_mod = -1;

        /*
         * BTRFS_ADD_DELAYED_EXTENT means that we need to update
         * the reserved accounting when the extent is finally added, or
         * if a later modification deletes the delayed ref without ever
         * inserting the extent into the extent allocation tree.
         * ref->must_insert_reserved is the flag used to record
         * that accounting mods are required.
         *
         * Once we record must_insert_reserved, switch the action to
         * BTRFS_ADD_DELAYED_REF because other special casing is not required.
         */
        if (action == BTRFS_ADD_DELAYED_EXTENT) {
                must_insert_reserved = 1;
                action = BTRFS_ADD_DELAYED_REF;
        } else {
                must_insert_reserved = 0;
        }

        delayed_refs = &trans->transaction->delayed_refs;

        /* first set the basic ref node struct up */
        atomic_set(&ref->refs, 1);
        ref->bytenr = bytenr;
        ref->parent = parent;
        ref->ref_mod = count_mod;
        ref->in_tree = 1;
        ref->num_bytes = num_bytes;

        if (btrfs_delayed_ref_is_head(ref)) {
                head_ref = btrfs_delayed_node_to_head(ref);
                head_ref->must_insert_reserved = must_insert_reserved;
                mutex_init(&head_ref->mutex);
        } else {
                full_ref = btrfs_delayed_node_to_ref(ref);
                full_ref->root = ref_root;
                full_ref->generation = ref_generation;
                full_ref->owner_objectid = owner_objectid;
                full_ref->pin = pin;
                full_ref->action = action;
        }

        existing = tree_insert(&delayed_refs->root, bytenr,
                               parent, &ref->rb_node);

        if (existing) {
                if (btrfs_delayed_ref_is_head(ref))
                        update_existing_head_ref(existing, ref);
                else
                        update_existing_ref(trans, delayed_refs, existing, ref);

                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kfree(ref);
        } else {
                delayed_refs->num_entries++;
                trans->delayed_ref_updates++;
        }
        return 0;
}
/*
 * add a delayed ref to the tree.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
                          u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
                          u64 ref_generation, u64 owner_objectid, int action,
                          int pin)
{
        struct btrfs_delayed_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        int ret;

        ref = kmalloc(sizeof(*ref), GFP_NOFS);
        if (!ref)
                return -ENOMEM;

        /*
         * the parent = 0 case comes from cases where we don't actually
         * know the parent yet.  It will get updated later via an add/drop
         * pair.
         */
        if (parent == 0)
                parent = bytenr;

        head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
        if (!head_ref) {
                kfree(ref);
                return -ENOMEM;
        }
        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        /*
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
        ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
                                      (u64)-1, 0, 0, 0, action, pin);
        BUG_ON(ret);

        ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
                                      parent, ref_root, ref_generation,
                                      owner_objectid, action, pin);
        BUG_ON(ret);
        spin_unlock(&delayed_refs->lock);
        return 0;
}
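/*
 * Illustrative call, not in the original file: queue a reference add for a
 * freshly allocated extent.  The bytenr, num_bytes and owner values are
 * whatever the caller allocated; using trans->transid and the root's
 * objectid here is an assumption about a typical caller.
 */
static int example_queue_new_extent_ref(struct btrfs_trans_handle *trans,
                                        struct btrfs_root *root, u64 bytenr,
                                        u64 num_bytes, u64 owner_objectid)
{
        /*
         * parent is passed as 0 because it isn't known yet; the helper
         * substitutes bytenr and a later add/drop pair corrects it.
         * BTRFS_ADD_DELAYED_EXTENT also flags the reserved-space accounting
         * that must happen when the extent item is finally inserted.
         */
        return btrfs_add_delayed_ref(trans, bytenr, num_bytes, 0,
                                     root->root_key.objectid, trans->transid,
                                     owner_objectid,
                                     BTRFS_ADD_DELAYED_EXTENT, 0);
}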
/*
 * add a delayed ref to the tree.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 *
 * The main point of this call is to add and remove a backreference in a single
 * shot, taking the lock only once, and only searching for the head node once.
 *
 * It is the same as doing a ref add and delete in two separate calls.
 */
int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
                             u64 bytenr, u64 num_bytes, u64 orig_parent,
                             u64 parent, u64 orig_ref_root, u64 ref_root,
                             u64 orig_ref_generation, u64 ref_generation,
                             u64 owner_objectid, int pin)
{
        struct btrfs_delayed_ref *ref;
        struct btrfs_delayed_ref *old_ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        int ret;

        ref = kmalloc(sizeof(*ref), GFP_NOFS);
        if (!ref)
                return -ENOMEM;

        old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS);
        if (!old_ref) {
                kfree(ref);
                return -ENOMEM;
        }

        /*
         * the parent = 0 case comes from cases where we don't actually
         * know the parent yet.  It will get updated later via an add/drop
         * pair.
         */
        if (parent == 0)
                parent = bytenr;
        if (orig_parent == 0)
                orig_parent = bytenr;

        head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
        if (!head_ref) {
                kfree(ref);
                kfree(old_ref);
                return -ENOMEM;
        }
        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        /*
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
        ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
                                      (u64)-1, 0, 0, 0,
                                      BTRFS_ADD_DELAYED_REF, 0);
        BUG_ON(ret);

        ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
                                      parent, ref_root, ref_generation,
                                      owner_objectid, BTRFS_ADD_DELAYED_REF, 0);
        BUG_ON(ret);

        ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes,
                                      orig_parent, orig_ref_root,
                                      orig_ref_generation, owner_objectid,
                                      BTRFS_DROP_DELAYED_REF, pin);
        BUG_ON(ret);
        spin_unlock(&delayed_refs->lock);
        return 0;
}
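/*
 * Illustrative call, not in the original file: move a backref from one
 * parent block to another in a single locked pass.  Using the same root
 * and trans->transid for both the old and new sides is an assumption
 * about a typical caller, not something this file mandates.
 */
static int example_move_backref(struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
                                u64 old_parent, u64 new_parent,
                                u64 ref_root, u64 owner_objectid)
{
        /* one add (new location) plus one drop (old location), one lock */
        return btrfs_update_delayed_ref(trans, bytenr, num_bytes,
                                        old_parent, new_parent,
                                        ref_root, ref_root,
                                        trans->transid, trans->transid,
                                        owner_objectid, 0);
}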