transaction.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595
  1. /*
  2. * Copyright (C) 2007 Oracle. All rights reserved.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public
  6. * License v2 as published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public
  14. * License along with this program; if not, write to the
  15. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16. * Boston, MA 02111-1307, USA.
  17. */
  18. #include <linux/fs.h>
  19. #include <linux/sched.h>
  20. #include "ctree.h"
  21. #include "disk-io.h"
  22. #include "transaction.h"
  23. static int total_trans = 0;
  24. extern struct kmem_cache *btrfs_trans_handle_cachep;
  25. extern struct kmem_cache *btrfs_transaction_cachep;
  26. static struct workqueue_struct *trans_wq;
  27. #define BTRFS_ROOT_TRANS_TAG 0
  28. #define BTRFS_ROOT_DEFRAG_TAG 1
  29. static void put_transaction(struct btrfs_transaction *transaction)
  30. {
  31. WARN_ON(transaction->use_count == 0);
  32. transaction->use_count--;
  33. if (transaction->use_count == 0) {
  34. WARN_ON(total_trans == 0);
  35. total_trans--;
  36. list_del_init(&transaction->list);
  37. memset(transaction, 0, sizeof(*transaction));
  38. kmem_cache_free(btrfs_transaction_cachep, transaction);
  39. }
  40. }
  41. static int join_transaction(struct btrfs_root *root)
  42. {
  43. struct btrfs_transaction *cur_trans;
  44. cur_trans = root->fs_info->running_transaction;
  45. if (!cur_trans) {
  46. cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
  47. GFP_NOFS);
  48. total_trans++;
  49. BUG_ON(!cur_trans);
  50. root->fs_info->generation++;
  51. root->fs_info->running_transaction = cur_trans;
  52. cur_trans->num_writers = 0;
  53. cur_trans->transid = root->fs_info->generation;
  54. init_waitqueue_head(&cur_trans->writer_wait);
  55. init_waitqueue_head(&cur_trans->commit_wait);
  56. cur_trans->in_commit = 0;
  57. cur_trans->use_count = 1;
  58. cur_trans->commit_done = 0;
  59. cur_trans->start_time = get_seconds();
  60. list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
  61. init_bit_radix(&cur_trans->dirty_pages);
  62. }
  63. cur_trans->num_writers++;
  64. return 0;
  65. }
  66. static int record_root_in_trans(struct btrfs_root *root)
  67. {
  68. u64 running_trans_id = root->fs_info->running_transaction->transid;
  69. if (root->ref_cows && root->last_trans < running_trans_id) {
  70. WARN_ON(root == root->fs_info->extent_root);
  71. if (root->root_item.refs != 0) {
  72. radix_tree_tag_set(&root->fs_info->fs_roots_radix,
  73. (unsigned long)root->root_key.objectid,
  74. BTRFS_ROOT_TRANS_TAG);
  75. radix_tree_tag_set(&root->fs_info->fs_roots_radix,
  76. (unsigned long)root->root_key.objectid,
  77. BTRFS_ROOT_DEFRAG_TAG);
  78. root->commit_root = root->node;
  79. get_bh(root->node);
  80. } else {
  81. WARN_ON(1);
  82. }
  83. root->last_trans = running_trans_id;
  84. }
  85. return 0;
  86. }
  87. struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
  88. int num_blocks)
  89. {
  90. struct btrfs_trans_handle *h =
  91. kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
  92. int ret;
  93. mutex_lock(&root->fs_info->trans_mutex);
  94. ret = join_transaction(root);
  95. BUG_ON(ret);
  96. record_root_in_trans(root);
  97. h->transid = root->fs_info->running_transaction->transid;
  98. h->transaction = root->fs_info->running_transaction;
  99. h->blocks_reserved = num_blocks;
  100. h->blocks_used = 0;
  101. h->block_group = NULL;
  102. h->alloc_exclude_nr = 0;
  103. h->alloc_exclude_start = 0;
  104. root->fs_info->running_transaction->use_count++;
  105. mutex_unlock(&root->fs_info->trans_mutex);
  106. return h;
  107. }
  108. int btrfs_end_transaction(struct btrfs_trans_handle *trans,
  109. struct btrfs_root *root)
  110. {
  111. struct btrfs_transaction *cur_trans;
  112. mutex_lock(&root->fs_info->trans_mutex);
  113. cur_trans = root->fs_info->running_transaction;
  114. WARN_ON(cur_trans != trans->transaction);
  115. WARN_ON(cur_trans->num_writers < 1);
  116. cur_trans->num_writers--;
  117. if (waitqueue_active(&cur_trans->writer_wait))
  118. wake_up(&cur_trans->writer_wait);
  119. put_transaction(cur_trans);
  120. mutex_unlock(&root->fs_info->trans_mutex);
  121. memset(trans, 0, sizeof(*trans));
  122. kmem_cache_free(btrfs_trans_handle_cachep, trans);
  123. return 0;
  124. }
  125. int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
  126. struct btrfs_root *root)
  127. {
  128. unsigned long gang[16];
  129. int ret;
  130. int i;
  131. int err;
  132. int werr = 0;
  133. struct page *page;
  134. struct radix_tree_root *dirty_pages;
  135. struct inode *btree_inode = root->fs_info->btree_inode;
  136. if (!trans || !trans->transaction) {
  137. return filemap_write_and_wait(btree_inode->i_mapping);
  138. }
  139. dirty_pages = &trans->transaction->dirty_pages;
  140. while(1) {
  141. ret = find_first_radix_bit(dirty_pages, gang,
  142. 0, ARRAY_SIZE(gang));
  143. if (!ret)
  144. break;
  145. for (i = 0; i < ret; i++) {
  146. /* FIXME EIO */
  147. clear_radix_bit(dirty_pages, gang[i]);
  148. page = find_lock_page(btree_inode->i_mapping,
  149. gang[i]);
  150. if (!page)
  151. continue;
  152. if (PageWriteback(page)) {
  153. if (PageDirty(page))
  154. wait_on_page_writeback(page);
  155. else {
  156. unlock_page(page);
  157. page_cache_release(page);
  158. continue;
  159. }
  160. }
  161. err = write_one_page(page, 0);
  162. if (err)
  163. werr = err;
  164. page_cache_release(page);
  165. }
  166. }
  167. err = filemap_fdatawait(btree_inode->i_mapping);
  168. if (err)
  169. werr = err;
  170. return werr;
  171. }
  172. int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
  173. struct btrfs_root *root)
  174. {
  175. int ret;
  176. u64 old_extent_block;
  177. struct btrfs_fs_info *fs_info = root->fs_info;
  178. struct btrfs_root *tree_root = fs_info->tree_root;
  179. struct btrfs_root *extent_root = fs_info->extent_root;
  180. btrfs_write_dirty_block_groups(trans, extent_root);
  181. while(1) {
  182. old_extent_block = btrfs_root_blocknr(&extent_root->root_item);
  183. if (old_extent_block == bh_blocknr(extent_root->node))
  184. break;
  185. btrfs_set_root_blocknr(&extent_root->root_item,
  186. bh_blocknr(extent_root->node));
  187. ret = btrfs_update_root(trans, tree_root,
  188. &extent_root->root_key,
  189. &extent_root->root_item);
  190. BUG_ON(ret);
  191. btrfs_write_dirty_block_groups(trans, extent_root);
  192. }
  193. return 0;
  194. }
  195. static int wait_for_commit(struct btrfs_root *root,
  196. struct btrfs_transaction *commit)
  197. {
  198. DEFINE_WAIT(wait);
  199. mutex_lock(&root->fs_info->trans_mutex);
  200. while(!commit->commit_done) {
  201. prepare_to_wait(&commit->commit_wait, &wait,
  202. TASK_UNINTERRUPTIBLE);
  203. if (commit->commit_done)
  204. break;
  205. mutex_unlock(&root->fs_info->trans_mutex);
  206. schedule();
  207. mutex_lock(&root->fs_info->trans_mutex);
  208. }
  209. mutex_unlock(&root->fs_info->trans_mutex);
  210. finish_wait(&commit->commit_wait, &wait);
  211. return 0;
  212. }
/*
 * List entry wrapping a btrfs_root so it can be queued on a list of roots
 * waiting to be dropped (see btrfs_add_dead_root(), add_dirty_roots() and
 * drop_dirty_roots()).
 */
struct dirty_root {
	/* linkage on the dead/dirty root list */
	struct list_head list;
	/* the root to drop */
	struct btrfs_root *root;
};
  217. int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list)
  218. {
  219. struct dirty_root *dirty;
  220. dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
  221. if (!dirty)
  222. return -ENOMEM;
  223. dirty->root = root;
  224. list_add(&dirty->list, dead_list);
  225. return 0;
  226. }
  227. static int add_dirty_roots(struct btrfs_trans_handle *trans,
  228. struct radix_tree_root *radix,
  229. struct list_head *list)
  230. {
  231. struct dirty_root *dirty;
  232. struct btrfs_root *gang[8];
  233. struct btrfs_root *root;
  234. int i;
  235. int ret;
  236. int err = 0;
  237. u32 refs;
  238. while(1) {
  239. ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
  240. ARRAY_SIZE(gang),
  241. BTRFS_ROOT_TRANS_TAG);
  242. if (ret == 0)
  243. break;
  244. for (i = 0; i < ret; i++) {
  245. root = gang[i];
  246. radix_tree_tag_clear(radix,
  247. (unsigned long)root->root_key.objectid,
  248. BTRFS_ROOT_TRANS_TAG);
  249. if (root->commit_root == root->node) {
  250. WARN_ON(bh_blocknr(root->node) !=
  251. btrfs_root_blocknr(&root->root_item));
  252. brelse(root->commit_root);
  253. root->commit_root = NULL;
  254. continue;
  255. }
  256. dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
  257. BUG_ON(!dirty);
  258. dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS);
  259. BUG_ON(!dirty->root);
  260. memset(&root->root_item.drop_progress, 0,
  261. sizeof(struct btrfs_disk_key));
  262. root->root_item.drop_level = 0;
  263. memcpy(dirty->root, root, sizeof(*root));
  264. dirty->root->node = root->commit_root;
  265. root->commit_root = NULL;
  266. root->root_key.offset = root->fs_info->generation;
  267. btrfs_set_root_blocknr(&root->root_item,
  268. bh_blocknr(root->node));
  269. err = btrfs_insert_root(trans, root->fs_info->tree_root,
  270. &root->root_key,
  271. &root->root_item);
  272. if (err)
  273. break;
  274. refs = btrfs_root_refs(&dirty->root->root_item);
  275. btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
  276. err = btrfs_update_root(trans, root->fs_info->tree_root,
  277. &dirty->root->root_key,
  278. &dirty->root->root_item);
  279. BUG_ON(err);
  280. if (refs == 1) {
  281. list_add(&dirty->list, list);
  282. } else {
  283. WARN_ON(1);
  284. kfree(dirty->root);
  285. kfree(dirty);
  286. }
  287. }
  288. }
  289. return err;
  290. }
  291. int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info)
  292. {
  293. struct btrfs_root *gang[1];
  294. struct btrfs_root *root;
  295. struct btrfs_root *tree_root = info->tree_root;
  296. struct btrfs_trans_handle *trans;
  297. int i;
  298. int ret;
  299. int err = 0;
  300. u64 last = 0;
  301. trans = btrfs_start_transaction(tree_root, 1);
  302. while(1) {
  303. ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix,
  304. (void **)gang, last,
  305. ARRAY_SIZE(gang),
  306. BTRFS_ROOT_DEFRAG_TAG);
  307. if (ret == 0)
  308. break;
  309. for (i = 0; i < ret; i++) {
  310. root = gang[i];
  311. last = root->root_key.objectid + 1;
  312. radix_tree_tag_clear(&info->fs_roots_radix,
  313. (unsigned long)root->root_key.objectid,
  314. BTRFS_ROOT_DEFRAG_TAG);
  315. if (root->defrag_running)
  316. continue;
  317. while (1) {
  318. mutex_lock(&root->fs_info->trans_mutex);
  319. record_root_in_trans(root);
  320. mutex_unlock(&root->fs_info->trans_mutex);
  321. root->defrag_running = 1;
  322. err = btrfs_defrag_leaves(trans, root, 1);
  323. btrfs_end_transaction(trans, tree_root);
  324. mutex_unlock(&info->fs_mutex);
  325. btrfs_btree_balance_dirty(root);
  326. cond_resched();
  327. mutex_lock(&info->fs_mutex);
  328. trans = btrfs_start_transaction(tree_root, 1);
  329. if (err != -EAGAIN)
  330. break;
  331. }
  332. root->defrag_running = 0;
  333. radix_tree_tag_clear(&info->fs_roots_radix,
  334. (unsigned long)root->root_key.objectid,
  335. BTRFS_ROOT_DEFRAG_TAG);
  336. }
  337. }
  338. btrfs_end_transaction(trans, tree_root);
  339. return err;
  340. }
  341. static int drop_dirty_roots(struct btrfs_root *tree_root,
  342. struct list_head *list)
  343. {
  344. struct dirty_root *dirty;
  345. struct btrfs_trans_handle *trans;
  346. int ret = 0;
  347. int err;
  348. while(!list_empty(list)) {
  349. mutex_lock(&tree_root->fs_info->fs_mutex);
  350. dirty = list_entry(list->next, struct dirty_root, list);
  351. list_del_init(&dirty->list);
  352. while(1) {
  353. trans = btrfs_start_transaction(tree_root, 1);
  354. ret = btrfs_drop_snapshot(trans, dirty->root);
  355. if (ret != -EAGAIN) {
  356. break;
  357. }
  358. err = btrfs_update_root(trans,
  359. tree_root,
  360. &dirty->root->root_key,
  361. &dirty->root->root_item);
  362. if (err)
  363. ret = err;
  364. ret = btrfs_end_transaction(trans, tree_root);
  365. BUG_ON(ret);
  366. mutex_unlock(&tree_root->fs_info->fs_mutex);
  367. btrfs_btree_balance_dirty(tree_root);
  368. schedule();
  369. mutex_lock(&tree_root->fs_info->fs_mutex);
  370. }
  371. BUG_ON(ret);
  372. ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key);
  373. if (ret)
  374. break;
  375. ret = btrfs_end_transaction(trans, tree_root);
  376. BUG_ON(ret);
  377. kfree(dirty->root);
  378. kfree(dirty);
  379. mutex_unlock(&tree_root->fs_info->fs_mutex);
  380. btrfs_btree_balance_dirty(tree_root);
  381. schedule();
  382. }
  383. return ret;
  384. }
  385. int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
  386. struct btrfs_root *root)
  387. {
  388. int ret = 0;
  389. struct btrfs_transaction *cur_trans;
  390. struct btrfs_transaction *prev_trans = NULL;
  391. struct list_head dirty_fs_roots;
  392. struct radix_tree_root pinned_copy;
  393. DEFINE_WAIT(wait);
  394. init_bit_radix(&pinned_copy);
  395. INIT_LIST_HEAD(&dirty_fs_roots);
  396. mutex_lock(&root->fs_info->trans_mutex);
  397. if (trans->transaction->in_commit) {
  398. cur_trans = trans->transaction;
  399. trans->transaction->use_count++;
  400. mutex_unlock(&root->fs_info->trans_mutex);
  401. btrfs_end_transaction(trans, root);
  402. mutex_unlock(&root->fs_info->fs_mutex);
  403. ret = wait_for_commit(root, cur_trans);
  404. BUG_ON(ret);
  405. put_transaction(cur_trans);
  406. mutex_lock(&root->fs_info->fs_mutex);
  407. return 0;
  408. }
  409. trans->transaction->in_commit = 1;
  410. cur_trans = trans->transaction;
  411. if (cur_trans->list.prev != &root->fs_info->trans_list) {
  412. prev_trans = list_entry(cur_trans->list.prev,
  413. struct btrfs_transaction, list);
  414. if (!prev_trans->commit_done) {
  415. prev_trans->use_count++;
  416. mutex_unlock(&root->fs_info->fs_mutex);
  417. mutex_unlock(&root->fs_info->trans_mutex);
  418. wait_for_commit(root, prev_trans);
  419. put_transaction(prev_trans);
  420. mutex_lock(&root->fs_info->fs_mutex);
  421. mutex_lock(&root->fs_info->trans_mutex);
  422. }
  423. }
  424. while (trans->transaction->num_writers > 1) {
  425. WARN_ON(cur_trans != trans->transaction);
  426. prepare_to_wait(&trans->transaction->writer_wait, &wait,
  427. TASK_UNINTERRUPTIBLE);
  428. if (trans->transaction->num_writers <= 1)
  429. break;
  430. mutex_unlock(&root->fs_info->fs_mutex);
  431. mutex_unlock(&root->fs_info->trans_mutex);
  432. schedule();
  433. mutex_lock(&root->fs_info->fs_mutex);
  434. mutex_lock(&root->fs_info->trans_mutex);
  435. finish_wait(&trans->transaction->writer_wait, &wait);
  436. }
  437. finish_wait(&trans->transaction->writer_wait, &wait);
  438. WARN_ON(cur_trans != trans->transaction);
  439. ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
  440. &dirty_fs_roots);
  441. BUG_ON(ret);
  442. ret = btrfs_commit_tree_roots(trans, root);
  443. BUG_ON(ret);
  444. cur_trans = root->fs_info->running_transaction;
  445. root->fs_info->running_transaction = NULL;
  446. btrfs_set_super_generation(&root->fs_info->super_copy,
  447. cur_trans->transid);
  448. btrfs_set_super_root(&root->fs_info->super_copy,
  449. bh_blocknr(root->fs_info->tree_root->node));
  450. memcpy(root->fs_info->disk_super, &root->fs_info->super_copy,
  451. sizeof(root->fs_info->super_copy));
  452. btrfs_copy_pinned(root, &pinned_copy);
  453. mutex_unlock(&root->fs_info->trans_mutex);
  454. mutex_unlock(&root->fs_info->fs_mutex);
  455. ret = btrfs_write_and_wait_transaction(trans, root);
  456. BUG_ON(ret);
  457. write_ctree_super(trans, root);
  458. mutex_lock(&root->fs_info->fs_mutex);
  459. btrfs_finish_extent_commit(trans, root, &pinned_copy);
  460. mutex_lock(&root->fs_info->trans_mutex);
  461. cur_trans->commit_done = 1;
  462. wake_up(&cur_trans->commit_wait);
  463. put_transaction(cur_trans);
  464. put_transaction(cur_trans);
  465. if (root->fs_info->closing)
  466. list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
  467. else
  468. list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
  469. mutex_unlock(&root->fs_info->trans_mutex);
  470. kmem_cache_free(btrfs_trans_handle_cachep, trans);
  471. if (root->fs_info->closing) {
  472. mutex_unlock(&root->fs_info->fs_mutex);
  473. drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
  474. mutex_lock(&root->fs_info->fs_mutex);
  475. }
  476. return ret;
  477. }
  478. void btrfs_transaction_cleaner(struct work_struct *work)
  479. {
  480. struct btrfs_fs_info *fs_info = container_of(work,
  481. struct btrfs_fs_info,
  482. trans_work.work);
  483. struct btrfs_root *root = fs_info->tree_root;
  484. struct btrfs_transaction *cur;
  485. struct btrfs_trans_handle *trans;
  486. struct list_head dirty_roots;
  487. unsigned long now;
  488. unsigned long delay = HZ * 30;
  489. int ret;
  490. INIT_LIST_HEAD(&dirty_roots);
  491. mutex_lock(&root->fs_info->fs_mutex);
  492. mutex_lock(&root->fs_info->trans_mutex);
  493. cur = root->fs_info->running_transaction;
  494. if (!cur) {
  495. mutex_unlock(&root->fs_info->trans_mutex);
  496. goto out;
  497. }
  498. now = get_seconds();
  499. if (now < cur->start_time || now - cur->start_time < 30) {
  500. mutex_unlock(&root->fs_info->trans_mutex);
  501. delay = HZ * 5;
  502. goto out;
  503. }
  504. mutex_unlock(&root->fs_info->trans_mutex);
  505. btrfs_defrag_dirty_roots(root->fs_info);
  506. trans = btrfs_start_transaction(root, 1);
  507. ret = btrfs_commit_transaction(trans, root);
  508. out:
  509. mutex_unlock(&root->fs_info->fs_mutex);
  510. mutex_lock(&root->fs_info->trans_mutex);
  511. list_splice_init(&root->fs_info->dead_roots, &dirty_roots);
  512. mutex_unlock(&root->fs_info->trans_mutex);
  513. if (!list_empty(&dirty_roots)) {
  514. drop_dirty_roots(root, &dirty_roots);
  515. }
  516. btrfs_transaction_queue_work(root, delay);
  517. }
  518. void btrfs_transaction_queue_work(struct btrfs_root *root, int delay)
  519. {
  520. queue_delayed_work(trans_wq, &root->fs_info->trans_work, delay);
  521. }
  522. void btrfs_transaction_flush_work(struct btrfs_root *root)
  523. {
  524. cancel_rearming_delayed_workqueue(trans_wq, &root->fs_info->trans_work);
  525. flush_workqueue(trans_wq);
  526. }
  527. void __init btrfs_init_transaction_sys(void)
  528. {
  529. trans_wq = create_workqueue("btrfs");
  530. }
  531. void __exit btrfs_exit_transaction_sys(void)
  532. {
  533. destroy_workqueue(trans_wq);
  534. }