transaction.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  1. /*
  2. * Copyright (C) 2007 Oracle. All rights reserved.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public
  6. * License v2 as published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public
  14. * License along with this program; if not, write to the
  15. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16. * Boston, MA 02111-1307, USA.
  17. */
  18. #include <linux/fs.h>
  19. #include <linux/sched.h>
  20. #include "ctree.h"
  21. #include "disk-io.h"
  22. #include "transaction.h"
  /*
   * Number of btrfs_transaction objects currently alive: bumped in
   * join_transaction() and dropped in put_transaction().
   */
  static int total_trans;

  /* Slab caches for transaction handles and transactions (defined elsewhere). */
  extern struct kmem_cache *btrfs_trans_handle_cachep;
  extern struct kmem_cache *btrfs_transaction_cachep;

  /* Workqueue on which the delayed transaction cleaner is queued. */
  static struct workqueue_struct *trans_wq;

  /* Tags set on entries of fs_info->fs_roots_radix. */
  #define BTRFS_ROOT_TRANS_TAG 0
  #define BTRFS_ROOT_DEFRAG_TAG 1
  29. static void put_transaction(struct btrfs_transaction *transaction)
  30. {
  31. WARN_ON(transaction->use_count == 0);
  32. transaction->use_count--;
  33. if (transaction->use_count == 0) {
  34. WARN_ON(total_trans == 0);
  35. total_trans--;
  36. list_del_init(&transaction->list);
  37. memset(transaction, 0, sizeof(*transaction));
  38. kmem_cache_free(btrfs_transaction_cachep, transaction);
  39. }
  40. }
  41. static int join_transaction(struct btrfs_root *root)
  42. {
  43. struct btrfs_transaction *cur_trans;
  44. cur_trans = root->fs_info->running_transaction;
  45. if (!cur_trans) {
  46. cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
  47. GFP_NOFS);
  48. total_trans++;
  49. BUG_ON(!cur_trans);
  50. root->fs_info->generation++;
  51. root->fs_info->running_transaction = cur_trans;
  52. cur_trans->num_writers = 0;
  53. cur_trans->transid = root->fs_info->generation;
  54. init_waitqueue_head(&cur_trans->writer_wait);
  55. init_waitqueue_head(&cur_trans->commit_wait);
  56. cur_trans->in_commit = 0;
  57. cur_trans->use_count = 1;
  58. cur_trans->commit_done = 0;
  59. cur_trans->start_time = get_seconds();
  60. list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
  61. init_bit_radix(&cur_trans->dirty_pages);
  62. }
  63. cur_trans->num_writers++;
  64. return 0;
  65. }
  66. static int record_root_in_trans(struct btrfs_root *root)
  67. {
  68. u64 running_trans_id = root->fs_info->running_transaction->transid;
  69. if (root->ref_cows && root->last_trans < running_trans_id) {
  70. WARN_ON(root == root->fs_info->extent_root);
  71. if (root->root_item.refs != 0) {
  72. radix_tree_tag_set(&root->fs_info->fs_roots_radix,
  73. (unsigned long)root->root_key.objectid,
  74. BTRFS_ROOT_TRANS_TAG);
  75. radix_tree_tag_set(&root->fs_info->fs_roots_radix,
  76. (unsigned long)root->root_key.objectid,
  77. BTRFS_ROOT_DEFRAG_TAG);
  78. root->commit_root = root->node;
  79. get_bh(root->node);
  80. } else {
  81. WARN_ON(1);
  82. }
  83. root->last_trans = running_trans_id;
  84. }
  85. return 0;
  86. }
  87. struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
  88. int num_blocks)
  89. {
  90. struct btrfs_trans_handle *h =
  91. kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
  92. int ret;
  93. mutex_lock(&root->fs_info->trans_mutex);
  94. ret = join_transaction(root);
  95. BUG_ON(ret);
  96. record_root_in_trans(root);
  97. h->transid = root->fs_info->running_transaction->transid;
  98. h->transaction = root->fs_info->running_transaction;
  99. h->blocks_reserved = num_blocks;
  100. h->blocks_used = 0;
  101. h->block_group = NULL;
  102. root->fs_info->running_transaction->use_count++;
  103. mutex_unlock(&root->fs_info->trans_mutex);
  104. return h;
  105. }
  106. int btrfs_end_transaction(struct btrfs_trans_handle *trans,
  107. struct btrfs_root *root)
  108. {
  109. struct btrfs_transaction *cur_trans;
  110. mutex_lock(&root->fs_info->trans_mutex);
  111. cur_trans = root->fs_info->running_transaction;
  112. WARN_ON(cur_trans != trans->transaction);
  113. WARN_ON(cur_trans->num_writers < 1);
  114. cur_trans->num_writers--;
  115. if (waitqueue_active(&cur_trans->writer_wait))
  116. wake_up(&cur_trans->writer_wait);
  117. put_transaction(cur_trans);
  118. mutex_unlock(&root->fs_info->trans_mutex);
  119. memset(trans, 0, sizeof(*trans));
  120. kmem_cache_free(btrfs_trans_handle_cachep, trans);
  121. return 0;
  122. }
  123. int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
  124. struct btrfs_root *root)
  125. {
  126. unsigned long gang[16];
  127. int ret;
  128. int i;
  129. int err;
  130. int werr = 0;
  131. struct page *page;
  132. struct radix_tree_root *dirty_pages;
  133. struct inode *btree_inode = root->fs_info->btree_inode;
  134. if (!trans || !trans->transaction) {
  135. return filemap_write_and_wait(btree_inode->i_mapping);
  136. }
  137. dirty_pages = &trans->transaction->dirty_pages;
  138. while(1) {
  139. ret = find_first_radix_bit(dirty_pages, gang,
  140. 0, ARRAY_SIZE(gang));
  141. if (!ret)
  142. break;
  143. for (i = 0; i < ret; i++) {
  144. /* FIXME EIO */
  145. clear_radix_bit(dirty_pages, gang[i]);
  146. page = find_lock_page(btree_inode->i_mapping,
  147. gang[i]);
  148. if (!page)
  149. continue;
  150. if (PageWriteback(page)) {
  151. if (PageDirty(page))
  152. wait_on_page_writeback(page);
  153. else {
  154. unlock_page(page);
  155. page_cache_release(page);
  156. continue;
  157. }
  158. }
  159. err = write_one_page(page, 0);
  160. if (err)
  161. werr = err;
  162. page_cache_release(page);
  163. }
  164. }
  165. err = filemap_fdatawait(btree_inode->i_mapping);
  166. if (err)
  167. werr = err;
  168. return werr;
  169. }
  170. int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
  171. struct btrfs_root *root)
  172. {
  173. int ret;
  174. u64 old_extent_block;
  175. struct btrfs_fs_info *fs_info = root->fs_info;
  176. struct btrfs_root *tree_root = fs_info->tree_root;
  177. struct btrfs_root *extent_root = fs_info->extent_root;
  178. btrfs_write_dirty_block_groups(trans, extent_root);
  179. while(1) {
  180. old_extent_block = btrfs_root_blocknr(&extent_root->root_item);
  181. if (old_extent_block == bh_blocknr(extent_root->node))
  182. break;
  183. btrfs_set_root_blocknr(&extent_root->root_item,
  184. bh_blocknr(extent_root->node));
  185. ret = btrfs_update_root(trans, tree_root,
  186. &extent_root->root_key,
  187. &extent_root->root_item);
  188. BUG_ON(ret);
  189. btrfs_write_dirty_block_groups(trans, extent_root);
  190. }
  191. return 0;
  192. }
  193. static int wait_for_commit(struct btrfs_root *root,
  194. struct btrfs_transaction *commit)
  195. {
  196. DEFINE_WAIT(wait);
  197. mutex_lock(&root->fs_info->trans_mutex);
  198. while(!commit->commit_done) {
  199. prepare_to_wait(&commit->commit_wait, &wait,
  200. TASK_UNINTERRUPTIBLE);
  201. if (commit->commit_done)
  202. break;
  203. mutex_unlock(&root->fs_info->trans_mutex);
  204. schedule();
  205. mutex_lock(&root->fs_info->trans_mutex);
  206. }
  207. mutex_unlock(&root->fs_info->trans_mutex);
  208. finish_wait(&commit->commit_wait, &wait);
  209. return 0;
  210. }
  211. struct dirty_root {
  212. struct list_head list;
  213. struct btrfs_root *root;
  214. };
  215. int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list)
  216. {
  217. struct dirty_root *dirty;
  218. dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
  219. if (!dirty)
  220. return -ENOMEM;
  221. dirty->root = root;
  222. list_add(&dirty->list, dead_list);
  223. return 0;
  224. }
  225. static int add_dirty_roots(struct btrfs_trans_handle *trans,
  226. struct radix_tree_root *radix,
  227. struct list_head *list)
  228. {
  229. struct dirty_root *dirty;
  230. struct btrfs_root *gang[8];
  231. struct btrfs_root *root;
  232. int i;
  233. int ret;
  234. int err = 0;
  235. u32 refs;
  236. while(1) {
  237. ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
  238. ARRAY_SIZE(gang),
  239. BTRFS_ROOT_TRANS_TAG);
  240. if (ret == 0)
  241. break;
  242. for (i = 0; i < ret; i++) {
  243. root = gang[i];
  244. radix_tree_tag_clear(radix,
  245. (unsigned long)root->root_key.objectid,
  246. BTRFS_ROOT_TRANS_TAG);
  247. if (root->commit_root == root->node) {
  248. WARN_ON(bh_blocknr(root->node) !=
  249. btrfs_root_blocknr(&root->root_item));
  250. brelse(root->commit_root);
  251. root->commit_root = NULL;
  252. continue;
  253. }
  254. dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
  255. BUG_ON(!dirty);
  256. dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS);
  257. BUG_ON(!dirty->root);
  258. memset(&root->root_item.drop_progress, 0,
  259. sizeof(struct btrfs_disk_key));
  260. root->root_item.drop_level = 0;
  261. memcpy(dirty->root, root, sizeof(*root));
  262. dirty->root->node = root->commit_root;
  263. root->commit_root = NULL;
  264. root->root_key.offset = root->fs_info->generation;
  265. btrfs_set_root_blocknr(&root->root_item,
  266. bh_blocknr(root->node));
  267. err = btrfs_insert_root(trans, root->fs_info->tree_root,
  268. &root->root_key,
  269. &root->root_item);
  270. if (err)
  271. break;
  272. refs = btrfs_root_refs(&dirty->root->root_item);
  273. btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
  274. err = btrfs_update_root(trans, root->fs_info->tree_root,
  275. &dirty->root->root_key,
  276. &dirty->root->root_item);
  277. BUG_ON(err);
  278. if (refs == 1) {
  279. list_add(&dirty->list, list);
  280. } else {
  281. WARN_ON(1);
  282. kfree(dirty->root);
  283. kfree(dirty);
  284. }
  285. }
  286. }
  287. return err;
  288. }
  289. int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info)
  290. {
  291. struct btrfs_root *gang[1];
  292. struct btrfs_root *root;
  293. struct btrfs_root *tree_root = info->tree_root;
  294. struct btrfs_trans_handle *trans;
  295. int i;
  296. int ret;
  297. int err = 0;
  298. u64 last = 0;
  299. trans = btrfs_start_transaction(tree_root, 1);
  300. while(1) {
  301. ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix,
  302. (void **)gang, last,
  303. ARRAY_SIZE(gang),
  304. BTRFS_ROOT_DEFRAG_TAG);
  305. if (ret == 0)
  306. break;
  307. for (i = 0; i < ret; i++) {
  308. root = gang[i];
  309. last = root->root_key.objectid + 1;
  310. radix_tree_tag_clear(&info->fs_roots_radix,
  311. (unsigned long)root->root_key.objectid,
  312. BTRFS_ROOT_DEFRAG_TAG);
  313. if (root->defrag_running)
  314. continue;
  315. while (1) {
  316. mutex_lock(&root->fs_info->trans_mutex);
  317. record_root_in_trans(root);
  318. mutex_unlock(&root->fs_info->trans_mutex);
  319. root->defrag_running = 1;
  320. err = btrfs_defrag_leaves(trans, root, 1);
  321. btrfs_end_transaction(trans, tree_root);
  322. mutex_unlock(&info->fs_mutex);
  323. btrfs_btree_balance_dirty(root);
  324. cond_resched();
  325. mutex_lock(&info->fs_mutex);
  326. trans = btrfs_start_transaction(tree_root, 1);
  327. if (err != -EAGAIN)
  328. break;
  329. }
  330. root->defrag_running = 0;
  331. }
  332. }
  333. btrfs_end_transaction(trans, tree_root);
  334. return err;
  335. }
  336. static int drop_dirty_roots(struct btrfs_root *tree_root,
  337. struct list_head *list)
  338. {
  339. struct dirty_root *dirty;
  340. struct btrfs_trans_handle *trans;
  341. int ret = 0;
  342. int err;
  343. while(!list_empty(list)) {
  344. mutex_lock(&tree_root->fs_info->fs_mutex);
  345. dirty = list_entry(list->next, struct dirty_root, list);
  346. list_del_init(&dirty->list);
  347. while(1) {
  348. trans = btrfs_start_transaction(tree_root, 1);
  349. ret = btrfs_drop_snapshot(trans, dirty->root);
  350. if (ret != -EAGAIN) {
  351. break;
  352. }
  353. err = btrfs_update_root(trans,
  354. tree_root,
  355. &dirty->root->root_key,
  356. &dirty->root->root_item);
  357. if (err)
  358. ret = err;
  359. ret = btrfs_end_transaction(trans, tree_root);
  360. BUG_ON(ret);
  361. mutex_unlock(&tree_root->fs_info->fs_mutex);
  362. btrfs_btree_balance_dirty(tree_root);
  363. schedule();
  364. mutex_lock(&tree_root->fs_info->fs_mutex);
  365. }
  366. BUG_ON(ret);
  367. ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key);
  368. if (ret)
  369. break;
  370. ret = btrfs_end_transaction(trans, tree_root);
  371. BUG_ON(ret);
  372. kfree(dirty->root);
  373. kfree(dirty);
  374. mutex_unlock(&tree_root->fs_info->fs_mutex);
  375. btrfs_btree_balance_dirty(tree_root);
  376. schedule();
  377. }
  378. return ret;
  379. }
  380. int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
  381. struct btrfs_root *root)
  382. {
  383. int ret = 0;
  384. struct btrfs_transaction *cur_trans;
  385. struct btrfs_transaction *prev_trans = NULL;
  386. struct list_head dirty_fs_roots;
  387. struct radix_tree_root pinned_copy;
  388. DEFINE_WAIT(wait);
  389. init_bit_radix(&pinned_copy);
  390. INIT_LIST_HEAD(&dirty_fs_roots);
  391. mutex_lock(&root->fs_info->trans_mutex);
  392. if (trans->transaction->in_commit) {
  393. cur_trans = trans->transaction;
  394. trans->transaction->use_count++;
  395. mutex_unlock(&root->fs_info->trans_mutex);
  396. btrfs_end_transaction(trans, root);
  397. mutex_unlock(&root->fs_info->fs_mutex);
  398. ret = wait_for_commit(root, cur_trans);
  399. BUG_ON(ret);
  400. put_transaction(cur_trans);
  401. mutex_lock(&root->fs_info->fs_mutex);
  402. return 0;
  403. }
  404. trans->transaction->in_commit = 1;
  405. cur_trans = trans->transaction;
  406. if (cur_trans->list.prev != &root->fs_info->trans_list) {
  407. prev_trans = list_entry(cur_trans->list.prev,
  408. struct btrfs_transaction, list);
  409. if (!prev_trans->commit_done) {
  410. prev_trans->use_count++;
  411. mutex_unlock(&root->fs_info->fs_mutex);
  412. mutex_unlock(&root->fs_info->trans_mutex);
  413. wait_for_commit(root, prev_trans);
  414. put_transaction(prev_trans);
  415. mutex_lock(&root->fs_info->fs_mutex);
  416. mutex_lock(&root->fs_info->trans_mutex);
  417. }
  418. }
  419. while (trans->transaction->num_writers > 1) {
  420. WARN_ON(cur_trans != trans->transaction);
  421. prepare_to_wait(&trans->transaction->writer_wait, &wait,
  422. TASK_UNINTERRUPTIBLE);
  423. if (trans->transaction->num_writers <= 1)
  424. break;
  425. mutex_unlock(&root->fs_info->fs_mutex);
  426. mutex_unlock(&root->fs_info->trans_mutex);
  427. schedule();
  428. mutex_lock(&root->fs_info->fs_mutex);
  429. mutex_lock(&root->fs_info->trans_mutex);
  430. finish_wait(&trans->transaction->writer_wait, &wait);
  431. }
  432. finish_wait(&trans->transaction->writer_wait, &wait);
  433. WARN_ON(cur_trans != trans->transaction);
  434. ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
  435. &dirty_fs_roots);
  436. BUG_ON(ret);
  437. ret = btrfs_commit_tree_roots(trans, root);
  438. BUG_ON(ret);
  439. cur_trans = root->fs_info->running_transaction;
  440. root->fs_info->running_transaction = NULL;
  441. btrfs_set_super_generation(&root->fs_info->super_copy,
  442. cur_trans->transid);
  443. btrfs_set_super_root(&root->fs_info->super_copy,
  444. bh_blocknr(root->fs_info->tree_root->node));
  445. memcpy(root->fs_info->disk_super, &root->fs_info->super_copy,
  446. sizeof(root->fs_info->super_copy));
  447. btrfs_copy_pinned(root, &pinned_copy);
  448. mutex_unlock(&root->fs_info->trans_mutex);
  449. mutex_unlock(&root->fs_info->fs_mutex);
  450. ret = btrfs_write_and_wait_transaction(trans, root);
  451. BUG_ON(ret);
  452. write_ctree_super(trans, root);
  453. mutex_lock(&root->fs_info->fs_mutex);
  454. btrfs_finish_extent_commit(trans, root, &pinned_copy);
  455. mutex_lock(&root->fs_info->trans_mutex);
  456. cur_trans->commit_done = 1;
  457. wake_up(&cur_trans->commit_wait);
  458. put_transaction(cur_trans);
  459. put_transaction(cur_trans);
  460. if (root->fs_info->closing)
  461. list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
  462. else
  463. list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
  464. mutex_unlock(&root->fs_info->trans_mutex);
  465. kmem_cache_free(btrfs_trans_handle_cachep, trans);
  466. if (root->fs_info->closing) {
  467. mutex_unlock(&root->fs_info->fs_mutex);
  468. drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
  469. mutex_lock(&root->fs_info->fs_mutex);
  470. }
  471. return ret;
  472. }
  473. void btrfs_transaction_cleaner(struct work_struct *work)
  474. {
  475. struct btrfs_fs_info *fs_info = container_of(work,
  476. struct btrfs_fs_info,
  477. trans_work.work);
  478. struct btrfs_root *root = fs_info->tree_root;
  479. struct btrfs_transaction *cur;
  480. struct btrfs_trans_handle *trans;
  481. struct list_head dirty_roots;
  482. unsigned long now;
  483. unsigned long delay = HZ * 30;
  484. int ret;
  485. INIT_LIST_HEAD(&dirty_roots);
  486. mutex_lock(&root->fs_info->fs_mutex);
  487. mutex_lock(&root->fs_info->trans_mutex);
  488. cur = root->fs_info->running_transaction;
  489. if (!cur) {
  490. mutex_unlock(&root->fs_info->trans_mutex);
  491. goto out;
  492. }
  493. now = get_seconds();
  494. if (now < cur->start_time || now - cur->start_time < 30) {
  495. mutex_unlock(&root->fs_info->trans_mutex);
  496. delay = HZ * 5;
  497. goto out;
  498. }
  499. mutex_unlock(&root->fs_info->trans_mutex);
  500. btrfs_defrag_dirty_roots(root->fs_info);
  501. trans = btrfs_start_transaction(root, 1);
  502. ret = btrfs_commit_transaction(trans, root);
  503. out:
  504. mutex_unlock(&root->fs_info->fs_mutex);
  505. mutex_lock(&root->fs_info->trans_mutex);
  506. list_splice_init(&root->fs_info->dead_roots, &dirty_roots);
  507. mutex_unlock(&root->fs_info->trans_mutex);
  508. if (!list_empty(&dirty_roots)) {
  509. drop_dirty_roots(root, &dirty_roots);
  510. }
  511. btrfs_transaction_queue_work(root, delay);
  512. }
  513. void btrfs_transaction_queue_work(struct btrfs_root *root, int delay)
  514. {
  515. queue_delayed_work(trans_wq, &root->fs_info->trans_work, delay);
  516. }
  517. void btrfs_transaction_flush_work(struct btrfs_root *root)
  518. {
  519. cancel_rearming_delayed_workqueue(trans_wq, &root->fs_info->trans_work);
  520. flush_workqueue(trans_wq);
  521. }
  522. void __init btrfs_init_transaction_sys(void)
  523. {
  524. trans_wq = create_workqueue("btrfs");
  525. }
  526. void __exit btrfs_exit_transaction_sys(void)
  527. {
  528. destroy_workqueue(trans_wq);
  529. }