/*
 * disk-io.c — btrfs metadata I/O: tree-block page-cache lookup and
 * creation, block checksumming, superblock read/write, and filesystem
 * open/close (open_ctree / close_ctree).
 */
  1. #include <linux/module.h>
  2. #include <linux/fs.h>
  3. #include <linux/blkdev.h>
  4. #include <linux/crypto.h>
  5. #include <linux/scatterlist.h>
  6. #include <linux/swap.h>
  7. #include <linux/radix-tree.h>
  8. #include "ctree.h"
  9. #include "disk-io.h"
  10. #include "transaction.h"
  11. #include "btrfs_inode.h"
  12. static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf)
  13. {
  14. struct btrfs_node *node = btrfs_buffer_node(buf);
  15. if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) {
  16. BUG();
  17. }
  18. return 0;
  19. }
/*
 * Look up an already-cached tree block in the btree inode's page cache.
 *
 * Returns a referenced buffer_head for @blocknr, or NULL when the page
 * (or a mapped buffer for that block on it) is not present.  The caller
 * must drop the reference with brelse().
 */
struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr)
{
	struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
	int blockbits = root->fs_info->sb->s_blocksize_bits;
	/* page index holding this block (several blocks fit per page) */
	unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits);
	struct page *page;
	struct buffer_head *bh;
	struct buffer_head *head;
	struct buffer_head *ret = NULL;

	page = find_lock_page(mapping, index);
	if (!page)
		return NULL;
	if (!page_has_buffers(page))
		goto out_unlock;
	/* walk the circular list of buffers hanging off the page */
	head = page_buffers(page);
	bh = head;
	do {
		if (buffer_mapped(bh) && bh->b_blocknr == blocknr) {
			ret = bh;
			get_bh(bh);	/* reference handed to the caller */
			goto out_unlock;
		}
		bh = bh->b_this_page;
	} while (bh != head);
out_unlock:
	unlock_page(page);
	if (ret) {
		/* mark recently used so reclaim keeps it around */
		touch_buffer(ret);
	}
	page_cache_release(page);	/* drop find_lock_page()'s page ref */
	return ret;
}
/*
 * Find or create the buffer_head for @blocknr in the btree inode's page
 * cache, attaching and mapping empty buffers to the page as needed.
 *
 * Returns a referenced buffer_head (caller must brelse()), or NULL when
 * the page could not be obtained.
 */
struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root,
						 u64 blocknr)
{
	struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
	int blockbits = root->fs_info->sb->s_blocksize_bits;
	unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits);
	struct page *page;
	struct buffer_head *bh;
	struct buffer_head *head;
	struct buffer_head *ret = NULL;
	/* block number of the first buffer on this page */
	u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits);

	page = grab_cache_page(mapping, index);
	if (!page)
		return NULL;
	if (!page_has_buffers(page))
		create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0);
	head = page_buffers(page);
	bh = head;
	do {
		if (!buffer_mapped(bh)) {
			/* map each buffer to its on-disk block as we walk */
			bh->b_bdev = root->fs_info->sb->s_bdev;
			bh->b_blocknr = first_block;
			set_buffer_mapped(bh);
		}
		if (bh->b_blocknr == blocknr) {
			ret = bh;
			get_bh(bh);	/* reference handed to the caller */
			goto out_unlock;
		}
		bh = bh->b_this_page;
		first_block++;
	} while (bh != head);
out_unlock:
	unlock_page(page);
	if (ret)
		touch_buffer(ret);
	page_cache_release(page);	/* drop grab_cache_page()'s page ref */
	return ret;
}
  91. static sector_t max_block(struct block_device *bdev)
  92. {
  93. sector_t retval = ~((sector_t)0);
  94. loff_t sz = i_size_read(bdev->bd_inode);
  95. if (sz) {
  96. unsigned int size = block_size(bdev);
  97. unsigned int sizebits = blksize_bits(size);
  98. retval = (sz >> sizebits);
  99. }
  100. return retval;
  101. }
  102. static int btree_get_block(struct inode *inode, sector_t iblock,
  103. struct buffer_head *bh, int create)
  104. {
  105. if (iblock >= max_block(inode->i_sb->s_bdev)) {
  106. if (create)
  107. return -EIO;
  108. /*
  109. * for reads, we're just trying to fill a partial page.
  110. * return a hole, they will have to call get_block again
  111. * before they can fill it, and they will get -EIO at that
  112. * time
  113. */
  114. return 0;
  115. }
  116. bh->b_bdev = inode->i_sb->s_bdev;
  117. bh->b_blocknr = iblock;
  118. set_buffer_mapped(bh);
  119. return 0;
  120. }
  121. int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len,
  122. char *result)
  123. {
  124. struct scatterlist sg;
  125. struct crypto_hash *tfm = root->fs_info->hash_tfm;
  126. struct hash_desc desc;
  127. int ret;
  128. desc.tfm = tfm;
  129. desc.flags = 0;
  130. sg_init_one(&sg, data, len);
  131. spin_lock(&root->fs_info->hash_lock);
  132. ret = crypto_hash_digest(&desc, &sg, 1, result);
  133. spin_unlock(&root->fs_info->hash_lock);
  134. if (ret) {
  135. printk("sha256 digest failed\n");
  136. }
  137. return ret;
  138. }
  139. static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh,
  140. int verify)
  141. {
  142. char result[BTRFS_CSUM_SIZE];
  143. int ret;
  144. struct btrfs_node *node;
  145. ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE,
  146. bh->b_size - BTRFS_CSUM_SIZE, result);
  147. if (ret)
  148. return ret;
  149. if (verify) {
  150. if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) {
  151. printk("checksum verify failed on %lu\n",
  152. bh->b_blocknr);
  153. return 1;
  154. }
  155. } else {
  156. node = btrfs_buffer_node(bh);
  157. memcpy(node->header.csum, result, BTRFS_CSUM_SIZE);
  158. }
  159. return 0;
  160. }
/*
 * writepage for the btree inode: stamp a fresh checksum into every
 * dirty buffer on the page, then hand the page to the generic
 * buffer-head writeback path.
 */
static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
	struct buffer_head *bh;
	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
	struct buffer_head *head;

	if (!page_has_buffers(page)) {
		create_empty_buffers(page, root->fs_info->sb->s_blocksize,
				     (1 << BH_Dirty)|(1 << BH_Uptodate));
	}
	head = page_buffers(page);
	bh = head;
	do {
		/* csum each block that is about to hit disk */
		if (buffer_dirty(bh))
			csum_tree_block(root, bh, 0);
		bh = bh->b_this_page;
	} while (bh != head);
	return block_write_full_page(page, btree_get_block, wbc);
}
  179. static int btree_readpage(struct file * file, struct page * page)
  180. {
  181. return block_read_full_page(page, btree_get_block);
  182. }
/*
 * Address-space operations for the btree inode, whose page cache holds
 * all metadata blocks; writepage checksums blocks on the way out.
 */
static struct address_space_operations btree_aops = {
	.readpage = btree_readpage,
	.writepage = btree_writepage,
	.sync_page = block_sync_page,
};
  188. struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr)
  189. {
  190. struct buffer_head *bh = NULL;
  191. bh = btrfs_find_create_tree_block(root, blocknr);
  192. if (!bh)
  193. return bh;
  194. if (buffer_uptodate(bh))
  195. goto uptodate;
  196. lock_buffer(bh);
  197. if (!buffer_uptodate(bh)) {
  198. get_bh(bh);
  199. bh->b_end_io = end_buffer_read_sync;
  200. submit_bh(READ, bh);
  201. wait_on_buffer(bh);
  202. if (!buffer_uptodate(bh))
  203. goto fail;
  204. csum_tree_block(root, bh, 1);
  205. } else {
  206. unlock_buffer(bh);
  207. }
  208. uptodate:
  209. if (check_tree_block(root, bh))
  210. BUG();
  211. return bh;
  212. fail:
  213. brelse(bh);
  214. return NULL;
  215. }
  216. int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  217. struct buffer_head *buf)
  218. {
  219. WARN_ON(atomic_read(&buf->b_count) == 0);
  220. mark_buffer_dirty(buf);
  221. return 0;
  222. }
  223. int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  224. struct buffer_head *buf)
  225. {
  226. WARN_ON(atomic_read(&buf->b_count) == 0);
  227. clear_buffer_dirty(buf);
  228. return 0;
  229. }
  230. static int __setup_root(int blocksize,
  231. struct btrfs_root *root,
  232. struct btrfs_fs_info *fs_info,
  233. u64 objectid)
  234. {
  235. root->node = NULL;
  236. root->inode = NULL;
  237. root->commit_root = NULL;
  238. root->blocksize = blocksize;
  239. root->ref_cows = 0;
  240. root->fs_info = fs_info;
  241. root->objectid = objectid;
  242. root->last_trans = 0;
  243. memset(&root->root_key, 0, sizeof(root->root_key));
  244. memset(&root->root_item, 0, sizeof(root->root_item));
  245. return 0;
  246. }
/*
 * Set up @root for @objectid: find its latest root item in the tree of
 * tree roots and read the referenced root block.  BUGs on any failure,
 * so the 0 return is unconditional.
 */
static int find_and_setup_root(int blocksize,
			       struct btrfs_root *tree_root,
			       struct btrfs_fs_info *fs_info,
			       u64 objectid,
			       struct btrfs_root *root)
{
	int ret;

	__setup_root(blocksize, root, fs_info, objectid);
	ret = btrfs_find_last_root(tree_root, objectid,
				   &root->root_item, &root->root_key);
	BUG_ON(ret);
	root->node = read_tree_block(root,
				     btrfs_root_blocknr(&root->root_item));
	BUG_ON(!root->node);
	return 0;
}
/*
 * Read the btrfs_root described by @location and cache it in
 * fs_roots_radix (keyed by the root's own pointer value).
 *
 * offset == (u64)-1 means "latest committed root" and goes through
 * find_and_setup_root(); otherwise the root item is looked up directly
 * in the tree of tree roots.  Returns the root or an ERR_PTR.  The
 * printk()s are debugging output.
 */
struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
				      struct btrfs_key *location)
{
	struct btrfs_root *root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path;
	struct btrfs_leaf *l;
	int ret = 0;

	printk("read_fs_root looking for %Lu %Lu %u\n", location->objectid, location->offset, location->flags);
	root = kmalloc(sizeof(*root), GFP_NOFS);
	if (!root) {
		printk("failed1\n");
		return ERR_PTR(-ENOMEM);
	}
	if (location->offset == (u64)-1) {
		/* latest root: let find_and_setup_root read it */
		ret = find_and_setup_root(fs_info->sb->s_blocksize,
					  fs_info->tree_root, fs_info,
					  location->objectid, root);
		if (ret) {
			printk("failed2\n");
			kfree(root);
			return ERR_PTR(ret);
		}
		goto insert;
	}
	__setup_root(fs_info->sb->s_blocksize, root, fs_info,
		     location->objectid);
	path = btrfs_alloc_path();
	BUG_ON(!path);
	ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
	if (ret != 0) {
		printk("internal search_slot gives us %d\n", ret);
		if (ret > 0)	/* key not found, only a larger one */
			ret = -ENOENT;
		goto out;
	}
	/* copy the root item out of the leaf we landed on */
	l = btrfs_buffer_leaf(path->nodes[0]);
	memcpy(&root->root_item,
	       btrfs_item_ptr(l, path->slots[0], struct btrfs_root_item),
	       sizeof(root->root_item));
	memcpy(&root->root_key, location, sizeof(*location));
	ret = 0;
out:
	btrfs_release_path(root, path);
	btrfs_free_path(path);
	if (ret) {
		kfree(root);
		return ERR_PTR(ret);
	}
	root->node = read_tree_block(root,
				     btrfs_root_blocknr(&root->root_item));
	BUG_ON(!root->node);
insert:
	printk("inserting %p\n", root);
	root->ref_cows = 1;	/* subvolume roots are copy-on-write */
	ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root,
				root);
	if (ret) {
		printk("radix_tree_insert gives us %d\n", ret);
		brelse(root->node);
		kfree(root);
		return ERR_PTR(ret);
	}
	printk("all worked\n");
	return root;
}
  329. struct btrfs_root *open_ctree(struct super_block *sb)
  330. {
  331. struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root),
  332. GFP_NOFS);
  333. struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root),
  334. GFP_NOFS);
  335. struct btrfs_root *inode_root = kmalloc(sizeof(struct btrfs_root),
  336. GFP_NOFS);
  337. struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
  338. GFP_NOFS);
  339. int ret;
  340. struct btrfs_super_block *disk_super;
  341. init_bit_radix(&fs_info->pinned_radix);
  342. init_bit_radix(&fs_info->pending_del_radix);
  343. INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
  344. sb_set_blocksize(sb, 4096);
  345. fs_info->running_transaction = NULL;
  346. fs_info->tree_root = tree_root;
  347. fs_info->extent_root = extent_root;
  348. fs_info->inode_root = inode_root;
  349. fs_info->last_inode_alloc = 0;
  350. fs_info->highest_inode = 0;
  351. fs_info->sb = sb;
  352. fs_info->btree_inode = new_inode(sb);
  353. fs_info->btree_inode->i_ino = 1;
  354. fs_info->btree_inode->i_nlink = 1;
  355. fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
  356. fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
  357. BTRFS_I(fs_info->btree_inode)->root = tree_root;
  358. memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
  359. sizeof(struct btrfs_key));
  360. insert_inode_hash(fs_info->btree_inode);
  361. mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
  362. fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC);
  363. spin_lock_init(&fs_info->hash_lock);
  364. if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) {
  365. printk("failed to allocate sha256 hash\n");
  366. return NULL;
  367. }
  368. mutex_init(&fs_info->trans_mutex);
  369. mutex_init(&fs_info->fs_mutex);
  370. memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert));
  371. memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert));
  372. __setup_root(sb->s_blocksize, tree_root,
  373. fs_info, BTRFS_ROOT_TREE_OBJECTID);
  374. fs_info->sb_buffer = read_tree_block(tree_root,
  375. BTRFS_SUPER_INFO_OFFSET /
  376. sb->s_blocksize);
  377. if (!fs_info->sb_buffer)
  378. return NULL;
  379. disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data;
  380. if (!btrfs_super_root(disk_super))
  381. return NULL;
  382. fs_info->disk_super = disk_super;
  383. tree_root->node = read_tree_block(tree_root,
  384. btrfs_super_root(disk_super));
  385. BUG_ON(!tree_root->node);
  386. mutex_lock(&fs_info->fs_mutex);
  387. ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info,
  388. BTRFS_EXTENT_TREE_OBJECTID, extent_root);
  389. BUG_ON(ret);
  390. ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info,
  391. BTRFS_INODE_MAP_OBJECTID, inode_root);
  392. BUG_ON(ret);
  393. fs_info->generation = btrfs_super_generation(disk_super) + 1;
  394. ret = btrfs_find_highest_inode(tree_root, &fs_info->last_inode_alloc);
  395. if (ret == 0)
  396. fs_info->highest_inode = fs_info->last_inode_alloc;
  397. memset(&fs_info->kobj, 0, sizeof(fs_info->kobj));
  398. kobj_set_kset_s(fs_info, btrfs_subsys);
  399. kobject_set_name(&fs_info->kobj, "%s", sb->s_id);
  400. kobject_register(&fs_info->kobj);
  401. mutex_unlock(&fs_info->fs_mutex);
  402. return tree_root;
  403. }
/*
 * Synchronously write the superblock buffer: point the super at the
 * current tree root block, checksum the buffer while it is locked, and
 * wait for the write to complete.
 *
 * Returns 0 on success or -EIO if the write failed.
 */
int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root)
{
	struct buffer_head *bh = root->fs_info->sb_buffer;

	btrfs_set_super_root(root->fs_info->disk_super,
			     root->fs_info->tree_root->node->b_blocknr);
	lock_buffer(bh);
	WARN_ON(atomic_read(&bh->b_count) < 1);
	/* clear dirty now; writeback must not race with this submit */
	clear_buffer_dirty(bh);
	csum_tree_block(root, bh, 0);
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);	/* extra ref consumed by the end_io handler */
	submit_bh(WRITE, bh);
	wait_on_buffer(bh);
	if (!buffer_uptodate(bh)) {
		WARN_ON(1);
		return -EIO;
	}
	return 0;
}
  424. int del_fs_roots(struct btrfs_fs_info *fs_info)
  425. {
  426. int ret;
  427. struct btrfs_root *gang[8];
  428. int i;
  429. while(1) {
  430. ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
  431. (void **)gang, 0,
  432. ARRAY_SIZE(gang));
  433. if (!ret)
  434. break;
  435. for (i = 0; i < ret; i++) {
  436. radix_tree_delete(&fs_info->fs_roots_radix,
  437. (unsigned long)gang[i]);
  438. if (gang[i]->inode)
  439. iput(gang[i]->inode);
  440. else
  441. printk("no inode for root %p\n", gang[i]);
  442. if (gang[i]->node)
  443. brelse(gang[i]->node);
  444. if (gang[i]->commit_root)
  445. brelse(gang[i]->commit_root);
  446. kfree(gang[i]);
  447. }
  448. }
  449. return 0;
  450. }
/*
 * Unmount-time teardown: commit outstanding work (twice, so the
 * original snapshot is dropped), write the superblock, then release
 * every cached tree block, the btree inode, and the per-fs structures.
 */
int close_ctree(struct btrfs_root *root)
{
	int ret;
	struct btrfs_trans_handle *trans;
	struct btrfs_fs_info *fs_info = root->fs_info;

	mutex_lock(&fs_info->fs_mutex);
	trans = btrfs_start_transaction(root, 1);
	btrfs_commit_transaction(trans, root);
	/* run commit again to drop the original snapshot */
	trans = btrfs_start_transaction(root, 1);
	btrfs_commit_transaction(trans, root);
	ret = btrfs_write_and_wait_transaction(NULL, root);
	BUG_ON(ret);
	write_ctree_super(NULL, root);
	mutex_unlock(&fs_info->fs_mutex);

	if (fs_info->extent_root->node)
		btrfs_block_release(fs_info->extent_root,
				    fs_info->extent_root->node);
	if (fs_info->inode_root->node)
		btrfs_block_release(fs_info->inode_root,
				    fs_info->inode_root->node);
	if (fs_info->tree_root->node)
		btrfs_block_release(fs_info->tree_root,
				    fs_info->tree_root->node);
	btrfs_block_release(root, fs_info->sb_buffer);
	crypto_free_hash(fs_info->hash_tfm);
	/* drop every cached metadata page before the inode goes away */
	truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
	iput(fs_info->btree_inode);
	del_fs_roots(fs_info);
	kfree(fs_info->extent_root);
	kfree(fs_info->inode_root);
	kfree(fs_info->tree_root);
	kobject_unregister(&fs_info->kobj);
	return 0;
}
/* Drop one reference on a tree block's buffer_head. */
void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf)
{
	brelse(buf);
}