/*
 * disk-io.c -- low-level tree block I/O for btrfs: page-cache lookup and
 * creation of tree blocks, sha256 checksumming, and filesystem root
 * setup/teardown (open_ctree/close_ctree).
 */
  1. #include <linux/module.h>
  2. #include <linux/fs.h>
  3. #include <linux/blkdev.h>
  4. #include <linux/crypto.h>
  5. #include <linux/scatterlist.h>
  6. #include <linux/swap.h>
  7. #include <linux/radix-tree.h>
  8. #include "ctree.h"
  9. #include "disk-io.h"
  10. #include "transaction.h"
  11. #include "btrfs_inode.h"
  12. static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf)
  13. {
  14. struct btrfs_node *node = btrfs_buffer_node(buf);
  15. if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) {
  16. BUG();
  17. }
  18. return 0;
  19. }
  20. struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr)
  21. {
  22. struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
  23. int blockbits = root->fs_info->sb->s_blocksize_bits;
  24. unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits);
  25. struct page *page;
  26. struct buffer_head *bh;
  27. struct buffer_head *head;
  28. struct buffer_head *ret = NULL;
  29. page = find_lock_page(mapping, index);
  30. if (!page)
  31. return NULL;
  32. if (!page_has_buffers(page))
  33. goto out_unlock;
  34. head = page_buffers(page);
  35. bh = head;
  36. do {
  37. if (buffer_mapped(bh) && bh->b_blocknr == blocknr) {
  38. ret = bh;
  39. get_bh(bh);
  40. goto out_unlock;
  41. }
  42. bh = bh->b_this_page;
  43. } while (bh != head);
  44. out_unlock:
  45. unlock_page(page);
  46. if (ret) {
  47. touch_buffer(ret);
  48. }
  49. page_cache_release(page);
  50. return ret;
  51. }
/*
 * Find a tree block in the page cache, creating the page and its
 * buffer_heads if needed.  Any unmapped buffer encountered while walking
 * the page is mapped to its on-disk location, so the target block is
 * always mapped by the time it is found.  Returns a referenced
 * buffer_head, or NULL if the page could not be obtained.
 */
struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root,
						 u64 blocknr)
{
	struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
	int blockbits = root->fs_info->sb->s_blocksize_bits;
	unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits);
	struct page *page;
	struct buffer_head *bh;
	struct buffer_head *head;
	struct buffer_head *ret = NULL;
	/*
	 * Block number of the first buffer on this page; incremented once
	 * per buffer as the loop advances.
	 * NOTE(review): index is unsigned long, so the shift happens before
	 * widening to u64 -- TODO confirm this cannot truncate on 32-bit.
	 */
	u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits);

	page = grab_cache_page(mapping, index);
	if (!page)
		return NULL;
	if (!page_has_buffers(page))
		create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0);
	head = page_buffers(page);
	bh = head;
	do {
		if (!buffer_mapped(bh)) {
			/* lazily map this buffer to the device */
			bh->b_bdev = root->fs_info->sb->s_bdev;
			bh->b_blocknr = first_block;
			set_buffer_mapped(bh);
		}
		if (bh->b_blocknr == blocknr) {
			ret = bh;
			get_bh(bh);
			goto out_unlock;
		}
		bh = bh->b_this_page;
		first_block++;
	} while (bh != head);
out_unlock:
	unlock_page(page);
	if (ret)
		touch_buffer(ret);
	page_cache_release(page);
	return ret;
}
  91. static sector_t max_block(struct block_device *bdev)
  92. {
  93. sector_t retval = ~((sector_t)0);
  94. loff_t sz = i_size_read(bdev->bd_inode);
  95. if (sz) {
  96. unsigned int size = block_size(bdev);
  97. unsigned int sizebits = blksize_bits(size);
  98. retval = (sz >> sizebits);
  99. }
  100. return retval;
  101. }
  102. static int btree_get_block(struct inode *inode, sector_t iblock,
  103. struct buffer_head *bh, int create)
  104. {
  105. if (iblock >= max_block(inode->i_sb->s_bdev)) {
  106. if (create)
  107. return -EIO;
  108. /*
  109. * for reads, we're just trying to fill a partial page.
  110. * return a hole, they will have to call get_block again
  111. * before they can fill it, and they will get -EIO at that
  112. * time
  113. */
  114. return 0;
  115. }
  116. bh->b_bdev = inode->i_sb->s_bdev;
  117. bh->b_blocknr = iblock;
  118. set_buffer_mapped(bh);
  119. return 0;
  120. }
  121. int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len,
  122. char *result)
  123. {
  124. struct scatterlist sg;
  125. struct crypto_hash *tfm = root->fs_info->hash_tfm;
  126. struct hash_desc desc;
  127. int ret;
  128. desc.tfm = tfm;
  129. desc.flags = 0;
  130. sg_init_one(&sg, data, len);
  131. spin_lock(&root->fs_info->hash_lock);
  132. ret = crypto_hash_digest(&desc, &sg, 1, result);
  133. spin_unlock(&root->fs_info->hash_lock);
  134. if (ret) {
  135. printk("sha256 digest failed\n");
  136. }
  137. return ret;
  138. }
  139. static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh,
  140. int verify)
  141. {
  142. char result[BTRFS_CSUM_SIZE];
  143. int ret;
  144. struct btrfs_node *node;
  145. ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE,
  146. bh->b_size - BTRFS_CSUM_SIZE, result);
  147. if (ret)
  148. return ret;
  149. if (verify) {
  150. if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) {
  151. printk("checksum verify failed on %lu\n",
  152. bh->b_blocknr);
  153. return 1;
  154. }
  155. } else {
  156. node = btrfs_buffer_node(bh);
  157. memcpy(node->header.csum, result, BTRFS_CSUM_SIZE);
  158. }
  159. return 0;
  160. }
  161. static int btree_writepage(struct page *page, struct writeback_control *wbc)
  162. {
  163. struct buffer_head *bh;
  164. struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
  165. struct buffer_head *head;
  166. if (!page_has_buffers(page)) {
  167. create_empty_buffers(page, root->fs_info->sb->s_blocksize,
  168. (1 << BH_Dirty)|(1 << BH_Uptodate));
  169. }
  170. head = page_buffers(page);
  171. bh = head;
  172. do {
  173. if (buffer_dirty(bh))
  174. csum_tree_block(root, bh, 0);
  175. bh = bh->b_this_page;
  176. } while (bh != head);
  177. return block_write_full_page(page, btree_get_block, wbc);
  178. }
  179. static int btree_readpage(struct file * file, struct page * page)
  180. {
  181. return block_read_full_page(page, btree_get_block);
  182. }
/* address_space operations for the btree inode's metadata page cache */
static struct address_space_operations btree_aops = {
	.readpage = btree_readpage,
	.writepage = btree_writepage,
	.sync_page = block_sync_page,
};
  188. struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr)
  189. {
  190. struct buffer_head *bh = NULL;
  191. bh = btrfs_find_create_tree_block(root, blocknr);
  192. if (!bh)
  193. return bh;
  194. if (buffer_uptodate(bh))
  195. goto uptodate;
  196. lock_buffer(bh);
  197. if (!buffer_uptodate(bh)) {
  198. get_bh(bh);
  199. bh->b_end_io = end_buffer_read_sync;
  200. submit_bh(READ, bh);
  201. wait_on_buffer(bh);
  202. if (!buffer_uptodate(bh))
  203. goto fail;
  204. csum_tree_block(root, bh, 1);
  205. } else {
  206. unlock_buffer(bh);
  207. }
  208. uptodate:
  209. if (check_tree_block(root, bh))
  210. BUG();
  211. return bh;
  212. fail:
  213. brelse(bh);
  214. return NULL;
  215. }
  216. int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  217. struct buffer_head *buf)
  218. {
  219. WARN_ON(atomic_read(&buf->b_count) == 0);
  220. mark_buffer_dirty(buf);
  221. return 0;
  222. }
  223. int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  224. struct buffer_head *buf)
  225. {
  226. WARN_ON(atomic_read(&buf->b_count) == 0);
  227. clear_buffer_dirty(buf);
  228. return 0;
  229. }
  230. static int __setup_root(int blocksize,
  231. struct btrfs_root *root,
  232. struct btrfs_fs_info *fs_info,
  233. u64 objectid)
  234. {
  235. root->node = NULL;
  236. root->inode = NULL;
  237. root->commit_root = NULL;
  238. root->blocksize = blocksize;
  239. root->ref_cows = 0;
  240. root->fs_info = fs_info;
  241. root->objectid = objectid;
  242. root->last_trans = 0;
  243. root->highest_inode = 0;
  244. root->last_inode_alloc = 0;
  245. memset(&root->root_key, 0, sizeof(root->root_key));
  246. memset(&root->root_item, 0, sizeof(root->root_item));
  247. return 0;
  248. }
  249. static int find_and_setup_root(int blocksize,
  250. struct btrfs_root *tree_root,
  251. struct btrfs_fs_info *fs_info,
  252. u64 objectid,
  253. struct btrfs_root *root)
  254. {
  255. int ret;
  256. __setup_root(blocksize, root, fs_info, objectid);
  257. ret = btrfs_find_last_root(tree_root, objectid,
  258. &root->root_item, &root->root_key);
  259. BUG_ON(ret);
  260. root->node = read_tree_block(root,
  261. btrfs_root_blocknr(&root->root_item));
  262. BUG_ON(!root->node);
  263. return 0;
  264. }
/*
 * Look up a subvolume root by its key in the root tree, instantiate it,
 * insert it into fs_info->fs_roots_radix and return it.  An offset of
 * (u64)-1 means "latest version of this root" and is loaded through
 * find_and_setup_root() instead of a tree search.  Returns an ERR_PTR
 * on failure.
 */
struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
				      struct btrfs_key *location)
{
	struct btrfs_root *root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path;
	struct btrfs_leaf *l;
	u64 highest_inode;
	int ret = 0;

	printk("read_fs_root looking for %Lu %Lu %u\n", location->objectid, location->offset, location->flags);
	root = kmalloc(sizeof(*root), GFP_NOFS);
	if (!root) {
		printk("failed1\n");
		return ERR_PTR(-ENOMEM);
	}
	if (location->offset == (u64)-1) {
		/* offset -1: read the most recent copy of this root */
		ret = find_and_setup_root(fs_info->sb->s_blocksize,
					  fs_info->tree_root, fs_info,
					  location->objectid, root);
		if (ret) {
			printk("failed2\n");
			kfree(root);
			return ERR_PTR(ret);
		}
		goto insert;
	}
	__setup_root(fs_info->sb->s_blocksize, root, fs_info,
		     location->objectid);
	path = btrfs_alloc_path();
	BUG_ON(!path);
	ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
	if (ret != 0) {
		printk("internal search_slot gives us %d\n", ret);
		/* ret > 0 means "not found" from search_slot */
		if (ret > 0)
			ret = -ENOENT;
		goto out;
	}
	/* copy the root item out of the leaf we landed on */
	l = btrfs_buffer_leaf(path->nodes[0]);
	memcpy(&root->root_item,
	       btrfs_item_ptr(l, path->slots[0], struct btrfs_root_item),
	       sizeof(root->root_item));
	memcpy(&root->root_key, location, sizeof(*location));
	ret = 0;
out:
	btrfs_release_path(root, path);
	btrfs_free_path(path);
	if (ret) {
		kfree(root);
		return ERR_PTR(ret);
	}
	root->node = read_tree_block(root,
				     btrfs_root_blocknr(&root->root_item));
	BUG_ON(!root->node);
insert:
	printk("inserting %p\n", root);
	root->ref_cows = 1;
	/* the radix tree is keyed by the root's memory address */
	ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root,
				root);
	if (ret) {
		printk("radix_tree_insert gives us %d\n", ret);
		brelse(root->node);
		kfree(root);
		return ERR_PTR(ret);
	}
	/* seed the inode allocator from the highest existing inode */
	ret = btrfs_find_highest_inode(root, &highest_inode);
	if (ret == 0) {
		root->highest_inode = highest_inode;
		root->last_inode_alloc = highest_inode;
		printk("highest inode is %Lu\n", highest_inode);
	}
	printk("all worked\n");
	return root;
}
  338. struct btrfs_root *open_ctree(struct super_block *sb)
  339. {
  340. struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root),
  341. GFP_NOFS);
  342. struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root),
  343. GFP_NOFS);
  344. struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
  345. GFP_NOFS);
  346. int ret;
  347. struct btrfs_super_block *disk_super;
  348. init_bit_radix(&fs_info->pinned_radix);
  349. init_bit_radix(&fs_info->pending_del_radix);
  350. INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
  351. sb_set_blocksize(sb, 4096);
  352. fs_info->running_transaction = NULL;
  353. fs_info->tree_root = tree_root;
  354. fs_info->extent_root = extent_root;
  355. fs_info->sb = sb;
  356. fs_info->btree_inode = new_inode(sb);
  357. fs_info->btree_inode->i_ino = 1;
  358. fs_info->btree_inode->i_nlink = 1;
  359. fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
  360. fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
  361. BTRFS_I(fs_info->btree_inode)->root = tree_root;
  362. memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
  363. sizeof(struct btrfs_key));
  364. insert_inode_hash(fs_info->btree_inode);
  365. mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
  366. fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC);
  367. spin_lock_init(&fs_info->hash_lock);
  368. if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) {
  369. printk("failed to allocate sha256 hash\n");
  370. return NULL;
  371. }
  372. mutex_init(&fs_info->trans_mutex);
  373. mutex_init(&fs_info->fs_mutex);
  374. memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert));
  375. memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert));
  376. __setup_root(sb->s_blocksize, tree_root,
  377. fs_info, BTRFS_ROOT_TREE_OBJECTID);
  378. fs_info->sb_buffer = read_tree_block(tree_root,
  379. BTRFS_SUPER_INFO_OFFSET /
  380. sb->s_blocksize);
  381. if (!fs_info->sb_buffer)
  382. return NULL;
  383. disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data;
  384. if (!btrfs_super_root(disk_super))
  385. return NULL;
  386. fs_info->disk_super = disk_super;
  387. tree_root->node = read_tree_block(tree_root,
  388. btrfs_super_root(disk_super));
  389. BUG_ON(!tree_root->node);
  390. mutex_lock(&fs_info->fs_mutex);
  391. ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info,
  392. BTRFS_EXTENT_TREE_OBJECTID, extent_root);
  393. BUG_ON(ret);
  394. fs_info->generation = btrfs_super_generation(disk_super) + 1;
  395. memset(&fs_info->kobj, 0, sizeof(fs_info->kobj));
  396. kobj_set_kset_s(fs_info, btrfs_subsys);
  397. kobject_set_name(&fs_info->kobj, "%s", sb->s_id);
  398. kobject_register(&fs_info->kobj);
  399. mutex_unlock(&fs_info->fs_mutex);
  400. return tree_root;
  401. }
/*
 * Synchronously write the super block buffer: point it at the current
 * tree root block, re-checksum it, submit it and wait for completion.
 * Returns 0 on success or -EIO if the write did not complete cleanly.
 */
int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root)
{
	struct buffer_head *bh = root->fs_info->sb_buffer;

	btrfs_set_super_root(root->fs_info->disk_super,
			     root->fs_info->tree_root->node->b_blocknr);
	lock_buffer(bh);
	WARN_ON(atomic_read(&bh->b_count) < 1);
	/* we submit by hand, so keep writeback from racing with us */
	clear_buffer_dirty(bh);
	/* recompute the checksum over the just-updated super */
	csum_tree_block(root, bh, 0);
	bh->b_end_io = end_buffer_write_sync;
	/* extra ref for the I/O; end_buffer_write_sync drops it */
	get_bh(bh);
	submit_bh(WRITE, bh);
	wait_on_buffer(bh);
	if (!buffer_uptodate(bh)) {
		WARN_ON(1);
		return -EIO;
	}
	return 0;
}
  422. int del_fs_roots(struct btrfs_fs_info *fs_info)
  423. {
  424. int ret;
  425. struct btrfs_root *gang[8];
  426. int i;
  427. while(1) {
  428. ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
  429. (void **)gang, 0,
  430. ARRAY_SIZE(gang));
  431. if (!ret)
  432. break;
  433. for (i = 0; i < ret; i++) {
  434. radix_tree_delete(&fs_info->fs_roots_radix,
  435. (unsigned long)gang[i]);
  436. if (gang[i]->inode)
  437. iput(gang[i]->inode);
  438. else
  439. printk("no inode for root %p\n", gang[i]);
  440. if (gang[i]->node)
  441. brelse(gang[i]->node);
  442. if (gang[i]->commit_root)
  443. brelse(gang[i]->commit_root);
  444. kfree(gang[i]);
  445. }
  446. }
  447. return 0;
  448. }
/*
 * Unmount-time teardown: commit the running transaction twice (the
 * second commit drops the snapshot taken by the first), flush and wait
 * for the final transaction, write the super block, then release every
 * cached tree block, the hash transform, the btree inode and all
 * registered roots.  Always returns 0.
 */
int close_ctree(struct btrfs_root *root)
{
	int ret;
	struct btrfs_trans_handle *trans;
	struct btrfs_fs_info *fs_info = root->fs_info;

	mutex_lock(&fs_info->fs_mutex);
	trans = btrfs_start_transaction(root, 1);
	btrfs_commit_transaction(trans, root);
	/* run commit again to drop the original snapshot */
	trans = btrfs_start_transaction(root, 1);
	btrfs_commit_transaction(trans, root);
	ret = btrfs_write_and_wait_transaction(NULL, root);
	BUG_ON(ret);
	write_ctree_super(NULL, root);
	mutex_unlock(&fs_info->fs_mutex);
	if (fs_info->extent_root->node)
		btrfs_block_release(fs_info->extent_root,
				    fs_info->extent_root->node);
	if (fs_info->tree_root->node)
		btrfs_block_release(fs_info->tree_root,
				    fs_info->tree_root->node);
	btrfs_block_release(root, fs_info->sb_buffer);
	crypto_free_hash(fs_info->hash_tfm);
	/* drop all cached metadata pages before dropping the inode */
	truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
	iput(fs_info->btree_inode);
	del_fs_roots(fs_info);
	kfree(fs_info->extent_root);
	kfree(fs_info->tree_root);
	kobject_unregister(&fs_info->kobj);
	return 0;
}
/* Drop one reference on a tree block buffer. */
void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf)
{
	brelse(buf);
}