disk-io.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540
  1. #include <linux/module.h>
  2. #include <linux/fs.h>
  3. #include <linux/blkdev.h>
  4. #include <linux/crypto.h>
  5. #include <linux/scatterlist.h>
  6. #include <linux/swap.h>
  7. #include <linux/radix-tree.h>
  8. #include "ctree.h"
  9. #include "disk-io.h"
  10. #include "transaction.h"
  11. #include "btrfs_inode.h"
  12. static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf)
  13. {
  14. struct btrfs_node *node = btrfs_buffer_node(buf);
  15. if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) {
  16. BUG();
  17. }
  18. return 0;
  19. }
  20. struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr)
  21. {
  22. struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
  23. int blockbits = root->fs_info->sb->s_blocksize_bits;
  24. unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits);
  25. struct page *page;
  26. struct buffer_head *bh;
  27. struct buffer_head *head;
  28. struct buffer_head *ret = NULL;
  29. page = find_lock_page(mapping, index);
  30. if (!page)
  31. return NULL;
  32. if (!page_has_buffers(page))
  33. goto out_unlock;
  34. head = page_buffers(page);
  35. bh = head;
  36. do {
  37. if (buffer_mapped(bh) && bh->b_blocknr == blocknr) {
  38. ret = bh;
  39. get_bh(bh);
  40. goto out_unlock;
  41. }
  42. bh = bh->b_this_page;
  43. } while (bh != head);
  44. out_unlock:
  45. unlock_page(page);
  46. if (ret) {
  47. touch_buffer(ret);
  48. }
  49. page_cache_release(page);
  50. return ret;
  51. }
  52. struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root,
  53. u64 blocknr)
  54. {
  55. struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
  56. int blockbits = root->fs_info->sb->s_blocksize_bits;
  57. unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits);
  58. struct page *page;
  59. struct buffer_head *bh;
  60. struct buffer_head *head;
  61. struct buffer_head *ret = NULL;
  62. u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits);
  63. page = grab_cache_page(mapping, index);
  64. if (!page)
  65. return NULL;
  66. if (!page_has_buffers(page))
  67. create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0);
  68. head = page_buffers(page);
  69. bh = head;
  70. do {
  71. if (!buffer_mapped(bh)) {
  72. bh->b_bdev = root->fs_info->sb->s_bdev;
  73. bh->b_blocknr = first_block;
  74. set_buffer_mapped(bh);
  75. }
  76. if (bh->b_blocknr == blocknr) {
  77. ret = bh;
  78. get_bh(bh);
  79. goto out_unlock;
  80. }
  81. bh = bh->b_this_page;
  82. first_block++;
  83. } while (bh != head);
  84. out_unlock:
  85. unlock_page(page);
  86. if (ret)
  87. touch_buffer(ret);
  88. page_cache_release(page);
  89. return ret;
  90. }
  91. static sector_t max_block(struct block_device *bdev)
  92. {
  93. sector_t retval = ~((sector_t)0);
  94. loff_t sz = i_size_read(bdev->bd_inode);
  95. if (sz) {
  96. unsigned int size = block_size(bdev);
  97. unsigned int sizebits = blksize_bits(size);
  98. retval = (sz >> sizebits);
  99. }
  100. return retval;
  101. }
  102. static int btree_get_block(struct inode *inode, sector_t iblock,
  103. struct buffer_head *bh, int create)
  104. {
  105. if (iblock >= max_block(inode->i_sb->s_bdev)) {
  106. if (create)
  107. return -EIO;
  108. /*
  109. * for reads, we're just trying to fill a partial page.
  110. * return a hole, they will have to call get_block again
  111. * before they can fill it, and they will get -EIO at that
  112. * time
  113. */
  114. return 0;
  115. }
  116. bh->b_bdev = inode->i_sb->s_bdev;
  117. bh->b_blocknr = iblock;
  118. set_buffer_mapped(bh);
  119. return 0;
  120. }
  121. int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len,
  122. char *result)
  123. {
  124. struct scatterlist sg;
  125. struct crypto_hash *tfm = root->fs_info->hash_tfm;
  126. struct hash_desc desc;
  127. int ret;
  128. desc.tfm = tfm;
  129. desc.flags = 0;
  130. sg_init_one(&sg, data, len);
  131. spin_lock(&root->fs_info->hash_lock);
  132. ret = crypto_hash_digest(&desc, &sg, 1, result);
  133. spin_unlock(&root->fs_info->hash_lock);
  134. if (ret) {
  135. printk("sha256 digest failed\n");
  136. }
  137. return ret;
  138. }
  139. static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh,
  140. int verify)
  141. {
  142. char result[BTRFS_CSUM_SIZE];
  143. int ret;
  144. struct btrfs_node *node;
  145. ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE,
  146. bh->b_size - BTRFS_CSUM_SIZE, result);
  147. if (ret)
  148. return ret;
  149. if (verify) {
  150. if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) {
  151. printk("checksum verify failed on %lu\n",
  152. bh->b_blocknr);
  153. return 1;
  154. }
  155. } else {
  156. node = btrfs_buffer_node(bh);
  157. memcpy(node->header.csum, result, BTRFS_CSUM_SIZE);
  158. }
  159. return 0;
  160. }
  161. static int btree_writepage(struct page *page, struct writeback_control *wbc)
  162. {
  163. struct buffer_head *bh;
  164. struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
  165. struct buffer_head *head;
  166. if (!page_has_buffers(page)) {
  167. create_empty_buffers(page, root->fs_info->sb->s_blocksize,
  168. (1 << BH_Dirty)|(1 << BH_Uptodate));
  169. }
  170. head = page_buffers(page);
  171. bh = head;
  172. do {
  173. if (buffer_dirty(bh))
  174. csum_tree_block(root, bh, 0);
  175. bh = bh->b_this_page;
  176. } while (bh != head);
  177. return block_write_full_page(page, btree_get_block, wbc);
  178. }
  179. static int btree_readpage(struct file * file, struct page * page)
  180. {
  181. return block_read_full_page(page, btree_get_block);
  182. }
/* address_space operations for the private btree inode's mapping */
static struct address_space_operations btree_aops = {
	.readpage = btree_readpage,
	.writepage = btree_writepage,
	.sync_page = block_sync_page,
};
/*
 * Read tree block @blocknr, issuing synchronous I/O when it is not
 * already up to date in the page cache.  Returns a referenced
 * buffer_head, or NULL if the read failed.
 */
struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr)
{
	struct buffer_head *bh = NULL;

	bh = btrfs_find_create_tree_block(root, blocknr);
	if (!bh)
		return bh;
	if (buffer_uptodate(bh))
		goto uptodate;
	lock_buffer(bh);
	if (!buffer_uptodate(bh)) {
		/* extra ref for the I/O; end_buffer_read_sync drops it
		 * and unlocks the buffer on completion */
		get_bh(bh);
		bh->b_end_io = end_buffer_read_sync;
		submit_bh(READ, bh);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			goto fail;
		/*
		 * NOTE(review): the verify result is discarded, so a
		 * block with a bad checksum is still returned after the
		 * printk — confirm this is intentional.
		 */
		csum_tree_block(root, bh, 1);
	} else {
		/* someone else completed the read while we waited */
		unlock_buffer(bh);
	}
uptodate:
	if (check_tree_block(root, bh))
		BUG();
	return bh;
fail:
	brelse(bh);
	return NULL;
}
  216. int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  217. struct buffer_head *buf)
  218. {
  219. WARN_ON(atomic_read(&buf->b_count) == 0);
  220. mark_buffer_dirty(buf);
  221. return 0;
  222. }
  223. int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  224. struct buffer_head *buf)
  225. {
  226. WARN_ON(atomic_read(&buf->b_count) == 0);
  227. clear_buffer_dirty(buf);
  228. return 0;
  229. }
  230. static int __setup_root(int blocksize,
  231. struct btrfs_root *root,
  232. struct btrfs_fs_info *fs_info,
  233. u64 objectid)
  234. {
  235. root->node = NULL;
  236. root->inode = NULL;
  237. root->commit_root = NULL;
  238. root->blocksize = blocksize;
  239. root->ref_cows = 0;
  240. root->fs_info = fs_info;
  241. root->objectid = objectid;
  242. root->last_trans = 0;
  243. root->highest_inode = 0;
  244. root->last_inode_alloc = 0;
  245. memset(&root->root_key, 0, sizeof(root->root_key));
  246. memset(&root->root_item, 0, sizeof(root->root_item));
  247. return 0;
  248. }
  249. static int find_and_setup_root(int blocksize,
  250. struct btrfs_root *tree_root,
  251. struct btrfs_fs_info *fs_info,
  252. u64 objectid,
  253. struct btrfs_root *root)
  254. {
  255. int ret;
  256. __setup_root(blocksize, root, fs_info, objectid);
  257. ret = btrfs_find_last_root(tree_root, objectid,
  258. &root->root_item, &root->root_key);
  259. BUG_ON(ret);
  260. root->node = read_tree_block(root,
  261. btrfs_root_blocknr(&root->root_item));
  262. BUG_ON(!root->node);
  263. return 0;
  264. }
/*
 * Look up (or load from disk) the fs root named by @location.  Roots
 * are cached in fs_roots_radix keyed by objectid; a cache hit returns
 * the existing root.  Returns the root or an ERR_PTR on failure.
 */
struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
				      struct btrfs_key *location)
{
	struct btrfs_root *root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path;
	struct btrfs_leaf *l;
	u64 highest_inode;
	int ret = 0;

	printk("read_fs_root looking for %Lu %Lu %u\n", location->objectid, location->offset, location->flags);
	root = radix_tree_lookup(&fs_info->fs_roots_radix,
				 (unsigned long)location->objectid);
	if (root) {
		printk("found %p in cache\n", root);
		return root;
	}
	root = kmalloc(sizeof(*root), GFP_NOFS);
	if (!root) {
		printk("failed1\n");
		return ERR_PTR(-ENOMEM);
	}
	if (location->offset == (u64)-1) {
		/*
		 * offset -1 asks for the latest version: read the most
		 * recent root item directly instead of searching the
		 * root tree for a specific key.
		 */
		ret = find_and_setup_root(fs_info->sb->s_blocksize,
					  fs_info->tree_root, fs_info,
					  location->objectid, root);
		if (ret) {
			printk("failed2\n");
			kfree(root);
			return ERR_PTR(ret);
		}
		goto insert;
	}
	__setup_root(fs_info->sb->s_blocksize, root, fs_info,
		     location->objectid);
	path = btrfs_alloc_path();
	BUG_ON(!path);
	ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
	if (ret != 0) {
		printk("internal search_slot gives us %d\n", ret);
		/* ret > 0 means the key was not found */
		if (ret > 0)
			ret = -ENOENT;
		goto out;
	}
	/* copy the root item out of the leaf the search landed on */
	l = btrfs_buffer_leaf(path->nodes[0]);
	memcpy(&root->root_item,
	       btrfs_item_ptr(l, path->slots[0], struct btrfs_root_item),
	       sizeof(root->root_item));
	memcpy(&root->root_key, location, sizeof(*location));
	ret = 0;
out:
	btrfs_release_path(root, path);
	btrfs_free_path(path);
	if (ret) {
		kfree(root);
		return ERR_PTR(ret);
	}
	root->node = read_tree_block(root,
				     btrfs_root_blocknr(&root->root_item));
	BUG_ON(!root->node);
insert:
	printk("inserting %p\n", root);
	root->ref_cows = 1;
	ret = radix_tree_insert(&fs_info->fs_roots_radix,
				(unsigned long)root->root_key.objectid,
				root);
	if (ret) {
		printk("radix_tree_insert gives us %d\n", ret);
		brelse(root->node);
		kfree(root);
		return ERR_PTR(ret);
	}
	/* seed the inode allocator from the highest inode seen on disk */
	ret = btrfs_find_highest_inode(root, &highest_inode);
	if (ret == 0) {
		root->highest_inode = highest_inode;
		root->last_inode_alloc = highest_inode;
		printk("highest inode is %Lu\n", highest_inode);
	}
	printk("all worked\n");
	return root;
}
  345. struct btrfs_root *open_ctree(struct super_block *sb)
  346. {
  347. struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root),
  348. GFP_NOFS);
  349. struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root),
  350. GFP_NOFS);
  351. struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
  352. GFP_NOFS);
  353. int ret;
  354. struct btrfs_super_block *disk_super;
  355. init_bit_radix(&fs_info->pinned_radix);
  356. init_bit_radix(&fs_info->pending_del_radix);
  357. INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
  358. sb_set_blocksize(sb, 4096);
  359. fs_info->running_transaction = NULL;
  360. fs_info->tree_root = tree_root;
  361. fs_info->extent_root = extent_root;
  362. fs_info->sb = sb;
  363. fs_info->btree_inode = new_inode(sb);
  364. fs_info->btree_inode->i_ino = 1;
  365. fs_info->btree_inode->i_nlink = 1;
  366. fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
  367. fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
  368. BTRFS_I(fs_info->btree_inode)->root = tree_root;
  369. memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
  370. sizeof(struct btrfs_key));
  371. insert_inode_hash(fs_info->btree_inode);
  372. mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
  373. fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC);
  374. spin_lock_init(&fs_info->hash_lock);
  375. if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) {
  376. printk("failed to allocate sha256 hash\n");
  377. return NULL;
  378. }
  379. mutex_init(&fs_info->trans_mutex);
  380. mutex_init(&fs_info->fs_mutex);
  381. memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert));
  382. memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert));
  383. __setup_root(sb->s_blocksize, tree_root,
  384. fs_info, BTRFS_ROOT_TREE_OBJECTID);
  385. fs_info->sb_buffer = read_tree_block(tree_root,
  386. BTRFS_SUPER_INFO_OFFSET /
  387. sb->s_blocksize);
  388. if (!fs_info->sb_buffer)
  389. return NULL;
  390. disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data;
  391. if (!btrfs_super_root(disk_super))
  392. return NULL;
  393. fs_info->disk_super = disk_super;
  394. tree_root->node = read_tree_block(tree_root,
  395. btrfs_super_root(disk_super));
  396. BUG_ON(!tree_root->node);
  397. mutex_lock(&fs_info->fs_mutex);
  398. ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info,
  399. BTRFS_EXTENT_TREE_OBJECTID, extent_root);
  400. BUG_ON(ret);
  401. fs_info->generation = btrfs_super_generation(disk_super) + 1;
  402. memset(&fs_info->kobj, 0, sizeof(fs_info->kobj));
  403. kobj_set_kset_s(fs_info, btrfs_subsys);
  404. kobject_set_name(&fs_info->kobj, "%s", sb->s_id);
  405. kobject_register(&fs_info->kobj);
  406. mutex_unlock(&fs_info->fs_mutex);
  407. return tree_root;
  408. }
/*
 * Synchronously write the superblock buffer after pointing it at the
 * current tree root and recomputing its checksum.  Returns 0 on
 * success, -EIO if the write failed.
 */
int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root)
{
	struct buffer_head *bh = root->fs_info->sb_buffer;

	btrfs_set_super_root(root->fs_info->disk_super,
			     root->fs_info->tree_root->node->b_blocknr);
	lock_buffer(bh);
	WARN_ON(atomic_read(&bh->b_count) < 1);
	clear_buffer_dirty(bh);
	csum_tree_block(root, bh, 0);
	/* extra ref for the I/O; end_buffer_write_sync drops it and
	 * unlocks the buffer on completion */
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(WRITE, bh);
	wait_on_buffer(bh);
	if (!buffer_uptodate(bh)) {
		WARN_ON(1);
		return -EIO;
	}
	return 0;
}
/*
 * Drop a cached fs root: remove it from the radix tree, release its
 * inode and tree buffers, and free the structure itself.
 */
static int free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
	radix_tree_delete(&fs_info->fs_roots_radix,
			  (unsigned long)root->root_key.objectid);
	if (root->inode)
		iput(root->inode);
	if (root->node)
		brelse(root->node);
	if (root->commit_root)
		brelse(root->commit_root);
	kfree(root);
	return 0;
}
  442. int del_fs_roots(struct btrfs_fs_info *fs_info)
  443. {
  444. int ret;
  445. struct btrfs_root *gang[8];
  446. int i;
  447. while(1) {
  448. ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
  449. (void **)gang, 0,
  450. ARRAY_SIZE(gang));
  451. if (!ret)
  452. break;
  453. for (i = 0; i < ret; i++)
  454. free_fs_root(fs_info, gang[i]);
  455. }
  456. return 0;
  457. }
/*
 * Unmount-time teardown: commit twice (the second commit drops the
 * snapshot taken by the first), flush the final transaction and
 * superblock, then release every cached buffer, inode and root.
 */
int close_ctree(struct btrfs_root *root)
{
	int ret;
	struct btrfs_trans_handle *trans;
	struct btrfs_fs_info *fs_info = root->fs_info;

	mutex_lock(&fs_info->fs_mutex);
	trans = btrfs_start_transaction(root, 1);
	btrfs_commit_transaction(trans, root);
	/* run commit again to drop the original snapshot */
	trans = btrfs_start_transaction(root, 1);
	btrfs_commit_transaction(trans, root);
	ret = btrfs_write_and_wait_transaction(NULL, root);
	BUG_ON(ret);
	write_ctree_super(NULL, root);
	mutex_unlock(&fs_info->fs_mutex);

	if (fs_info->extent_root->node)
		btrfs_block_release(fs_info->extent_root,
				    fs_info->extent_root->node);
	if (fs_info->tree_root->node)
		btrfs_block_release(fs_info->tree_root,
				    fs_info->tree_root->node);
	btrfs_block_release(root, fs_info->sb_buffer);
	crypto_free_hash(fs_info->hash_tfm);
	/* drop all cached btree pages before freeing the inode */
	truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
	iput(fs_info->btree_inode);
	del_fs_roots(fs_info);
	kfree(fs_info->extent_root);
	kfree(fs_info->tree_root);
	kobject_unregister(&fs_info->kobj);
	return 0;
}
  489. void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf)
  490. {
  491. brelse(buf);
  492. }