disk-io.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019
  1. /*
  2. * Copyright (C) 2007 Oracle. All rights reserved.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public
  6. * License v2 as published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public
  14. * License along with this program; if not, write to the
  15. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16. * Boston, MA 02111-1307, USA.
  17. */
  18. #include <linux/fs.h>
  19. #include <linux/blkdev.h>
  20. #include <linux/crc32c.h>
  21. #include <linux/scatterlist.h>
  22. #include <linux/swap.h>
  23. #include <linux/radix-tree.h>
  24. #include <linux/writeback.h>
  25. #include <linux/buffer_head.h> // for block_sync_page
  26. #include "ctree.h"
  27. #include "disk-io.h"
  28. #include "transaction.h"
  29. #include "btrfs_inode.h"
  30. #include "print-tree.h"
  #if 0
  /*
   * Compiled-out sanity check: compares the block number reported by
   * extent_buffer_blocknr() with the one stored in the buffer's header.
   * Logs at KERN_CRIT and returns 1 when they differ, 0 when they match.
   */
  static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
  {
      if (extent_buffer_blocknr(buf) == btrfs_header_blocknr(buf))
          return 0;

      printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n",
             (unsigned long long)extent_buffer_blocknr(buf),
             (unsigned long long)btrfs_header_blocknr(buf));
      return 1;
  }
  #endif
  43. static struct extent_io_ops btree_extent_io_ops;
  44. struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
  45. u64 bytenr, u32 blocksize)
  46. {
  47. struct inode *btree_inode = root->fs_info->btree_inode;
  48. struct extent_buffer *eb;
  49. eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
  50. bytenr, blocksize, GFP_NOFS);
  51. return eb;
  52. }
  53. struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
  54. u64 bytenr, u32 blocksize)
  55. {
  56. struct inode *btree_inode = root->fs_info->btree_inode;
  57. struct extent_buffer *eb;
  58. eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
  59. bytenr, blocksize, NULL, GFP_NOFS);
  60. return eb;
  61. }
  62. struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
  63. size_t page_offset, u64 start, u64 len,
  64. int create)
  65. {
  66. struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
  67. struct extent_map *em;
  68. int ret;
  69. again:
  70. spin_lock(&em_tree->lock);
  71. em = lookup_extent_mapping(em_tree, start, len);
  72. spin_unlock(&em_tree->lock);
  73. if (em) {
  74. goto out;
  75. }
  76. em = alloc_extent_map(GFP_NOFS);
  77. if (!em) {
  78. em = ERR_PTR(-ENOMEM);
  79. goto out;
  80. }
  81. em->start = 0;
  82. em->len = i_size_read(inode);
  83. em->block_start = 0;
  84. em->bdev = inode->i_sb->s_bdev;
  85. spin_lock(&em_tree->lock);
  86. ret = add_extent_mapping(em_tree, em);
  87. spin_unlock(&em_tree->lock);
  88. if (ret == -EEXIST) {
  89. free_extent_map(em);
  90. em = NULL;
  91. goto again;
  92. } else if (ret) {
  93. em = ERR_PTR(ret);
  94. }
  95. out:
  96. return em;
  97. }
  98. u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
  99. {
  100. return crc32c(seed, data, len);
  101. }
  102. void btrfs_csum_final(u32 crc, char *result)
  103. {
  104. *(__le32 *)result = ~cpu_to_le32(crc);
  105. }
  106. static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
  107. int verify)
  108. {
  109. char result[BTRFS_CRC32_SIZE];
  110. unsigned long len;
  111. unsigned long cur_len;
  112. unsigned long offset = BTRFS_CSUM_SIZE;
  113. char *map_token = NULL;
  114. char *kaddr;
  115. unsigned long map_start;
  116. unsigned long map_len;
  117. int err;
  118. u32 crc = ~(u32)0;
  119. len = buf->len - offset;
  120. while(len > 0) {
  121. err = map_private_extent_buffer(buf, offset, 32,
  122. &map_token, &kaddr,
  123. &map_start, &map_len, KM_USER0);
  124. if (err) {
  125. printk("failed to map extent buffer! %lu\n",
  126. offset);
  127. return 1;
  128. }
  129. cur_len = min(len, map_len - (offset - map_start));
  130. crc = btrfs_csum_data(root, kaddr + offset - map_start,
  131. crc, cur_len);
  132. len -= cur_len;
  133. offset += cur_len;
  134. unmap_extent_buffer(buf, map_token, KM_USER0);
  135. }
  136. btrfs_csum_final(crc, result);
  137. if (verify) {
  138. int from_this_trans = 0;
  139. if (root->fs_info->running_transaction &&
  140. btrfs_header_generation(buf) ==
  141. root->fs_info->running_transaction->transid)
  142. from_this_trans = 1;
  143. /* FIXME, this is not good */
  144. if (from_this_trans == 0 &&
  145. memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) {
  146. u32 val;
  147. u32 found = 0;
  148. memcpy(&found, result, BTRFS_CRC32_SIZE);
  149. read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE);
  150. printk("btrfs: %s checksum verify failed on %llu "
  151. "wanted %X found %X from_this_trans %d\n",
  152. root->fs_info->sb->s_id,
  153. buf->start, val, found, from_this_trans);
  154. return 1;
  155. }
  156. } else {
  157. write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE);
  158. }
  159. return 0;
  160. }
  161. int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
  162. {
  163. struct extent_io_tree *tree;
  164. u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
  165. u64 found_start;
  166. int found_level;
  167. unsigned long len;
  168. struct extent_buffer *eb;
  169. tree = &BTRFS_I(page->mapping->host)->io_tree;
  170. if (page->private == EXTENT_PAGE_PRIVATE)
  171. goto out;
  172. if (!page->private)
  173. goto out;
  174. len = page->private >> 2;
  175. if (len == 0) {
  176. WARN_ON(1);
  177. }
  178. eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
  179. read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1,
  180. btree_get_extent);
  181. btrfs_clear_buffer_defrag(eb);
  182. found_start = btrfs_header_bytenr(eb);
  183. if (found_start != start) {
  184. printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n",
  185. start, found_start, len);
  186. WARN_ON(1);
  187. goto err;
  188. }
  189. if (eb->first_page != page) {
  190. printk("bad first page %lu %lu\n", eb->first_page->index,
  191. page->index);
  192. WARN_ON(1);
  193. goto err;
  194. }
  195. if (!PageUptodate(page)) {
  196. printk("csum not up to date page %lu\n", page->index);
  197. WARN_ON(1);
  198. goto err;
  199. }
  200. found_level = btrfs_header_level(eb);
  201. csum_tree_block(root, eb, 0);
  202. err:
  203. free_extent_buffer(eb);
  204. out:
  205. return 0;
  206. }
  207. static int btree_writepage_io_hook(struct page *page, u64 start, u64 end)
  208. {
  209. struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
  210. csum_dirty_buffer(root, page);
  211. return 0;
  212. }
  213. static int btree_writepage(struct page *page, struct writeback_control *wbc)
  214. {
  215. struct extent_io_tree *tree;
  216. tree = &BTRFS_I(page->mapping->host)->io_tree;
  217. return extent_write_full_page(tree, page, btree_get_extent, wbc);
  218. }
  219. static int btree_writepages(struct address_space *mapping,
  220. struct writeback_control *wbc)
  221. {
  222. struct extent_io_tree *tree;
  223. tree = &BTRFS_I(mapping->host)->io_tree;
  224. if (wbc->sync_mode == WB_SYNC_NONE) {
  225. u64 num_dirty;
  226. u64 start = 0;
  227. unsigned long thresh = 96 * 1024 * 1024;
  228. if (wbc->for_kupdate)
  229. return 0;
  230. if (current_is_pdflush()) {
  231. thresh = 96 * 1024 * 1024;
  232. } else {
  233. thresh = 8 * 1024 * 1024;
  234. }
  235. num_dirty = count_range_bits(tree, &start, (u64)-1,
  236. thresh, EXTENT_DIRTY);
  237. if (num_dirty < thresh) {
  238. return 0;
  239. }
  240. }
  241. return extent_writepages(tree, mapping, btree_get_extent, wbc);
  242. }
  243. int btree_readpage(struct file *file, struct page *page)
  244. {
  245. struct extent_io_tree *tree;
  246. tree = &BTRFS_I(page->mapping->host)->io_tree;
  247. return extent_read_full_page(tree, page, btree_get_extent);
  248. }
  249. static int btree_releasepage(struct page *page, gfp_t gfp_flags)
  250. {
  251. struct extent_io_tree *tree;
  252. struct extent_map_tree *map;
  253. int ret;
  254. tree = &BTRFS_I(page->mapping->host)->io_tree;
  255. map = &BTRFS_I(page->mapping->host)->extent_tree;
  256. ret = try_release_extent_mapping(map, tree, page, gfp_flags);
  257. if (ret == 1) {
  258. ClearPagePrivate(page);
  259. set_page_private(page, 0);
  260. page_cache_release(page);
  261. }
  262. return ret;
  263. }
  264. static void btree_invalidatepage(struct page *page, unsigned long offset)
  265. {
  266. struct extent_io_tree *tree;
  267. tree = &BTRFS_I(page->mapping->host)->io_tree;
  268. extent_invalidatepage(tree, page, offset);
  269. btree_releasepage(page, GFP_NOFS);
  270. }
  #if 0
  /*
   * Compiled-out buffer_head based writepage.  It creates buffers for the
   * page when none exist, checksums every dirty buffer on the page and then
   * writes the page with block_write_full_page().
   */
  static int btree_writepage(struct page *page, struct writeback_control *wbc)
  {
      struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
      struct buffer_head *head;
      struct buffer_head *cur;

      if (!page_has_buffers(page)) {
          create_empty_buffers(page, root->fs_info->sb->s_blocksize,
                               (1 << BH_Dirty)|(1 << BH_Uptodate));
      }

      head = page_buffers(page);
      cur = head;
      do {
          if (buffer_dirty(cur))
              csum_tree_block(root, cur, 0);
          cur = cur->b_this_page;
      } while (cur != head);

      return block_write_full_page(page, btree_get_block, wbc);
  }
  #endif
  291. static struct address_space_operations btree_aops = {
  292. .readpage = btree_readpage,
  293. .writepage = btree_writepage,
  294. .writepages = btree_writepages,
  295. .releasepage = btree_releasepage,
  296. .invalidatepage = btree_invalidatepage,
  297. .sync_page = block_sync_page,
  298. };
  299. int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
  300. {
  301. struct extent_buffer *buf = NULL;
  302. struct inode *btree_inode = root->fs_info->btree_inode;
  303. int ret = 0;
  304. buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
  305. if (!buf)
  306. return 0;
  307. read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
  308. buf, 0, 0, btree_get_extent);
  309. free_extent_buffer(buf);
  310. return ret;
  311. }
  312. struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
  313. u32 blocksize)
  314. {
  315. struct extent_buffer *buf = NULL;
  316. struct inode *btree_inode = root->fs_info->btree_inode;
  317. struct extent_io_tree *io_tree;
  318. u64 end;
  319. int ret;
  320. io_tree = &BTRFS_I(btree_inode)->io_tree;
  321. buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
  322. if (!buf)
  323. return NULL;
  324. read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1,
  325. btree_get_extent);
  326. if (buf->flags & EXTENT_CSUM)
  327. return buf;
  328. end = buf->start + PAGE_CACHE_SIZE - 1;
  329. if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) {
  330. buf->flags |= EXTENT_CSUM;
  331. return buf;
  332. }
  333. lock_extent(io_tree, buf->start, end, GFP_NOFS);
  334. if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) {
  335. buf->flags |= EXTENT_CSUM;
  336. goto out_unlock;
  337. }
  338. ret = csum_tree_block(root, buf, 1);
  339. set_extent_bits(io_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS);
  340. buf->flags |= EXTENT_CSUM;
  341. out_unlock:
  342. unlock_extent(io_tree, buf->start, end, GFP_NOFS);
  343. return buf;
  344. }
  345. int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  346. struct extent_buffer *buf)
  347. {
  348. struct inode *btree_inode = root->fs_info->btree_inode;
  349. if (btrfs_header_generation(buf) ==
  350. root->fs_info->running_transaction->transid)
  351. clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
  352. buf);
  353. return 0;
  354. }
  355. int wait_on_tree_block_writeback(struct btrfs_root *root,
  356. struct extent_buffer *buf)
  357. {
  358. struct inode *btree_inode = root->fs_info->btree_inode;
  359. wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->io_tree,
  360. buf);
  361. return 0;
  362. }
  363. static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
  364. u32 stripesize, struct btrfs_root *root,
  365. struct btrfs_fs_info *fs_info,
  366. u64 objectid)
  367. {
  368. root->node = NULL;
  369. root->inode = NULL;
  370. root->commit_root = NULL;
  371. root->sectorsize = sectorsize;
  372. root->nodesize = nodesize;
  373. root->leafsize = leafsize;
  374. root->stripesize = stripesize;
  375. root->ref_cows = 0;
  376. root->fs_info = fs_info;
  377. root->objectid = objectid;
  378. root->last_trans = 0;
  379. root->highest_inode = 0;
  380. root->last_inode_alloc = 0;
  381. root->name = NULL;
  382. root->in_sysfs = 0;
  383. memset(&root->root_key, 0, sizeof(root->root_key));
  384. memset(&root->root_item, 0, sizeof(root->root_item));
  385. memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
  386. memset(&root->root_kobj, 0, sizeof(root->root_kobj));
  387. init_completion(&root->kobj_unregister);
  388. root->defrag_running = 0;
  389. root->defrag_level = 0;
  390. root->root_key.objectid = objectid;
  391. return 0;
  392. }
  393. static int find_and_setup_root(struct btrfs_root *tree_root,
  394. struct btrfs_fs_info *fs_info,
  395. u64 objectid,
  396. struct btrfs_root *root)
  397. {
  398. int ret;
  399. u32 blocksize;
  400. __setup_root(tree_root->nodesize, tree_root->leafsize,
  401. tree_root->sectorsize, tree_root->stripesize,
  402. root, fs_info, objectid);
  403. ret = btrfs_find_last_root(tree_root, objectid,
  404. &root->root_item, &root->root_key);
  405. BUG_ON(ret);
  406. blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
  407. root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
  408. blocksize);
  409. BUG_ON(!root->node);
  410. return 0;
  411. }
  412. struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info,
  413. struct btrfs_key *location)
  414. {
  415. struct btrfs_root *root;
  416. struct btrfs_root *tree_root = fs_info->tree_root;
  417. struct btrfs_path *path;
  418. struct extent_buffer *l;
  419. u64 highest_inode;
  420. u32 blocksize;
  421. int ret = 0;
  422. root = kzalloc(sizeof(*root), GFP_NOFS);
  423. if (!root)
  424. return ERR_PTR(-ENOMEM);
  425. if (location->offset == (u64)-1) {
  426. ret = find_and_setup_root(tree_root, fs_info,
  427. location->objectid, root);
  428. if (ret) {
  429. kfree(root);
  430. return ERR_PTR(ret);
  431. }
  432. goto insert;
  433. }
  434. __setup_root(tree_root->nodesize, tree_root->leafsize,
  435. tree_root->sectorsize, tree_root->stripesize,
  436. root, fs_info, location->objectid);
  437. path = btrfs_alloc_path();
  438. BUG_ON(!path);
  439. ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
  440. if (ret != 0) {
  441. if (ret > 0)
  442. ret = -ENOENT;
  443. goto out;
  444. }
  445. l = path->nodes[0];
  446. read_extent_buffer(l, &root->root_item,
  447. btrfs_item_ptr_offset(l, path->slots[0]),
  448. sizeof(root->root_item));
  449. memcpy(&root->root_key, location, sizeof(*location));
  450. ret = 0;
  451. out:
  452. btrfs_release_path(root, path);
  453. btrfs_free_path(path);
  454. if (ret) {
  455. kfree(root);
  456. return ERR_PTR(ret);
  457. }
  458. blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
  459. root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
  460. blocksize);
  461. BUG_ON(!root->node);
  462. insert:
  463. root->ref_cows = 1;
  464. ret = btrfs_find_highest_inode(root, &highest_inode);
  465. if (ret == 0) {
  466. root->highest_inode = highest_inode;
  467. root->last_inode_alloc = highest_inode;
  468. }
  469. return root;
  470. }
  471. struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
  472. u64 root_objectid)
  473. {
  474. struct btrfs_root *root;
  475. if (root_objectid == BTRFS_ROOT_TREE_OBJECTID)
  476. return fs_info->tree_root;
  477. if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID)
  478. return fs_info->extent_root;
  479. root = radix_tree_lookup(&fs_info->fs_roots_radix,
  480. (unsigned long)root_objectid);
  481. return root;
  482. }
  483. struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
  484. struct btrfs_key *location)
  485. {
  486. struct btrfs_root *root;
  487. int ret;
  488. if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
  489. return fs_info->tree_root;
  490. if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
  491. return fs_info->extent_root;
  492. root = radix_tree_lookup(&fs_info->fs_roots_radix,
  493. (unsigned long)location->objectid);
  494. if (root)
  495. return root;
  496. root = btrfs_read_fs_root_no_radix(fs_info, location);
  497. if (IS_ERR(root))
  498. return root;
  499. ret = radix_tree_insert(&fs_info->fs_roots_radix,
  500. (unsigned long)root->root_key.objectid,
  501. root);
  502. if (ret) {
  503. free_extent_buffer(root->node);
  504. kfree(root);
  505. return ERR_PTR(ret);
  506. }
  507. ret = btrfs_find_dead_roots(fs_info->tree_root,
  508. root->root_key.objectid, root);
  509. BUG_ON(ret);
  510. return root;
  511. }
  512. struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
  513. struct btrfs_key *location,
  514. const char *name, int namelen)
  515. {
  516. struct btrfs_root *root;
  517. int ret;
  518. root = btrfs_read_fs_root_no_name(fs_info, location);
  519. if (!root)
  520. return NULL;
  521. if (root->in_sysfs)
  522. return root;
  523. ret = btrfs_set_root_name(root, name, namelen);
  524. if (ret) {
  525. free_extent_buffer(root->node);
  526. kfree(root);
  527. return ERR_PTR(ret);
  528. }
  529. ret = btrfs_sysfs_add_root(root);
  530. if (ret) {
  531. free_extent_buffer(root->node);
  532. kfree(root->name);
  533. kfree(root);
  534. return ERR_PTR(ret);
  535. }
  536. root->in_sysfs = 1;
  537. return root;
  538. }
  #if 0
  /*
   * Compiled-out helper: allocates a hasher for the named crypto hash type
   * and adds it to info->hashers under info->hash_lock.
   * Returns 0, -ENOMEM (no memory for the hasher) or -EINVAL (no transform).
   */
  static int add_hasher(struct btrfs_fs_info *info, char *type)
  {
      struct btrfs_hasher *hasher = kmalloc(sizeof(*hasher), GFP_NOFS);

      if (!hasher)
          return -ENOMEM;

      hasher->hash_tfm = crypto_alloc_hash(type, 0, CRYPTO_ALG_ASYNC);
      if (!hasher->hash_tfm) {
          kfree(hasher);
          return -EINVAL;
      }

      spin_lock(&info->hash_lock);
      list_add(&hasher->list, &info->hashers);
      spin_unlock(&info->hash_lock);
      return 0;
  }
  #endif
  556. struct btrfs_root *open_ctree(struct super_block *sb)
  557. {
  558. u32 sectorsize;
  559. u32 nodesize;
  560. u32 leafsize;
  561. u32 blocksize;
  562. u32 stripesize;
  563. struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root),
  564. GFP_NOFS);
  565. struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root),
  566. GFP_NOFS);
  567. struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
  568. GFP_NOFS);
  569. int ret;
  570. int err = -EIO;
  571. struct btrfs_super_block *disk_super;
  572. if (!extent_root || !tree_root || !fs_info) {
  573. err = -ENOMEM;
  574. goto fail;
  575. }
  576. INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
  577. INIT_LIST_HEAD(&fs_info->trans_list);
  578. INIT_LIST_HEAD(&fs_info->dead_roots);
  579. INIT_LIST_HEAD(&fs_info->hashers);
  580. spin_lock_init(&fs_info->hash_lock);
  581. spin_lock_init(&fs_info->delalloc_lock);
  582. spin_lock_init(&fs_info->new_trans_lock);
  583. memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj));
  584. init_completion(&fs_info->kobj_unregister);
  585. sb_set_blocksize(sb, 4096);
  586. fs_info->running_transaction = NULL;
  587. fs_info->last_trans_committed = 0;
  588. fs_info->tree_root = tree_root;
  589. fs_info->extent_root = extent_root;
  590. fs_info->sb = sb;
  591. fs_info->throttles = 0;
  592. fs_info->mount_opt = 0;
  593. fs_info->max_extent = (u64)-1;
  594. fs_info->max_inline = 8192 * 1024;
  595. fs_info->delalloc_bytes = 0;
  596. fs_info->btree_inode = new_inode(sb);
  597. fs_info->btree_inode->i_ino = 1;
  598. fs_info->btree_inode->i_nlink = 1;
  599. fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
  600. fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
  601. extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
  602. fs_info->btree_inode->i_mapping,
  603. GFP_NOFS);
  604. extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
  605. GFP_NOFS);
  606. BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
  607. extent_io_tree_init(&fs_info->free_space_cache,
  608. fs_info->btree_inode->i_mapping, GFP_NOFS);
  609. extent_io_tree_init(&fs_info->block_group_cache,
  610. fs_info->btree_inode->i_mapping, GFP_NOFS);
  611. extent_io_tree_init(&fs_info->pinned_extents,
  612. fs_info->btree_inode->i_mapping, GFP_NOFS);
  613. extent_io_tree_init(&fs_info->pending_del,
  614. fs_info->btree_inode->i_mapping, GFP_NOFS);
  615. extent_io_tree_init(&fs_info->extent_ins,
  616. fs_info->btree_inode->i_mapping, GFP_NOFS);
  617. fs_info->do_barriers = 1;
  618. fs_info->closing = 0;
  619. fs_info->total_pinned = 0;
  620. fs_info->last_alloc = 0;
  621. fs_info->last_data_alloc = 0;
  622. #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
  623. INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
  624. #else
  625. INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner);
  626. #endif
  627. BTRFS_I(fs_info->btree_inode)->root = tree_root;
  628. memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
  629. sizeof(struct btrfs_key));
  630. insert_inode_hash(fs_info->btree_inode);
  631. mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
  632. mutex_init(&fs_info->trans_mutex);
  633. mutex_init(&fs_info->fs_mutex);
  634. #if 0
  635. ret = add_hasher(fs_info, "crc32c");
  636. if (ret) {
  637. printk("btrfs: failed hash setup, modprobe cryptomgr?\n");
  638. err = -ENOMEM;
  639. goto fail_iput;
  640. }
  641. #endif
  642. __setup_root(512, 512, 512, 512, tree_root,
  643. fs_info, BTRFS_ROOT_TREE_OBJECTID);
  644. fs_info->sb_buffer = read_tree_block(tree_root,
  645. BTRFS_SUPER_INFO_OFFSET,
  646. 512);
  647. if (!fs_info->sb_buffer)
  648. goto fail_iput;
  649. read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0,
  650. sizeof(fs_info->super_copy));
  651. read_extent_buffer(fs_info->sb_buffer, fs_info->fsid,
  652. (unsigned long)btrfs_super_fsid(fs_info->sb_buffer),
  653. BTRFS_FSID_SIZE);
  654. disk_super = &fs_info->super_copy;
  655. if (!btrfs_super_root(disk_super))
  656. goto fail_sb_buffer;
  657. nodesize = btrfs_super_nodesize(disk_super);
  658. leafsize = btrfs_super_leafsize(disk_super);
  659. sectorsize = btrfs_super_sectorsize(disk_super);
  660. stripesize = btrfs_super_stripesize(disk_super);
  661. tree_root->nodesize = nodesize;
  662. tree_root->leafsize = leafsize;
  663. tree_root->sectorsize = sectorsize;
  664. tree_root->stripesize = stripesize;
  665. sb_set_blocksize(sb, sectorsize);
  666. i_size_write(fs_info->btree_inode,
  667. btrfs_super_total_bytes(disk_super));
  668. if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
  669. sizeof(disk_super->magic))) {
  670. printk("btrfs: valid FS not found on %s\n", sb->s_id);
  671. goto fail_sb_buffer;
  672. }
  673. blocksize = btrfs_level_size(tree_root,
  674. btrfs_super_root_level(disk_super));
  675. tree_root->node = read_tree_block(tree_root,
  676. btrfs_super_root(disk_super),
  677. blocksize);
  678. if (!tree_root->node)
  679. goto fail_sb_buffer;
  680. mutex_lock(&fs_info->fs_mutex);
  681. ret = find_and_setup_root(tree_root, fs_info,
  682. BTRFS_EXTENT_TREE_OBJECTID, extent_root);
  683. if (ret) {
  684. mutex_unlock(&fs_info->fs_mutex);
  685. goto fail_tree_root;
  686. }
  687. btrfs_read_block_groups(extent_root);
  688. fs_info->generation = btrfs_super_generation(disk_super) + 1;
  689. mutex_unlock(&fs_info->fs_mutex);
  690. return tree_root;
  691. fail_tree_root:
  692. free_extent_buffer(tree_root->node);
  693. fail_sb_buffer:
  694. free_extent_buffer(fs_info->sb_buffer);
  695. fail_iput:
  696. iput(fs_info->btree_inode);
  697. fail:
  698. kfree(extent_root);
  699. kfree(tree_root);
  700. kfree(fs_info);
  701. return ERR_PTR(err);
  702. }
  703. int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
  704. *root)
  705. {
  706. int ret;
  707. struct extent_buffer *super = root->fs_info->sb_buffer;
  708. struct inode *btree_inode = root->fs_info->btree_inode;
  709. struct super_block *sb = root->fs_info->sb;
  710. if (!btrfs_test_opt(root, NOBARRIER))
  711. blkdev_issue_flush(sb->s_bdev, NULL);
  712. set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super);
  713. ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping,
  714. super->start, super->len);
  715. if (!btrfs_test_opt(root, NOBARRIER))
  716. blkdev_issue_flush(sb->s_bdev, NULL);
  717. return ret;
  718. }
  719. int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
  720. {
  721. radix_tree_delete(&fs_info->fs_roots_radix,
  722. (unsigned long)root->root_key.objectid);
  723. if (root->in_sysfs)
  724. btrfs_sysfs_del_root(root);
  725. if (root->inode)
  726. iput(root->inode);
  727. if (root->node)
  728. free_extent_buffer(root->node);
  729. if (root->commit_root)
  730. free_extent_buffer(root->commit_root);
  731. if (root->name)
  732. kfree(root->name);
  733. kfree(root);
  734. return 0;
  735. }
  736. static int del_fs_roots(struct btrfs_fs_info *fs_info)
  737. {
  738. int ret;
  739. struct btrfs_root *gang[8];
  740. int i;
  741. while(1) {
  742. ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
  743. (void **)gang, 0,
  744. ARRAY_SIZE(gang));
  745. if (!ret)
  746. break;
  747. for (i = 0; i < ret; i++)
  748. btrfs_free_fs_root(fs_info, gang[i]);
  749. }
  750. return 0;
  751. }
  752. int close_ctree(struct btrfs_root *root)
  753. {
  754. int ret;
  755. struct btrfs_trans_handle *trans;
  756. struct btrfs_fs_info *fs_info = root->fs_info;
  757. fs_info->closing = 1;
  758. btrfs_transaction_flush_work(root);
  759. mutex_lock(&fs_info->fs_mutex);
  760. btrfs_defrag_dirty_roots(root->fs_info);
  761. trans = btrfs_start_transaction(root, 1);
  762. ret = btrfs_commit_transaction(trans, root);
  763. /* run commit again to drop the original snapshot */
  764. trans = btrfs_start_transaction(root, 1);
  765. btrfs_commit_transaction(trans, root);
  766. ret = btrfs_write_and_wait_transaction(NULL, root);
  767. BUG_ON(ret);
  768. write_ctree_super(NULL, root);
  769. mutex_unlock(&fs_info->fs_mutex);
  770. if (fs_info->delalloc_bytes) {
  771. printk("btrfs: at unmount delalloc count %Lu\n",
  772. fs_info->delalloc_bytes);
  773. }
  774. if (fs_info->extent_root->node)
  775. free_extent_buffer(fs_info->extent_root->node);
  776. if (fs_info->tree_root->node)
  777. free_extent_buffer(fs_info->tree_root->node);
  778. free_extent_buffer(fs_info->sb_buffer);
  779. btrfs_free_block_groups(root->fs_info);
  780. del_fs_roots(fs_info);
  781. filemap_write_and_wait(fs_info->btree_inode->i_mapping);
  782. extent_io_tree_empty_lru(&fs_info->free_space_cache);
  783. extent_io_tree_empty_lru(&fs_info->block_group_cache);
  784. extent_io_tree_empty_lru(&fs_info->pinned_extents);
  785. extent_io_tree_empty_lru(&fs_info->pending_del);
  786. extent_io_tree_empty_lru(&fs_info->extent_ins);
  787. extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
  788. truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
  789. iput(fs_info->btree_inode);
  790. #if 0
  791. while(!list_empty(&fs_info->hashers)) {
  792. struct btrfs_hasher *hasher;
  793. hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher,
  794. hashers);
  795. list_del(&hasher->hashers);
  796. crypto_free_hash(&fs_info->hash_tfm);
  797. kfree(hasher);
  798. }
  799. #endif
  800. kfree(fs_info->extent_root);
  801. kfree(fs_info->tree_root);
  802. return 0;
  803. }
  804. int btrfs_buffer_uptodate(struct extent_buffer *buf)
  805. {
  806. struct inode *btree_inode = buf->first_page->mapping->host;
  807. return extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf);
  808. }
  809. int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
  810. {
  811. struct inode *btree_inode = buf->first_page->mapping->host;
  812. return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree,
  813. buf);
  814. }
  815. void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
  816. {
  817. struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
  818. u64 transid = btrfs_header_generation(buf);
  819. struct inode *btree_inode = root->fs_info->btree_inode;
  820. if (transid != root->fs_info->generation) {
  821. printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n",
  822. (unsigned long long)buf->start,
  823. transid, root->fs_info->generation);
  824. WARN_ON(1);
  825. }
  826. set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
  827. }
  828. void btrfs_throttle(struct btrfs_root *root)
  829. {
  830. struct backing_dev_info *bdi;
  831. bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
  832. if (root->fs_info->throttles && bdi_write_congested(bdi)) {
  833. #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
  834. congestion_wait(WRITE, HZ/20);
  835. #else
  836. blk_congestion_wait(WRITE, HZ/20);
  837. #endif
  838. }
  839. }
  840. void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
  841. {
  842. balance_dirty_pages_ratelimited_nr(
  843. root->fs_info->btree_inode->i_mapping, 1);
  844. }
  845. void btrfs_set_buffer_defrag(struct extent_buffer *buf)
  846. {
  847. struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
  848. struct inode *btree_inode = root->fs_info->btree_inode;
  849. set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start,
  850. buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS);
  851. }
  852. void btrfs_set_buffer_defrag_done(struct extent_buffer *buf)
  853. {
  854. struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
  855. struct inode *btree_inode = root->fs_info->btree_inode;
  856. set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start,
  857. buf->start + buf->len - 1, EXTENT_DEFRAG_DONE,
  858. GFP_NOFS);
  859. }
  860. int btrfs_buffer_defrag(struct extent_buffer *buf)
  861. {
  862. struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
  863. struct inode *btree_inode = root->fs_info->btree_inode;
  864. return test_range_bit(&BTRFS_I(btree_inode)->io_tree,
  865. buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0);
  866. }
  867. int btrfs_buffer_defrag_done(struct extent_buffer *buf)
  868. {
  869. struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
  870. struct inode *btree_inode = root->fs_info->btree_inode;
  871. return test_range_bit(&BTRFS_I(btree_inode)->io_tree,
  872. buf->start, buf->start + buf->len - 1,
  873. EXTENT_DEFRAG_DONE, 0);
  874. }
  875. int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf)
  876. {
  877. struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
  878. struct inode *btree_inode = root->fs_info->btree_inode;
  879. return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree,
  880. buf->start, buf->start + buf->len - 1,
  881. EXTENT_DEFRAG_DONE, GFP_NOFS);
  882. }
  883. int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
  884. {
  885. struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
  886. struct inode *btree_inode = root->fs_info->btree_inode;
  887. return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree,
  888. buf->start, buf->start + buf->len - 1,
  889. EXTENT_DEFRAG, GFP_NOFS);
  890. }
  891. int btrfs_read_buffer(struct extent_buffer *buf)
  892. {
  893. struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
  894. struct inode *btree_inode = root->fs_info->btree_inode;
  895. return read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
  896. buf, 0, 1, btree_get_extent);
  897. }
  898. static struct extent_io_ops btree_extent_io_ops = {
  899. .writepage_io_hook = btree_writepage_io_hook,
  900. };