refcounttree.c 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850
  1. /* -*- mode: c; c-basic-offset: 8; -*-
  2. * vim: noexpandtab sw=8 ts=8 sts=0:
  3. *
  4. * refcounttree.c
  5. *
  6. * Copyright (C) 2009 Oracle. All rights reserved.
  7. *
  8. * This program is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU General Public
  10. * License version 2 as published by the Free Software Foundation.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * General Public License for more details.
  16. */
  17. #define MLOG_MASK_PREFIX ML_REFCOUNT
  18. #include <cluster/masklog.h>
  19. #include "ocfs2.h"
  20. #include "inode.h"
  21. #include "alloc.h"
  22. #include "suballoc.h"
  23. #include "journal.h"
  24. #include "uptodate.h"
  25. #include "super.h"
  26. #include "buffer_head_io.h"
  27. #include "blockcheck.h"
  28. #include "refcounttree.h"
  29. #include "sysfile.h"
  30. #include "dlmglue.h"
/* Map an embedded ocfs2_caching_info back to its ocfs2_refcount_tree. */
static inline struct ocfs2_refcount_tree *
cache_info_to_refcount(struct ocfs2_caching_info *ci)
{
	return container_of(ci, struct ocfs2_refcount_tree, rf_ci);
}
/*
 * Validation callback for reading a refcount block off disk (passed to
 * ocfs2_read_block() by ocfs2_read_refcount_block()).
 *
 * Checks, in order: metadata ECC, the block signature, that the block's
 * self-referencing rf_blkno matches where it was actually read from, and
 * that the block belongs to this filesystem generation.
 *
 * Returns 0 if the block is valid, the ECC error code, or -EINVAL for a
 * structurally corrupt block (which also calls ocfs2_error()).
 */
static int ocfs2_validate_refcount_block(struct super_block *sb,
					 struct buffer_head *bh)
{
	int rc;
	struct ocfs2_refcount_block *rb =
		(struct ocfs2_refcount_block *)bh->b_data;

	mlog(0, "Validating refcount block %llu\n",
	     (unsigned long long)bh->b_blocknr);

	BUG_ON(!buffer_uptodate(bh));

	/*
	 * If the ecc fails, we return the error but otherwise
	 * leave the filesystem running. We know any error is
	 * local to this block.
	 */
	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check);
	if (rc) {
		mlog(ML_ERROR, "Checksum failed for refcount block %llu\n",
		     (unsigned long long)bh->b_blocknr);
		return rc;
	}

	if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) {
		ocfs2_error(sb,
			    "Refcount block #%llu has bad signature %.*s",
			    (unsigned long long)bh->b_blocknr, 7,
			    rb->rf_signature);
		return -EINVAL;
	}

	/* The block must point back at its own disk location. */
	if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) {
		ocfs2_error(sb,
			    "Refcount block #%llu has an invalid rf_blkno "
			    "of %llu",
			    (unsigned long long)bh->b_blocknr,
			    (unsigned long long)le64_to_cpu(rb->rf_blkno));
		return -EINVAL;
	}

	/* Reject blocks left over from a previous mkfs generation. */
	if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) {
		ocfs2_error(sb,
			    "Refcount block #%llu has an invalid "
			    "rf_fs_generation of #%u",
			    (unsigned long long)bh->b_blocknr,
			    le32_to_cpu(rb->rf_fs_generation));
		return -EINVAL;
	}

	return 0;
}
  81. static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci,
  82. u64 rb_blkno,
  83. struct buffer_head **bh)
  84. {
  85. int rc;
  86. struct buffer_head *tmp = *bh;
  87. rc = ocfs2_read_block(ci, rb_blkno, &tmp,
  88. ocfs2_validate_refcount_block);
  89. /* If ocfs2_read_block() got us a new bh, pass it up. */
  90. if (!rc && !*bh)
  91. *bh = tmp;
  92. return rc;
  93. }
/*
 * ocfs2_caching_operations for refcount trees.  All six callbacks simply
 * recover the enclosing ocfs2_refcount_tree and service the request off
 * fields embedded in it.
 */

/* The metadata cache is keyed by the tree's root block number. */
static u64 ocfs2_refcount_cache_owner(struct ocfs2_caching_info *ci)
{
	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);

	return rf->rf_blkno;
}

static struct super_block *
ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci)
{
	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);

	return rf->rf_sb;
}

/* Spinlock protecting the cache bookkeeping (non-sleeping contexts). */
static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci)
{
	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);

	spin_lock(&rf->rf_lock);
}

static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci)
{
	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);

	spin_unlock(&rf->rf_lock);
}

/* Mutex serializing block I/O on this tree (may sleep). */
static void ocfs2_refcount_cache_io_lock(struct ocfs2_caching_info *ci)
{
	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);

	mutex_lock(&rf->rf_io_mutex);
}

static void ocfs2_refcount_cache_io_unlock(struct ocfs2_caching_info *ci)
{
	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);

	mutex_unlock(&rf->rf_io_mutex);
}

static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
	.co_owner		= ocfs2_refcount_cache_owner,
	.co_get_super		= ocfs2_refcount_cache_get_super,
	.co_cache_lock		= ocfs2_refcount_cache_lock,
	.co_cache_unlock	= ocfs2_refcount_cache_unlock,
	.co_io_lock		= ocfs2_refcount_cache_io_lock,
	.co_io_unlock		= ocfs2_refcount_cache_io_unlock,
};
  133. static struct ocfs2_refcount_tree *
  134. ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno)
  135. {
  136. struct rb_node *n = osb->osb_rf_lock_tree.rb_node;
  137. struct ocfs2_refcount_tree *tree = NULL;
  138. while (n) {
  139. tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node);
  140. if (blkno < tree->rf_blkno)
  141. n = n->rb_left;
  142. else if (blkno > tree->rf_blkno)
  143. n = n->rb_right;
  144. else
  145. return tree;
  146. }
  147. return NULL;
  148. }
/*
 * Insert @new into the per-super rb-tree of cached refcount trees,
 * ordered by root block number.
 *
 * osb_lock is already locked.
 */
static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb,
				       struct ocfs2_refcount_tree *new)
{
	u64 rf_blkno = new->rf_blkno;
	struct rb_node *parent = NULL;
	struct rb_node **p = &osb->osb_rf_lock_tree.rb_node;
	struct ocfs2_refcount_tree *tmp;

	/* Standard rb-tree descent to find the link point. */
	while (*p) {
		parent = *p;
		tmp = rb_entry(parent, struct ocfs2_refcount_tree,
			       rf_node);

		if (rf_blkno < tmp->rf_blkno)
			p = &(*p)->rb_left;
		else if (rf_blkno > tmp->rf_blkno)
			p = &(*p)->rb_right;
		else {
			/* This should never happen! */
			mlog(ML_ERROR, "Duplicate refcount block %llu found!\n",
			     (unsigned long long)rf_blkno);
			BUG();
		}
	}

	rb_link_node(&new->rf_node, parent, p);
	rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree);
}
/*
 * Tear down a tree object: drop its metadata cache, release its cluster
 * lock resource, then free the memory.  Must not be on the rb-tree.
 */
static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree)
{
	ocfs2_metadata_cache_exit(&tree->rf_ci);
	ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres);
	ocfs2_lock_res_free(&tree->rf_lockres);
	kfree(tree);
}
  182. static inline void
  183. ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb,
  184. struct ocfs2_refcount_tree *tree)
  185. {
  186. rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree);
  187. if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree)
  188. osb->osb_ref_tree_lru = NULL;
  189. }
/* Locked wrapper: take osb_lock around the rb-tree erase. */
static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
					struct ocfs2_refcount_tree *tree)
{
	spin_lock(&osb->osb_lock);
	ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
	spin_unlock(&osb->osb_lock);
}
/*
 * kref release callback: invoked when the last reference on a tree is
 * dropped via ocfs2_refcount_tree_put().
 */
void ocfs2_kref_remove_refcount_tree(struct kref *kref)
{
	struct ocfs2_refcount_tree *tree =
		container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);

	ocfs2_free_refcount_tree(tree);
}
/* Take a reference on a cached refcount tree object. */
static inline void
ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree)
{
	kref_get(&tree->rf_getcnt);
}

/* Drop a reference; the final put frees the tree via the kref release. */
static inline void
ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree)
{
	kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree);
}
/*
 * Initialize the parts of a new tree object that only require the super
 * block: metadata cache, I/O mutex, back-pointer, and cache spinlock.
 */
static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new,
					       struct super_block *sb)
{
	ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops);
	mutex_init(&new->rf_io_mutex);
	new->rf_sb = sb;
	spin_lock_init(&new->rf_lock);
}
/*
 * Initialize the locking side of a tree object.  Requires the on-disk
 * generation, so callers run this only after the root block is known.
 */
static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
					struct ocfs2_refcount_tree *new,
					u64 rf_blkno, u32 generation)
{
	init_rwsem(&new->rf_sem);
	ocfs2_refcount_lock_res_init(&new->rf_lockres, osb,
				     rf_blkno, generation);
}
  229. static struct ocfs2_refcount_tree*
  230. ocfs2_allocate_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno)
  231. {
  232. struct ocfs2_refcount_tree *new;
  233. new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
  234. if (!new)
  235. return NULL;
  236. new->rf_blkno = rf_blkno;
  237. kref_init(&new->rf_getcnt);
  238. ocfs2_init_refcount_tree_ci(new, osb->sb);
  239. return new;
  240. }
/*
 * Find the cached in-memory refcount tree for @rf_blkno, creating and
 * inserting one if it is not cached yet.  A one-entry LRU
 * (osb->osb_ref_tree_lru) short-circuits the rb-tree lookup.
 *
 * Returns 0 and sets *ret_tree on success.  The returned pointer is NOT
 * reference-counted here; callers take their own kref.
 */
static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
				   struct ocfs2_refcount_tree **ret_tree)
{
	int ret = 0;
	struct ocfs2_refcount_tree *tree, *new = NULL;
	struct buffer_head *ref_root_bh = NULL;
	struct ocfs2_refcount_block *ref_rb;

	spin_lock(&osb->osb_lock);
	/* Fast path: last tree used; otherwise the rb-tree lookup. */
	if (osb->osb_ref_tree_lru &&
	    osb->osb_ref_tree_lru->rf_blkno == rf_blkno)
		tree = osb->osb_ref_tree_lru;
	else
		tree = ocfs2_find_refcount_tree(osb, rf_blkno);
	if (tree)
		goto out;

	/* Miss: drop the lock to allocate and read from disk. */
	spin_unlock(&osb->osb_lock);

	new = ocfs2_allocate_refcount_tree(osb, rf_blkno);
	if (!new) {
		ret = -ENOMEM;
		mlog_errno(ret);
		return ret;
	}
	/*
	 * We need the generation to create the refcount tree lock and since
	 * it isn't changed during the tree modification, we are safe here to
	 * read without protection.
	 * We also have to purge the cache after we create the lock since the
	 * refcount block may have the stale data. It can only be trusted when
	 * we hold the refcount lock.
	 */
	ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		/* Lockres not initialized yet, so plain cache-exit + free. */
		ocfs2_metadata_cache_exit(&new->rf_ci);
		kfree(new);
		return ret;
	}

	ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
	new->rf_generation = le32_to_cpu(ref_rb->rf_generation);
	ocfs2_init_refcount_tree_lock(osb, new, rf_blkno,
				      new->rf_generation);
	ocfs2_metadata_cache_purge(&new->rf_ci);

	spin_lock(&osb->osb_lock);
	/* Someone may have raced us while the lock was dropped; recheck. */
	tree = ocfs2_find_refcount_tree(osb, rf_blkno);
	if (tree)
		goto out;

	ocfs2_insert_refcount_tree(osb, new);

	tree = new;
	new = NULL;

out:
	*ret_tree = tree;

	osb->osb_ref_tree_lru = tree;

	spin_unlock(&osb->osb_lock);

	/* Lost the insert race: throw away our unused copy. */
	if (new)
		ocfs2_free_refcount_tree(new);

	brelse(ref_root_bh);
	return ret;
}
  299. static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno)
  300. {
  301. int ret;
  302. struct buffer_head *di_bh = NULL;
  303. struct ocfs2_dinode *di;
  304. ret = ocfs2_read_inode_block(inode, &di_bh);
  305. if (ret) {
  306. mlog_errno(ret);
  307. goto out;
  308. }
  309. BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
  310. di = (struct ocfs2_dinode *)di_bh->b_data;
  311. *ref_blkno = le64_to_cpu(di->i_refcount_loc);
  312. brelse(di_bh);
  313. out:
  314. return ret;
  315. }
  316. static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
  317. struct ocfs2_refcount_tree *tree, int rw)
  318. {
  319. int ret;
  320. ret = ocfs2_refcount_lock(tree, rw);
  321. if (ret) {
  322. mlog_errno(ret);
  323. goto out;
  324. }
  325. if (rw)
  326. down_write(&tree->rf_sem);
  327. else
  328. down_read(&tree->rf_sem);
  329. out:
  330. return ret;
  331. }
/*
 * Lock the refcount tree pointed by ref_blkno and return the tree.
 * In most case, we lock the tree and read the refcount block.
 * So read it here if the caller really needs it.
 *
 * If the tree has been re-created by other node, it will free the
 * old one and re-create it.
 */
int ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
			     u64 ref_blkno, int rw,
			     struct ocfs2_refcount_tree **ret_tree,
			     struct buffer_head **ref_bh)
{
	int ret, delete_tree = 0;
	struct ocfs2_refcount_tree *tree = NULL;
	struct buffer_head *ref_root_bh = NULL;
	struct ocfs2_refcount_block *rb;

again:
	ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	/* Pin the tree for the duration of the lock. */
	ocfs2_refcount_tree_get(tree);

	ret = __ocfs2_lock_refcount_tree(osb, tree, rw);
	if (ret) {
		mlog_errno(ret);
		ocfs2_refcount_tree_put(tree);
		goto out;
	}

	/* Re-read the root now that we hold the cluster lock. */
	ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
					&ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		ocfs2_unlock_refcount_tree(osb, tree, rw);
		ocfs2_refcount_tree_put(tree);
		goto out;
	}

	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
	/*
	 * If the refcount block has been freed and re-created, we may need
	 * to recreate the refcount tree also.
	 *
	 * Here we just remove the tree from the rb-tree, and the last
	 * kref holder will unlock and delete this refcount_tree.
	 * Then we goto "again" and ocfs2_get_refcount_tree will create
	 * the new refcount tree for us.
	 */
	if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) {
		if (!tree->rf_removed) {
			ocfs2_erase_refcount_tree_from_list(osb, tree);
			tree->rf_removed = 1;
			delete_tree = 1;
		}

		ocfs2_unlock_refcount_tree(osb, tree, rw);
		/*
		 * We get an extra reference when we create the refcount
		 * tree, so another put will destroy it.
		 */
		if (delete_tree)
			ocfs2_refcount_tree_put(tree);
		brelse(ref_root_bh);
		ref_root_bh = NULL;
		goto again;
	}

	*ret_tree = tree;

	/* Only hand the root bh out if the caller asked for it. */
	if (ref_bh) {
		*ref_bh = ref_root_bh;
		ref_root_bh = NULL;
	}

out:
	brelse(ref_root_bh);
	return ret;
}
  406. int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw,
  407. struct ocfs2_refcount_tree **ret_tree,
  408. struct buffer_head **ref_bh)
  409. {
  410. int ret;
  411. u64 ref_blkno;
  412. ret = ocfs2_get_refcount_block(inode, &ref_blkno);
  413. if (ret) {
  414. mlog_errno(ret);
  415. return ret;
  416. }
  417. return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno,
  418. rw, ret_tree, ref_bh);
  419. }
/*
 * Undo ocfs2_lock_refcount_tree(): release the local rwsem, then the
 * cluster lock, then drop the reference the lock path took.
 */
void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
				struct ocfs2_refcount_tree *tree, int rw)
{
	if (rw)
		up_write(&tree->rf_sem);
	else
		up_read(&tree->rf_sem);

	ocfs2_refcount_unlock(tree, rw);
	ocfs2_refcount_tree_put(tree);
}
/*
 * Free every cached refcount tree on this super block.
 *
 * NOTE(review): the rb-tree is walked without taking osb_lock, so
 * callers must guarantee there are no concurrent users (presumably this
 * runs at unmount) — confirm against the call sites.
 */
void ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
{
	struct rb_node *node;
	struct ocfs2_refcount_tree *tree;
	struct rb_root *root = &osb->osb_rf_lock_tree;

	while ((node = rb_last(root)) != NULL) {
		tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node);

		mlog(0, "Purge tree %llu\n",
		     (unsigned long long) tree->rf_blkno);

		rb_erase(&tree->rf_node, root);
		ocfs2_free_refcount_tree(tree);
	}
}
  443. /*
  444. * Create a refcount tree for an inode.
  445. * We take for granted that the inode is already locked.
  446. */
  447. static int ocfs2_create_refcount_tree(struct inode *inode,
  448. struct buffer_head *di_bh)
  449. {
  450. int ret;
  451. handle_t *handle = NULL;
  452. struct ocfs2_alloc_context *meta_ac = NULL;
  453. struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
  454. struct ocfs2_inode_info *oi = OCFS2_I(inode);
  455. struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
  456. struct buffer_head *new_bh = NULL;
  457. struct ocfs2_refcount_block *rb;
  458. struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL;
  459. u16 suballoc_bit_start;
  460. u32 num_got;
  461. u64 first_blkno;
  462. BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
  463. mlog(0, "create tree for inode %lu\n", inode->i_ino);
  464. ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
  465. if (ret) {
  466. mlog_errno(ret);
  467. goto out;
  468. }
  469. handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_CREATE_CREDITS);
  470. if (IS_ERR(handle)) {
  471. ret = PTR_ERR(handle);
  472. mlog_errno(ret);
  473. goto out;
  474. }
  475. ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
  476. OCFS2_JOURNAL_ACCESS_WRITE);
  477. if (ret) {
  478. mlog_errno(ret);
  479. goto out_commit;
  480. }
  481. ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
  482. &suballoc_bit_start, &num_got,
  483. &first_blkno);
  484. if (ret) {
  485. mlog_errno(ret);
  486. goto out_commit;
  487. }
  488. new_tree = ocfs2_allocate_refcount_tree(osb, first_blkno);
  489. if (!new_tree) {
  490. ret = -ENOMEM;
  491. mlog_errno(ret);
  492. goto out_commit;
  493. }
  494. new_bh = sb_getblk(inode->i_sb, first_blkno);
  495. ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh);
  496. ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh,
  497. OCFS2_JOURNAL_ACCESS_CREATE);
  498. if (ret) {
  499. mlog_errno(ret);
  500. goto out_commit;
  501. }
  502. /* Initialize ocfs2_refcount_block. */
  503. rb = (struct ocfs2_refcount_block *)new_bh->b_data;
  504. memset(rb, 0, inode->i_sb->s_blocksize);
  505. strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
  506. rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num);
  507. rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
  508. rb->rf_fs_generation = cpu_to_le32(osb->fs_generation);
  509. rb->rf_blkno = cpu_to_le64(first_blkno);
  510. rb->rf_count = cpu_to_le32(1);
  511. rb->rf_records.rl_count =
  512. cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb));
  513. spin_lock(&osb->osb_lock);
  514. rb->rf_generation = osb->s_next_generation++;
  515. spin_unlock(&osb->osb_lock);
  516. ocfs2_journal_dirty(handle, new_bh);
  517. spin_lock(&oi->ip_lock);
  518. oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
  519. di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
  520. di->i_refcount_loc = cpu_to_le64(first_blkno);
  521. spin_unlock(&oi->ip_lock);
  522. mlog(0, "created tree for inode %lu, refblock %llu\n",
  523. inode->i_ino, (unsigned long long)first_blkno);
  524. ocfs2_journal_dirty(handle, di_bh);
  525. /*
  526. * We have to init the tree lock here since it will use
  527. * the generation number to create it.
  528. */
  529. new_tree->rf_generation = le32_to_cpu(rb->rf_generation);
  530. ocfs2_init_refcount_tree_lock(osb, new_tree, first_blkno,
  531. new_tree->rf_generation);
  532. spin_lock(&osb->osb_lock);
  533. tree = ocfs2_find_refcount_tree(osb, first_blkno);
  534. /*
  535. * We've just created a new refcount tree in this block. If
  536. * we found a refcount tree on the ocfs2_super, it must be
  537. * one we just deleted. We free the old tree before
  538. * inserting the new tree.
  539. */
  540. BUG_ON(tree && tree->rf_generation == new_tree->rf_generation);
  541. if (tree)
  542. ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
  543. ocfs2_insert_refcount_tree(osb, new_tree);
  544. spin_unlock(&osb->osb_lock);
  545. new_tree = NULL;
  546. if (tree)
  547. ocfs2_refcount_tree_put(tree);
  548. out_commit:
  549. ocfs2_commit_trans(osb, handle);
  550. out:
  551. if (new_tree) {
  552. ocfs2_metadata_cache_exit(&new_tree->rf_ci);
  553. kfree(new_tree);
  554. }
  555. brelse(new_bh);
  556. if (meta_ac)
  557. ocfs2_free_alloc_context(meta_ac);
  558. return ret;
  559. }
/*
 * Attach @inode to the existing refcount tree rooted at @refcount_loc:
 * take a write lock on the tree, bump the root block's rf_count, and
 * record the location plus OCFS2_HAS_REFCOUNT_FL in the dinode.  The
 * inode must not already own a tree (BUG otherwise).
 */
static int ocfs2_set_refcount_tree(struct inode *inode,
				   struct buffer_head *di_bh,
				   u64 refcount_loc)
{
	int ret;
	handle_t *handle = NULL;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct buffer_head *ref_root_bh = NULL;
	struct ocfs2_refcount_block *rb;
	struct ocfs2_refcount_tree *ref_tree;

	BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);

	ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
				       &ref_tree, &ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_SET_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, ref_root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* One more inode now shares this tree. */
	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
	le32_add_cpu(&rb->rf_count, 1);
	ocfs2_journal_dirty(handle, ref_root_bh);

	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	di->i_refcount_loc = cpu_to_le64(refcount_loc);
	spin_unlock(&oi->ip_lock);
	ocfs2_journal_dirty(handle, di_bh);

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
	brelse(ref_root_bh);

	return ret;
}
/*
 * Detach @inode from its refcount tree: drop rf_count by one, clear the
 * dinode's flag/location, and — if this inode was the last user — free
 * the root block back to the extent suballocator and drop the cached
 * in-memory tree.  Returns 0 if the inode had no tree to begin with.
 */
int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
{
	int ret, delete_tree = 0;
	handle_t *handle = NULL;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_refcount_block *rb;
	struct inode *alloc_inode = NULL;
	struct buffer_head *alloc_bh = NULL;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_refcount_tree *ref_tree;
	int credits = OCFS2_REFCOUNT_TREE_REMOVE_CREDITS;
	u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc);
	u16 bit = 0;

	/* Nothing to do if the inode never had a refcount tree. */
	if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
		return 0;

	BUG_ON(!ref_blkno);
	ret = ocfs2_lock_refcount_tree(osb, ref_blkno, 1, &ref_tree, &blk_bh);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	rb = (struct ocfs2_refcount_block *)blk_bh->b_data;

	/*
	 * If we are the last user, we need to free the block.
	 * So lock the allocator ahead.
	 */
	if (le32_to_cpu(rb->rf_count) == 1) {
		blk = le64_to_cpu(rb->rf_blkno);
		bit = le16_to_cpu(rb->rf_suballoc_bit);
		bg_blkno = ocfs2_which_suballoc_group(blk, bit);

		alloc_inode = ocfs2_get_system_file_inode(osb,
					EXTENT_ALLOC_SYSTEM_INODE,
					le16_to_cpu(rb->rf_suballoc_slot));
		if (!alloc_inode) {
			ret = -ENOMEM;
			mlog_errno(ret);
			goto out;
		}
		mutex_lock(&alloc_inode->i_mutex);

		ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
		if (ret) {
			mlog_errno(ret);
			goto out_mutex;
		}

		/* Extra journal credits for the suballocator free below. */
		credits += OCFS2_SUBALLOC_FREE;
	}

	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock;
	}

	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, blk_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* Detach the tree from the inode. */
	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features &= ~OCFS2_HAS_REFCOUNT_FL;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	di->i_refcount_loc = 0;
	spin_unlock(&oi->ip_lock);
	ocfs2_journal_dirty(handle, di_bh);

	le32_add_cpu(&rb->rf_count, -1);
	ocfs2_journal_dirty(handle, blk_bh);

	/* Last user gone: return the block to the suballocator. */
	if (!rb->rf_count) {
		delete_tree = 1;
		ocfs2_erase_refcount_tree_from_list(osb, ref_tree);
		ret = ocfs2_free_suballoc_bits(handle, alloc_inode,
					       alloc_bh, bit, bg_blkno, 1);
		if (ret)
			mlog_errno(ret);
	}

out_commit:
	ocfs2_commit_trans(osb, handle);
out_unlock:
	if (alloc_inode) {
		ocfs2_inode_unlock(alloc_inode, 1);
		brelse(alloc_bh);
	}
out_mutex:
	if (alloc_inode) {
		mutex_unlock(&alloc_inode->i_mutex);
		iput(alloc_inode);
	}
out:
	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
	/* Drop the extra creation-time reference so the tree is freed. */
	if (delete_tree)
		ocfs2_refcount_tree_put(ref_tree);
	brelse(blk_bh);

	return ret;
}