/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * refcounttree.c
 *
 * Copyright (C) 2009 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */

#include <linux/sort.h>
#define MLOG_MASK_PREFIX ML_REFCOUNT
#include <cluster/masklog.h>
#include "ocfs2.h"
#include "inode.h"
#include "alloc.h"
#include "suballoc.h"
#include "journal.h"
#include "uptodate.h"
#include "super.h"
#include "buffer_head_io.h"
#include "blockcheck.h"
#include "refcounttree.h"
#include "sysfile.h"
#include "dlmglue.h"
#include "extent_map.h"

static inline struct ocfs2_refcount_tree *
cache_info_to_refcount(struct ocfs2_caching_info *ci)
{
	return container_of(ci, struct ocfs2_refcount_tree, rf_ci);
}

static int ocfs2_validate_refcount_block(struct super_block *sb,
					 struct buffer_head *bh)
{
	int rc;
	struct ocfs2_refcount_block *rb =
		(struct ocfs2_refcount_block *)bh->b_data;

	mlog(0, "Validating refcount block %llu\n",
	     (unsigned long long)bh->b_blocknr);

	BUG_ON(!buffer_uptodate(bh));

	/*
	 * If the ecc fails, we return the error but otherwise
	 * leave the filesystem running. We know any error is
	 * local to this block.
	 */
	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check);
	if (rc) {
		mlog(ML_ERROR, "Checksum failed for refcount block %llu\n",
		     (unsigned long long)bh->b_blocknr);
		return rc;
	}

	if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) {
		ocfs2_error(sb,
			    "Refcount block #%llu has bad signature %.*s",
			    (unsigned long long)bh->b_blocknr, 7,
			    rb->rf_signature);
		return -EINVAL;
	}

	if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) {
		ocfs2_error(sb,
			    "Refcount block #%llu has an invalid rf_blkno "
			    "of %llu",
			    (unsigned long long)bh->b_blocknr,
			    (unsigned long long)le64_to_cpu(rb->rf_blkno));
		return -EINVAL;
	}

	if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) {
		ocfs2_error(sb,
			    "Refcount block #%llu has an invalid "
			    "rf_fs_generation of #%u",
			    (unsigned long long)bh->b_blocknr,
			    le32_to_cpu(rb->rf_fs_generation));
		return -EINVAL;
	}

	return 0;
}

static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci,
				     u64 rb_blkno,
				     struct buffer_head **bh)
{
	int rc;
	struct buffer_head *tmp = *bh;

	rc = ocfs2_read_block(ci, rb_blkno, &tmp,
			      ocfs2_validate_refcount_block);

	/* If ocfs2_read_block() got us a new bh, pass it up. */
	if (!rc && !*bh)
		*bh = tmp;

	return rc;
}
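
/*
 * Illustrative sketch (assumed caller, not part of this file): refcount
 * blocks are always read through the wrapper above so that
 * ocfs2_validate_refcount_block() runs whenever the block actually comes
 * off the disk:
 *
 *	struct buffer_head *ref_bh = NULL;
 *
 *	ret = ocfs2_read_refcount_block(&tree->rf_ci, blkno, &ref_bh);
 *	if (ret)
 *		return ret;
 *	...use ref_bh...
 *	brelse(ref_bh);
 *
 * A cached hit skips the validator, which is why the cache is purged
 * when a tree is set up (see ocfs2_get_refcount_tree() below).
 */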

static u64 ocfs2_refcount_cache_owner(struct ocfs2_caching_info *ci)
{
	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);

	return rf->rf_blkno;
}

static struct super_block *
ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci)
{
	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);

	return rf->rf_sb;
}

static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci)
{
	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);

	spin_lock(&rf->rf_lock);
}

static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci)
{
	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);

	spin_unlock(&rf->rf_lock);
}

static void ocfs2_refcount_cache_io_lock(struct ocfs2_caching_info *ci)
{
	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);

	mutex_lock(&rf->rf_io_mutex);
}

static void ocfs2_refcount_cache_io_unlock(struct ocfs2_caching_info *ci)
{
	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);

	mutex_unlock(&rf->rf_io_mutex);
}

static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
	.co_owner		= ocfs2_refcount_cache_owner,
	.co_get_super		= ocfs2_refcount_cache_get_super,
	.co_cache_lock		= ocfs2_refcount_cache_lock,
	.co_cache_unlock	= ocfs2_refcount_cache_unlock,
	.co_io_lock		= ocfs2_refcount_cache_io_lock,
	.co_io_unlock		= ocfs2_refcount_cache_io_unlock,
};

static struct ocfs2_refcount_tree *
ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno)
{
	struct rb_node *n = osb->osb_rf_lock_tree.rb_node;
	struct ocfs2_refcount_tree *tree = NULL;

	while (n) {
		tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node);

		if (blkno < tree->rf_blkno)
			n = n->rb_left;
		else if (blkno > tree->rf_blkno)
			n = n->rb_right;
		else
			return tree;
	}

	return NULL;
}

/* osb_lock is already locked. */
static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb,
				       struct ocfs2_refcount_tree *new)
{
	u64 rf_blkno = new->rf_blkno;
	struct rb_node *parent = NULL;
	struct rb_node **p = &osb->osb_rf_lock_tree.rb_node;
	struct ocfs2_refcount_tree *tmp;

	while (*p) {
		parent = *p;

		tmp = rb_entry(parent, struct ocfs2_refcount_tree,
			       rf_node);

		if (rf_blkno < tmp->rf_blkno)
			p = &(*p)->rb_left;
		else if (rf_blkno > tmp->rf_blkno)
			p = &(*p)->rb_right;
		else {
			/* This should never happen! */
			mlog(ML_ERROR, "Duplicate refcount block %llu found!\n",
			     (unsigned long long)rf_blkno);
			BUG();
		}
	}

	rb_link_node(&new->rf_node, parent, p);
	rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree);
}
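
/*
 * Illustrative sketch (assumed values): lookup and insert follow the
 * standard rbtree pattern, keyed by rf_blkno and always serialized by
 * osb->osb_lock. Caching a tree rooted at block 12345 looks like:
 *
 *	spin_lock(&osb->osb_lock);
 *	tree = ocfs2_find_refcount_tree(osb, 12345);
 *	if (!tree)
 *		ocfs2_insert_refcount_tree(osb, new);
 *	spin_unlock(&osb->osb_lock);
 *
 * where new->rf_blkno == 12345. ocfs2_get_refcount_tree() below is the
 * real caller and also handles allocating "new" outside the spinlock.
 */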

static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree)
{
	ocfs2_metadata_cache_exit(&tree->rf_ci);
	ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres);
	ocfs2_lock_res_free(&tree->rf_lockres);
	kfree(tree);
}

static inline void
ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb,
					struct ocfs2_refcount_tree *tree)
{
	rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree);
	if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree)
		osb->osb_ref_tree_lru = NULL;
}

static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
					struct ocfs2_refcount_tree *tree)
{
	spin_lock(&osb->osb_lock);
	ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
	spin_unlock(&osb->osb_lock);
}

void ocfs2_kref_remove_refcount_tree(struct kref *kref)
{
	struct ocfs2_refcount_tree *tree =
		container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);

	ocfs2_free_refcount_tree(tree);
}

static inline void
ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree)
{
	kref_get(&tree->rf_getcnt);
}

static inline void
ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree)
{
	kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree);
}

static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new,
					       struct super_block *sb)
{
	ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops);
	mutex_init(&new->rf_io_mutex);
	new->rf_sb = sb;
	spin_lock_init(&new->rf_lock);
}

static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
					struct ocfs2_refcount_tree *new,
					u64 rf_blkno, u32 generation)
{
	init_rwsem(&new->rf_sem);
	ocfs2_refcount_lock_res_init(&new->rf_lockres, osb,
				     rf_blkno, generation);
}

static struct ocfs2_refcount_tree*
ocfs2_allocate_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno)
{
	struct ocfs2_refcount_tree *new;

	new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
	if (!new)
		return NULL;

	new->rf_blkno = rf_blkno;
	kref_init(&new->rf_getcnt);
	ocfs2_init_refcount_tree_ci(new, osb->sb);
	return new;
}

static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
				   struct ocfs2_refcount_tree **ret_tree)
{
	int ret = 0;
	struct ocfs2_refcount_tree *tree, *new = NULL;
	struct buffer_head *ref_root_bh = NULL;
	struct ocfs2_refcount_block *ref_rb;

	spin_lock(&osb->osb_lock);
	if (osb->osb_ref_tree_lru &&
	    osb->osb_ref_tree_lru->rf_blkno == rf_blkno)
		tree = osb->osb_ref_tree_lru;
	else
		tree = ocfs2_find_refcount_tree(osb, rf_blkno);
	if (tree)
		goto out;

	spin_unlock(&osb->osb_lock);

	new = ocfs2_allocate_refcount_tree(osb, rf_blkno);
	if (!new) {
		ret = -ENOMEM;
		mlog_errno(ret);
		return ret;
	}

	/*
	 * We need the generation to create the refcount tree lock and since
	 * it isn't changed during the tree modification, we are safe here to
	 * read without protection.
	 * We also have to purge the cache after we create the lock since the
	 * refcount block may have stale data. It can only be trusted when
	 * we hold the refcount lock.
	 */
	ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		ocfs2_metadata_cache_exit(&new->rf_ci);
		kfree(new);
		return ret;
	}

	ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
	new->rf_generation = le32_to_cpu(ref_rb->rf_generation);
	ocfs2_init_refcount_tree_lock(osb, new, rf_blkno,
				      new->rf_generation);
	ocfs2_metadata_cache_purge(&new->rf_ci);

	spin_lock(&osb->osb_lock);
	tree = ocfs2_find_refcount_tree(osb, rf_blkno);
	if (tree)
		goto out;

	ocfs2_insert_refcount_tree(osb, new);

	tree = new;
	new = NULL;

out:
	*ret_tree = tree;

	osb->osb_ref_tree_lru = tree;

	spin_unlock(&osb->osb_lock);

	if (new)
		ocfs2_free_refcount_tree(new);

	brelse(ref_root_bh);
	return ret;
}
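
/*
 * The function above is the classic optimistic-allocation idiom: drop
 * the spinlock, build the candidate (the block read may sleep), retake
 * the lock and re-check before inserting. A minimal sketch of the
 * idiom with the details elided:
 *
 *	spin_lock(&lock);
 *	obj = find(key);
 *	if (!obj) {
 *		spin_unlock(&lock);
 *		new = alloc_and_setup(key);	// may sleep
 *		spin_lock(&lock);
 *		obj = find(key);		// re-check: we may have raced
 *		if (!obj) {
 *			insert(new);
 *			obj = new;
 *			new = NULL;
 *		}
 *	}
 *	spin_unlock(&lock);
 *	if (new)
 *		free(new);			// we lost the race
 */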

static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno)
{
	int ret;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_dinode *di;

	ret = ocfs2_read_inode_block(inode, &di_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));

	di = (struct ocfs2_dinode *)di_bh->b_data;
	*ref_blkno = le64_to_cpu(di->i_refcount_loc);
	brelse(di_bh);
out:
	return ret;
}

static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
				      struct ocfs2_refcount_tree *tree, int rw)
{
	int ret;

	ret = ocfs2_refcount_lock(tree, rw);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (rw)
		down_write(&tree->rf_sem);
	else
		down_read(&tree->rf_sem);

out:
	return ret;
}

/*
 * Lock the refcount tree pointed to by ref_blkno and return the tree.
 * In most cases, we lock the tree and read the refcount block.
 * So read it here if the caller really needs it.
 *
 * If the tree has been re-created by another node, we will free the
 * old one and re-create it.
 */
int ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
			     u64 ref_blkno, int rw,
			     struct ocfs2_refcount_tree **ret_tree,
			     struct buffer_head **ref_bh)
{
	int ret, delete_tree = 0;
	struct ocfs2_refcount_tree *tree = NULL;
	struct buffer_head *ref_root_bh = NULL;
	struct ocfs2_refcount_block *rb;

again:
	ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	ocfs2_refcount_tree_get(tree);

	ret = __ocfs2_lock_refcount_tree(osb, tree, rw);
	if (ret) {
		mlog_errno(ret);
		ocfs2_refcount_tree_put(tree);
		goto out;
	}

	ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
					&ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		ocfs2_unlock_refcount_tree(osb, tree, rw);
		ocfs2_refcount_tree_put(tree);
		goto out;
	}

	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
	/*
	 * If the refcount block has been freed and re-created, we may need
	 * to recreate the refcount tree also.
	 *
	 * Here we just remove the tree from the rb-tree, and the last
	 * kref holder will unlock and delete this refcount_tree.
	 * Then we goto "again" and ocfs2_get_refcount_tree will create
	 * the new refcount tree for us.
	 */
	if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) {
		if (!tree->rf_removed) {
			ocfs2_erase_refcount_tree_from_list(osb, tree);
			tree->rf_removed = 1;
			delete_tree = 1;
		}

		ocfs2_unlock_refcount_tree(osb, tree, rw);
		/*
		 * We get an extra reference when we create the refcount
		 * tree, so another put will destroy it.
		 */
		if (delete_tree)
			ocfs2_refcount_tree_put(tree);
		brelse(ref_root_bh);
		ref_root_bh = NULL;
		goto again;
	}

	*ret_tree = tree;
	if (ref_bh) {
		*ref_bh = ref_root_bh;
		ref_root_bh = NULL;
	}
out:
	brelse(ref_root_bh);
	return ret;
}
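
/*
 * Illustrative caller sketch (assumed usage, not in this file): the
 * lock/unlock pair brackets any refcount update, here with rw = 1 for
 * a write lock:
 *
 *	struct ocfs2_refcount_tree *tree;
 *	struct buffer_head *ref_root_bh = NULL;
 *
 *	ret = ocfs2_lock_refcount_tree(osb, ref_blkno, 1,
 *				       &tree, &ref_root_bh);
 *	if (ret)
 *		return ret;
 *	...modify records through tree->rf_ci and ref_root_bh...
 *	ocfs2_unlock_refcount_tree(osb, tree, 1);
 *	brelse(ref_root_bh);
 *
 * ocfs2_unlock_refcount_tree() also drops the kref taken here, so the
 * tree pointer must not be used after the unlock.
 */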

int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw,
				      struct ocfs2_refcount_tree **ret_tree,
				      struct buffer_head **ref_bh)
{
	int ret;
	u64 ref_blkno;

	ret = ocfs2_get_refcount_block(inode, &ref_blkno);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno,
					rw, ret_tree, ref_bh);
}

void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
				struct ocfs2_refcount_tree *tree, int rw)
{
	if (rw)
		up_write(&tree->rf_sem);
	else
		up_read(&tree->rf_sem);

	ocfs2_refcount_unlock(tree, rw);
	ocfs2_refcount_tree_put(tree);
}

void ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
{
	struct rb_node *node;
	struct ocfs2_refcount_tree *tree;
	struct rb_root *root = &osb->osb_rf_lock_tree;

	while ((node = rb_last(root)) != NULL) {
		tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node);

		mlog(0, "Purge tree %llu\n",
		     (unsigned long long) tree->rf_blkno);

		rb_erase(&tree->rf_node, root);
		ocfs2_free_refcount_tree(tree);
	}
}

/*
 * Create a refcount tree for an inode.
 * We take for granted that the inode is already locked.
 */
static int ocfs2_create_refcount_tree(struct inode *inode,
				      struct buffer_head *di_bh)
{
	int ret;
	handle_t *handle = NULL;
	struct ocfs2_alloc_context *meta_ac = NULL;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct buffer_head *new_bh = NULL;
	struct ocfs2_refcount_block *rb;
	struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL;
	u16 suballoc_bit_start;
	u32 num_got;
	u64 first_blkno;

	BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);

	mlog(0, "create tree for inode %lu\n", inode->i_ino);

	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_CREATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
				   &suballoc_bit_start, &num_got,
				   &first_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	new_tree = ocfs2_allocate_refcount_tree(osb, first_blkno);
	if (!new_tree) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out_commit;
	}

	new_bh = sb_getblk(inode->i_sb, first_blkno);
	if (new_bh == NULL) {
		ret = -EIO;
		mlog_errno(ret);
		goto out_commit;
	}
	ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh);

	ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh,
				      OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* Initialize ocfs2_refcount_block. */
	rb = (struct ocfs2_refcount_block *)new_bh->b_data;
	memset(rb, 0, inode->i_sb->s_blocksize);
	strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
	rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num);
	rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
	rb->rf_fs_generation = cpu_to_le32(osb->fs_generation);
	rb->rf_blkno = cpu_to_le64(first_blkno);
	rb->rf_count = cpu_to_le32(1);
	rb->rf_records.rl_count =
			cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb));
	spin_lock(&osb->osb_lock);
	rb->rf_generation = cpu_to_le32(osb->s_next_generation++);
	spin_unlock(&osb->osb_lock);

	ocfs2_journal_dirty(handle, new_bh);

	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	di->i_refcount_loc = cpu_to_le64(first_blkno);
	spin_unlock(&oi->ip_lock);

	mlog(0, "created tree for inode %lu, refblock %llu\n",
	     inode->i_ino, (unsigned long long)first_blkno);

	ocfs2_journal_dirty(handle, di_bh);

	/*
	 * We have to init the tree lock here since it will use
	 * the generation number to create it.
	 */
	new_tree->rf_generation = le32_to_cpu(rb->rf_generation);
	ocfs2_init_refcount_tree_lock(osb, new_tree, first_blkno,
				      new_tree->rf_generation);

	spin_lock(&osb->osb_lock);
	tree = ocfs2_find_refcount_tree(osb, first_blkno);

	/*
	 * We've just created a new refcount tree in this block. If
	 * we found a refcount tree on the ocfs2_super, it must be
	 * one we just deleted. We free the old tree before
	 * inserting the new tree.
	 */
	BUG_ON(tree && tree->rf_generation == new_tree->rf_generation);
	if (tree)
		ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
	ocfs2_insert_refcount_tree(osb, new_tree);
	spin_unlock(&osb->osb_lock);
	new_tree = NULL;
	if (tree)
		ocfs2_refcount_tree_put(tree);

out_commit:
	ocfs2_commit_trans(osb, handle);

out:
	if (new_tree) {
		ocfs2_metadata_cache_exit(&new_tree->rf_ci);
		kfree(new_tree);
	}

	brelse(new_bh);
	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);

	return ret;
}
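
/*
 * Illustrative sketch (assumed caller, not in this file): attaching a
 * refcount tree to an inode that lacks one, with the inode cluster
 * locked and di_bh pointing at its dinode:
 *
 *	if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
 *		ret = ocfs2_create_refcount_tree(inode, di_bh);
 *
 * On success, di->i_refcount_loc names the new root block and the
 * in-memory tree is cached in osb->osb_rf_lock_tree.
 */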

static int ocfs2_set_refcount_tree(struct inode *inode,
				   struct buffer_head *di_bh,
				   u64 refcount_loc)
{
	int ret;
	handle_t *handle = NULL;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct buffer_head *ref_root_bh = NULL;
	struct ocfs2_refcount_block *rb;
	struct ocfs2_refcount_tree *ref_tree;

	BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);

	ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
				       &ref_tree, &ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_SET_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, ref_root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
	le32_add_cpu(&rb->rf_count, 1);
	ocfs2_journal_dirty(handle, ref_root_bh);

	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	di->i_refcount_loc = cpu_to_le64(refcount_loc);
	spin_unlock(&oi->ip_lock);
	ocfs2_journal_dirty(handle, di_bh);

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
	brelse(ref_root_bh);

	return ret;
}

int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
{
	int ret, delete_tree = 0;
	handle_t *handle = NULL;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_refcount_block *rb;
	struct inode *alloc_inode = NULL;
	struct buffer_head *alloc_bh = NULL;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_refcount_tree *ref_tree;
	int credits = OCFS2_REFCOUNT_TREE_REMOVE_CREDITS;
	u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc);
	u16 bit = 0;

	if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
		return 0;

	BUG_ON(!ref_blkno);
	ret = ocfs2_lock_refcount_tree(osb, ref_blkno, 1, &ref_tree, &blk_bh);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	rb = (struct ocfs2_refcount_block *)blk_bh->b_data;

	/*
	 * If we are the last user, we need to free the block.
	 * So lock the allocator ahead.
	 */
	if (le32_to_cpu(rb->rf_count) == 1) {
		blk = le64_to_cpu(rb->rf_blkno);
		bit = le16_to_cpu(rb->rf_suballoc_bit);
		bg_blkno = ocfs2_which_suballoc_group(blk, bit);

		alloc_inode = ocfs2_get_system_file_inode(osb,
					EXTENT_ALLOC_SYSTEM_INODE,
					le16_to_cpu(rb->rf_suballoc_slot));
		if (!alloc_inode) {
			ret = -ENOMEM;
			mlog_errno(ret);
			goto out;
		}
		mutex_lock(&alloc_inode->i_mutex);

		ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
		if (ret) {
			mlog_errno(ret);
			goto out_mutex;
		}

		credits += OCFS2_SUBALLOC_FREE;
	}

	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock;
	}

	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, blk_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features &= ~OCFS2_HAS_REFCOUNT_FL;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	di->i_refcount_loc = 0;
	spin_unlock(&oi->ip_lock);
	ocfs2_journal_dirty(handle, di_bh);

	le32_add_cpu(&rb->rf_count, -1);
	ocfs2_journal_dirty(handle, blk_bh);

	if (!rb->rf_count) {
		delete_tree = 1;
		ocfs2_erase_refcount_tree_from_list(osb, ref_tree);
		ret = ocfs2_free_suballoc_bits(handle, alloc_inode,
					       alloc_bh, bit, bg_blkno, 1);
		if (ret)
			mlog_errno(ret);
	}

out_commit:
	ocfs2_commit_trans(osb, handle);
out_unlock:
	if (alloc_inode) {
		ocfs2_inode_unlock(alloc_inode, 1);
		brelse(alloc_bh);
	}
out_mutex:
	if (alloc_inode) {
		mutex_unlock(&alloc_inode->i_mutex);
		iput(alloc_inode);
	}
out:
	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
	if (delete_tree)
		ocfs2_refcount_tree_put(ref_tree);
	brelse(blk_bh);

	return ret;
}

static void ocfs2_find_refcount_rec_in_rl(struct ocfs2_caching_info *ci,
					  struct buffer_head *ref_leaf_bh,
					  u64 cpos, unsigned int len,
					  struct ocfs2_refcount_rec *ret_rec,
					  int *index)
{
	int i = 0;
	struct ocfs2_refcount_block *rb =
		(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	struct ocfs2_refcount_rec *rec = NULL;

	for (; i < le16_to_cpu(rb->rf_records.rl_used); i++) {
		rec = &rb->rf_records.rl_recs[i];

		if (le64_to_cpu(rec->r_cpos) +
		    le32_to_cpu(rec->r_clusters) <= cpos)
			continue;
		else if (le64_to_cpu(rec->r_cpos) > cpos)
			break;

		/* ok, cpos falls in this rec. Just return. */
		if (ret_rec)
			*ret_rec = *rec;
		goto out;
	}

	if (ret_rec) {
		/* We hit a hole here, so fake the rec. */
		ret_rec->r_cpos = cpu_to_le64(cpos);
		ret_rec->r_refcount = 0;
		if (i < le16_to_cpu(rb->rf_records.rl_used) &&
		    le64_to_cpu(rec->r_cpos) < cpos + len)
			ret_rec->r_clusters =
				cpu_to_le32(le64_to_cpu(rec->r_cpos) - cpos);
		else
			ret_rec->r_clusters = cpu_to_le32(len);
	}

out:
	*index = i;
}
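
/*
 * Worked example for the hole case above (illustrative numbers): with
 * records [cpos 0, 4 clusters] and [cpos 10, 4 clusters], a query for
 * cpos = 5, len = 8 finds no covering record. The faked result is
 * r_cpos = 5, r_refcount = 0, and r_clusters = 10 - 5 = 5, i.e. the
 * hole is clamped at the start of the next record rather than at
 * cpos + len.
 */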

/*
 * Given a cpos and len, try to find the refcount record which contains cpos.
 * 1. If cpos can be found in one refcount record, return the record.
 * 2. If cpos can't be found, return a fake record which starts from cpos
 *    and ends at a value between cpos+len and the start of the next record.
 *    This fake record has r_refcount = 0.
 */
static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
				  struct buffer_head *ref_root_bh,
				  u64 cpos, unsigned int len,
				  struct ocfs2_refcount_rec *ret_rec,
				  int *index,
				  struct buffer_head **ret_bh)
{
	int ret = 0, i, found;
	u32 low_cpos;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *tmp, *rec = NULL;
	struct ocfs2_extent_block *eb;
	struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL;
	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
	struct ocfs2_refcount_block *rb =
			(struct ocfs2_refcount_block *)ref_root_bh->b_data;

	if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) {
		ocfs2_find_refcount_rec_in_rl(ci, ref_root_bh, cpos, len,
					      ret_rec, index);
		*ret_bh = ref_root_bh;
		get_bh(ref_root_bh);
		return 0;
	}

	el = &rb->rf_list;
	low_cpos = cpos & OCFS2_32BIT_POS_MASK;

	if (el->l_tree_depth) {
		ret = ocfs2_find_leaf(ci, el, low_cpos, &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(sb,
				    "refcount tree %llu has non-zero tree "
				    "depth in leaf btree block %llu\n",
				    (unsigned long long)ocfs2_metadata_cache_owner(ci),
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	found = 0;
	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
		rec = &el->l_recs[i];

		if (le32_to_cpu(rec->e_cpos) <= low_cpos) {
			found = 1;
			break;
		}
	}

	/* adjust len when we have an ocfs2_extent_rec after it. */
	if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) {
		tmp = &el->l_recs[i+1];

		if (le32_to_cpu(tmp->e_cpos) < cpos + len)
			len = le32_to_cpu(tmp->e_cpos) - cpos;
	}

	ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno),
					&ref_leaf_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ocfs2_find_refcount_rec_in_rl(ci, ref_leaf_bh, cpos, len,
				      ret_rec, index);
	*ret_bh = ref_leaf_bh;
out:
	brelse(eb_bh);
	return ret;
}
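
/*
 * Illustrative note (sketch, not in the original): the function above
 * serves both tree shapes. While the root still holds an inline record
 * list (OCFS2_REFCOUNT_TREE_FL clear), *ret_bh is the root itself with
 * an extra reference, so a caller can always do:
 *
 *	ret = ocfs2_get_refcount_rec(ci, ref_root_bh, cpos, len,
 *				     &rec, &index, &ref_leaf_bh);
 *	...
 *	brelse(ref_leaf_bh);		// safe in both shapes
 *
 * Once the root has been expanded, the same call first walks the extent
 * list to the covering leaf, and may shrink len so that the answer never
 * crosses a leaf boundary.
 */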

enum ocfs2_ref_rec_contig {
	REF_CONTIG_NONE = 0,
	REF_CONTIG_LEFT,
	REF_CONTIG_RIGHT,
	REF_CONTIG_LEFTRIGHT,
};

static enum ocfs2_ref_rec_contig
ocfs2_refcount_rec_adjacent(struct ocfs2_refcount_block *rb,
			    int index)
{
	if ((rb->rf_records.rl_recs[index].r_refcount ==
	     rb->rf_records.rl_recs[index + 1].r_refcount) &&
	    (le64_to_cpu(rb->rf_records.rl_recs[index].r_cpos) +
	     le32_to_cpu(rb->rf_records.rl_recs[index].r_clusters) ==
	     le64_to_cpu(rb->rf_records.rl_recs[index + 1].r_cpos)))
		return REF_CONTIG_RIGHT;

	return REF_CONTIG_NONE;
}

static enum ocfs2_ref_rec_contig
ocfs2_refcount_rec_contig(struct ocfs2_refcount_block *rb,
			  int index)
{
	enum ocfs2_ref_rec_contig ret = REF_CONTIG_NONE;

	if (index < le16_to_cpu(rb->rf_records.rl_used) - 1)
		ret = ocfs2_refcount_rec_adjacent(rb, index);

	if (index > 0) {
		enum ocfs2_ref_rec_contig tmp;

		tmp = ocfs2_refcount_rec_adjacent(rb, index - 1);

		if (tmp == REF_CONTIG_RIGHT) {
			if (ret == REF_CONTIG_RIGHT)
				ret = REF_CONTIG_LEFTRIGHT;
			else
				ret = REF_CONTIG_LEFT;
		}
	}

	return ret;
}

static void ocfs2_rotate_refcount_rec_left(struct ocfs2_refcount_block *rb,
					   int index)
{
	BUG_ON(rb->rf_records.rl_recs[index].r_refcount !=
	       rb->rf_records.rl_recs[index+1].r_refcount);

	le32_add_cpu(&rb->rf_records.rl_recs[index].r_clusters,
		     le32_to_cpu(rb->rf_records.rl_recs[index+1].r_clusters));

	if (index < le16_to_cpu(rb->rf_records.rl_used) - 2)
		memmove(&rb->rf_records.rl_recs[index + 1],
			&rb->rf_records.rl_recs[index + 2],
			sizeof(struct ocfs2_refcount_rec) *
			(le16_to_cpu(rb->rf_records.rl_used) - index - 2));

	memset(&rb->rf_records.rl_recs[le16_to_cpu(rb->rf_records.rl_used) - 1],
	       0, sizeof(struct ocfs2_refcount_rec));
	le16_add_cpu(&rb->rf_records.rl_used, -1);
}

/*
 * Merge the refcount rec if we are contiguous with the adjacent recs.
 */
static void ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb,
				     int index)
{
	enum ocfs2_ref_rec_contig contig =
				ocfs2_refcount_rec_contig(rb, index);

	if (contig == REF_CONTIG_NONE)
		return;

	if (contig == REF_CONTIG_LEFT || contig == REF_CONTIG_LEFTRIGHT) {
		BUG_ON(index == 0);
		index--;
	}

	ocfs2_rotate_refcount_rec_left(rb, index);

	if (contig == REF_CONTIG_LEFTRIGHT)
		ocfs2_rotate_refcount_rec_left(rb, index);
}
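
/*
 * Worked example (illustrative numbers): suppose index 1 was just
 * changed so that records 0..2 are
 *
 *	[cpos 0, 4 clusters, refcount 2]
 *	[cpos 4, 2 clusters, refcount 2]
 *	[cpos 6, 3 clusters, refcount 2]
 *
 * ocfs2_refcount_rec_contig() reports REF_CONTIG_LEFTRIGHT, the index
 * moves back to 0, and two left-rotations fold everything into a
 * single [cpos 0, 9 clusters, refcount 2] record.
 */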

static int ocfs2_change_refcount_rec(handle_t *handle,
				     struct ocfs2_caching_info *ci,
				     struct buffer_head *ref_leaf_bh,
				     int index, int change)
{
	int ret;
	struct ocfs2_refcount_block *rb =
			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	struct ocfs2_refcount_rec *rec = &rb->rf_records.rl_recs[index];

	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	mlog(0, "change index %d, old count %u, change %d\n", index,
	     le32_to_cpu(rec->r_refcount), change);
	le32_add_cpu(&rec->r_refcount, change);

	ocfs2_refcount_rec_merge(rb, index);

	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
	if (ret)
		mlog_errno(ret);
out:
	return ret;
}

static int ocfs2_expand_inline_ref_root(handle_t *handle,
					struct ocfs2_caching_info *ci,
					struct buffer_head *ref_root_bh,
					struct buffer_head **ref_leaf_bh,
					struct ocfs2_alloc_context *meta_ac)
{
	int ret;
	u16 suballoc_bit_start;
	u32 num_got;
	u64 blkno;
	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
	struct buffer_head *new_bh = NULL;
	struct ocfs2_refcount_block *new_rb;
	struct ocfs2_refcount_block *root_rb =
			(struct ocfs2_refcount_block *)ref_root_bh->b_data;

	ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
				   &suballoc_bit_start, &num_got,
				   &blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	new_bh = sb_getblk(sb, blkno);
	if (new_bh == NULL) {
		ret = -EIO;
		mlog_errno(ret);
		goto out;
	}
	ocfs2_set_new_buffer_uptodate(ci, new_bh);

	ret = ocfs2_journal_access_rb(handle, ci, new_bh,
				      OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Initialize ocfs2_refcount_block.
	 * It should contain the same information as the old root,
	 * so just memcpy it and change the corresponding fields.
	 */
	memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize);

	new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
	new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num);
	new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
	new_rb->rf_blkno = cpu_to_le64(blkno);
	new_rb->rf_cpos = cpu_to_le32(0);
	new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr);
	new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL);
	ocfs2_journal_dirty(handle, new_bh);

	/* Now change the root. */
	memset(&root_rb->rf_list, 0, sb->s_blocksize -
	       offsetof(struct ocfs2_refcount_block, rf_list));
	root_rb->rf_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_rb(sb));
	root_rb->rf_clusters = cpu_to_le32(1);
	root_rb->rf_list.l_next_free_rec = cpu_to_le16(1);
	root_rb->rf_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
	root_rb->rf_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
	root_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_TREE_FL);

	ocfs2_journal_dirty(handle, ref_root_bh);

	mlog(0, "new leaf block %llu, used %u\n", (unsigned long long)blkno,
	     le16_to_cpu(new_rb->rf_records.rl_used));

	*ref_leaf_bh = new_bh;
	new_bh = NULL;
out:
	brelse(new_bh);
	return ret;
}
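
/*
 * Sketch of the expansion above (illustrative layout):
 *
 *	before:	root block, OCFS2_REFCOUNT_TREE_FL clear
 *		rf_records = [rec0, rec1, ... recN]
 *
 *	after:	root block, OCFS2_REFCOUNT_TREE_FL set
 *		rf_list = [one extent rec -> blkno]
 *			|
 *		leaf block at blkno, OCFS2_REFCOUNT_LEAF_FL set
 *		rf_records = [rec0, rec1, ... recN]	(the memcpy'd copy)
 *
 * The record payload moves wholesale to the new leaf; the root is left
 * holding a one-entry extent list covering cpos 0.
 */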

static int ocfs2_refcount_rec_no_intersect(struct ocfs2_refcount_rec *prev,
					   struct ocfs2_refcount_rec *next)
{
	if (ocfs2_get_ref_rec_low_cpos(prev) + le32_to_cpu(prev->r_clusters) <=
		ocfs2_get_ref_rec_low_cpos(next))
		return 1;

	return 0;
}

static int cmp_refcount_rec_by_low_cpos(const void *a, const void *b)
{
	const struct ocfs2_refcount_rec *l = a, *r = b;
	u32 l_cpos = ocfs2_get_ref_rec_low_cpos(l);
	u32 r_cpos = ocfs2_get_ref_rec_low_cpos(r);

	if (l_cpos > r_cpos)
		return 1;
	if (l_cpos < r_cpos)
		return -1;
	return 0;
}

static int cmp_refcount_rec_by_cpos(const void *a, const void *b)
{
	const struct ocfs2_refcount_rec *l = a, *r = b;
	u64 l_cpos = le64_to_cpu(l->r_cpos);
	u64 r_cpos = le64_to_cpu(r->r_cpos);

	if (l_cpos > r_cpos)
		return 1;
	if (l_cpos < r_cpos)
		return -1;
	return 0;
}

static void swap_refcount_rec(void *a, void *b, int size)
{
	struct ocfs2_refcount_rec *l = a, *r = b, tmp;

	tmp = *(struct ocfs2_refcount_rec *)l;
	*(struct ocfs2_refcount_rec *)l =
		*(struct ocfs2_refcount_rec *)r;
	*(struct ocfs2_refcount_rec *)r = tmp;
}
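
/*
 * Illustrative example (assumed values): the two sort orders differ as
 * soon as r_cpos values exceed 32 bits. With records at r_cpos
 * 0x100000001 and 0x2, the low-32-bit order (1 < 2) puts the first
 * record first, while the full 64-bit order (0x100000001 > 0x2) puts
 * it last. ocfs2_divide_leaf_refcount_block() below sorts by low cpos
 * to pick a split point, then restores 64-bit order in both blocks.
 * ocfs2_get_ref_rec_low_cpos() is assumed to return
 * le64_to_cpu(r_cpos) & OCFS2_32BIT_POS_MASK, matching the masking done
 * in ocfs2_get_refcount_rec().
 */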

/*
 * The refcount recs are ordered by their 64-bit cpos,
 * but we will use the low 32 bits as the e_cpos in the b-tree.
 * So we need to make sure that this pos isn't intersected with others.
 *
 * Note: The refcount block is already sorted by low 32-bit cpos,
 * so just try the middle pos first; we will exit when we find
 * a good position.
 */
static int ocfs2_find_refcount_split_pos(struct ocfs2_refcount_list *rl,
					 u32 *split_pos, int *split_index)
{
	int num_used = le16_to_cpu(rl->rl_used);
	int delta, middle = num_used / 2;

	for (delta = 0; delta < middle; delta++) {
		/* Let's check delta earlier than middle */
		if (ocfs2_refcount_rec_no_intersect(
					&rl->rl_recs[middle - delta - 1],
					&rl->rl_recs[middle - delta])) {
			*split_index = middle - delta;
			break;
		}

		/* For even counts, don't walk off the end */
		if ((middle + delta + 1) == num_used)
			continue;

		/* Now try delta past middle */
		if (ocfs2_refcount_rec_no_intersect(
					&rl->rl_recs[middle + delta],
					&rl->rl_recs[middle + delta + 1])) {
			*split_index = middle + delta + 1;
			break;
		}
	}

	if (delta >= middle)
		return -ENOSPC;

	*split_pos = ocfs2_get_ref_rec_low_cpos(&rl->rl_recs[*split_index]);
	return 0;
}
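
/*
 * Worked example (illustrative numbers): with 6 records whose low cpos
 * ranges are [0,4) [4,8) [8,12) [8,16) [16,20) [20,24), middle is 3.
 * At delta = 0 the first check compares recs 2 and 3, which intersect
 * ([8,12) overlaps [8,16)); the second compares recs 3 and 4, which do
 * not (8 + 8 <= 16), so the split lands at index 4 with split_pos = 16.
 */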

static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
					    struct buffer_head *new_bh,
					    u32 *split_cpos)
{
	int split_index = 0, num_moved, ret;
	u32 cpos = 0;
	struct ocfs2_refcount_block *rb =
			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	struct ocfs2_refcount_list *rl = &rb->rf_records;
	struct ocfs2_refcount_block *new_rb =
			(struct ocfs2_refcount_block *)new_bh->b_data;
	struct ocfs2_refcount_list *new_rl = &new_rb->rf_records;

	mlog(0, "split old leaf refcount block %llu, count = %u, used = %u\n",
	     (unsigned long long)ref_leaf_bh->b_blocknr,
	     le16_to_cpu(rl->rl_count), le16_to_cpu(rl->rl_used));

	/*
	 * XXX: Improvement later.
	 * If we know all the high 32 bit cpos are the same, no need to sort.
	 *
	 * In order to make the whole process safe, we do:
	 * 1. sort the entries by their low 32 bit cpos first so that we can
	 *    find the split cpos easily.
	 * 2. call ocfs2_insert_extent to insert the new refcount block.
	 * 3. move the refcount rec to the new block.
	 * 4. sort the entries by their 64 bit cpos.
	 * 5. dirty the new_rb and rb.
	 */
	sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
	     sizeof(struct ocfs2_refcount_rec),
	     cmp_refcount_rec_by_low_cpos, swap_refcount_rec);

	ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	new_rb->rf_cpos = cpu_to_le32(cpos);

	/* move refcount records starting from split_index to the new block. */
	num_moved = le16_to_cpu(rl->rl_used) - split_index;
	memcpy(new_rl->rl_recs, &rl->rl_recs[split_index],
	       num_moved * sizeof(struct ocfs2_refcount_rec));

	/* ok, remove the entries we just moved over to the other block. */
	memset(&rl->rl_recs[split_index], 0,
	       num_moved * sizeof(struct ocfs2_refcount_rec));

	/* change old and new rl_used accordingly. */
	le16_add_cpu(&rl->rl_used, -num_moved);
	new_rl->rl_used = cpu_to_le16(num_moved);

	sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
	     sizeof(struct ocfs2_refcount_rec),
	     cmp_refcount_rec_by_cpos, swap_refcount_rec);

	sort(&new_rl->rl_recs, le16_to_cpu(new_rl->rl_used),
	     sizeof(struct ocfs2_refcount_rec),
	     cmp_refcount_rec_by_cpos, swap_refcount_rec);

	*split_cpos = cpos;
	return 0;
}

static int ocfs2_new_leaf_refcount_block(handle_t *handle,
					 struct ocfs2_caching_info *ci,
					 struct buffer_head *ref_root_bh,
					 struct buffer_head *ref_leaf_bh,
					 struct ocfs2_alloc_context *meta_ac)
{
	int ret;
	u16 suballoc_bit_start;
	u32 num_got, new_cpos;
	u64 blkno;
	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
	struct ocfs2_refcount_block *root_rb =
			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
	struct buffer_head *new_bh = NULL;
	struct ocfs2_refcount_block *new_rb;
	struct ocfs2_extent_tree ref_et;

	BUG_ON(!(le32_to_cpu(root_rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL));

	ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
				   &suballoc_bit_start, &num_got,
				   &blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	new_bh = sb_getblk(sb, blkno);
	if (new_bh == NULL) {
		ret = -EIO;
		mlog_errno(ret);
		goto out;
	}
	ocfs2_set_new_buffer_uptodate(ci, new_bh);

	ret = ocfs2_journal_access_rb(handle, ci, new_bh,
				      OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Initialize ocfs2_refcount_block. */
	new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
	memset(new_rb, 0, sb->s_blocksize);
	strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
	new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num);
	new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
	new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
	new_rb->rf_blkno = cpu_to_le64(blkno);
	new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr);
	new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL);
	new_rb->rf_records.rl_count =
				cpu_to_le16(ocfs2_refcount_recs_per_rb(sb));
	new_rb->rf_generation = root_rb->rf_generation;

	ret = ocfs2_divide_leaf_refcount_block(ref_leaf_bh, new_bh, &new_cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ocfs2_journal_dirty(handle, ref_leaf_bh);
	ocfs2_journal_dirty(handle, new_bh);

	ocfs2_init_refcount_extent_tree(&ref_et, ci, ref_root_bh);

	mlog(0, "insert new leaf block %llu at %u\n",
	     (unsigned long long)new_bh->b_blocknr, new_cpos);

	/* Insert the new leaf block with the specific offset cpos. */
	ret = ocfs2_insert_extent(handle, &ref_et, new_cpos, new_bh->b_blocknr,
				  1, 0, meta_ac);
	if (ret)
		mlog_errno(ret);

out:
	brelse(new_bh);
	return ret;
}

static int ocfs2_expand_refcount_tree(handle_t *handle,
				      struct ocfs2_caching_info *ci,
				      struct buffer_head *ref_root_bh,
				      struct buffer_head *ref_leaf_bh,
				      struct ocfs2_alloc_context *meta_ac)
{
	int ret;
	struct buffer_head *expand_bh = NULL;

	if (ref_root_bh == ref_leaf_bh) {
		/*
		 * the old root bh hasn't been expanded to a b-tree,
		 * so expand it first.
		 */
		ret = ocfs2_expand_inline_ref_root(handle, ci, ref_root_bh,
						   &expand_bh, meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	} else {
		expand_bh = ref_leaf_bh;
		get_bh(expand_bh);
	}

	/* Now add a new refcount block into the tree. */
	ret = ocfs2_new_leaf_refcount_block(handle, ci, ref_root_bh,
					    expand_bh, meta_ac);
	if (ret)
		mlog_errno(ret);
out:
	brelse(expand_bh);
	return ret;
}

/*
 * Adjust the extent rec in the b-tree representing ref_leaf_bh.
 *
 * Only called when we have inserted a new refcount rec at index 0
 * which means ocfs2_extent_rec.e_cpos may need some change.
 */
static int ocfs2_adjust_refcount_rec(handle_t *handle,
				     struct ocfs2_caching_info *ci,
				     struct buffer_head *ref_root_bh,
				     struct buffer_head *ref_leaf_bh,
				     struct ocfs2_refcount_rec *rec)
{
	int ret = 0, i;
	u32 new_cpos, old_cpos;
	struct ocfs2_path *path = NULL;
	struct ocfs2_extent_tree et;
	struct ocfs2_refcount_block *rb =
		(struct ocfs2_refcount_block *)ref_root_bh->b_data;
	struct ocfs2_extent_list *el;

	if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL))
		goto out;

	rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	old_cpos = le32_to_cpu(rb->rf_cpos);
	new_cpos = le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK;
	if (old_cpos <= new_cpos)
		goto out;

	ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);

	path = ocfs2_new_path_from_et(&et);
	if (!path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_find_path(ci, path, old_cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * 2 more credits, one for the leaf refcount block, one for
	 * the extent block containing the extent rec.
	 */
	ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_eb(handle, ci, path_leaf_bh(path),
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/* change the leaf extent block first. */
	el = path_leaf_el(path);

	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++)
		if (le32_to_cpu(el->l_recs[i].e_cpos) == old_cpos)
			break;

	BUG_ON(i == le16_to_cpu(el->l_next_free_rec));

	el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);

	/* change the r_cpos in the leaf block. */
	rb->rf_cpos = cpu_to_le32(new_cpos);

	ocfs2_journal_dirty(handle, path_leaf_bh(path));
	ocfs2_journal_dirty(handle, ref_leaf_bh);

out:
	ocfs2_free_path(path);
	return ret;
}
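
/*
 * Worked example (illustrative numbers): suppose a leaf covered low
 * cpos [100, 200) and a new record at cpos 50 was just inserted at
 * index 0. The extent rec still says e_cpos = 100, so a later lookup
 * for 50 would miss this leaf. The function above rewrites both the
 * extent rec's e_cpos and the leaf's rf_cpos to 50.
 */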
  1316. static int ocfs2_insert_refcount_rec(handle_t *handle,
  1317. struct ocfs2_caching_info *ci,
  1318. struct buffer_head *ref_root_bh,
  1319. struct buffer_head *ref_leaf_bh,
  1320. struct ocfs2_refcount_rec *rec,
  1321. int index,
  1322. struct ocfs2_alloc_context *meta_ac)
  1323. {
  1324. int ret;
  1325. struct ocfs2_refcount_block *rb =
  1326. (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
  1327. struct ocfs2_refcount_list *rf_list = &rb->rf_records;
  1328. struct buffer_head *new_bh = NULL;
  1329. BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);
  1330. if (rf_list->rl_used == rf_list->rl_count) {
  1331. u64 cpos = le64_to_cpu(rec->r_cpos);
  1332. u32 len = le32_to_cpu(rec->r_clusters);
  1333. ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh,
  1334. ref_leaf_bh, meta_ac);
  1335. if (ret) {
  1336. mlog_errno(ret);
  1337. goto out;
  1338. }
  1339. ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
  1340. cpos, len, NULL, &index,
  1341. &new_bh);
  1342. if (ret) {
  1343. mlog_errno(ret);
  1344. goto out;
  1345. }
  1346. ref_leaf_bh = new_bh;
  1347. rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
  1348. rf_list = &rb->rf_records;
  1349. }
  1350. ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
  1351. OCFS2_JOURNAL_ACCESS_WRITE);
  1352. if (ret) {
  1353. mlog_errno(ret);
  1354. goto out;
  1355. }
  1356. if (index < le16_to_cpu(rf_list->rl_used))
  1357. memmove(&rf_list->rl_recs[index + 1],
  1358. &rf_list->rl_recs[index],
  1359. (le16_to_cpu(rf_list->rl_used) - index) *
  1360. sizeof(struct ocfs2_refcount_rec));
  1361. mlog(0, "insert refcount record start %llu, len %u, count %u "
  1362. "to leaf block %llu at index %d\n",
  1363. (unsigned long long)le64_to_cpu(rec->r_cpos),
  1364. le32_to_cpu(rec->r_clusters), le32_to_cpu(rec->r_refcount),
  1365. (unsigned long long)ref_leaf_bh->b_blocknr, index);
  1366. rf_list->rl_recs[index] = *rec;
  1367. le16_add_cpu(&rf_list->rl_used, 1);
  1368. ocfs2_refcount_rec_merge(rb, index);
  1369. ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
  1370. if (ret) {
  1371. mlog_errno(ret);
  1372. goto out;
  1373. }
  1374. if (index == 0) {
  1375. ret = ocfs2_adjust_refcount_rec(handle, ci,
  1376. ref_root_bh,
  1377. ref_leaf_bh, rec);
  1378. if (ret)
  1379. mlog_errno(ret);
  1380. }
  1381. out:
  1382. brelse(new_bh);
  1383. return ret;
  1384. }
  1385. /*
  1386. * Split the refcount_rec indexed by "index" in ref_leaf_bh.
  1387. * This is much simple than our b-tree code.
  1388. * split_rec is the new refcount rec we want to insert.
  1389. * If split_rec->r_refcount > 0, we are changing the refcount(in case we
  1390. * increase refcount or decrease a refcount to non-zero).
  1391. * If split_rec->r_refcount == 0, we are punching a hole in current refcount
  1392. * rec( in case we decrease a refcount to zero).
  1393. */
static int ocfs2_split_refcount_rec(handle_t *handle,
				    struct ocfs2_caching_info *ci,
				    struct buffer_head *ref_root_bh,
				    struct buffer_head *ref_leaf_bh,
				    struct ocfs2_refcount_rec *split_rec,
				    int index,
				    struct ocfs2_alloc_context *meta_ac,
				    struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret, recs_need;
	u32 len;
	struct ocfs2_refcount_block *rb =
			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	struct ocfs2_refcount_list *rf_list = &rb->rf_records;
	struct ocfs2_refcount_rec *orig_rec = &rf_list->rl_recs[index];
	struct ocfs2_refcount_rec *tail_rec = NULL;
	struct buffer_head *new_bh = NULL;

	BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);

	mlog(0, "original r_cpos %llu, cluster %u, split %llu, cluster %u\n",
	     le64_to_cpu(orig_rec->r_cpos), le32_to_cpu(orig_rec->r_clusters),
	     le64_to_cpu(split_rec->r_cpos),
	     le32_to_cpu(split_rec->r_clusters));
	/*
	 * If we just need to split off the head or tail clusters,
	 * no more recs are needed: the split alone is enough.
	 * Otherwise we need at least one new rec.
	 */
	if (!split_rec->r_refcount &&
	    (split_rec->r_cpos == orig_rec->r_cpos ||
	     le64_to_cpu(split_rec->r_cpos) +
	     le32_to_cpu(split_rec->r_clusters) ==
	     le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters)))
		recs_need = 0;
	else
		recs_need = 1;

	/*
	 * We need one more rec if we split in the middle and the new rec
	 * has some refcount in it.
	 */
	if (split_rec->r_refcount &&
	    (split_rec->r_cpos != orig_rec->r_cpos &&
	     le64_to_cpu(split_rec->r_cpos) +
	     le32_to_cpu(split_rec->r_clusters) !=
	     le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters)))
		recs_need++;
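
	/*
	 * Worked example (numbers invented for illustration): with
	 * orig_rec = [100, 110) and split_rec = [103, 107), a hole punch
	 * (r_refcount == 0) needs one extra rec for the tail [107, 110),
	 * so recs_need = 1; a refcount change (r_refcount > 0) also needs
	 * a slot for split_rec itself, so recs_need = 2.  If split_rec
	 * lines up with either end of orig_rec, each count drops by one.
	 */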
	/* If the leaf block doesn't have enough records, expand it. */
	if (le16_to_cpu(rf_list->rl_used) + recs_need >
	    le16_to_cpu(rf_list->rl_count)) {
		struct ocfs2_refcount_rec tmp_rec;
		u64 cpos = le64_to_cpu(orig_rec->r_cpos);
		len = le32_to_cpu(orig_rec->r_clusters);
		ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh,
						 ref_leaf_bh, meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * We have to re-get it since now cpos may be moved to
		 * another leaf block.
		 */
		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
					     cpos, len, &tmp_rec, &index,
					     &new_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ref_leaf_bh = new_bh;
		rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
		rf_list = &rb->rf_records;
		orig_rec = &rf_list->rl_recs[index];
	}

	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}
	/*
	 * We have calculated how many new records we need and stored
	 * that in recs_need, so spare enough space first by moving the
	 * records after "index" to the end.
	 */
	if (index != le16_to_cpu(rf_list->rl_used) - 1)
		memmove(&rf_list->rl_recs[index + 1 + recs_need],
			&rf_list->rl_recs[index + 1],
			(le16_to_cpu(rf_list->rl_used) - index - 1) *
			sizeof(struct ocfs2_refcount_rec));

	len = (le64_to_cpu(orig_rec->r_cpos) +
	       le32_to_cpu(orig_rec->r_clusters)) -
	      (le64_to_cpu(split_rec->r_cpos) +
	       le32_to_cpu(split_rec->r_clusters));

	/*
	 * If we have "len", then we split the tail and move it
	 * to the end of the space we have just spared.
	 */
	if (len) {
		tail_rec = &rf_list->rl_recs[index + recs_need];

		memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec));
		le64_add_cpu(&tail_rec->r_cpos,
			     le32_to_cpu(tail_rec->r_clusters) - len);
		tail_rec->r_clusters = cpu_to_le32(len);
	}
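
	/*
	 * Continuing the invented example above: for orig_rec = [100, 110)
	 * and split_rec = [103, 107), len = (100 + 10) - (103 + 4) = 3.
	 * tail_rec starts as a copy of orig_rec, its r_cpos is advanced by
	 * 10 - 3 = 7 to cluster 107, and its length is trimmed to 3, giving
	 * the surviving tail [107, 110).
	 */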
	/*
	 * If the split pos isn't the same as the original one, we need to
	 * split in the head.
	 *
	 * Note: it can happen that split_rec.r_refcount = 0,
	 * recs_need = 0 and len > 0, which means we just cut the head from
	 * orig_rec.  In that case orig_rec was already modified above (it
	 * became the tail), so the r_cpos check alone would be misleading;
	 * the tail_rec != orig_rec test covers it.
	 */
	if (split_rec->r_cpos != orig_rec->r_cpos && tail_rec != orig_rec) {
		len = le64_to_cpu(split_rec->r_cpos) -
		      le64_to_cpu(orig_rec->r_cpos);
		orig_rec->r_clusters = cpu_to_le32(len);

		index++;
	}

	le16_add_cpu(&rf_list->rl_used, recs_need);

	if (split_rec->r_refcount) {
		rf_list->rl_recs[index] = *split_rec;
		mlog(0, "insert refcount record start %llu, len %u, count %u "
		     "to leaf block %llu at index %d\n",
		     (unsigned long long)le64_to_cpu(split_rec->r_cpos),
		     le32_to_cpu(split_rec->r_clusters),
		     le32_to_cpu(split_rec->r_refcount),
		     (unsigned long long)ref_leaf_bh->b_blocknr, index);

		ocfs2_refcount_rec_merge(rb, index);
	}

	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
	if (ret)
		mlog_errno(ret);

out:
	brelse(new_bh);
	return ret;
}
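/*
 * Bump the refcount on every cluster in [cpos, cpos + len) by one,
 * walking the range record by record: an exactly matching record is
 * simply incremented, a hole gets a fresh record with r_refcount = 1,
 * and a partial overlap is handled by splitting the existing record
 * first.  Each pass consumes set_len clusters until the range is done.
 */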
static int __ocfs2_increase_refcount(handle_t *handle,
				     struct ocfs2_caching_info *ci,
				     struct buffer_head *ref_root_bh,
				     u64 cpos, u32 len,
				     struct ocfs2_alloc_context *meta_ac,
				     struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret = 0, index;
	struct buffer_head *ref_leaf_bh = NULL;
	struct ocfs2_refcount_rec rec;
	unsigned int set_len = 0;

	mlog(0, "Tree owner %llu, add refcount start %llu, len %u\n",
	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
	     (unsigned long long)cpos, len);

	while (len) {
		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
					     cpos, len, &rec, &index,
					     &ref_leaf_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		set_len = le32_to_cpu(rec.r_clusters);

		/*
		 * Here we may meet one of 3 situations:
		 *
		 * 1. If we find an already existing record whose range
		 *    starts at cpos and fits within the remaining length,
		 *    we just need to increase its r_refcount and we are
		 *    done with it.
		 * 2. If we find a hole, just insert a record with
		 *    r_refcount = 1.
		 * 3. If we are in the middle of one extent record, split
		 *    it.
		 */
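
		/*
		 * For case 3, the clamp below keeps the split inside both
		 * the request and the record.  E.g. (invented numbers), if
		 * the remaining request is [103, 112) and the record found
		 * covers [100, 110), then set_len = min(103 + 9, 100 + 10)
		 * - 103 = 7, so this pass handles [103, 110) and the loop
		 * continues at cluster 110.
		 */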
		if (rec.r_refcount && le64_to_cpu(rec.r_cpos) == cpos &&
		    set_len <= len) {
			mlog(0, "increase refcount rec, start %llu, len %u, "
			     "count %u\n", (unsigned long long)cpos, set_len,
			     le32_to_cpu(rec.r_refcount));
			ret = ocfs2_change_refcount_rec(handle, ci,
							ref_leaf_bh, index, 1);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		} else if (!rec.r_refcount) {
			rec.r_refcount = cpu_to_le32(1);

			mlog(0, "insert refcount rec, start %llu, len %u\n",
			     (unsigned long long)le64_to_cpu(rec.r_cpos),
			     set_len);
			ret = ocfs2_insert_refcount_rec(handle, ci,
							ref_root_bh,
							ref_leaf_bh,
							&rec, index, meta_ac);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		} else {
			set_len = min((u64)(cpos + len),
				      le64_to_cpu(rec.r_cpos) + set_len) - cpos;
			rec.r_cpos = cpu_to_le64(cpos);
			rec.r_clusters = cpu_to_le32(set_len);
			le32_add_cpu(&rec.r_refcount, 1);

			mlog(0, "split refcount rec, start %llu, "
			     "len %u, count %u\n",
			     (unsigned long long)le64_to_cpu(rec.r_cpos),
			     set_len, le32_to_cpu(rec.r_refcount));

			ret = ocfs2_split_refcount_rec(handle, ci,
						       ref_root_bh,
						       ref_leaf_bh,
						       &rec, index,
						       meta_ac, dealloc);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		}

		cpos += set_len;
		len -= set_len;
		brelse(ref_leaf_bh);
		ref_leaf_bh = NULL;
	}

out:
	brelse(ref_leaf_bh);
	return ret;
}