suballoc.c

  1. /* -*- mode: c; c-basic-offset: 8; -*-
  2. * vim: noexpandtab sw=8 ts=8 sts=0:
  3. *
  4. * suballoc.c
  5. *
  6. * metadata alloc and free
  7. * Inspired by ext3 block groups.
  8. *
  9. * Copyright (C) 2002, 2004 Oracle. All rights reserved.
  10. *
  11. * This program is free software; you can redistribute it and/or
  12. * modify it under the terms of the GNU General Public
  13. * License as published by the Free Software Foundation; either
  14. * version 2 of the License, or (at your option) any later version.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  19. * General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public
  22. * License along with this program; if not, write to the
  23. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24. * Boston, MA 02111-1307, USA.
  25. */
  26. #include <linux/fs.h>
  27. #include <linux/types.h>
  28. #include <linux/slab.h>
  29. #include <linux/highmem.h>
  30. #define MLOG_MASK_PREFIX ML_DISK_ALLOC
  31. #include <cluster/masklog.h>
  32. #include "ocfs2.h"
  33. #include "alloc.h"
  34. #include "blockcheck.h"
  35. #include "dlmglue.h"
  36. #include "inode.h"
  37. #include "journal.h"
  38. #include "localalloc.h"
  39. #include "suballoc.h"
  40. #include "super.h"
  41. #include "sysfile.h"
  42. #include "uptodate.h"
  43. #include "buffer_head_io.h"
  44. #define NOT_ALLOC_NEW_GROUP 0
  45. #define ALLOC_NEW_GROUP 0x1
  46. #define ALLOC_GROUPS_FROM_GLOBAL 0x2
  47. #define OCFS2_MAX_TO_STEAL 1024
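/*
 * Flag semantics for ocfs2_reserve_suballoc_bits():
 *
 *   ALLOC_NEW_GROUP          - the reservation may grow the allocator by
 *                              adding a new block group when it is full.
 *   NOT_ALLOC_NEW_GROUP      - return -ENOSPC instead of growing; used
 *                              while stealing from another slot.
 *   ALLOC_GROUPS_FROM_GLOBAL - back the reservation straight from the
 *                              global bitmap, bypassing local alloc.
 *
 * OCFS2_MAX_TO_STEAL caps how many consecutive inode/extent reservations
 * we satisfy by stealing from other slots before retrying our own.
 */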
  48. static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
  49. static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
  50. static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
  51. static int ocfs2_block_group_fill(handle_t *handle,
  52. struct inode *alloc_inode,
  53. struct buffer_head *bg_bh,
  54. u64 group_blkno,
  55. u16 my_chain,
  56. struct ocfs2_chain_list *cl);
  57. static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
  58. struct inode *alloc_inode,
  59. struct buffer_head *bh,
  60. u64 max_block,
  61. u64 *last_alloc_group,
  62. int flags);
  63. static int ocfs2_cluster_group_search(struct inode *inode,
  64. struct buffer_head *group_bh,
  65. u32 bits_wanted, u32 min_bits,
  66. u64 max_block,
  67. u16 *bit_off, u16 *bits_found);
  68. static int ocfs2_block_group_search(struct inode *inode,
  69. struct buffer_head *group_bh,
  70. u32 bits_wanted, u32 min_bits,
  71. u64 max_block,
  72. u16 *bit_off, u16 *bits_found);
  73. static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
  74. struct ocfs2_alloc_context *ac,
  75. handle_t *handle,
  76. u32 bits_wanted,
  77. u32 min_bits,
  78. u16 *bit_off,
  79. unsigned int *num_bits,
  80. u64 *bg_blkno);
  81. static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
  82. int nr);
  83. static inline int ocfs2_block_group_set_bits(handle_t *handle,
  84. struct inode *alloc_inode,
  85. struct ocfs2_group_desc *bg,
  86. struct buffer_head *group_bh,
  87. unsigned int bit_off,
  88. unsigned int num_bits);
  89. static inline int ocfs2_block_group_clear_bits(handle_t *handle,
  90. struct inode *alloc_inode,
  91. struct ocfs2_group_desc *bg,
  92. struct buffer_head *group_bh,
  93. unsigned int bit_off,
  94. unsigned int num_bits);
  95. static int ocfs2_relink_block_group(handle_t *handle,
  96. struct inode *alloc_inode,
  97. struct buffer_head *fe_bh,
  98. struct buffer_head *bg_bh,
  99. struct buffer_head *prev_bg_bh,
  100. u16 chain);
  101. static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
  102. u32 wanted);
  103. static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
  104. u64 bg_blkno,
  105. u16 bg_bit_off);
  106. static inline void ocfs2_block_to_cluster_group(struct inode *inode,
  107. u64 data_blkno,
  108. u64 *bg_blkno,
  109. u16 *bg_bit_off);
  110. static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
  111. u32 bits_wanted, u64 max_block,
  112. int flags,
  113. struct ocfs2_alloc_context **ac);
  114. void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
  115. {
  116. struct inode *inode = ac->ac_inode;
  117. if (inode) {
  118. if (ac->ac_which != OCFS2_AC_USE_LOCAL)
  119. ocfs2_inode_unlock(inode, 1);
  120. mutex_unlock(&inode->i_mutex);
  121. iput(inode);
  122. ac->ac_inode = NULL;
  123. }
  124. brelse(ac->ac_bh);
  125. ac->ac_bh = NULL;
  126. }
  127. void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
  128. {
  129. ocfs2_free_ac_resource(ac);
  130. kfree(ac);
  131. }
  132. static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
  133. {
  134. return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
  135. }
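/*
 * A group descriptor covers cl_cpg clusters with cl_bpc bits per
 * cluster.  For illustration, cl_cpg = 8 and cl_bpc = 16 would give a
 * 128-bit group bitmap; for the global cluster bitmap cl_bpc is 1, so
 * the group's bits map one-to-one onto clusters.
 */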
  136. #define do_error(fmt, ...) \
  137. do { \
  138. if (clean_error) \
  139. mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \
  140. else \
  141. ocfs2_error(sb, fmt, ##__VA_ARGS__); \
  142. } while (0)
  143. static int ocfs2_validate_gd_self(struct super_block *sb,
  144. struct buffer_head *bh,
  145. int clean_error)
  146. {
  147. struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
  148. if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
  149. do_error("Group descriptor #%llu has bad signature %.*s",
  150. (unsigned long long)bh->b_blocknr, 7,
  151. gd->bg_signature);
  152. return -EINVAL;
  153. }
  154. if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) {
  155. do_error("Group descriptor #%llu has an invalid bg_blkno "
  156. "of %llu",
  157. (unsigned long long)bh->b_blocknr,
  158. (unsigned long long)le64_to_cpu(gd->bg_blkno));
  159. return -EINVAL;
  160. }
  161. if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) {
  162. do_error("Group descriptor #%llu has an invalid "
  163. "fs_generation of #%u",
  164. (unsigned long long)bh->b_blocknr,
  165. le32_to_cpu(gd->bg_generation));
  166. return -EINVAL;
  167. }
  168. if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
  169. do_error("Group descriptor #%llu has bit count %u but "
  170. "claims that %u are free",
  171. (unsigned long long)bh->b_blocknr,
  172. le16_to_cpu(gd->bg_bits),
  173. le16_to_cpu(gd->bg_free_bits_count));
  174. return -EINVAL;
  175. }
  176. if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
  177. do_error("Group descriptor #%llu has bit count %u but "
  178. "max bitmap bits of %u",
  179. (unsigned long long)bh->b_blocknr,
  180. le16_to_cpu(gd->bg_bits),
  181. 8 * le16_to_cpu(gd->bg_size));
  182. return -EINVAL;
  183. }
  184. return 0;
  185. }
  186. static int ocfs2_validate_gd_parent(struct super_block *sb,
  187. struct ocfs2_dinode *di,
  188. struct buffer_head *bh,
  189. int clean_error)
  190. {
  191. unsigned int max_bits;
  192. struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
  193. if (di->i_blkno != gd->bg_parent_dinode) {
  194. do_error("Group descriptor #%llu has bad parent "
  195. "pointer (%llu, expected %llu)",
  196. (unsigned long long)bh->b_blocknr,
  197. (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
  198. (unsigned long long)le64_to_cpu(di->i_blkno));
  199. return -EINVAL;
  200. }
  201. max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
  202. if (le16_to_cpu(gd->bg_bits) > max_bits) {
  203. do_error("Group descriptor #%llu has bit count of %u",
  204. (unsigned long long)bh->b_blocknr,
  205. le16_to_cpu(gd->bg_bits));
  206. return -EINVAL;
  207. }
  208. if (le16_to_cpu(gd->bg_chain) >=
  209. le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
  210. do_error("Group descriptor #%llu has bad chain %u",
  211. (unsigned long long)bh->b_blocknr,
  212. le16_to_cpu(gd->bg_chain));
  213. return -EINVAL;
  214. }
  215. return 0;
  216. }
  217. #undef do_error
  218. /*
  219. * This version only prints errors. It does not fail the filesystem, and
  220. * exists only for resize.
  221. */
  222. int ocfs2_check_group_descriptor(struct super_block *sb,
  223. struct ocfs2_dinode *di,
  224. struct buffer_head *bh)
  225. {
  226. int rc;
  227. struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
  228. BUG_ON(!buffer_uptodate(bh));
  229. /*
  230. * If the ecc fails, we return the error but otherwise
  231. * leave the filesystem running. We know any error is
  232. * local to this block.
  233. */
  234. rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
  235. if (rc) {
  236. mlog(ML_ERROR,
  237. "Checksum failed for group descriptor %llu\n",
  238. (unsigned long long)bh->b_blocknr);
  239. } else
  240. rc = ocfs2_validate_gd_self(sb, bh, 1);
  241. if (!rc)
  242. rc = ocfs2_validate_gd_parent(sb, di, bh, 1);
  243. return rc;
  244. }
  245. static int ocfs2_validate_group_descriptor(struct super_block *sb,
  246. struct buffer_head *bh)
  247. {
  248. int rc;
  249. struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
  250. mlog(0, "Validating group descriptor %llu\n",
  251. (unsigned long long)bh->b_blocknr);
  252. BUG_ON(!buffer_uptodate(bh));
  253. /*
  254. * If the ecc fails, we return the error but otherwise
  255. * leave the filesystem running. We know any error is
  256. * local to this block.
  257. */
  258. rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
  259. if (rc)
  260. return rc;
  261. /*
  262. * Errors after here are fatal.
  263. */
  264. return ocfs2_validate_gd_self(sb, bh, 0);
  265. }
  266. int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
  267. u64 gd_blkno, struct buffer_head **bh)
  268. {
  269. int rc;
  270. struct buffer_head *tmp = *bh;
  271. rc = ocfs2_read_block(INODE_CACHE(inode), gd_blkno, &tmp,
  272. ocfs2_validate_group_descriptor);
  273. if (rc)
  274. goto out;
  275. rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0);
  276. if (rc) {
  277. brelse(tmp);
  278. goto out;
  279. }
  280. /* If ocfs2_read_block() got us a new bh, pass it up. */
  281. if (!*bh)
  282. *bh = tmp;
  283. out:
  284. return rc;
  285. }
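/*
 * ocfs2_read_block() only runs the validator on blocks it actually
 * pulls from disk, which covers the ecc and self-consistency checks.
 * The parent checks need the owning dinode, so they are repeated here
 * on every lookup, cache hit or not.
 */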
  286. static int ocfs2_block_group_fill(handle_t *handle,
  287. struct inode *alloc_inode,
  288. struct buffer_head *bg_bh,
  289. u64 group_blkno,
  290. u16 my_chain,
  291. struct ocfs2_chain_list *cl)
  292. {
  293. int status = 0;
  294. struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
  295. struct super_block * sb = alloc_inode->i_sb;
  296. mlog_entry_void();
  297. if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
  298. ocfs2_error(alloc_inode->i_sb, "group block (%llu) != "
  299. "b_blocknr (%llu)",
  300. (unsigned long long)group_blkno,
  301. (unsigned long long) bg_bh->b_blocknr);
  302. status = -EIO;
  303. goto bail;
  304. }
  305. status = ocfs2_journal_access_gd(handle,
  306. INODE_CACHE(alloc_inode),
  307. bg_bh,
  308. OCFS2_JOURNAL_ACCESS_CREATE);
  309. if (status < 0) {
  310. mlog_errno(status);
  311. goto bail;
  312. }
  313. memset(bg, 0, sb->s_blocksize);
  314. strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE);
  315. bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
  316. bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb));
  317. bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl));
  318. bg->bg_chain = cpu_to_le16(my_chain);
  319. bg->bg_next_group = cl->cl_recs[my_chain].c_blkno;
  320. bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno);
  321. bg->bg_blkno = cpu_to_le64(group_blkno);
  322. /* set the 1st bit in the bitmap to account for the descriptor block */
  323. ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap);
  324. bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1);
  325. status = ocfs2_journal_dirty(handle, bg_bh);
  326. if (status < 0)
  327. mlog_errno(status);
  328. /* There is no need to zero out or otherwise initialize the
  329. * other blocks in a group - All valid FS metadata in a block
  330. * group stores the superblock fs_generation value at
  331. * allocation time. */
  332. bail:
  333. mlog_exit(status);
  334. return status;
  335. }
  336. static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
  337. {
  338. u16 curr, best;
  339. best = curr = 0;
  340. while (curr < le16_to_cpu(cl->cl_count)) {
  341. if (le32_to_cpu(cl->cl_recs[best].c_total) >
  342. le32_to_cpu(cl->cl_recs[curr].c_total))
  343. best = curr;
  344. curr++;
  345. }
  346. return best;
  347. }
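/*
 * Note the asymmetry with ocfs2_find_victim_chain() below: a new block
 * group is hung off the chain with the smallest c_total, keeping the
 * chains roughly the same length, while allocation searches start from
 * the chain with the most free bits.
 */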
  348. /*
  349. * We expect the block group allocator to already be locked.
  350. */
  351. static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
  352. struct inode *alloc_inode,
  353. struct buffer_head *bh,
  354. u64 max_block,
  355. u64 *last_alloc_group,
  356. int flags)
  357. {
  358. int status, credits;
  359. struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
  360. struct ocfs2_chain_list *cl;
  361. struct ocfs2_alloc_context *ac = NULL;
  362. handle_t *handle = NULL;
  363. u32 bit_off, num_bits;
  364. u16 alloc_rec;
  365. u64 bg_blkno;
  366. struct buffer_head *bg_bh = NULL;
  367. struct ocfs2_group_desc *bg;
  368. BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode));
  369. mlog_entry_void();
  370. cl = &fe->id2.i_chain;
  371. status = ocfs2_reserve_clusters_with_limit(osb,
  372. le16_to_cpu(cl->cl_cpg),
  373. max_block, flags, &ac);
  374. if (status < 0) {
  375. if (status != -ENOSPC)
  376. mlog_errno(status);
  377. goto bail;
  378. }
  379. credits = ocfs2_calc_group_alloc_credits(osb->sb,
  380. le16_to_cpu(cl->cl_cpg));
  381. handle = ocfs2_start_trans(osb, credits);
  382. if (IS_ERR(handle)) {
  383. status = PTR_ERR(handle);
  384. handle = NULL;
  385. mlog_errno(status);
  386. goto bail;
  387. }
  388. if (last_alloc_group && *last_alloc_group != 0) {
  389. mlog(0, "use old allocation group %llu for block group alloc\n",
  390. (unsigned long long)*last_alloc_group);
  391. ac->ac_last_group = *last_alloc_group;
  392. }
  393. status = ocfs2_claim_clusters(osb,
  394. handle,
  395. ac,
  396. le16_to_cpu(cl->cl_cpg),
  397. &bit_off,
  398. &num_bits);
  399. if (status < 0) {
  400. if (status != -ENOSPC)
  401. mlog_errno(status);
  402. goto bail;
  403. }
  404. alloc_rec = ocfs2_find_smallest_chain(cl);
  405. /* setup the group */
  406. bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
  407. mlog(0, "new descriptor, record %u, at block %llu\n",
  408. alloc_rec, (unsigned long long)bg_blkno);
  409. bg_bh = sb_getblk(osb->sb, bg_blkno);
  410. if (!bg_bh) {
  411. status = -EIO;
  412. mlog_errno(status);
  413. goto bail;
  414. }
  415. ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
  416. status = ocfs2_block_group_fill(handle,
  417. alloc_inode,
  418. bg_bh,
  419. bg_blkno,
  420. alloc_rec,
  421. cl);
  422. if (status < 0) {
  423. mlog_errno(status);
  424. goto bail;
  425. }
  426. bg = (struct ocfs2_group_desc *) bg_bh->b_data;
  427. status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
  428. bh, OCFS2_JOURNAL_ACCESS_WRITE);
  429. if (status < 0) {
  430. mlog_errno(status);
  431. goto bail;
  432. }
  433. le32_add_cpu(&cl->cl_recs[alloc_rec].c_free,
  434. le16_to_cpu(bg->bg_free_bits_count));
  435. le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits));
  436. cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg_blkno);
  437. if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
  438. le16_add_cpu(&cl->cl_next_free_rec, 1);
  439. le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) -
  440. le16_to_cpu(bg->bg_free_bits_count));
  441. le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits));
  442. le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg));
  443. status = ocfs2_journal_dirty(handle, bh);
  444. if (status < 0) {
  445. mlog_errno(status);
  446. goto bail;
  447. }
  448. spin_lock(&OCFS2_I(alloc_inode)->ip_lock);
  449. OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
  450. fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb,
  451. le32_to_cpu(fe->i_clusters)));
  452. spin_unlock(&OCFS2_I(alloc_inode)->ip_lock);
  453. i_size_write(alloc_inode, le64_to_cpu(fe->i_size));
  454. alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
  455. status = 0;
  456. /* save the new last alloc group so that the caller can cache it. */
  457. if (last_alloc_group)
  458. *last_alloc_group = ac->ac_last_group;
  459. bail:
  460. if (handle)
  461. ocfs2_commit_trans(osb, handle);
  462. if (ac)
  463. ocfs2_free_alloc_context(ac);
  464. brelse(bg_bh);
  465. mlog_exit(status);
  466. return status;
  467. }
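/*
 * Growing a suballocator means: reserve cl_cpg clusters from the main
 * bitmap, start a transaction sized by ocfs2_calc_group_alloc_credits(),
 * claim the clusters, format the first block of the new region as a
 * group descriptor, and splice it onto the smallest chain while bumping
 * the dinode's bit and cluster counts.  The cluster allocator itself
 * never takes this path (see the BUG_ON above); it only grows through
 * resize.
 */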
  468. static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
  469. struct ocfs2_alloc_context *ac,
  470. int type,
  471. u32 slot,
  472. u64 *last_alloc_group,
  473. int flags)
  474. {
  475. int status;
  476. u32 bits_wanted = ac->ac_bits_wanted;
  477. struct inode *alloc_inode;
  478. struct buffer_head *bh = NULL;
  479. struct ocfs2_dinode *fe;
  480. u32 free_bits;
  481. mlog_entry_void();
  482. alloc_inode = ocfs2_get_system_file_inode(osb, type, slot);
  483. if (!alloc_inode) {
  484. mlog_errno(-EINVAL);
  485. return -EINVAL;
  486. }
  487. mutex_lock(&alloc_inode->i_mutex);
  488. status = ocfs2_inode_lock(alloc_inode, &bh, 1);
  489. if (status < 0) {
  490. mutex_unlock(&alloc_inode->i_mutex);
  491. iput(alloc_inode);
  492. mlog_errno(status);
  493. return status;
  494. }
  495. ac->ac_inode = alloc_inode;
  496. ac->ac_alloc_slot = slot;
  497. fe = (struct ocfs2_dinode *) bh->b_data;
  498. /* The bh was validated by the inode read inside
  499. * ocfs2_inode_lock(). Any corruption is a code bug. */
  500. BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
  501. if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
  502. ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
  503. (unsigned long long)le64_to_cpu(fe->i_blkno));
  504. status = -EIO;
  505. goto bail;
  506. }
  507. free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) -
  508. le32_to_cpu(fe->id1.bitmap1.i_used);
  509. if (bits_wanted > free_bits) {
  510. /* cluster bitmap never grows */
  511. if (ocfs2_is_cluster_bitmap(alloc_inode)) {
  512. mlog(0, "Disk Full: wanted=%u, free_bits=%u\n",
  513. bits_wanted, free_bits);
  514. status = -ENOSPC;
  515. goto bail;
  516. }
  517. if (!(flags & ALLOC_NEW_GROUP)) {
  518. mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, "
  519. "and we don't alloc a new group for it.\n",
  520. slot, bits_wanted, free_bits);
  521. status = -ENOSPC;
  522. goto bail;
  523. }
  524. status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
  525. ac->ac_max_block,
  526. last_alloc_group, flags);
  527. if (status < 0) {
  528. if (status != -ENOSPC)
  529. mlog_errno(status);
  530. goto bail;
  531. }
  532. atomic_inc(&osb->alloc_stats.bg_extends);
  533. /* You should never ask for this much metadata */
  534. BUG_ON(bits_wanted >
  535. (le32_to_cpu(fe->id1.bitmap1.i_total)
  536. - le32_to_cpu(fe->id1.bitmap1.i_used)));
  537. }
  538. get_bh(bh);
  539. ac->ac_bh = bh;
  540. bail:
  541. brelse(bh);
  542. mlog_exit(status);
  543. return status;
  544. }
  545. static void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb)
  546. {
  547. spin_lock(&osb->osb_lock);
  548. osb->s_inode_steal_slot = OCFS2_INVALID_SLOT;
  549. spin_unlock(&osb->osb_lock);
  550. atomic_set(&osb->s_num_inodes_stolen, 0);
  551. }
  552. static void ocfs2_init_meta_steal_slot(struct ocfs2_super *osb)
  553. {
  554. spin_lock(&osb->osb_lock);
  555. osb->s_meta_steal_slot = OCFS2_INVALID_SLOT;
  556. spin_unlock(&osb->osb_lock);
  557. atomic_set(&osb->s_num_meta_stolen, 0);
  558. }
  559. void ocfs2_init_steal_slots(struct ocfs2_super *osb)
  560. {
  561. ocfs2_init_inode_steal_slot(osb);
  562. ocfs2_init_meta_steal_slot(osb);
  563. }
  564. static void __ocfs2_set_steal_slot(struct ocfs2_super *osb, int slot, int type)
  565. {
  566. spin_lock(&osb->osb_lock);
  567. if (type == INODE_ALLOC_SYSTEM_INODE)
  568. osb->s_inode_steal_slot = slot;
  569. else if (type == EXTENT_ALLOC_SYSTEM_INODE)
  570. osb->s_meta_steal_slot = slot;
  571. spin_unlock(&osb->osb_lock);
  572. }
  573. static int __ocfs2_get_steal_slot(struct ocfs2_super *osb, int type)
  574. {
  575. int slot = OCFS2_INVALID_SLOT;
  576. spin_lock(&osb->osb_lock);
  577. if (type == INODE_ALLOC_SYSTEM_INODE)
  578. slot = osb->s_inode_steal_slot;
  579. else if (type == EXTENT_ALLOC_SYSTEM_INODE)
  580. slot = osb->s_meta_steal_slot;
  581. spin_unlock(&osb->osb_lock);
  582. return slot;
  583. }
  584. static int ocfs2_get_inode_steal_slot(struct ocfs2_super *osb)
  585. {
  586. return __ocfs2_get_steal_slot(osb, INODE_ALLOC_SYSTEM_INODE);
  587. }
  588. static int ocfs2_get_meta_steal_slot(struct ocfs2_super *osb)
  589. {
  590. return __ocfs2_get_steal_slot(osb, EXTENT_ALLOC_SYSTEM_INODE);
  591. }
  592. static int ocfs2_steal_resource(struct ocfs2_super *osb,
  593. struct ocfs2_alloc_context *ac,
  594. int type)
  595. {
  596. int i, status = -ENOSPC;
  597. int slot = __ocfs2_get_steal_slot(osb, type);
  598. /* Start stealing from the first slot after ours. */
  599. if (slot == OCFS2_INVALID_SLOT)
  600. slot = osb->slot_num + 1;
  601. for (i = 0; i < osb->max_slots; i++, slot++) {
  602. if (slot == osb->max_slots)
  603. slot = 0;
  604. if (slot == osb->slot_num)
  605. continue;
  606. status = ocfs2_reserve_suballoc_bits(osb, ac,
  607. type,
  608. (u32)slot, NULL,
  609. NOT_ALLOC_NEW_GROUP);
  610. if (status >= 0) {
  611. __ocfs2_set_steal_slot(osb, slot, type);
  612. break;
  613. }
  614. ocfs2_free_ac_resource(ac);
  615. }
  616. return status;
  617. }
  618. static int ocfs2_steal_inode(struct ocfs2_super *osb,
  619. struct ocfs2_alloc_context *ac)
  620. {
  621. return ocfs2_steal_resource(osb, ac, INODE_ALLOC_SYSTEM_INODE);
  622. }
  623. static int ocfs2_steal_meta(struct ocfs2_super *osb,
  624. struct ocfs2_alloc_context *ac)
  625. {
  626. return ocfs2_steal_resource(osb, ac, EXTENT_ALLOC_SYSTEM_INODE);
  627. }
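/*
 * ocfs2_steal_resource() walks the other slots round-robin, starting
 * just past our own slot (or at the slot we last stole from), and takes
 * the first allocator with room.  NOT_ALLOC_NEW_GROUP keeps us from
 * growing someone else's allocator in the process, and the successful
 * slot is remembered so the next steal resumes there.
 */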
  628. int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
  629. int blocks,
  630. struct ocfs2_alloc_context **ac)
  631. {
  632. int status;
  633. int slot = ocfs2_get_meta_steal_slot(osb);
  634. *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
  635. if (!(*ac)) {
  636. status = -ENOMEM;
  637. mlog_errno(status);
  638. goto bail;
  639. }
  640. (*ac)->ac_bits_wanted = blocks;
  641. (*ac)->ac_which = OCFS2_AC_USE_META;
  642. (*ac)->ac_group_search = ocfs2_block_group_search;
  643. if (slot != OCFS2_INVALID_SLOT &&
  644. atomic_read(&osb->s_num_meta_stolen) < OCFS2_MAX_TO_STEAL)
  645. goto extent_steal;
  646. atomic_set(&osb->s_num_meta_stolen, 0);
  647. status = ocfs2_reserve_suballoc_bits(osb, (*ac),
  648. EXTENT_ALLOC_SYSTEM_INODE,
  649. (u32)osb->slot_num, NULL,
  650. ALLOC_NEW_GROUP);
  651. if (status >= 0) {
  652. status = 0;
  653. if (slot != OCFS2_INVALID_SLOT)
  654. ocfs2_init_meta_steal_slot(osb);
  655. goto bail;
  656. } else if (status < 0 && status != -ENOSPC) {
  657. mlog_errno(status);
  658. goto bail;
  659. }
  660. ocfs2_free_ac_resource(*ac);
  661. extent_steal:
  662. status = ocfs2_steal_meta(osb, *ac);
  663. atomic_inc(&osb->s_num_meta_stolen);
  664. if (status < 0) {
  665. if (status != -ENOSPC)
  666. mlog_errno(status);
  667. goto bail;
  668. }
  669. status = 0;
  670. bail:
  671. if ((status < 0) && *ac) {
  672. ocfs2_free_alloc_context(*ac);
  673. *ac = NULL;
  674. }
  675. mlog_exit(status);
  676. return status;
  677. }
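/*
 * Reservation policy for metadata (and, below, inodes): try our own
 * allocator first and fall back to stealing only on -ENOSPC.  Once we
 * have started stealing we keep doing so for up to OCFS2_MAX_TO_STEAL
 * reservations before the counter resets and our own slot gets another
 * chance, which avoids hammering a full allocator on every request.
 */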
  678. int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
  679. struct ocfs2_extent_list *root_el,
  680. struct ocfs2_alloc_context **ac)
  681. {
  682. return ocfs2_reserve_new_metadata_blocks(osb,
  683. ocfs2_extend_meta_needed(root_el),
  684. ac);
  685. }
  686. int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
  687. struct ocfs2_alloc_context **ac)
  688. {
  689. int status;
  690. int slot = ocfs2_get_inode_steal_slot(osb);
  691. u64 alloc_group;
  692. *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
  693. if (!(*ac)) {
  694. status = -ENOMEM;
  695. mlog_errno(status);
  696. goto bail;
  697. }
  698. (*ac)->ac_bits_wanted = 1;
  699. (*ac)->ac_which = OCFS2_AC_USE_INODE;
  700. (*ac)->ac_group_search = ocfs2_block_group_search;
  701. /*
  702. * stat(2) can't handle i_ino > 32bits, so we tell the
  703. * lower levels not to allocate us a block group past that
  704. * limit. The 'inode64' mount option avoids this behavior.
  705. */
  706. if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64))
  707. (*ac)->ac_max_block = (u32)~0U;
  708. /*
  709. * slot is set when we successfully steal an inode from another node.
  710. * It is reset in 3 places:
  711. * 1. when we flush the truncate log
  712. * 2. when we complete local alloc recovery.
  713. * 3. when we successfully allocate from our own slot.
  714. * After it is set, we keep stealing inodes until one of the resets
  715. * above prompts us to check our own slot again for free space.
  716. */
  717. if (slot != OCFS2_INVALID_SLOT &&
  718. atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_TO_STEAL)
  719. goto inode_steal;
  720. atomic_set(&osb->s_num_inodes_stolen, 0);
  721. alloc_group = osb->osb_inode_alloc_group;
  722. status = ocfs2_reserve_suballoc_bits(osb, *ac,
  723. INODE_ALLOC_SYSTEM_INODE,
  724. (u32)osb->slot_num,
  725. &alloc_group,
  726. ALLOC_NEW_GROUP |
  727. ALLOC_GROUPS_FROM_GLOBAL);
  728. if (status >= 0) {
  729. status = 0;
  730. spin_lock(&osb->osb_lock);
  731. osb->osb_inode_alloc_group = alloc_group;
  732. spin_unlock(&osb->osb_lock);
  733. mlog(0, "after reservation, new allocation group is "
  734. "%llu\n", (unsigned long long)alloc_group);
  735. /*
  736. * Some inodes must be freed by us, so try to allocate
  737. * from our own slot next time.
  738. */
  739. if (slot != OCFS2_INVALID_SLOT)
  740. ocfs2_init_inode_steal_slot(osb);
  741. goto bail;
  742. } else if (status < 0 && status != -ENOSPC) {
  743. mlog_errno(status);
  744. goto bail;
  745. }
  746. ocfs2_free_ac_resource(*ac);
  747. inode_steal:
  748. status = ocfs2_steal_inode(osb, *ac);
  749. atomic_inc(&osb->s_num_inodes_stolen);
  750. if (status < 0) {
  751. if (status != -ENOSPC)
  752. mlog_errno(status);
  753. goto bail;
  754. }
  755. status = 0;
  756. bail:
  757. if ((status < 0) && *ac) {
  758. ocfs2_free_alloc_context(*ac);
  759. *ac = NULL;
  760. }
  761. mlog_exit(status);
  762. return status;
  763. }
  764. /* The local alloc code has to make the same reservation against the
  765. * global bitmap, so rather than duplicate it there, this helper is shared. */
  766. int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
  767. struct ocfs2_alloc_context *ac)
  768. {
  769. int status;
  770. ac->ac_which = OCFS2_AC_USE_MAIN;
  771. ac->ac_group_search = ocfs2_cluster_group_search;
  772. status = ocfs2_reserve_suballoc_bits(osb, ac,
  773. GLOBAL_BITMAP_SYSTEM_INODE,
  774. OCFS2_INVALID_SLOT, NULL,
  775. ALLOC_NEW_GROUP);
  776. if (status < 0 && status != -ENOSPC) {
  777. mlog_errno(status);
  778. goto bail;
  779. }
  780. bail:
  781. return status;
  782. }
  783. /* Callers don't need to care which bitmap (local alloc or main) to
  784. * use so we figure it out for them, but unfortunately this clutters
  785. * things a bit. */
  786. static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
  787. u32 bits_wanted, u64 max_block,
  788. int flags,
  789. struct ocfs2_alloc_context **ac)
  790. {
  791. int status;
  792. mlog_entry_void();
  793. *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
  794. if (!(*ac)) {
  795. status = -ENOMEM;
  796. mlog_errno(status);
  797. goto bail;
  798. }
  799. (*ac)->ac_bits_wanted = bits_wanted;
  800. (*ac)->ac_max_block = max_block;
  801. status = -ENOSPC;
  802. if (!(flags & ALLOC_GROUPS_FROM_GLOBAL) &&
  803. ocfs2_alloc_should_use_local(osb, bits_wanted)) {
  804. status = ocfs2_reserve_local_alloc_bits(osb,
  805. bits_wanted,
  806. *ac);
  807. if (status == -EFBIG) {
  808. /* The local alloc window is outside ac_max_block.
  809. * use the main bitmap. */
  810. status = -ENOSPC;
  811. } else if ((status < 0) && (status != -ENOSPC)) {
  812. mlog_errno(status);
  813. goto bail;
  814. }
  815. }
  816. if (status == -ENOSPC) {
  817. status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
  818. if (status < 0) {
  819. if (status != -ENOSPC)
  820. mlog_errno(status);
  821. goto bail;
  822. }
  823. }
  824. status = 0;
  825. bail:
  826. if ((status < 0) && *ac) {
  827. ocfs2_free_alloc_context(*ac);
  828. *ac = NULL;
  829. }
  830. mlog_exit(status);
  831. return status;
  832. }
  833. int ocfs2_reserve_clusters(struct ocfs2_super *osb,
  834. u32 bits_wanted,
  835. struct ocfs2_alloc_context **ac)
  836. {
  837. return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0,
  838. ALLOC_NEW_GROUP, ac);
  839. }
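/*
 * Roughly how callers elsewhere in ocfs2 drive a data allocation
 * through this file (a sketch only; error handling and cluster locking
 * are omitted):
 *
 *	struct ocfs2_alloc_context *ac = NULL;
 *	handle_t *handle;
 *	u32 bit_off, num;
 *
 *	ocfs2_reserve_clusters(osb, wanted, &ac);
 *	handle = ocfs2_start_trans(osb, credits);
 *	ocfs2_claim_clusters(osb, handle, ac, min, &bit_off, &num);
 *	... record [bit_off, bit_off + num) in the extent tree ...
 *	ocfs2_commit_trans(osb, handle);
 *	ocfs2_free_alloc_context(ac);
 *
 * The reserve step only picks and locks an allocator (local alloc or
 * the main bitmap); no bits are handed out until the claim runs inside
 * the transaction.
 */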
  840. /*
  841. * More or less lifted from ext3. I'll leave their description below:
  842. *
  843. * "For ext3 allocations, we must not reuse any blocks which are
  844. * allocated in the bitmap buffer's "last committed data" copy. This
  845. * prevents deletes from freeing up the page for reuse until we have
  846. * committed the delete transaction.
  847. *
  848. * If we didn't do this, then deleting something and reallocating it as
  849. * data would allow the old block to be overwritten before the
  850. * transaction committed (because we force data to disk before commit).
  851. * This would lead to corruption if we crashed between overwriting the
  852. * data and committing the delete.
  853. *
  854. * @@@ We may want to make this allocation behaviour conditional on
  855. * data-writes at some point, and disable it for metadata allocations or
  856. * sync-data inodes."
  857. *
  858. * Note: OCFS2 already does this differently for metadata vs data
  859. * allocations, as those bitmaps are separate and undo access is never
  860. * called on a metadata group descriptor.
  861. */
  862. static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
  863. int nr)
  864. {
  865. struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
  866. int ret;
  867. if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
  868. return 0;
  869. if (!buffer_jbd(bg_bh))
  870. return 1;
  871. jbd_lock_bh_state(bg_bh);
  872. bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
  873. if (bg)
  874. ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
  875. else
  876. ret = 1;
  877. jbd_unlock_bh_state(bg_bh);
  878. return ret;
  879. }
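/*
 * In short: a bit is allocatable only if it is clear both in the
 * on-buffer bitmap and in jbd's b_committed_data copy.  A bit freed in
 * the current, uncommitted transaction is clear in the former but still
 * set in the latter, so it will not be reused until the commit makes
 * the free permanent.
 */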
  880. static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
  881. struct buffer_head *bg_bh,
  882. unsigned int bits_wanted,
  883. unsigned int total_bits,
  884. u16 *bit_off,
  885. u16 *bits_found)
  886. {
  887. void *bitmap;
  888. u16 best_offset, best_size;
  889. int offset, start, found, status = 0;
  890. struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
  891. /* Callers got this descriptor from
  892. * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
  893. BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
  894. found = start = best_offset = best_size = 0;
  895. bitmap = bg->bg_bitmap;
  896. while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
  897. if (offset == total_bits)
  898. break;
  899. if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
  900. /* We found a zero, but we can't use it as it
  901. * hasn't been put to disk yet! */
  902. found = 0;
  903. start = offset + 1;
  904. } else if (offset == start) {
  905. /* we found a zero */
  906. found++;
  907. /* move start to the next bit to test */
  908. start++;
  909. } else {
  910. /* got a zero after some ones */
  911. found = 1;
  912. start = offset + 1;
  913. }
  914. if (found > best_size) {
  915. best_size = found;
  916. best_offset = start - found;
  917. }
  918. /* we got everything we needed */
  919. if (found == bits_wanted) {
  920. /* mlog(0, "Found it all!\n"); */
  921. break;
  922. }
  923. }
  924. /* XXX: I think the first clause is equivalent to the second
  925. * - jlbec */
  926. if (found == bits_wanted) {
  927. *bit_off = start - found;
  928. *bits_found = found;
  929. } else if (best_size) {
  930. *bit_off = best_offset;
  931. *bits_found = best_size;
  932. } else {
  933. status = -ENOSPC;
  934. /* No error log here -- see the comment above
  935. * ocfs2_test_bg_bit_allocatable */
  936. }
  937. return status;
  938. }
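/*
 * The scan above is first-fit with a fallback: it tracks the longest
 * run of allocatable bits seen so far, stops early if a run of
 * bits_wanted turns up, and otherwise hands back the best run it found.
 * -ENOSPC only means "nothing usable in this group"; callers treat it
 * as "keep searching".
 */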
  939. static inline int ocfs2_block_group_set_bits(handle_t *handle,
  940. struct inode *alloc_inode,
  941. struct ocfs2_group_desc *bg,
  942. struct buffer_head *group_bh,
  943. unsigned int bit_off,
  944. unsigned int num_bits)
  945. {
  946. int status;
  947. void *bitmap = bg->bg_bitmap;
  948. int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
  949. mlog_entry_void();
  950. /* All callers get the descriptor via
  951. * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
  952. BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
  953. BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
  954. mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
  955. num_bits);
  956. if (ocfs2_is_cluster_bitmap(alloc_inode))
  957. journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
  958. status = ocfs2_journal_access_gd(handle,
  959. INODE_CACHE(alloc_inode),
  960. group_bh,
  961. journal_type);
  962. if (status < 0) {
  963. mlog_errno(status);
  964. goto bail;
  965. }
  966. le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
  967. while(num_bits--)
  968. ocfs2_set_bit(bit_off++, bitmap);
  969. status = ocfs2_journal_dirty(handle,
  970. group_bh);
  971. if (status < 0) {
  972. mlog_errno(status);
  973. goto bail;
  974. }
  975. bail:
  976. mlog_exit(status);
  977. return status;
  978. }
  979. /* find the one with the most empty bits */
  980. static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl)
  981. {
  982. u16 curr, best;
  983. BUG_ON(!cl->cl_next_free_rec);
  984. best = curr = 0;
  985. while (curr < le16_to_cpu(cl->cl_next_free_rec)) {
  986. if (le32_to_cpu(cl->cl_recs[curr].c_free) >
  987. le32_to_cpu(cl->cl_recs[best].c_free))
  988. best = curr;
  989. curr++;
  990. }
  991. BUG_ON(best >= le16_to_cpu(cl->cl_next_free_rec));
  992. return best;
  993. }
  994. static int ocfs2_relink_block_group(handle_t *handle,
  995. struct inode *alloc_inode,
  996. struct buffer_head *fe_bh,
  997. struct buffer_head *bg_bh,
  998. struct buffer_head *prev_bg_bh,
  999. u16 chain)
  1000. {
  1001. int status;
  1002. /* there is a really tiny chance the journal calls could fail,
  1003. * but we wouldn't want inconsistent blocks in *any* case. */
  1004. u64 fe_ptr, bg_ptr, prev_bg_ptr;
  1005. struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
  1006. struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
  1007. struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
  1008. /* The caller got these descriptors from
  1009. * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
  1010. BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
  1011. BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg));
  1012. mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
  1013. (unsigned long long)le64_to_cpu(fe->i_blkno), chain,
  1014. (unsigned long long)le64_to_cpu(bg->bg_blkno),
  1015. (unsigned long long)le64_to_cpu(prev_bg->bg_blkno));
  1016. fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
  1017. bg_ptr = le64_to_cpu(bg->bg_next_group);
  1018. prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
  1019. status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
  1020. prev_bg_bh,
  1021. OCFS2_JOURNAL_ACCESS_WRITE);
  1022. if (status < 0) {
  1023. mlog_errno(status);
  1024. goto out_rollback;
  1025. }
  1026. prev_bg->bg_next_group = bg->bg_next_group;
  1027. status = ocfs2_journal_dirty(handle, prev_bg_bh);
  1028. if (status < 0) {
  1029. mlog_errno(status);
  1030. goto out_rollback;
  1031. }
  1032. status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
  1033. bg_bh, OCFS2_JOURNAL_ACCESS_WRITE);
  1034. if (status < 0) {
  1035. mlog_errno(status);
  1036. goto out_rollback;
  1037. }
  1038. bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
  1039. status = ocfs2_journal_dirty(handle, bg_bh);
  1040. if (status < 0) {
  1041. mlog_errno(status);
  1042. goto out_rollback;
  1043. }
  1044. status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
  1045. fe_bh, OCFS2_JOURNAL_ACCESS_WRITE);
  1046. if (status < 0) {
  1047. mlog_errno(status);
  1048. goto out_rollback;
  1049. }
  1050. fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
  1051. status = ocfs2_journal_dirty(handle, fe_bh);
  1052. if (status < 0) {
  1053. mlog_errno(status);
  1054. goto out_rollback;
  1055. }
  1056. status = 0;
  1057. out_rollback:
  1058. if (status < 0) {
  1059. fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr);
  1060. bg->bg_next_group = cpu_to_le64(bg_ptr);
  1061. prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
  1062. }
  1063. mlog_exit(status);
  1064. return status;
  1065. }
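/*
 * Relinking moves the group we just allocated from to the head of its
 * chain, so the next search finds free bits without walking the whole
 * chain again.  Because three separate buffers are journaled here, the
 * saved *_ptr values let us undo the in-memory pointer updates if any
 * of the journal calls fails part way through.
 */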
  1066. static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
  1067. u32 wanted)
  1068. {
  1069. return le16_to_cpu(bg->bg_free_bits_count) > wanted;
  1070. }
  1071. /* return 0 on success, -ENOSPC to keep searching and any other < 0
  1072. * value on error. */
  1073. static int ocfs2_cluster_group_search(struct inode *inode,
  1074. struct buffer_head *group_bh,
  1075. u32 bits_wanted, u32 min_bits,
  1076. u64 max_block,
  1077. u16 *bit_off, u16 *bits_found)
  1078. {
  1079. int search = -ENOSPC;
  1080. int ret;
  1081. u64 blkoff;
  1082. struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
  1083. struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
  1084. u16 tmp_off, tmp_found;
  1085. unsigned int max_bits, gd_cluster_off;
  1086. BUG_ON(!ocfs2_is_cluster_bitmap(inode));
  1087. if (gd->bg_free_bits_count) {
  1088. max_bits = le16_to_cpu(gd->bg_bits);
  1089. /* Tail groups in cluster bitmaps which aren't cpg
  1090. * aligned are prone to partial extension by a failed
  1091. * fs resize. If the file system resize never got to
  1092. * update the dinode cluster count, then we don't want
  1093. * to trust any clusters past it, regardless of what
  1094. * the group descriptor says. */
  1095. gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
  1096. le64_to_cpu(gd->bg_blkno));
  1097. if ((gd_cluster_off + max_bits) >
  1098. OCFS2_I(inode)->ip_clusters) {
  1099. max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
  1100. mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
  1101. (unsigned long long)le64_to_cpu(gd->bg_blkno),
  1102. le16_to_cpu(gd->bg_bits),
  1103. OCFS2_I(inode)->ip_clusters, max_bits);
  1104. }
  1105. ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
  1106. group_bh, bits_wanted,
  1107. max_bits,
  1108. &tmp_off, &tmp_found);
  1109. if (ret)
  1110. return ret;
  1111. if (max_block) {
  1112. blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
  1113. gd_cluster_off +
  1114. tmp_off + tmp_found);
  1115. mlog(0, "Checking %llu against %llu\n",
  1116. (unsigned long long)blkoff,
  1117. (unsigned long long)max_block);
  1118. if (blkoff > max_block)
  1119. return -ENOSPC;
  1120. }
  1121. /* ocfs2_block_group_find_clear_bits() might
  1122. * return success, but we still want to return
  1123. * -ENOSPC unless it found the minimum number
  1124. * of bits. */
  1125. if (min_bits <= tmp_found) {
  1126. *bit_off = tmp_off;
  1127. *bits_found = tmp_found;
  1128. search = 0; /* success */
  1129. } else if (tmp_found) {
  1130. /*
  1131. * Don't show bits which we'll be returning
  1132. * for allocation to the local alloc bitmap.
  1133. */
  1134. ocfs2_local_alloc_seen_free_bits(osb, tmp_found);
  1135. }
  1136. }
  1137. return search;
  1138. }
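/*
 * Two quirks of the cluster bitmap search: the usable bits are clamped
 * to the dinode's ip_clusters so a group half-extended by an
 * interrupted resize cannot hand out clusters past the end of the
 * volume, and a hit that would land above ac_max_block (e.g. the limit
 * that keeps inode groups below 32-bit block numbers) is turned back
 * into -ENOSPC so the caller keeps searching.
 */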
  1139. static int ocfs2_block_group_search(struct inode *inode,
  1140. struct buffer_head *group_bh,
  1141. u32 bits_wanted, u32 min_bits,
  1142. u64 max_block,
  1143. u16 *bit_off, u16 *bits_found)
  1144. {
  1145. int ret = -ENOSPC;
  1146. u64 blkoff;
  1147. struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
  1148. BUG_ON(min_bits != 1);
  1149. BUG_ON(ocfs2_is_cluster_bitmap(inode));
  1150. if (bg->bg_free_bits_count) {
  1151. ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
  1152. group_bh, bits_wanted,
  1153. le16_to_cpu(bg->bg_bits),
  1154. bit_off, bits_found);
  1155. if (!ret && max_block) {
  1156. blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off +
  1157. *bits_found;
  1158. mlog(0, "Checking %llu against %llu\n",
  1159. (unsigned long long)blkoff,
  1160. (unsigned long long)max_block);
  1161. if (blkoff > max_block)
  1162. ret = -ENOSPC;
  1163. }
  1164. }
  1165. return ret;
  1166. }
  1167. static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
  1168. handle_t *handle,
  1169. struct buffer_head *di_bh,
  1170. u32 num_bits,
  1171. u16 chain)
  1172. {
  1173. int ret;
  1174. u32 tmp_used;
  1175. struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
  1176. struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
  1177. ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
  1178. OCFS2_JOURNAL_ACCESS_WRITE);
  1179. if (ret < 0) {
  1180. mlog_errno(ret);
  1181. goto out;
  1182. }
  1183. tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
  1184. di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
  1185. le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
  1186. ret = ocfs2_journal_dirty(handle, di_bh);
  1187. if (ret < 0)
  1188. mlog_errno(ret);
  1189. out:
  1190. return ret;
  1191. }
  1192. static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
  1193. handle_t *handle,
  1194. u32 bits_wanted,
  1195. u32 min_bits,
  1196. u16 *bit_off,
  1197. unsigned int *num_bits,
  1198. u64 gd_blkno,
  1199. u16 *bits_left)
  1200. {
  1201. int ret;
  1202. u16 found;
  1203. struct buffer_head *group_bh = NULL;
  1204. struct ocfs2_group_desc *gd;
  1205. struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
  1206. struct inode *alloc_inode = ac->ac_inode;
  1207. ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno,
  1208. &group_bh);
  1209. if (ret < 0) {
  1210. mlog_errno(ret);
  1211. return ret;
  1212. }
  1213. gd = (struct ocfs2_group_desc *) group_bh->b_data;
  1214. ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
  1215. ac->ac_max_block, bit_off, &found);
  1216. if (ret < 0) {
  1217. if (ret != -ENOSPC)
  1218. mlog_errno(ret);
  1219. goto out;
  1220. }
  1221. *num_bits = found;
  1222. ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
  1223. *num_bits,
  1224. le16_to_cpu(gd->bg_chain));
  1225. if (ret < 0) {
  1226. mlog_errno(ret);
  1227. goto out;
  1228. }
  1229. ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
  1230. *bit_off, *num_bits);
  1231. if (ret < 0)
  1232. mlog_errno(ret);
  1233. *bits_left = le16_to_cpu(gd->bg_free_bits_count);
  1234. out:
  1235. brelse(group_bh);
  1236. return ret;
  1237. }
  1238. static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
  1239. handle_t *handle,
  1240. u32 bits_wanted,
  1241. u32 min_bits,
  1242. u16 *bit_off,
  1243. unsigned int *num_bits,
  1244. u64 *bg_blkno,
  1245. u16 *bits_left)
  1246. {
  1247. int status;
  1248. u16 chain, tmp_bits;
  1249. u32 tmp_used;
  1250. u64 next_group;
  1251. struct inode *alloc_inode = ac->ac_inode;
  1252. struct buffer_head *group_bh = NULL;
  1253. struct buffer_head *prev_group_bh = NULL;
  1254. struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
  1255. struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
  1256. struct ocfs2_group_desc *bg;
  1257. chain = ac->ac_chain;
  1258. mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n",
  1259. bits_wanted, chain,
  1260. (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
  1261. status = ocfs2_read_group_descriptor(alloc_inode, fe,
  1262. le64_to_cpu(cl->cl_recs[chain].c_blkno),
  1263. &group_bh);
  1264. if (status < 0) {
  1265. mlog_errno(status);
  1266. goto bail;
  1267. }
  1268. bg = (struct ocfs2_group_desc *) group_bh->b_data;
  1269. status = -ENOSPC;
  1270. /* for now, the chain search is a bit simplistic. We just use
  1271. * the 1st group with any empty bits. */
  1272. while ((status = ac->ac_group_search(alloc_inode, group_bh,
  1273. bits_wanted, min_bits,
  1274. ac->ac_max_block, bit_off,
  1275. &tmp_bits)) == -ENOSPC) {
  1276. if (!bg->bg_next_group)
  1277. break;
  1278. brelse(prev_group_bh);
  1279. prev_group_bh = NULL;
  1280. next_group = le64_to_cpu(bg->bg_next_group);
  1281. prev_group_bh = group_bh;
  1282. group_bh = NULL;
  1283. status = ocfs2_read_group_descriptor(alloc_inode, fe,
  1284. next_group, &group_bh);
  1285. if (status < 0) {
  1286. mlog_errno(status);
  1287. goto bail;
  1288. }
  1289. bg = (struct ocfs2_group_desc *) group_bh->b_data;
  1290. }
  1291. if (status < 0) {
  1292. if (status != -ENOSPC)
  1293. mlog_errno(status);
  1294. goto bail;
  1295. }
  1296. mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
  1297. tmp_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno));
  1298. *num_bits = tmp_bits;
  1299. BUG_ON(*num_bits == 0);
  1300. /*
  1301. * Keep track of the previous block descriptor read. When
  1302. * we find a target, if we have read more than X
  1303. * descriptors, and the target is reasonably
  1304. * empty, relink it to the top of its chain.
  1305. *
  1306. * Relinking costs no extra block reads and only sends one more
  1307. * block to the transaction, yet the next search of this chain
  1308. * has a much easier time.
  1309. *
  1310. * Do this *after* figuring out how many bits we're taking out
  1311. * of our target group.
  1312. */
  1313. if (ac->ac_allow_chain_relink &&
  1314. (prev_group_bh) &&
  1315. (ocfs2_block_group_reasonably_empty(bg, *num_bits))) {
  1316. status = ocfs2_relink_block_group(handle, alloc_inode,
  1317. ac->ac_bh, group_bh,
  1318. prev_group_bh, chain);
  1319. if (status < 0) {
  1320. mlog_errno(status);
  1321. goto bail;
  1322. }
  1323. }
  1324. /* Ok, claim our bits now: set the info on dinode, chainlist
  1325. * and then the group */
  1326. status = ocfs2_journal_access_di(handle,
  1327. INODE_CACHE(alloc_inode),
  1328. ac->ac_bh,
  1329. OCFS2_JOURNAL_ACCESS_WRITE);
  1330. if (status < 0) {
  1331. mlog_errno(status);
  1332. goto bail;
  1333. }
  1334. tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
  1335. fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used);
  1336. le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits));
  1337. status = ocfs2_journal_dirty(handle,
  1338. ac->ac_bh);
  1339. if (status < 0) {
  1340. mlog_errno(status);
  1341. goto bail;
  1342. }
  1343. status = ocfs2_block_group_set_bits(handle,
  1344. alloc_inode,
  1345. bg,
  1346. group_bh,
  1347. *bit_off,
  1348. *num_bits);
  1349. if (status < 0) {
  1350. mlog_errno(status);
  1351. goto bail;
  1352. }
  1353. mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits,
  1354. (unsigned long long)le64_to_cpu(fe->i_blkno));
  1355. *bg_blkno = le64_to_cpu(bg->bg_blkno);
  1356. *bits_left = le16_to_cpu(bg->bg_free_bits_count);
  1357. bail:
  1358. brelse(group_bh);
  1359. brelse(prev_group_bh);
  1360. mlog_exit(status);
  1361. return status;
  1362. }
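/*
 * Core search used by all of the claim functions below: try the group
 * hinted at by ac->ac_last_group first, then the victim chain picked
 * by ocfs2_find_victim_chain(), and finally every remaining chain in
 * the dinode's chain list.  On success the hint is refreshed for the
 * next caller (see set_hint below).
 */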
/* will give out up to bits_wanted contiguous bits. */
static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
				     struct ocfs2_alloc_context *ac,
				     handle_t *handle,
				     u32 bits_wanted,
				     u32 min_bits,
				     u16 *bit_off,
				     unsigned int *num_bits,
				     u64 *bg_blkno)
{
	int status;
	u16 victim, i;
	u16 bits_left = 0;
	u64 hint_blkno = ac->ac_last_group;
	struct ocfs2_chain_list *cl;
	struct ocfs2_dinode *fe;

	mlog_entry_void();

	BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
	BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given));
	BUG_ON(!ac->ac_bh);

	fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;

	/* The bh was validated by the inode read during
	 * ocfs2_reserve_suballoc_bits().  Any corruption is a code bug. */
	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));

	if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
	    le32_to_cpu(fe->id1.bitmap1.i_total)) {
		ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
			    "bits but only %u total.",
			    (unsigned long long)le64_to_cpu(fe->i_blkno),
			    le32_to_cpu(fe->id1.bitmap1.i_used),
			    le32_to_cpu(fe->id1.bitmap1.i_total));
		status = -EIO;
		goto bail;
	}

	if (hint_blkno) {
		/* Attempt to short-circuit the usual search mechanism
		 * by jumping straight to the most recently used
		 * allocation group. This helps us maintain some
		 * contiguousness across allocations. */
		status = ocfs2_search_one_group(ac, handle, bits_wanted,
						min_bits, bit_off, num_bits,
						hint_blkno, &bits_left);
		if (!status) {
			/* Be careful to update *bg_blkno here as the
			 * caller is expecting it to be filled in, and
			 * ocfs2_search_one_group() won't do that for
			 * us. */
			*bg_blkno = hint_blkno;
			goto set_hint;
		}
		if (status < 0 && status != -ENOSPC) {
			mlog_errno(status);
			goto bail;
		}
	}

	cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;

	victim = ocfs2_find_victim_chain(cl);
	ac->ac_chain = victim;
	ac->ac_allow_chain_relink = 1;

	status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off,
				    num_bits, bg_blkno, &bits_left);
	if (!status)
		goto set_hint;
	if (status < 0 && status != -ENOSPC) {
		mlog_errno(status);
		goto bail;
	}

	mlog(0, "Search of victim chain %u came up with nothing, "
	     "trying all chains now.\n", victim);

	/* If we didn't pick a good victim, then just default to
	 * searching each chain in order. Don't allow chain relinking
	 * because we only calculate enough journal credits for one
	 * relink per alloc. */
	ac->ac_allow_chain_relink = 0;
	for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) {
		if (i == victim)
			continue;
		if (!cl->cl_recs[i].c_free)
			continue;

		ac->ac_chain = i;
		status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
					    bit_off, num_bits, bg_blkno,
					    &bits_left);
		if (!status)
			break;
		if (status < 0 && status != -ENOSPC) {
			mlog_errno(status);
			goto bail;
		}
	}

set_hint:
	if (status != -ENOSPC) {
		/* If the next search of this group is not likely to
		 * yield a suitable extent, then we reset the last
		 * group hint so as to not waste a disk read */
		if (bits_left < min_bits)
			ac->ac_last_group = 0;
		else
			ac->ac_last_group = *bg_blkno;
	}

bail:
	mlog_exit(status);
	return status;
}

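/*
 * Claim up to bits_wanted contiguous bits from the metadata
 * suballocator attached to ac (ac->ac_which must be
 * OCFS2_AC_USE_META).  On success, *num_bits holds how many bits were
 * actually given out, *suballoc_bit_start the first bit within its
 * block group, and *blkno_start the corresponding disk block number.
 */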
int ocfs2_claim_metadata(struct ocfs2_super *osb,
			 handle_t *handle,
			 struct ocfs2_alloc_context *ac,
			 u32 bits_wanted,
			 u16 *suballoc_bit_start,
			 unsigned int *num_bits,
			 u64 *blkno_start)
{
	int status;
	u64 bg_blkno;

	BUG_ON(!ac);
	BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted));
	BUG_ON(ac->ac_which != OCFS2_AC_USE_META);

	status = ocfs2_claim_suballoc_bits(osb,
					   ac,
					   handle,
					   bits_wanted,
					   1,
					   suballoc_bit_start,
					   num_bits,
					   &bg_blkno);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	atomic_inc(&osb->alloc_stats.bg_allocs);

	*blkno_start = bg_blkno + (u64) *suballoc_bit_start;
	ac->ac_bits_given += (*num_bits);
	status = 0;
bail:
	mlog_exit(status);
	return status;
}

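/*
 * Seed ac->ac_last_group with a sensible starting group for a new
 * inode, based on what the parent directory remembers from its last
 * allocation or, failing that, on where the parent's own inode lives.
 */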
static void ocfs2_init_inode_ac_group(struct inode *dir,
				      struct buffer_head *parent_fe_bh,
				      struct ocfs2_alloc_context *ac)
{
	struct ocfs2_dinode *fe = (struct ocfs2_dinode *)parent_fe_bh->b_data;
	/*
	 * Try to allocate inodes from some specific group.
	 *
	 * If the parent dir has recorded the last group used in allocation,
	 * cool, use it. Otherwise if we try to allocate new inode from the
	 * same slot the parent dir belongs to, use the same chunk.
	 *
	 * We are very careful here to avoid the mistake of setting
	 * ac_last_group to a group descriptor from a different (unlocked) slot.
	 */
	if (OCFS2_I(dir)->ip_last_used_group &&
	    OCFS2_I(dir)->ip_last_used_slot == ac->ac_alloc_slot)
		ac->ac_last_group = OCFS2_I(dir)->ip_last_used_group;
	else if (le16_to_cpu(fe->i_suballoc_slot) == ac->ac_alloc_slot)
		ac->ac_last_group = ocfs2_which_suballoc_group(
					le64_to_cpu(fe->i_blkno),
					le16_to_cpu(fe->i_suballoc_bit));
}

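/*
 * Remember which group we allocated from, so the next inode created
 * in this directory starts its search in the same place.
 */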
static inline void ocfs2_save_inode_ac_group(struct inode *dir,
					     struct ocfs2_alloc_context *ac)
{
	OCFS2_I(dir)->ip_last_used_group = ac->ac_last_group;
	OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
}

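/*
 * Claim exactly one bit from the inode suballocator for a brand new
 * inode.  *suballoc_bit receives the bit within its block group and
 * *fe_blkno the disk block that will hold the new inode.  The search
 * starts from the group hinted at by the parent directory
 * (ocfs2_init_inode_ac_group()) and the group actually used is saved
 * back as the next hint (ocfs2_save_inode_ac_group()).
 */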
int ocfs2_claim_new_inode(struct ocfs2_super *osb,
			  handle_t *handle,
			  struct inode *dir,
			  struct buffer_head *parent_fe_bh,
			  struct ocfs2_alloc_context *ac,
			  u16 *suballoc_bit,
			  u64 *fe_blkno)
{
	int status;
	unsigned int num_bits;
	u64 bg_blkno;

	mlog_entry_void();

	BUG_ON(!ac);
	BUG_ON(ac->ac_bits_given != 0);
	BUG_ON(ac->ac_bits_wanted != 1);
	BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);

	ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);

	status = ocfs2_claim_suballoc_bits(osb,
					   ac,
					   handle,
					   1,
					   1,
					   suballoc_bit,
					   &num_bits,
					   &bg_blkno);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	atomic_inc(&osb->alloc_stats.bg_allocs);

	BUG_ON(num_bits != 1);

	*fe_blkno = bg_blkno + (u64) (*suballoc_bit);
	ac->ac_bits_given++;
	ocfs2_save_inode_ac_group(dir, ac);
	status = 0;
bail:
	mlog_exit(status);
	return status;
}

/* translate a group desc. blkno and its bitmap offset into
 * disk cluster offset. */
static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
						   u64 bg_blkno,
						   u16 bg_bit_off)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u32 cluster = 0;

	BUG_ON(!ocfs2_is_cluster_bitmap(inode));

	if (bg_blkno != osb->first_cluster_group_blkno)
		cluster = ocfs2_blocks_to_clusters(inode->i_sb, bg_blkno);
	cluster += (u32) bg_bit_off;
	return cluster;
}

/* given a cluster offset, calculate which block group it belongs to
 * and return that block offset. */
u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u32 group_no;

	BUG_ON(!ocfs2_is_cluster_bitmap(inode));

	group_no = cluster / osb->bitmap_cpg;
	if (!group_no)
		return osb->first_cluster_group_blkno;
	return ocfs2_clusters_to_blocks(inode->i_sb,
					group_no * osb->bitmap_cpg);
}

/* given the block number of a cluster start, calculate which cluster
 * group and descriptor bitmap offset that corresponds to. */
static inline void ocfs2_block_to_cluster_group(struct inode *inode,
						u64 data_blkno,
						u64 *bg_blkno,
						u16 *bg_bit_off)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u32 data_cluster = ocfs2_blocks_to_clusters(osb->sb, data_blkno);

	BUG_ON(!ocfs2_is_cluster_bitmap(inode));

	*bg_blkno = ocfs2_which_cluster_group(inode,
					      data_cluster);

	if (*bg_blkno == osb->first_cluster_group_blkno)
		*bg_bit_off = (u16) data_cluster;
	else
		*bg_bit_off = (u16) ocfs2_blocks_to_clusters(osb->sb,
							     data_blkno - *bg_blkno);
}

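/*
 * Illustrative example of the mapping above (the values are made up,
 * not taken from this file): with osb->bitmap_cpg == 32256, cluster
 * 40000 lives in cluster group 1 (40000 / 32256), so
 * ocfs2_which_cluster_group() returns the block of cluster 32256 and
 * ocfs2_block_to_cluster_group() reports a bit offset of
 * 40000 - 32256 = 7744.  Any cluster below 32256 maps to the first
 * cluster group at osb->first_cluster_group_blkno, with the cluster
 * number itself as the bit offset.
 */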
/*
 * min_bits - the minimum contiguous chunk, out of this total
 * allocation, that we can handle.  Set it to what was originally asked
 * for to require a fully contiguous allocation, or to '1' to indicate
 * that extents of any size are acceptable.
 */
int __ocfs2_claim_clusters(struct ocfs2_super *osb,
			   handle_t *handle,
			   struct ocfs2_alloc_context *ac,
			   u32 min_clusters,
			   u32 max_clusters,
			   u32 *cluster_start,
			   u32 *num_clusters)
{
	int status;
	unsigned int bits_wanted = max_clusters;
	u64 bg_blkno = 0;
	u16 bg_bit_off;

	mlog_entry_void();

	BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);

	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
	       && ac->ac_which != OCFS2_AC_USE_MAIN);

	if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
		status = ocfs2_claim_local_alloc_bits(osb,
						      handle,
						      ac,
						      bits_wanted,
						      cluster_start,
						      num_clusters);
		if (!status)
			atomic_inc(&osb->alloc_stats.local_data);
	} else {
		if (min_clusters > (osb->bitmap_cpg - 1)) {
			/* The only paths asking for contiguousness
			 * should know about this already. */
			mlog(ML_ERROR, "minimum allocation requested %u exceeds "
			     "group bitmap size %u!\n", min_clusters,
			     osb->bitmap_cpg);
			status = -ENOSPC;
			goto bail;
		}
		/* clamp the current request down to a realistic size. */
		if (bits_wanted > (osb->bitmap_cpg - 1))
			bits_wanted = osb->bitmap_cpg - 1;

		status = ocfs2_claim_suballoc_bits(osb,
						   ac,
						   handle,
						   bits_wanted,
						   min_clusters,
						   &bg_bit_off,
						   num_clusters,
						   &bg_blkno);
		if (!status) {
			*cluster_start =
				ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode,
								 bg_blkno,
								 bg_bit_off);
			atomic_inc(&osb->alloc_stats.bitmap_data);
		}
	}
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto bail;
	}

	ac->ac_bits_given += *num_clusters;

bail:
	mlog_exit(status);
	return status;
}

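/*
 * ocfs2_claim_clusters() simply asks for everything still outstanding
 * on the reservation (ac_bits_wanted - ac_bits_given).  A minimal
 * calling sketch (illustrative, not taken from this file), run with
 * the allocator reserved and a journal handle open:
 *
 *	u32 cluster_start, num_clusters;
 *
 *	status = ocfs2_claim_clusters(osb, handle, data_ac, 1,
 *				      &cluster_start, &num_clusters);
 *	if (status < 0)
 *		mlog_errno(status);
 *
 * Passing min_clusters == 1 means any extent size is acceptable.
 */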
int ocfs2_claim_clusters(struct ocfs2_super *osb,
			 handle_t *handle,
			 struct ocfs2_alloc_context *ac,
			 u32 min_clusters,
			 u32 *cluster_start,
			 u32 *num_clusters)
{
	unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;

	return __ocfs2_claim_clusters(osb, handle, ac, min_clusters,
				      bits_wanted, cluster_start, num_clusters);
}

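/*
 * Clear num_bits bits starting at bit_off in bg's bitmap and credit
 * them back to bg_free_bits_count.  For the cluster bitmap the buffer
 * is journaled with undo access and the freed bits are also set in the
 * JBD committed-data copy, so they are not handed out again before the
 * freeing transaction commits.
 */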
static inline int ocfs2_block_group_clear_bits(handle_t *handle,
					       struct inode *alloc_inode,
					       struct ocfs2_group_desc *bg,
					       struct buffer_head *group_bh,
					       unsigned int bit_off,
					       unsigned int num_bits)
{
	int status;
	unsigned int tmp;
	int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
	struct ocfs2_group_desc *undo_bg = NULL;
	int cluster_bitmap = 0;

	mlog_entry_void();

	/* The caller got this descriptor from
	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));

	mlog(0, "off = %u, num = %u\n", bit_off, num_bits);

	if (ocfs2_is_cluster_bitmap(alloc_inode))
		journal_type = OCFS2_JOURNAL_ACCESS_UNDO;

	status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
					 group_bh, journal_type);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	if (ocfs2_is_cluster_bitmap(alloc_inode))
		cluster_bitmap = 1;

	if (cluster_bitmap) {
		jbd_lock_bh_state(group_bh);
		undo_bg = (struct ocfs2_group_desc *)
					bh2jh(group_bh)->b_committed_data;
		BUG_ON(!undo_bg);
	}

	tmp = num_bits;
	while (tmp--) {
		ocfs2_clear_bit((bit_off + tmp),
				(unsigned long *) bg->bg_bitmap);
		if (cluster_bitmap)
			ocfs2_set_bit(bit_off + tmp,
				      (unsigned long *) undo_bg->bg_bitmap);
	}
	le16_add_cpu(&bg->bg_free_bits_count, num_bits);

	if (cluster_bitmap)
		jbd_unlock_bh_state(group_bh);

	status = ocfs2_journal_dirty(handle, group_bh);
	if (status < 0)
		mlog_errno(status);
bail:
	return status;
}

/*
 * expects the suballoc inode to already be locked.
 */
int ocfs2_free_suballoc_bits(handle_t *handle,
			     struct inode *alloc_inode,
			     struct buffer_head *alloc_bh,
			     unsigned int start_bit,
			     u64 bg_blkno,
			     unsigned int count)
{
	int status = 0;
	u32 tmp_used;
	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data;
	struct ocfs2_chain_list *cl = &fe->id2.i_chain;
	struct buffer_head *group_bh = NULL;
	struct ocfs2_group_desc *group;

	mlog_entry_void();

	/* The alloc_bh comes from ocfs2_free_dinode() or
	 * ocfs2_free_clusters().  The callers have all locked the
	 * allocator and gotten alloc_bh from the lock call.  This
	 * validates the dinode buffer.  Any corruption that has happened
	 * is a code bug. */
	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
	BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));

	mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
	     (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
	     (unsigned long long)bg_blkno, start_bit);

	status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno,
					     &group_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	group = (struct ocfs2_group_desc *) group_bh->b_data;

	BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));

	status = ocfs2_block_group_clear_bits(handle, alloc_inode,
					      group, group_bh,
					      start_bit, count);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
					 alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free,
		     count);
	tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
	fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count);

	status = ocfs2_journal_dirty(handle, alloc_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

bail:
	brelse(group_bh);

	mlog_exit(status);
	return status;
}

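/*
 * Free the single suballocator bit backing the inode described by di.
 * The inode allocator must already be locked by the caller, as with
 * ocfs2_free_suballoc_bits().
 */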
int ocfs2_free_dinode(handle_t *handle,
		      struct inode *inode_alloc_inode,
		      struct buffer_head *inode_alloc_bh,
		      struct ocfs2_dinode *di)
{
	u64 blk = le64_to_cpu(di->i_blkno);
	u16 bit = le16_to_cpu(di->i_suballoc_bit);
	u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);

	return ocfs2_free_suballoc_bits(handle, inode_alloc_inode,
					inode_alloc_bh, bit, bg_blkno, 1);
}

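/*
 * Give num_clusters clusters starting at block start_blk back to the
 * global bitmap.  start_blk must be the first block of a cluster
 * (enforced by the BUG_ON below), and a single request never spans
 * more than one block group.  The local allocator is notified of the
 * newly freed space afterwards.
 */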
int ocfs2_free_clusters(handle_t *handle,
			struct inode *bitmap_inode,
			struct buffer_head *bitmap_bh,
			u64 start_blk,
			unsigned int num_clusters)
{
	int status;
	u16 bg_start_bit;
	u64 bg_blkno;
	struct ocfs2_dinode *fe;

	/* You can't ever have a contiguous set of clusters
	 * bigger than a block group bitmap so we never have to worry
	 * about looping on them. */

	mlog_entry_void();

	/* This is expensive. We can safely remove once this stuff has
	 * gotten tested really well. */
	BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb,
				ocfs2_blocks_to_clusters(bitmap_inode->i_sb,
							 start_blk)));

	fe = (struct ocfs2_dinode *) bitmap_bh->b_data;

	ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
				     &bg_start_bit);

	mlog(0, "want to free %u clusters starting at block %llu\n",
	     num_clusters, (unsigned long long)start_blk);
	mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
	     (unsigned long long)bg_blkno, bg_start_bit);

	status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
					  bg_start_bit, bg_blkno,
					  num_clusters);
	if (status < 0) {
		mlog_errno(status);
		goto out;
	}

	ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb),
					 num_clusters);

out:
	mlog_exit(status);
	return status;
}

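/*
 * Debugging helpers: dump the interesting fields of a block group
 * descriptor and of a suballocator inode to the console.
 */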
static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg)
{
	printk("Block Group:\n");
	printk("bg_signature: %s\n", bg->bg_signature);
	printk("bg_size: %u\n", bg->bg_size);
	printk("bg_bits: %u\n", bg->bg_bits);
	printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count);
	printk("bg_chain: %u\n", bg->bg_chain);
	printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation));
	printk("bg_next_group: %llu\n",
	       (unsigned long long)bg->bg_next_group);
	printk("bg_parent_dinode: %llu\n",
	       (unsigned long long)bg->bg_parent_dinode);
	printk("bg_blkno: %llu\n",
	       (unsigned long long)bg->bg_blkno);
}

static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
{
	int i;

	printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno);
	printk("i_signature: %s\n", fe->i_signature);
	printk("i_size: %llu\n",
	       (unsigned long long)fe->i_size);
	printk("i_clusters: %u\n", fe->i_clusters);
	printk("i_generation: %u\n",
	       le32_to_cpu(fe->i_generation));
	printk("id1.bitmap1.i_used: %u\n",
	       le32_to_cpu(fe->id1.bitmap1.i_used));
	printk("id1.bitmap1.i_total: %u\n",
	       le32_to_cpu(fe->id1.bitmap1.i_total));
	printk("id2.i_chain.cl_cpg: %u\n", fe->id2.i_chain.cl_cpg);
	printk("id2.i_chain.cl_bpc: %u\n", fe->id2.i_chain.cl_bpc);
	printk("id2.i_chain.cl_count: %u\n", fe->id2.i_chain.cl_count);
	printk("id2.i_chain.cl_next_free_rec: %u\n",
	       fe->id2.i_chain.cl_next_free_rec);
	for (i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) {
		printk("fe->id2.i_chain.cl_recs[%d].c_free: %u\n", i,
		       fe->id2.i_chain.cl_recs[i].c_free);
		printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i,
		       fe->id2.i_chain.cl_recs[i].c_total);
		printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i,
		       (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
	}
}

/*
 * For a given allocation, determine which allocators will need to be
 * accessed, and lock them, reserving the appropriate number of bits.
 *
 * Sparse file systems call this from ocfs2_write_begin_nolock()
 * and ocfs2_allocate_unwritten_extents().
 *
 * File systems which don't support holes call this from
 * ocfs2_extend_allocation().
 */
int ocfs2_lock_allocators(struct inode *inode,
			  struct ocfs2_extent_tree *et,
			  u32 clusters_to_add, u32 extents_to_split,
			  struct ocfs2_alloc_context **data_ac,
			  struct ocfs2_alloc_context **meta_ac)
{
	int ret = 0, num_free_extents;
	unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	*meta_ac = NULL;
	if (data_ac)
		*data_ac = NULL;

	BUG_ON(clusters_to_add != 0 && data_ac == NULL);

	num_free_extents = ocfs2_num_free_extents(osb, et);
	if (num_free_extents < 0) {
		ret = num_free_extents;
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Sparse allocation file systems need to be more conservative
	 * with reserving room for expansion - the actual allocation
	 * happens while we've got a journal handle open so re-taking
	 * a cluster lock (because we ran out of room for another
	 * extent) will violate ordering rules.
	 *
	 * Most of the time we'll only be seeing this 1 cluster at a time
	 * anyway.
	 *
	 * Always lock for any unwritten extents - we might want to
	 * add blocks during a split.
	 */
	if (!num_free_extents ||
	    (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
		ret = ocfs2_reserve_new_metadata(osb, et->et_root_el, meta_ac);
		if (ret < 0) {
			if (ret != -ENOSPC)
				mlog_errno(ret);
			goto out;
		}
	}

	if (clusters_to_add == 0)
		goto out;

	ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
	if (ret < 0) {
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto out;
	}

out:
	if (ret) {
		if (*meta_ac) {
			ocfs2_free_alloc_context(*meta_ac);
			*meta_ac = NULL;
		}

		/*
		 * We cannot have an error and a non null *data_ac.
		 */
	}

	return ret;
}

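/*
 * Typical calling pattern for ocfs2_lock_allocators() (a sketch;
 * variable names are illustrative): reserve before opening the journal
 * handle, then drop the contexts once the allocation is done:
 *
 *	ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
 *				    &data_ac, &meta_ac);
 *	...
 *	if (meta_ac)
 *		ocfs2_free_alloc_context(meta_ac);
 *	if (data_ac)
 *		ocfs2_free_alloc_context(data_ac);
 */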
/*
 * Read the inode specified by blkno to get suballoc_slot and
 * suballoc_bit.
 */
static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
				       u16 *suballoc_slot, u16 *suballoc_bit)
{
	int status;
	struct buffer_head *inode_bh = NULL;
	struct ocfs2_dinode *inode_fe;

	mlog_entry("blkno: %llu\n", (unsigned long long)blkno);

	/* dirty read disk */
	status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh);
	if (status < 0) {
		mlog(ML_ERROR, "read block %llu failed %d\n",
		     (unsigned long long)blkno, status);
		goto bail;
	}

	inode_fe = (struct ocfs2_dinode *) inode_bh->b_data;
	if (!OCFS2_IS_VALID_DINODE(inode_fe)) {
		mlog(ML_ERROR, "invalid inode %llu requested\n",
		     (unsigned long long)blkno);
		status = -EINVAL;
		goto bail;
	}

	if (le16_to_cpu(inode_fe->i_suballoc_slot) != (u16)OCFS2_INVALID_SLOT &&
	    (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) {
		mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n",
		     (unsigned long long)blkno,
		     (u32)le16_to_cpu(inode_fe->i_suballoc_slot));
		status = -EINVAL;
		goto bail;
	}

	if (suballoc_slot)
		*suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot);
	if (suballoc_bit)
		*suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit);

bail:
	brelse(inode_bh);

	mlog_exit(status);
	return status;
}

/*
 * Test whether the given bit is SET in the allocator bitmap.  On
 * success, 0 is returned and *res is 1 for SET, 0 otherwise.  On
 * failure, a negative errno is returned and *res is meaningless.  Call
 * this after you have the suballocator cluster locked, or you may get
 * a result based on non-up-to-date contents.
 */
static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
				   struct inode *suballoc,
				   struct buffer_head *alloc_bh, u64 blkno,
				   u16 bit, int *res)
{
	struct ocfs2_dinode *alloc_fe;
	struct ocfs2_group_desc *group;
	struct buffer_head *group_bh = NULL;
	u64 bg_blkno;
	int status;

	mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno,
		   (unsigned int)bit);

	alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data;
	if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) {
		mlog(ML_ERROR, "suballoc bit %u out of range of %u\n",
		     (unsigned int)bit,
		     ocfs2_bits_per_group(&alloc_fe->id2.i_chain));
		status = -EINVAL;
		goto bail;
	}

	bg_blkno = ocfs2_which_suballoc_group(blkno, bit);
	status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno,
					     &group_bh);
	if (status < 0) {
		mlog(ML_ERROR, "read group %llu failed %d\n",
		     (unsigned long long)bg_blkno, status);
		goto bail;
	}

	group = (struct ocfs2_group_desc *) group_bh->b_data;
	*res = ocfs2_test_bit(bit, (unsigned long *)group->bg_bitmap);

bail:
	brelse(group_bh);

	mlog_exit(status);
	return status;
}

/*
 * Test if the bit representing this inode (blkno) is set in the
 * suballocator.
 *
 * On success, 0 is returned and *res is 1 for SET; 0 otherwise.
 *
 * In the event of failure, a negative value is returned and *res is
 * meaningless.
 *
 * Callers must make sure to hold nfs_sync_lock to prevent
 * ocfs2_delete_inode() on another node from accessing the same
 * suballocator concurrently.
 */
int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
{
	int status;
	u16 suballoc_bit = 0, suballoc_slot = 0;
	struct inode *inode_alloc_inode;
	struct buffer_head *alloc_bh = NULL;

	mlog_entry("blkno: %llu", (unsigned long long)blkno);

	status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
					     &suballoc_bit);
	if (status < 0) {
		mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status);
		goto bail;
	}

	inode_alloc_inode =
		ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
					    suballoc_slot);
	if (!inode_alloc_inode) {
		/* the error code could be inaccurate, but we are not able to
		 * get the correct one. */
		status = -EINVAL;
		mlog(ML_ERROR, "unable to get alloc inode in slot %u\n",
		     (u32)suballoc_slot);
		goto bail;
	}

	mutex_lock(&inode_alloc_inode->i_mutex);
	status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
	if (status < 0) {
		mutex_unlock(&inode_alloc_inode->i_mutex);
		mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
		     (u32)suballoc_slot, status);
		goto bail;
	}

	status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
					 blkno, suballoc_bit, res);
	if (status < 0)
		mlog(ML_ERROR, "test suballoc bit failed %d\n", status);

	ocfs2_inode_unlock(inode_alloc_inode, 0);
	mutex_unlock(&inode_alloc_inode->i_mutex);

	iput(inode_alloc_inode);
	brelse(alloc_bh);
bail:
	mlog_exit(status);
	return status;
}

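/*
 * Example use of ocfs2_test_inode_bit() (a sketch, not taken from this
 * file): validating a block number received from an untrusted source,
 * such as a possibly stale file handle:
 *
 *	int set = 0;
 *
 *	status = ocfs2_test_inode_bit(osb, blkno, &set);
 *	if (status < 0)
 *		mlog_errno(status);
 *	else if (!set)
 *		status = -ESTALE;
 *
 * A clear bit means the inode has since been freed.
 */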