xfs_ialloc.c 44 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554
  1. /*
  2. * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
  3. * All Rights Reserved.
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU General Public License as
  7. * published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it would be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write the Free Software Foundation,
  16. * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "xfs.h"
  19. #include "xfs_fs.h"
  20. #include "xfs_types.h"
  21. #include "xfs_bit.h"
  22. #include "xfs_log.h"
  23. #include "xfs_inum.h"
  24. #include "xfs_trans.h"
  25. #include "xfs_sb.h"
  26. #include "xfs_ag.h"
  27. #include "xfs_dir2.h"
  28. #include "xfs_dmapi.h"
  29. #include "xfs_mount.h"
  30. #include "xfs_bmap_btree.h"
  31. #include "xfs_alloc_btree.h"
  32. #include "xfs_ialloc_btree.h"
  33. #include "xfs_dir2_sf.h"
  34. #include "xfs_attr_sf.h"
  35. #include "xfs_dinode.h"
  36. #include "xfs_inode.h"
  37. #include "xfs_btree.h"
  38. #include "xfs_ialloc.h"
  39. #include "xfs_alloc.h"
  40. #include "xfs_rtalloc.h"
  41. #include "xfs_error.h"
  42. #include "xfs_bmap.h"
  43. #include "xfs_imap.h"
  44. /*
  45. * Allocation group level functions.
  46. */
  47. static inline int
  48. xfs_ialloc_cluster_alignment(
  49. xfs_alloc_arg_t *args)
  50. {
  51. if (xfs_sb_version_hasalign(&args->mp->m_sb) &&
  52. args->mp->m_sb.sb_inoalignmt >=
  53. XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp)))
  54. return args->mp->m_sb.sb_inoalignmt;
  55. return 1;
  56. }
  57. /*
  58. * Lookup the record equal to ino in the btree given by cur.
  59. */
  60. STATIC int /* error */
  61. xfs_inobt_lookup_eq(
  62. struct xfs_btree_cur *cur, /* btree cursor */
  63. xfs_agino_t ino, /* starting inode of chunk */
  64. __int32_t fcnt, /* free inode count */
  65. xfs_inofree_t free, /* free inode mask */
  66. int *stat) /* success/failure */
  67. {
  68. cur->bc_rec.i.ir_startino = ino;
  69. cur->bc_rec.i.ir_freecount = fcnt;
  70. cur->bc_rec.i.ir_free = free;
  71. return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
  72. }
  73. /*
  74. * Lookup the first record greater than or equal to ino
  75. * in the btree given by cur.
  76. */
  77. int /* error */
  78. xfs_inobt_lookup_ge(
  79. struct xfs_btree_cur *cur, /* btree cursor */
  80. xfs_agino_t ino, /* starting inode of chunk */
  81. __int32_t fcnt, /* free inode count */
  82. xfs_inofree_t free, /* free inode mask */
  83. int *stat) /* success/failure */
  84. {
  85. cur->bc_rec.i.ir_startino = ino;
  86. cur->bc_rec.i.ir_freecount = fcnt;
  87. cur->bc_rec.i.ir_free = free;
  88. return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
  89. }
  90. /*
  91. * Lookup the first record less than or equal to ino
  92. * in the btree given by cur.
  93. */
  94. int /* error */
  95. xfs_inobt_lookup_le(
  96. struct xfs_btree_cur *cur, /* btree cursor */
  97. xfs_agino_t ino, /* starting inode of chunk */
  98. __int32_t fcnt, /* free inode count */
  99. xfs_inofree_t free, /* free inode mask */
  100. int *stat) /* success/failure */
  101. {
  102. cur->bc_rec.i.ir_startino = ino;
  103. cur->bc_rec.i.ir_freecount = fcnt;
  104. cur->bc_rec.i.ir_free = free;
  105. return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
  106. }
  107. /*
  108. * Update the record referred to by cur to the value given
  109. * by [ino, fcnt, free].
  110. * This either works (return 0) or gets an EFSCORRUPTED error.
  111. */
  112. STATIC int /* error */
  113. xfs_inobt_update(
  114. struct xfs_btree_cur *cur, /* btree cursor */
  115. xfs_agino_t ino, /* starting inode of chunk */
  116. __int32_t fcnt, /* free inode count */
  117. xfs_inofree_t free) /* free inode mask */
  118. {
  119. union xfs_btree_rec rec;
  120. rec.inobt.ir_startino = cpu_to_be32(ino);
  121. rec.inobt.ir_freecount = cpu_to_be32(fcnt);
  122. rec.inobt.ir_free = cpu_to_be64(free);
  123. return xfs_btree_update(cur, &rec);
  124. }
  125. /*
  126. * Get the data from the pointed-to record.
  127. */
  128. int /* error */
  129. xfs_inobt_get_rec(
  130. struct xfs_btree_cur *cur, /* btree cursor */
  131. xfs_agino_t *ino, /* output: starting inode of chunk */
  132. __int32_t *fcnt, /* output: number of free inodes */
  133. xfs_inofree_t *free, /* output: free inode mask */
  134. int *stat) /* output: success/failure */
  135. {
  136. union xfs_btree_rec *rec;
  137. int error;
  138. error = xfs_btree_get_rec(cur, &rec, stat);
  139. if (!error && *stat == 1) {
  140. *ino = be32_to_cpu(rec->inobt.ir_startino);
  141. *fcnt = be32_to_cpu(rec->inobt.ir_freecount);
  142. *free = be64_to_cpu(rec->inobt.ir_free);
  143. }
  144. return error;
  145. }
  146. /*
  147. * Allocate new inodes in the allocation group specified by agbp.
  148. * Return 0 for success, else error code.
  149. */
  150. STATIC int /* error code or 0 */
  151. xfs_ialloc_ag_alloc(
  152. xfs_trans_t *tp, /* transaction pointer */
  153. xfs_buf_t *agbp, /* alloc group buffer */
  154. int *alloc)
  155. {
  156. xfs_agi_t *agi; /* allocation group header */
  157. xfs_alloc_arg_t args; /* allocation argument structure */
  158. int blks_per_cluster; /* fs blocks per inode cluster */
  159. xfs_btree_cur_t *cur; /* inode btree cursor */
  160. xfs_daddr_t d; /* disk addr of buffer */
  161. xfs_agnumber_t agno;
  162. int error;
  163. xfs_buf_t *fbuf; /* new free inodes' buffer */
  164. xfs_dinode_t *free; /* new free inode structure */
  165. int i; /* inode counter */
  166. int j; /* block counter */
  167. int nbufs; /* num bufs of new inodes */
  168. xfs_agino_t newino; /* new first inode's number */
  169. xfs_agino_t newlen; /* new number of inodes */
  170. int ninodes; /* num inodes per buf */
  171. xfs_agino_t thisino; /* current inode number, for loop */
  172. int version; /* inode version number to use */
  173. int isaligned = 0; /* inode allocation at stripe unit */
  174. /* boundary */
  175. unsigned int gen;
  176. args.tp = tp;
  177. args.mp = tp->t_mountp;
  178. /*
  179. * Locking will ensure that we don't have two callers in here
  180. * at one time.
  181. */
  182. newlen = XFS_IALLOC_INODES(args.mp);
  183. if (args.mp->m_maxicount &&
  184. args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
  185. return XFS_ERROR(ENOSPC);
  186. args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
  187. /*
  188. * First try to allocate inodes contiguous with the last-allocated
  189. * chunk of inodes. If the filesystem is striped, this will fill
  190. * an entire stripe unit with inodes.
  191. */
  192. agi = XFS_BUF_TO_AGI(agbp);
  193. newino = be32_to_cpu(agi->agi_newino);
  194. args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
  195. XFS_IALLOC_BLOCKS(args.mp);
  196. if (likely(newino != NULLAGINO &&
  197. (args.agbno < be32_to_cpu(agi->agi_length)))) {
  198. args.fsbno = XFS_AGB_TO_FSB(args.mp,
  199. be32_to_cpu(agi->agi_seqno), args.agbno);
  200. args.type = XFS_ALLOCTYPE_THIS_BNO;
  201. args.mod = args.total = args.wasdel = args.isfl =
  202. args.userdata = args.minalignslop = 0;
  203. args.prod = 1;
  204. /*
  205. * We need to take into account alignment here to ensure that
  206. * we don't modify the free list if we fail to have an exact
  207. * block. If we don't have an exact match, and every oher
  208. * attempt allocation attempt fails, we'll end up cancelling
  209. * a dirty transaction and shutting down.
  210. *
  211. * For an exact allocation, alignment must be 1,
  212. * however we need to take cluster alignment into account when
  213. * fixing up the freelist. Use the minalignslop field to
  214. * indicate that extra blocks might be required for alignment,
  215. * but not to use them in the actual exact allocation.
  216. */
  217. args.alignment = 1;
  218. args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;
  219. /* Allow space for the inode btree to split. */
  220. args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
  221. if ((error = xfs_alloc_vextent(&args)))
  222. return error;
  223. } else
  224. args.fsbno = NULLFSBLOCK;
  225. if (unlikely(args.fsbno == NULLFSBLOCK)) {
  226. /*
  227. * Set the alignment for the allocation.
  228. * If stripe alignment is turned on then align at stripe unit
  229. * boundary.
  230. * If the cluster size is smaller than a filesystem block
  231. * then we're doing I/O for inodes in filesystem block size
  232. * pieces, so don't need alignment anyway.
  233. */
  234. isaligned = 0;
  235. if (args.mp->m_sinoalign) {
  236. ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
  237. args.alignment = args.mp->m_dalign;
  238. isaligned = 1;
  239. } else
  240. args.alignment = xfs_ialloc_cluster_alignment(&args);
  241. /*
  242. * Need to figure out where to allocate the inode blocks.
  243. * Ideally they should be spaced out through the a.g.
  244. * For now, just allocate blocks up front.
  245. */
  246. args.agbno = be32_to_cpu(agi->agi_root);
  247. args.fsbno = XFS_AGB_TO_FSB(args.mp,
  248. be32_to_cpu(agi->agi_seqno), args.agbno);
  249. /*
  250. * Allocate a fixed-size extent of inodes.
  251. */
  252. args.type = XFS_ALLOCTYPE_NEAR_BNO;
  253. args.mod = args.total = args.wasdel = args.isfl =
  254. args.userdata = args.minalignslop = 0;
  255. args.prod = 1;
  256. /*
  257. * Allow space for the inode btree to split.
  258. */
  259. args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
  260. if ((error = xfs_alloc_vextent(&args)))
  261. return error;
  262. }
  263. /*
  264. * If stripe alignment is turned on, then try again with cluster
  265. * alignment.
  266. */
  267. if (isaligned && args.fsbno == NULLFSBLOCK) {
  268. args.type = XFS_ALLOCTYPE_NEAR_BNO;
  269. args.agbno = be32_to_cpu(agi->agi_root);
  270. args.fsbno = XFS_AGB_TO_FSB(args.mp,
  271. be32_to_cpu(agi->agi_seqno), args.agbno);
  272. args.alignment = xfs_ialloc_cluster_alignment(&args);
  273. if ((error = xfs_alloc_vextent(&args)))
  274. return error;
  275. }
  276. if (args.fsbno == NULLFSBLOCK) {
  277. *alloc = 0;
  278. return 0;
  279. }
  280. ASSERT(args.len == args.minlen);
  281. /*
  282. * Convert the results.
  283. */
  284. newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
  285. /*
  286. * Loop over the new block(s), filling in the inodes.
  287. * For small block sizes, manipulate the inodes in buffers
  288. * which are multiples of the blocks size.
  289. */
  290. if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) {
  291. blks_per_cluster = 1;
  292. nbufs = (int)args.len;
  293. ninodes = args.mp->m_sb.sb_inopblock;
  294. } else {
  295. blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) /
  296. args.mp->m_sb.sb_blocksize;
  297. nbufs = (int)args.len / blks_per_cluster;
  298. ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock;
  299. }
  300. /*
  301. * Figure out what version number to use in the inodes we create.
  302. * If the superblock version has caught up to the one that supports
  303. * the new inode format, then use the new inode version. Otherwise
  304. * use the old version so that old kernels will continue to be
  305. * able to use the file system.
  306. */
  307. if (xfs_sb_version_hasnlink(&args.mp->m_sb))
  308. version = 2;
  309. else
  310. version = 1;
  311. /*
  312. * Seed the new inode cluster with a random generation number. This
  313. * prevents short-term reuse of generation numbers if a chunk is
  314. * freed and then immediately reallocated. We use random numbers
  315. * rather than a linear progression to prevent the next generation
  316. * number from being easily guessable.
  317. */
  318. gen = random32();
  319. for (j = 0; j < nbufs; j++) {
  320. /*
  321. * Get the block.
  322. */
  323. d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno),
  324. args.agbno + (j * blks_per_cluster));
  325. fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d,
  326. args.mp->m_bsize * blks_per_cluster,
  327. XFS_BUF_LOCK);
  328. ASSERT(fbuf);
  329. ASSERT(!XFS_BUF_GETERROR(fbuf));
  330. /*
  331. * Initialize all inodes in this buffer and then log them.
  332. *
  333. * XXX: It would be much better if we had just one transaction to
  334. * log a whole cluster of inodes instead of all the indivdual
  335. * transactions causing a lot of log traffic.
  336. */
  337. xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog);
  338. for (i = 0; i < ninodes; i++) {
  339. int ioffset = i << args.mp->m_sb.sb_inodelog;
  340. uint isize = sizeof(struct xfs_dinode);
  341. free = XFS_MAKE_IPTR(args.mp, fbuf, i);
  342. free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
  343. free->di_version = version;
  344. free->di_gen = cpu_to_be32(gen);
  345. free->di_next_unlinked = cpu_to_be32(NULLAGINO);
  346. xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
  347. }
  348. xfs_trans_inode_alloc_buf(tp, fbuf);
  349. }
  350. be32_add_cpu(&agi->agi_count, newlen);
  351. be32_add_cpu(&agi->agi_freecount, newlen);
  352. agno = be32_to_cpu(agi->agi_seqno);
  353. down_read(&args.mp->m_peraglock);
  354. args.mp->m_perag[agno].pagi_freecount += newlen;
  355. up_read(&args.mp->m_peraglock);
  356. agi->agi_newino = cpu_to_be32(newino);
  357. /*
  358. * Insert records describing the new inode chunk into the btree.
  359. */
  360. cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno);
  361. for (thisino = newino;
  362. thisino < newino + newlen;
  363. thisino += XFS_INODES_PER_CHUNK) {
  364. if ((error = xfs_inobt_lookup_eq(cur, thisino,
  365. XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) {
  366. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  367. return error;
  368. }
  369. ASSERT(i == 0);
  370. if ((error = xfs_btree_insert(cur, &i))) {
  371. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  372. return error;
  373. }
  374. ASSERT(i == 1);
  375. }
  376. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  377. /*
  378. * Log allocation group header fields
  379. */
  380. xfs_ialloc_log_agi(tp, agbp,
  381. XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
  382. /*
  383. * Modify/log superblock values for inode count and inode free count.
  384. */
  385. xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
  386. xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
  387. *alloc = 1;
  388. return 0;
  389. }
  390. STATIC_INLINE xfs_agnumber_t
  391. xfs_ialloc_next_ag(
  392. xfs_mount_t *mp)
  393. {
  394. xfs_agnumber_t agno;
  395. spin_lock(&mp->m_agirotor_lock);
  396. agno = mp->m_agirotor;
  397. if (++mp->m_agirotor == mp->m_maxagi)
  398. mp->m_agirotor = 0;
  399. spin_unlock(&mp->m_agirotor_lock);
  400. return agno;
  401. }
  402. /*
  403. * Select an allocation group to look for a free inode in, based on the parent
  404. * inode and then mode. Return the allocation group buffer.
  405. */
  406. STATIC xfs_buf_t * /* allocation group buffer */
  407. xfs_ialloc_ag_select(
  408. xfs_trans_t *tp, /* transaction pointer */
  409. xfs_ino_t parent, /* parent directory inode number */
  410. mode_t mode, /* bits set to indicate file type */
  411. int okalloc) /* ok to allocate more space */
  412. {
  413. xfs_buf_t *agbp; /* allocation group header buffer */
  414. xfs_agnumber_t agcount; /* number of ag's in the filesystem */
  415. xfs_agnumber_t agno; /* current ag number */
  416. int flags; /* alloc buffer locking flags */
  417. xfs_extlen_t ineed; /* blocks needed for inode allocation */
  418. xfs_extlen_t longest = 0; /* longest extent available */
  419. xfs_mount_t *mp; /* mount point structure */
  420. int needspace; /* file mode implies space allocated */
  421. xfs_perag_t *pag; /* per allocation group data */
  422. xfs_agnumber_t pagno; /* parent (starting) ag number */
  423. /*
  424. * Files of these types need at least one block if length > 0
  425. * (and they won't fit in the inode, but that's hard to figure out).
  426. */
  427. needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
  428. mp = tp->t_mountp;
  429. agcount = mp->m_maxagi;
  430. if (S_ISDIR(mode))
  431. pagno = xfs_ialloc_next_ag(mp);
  432. else {
  433. pagno = XFS_INO_TO_AGNO(mp, parent);
  434. if (pagno >= agcount)
  435. pagno = 0;
  436. }
  437. ASSERT(pagno < agcount);
  438. /*
  439. * Loop through allocation groups, looking for one with a little
  440. * free space in it. Note we don't look for free inodes, exactly.
  441. * Instead, we include whether there is a need to allocate inodes
  442. * to mean that blocks must be allocated for them,
  443. * if none are currently free.
  444. */
  445. agno = pagno;
  446. flags = XFS_ALLOC_FLAG_TRYLOCK;
  447. down_read(&mp->m_peraglock);
  448. for (;;) {
  449. pag = &mp->m_perag[agno];
  450. if (!pag->pagi_init) {
  451. if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
  452. agbp = NULL;
  453. goto nextag;
  454. }
  455. } else
  456. agbp = NULL;
  457. if (!pag->pagi_inodeok) {
  458. xfs_ialloc_next_ag(mp);
  459. goto unlock_nextag;
  460. }
  461. /*
  462. * Is there enough free space for the file plus a block
  463. * of inodes (if we need to allocate some)?
  464. */
  465. ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp);
  466. if (ineed && !pag->pagf_init) {
  467. if (agbp == NULL &&
  468. xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
  469. agbp = NULL;
  470. goto nextag;
  471. }
  472. (void)xfs_alloc_pagf_init(mp, tp, agno, flags);
  473. }
  474. if (!ineed || pag->pagf_init) {
  475. if (ineed && !(longest = pag->pagf_longest))
  476. longest = pag->pagf_flcount > 0;
  477. if (!ineed ||
  478. (pag->pagf_freeblks >= needspace + ineed &&
  479. longest >= ineed &&
  480. okalloc)) {
  481. if (agbp == NULL &&
  482. xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
  483. agbp = NULL;
  484. goto nextag;
  485. }
  486. up_read(&mp->m_peraglock);
  487. return agbp;
  488. }
  489. }
  490. unlock_nextag:
  491. if (agbp)
  492. xfs_trans_brelse(tp, agbp);
  493. nextag:
  494. /*
  495. * No point in iterating over the rest, if we're shutting
  496. * down.
  497. */
  498. if (XFS_FORCED_SHUTDOWN(mp)) {
  499. up_read(&mp->m_peraglock);
  500. return NULL;
  501. }
  502. agno++;
  503. if (agno >= agcount)
  504. agno = 0;
  505. if (agno == pagno) {
  506. if (flags == 0) {
  507. up_read(&mp->m_peraglock);
  508. return NULL;
  509. }
  510. flags = 0;
  511. }
  512. }
  513. }
  514. /*
  515. * Visible inode allocation functions.
  516. */
  517. /*
  518. * Allocate an inode on disk.
  519. * Mode is used to tell whether the new inode will need space, and whether
  520. * it is a directory.
  521. *
  522. * The arguments IO_agbp and alloc_done are defined to work within
  523. * the constraint of one allocation per transaction.
  524. * xfs_dialloc() is designed to be called twice if it has to do an
  525. * allocation to make more free inodes. On the first call,
  526. * IO_agbp should be set to NULL. If an inode is available,
  527. * i.e., xfs_dialloc() did not need to do an allocation, an inode
  528. * number is returned. In this case, IO_agbp would be set to the
  529. * current ag_buf and alloc_done set to false.
  530. * If an allocation needed to be done, xfs_dialloc would return
  531. * the current ag_buf in IO_agbp and set alloc_done to true.
  532. * The caller should then commit the current transaction, allocate a new
  533. * transaction, and call xfs_dialloc() again, passing in the previous
  534. * value of IO_agbp. IO_agbp should be held across the transactions.
  535. * Since the agbp is locked across the two calls, the second call is
  536. * guaranteed to have a free inode available.
  537. *
  538. * Once we successfully pick an inode its number is returned and the
  539. * on-disk data structures are updated. The inode itself is not read
  540. * in, since doing so would break ordering constraints with xfs_reclaim.
  541. */
  542. int
  543. xfs_dialloc(
  544. xfs_trans_t *tp, /* transaction pointer */
  545. xfs_ino_t parent, /* parent inode (directory) */
  546. mode_t mode, /* mode bits for new inode */
  547. int okalloc, /* ok to allocate more space */
  548. xfs_buf_t **IO_agbp, /* in/out ag header's buffer */
  549. boolean_t *alloc_done, /* true if we needed to replenish
  550. inode freelist */
  551. xfs_ino_t *inop) /* inode number allocated */
  552. {
  553. xfs_agnumber_t agcount; /* number of allocation groups */
  554. xfs_buf_t *agbp; /* allocation group header's buffer */
  555. xfs_agnumber_t agno; /* allocation group number */
  556. xfs_agi_t *agi; /* allocation group header structure */
  557. xfs_btree_cur_t *cur; /* inode allocation btree cursor */
  558. int error; /* error return value */
  559. int i; /* result code */
  560. int ialloced; /* inode allocation status */
  561. int noroom = 0; /* no space for inode blk allocation */
  562. xfs_ino_t ino; /* fs-relative inode to be returned */
  563. /* REFERENCED */
  564. int j; /* result code */
  565. xfs_mount_t *mp; /* file system mount structure */
  566. int offset; /* index of inode in chunk */
  567. xfs_agino_t pagino; /* parent's a.g. relative inode # */
  568. xfs_agnumber_t pagno; /* parent's allocation group number */
  569. xfs_inobt_rec_incore_t rec; /* inode allocation record */
  570. xfs_agnumber_t tagno; /* testing allocation group number */
  571. xfs_btree_cur_t *tcur; /* temp cursor */
  572. xfs_inobt_rec_incore_t trec; /* temp inode allocation record */
  573. if (*IO_agbp == NULL) {
  574. /*
  575. * We do not have an agbp, so select an initial allocation
  576. * group for inode allocation.
  577. */
  578. agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
  579. /*
  580. * Couldn't find an allocation group satisfying the
  581. * criteria, give up.
  582. */
  583. if (!agbp) {
  584. *inop = NULLFSINO;
  585. return 0;
  586. }
  587. agi = XFS_BUF_TO_AGI(agbp);
  588. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  589. } else {
  590. /*
  591. * Continue where we left off before. In this case, we
  592. * know that the allocation group has free inodes.
  593. */
  594. agbp = *IO_agbp;
  595. agi = XFS_BUF_TO_AGI(agbp);
  596. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  597. ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
  598. }
  599. mp = tp->t_mountp;
  600. agcount = mp->m_sb.sb_agcount;
  601. agno = be32_to_cpu(agi->agi_seqno);
  602. tagno = agno;
  603. pagno = XFS_INO_TO_AGNO(mp, parent);
  604. pagino = XFS_INO_TO_AGINO(mp, parent);
  605. /*
  606. * If we have already hit the ceiling of inode blocks then clear
  607. * okalloc so we scan all available agi structures for a free
  608. * inode.
  609. */
  610. if (mp->m_maxicount &&
  611. mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
  612. noroom = 1;
  613. okalloc = 0;
  614. }
  615. /*
  616. * Loop until we find an allocation group that either has free inodes
  617. * or in which we can allocate some inodes. Iterate through the
  618. * allocation groups upward, wrapping at the end.
  619. */
  620. *alloc_done = B_FALSE;
  621. while (!agi->agi_freecount) {
  622. /*
  623. * Don't do anything if we're not supposed to allocate
  624. * any blocks, just go on to the next ag.
  625. */
  626. if (okalloc) {
  627. /*
  628. * Try to allocate some new inodes in the allocation
  629. * group.
  630. */
  631. if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) {
  632. xfs_trans_brelse(tp, agbp);
  633. if (error == ENOSPC) {
  634. *inop = NULLFSINO;
  635. return 0;
  636. } else
  637. return error;
  638. }
  639. if (ialloced) {
  640. /*
  641. * We successfully allocated some inodes, return
  642. * the current context to the caller so that it
  643. * can commit the current transaction and call
  644. * us again where we left off.
  645. */
  646. ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
  647. *alloc_done = B_TRUE;
  648. *IO_agbp = agbp;
  649. *inop = NULLFSINO;
  650. return 0;
  651. }
  652. }
  653. /*
  654. * If it failed, give up on this ag.
  655. */
  656. xfs_trans_brelse(tp, agbp);
  657. /*
  658. * Go on to the next ag: get its ag header.
  659. */
  660. nextag:
  661. if (++tagno == agcount)
  662. tagno = 0;
  663. if (tagno == agno) {
  664. *inop = NULLFSINO;
  665. return noroom ? ENOSPC : 0;
  666. }
  667. down_read(&mp->m_peraglock);
  668. if (mp->m_perag[tagno].pagi_inodeok == 0) {
  669. up_read(&mp->m_peraglock);
  670. goto nextag;
  671. }
  672. error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
  673. up_read(&mp->m_peraglock);
  674. if (error)
  675. goto nextag;
  676. agi = XFS_BUF_TO_AGI(agbp);
  677. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  678. }
  679. /*
  680. * Here with an allocation group that has a free inode.
  681. * Reset agno since we may have chosen a new ag in the
  682. * loop above.
  683. */
  684. agno = tagno;
  685. *IO_agbp = NULL;
  686. cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
  687. /*
  688. * If pagino is 0 (this is the root inode allocation) use newino.
  689. * This must work because we've just allocated some.
  690. */
  691. if (!pagino)
  692. pagino = be32_to_cpu(agi->agi_newino);
  693. #ifdef DEBUG
  694. if (cur->bc_nlevels == 1) {
  695. int freecount = 0;
  696. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  697. goto error0;
  698. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  699. do {
  700. if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
  701. &rec.ir_freecount, &rec.ir_free, &i)))
  702. goto error0;
  703. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  704. freecount += rec.ir_freecount;
  705. if ((error = xfs_btree_increment(cur, 0, &i)))
  706. goto error0;
  707. } while (i == 1);
  708. ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
  709. XFS_FORCED_SHUTDOWN(mp));
  710. }
  711. #endif
  712. /*
  713. * If in the same a.g. as the parent, try to get near the parent.
  714. */
  715. if (pagno == agno) {
  716. if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i)))
  717. goto error0;
  718. if (i != 0 &&
  719. (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
  720. &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
  721. j == 1 &&
  722. rec.ir_freecount > 0) {
  723. /*
  724. * Found a free inode in the same chunk
  725. * as parent, done.
  726. */
  727. }
  728. /*
  729. * In the same a.g. as parent, but parent's chunk is full.
  730. */
  731. else {
  732. int doneleft; /* done, to the left */
  733. int doneright; /* done, to the right */
  734. if (error)
  735. goto error0;
  736. ASSERT(i == 1);
  737. ASSERT(j == 1);
  738. /*
  739. * Duplicate the cursor, search left & right
  740. * simultaneously.
  741. */
  742. if ((error = xfs_btree_dup_cursor(cur, &tcur)))
  743. goto error0;
  744. /*
  745. * Search left with tcur, back up 1 record.
  746. */
  747. if ((error = xfs_btree_decrement(tcur, 0, &i)))
  748. goto error1;
  749. doneleft = !i;
  750. if (!doneleft) {
  751. if ((error = xfs_inobt_get_rec(tcur,
  752. &trec.ir_startino,
  753. &trec.ir_freecount,
  754. &trec.ir_free, &i)))
  755. goto error1;
  756. XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
  757. }
  758. /*
  759. * Search right with cur, go forward 1 record.
  760. */
  761. if ((error = xfs_btree_increment(cur, 0, &i)))
  762. goto error1;
  763. doneright = !i;
  764. if (!doneright) {
  765. if ((error = xfs_inobt_get_rec(cur,
  766. &rec.ir_startino,
  767. &rec.ir_freecount,
  768. &rec.ir_free, &i)))
  769. goto error1;
  770. XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
  771. }
  772. /*
  773. * Loop until we find the closest inode chunk
  774. * with a free one.
  775. */
  776. while (!doneleft || !doneright) {
  777. int useleft; /* using left inode
  778. chunk this time */
  779. /*
  780. * Figure out which block is closer,
  781. * if both are valid.
  782. */
  783. if (!doneleft && !doneright)
  784. useleft =
  785. pagino -
  786. (trec.ir_startino +
  787. XFS_INODES_PER_CHUNK - 1) <
  788. rec.ir_startino - pagino;
  789. else
  790. useleft = !doneleft;
  791. /*
  792. * If checking the left, does it have
  793. * free inodes?
  794. */
  795. if (useleft && trec.ir_freecount) {
  796. /*
  797. * Yes, set it up as the chunk to use.
  798. */
  799. rec = trec;
  800. xfs_btree_del_cursor(cur,
  801. XFS_BTREE_NOERROR);
  802. cur = tcur;
  803. break;
  804. }
  805. /*
  806. * If checking the right, does it have
  807. * free inodes?
  808. */
  809. if (!useleft && rec.ir_freecount) {
  810. /*
  811. * Yes, it's already set up.
  812. */
  813. xfs_btree_del_cursor(tcur,
  814. XFS_BTREE_NOERROR);
  815. break;
  816. }
  817. /*
  818. * If used the left, get another one
  819. * further left.
  820. */
  821. if (useleft) {
  822. if ((error = xfs_btree_decrement(tcur, 0,
  823. &i)))
  824. goto error1;
  825. doneleft = !i;
  826. if (!doneleft) {
  827. if ((error = xfs_inobt_get_rec(
  828. tcur,
  829. &trec.ir_startino,
  830. &trec.ir_freecount,
  831. &trec.ir_free, &i)))
  832. goto error1;
  833. XFS_WANT_CORRUPTED_GOTO(i == 1,
  834. error1);
  835. }
  836. }
  837. /*
  838. * If used the right, get another one
  839. * further right.
  840. */
  841. else {
  842. if ((error = xfs_btree_increment(cur, 0,
  843. &i)))
  844. goto error1;
  845. doneright = !i;
  846. if (!doneright) {
  847. if ((error = xfs_inobt_get_rec(
  848. cur,
  849. &rec.ir_startino,
  850. &rec.ir_freecount,
  851. &rec.ir_free, &i)))
  852. goto error1;
  853. XFS_WANT_CORRUPTED_GOTO(i == 1,
  854. error1);
  855. }
  856. }
  857. }
  858. ASSERT(!doneleft || !doneright);
  859. }
  860. }
  861. /*
  862. * In a different a.g. from the parent.
  863. * See if the most recently allocated block has any free.
  864. */
  865. else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
  866. if ((error = xfs_inobt_lookup_eq(cur,
  867. be32_to_cpu(agi->agi_newino), 0, 0, &i)))
  868. goto error0;
  869. if (i == 1 &&
  870. (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
  871. &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
  872. j == 1 &&
  873. rec.ir_freecount > 0) {
  874. /*
  875. * The last chunk allocated in the group still has
  876. * a free inode.
  877. */
  878. }
  879. /*
  880. * None left in the last group, search the whole a.g.
  881. */
  882. else {
  883. if (error)
  884. goto error0;
  885. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  886. goto error0;
  887. ASSERT(i == 1);
  888. for (;;) {
  889. if ((error = xfs_inobt_get_rec(cur,
  890. &rec.ir_startino,
  891. &rec.ir_freecount, &rec.ir_free,
  892. &i)))
  893. goto error0;
  894. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  895. if (rec.ir_freecount > 0)
  896. break;
  897. if ((error = xfs_btree_increment(cur, 0, &i)))
  898. goto error0;
  899. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  900. }
  901. }
  902. }
  903. offset = XFS_IALLOC_FIND_FREE(&rec.ir_free);
  904. ASSERT(offset >= 0);
  905. ASSERT(offset < XFS_INODES_PER_CHUNK);
  906. ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
  907. XFS_INODES_PER_CHUNK) == 0);
  908. ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
  909. XFS_INOBT_CLR_FREE(&rec, offset);
  910. rec.ir_freecount--;
  911. if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
  912. rec.ir_free)))
  913. goto error0;
  914. be32_add_cpu(&agi->agi_freecount, -1);
  915. xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
  916. down_read(&mp->m_peraglock);
  917. mp->m_perag[tagno].pagi_freecount--;
  918. up_read(&mp->m_peraglock);
  919. #ifdef DEBUG
  920. if (cur->bc_nlevels == 1) {
  921. int freecount = 0;
  922. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  923. goto error0;
  924. do {
  925. if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
  926. &rec.ir_freecount, &rec.ir_free, &i)))
  927. goto error0;
  928. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  929. freecount += rec.ir_freecount;
  930. if ((error = xfs_btree_increment(cur, 0, &i)))
  931. goto error0;
  932. } while (i == 1);
  933. ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
  934. XFS_FORCED_SHUTDOWN(mp));
  935. }
  936. #endif
  937. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  938. xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
  939. *inop = ino;
  940. return 0;
  941. error1:
  942. xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
  943. error0:
  944. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  945. return error;
  946. }
  947. /*
  948. * Free disk inode. Carefully avoids touching the incore inode, all
  949. * manipulations incore are the caller's responsibility.
  950. * The on-disk inode is not changed by this operation, only the
  951. * btree (free inode mask) is changed.
  952. */
  953. int
  954. xfs_difree(
  955. xfs_trans_t *tp, /* transaction pointer */
  956. xfs_ino_t inode, /* inode to be freed */
  957. xfs_bmap_free_t *flist, /* extents to free */
  958. int *delete, /* set if inode cluster was deleted */
  959. xfs_ino_t *first_ino) /* first inode in deleted cluster */
  960. {
  961. /* REFERENCED */
  962. xfs_agblock_t agbno; /* block number containing inode */
  963. xfs_buf_t *agbp; /* buffer containing allocation group header */
  964. xfs_agino_t agino; /* inode number relative to allocation group */
  965. xfs_agnumber_t agno; /* allocation group number */
  966. xfs_agi_t *agi; /* allocation group header */
  967. xfs_btree_cur_t *cur; /* inode btree cursor */
  968. int error; /* error return value */
  969. int i; /* result code */
  970. int ilen; /* inodes in an inode cluster */
  971. xfs_mount_t *mp; /* mount structure for filesystem */
  972. int off; /* offset of inode in inode chunk */
  973. xfs_inobt_rec_incore_t rec; /* btree record */
  974. mp = tp->t_mountp;
  975. /*
  976. * Break up inode number into its components.
  977. */
  978. agno = XFS_INO_TO_AGNO(mp, inode);
  979. if (agno >= mp->m_sb.sb_agcount) {
  980. cmn_err(CE_WARN,
  981. "xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.",
  982. agno, mp->m_sb.sb_agcount, mp->m_fsname);
  983. ASSERT(0);
  984. return XFS_ERROR(EINVAL);
  985. }
  986. agino = XFS_INO_TO_AGINO(mp, inode);
  987. if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
  988. cmn_err(CE_WARN,
  989. "xfs_difree: inode != XFS_AGINO_TO_INO() "
  990. "(%llu != %llu) on %s. Returning EINVAL.",
  991. (unsigned long long)inode,
  992. (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino),
  993. mp->m_fsname);
  994. ASSERT(0);
  995. return XFS_ERROR(EINVAL);
  996. }
  997. agbno = XFS_AGINO_TO_AGBNO(mp, agino);
  998. if (agbno >= mp->m_sb.sb_agblocks) {
  999. cmn_err(CE_WARN,
  1000. "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.",
  1001. agbno, mp->m_sb.sb_agblocks, mp->m_fsname);
  1002. ASSERT(0);
  1003. return XFS_ERROR(EINVAL);
  1004. }
  1005. /*
  1006. * Get the allocation group header.
  1007. */
  1008. down_read(&mp->m_peraglock);
  1009. error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
  1010. up_read(&mp->m_peraglock);
  1011. if (error) {
  1012. cmn_err(CE_WARN,
  1013. "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.",
  1014. error, mp->m_fsname);
  1015. return error;
  1016. }
  1017. agi = XFS_BUF_TO_AGI(agbp);
  1018. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  1019. ASSERT(agbno < be32_to_cpu(agi->agi_length));
  1020. /*
  1021. * Initialize the cursor.
  1022. */
  1023. cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
  1024. #ifdef DEBUG
  1025. if (cur->bc_nlevels == 1) {
  1026. int freecount = 0;
  1027. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  1028. goto error0;
  1029. do {
  1030. if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
  1031. &rec.ir_freecount, &rec.ir_free, &i)))
  1032. goto error0;
  1033. if (i) {
  1034. freecount += rec.ir_freecount;
  1035. if ((error = xfs_btree_increment(cur, 0, &i)))
  1036. goto error0;
  1037. }
  1038. } while (i == 1);
  1039. ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
  1040. XFS_FORCED_SHUTDOWN(mp));
  1041. }
  1042. #endif
  1043. /*
  1044. * Look for the entry describing this inode.
  1045. */
  1046. if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) {
  1047. cmn_err(CE_WARN,
  1048. "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.",
  1049. error, mp->m_fsname);
  1050. goto error0;
  1051. }
  1052. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  1053. if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount,
  1054. &rec.ir_free, &i))) {
  1055. cmn_err(CE_WARN,
  1056. "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.",
  1057. error, mp->m_fsname);
  1058. goto error0;
  1059. }
  1060. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  1061. /*
  1062. * Get the offset in the inode chunk.
  1063. */
  1064. off = agino - rec.ir_startino;
  1065. ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
  1066. ASSERT(!XFS_INOBT_IS_FREE(&rec, off));
  1067. /*
  1068. * Mark the inode free & increment the count.
  1069. */
  1070. XFS_INOBT_SET_FREE(&rec, off);
  1071. rec.ir_freecount++;
  1072. /*
  1073. * When an inode cluster is free, it becomes eligible for removal
  1074. */
  1075. if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
  1076. (rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
  1077. *delete = 1;
  1078. *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
  1079. /*
  1080. * Remove the inode cluster from the AGI B+Tree, adjust the
  1081. * AGI and Superblock inode counts, and mark the disk space
  1082. * to be freed when the transaction is committed.
  1083. */
  1084. ilen = XFS_IALLOC_INODES(mp);
  1085. be32_add_cpu(&agi->agi_count, -ilen);
  1086. be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
  1087. xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
  1088. down_read(&mp->m_peraglock);
  1089. mp->m_perag[agno].pagi_freecount -= ilen - 1;
  1090. up_read(&mp->m_peraglock);
  1091. xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
  1092. xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
  1093. if ((error = xfs_btree_delete(cur, &i))) {
  1094. cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n",
  1095. error, mp->m_fsname);
  1096. goto error0;
  1097. }
  1098. xfs_bmap_add_free(XFS_AGB_TO_FSB(mp,
  1099. agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)),
  1100. XFS_IALLOC_BLOCKS(mp), flist, mp);
  1101. } else {
  1102. *delete = 0;
  1103. if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) {
  1104. cmn_err(CE_WARN,
  1105. "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.",
  1106. error, mp->m_fsname);
  1107. goto error0;
  1108. }
  1109. /*
  1110. * Change the inode free counts and log the ag/sb changes.
  1111. */
  1112. be32_add_cpu(&agi->agi_freecount, 1);
  1113. xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
  1114. down_read(&mp->m_peraglock);
  1115. mp->m_perag[agno].pagi_freecount++;
  1116. up_read(&mp->m_peraglock);
  1117. xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
  1118. }
  1119. #ifdef DEBUG
  1120. if (cur->bc_nlevels == 1) {
  1121. int freecount = 0;
  1122. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  1123. goto error0;
  1124. do {
  1125. if ((error = xfs_inobt_get_rec(cur,
  1126. &rec.ir_startino,
  1127. &rec.ir_freecount,
  1128. &rec.ir_free, &i)))
  1129. goto error0;
  1130. if (i) {
  1131. freecount += rec.ir_freecount;
  1132. if ((error = xfs_btree_increment(cur, 0, &i)))
  1133. goto error0;
  1134. }
  1135. } while (i == 1);
  1136. ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
  1137. XFS_FORCED_SHUTDOWN(mp));
  1138. }
  1139. #endif
  1140. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  1141. return 0;
  1142. error0:
  1143. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  1144. return error;
  1145. }
  1146. /*
  1147. * Return the location of the inode in imap, for mapping it into a buffer.
  1148. */
  1149. int
  1150. xfs_imap(
  1151. xfs_mount_t *mp, /* file system mount structure */
  1152. xfs_trans_t *tp, /* transaction pointer */
  1153. xfs_ino_t ino, /* inode to locate */
  1154. struct xfs_imap *imap, /* location map structure */
  1155. uint flags) /* flags for inode btree lookup */
  1156. {
  1157. xfs_agblock_t agbno; /* block number of inode in the alloc group */
  1158. xfs_agino_t agino; /* inode number within alloc group */
  1159. xfs_agnumber_t agno; /* allocation group number */
  1160. int blks_per_cluster; /* num blocks per inode cluster */
  1161. xfs_agblock_t chunk_agbno; /* first block in inode chunk */
  1162. xfs_agblock_t cluster_agbno; /* first block in inode cluster */
  1163. int error; /* error code */
  1164. int offset; /* index of inode in its buffer */
  1165. int offset_agbno; /* blks from chunk start to inode */
  1166. ASSERT(ino != NULLFSINO);
  1167. /*
  1168. * Split up the inode number into its parts.
  1169. */
  1170. agno = XFS_INO_TO_AGNO(mp, ino);
  1171. agino = XFS_INO_TO_AGINO(mp, ino);
  1172. agbno = XFS_AGINO_TO_AGBNO(mp, agino);
  1173. if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
  1174. ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
  1175. #ifdef DEBUG
  1176. /* no diagnostics for bulkstat, ino comes from userspace */
  1177. if (flags & XFS_IMAP_BULKSTAT)
  1178. return XFS_ERROR(EINVAL);
  1179. if (agno >= mp->m_sb.sb_agcount) {
  1180. xfs_fs_cmn_err(CE_ALERT, mp,
  1181. "xfs_imap: agno (%d) >= "
  1182. "mp->m_sb.sb_agcount (%d)",
  1183. agno, mp->m_sb.sb_agcount);
  1184. }
  1185. if (agbno >= mp->m_sb.sb_agblocks) {
  1186. xfs_fs_cmn_err(CE_ALERT, mp,
  1187. "xfs_imap: agbno (0x%llx) >= "
  1188. "mp->m_sb.sb_agblocks (0x%lx)",
  1189. (unsigned long long) agbno,
  1190. (unsigned long) mp->m_sb.sb_agblocks);
  1191. }
  1192. if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
  1193. xfs_fs_cmn_err(CE_ALERT, mp,
  1194. "xfs_imap: ino (0x%llx) != "
  1195. "XFS_AGINO_TO_INO(mp, agno, agino) "
  1196. "(0x%llx)",
  1197. ino, XFS_AGINO_TO_INO(mp, agno, agino));
  1198. }
  1199. xfs_stack_trace();
  1200. #endif /* DEBUG */
  1201. return XFS_ERROR(EINVAL);
  1202. }
  1203. /*
  1204. * If the inode cluster size is the same as the blocksize or
  1205. * smaller we get to the buffer by simple arithmetics.
  1206. */
  1207. if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) {
  1208. offset = XFS_INO_TO_OFFSET(mp, ino);
  1209. ASSERT(offset < mp->m_sb.sb_inopblock);
  1210. imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
  1211. imap->im_len = XFS_FSB_TO_BB(mp, 1);
  1212. imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
  1213. return 0;
  1214. }
  1215. blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
  1216. /*
  1217. * If we get a block number passed from bulkstat we can use it to
  1218. * find the buffer easily.
  1219. */
  1220. if (imap->im_blkno) {
  1221. offset = XFS_INO_TO_OFFSET(mp, ino);
  1222. ASSERT(offset < mp->m_sb.sb_inopblock);
  1223. cluster_agbno = XFS_DADDR_TO_AGBNO(mp, imap->im_blkno);
  1224. offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock;
  1225. imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
  1226. imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
  1227. return 0;
  1228. }
  1229. /*
  1230. * If the inode chunks are aligned then use simple maths to
  1231. * find the location. Otherwise we have to do a btree
  1232. * lookup to find the location.
  1233. */
  1234. if (mp->m_inoalign_mask) {
  1235. offset_agbno = agbno & mp->m_inoalign_mask;
  1236. chunk_agbno = agbno - offset_agbno;
  1237. } else {
  1238. xfs_btree_cur_t *cur; /* inode btree cursor */
  1239. xfs_agino_t chunk_agino; /* first agino in inode chunk */
  1240. __int32_t chunk_cnt; /* count of free inodes in chunk */
  1241. xfs_inofree_t chunk_free; /* mask of free inodes in chunk */
  1242. xfs_buf_t *agbp; /* agi buffer */
  1243. int i; /* temp state */
  1244. down_read(&mp->m_peraglock);
  1245. error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
  1246. up_read(&mp->m_peraglock);
  1247. if (error) {
  1248. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
  1249. "xfs_ialloc_read_agi() returned "
  1250. "error %d, agno %d",
  1251. error, agno);
  1252. return error;
  1253. }
  1254. cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
  1255. error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i);
  1256. if (error) {
  1257. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
  1258. "xfs_inobt_lookup_le() failed");
  1259. goto error0;
  1260. }
  1261. error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt,
  1262. &chunk_free, &i);
  1263. if (error) {
  1264. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
  1265. "xfs_inobt_get_rec() failed");
  1266. goto error0;
  1267. }
  1268. if (i == 0) {
  1269. #ifdef DEBUG
  1270. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
  1271. "xfs_inobt_get_rec() failed");
  1272. #endif /* DEBUG */
  1273. error = XFS_ERROR(EINVAL);
  1274. }
  1275. error0:
  1276. xfs_trans_brelse(tp, agbp);
  1277. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  1278. if (error)
  1279. return error;
  1280. chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino);
  1281. offset_agbno = agbno - chunk_agbno;
  1282. }
  1283. ASSERT(agbno >= chunk_agbno);
  1284. cluster_agbno = chunk_agbno +
  1285. ((offset_agbno / blks_per_cluster) * blks_per_cluster);
  1286. offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
  1287. XFS_INO_TO_OFFSET(mp, ino);
  1288. imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
  1289. imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
  1290. imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
  1291. /*
  1292. * If the inode number maps to a block outside the bounds
  1293. * of the file system then return NULL rather than calling
  1294. * read_buf and panicing when we get an error from the
  1295. * driver.
  1296. */
  1297. if ((imap->im_blkno + imap->im_len) >
  1298. XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
  1299. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
  1300. "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > "
  1301. " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)",
  1302. (unsigned long long) imap->im_blkno,
  1303. (unsigned long long) imap->im_len,
  1304. XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
  1305. return XFS_ERROR(EINVAL);
  1306. }
  1307. return 0;
  1308. }
  1309. /*
  1310. * Compute and fill in value of m_in_maxlevels.
  1311. */
  1312. void
  1313. xfs_ialloc_compute_maxlevels(
  1314. xfs_mount_t *mp) /* file system mount structure */
  1315. {
  1316. int level;
  1317. uint maxblocks;
  1318. uint maxleafents;
  1319. int minleafrecs;
  1320. int minnoderecs;
  1321. maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >>
  1322. XFS_INODES_PER_CHUNK_LOG;
  1323. minleafrecs = mp->m_alloc_mnr[0];
  1324. minnoderecs = mp->m_alloc_mnr[1];
  1325. maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
  1326. for (level = 1; maxblocks > 1; level++)
  1327. maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
  1328. mp->m_in_maxlevels = level;
  1329. }
  1330. /*
  1331. * Log specified fields for the ag hdr (inode section)
  1332. */
  1333. void
  1334. xfs_ialloc_log_agi(
  1335. xfs_trans_t *tp, /* transaction pointer */
  1336. xfs_buf_t *bp, /* allocation group header buffer */
  1337. int fields) /* bitmask of fields to log */
  1338. {
  1339. int first; /* first byte number */
  1340. int last; /* last byte number */
  1341. static const short offsets[] = { /* field starting offsets */
  1342. /* keep in sync with bit definitions */
  1343. offsetof(xfs_agi_t, agi_magicnum),
  1344. offsetof(xfs_agi_t, agi_versionnum),
  1345. offsetof(xfs_agi_t, agi_seqno),
  1346. offsetof(xfs_agi_t, agi_length),
  1347. offsetof(xfs_agi_t, agi_count),
  1348. offsetof(xfs_agi_t, agi_root),
  1349. offsetof(xfs_agi_t, agi_level),
  1350. offsetof(xfs_agi_t, agi_freecount),
  1351. offsetof(xfs_agi_t, agi_newino),
  1352. offsetof(xfs_agi_t, agi_dirino),
  1353. offsetof(xfs_agi_t, agi_unlinked),
  1354. sizeof(xfs_agi_t)
  1355. };
  1356. #ifdef DEBUG
  1357. xfs_agi_t *agi; /* allocation group header */
  1358. agi = XFS_BUF_TO_AGI(bp);
  1359. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  1360. #endif
  1361. /*
  1362. * Compute byte offsets for the first and last fields.
  1363. */
  1364. xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last);
  1365. /*
  1366. * Log the allocation group inode header buffer.
  1367. */
  1368. xfs_trans_log_buf(tp, bp, first, last);
  1369. }
  1370. #ifdef DEBUG
  1371. STATIC void
  1372. xfs_check_agi_unlinked(
  1373. struct xfs_agi *agi)
  1374. {
  1375. int i;
  1376. for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
  1377. ASSERT(agi->agi_unlinked[i]);
  1378. }
  1379. #else
  1380. #define xfs_check_agi_unlinked(agi)
  1381. #endif
  1382. /*
  1383. * Read in the allocation group header (inode allocation section)
  1384. */
  1385. int
  1386. xfs_read_agi(
  1387. struct xfs_mount *mp, /* file system mount structure */
  1388. struct xfs_trans *tp, /* transaction pointer */
  1389. xfs_agnumber_t agno, /* allocation group number */
  1390. struct xfs_buf **bpp) /* allocation group hdr buf */
  1391. {
  1392. struct xfs_agi *agi; /* allocation group header */
  1393. int agi_ok; /* agi is consistent */
  1394. int error;
  1395. ASSERT(agno != NULLAGNUMBER);
  1396. error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
  1397. XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
  1398. XFS_FSS_TO_BB(mp, 1), 0, bpp);
  1399. if (error)
  1400. return error;
  1401. ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp));
  1402. agi = XFS_BUF_TO_AGI(*bpp);
  1403. /*
  1404. * Validate the magic number of the agi block.
  1405. */
  1406. agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
  1407. XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) &&
  1408. be32_to_cpu(agi->agi_seqno) == agno;
  1409. if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
  1410. XFS_RANDOM_IALLOC_READ_AGI))) {
  1411. XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW,
  1412. mp, agi);
  1413. xfs_trans_brelse(tp, *bpp);
  1414. return XFS_ERROR(EFSCORRUPTED);
  1415. }
  1416. XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGI, XFS_AGI_REF);
  1417. xfs_check_agi_unlinked(agi);
  1418. return 0;
  1419. }
  1420. int
  1421. xfs_ialloc_read_agi(
  1422. struct xfs_mount *mp, /* file system mount structure */
  1423. struct xfs_trans *tp, /* transaction pointer */
  1424. xfs_agnumber_t agno, /* allocation group number */
  1425. struct xfs_buf **bpp) /* allocation group hdr buf */
  1426. {
  1427. struct xfs_agi *agi; /* allocation group header */
  1428. struct xfs_perag *pag; /* per allocation group data */
  1429. int error;
  1430. error = xfs_read_agi(mp, tp, agno, bpp);
  1431. if (error)
  1432. return error;
  1433. agi = XFS_BUF_TO_AGI(*bpp);
  1434. pag = &mp->m_perag[agno];
  1435. if (!pag->pagi_init) {
  1436. pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
  1437. pag->pagi_count = be32_to_cpu(agi->agi_count);
  1438. pag->pagi_init = 1;
  1439. }
  1440. /*
  1441. * It's possible for these to be out of sync if
  1442. * we are in the middle of a forced shutdown.
  1443. */
  1444. ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
  1445. XFS_FORCED_SHUTDOWN(mp));
  1446. return 0;
  1447. }
  1448. /*
  1449. * Read in the agi to initialise the per-ag data in the mount structure
  1450. */
  1451. int
  1452. xfs_ialloc_pagi_init(
  1453. xfs_mount_t *mp, /* file system mount structure */
  1454. xfs_trans_t *tp, /* transaction pointer */
  1455. xfs_agnumber_t agno) /* allocation group number */
  1456. {
  1457. xfs_buf_t *bp = NULL;
  1458. int error;
  1459. error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
  1460. if (error)
  1461. return error;
  1462. if (bp)
  1463. xfs_trans_brelse(tp, bp);
  1464. return 0;
  1465. }