xfs_ialloc.c 43 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548
  1. /*
  2. * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
  3. * All Rights Reserved.
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU General Public License as
  7. * published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it would be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write the Free Software Foundation,
  16. * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "xfs.h"
  19. #include "xfs_fs.h"
  20. #include "xfs_types.h"
  21. #include "xfs_bit.h"
  22. #include "xfs_log.h"
  23. #include "xfs_inum.h"
  24. #include "xfs_trans.h"
  25. #include "xfs_sb.h"
  26. #include "xfs_ag.h"
  27. #include "xfs_dir2.h"
  28. #include "xfs_dmapi.h"
  29. #include "xfs_mount.h"
  30. #include "xfs_bmap_btree.h"
  31. #include "xfs_alloc_btree.h"
  32. #include "xfs_ialloc_btree.h"
  33. #include "xfs_dir2_sf.h"
  34. #include "xfs_attr_sf.h"
  35. #include "xfs_dinode.h"
  36. #include "xfs_inode.h"
  37. #include "xfs_btree.h"
  38. #include "xfs_ialloc.h"
  39. #include "xfs_alloc.h"
  40. #include "xfs_rtalloc.h"
  41. #include "xfs_error.h"
  42. #include "xfs_bmap.h"
  43. /*
  44. * Allocation group level functions.
  45. */
  46. static inline int
  47. xfs_ialloc_cluster_alignment(
  48. xfs_alloc_arg_t *args)
  49. {
  50. if (xfs_sb_version_hasalign(&args->mp->m_sb) &&
  51. args->mp->m_sb.sb_inoalignmt >=
  52. XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp)))
  53. return args->mp->m_sb.sb_inoalignmt;
  54. return 1;
  55. }
  56. /*
  57. * Lookup the record equal to ino in the btree given by cur.
  58. */
  59. STATIC int /* error */
  60. xfs_inobt_lookup_eq(
  61. struct xfs_btree_cur *cur, /* btree cursor */
  62. xfs_agino_t ino, /* starting inode of chunk */
  63. __int32_t fcnt, /* free inode count */
  64. xfs_inofree_t free, /* free inode mask */
  65. int *stat) /* success/failure */
  66. {
  67. cur->bc_rec.i.ir_startino = ino;
  68. cur->bc_rec.i.ir_freecount = fcnt;
  69. cur->bc_rec.i.ir_free = free;
  70. return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
  71. }
  72. /*
  73. * Lookup the first record greater than or equal to ino
  74. * in the btree given by cur.
  75. */
  76. int /* error */
  77. xfs_inobt_lookup_ge(
  78. struct xfs_btree_cur *cur, /* btree cursor */
  79. xfs_agino_t ino, /* starting inode of chunk */
  80. __int32_t fcnt, /* free inode count */
  81. xfs_inofree_t free, /* free inode mask */
  82. int *stat) /* success/failure */
  83. {
  84. cur->bc_rec.i.ir_startino = ino;
  85. cur->bc_rec.i.ir_freecount = fcnt;
  86. cur->bc_rec.i.ir_free = free;
  87. return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
  88. }
  89. /*
  90. * Lookup the first record less than or equal to ino
  91. * in the btree given by cur.
  92. */
  93. int /* error */
  94. xfs_inobt_lookup_le(
  95. struct xfs_btree_cur *cur, /* btree cursor */
  96. xfs_agino_t ino, /* starting inode of chunk */
  97. __int32_t fcnt, /* free inode count */
  98. xfs_inofree_t free, /* free inode mask */
  99. int *stat) /* success/failure */
  100. {
  101. cur->bc_rec.i.ir_startino = ino;
  102. cur->bc_rec.i.ir_freecount = fcnt;
  103. cur->bc_rec.i.ir_free = free;
  104. return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
  105. }
  106. /*
  107. * Update the record referred to by cur to the value given.
  108. * This either works (return 0) or gets an EFSCORRUPTED error.
  109. */
  110. STATIC int /* error */
  111. xfs_inobt_update(
  112. struct xfs_btree_cur *cur, /* btree cursor */
  113. xfs_inobt_rec_incore_t *irec) /* btree record */
  114. {
  115. union xfs_btree_rec rec;
  116. rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
  117. rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount);
  118. rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
  119. return xfs_btree_update(cur, &rec);
  120. }
  121. /*
  122. * Get the data from the pointed-to record.
  123. */
  124. int /* error */
  125. xfs_inobt_get_rec(
  126. struct xfs_btree_cur *cur, /* btree cursor */
  127. xfs_inobt_rec_incore_t *irec, /* btree record */
  128. int *stat) /* output: success/failure */
  129. {
  130. union xfs_btree_rec *rec;
  131. int error;
  132. error = xfs_btree_get_rec(cur, &rec, stat);
  133. if (!error && *stat == 1) {
  134. irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
  135. irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount);
  136. irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
  137. }
  138. return error;
  139. }
  140. /*
  141. * Initialise a new set of inodes.
  142. */
  143. STATIC void
  144. xfs_ialloc_inode_init(
  145. struct xfs_mount *mp,
  146. struct xfs_trans *tp,
  147. xfs_agnumber_t agno,
  148. xfs_agblock_t agbno,
  149. xfs_agblock_t length,
  150. unsigned int gen)
  151. {
  152. struct xfs_buf *fbuf;
  153. struct xfs_dinode *free;
  154. int blks_per_cluster, nbufs, ninodes;
  155. int version;
  156. int i, j;
  157. xfs_daddr_t d;
  158. /*
  159. * Loop over the new block(s), filling in the inodes.
  160. * For small block sizes, manipulate the inodes in buffers
  161. * which are multiples of the blocks size.
  162. */
  163. if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
  164. blks_per_cluster = 1;
  165. nbufs = length;
  166. ninodes = mp->m_sb.sb_inopblock;
  167. } else {
  168. blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
  169. mp->m_sb.sb_blocksize;
  170. nbufs = length / blks_per_cluster;
  171. ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
  172. }
  173. /*
  174. * Figure out what version number to use in the inodes we create.
  175. * If the superblock version has caught up to the one that supports
  176. * the new inode format, then use the new inode version. Otherwise
  177. * use the old version so that old kernels will continue to be
  178. * able to use the file system.
  179. */
  180. if (xfs_sb_version_hasnlink(&mp->m_sb))
  181. version = 2;
  182. else
  183. version = 1;
  184. for (j = 0; j < nbufs; j++) {
  185. /*
  186. * Get the block.
  187. */
  188. d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
  189. fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
  190. mp->m_bsize * blks_per_cluster,
  191. XFS_BUF_LOCK);
  192. ASSERT(fbuf);
  193. ASSERT(!XFS_BUF_GETERROR(fbuf));
  194. /*
  195. * Initialize all inodes in this buffer and then log them.
  196. *
  197. * XXX: It would be much better if we had just one transaction
  198. * to log a whole cluster of inodes instead of all the
  199. * individual transactions causing a lot of log traffic.
  200. */
  201. xfs_biozero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
  202. for (i = 0; i < ninodes; i++) {
  203. int ioffset = i << mp->m_sb.sb_inodelog;
  204. uint isize = sizeof(struct xfs_dinode);
  205. free = xfs_make_iptr(mp, fbuf, i);
  206. free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
  207. free->di_version = version;
  208. free->di_gen = cpu_to_be32(gen);
  209. free->di_next_unlinked = cpu_to_be32(NULLAGINO);
  210. xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
  211. }
  212. xfs_trans_inode_alloc_buf(tp, fbuf);
  213. }
  214. }
  215. /*
  216. * Allocate new inodes in the allocation group specified by agbp.
  217. * Return 0 for success, else error code.
  218. */
  219. STATIC int /* error code or 0 */
  220. xfs_ialloc_ag_alloc(
  221. xfs_trans_t *tp, /* transaction pointer */
  222. xfs_buf_t *agbp, /* alloc group buffer */
  223. int *alloc)
  224. {
  225. xfs_agi_t *agi; /* allocation group header */
  226. xfs_alloc_arg_t args; /* allocation argument structure */
  227. xfs_btree_cur_t *cur; /* inode btree cursor */
  228. xfs_agnumber_t agno;
  229. int error;
  230. int i;
  231. xfs_agino_t newino; /* new first inode's number */
  232. xfs_agino_t newlen; /* new number of inodes */
  233. xfs_agino_t thisino; /* current inode number, for loop */
  234. int isaligned = 0; /* inode allocation at stripe unit */
  235. /* boundary */
  236. args.tp = tp;
  237. args.mp = tp->t_mountp;
  238. /*
  239. * Locking will ensure that we don't have two callers in here
  240. * at one time.
  241. */
  242. newlen = XFS_IALLOC_INODES(args.mp);
  243. if (args.mp->m_maxicount &&
  244. args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
  245. return XFS_ERROR(ENOSPC);
  246. args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
  247. /*
  248. * First try to allocate inodes contiguous with the last-allocated
  249. * chunk of inodes. If the filesystem is striped, this will fill
  250. * an entire stripe unit with inodes.
  251. */
  252. agi = XFS_BUF_TO_AGI(agbp);
  253. newino = be32_to_cpu(agi->agi_newino);
  254. agno = be32_to_cpu(agi->agi_seqno);
  255. args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
  256. XFS_IALLOC_BLOCKS(args.mp);
  257. if (likely(newino != NULLAGINO &&
  258. (args.agbno < be32_to_cpu(agi->agi_length)))) {
  259. args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
  260. args.type = XFS_ALLOCTYPE_THIS_BNO;
  261. args.mod = args.total = args.wasdel = args.isfl =
  262. args.userdata = args.minalignslop = 0;
  263. args.prod = 1;
  264. /*
  265. * We need to take into account alignment here to ensure that
  266. * we don't modify the free list if we fail to have an exact
  267. * block. If we don't have an exact match, and every oher
  268. * attempt allocation attempt fails, we'll end up cancelling
  269. * a dirty transaction and shutting down.
  270. *
  271. * For an exact allocation, alignment must be 1,
  272. * however we need to take cluster alignment into account when
  273. * fixing up the freelist. Use the minalignslop field to
  274. * indicate that extra blocks might be required for alignment,
  275. * but not to use them in the actual exact allocation.
  276. */
  277. args.alignment = 1;
  278. args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;
  279. /* Allow space for the inode btree to split. */
  280. args.minleft = args.mp->m_in_maxlevels - 1;
  281. if ((error = xfs_alloc_vextent(&args)))
  282. return error;
  283. } else
  284. args.fsbno = NULLFSBLOCK;
  285. if (unlikely(args.fsbno == NULLFSBLOCK)) {
  286. /*
  287. * Set the alignment for the allocation.
  288. * If stripe alignment is turned on then align at stripe unit
  289. * boundary.
  290. * If the cluster size is smaller than a filesystem block
  291. * then we're doing I/O for inodes in filesystem block size
  292. * pieces, so don't need alignment anyway.
  293. */
  294. isaligned = 0;
  295. if (args.mp->m_sinoalign) {
  296. ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
  297. args.alignment = args.mp->m_dalign;
  298. isaligned = 1;
  299. } else
  300. args.alignment = xfs_ialloc_cluster_alignment(&args);
  301. /*
  302. * Need to figure out where to allocate the inode blocks.
  303. * Ideally they should be spaced out through the a.g.
  304. * For now, just allocate blocks up front.
  305. */
  306. args.agbno = be32_to_cpu(agi->agi_root);
  307. args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
  308. /*
  309. * Allocate a fixed-size extent of inodes.
  310. */
  311. args.type = XFS_ALLOCTYPE_NEAR_BNO;
  312. args.mod = args.total = args.wasdel = args.isfl =
  313. args.userdata = args.minalignslop = 0;
  314. args.prod = 1;
  315. /*
  316. * Allow space for the inode btree to split.
  317. */
  318. args.minleft = args.mp->m_in_maxlevels - 1;
  319. if ((error = xfs_alloc_vextent(&args)))
  320. return error;
  321. }
  322. /*
  323. * If stripe alignment is turned on, then try again with cluster
  324. * alignment.
  325. */
  326. if (isaligned && args.fsbno == NULLFSBLOCK) {
  327. args.type = XFS_ALLOCTYPE_NEAR_BNO;
  328. args.agbno = be32_to_cpu(agi->agi_root);
  329. args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
  330. args.alignment = xfs_ialloc_cluster_alignment(&args);
  331. if ((error = xfs_alloc_vextent(&args)))
  332. return error;
  333. }
  334. if (args.fsbno == NULLFSBLOCK) {
  335. *alloc = 0;
  336. return 0;
  337. }
  338. ASSERT(args.len == args.minlen);
  339. /*
  340. * Stamp and write the inode buffers.
  341. *
  342. * Seed the new inode cluster with a random generation number. This
  343. * prevents short-term reuse of generation numbers if a chunk is
  344. * freed and then immediately reallocated. We use random numbers
  345. * rather than a linear progression to prevent the next generation
  346. * number from being easily guessable.
  347. */
  348. xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, args.len,
  349. random32());
  350. /*
  351. * Convert the results.
  352. */
  353. newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
  354. be32_add_cpu(&agi->agi_count, newlen);
  355. be32_add_cpu(&agi->agi_freecount, newlen);
  356. down_read(&args.mp->m_peraglock);
  357. args.mp->m_perag[agno].pagi_freecount += newlen;
  358. up_read(&args.mp->m_peraglock);
  359. agi->agi_newino = cpu_to_be32(newino);
  360. /*
  361. * Insert records describing the new inode chunk into the btree.
  362. */
  363. cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno);
  364. for (thisino = newino;
  365. thisino < newino + newlen;
  366. thisino += XFS_INODES_PER_CHUNK) {
  367. if ((error = xfs_inobt_lookup_eq(cur, thisino,
  368. XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) {
  369. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  370. return error;
  371. }
  372. ASSERT(i == 0);
  373. if ((error = xfs_btree_insert(cur, &i))) {
  374. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  375. return error;
  376. }
  377. ASSERT(i == 1);
  378. }
  379. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  380. /*
  381. * Log allocation group header fields
  382. */
  383. xfs_ialloc_log_agi(tp, agbp,
  384. XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
  385. /*
  386. * Modify/log superblock values for inode count and inode free count.
  387. */
  388. xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
  389. xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
  390. *alloc = 1;
  391. return 0;
  392. }
  393. STATIC_INLINE xfs_agnumber_t
  394. xfs_ialloc_next_ag(
  395. xfs_mount_t *mp)
  396. {
  397. xfs_agnumber_t agno;
  398. spin_lock(&mp->m_agirotor_lock);
  399. agno = mp->m_agirotor;
  400. if (++mp->m_agirotor == mp->m_maxagi)
  401. mp->m_agirotor = 0;
  402. spin_unlock(&mp->m_agirotor_lock);
  403. return agno;
  404. }
  405. /*
  406. * Select an allocation group to look for a free inode in, based on the parent
  407. * inode and then mode. Return the allocation group buffer.
  408. */
  409. STATIC xfs_buf_t * /* allocation group buffer */
  410. xfs_ialloc_ag_select(
  411. xfs_trans_t *tp, /* transaction pointer */
  412. xfs_ino_t parent, /* parent directory inode number */
  413. mode_t mode, /* bits set to indicate file type */
  414. int okalloc) /* ok to allocate more space */
  415. {
  416. xfs_buf_t *agbp; /* allocation group header buffer */
  417. xfs_agnumber_t agcount; /* number of ag's in the filesystem */
  418. xfs_agnumber_t agno; /* current ag number */
  419. int flags; /* alloc buffer locking flags */
  420. xfs_extlen_t ineed; /* blocks needed for inode allocation */
  421. xfs_extlen_t longest = 0; /* longest extent available */
  422. xfs_mount_t *mp; /* mount point structure */
  423. int needspace; /* file mode implies space allocated */
  424. xfs_perag_t *pag; /* per allocation group data */
  425. xfs_agnumber_t pagno; /* parent (starting) ag number */
  426. /*
  427. * Files of these types need at least one block if length > 0
  428. * (and they won't fit in the inode, but that's hard to figure out).
  429. */
  430. needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
  431. mp = tp->t_mountp;
  432. agcount = mp->m_maxagi;
  433. if (S_ISDIR(mode))
  434. pagno = xfs_ialloc_next_ag(mp);
  435. else {
  436. pagno = XFS_INO_TO_AGNO(mp, parent);
  437. if (pagno >= agcount)
  438. pagno = 0;
  439. }
  440. ASSERT(pagno < agcount);
  441. /*
  442. * Loop through allocation groups, looking for one with a little
  443. * free space in it. Note we don't look for free inodes, exactly.
  444. * Instead, we include whether there is a need to allocate inodes
  445. * to mean that blocks must be allocated for them,
  446. * if none are currently free.
  447. */
  448. agno = pagno;
  449. flags = XFS_ALLOC_FLAG_TRYLOCK;
  450. down_read(&mp->m_peraglock);
  451. for (;;) {
  452. pag = &mp->m_perag[agno];
  453. if (!pag->pagi_init) {
  454. if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
  455. agbp = NULL;
  456. goto nextag;
  457. }
  458. } else
  459. agbp = NULL;
  460. if (!pag->pagi_inodeok) {
  461. xfs_ialloc_next_ag(mp);
  462. goto unlock_nextag;
  463. }
  464. /*
  465. * Is there enough free space for the file plus a block
  466. * of inodes (if we need to allocate some)?
  467. */
  468. ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp);
  469. if (ineed && !pag->pagf_init) {
  470. if (agbp == NULL &&
  471. xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
  472. agbp = NULL;
  473. goto nextag;
  474. }
  475. (void)xfs_alloc_pagf_init(mp, tp, agno, flags);
  476. }
  477. if (!ineed || pag->pagf_init) {
  478. if (ineed && !(longest = pag->pagf_longest))
  479. longest = pag->pagf_flcount > 0;
  480. if (!ineed ||
  481. (pag->pagf_freeblks >= needspace + ineed &&
  482. longest >= ineed &&
  483. okalloc)) {
  484. if (agbp == NULL &&
  485. xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
  486. agbp = NULL;
  487. goto nextag;
  488. }
  489. up_read(&mp->m_peraglock);
  490. return agbp;
  491. }
  492. }
  493. unlock_nextag:
  494. if (agbp)
  495. xfs_trans_brelse(tp, agbp);
  496. nextag:
  497. /*
  498. * No point in iterating over the rest, if we're shutting
  499. * down.
  500. */
  501. if (XFS_FORCED_SHUTDOWN(mp)) {
  502. up_read(&mp->m_peraglock);
  503. return NULL;
  504. }
  505. agno++;
  506. if (agno >= agcount)
  507. agno = 0;
  508. if (agno == pagno) {
  509. if (flags == 0) {
  510. up_read(&mp->m_peraglock);
  511. return NULL;
  512. }
  513. flags = 0;
  514. }
  515. }
  516. }
  517. /*
  518. * Visible inode allocation functions.
  519. */
  520. /*
  521. * Allocate an inode on disk.
  522. * Mode is used to tell whether the new inode will need space, and whether
  523. * it is a directory.
  524. *
  525. * The arguments IO_agbp and alloc_done are defined to work within
  526. * the constraint of one allocation per transaction.
  527. * xfs_dialloc() is designed to be called twice if it has to do an
  528. * allocation to make more free inodes. On the first call,
  529. * IO_agbp should be set to NULL. If an inode is available,
  530. * i.e., xfs_dialloc() did not need to do an allocation, an inode
  531. * number is returned. In this case, IO_agbp would be set to the
  532. * current ag_buf and alloc_done set to false.
  533. * If an allocation needed to be done, xfs_dialloc would return
  534. * the current ag_buf in IO_agbp and set alloc_done to true.
  535. * The caller should then commit the current transaction, allocate a new
  536. * transaction, and call xfs_dialloc() again, passing in the previous
  537. * value of IO_agbp. IO_agbp should be held across the transactions.
  538. * Since the agbp is locked across the two calls, the second call is
  539. * guaranteed to have a free inode available.
  540. *
  541. * Once we successfully pick an inode its number is returned and the
  542. * on-disk data structures are updated. The inode itself is not read
  543. * in, since doing so would break ordering constraints with xfs_reclaim.
  544. */
  545. int
  546. xfs_dialloc(
  547. xfs_trans_t *tp, /* transaction pointer */
  548. xfs_ino_t parent, /* parent inode (directory) */
  549. mode_t mode, /* mode bits for new inode */
  550. int okalloc, /* ok to allocate more space */
  551. xfs_buf_t **IO_agbp, /* in/out ag header's buffer */
  552. boolean_t *alloc_done, /* true if we needed to replenish
  553. inode freelist */
  554. xfs_ino_t *inop) /* inode number allocated */
  555. {
  556. xfs_agnumber_t agcount; /* number of allocation groups */
  557. xfs_buf_t *agbp; /* allocation group header's buffer */
  558. xfs_agnumber_t agno; /* allocation group number */
  559. xfs_agi_t *agi; /* allocation group header structure */
  560. xfs_btree_cur_t *cur; /* inode allocation btree cursor */
  561. int error; /* error return value */
  562. int i; /* result code */
  563. int ialloced; /* inode allocation status */
  564. int noroom = 0; /* no space for inode blk allocation */
  565. xfs_ino_t ino; /* fs-relative inode to be returned */
  566. /* REFERENCED */
  567. int j; /* result code */
  568. xfs_mount_t *mp; /* file system mount structure */
  569. int offset; /* index of inode in chunk */
  570. xfs_agino_t pagino; /* parent's a.g. relative inode # */
  571. xfs_agnumber_t pagno; /* parent's allocation group number */
  572. xfs_inobt_rec_incore_t rec; /* inode allocation record */
  573. xfs_agnumber_t tagno; /* testing allocation group number */
  574. xfs_btree_cur_t *tcur; /* temp cursor */
  575. xfs_inobt_rec_incore_t trec; /* temp inode allocation record */
  576. if (*IO_agbp == NULL) {
  577. /*
  578. * We do not have an agbp, so select an initial allocation
  579. * group for inode allocation.
  580. */
  581. agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
  582. /*
  583. * Couldn't find an allocation group satisfying the
  584. * criteria, give up.
  585. */
  586. if (!agbp) {
  587. *inop = NULLFSINO;
  588. return 0;
  589. }
  590. agi = XFS_BUF_TO_AGI(agbp);
  591. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  592. } else {
  593. /*
  594. * Continue where we left off before. In this case, we
  595. * know that the allocation group has free inodes.
  596. */
  597. agbp = *IO_agbp;
  598. agi = XFS_BUF_TO_AGI(agbp);
  599. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  600. ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
  601. }
  602. mp = tp->t_mountp;
  603. agcount = mp->m_sb.sb_agcount;
  604. agno = be32_to_cpu(agi->agi_seqno);
  605. tagno = agno;
  606. pagno = XFS_INO_TO_AGNO(mp, parent);
  607. pagino = XFS_INO_TO_AGINO(mp, parent);
  608. /*
  609. * If we have already hit the ceiling of inode blocks then clear
  610. * okalloc so we scan all available agi structures for a free
  611. * inode.
  612. */
  613. if (mp->m_maxicount &&
  614. mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
  615. noroom = 1;
  616. okalloc = 0;
  617. }
  618. /*
  619. * Loop until we find an allocation group that either has free inodes
  620. * or in which we can allocate some inodes. Iterate through the
  621. * allocation groups upward, wrapping at the end.
  622. */
  623. *alloc_done = B_FALSE;
  624. while (!agi->agi_freecount) {
  625. /*
  626. * Don't do anything if we're not supposed to allocate
  627. * any blocks, just go on to the next ag.
  628. */
  629. if (okalloc) {
  630. /*
  631. * Try to allocate some new inodes in the allocation
  632. * group.
  633. */
  634. if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) {
  635. xfs_trans_brelse(tp, agbp);
  636. if (error == ENOSPC) {
  637. *inop = NULLFSINO;
  638. return 0;
  639. } else
  640. return error;
  641. }
  642. if (ialloced) {
  643. /*
  644. * We successfully allocated some inodes, return
  645. * the current context to the caller so that it
  646. * can commit the current transaction and call
  647. * us again where we left off.
  648. */
  649. ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
  650. *alloc_done = B_TRUE;
  651. *IO_agbp = agbp;
  652. *inop = NULLFSINO;
  653. return 0;
  654. }
  655. }
  656. /*
  657. * If it failed, give up on this ag.
  658. */
  659. xfs_trans_brelse(tp, agbp);
  660. /*
  661. * Go on to the next ag: get its ag header.
  662. */
  663. nextag:
  664. if (++tagno == agcount)
  665. tagno = 0;
  666. if (tagno == agno) {
  667. *inop = NULLFSINO;
  668. return noroom ? ENOSPC : 0;
  669. }
  670. down_read(&mp->m_peraglock);
  671. if (mp->m_perag[tagno].pagi_inodeok == 0) {
  672. up_read(&mp->m_peraglock);
  673. goto nextag;
  674. }
  675. error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
  676. up_read(&mp->m_peraglock);
  677. if (error)
  678. goto nextag;
  679. agi = XFS_BUF_TO_AGI(agbp);
  680. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  681. }
  682. /*
  683. * Here with an allocation group that has a free inode.
  684. * Reset agno since we may have chosen a new ag in the
  685. * loop above.
  686. */
  687. agno = tagno;
  688. *IO_agbp = NULL;
  689. cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
  690. /*
  691. * If pagino is 0 (this is the root inode allocation) use newino.
  692. * This must work because we've just allocated some.
  693. */
  694. if (!pagino)
  695. pagino = be32_to_cpu(agi->agi_newino);
  696. #ifdef DEBUG
  697. if (cur->bc_nlevels == 1) {
  698. int freecount = 0;
  699. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  700. goto error0;
  701. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  702. do {
  703. error = xfs_inobt_get_rec(cur, &rec, &i);
  704. if (error)
  705. goto error0;
  706. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  707. freecount += rec.ir_freecount;
  708. if ((error = xfs_btree_increment(cur, 0, &i)))
  709. goto error0;
  710. } while (i == 1);
  711. ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
  712. XFS_FORCED_SHUTDOWN(mp));
  713. }
  714. #endif
  715. /*
  716. * If in the same a.g. as the parent, try to get near the parent.
  717. */
  718. if (pagno == agno) {
  719. if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i)))
  720. goto error0;
  721. if (i != 0 &&
  722. (error = xfs_inobt_get_rec(cur, &rec, &j)) == 0 &&
  723. j == 1 &&
  724. rec.ir_freecount > 0) {
  725. /*
  726. * Found a free inode in the same chunk
  727. * as parent, done.
  728. */
  729. }
  730. /*
  731. * In the same a.g. as parent, but parent's chunk is full.
  732. */
  733. else {
  734. int doneleft; /* done, to the left */
  735. int doneright; /* done, to the right */
  736. if (error)
  737. goto error0;
  738. ASSERT(i == 1);
  739. ASSERT(j == 1);
  740. /*
  741. * Duplicate the cursor, search left & right
  742. * simultaneously.
  743. */
  744. if ((error = xfs_btree_dup_cursor(cur, &tcur)))
  745. goto error0;
  746. /*
  747. * Search left with tcur, back up 1 record.
  748. */
  749. if ((error = xfs_btree_decrement(tcur, 0, &i)))
  750. goto error1;
  751. doneleft = !i;
  752. if (!doneleft) {
  753. error = xfs_inobt_get_rec(tcur, &trec, &i);
  754. if (error)
  755. goto error1;
  756. XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
  757. }
  758. /*
  759. * Search right with cur, go forward 1 record.
  760. */
  761. if ((error = xfs_btree_increment(cur, 0, &i)))
  762. goto error1;
  763. doneright = !i;
  764. if (!doneright) {
  765. error = xfs_inobt_get_rec(cur, &rec, &i);
  766. if (error)
  767. goto error1;
  768. XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
  769. }
  770. /*
  771. * Loop until we find the closest inode chunk
  772. * with a free one.
  773. */
  774. while (!doneleft || !doneright) {
  775. int useleft; /* using left inode
  776. chunk this time */
  777. /*
  778. * Figure out which block is closer,
  779. * if both are valid.
  780. */
  781. if (!doneleft && !doneright)
  782. useleft =
  783. pagino -
  784. (trec.ir_startino +
  785. XFS_INODES_PER_CHUNK - 1) <
  786. rec.ir_startino - pagino;
  787. else
  788. useleft = !doneleft;
  789. /*
  790. * If checking the left, does it have
  791. * free inodes?
  792. */
  793. if (useleft && trec.ir_freecount) {
  794. /*
  795. * Yes, set it up as the chunk to use.
  796. */
  797. rec = trec;
  798. xfs_btree_del_cursor(cur,
  799. XFS_BTREE_NOERROR);
  800. cur = tcur;
  801. break;
  802. }
  803. /*
  804. * If checking the right, does it have
  805. * free inodes?
  806. */
  807. if (!useleft && rec.ir_freecount) {
  808. /*
  809. * Yes, it's already set up.
  810. */
  811. xfs_btree_del_cursor(tcur,
  812. XFS_BTREE_NOERROR);
  813. break;
  814. }
  815. /*
  816. * If used the left, get another one
  817. * further left.
  818. */
  819. if (useleft) {
  820. if ((error = xfs_btree_decrement(tcur, 0,
  821. &i)))
  822. goto error1;
  823. doneleft = !i;
  824. if (!doneleft) {
  825. error = xfs_inobt_get_rec(
  826. tcur, &trec, &i);
  827. if (error)
  828. goto error1;
  829. XFS_WANT_CORRUPTED_GOTO(i == 1,
  830. error1);
  831. }
  832. }
  833. /*
  834. * If used the right, get another one
  835. * further right.
  836. */
  837. else {
  838. if ((error = xfs_btree_increment(cur, 0,
  839. &i)))
  840. goto error1;
  841. doneright = !i;
  842. if (!doneright) {
  843. error = xfs_inobt_get_rec(
  844. cur, &rec, &i);
  845. if (error)
  846. goto error1;
  847. XFS_WANT_CORRUPTED_GOTO(i == 1,
  848. error1);
  849. }
  850. }
  851. }
  852. ASSERT(!doneleft || !doneright);
  853. }
  854. }
  855. /*
  856. * In a different a.g. from the parent.
  857. * See if the most recently allocated block has any free.
  858. */
  859. else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
  860. if ((error = xfs_inobt_lookup_eq(cur,
  861. be32_to_cpu(agi->agi_newino), 0, 0, &i)))
  862. goto error0;
  863. if (i == 1 &&
  864. (error = xfs_inobt_get_rec(cur, &rec, &j)) == 0 &&
  865. j == 1 &&
  866. rec.ir_freecount > 0) {
  867. /*
  868. * The last chunk allocated in the group still has
  869. * a free inode.
  870. */
  871. }
  872. /*
  873. * None left in the last group, search the whole a.g.
  874. */
  875. else {
  876. if (error)
  877. goto error0;
  878. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  879. goto error0;
  880. ASSERT(i == 1);
  881. for (;;) {
  882. error = xfs_inobt_get_rec(cur, &rec, &i);
  883. if (error)
  884. goto error0;
  885. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  886. if (rec.ir_freecount > 0)
  887. break;
  888. if ((error = xfs_btree_increment(cur, 0, &i)))
  889. goto error0;
  890. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  891. }
  892. }
  893. }
  894. offset = xfs_ialloc_find_free(&rec.ir_free);
  895. ASSERT(offset >= 0);
  896. ASSERT(offset < XFS_INODES_PER_CHUNK);
  897. ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
  898. XFS_INODES_PER_CHUNK) == 0);
  899. ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
  900. rec.ir_free &= ~XFS_INOBT_MASK(offset);
  901. rec.ir_freecount--;
  902. error = xfs_inobt_update(cur, &rec);
  903. if (error)
  904. goto error0;
  905. be32_add_cpu(&agi->agi_freecount, -1);
  906. xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
  907. down_read(&mp->m_peraglock);
  908. mp->m_perag[tagno].pagi_freecount--;
  909. up_read(&mp->m_peraglock);
  910. #ifdef DEBUG
  911. if (cur->bc_nlevels == 1) {
  912. int freecount = 0;
  913. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  914. goto error0;
  915. do {
  916. error = xfs_inobt_get_rec(cur, &rec, &i);
  917. if (error)
  918. goto error0;
  919. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  920. freecount += rec.ir_freecount;
  921. if ((error = xfs_btree_increment(cur, 0, &i)))
  922. goto error0;
  923. } while (i == 1);
  924. ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
  925. XFS_FORCED_SHUTDOWN(mp));
  926. }
  927. #endif
  928. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  929. xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
  930. *inop = ino;
  931. return 0;
  932. error1:
  933. xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
  934. error0:
  935. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  936. return error;
  937. }
  938. /*
  939. * Free disk inode. Carefully avoids touching the incore inode, all
  940. * manipulations incore are the caller's responsibility.
  941. * The on-disk inode is not changed by this operation, only the
  942. * btree (free inode mask) is changed.
  943. */
  944. int
  945. xfs_difree(
  946. xfs_trans_t *tp, /* transaction pointer */
  947. xfs_ino_t inode, /* inode to be freed */
  948. xfs_bmap_free_t *flist, /* extents to free */
  949. int *delete, /* set if inode cluster was deleted */
  950. xfs_ino_t *first_ino) /* first inode in deleted cluster */
  951. {
  952. /* REFERENCED */
  953. xfs_agblock_t agbno; /* block number containing inode */
  954. xfs_buf_t *agbp; /* buffer containing allocation group header */
  955. xfs_agino_t agino; /* inode number relative to allocation group */
  956. xfs_agnumber_t agno; /* allocation group number */
  957. xfs_agi_t *agi; /* allocation group header */
  958. xfs_btree_cur_t *cur; /* inode btree cursor */
  959. int error; /* error return value */
  960. int i; /* result code */
  961. int ilen; /* inodes in an inode cluster */
  962. xfs_mount_t *mp; /* mount structure for filesystem */
  963. int off; /* offset of inode in inode chunk */
  964. xfs_inobt_rec_incore_t rec; /* btree record */
  965. mp = tp->t_mountp;
  966. /*
  967. * Break up inode number into its components.
  968. */
  969. agno = XFS_INO_TO_AGNO(mp, inode);
  970. if (agno >= mp->m_sb.sb_agcount) {
  971. cmn_err(CE_WARN,
  972. "xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.",
  973. agno, mp->m_sb.sb_agcount, mp->m_fsname);
  974. ASSERT(0);
  975. return XFS_ERROR(EINVAL);
  976. }
  977. agino = XFS_INO_TO_AGINO(mp, inode);
  978. if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
  979. cmn_err(CE_WARN,
  980. "xfs_difree: inode != XFS_AGINO_TO_INO() "
  981. "(%llu != %llu) on %s. Returning EINVAL.",
  982. (unsigned long long)inode,
  983. (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino),
  984. mp->m_fsname);
  985. ASSERT(0);
  986. return XFS_ERROR(EINVAL);
  987. }
  988. agbno = XFS_AGINO_TO_AGBNO(mp, agino);
  989. if (agbno >= mp->m_sb.sb_agblocks) {
  990. cmn_err(CE_WARN,
  991. "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.",
  992. agbno, mp->m_sb.sb_agblocks, mp->m_fsname);
  993. ASSERT(0);
  994. return XFS_ERROR(EINVAL);
  995. }
  996. /*
  997. * Get the allocation group header.
  998. */
  999. down_read(&mp->m_peraglock);
  1000. error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
  1001. up_read(&mp->m_peraglock);
  1002. if (error) {
  1003. cmn_err(CE_WARN,
  1004. "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.",
  1005. error, mp->m_fsname);
  1006. return error;
  1007. }
  1008. agi = XFS_BUF_TO_AGI(agbp);
  1009. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  1010. ASSERT(agbno < be32_to_cpu(agi->agi_length));
  1011. /*
  1012. * Initialize the cursor.
  1013. */
  1014. cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
  1015. #ifdef DEBUG
  1016. if (cur->bc_nlevels == 1) {
  1017. int freecount = 0;
  1018. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  1019. goto error0;
  1020. do {
  1021. error = xfs_inobt_get_rec(cur, &rec, &i);
  1022. if (error)
  1023. goto error0;
  1024. if (i) {
  1025. freecount += rec.ir_freecount;
  1026. if ((error = xfs_btree_increment(cur, 0, &i)))
  1027. goto error0;
  1028. }
  1029. } while (i == 1);
  1030. ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
  1031. XFS_FORCED_SHUTDOWN(mp));
  1032. }
  1033. #endif
  1034. /*
  1035. * Look for the entry describing this inode.
  1036. */
  1037. if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) {
  1038. cmn_err(CE_WARN,
  1039. "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.",
  1040. error, mp->m_fsname);
  1041. goto error0;
  1042. }
  1043. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  1044. error = xfs_inobt_get_rec(cur, &rec, &i);
  1045. if (error) {
  1046. cmn_err(CE_WARN,
  1047. "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.",
  1048. error, mp->m_fsname);
  1049. goto error0;
  1050. }
  1051. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  1052. /*
  1053. * Get the offset in the inode chunk.
  1054. */
  1055. off = agino - rec.ir_startino;
  1056. ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
  1057. ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off)));
  1058. /*
  1059. * Mark the inode free & increment the count.
  1060. */
  1061. rec.ir_free |= XFS_INOBT_MASK(off);
  1062. rec.ir_freecount++;
  1063. /*
  1064. * When an inode cluster is free, it becomes eligible for removal
  1065. */
  1066. if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
  1067. (rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
  1068. *delete = 1;
  1069. *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
  1070. /*
  1071. * Remove the inode cluster from the AGI B+Tree, adjust the
  1072. * AGI and Superblock inode counts, and mark the disk space
  1073. * to be freed when the transaction is committed.
  1074. */
  1075. ilen = XFS_IALLOC_INODES(mp);
  1076. be32_add_cpu(&agi->agi_count, -ilen);
  1077. be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
  1078. xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
  1079. down_read(&mp->m_peraglock);
  1080. mp->m_perag[agno].pagi_freecount -= ilen - 1;
  1081. up_read(&mp->m_peraglock);
  1082. xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
  1083. xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
  1084. if ((error = xfs_btree_delete(cur, &i))) {
  1085. cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n",
  1086. error, mp->m_fsname);
  1087. goto error0;
  1088. }
  1089. xfs_bmap_add_free(XFS_AGB_TO_FSB(mp,
  1090. agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)),
  1091. XFS_IALLOC_BLOCKS(mp), flist, mp);
  1092. } else {
  1093. *delete = 0;
  1094. error = xfs_inobt_update(cur, &rec);
  1095. if (error) {
  1096. cmn_err(CE_WARN,
  1097. "xfs_difree: xfs_inobt_update returned an error %d on %s.",
  1098. error, mp->m_fsname);
  1099. goto error0;
  1100. }
  1101. /*
  1102. * Change the inode free counts and log the ag/sb changes.
  1103. */
  1104. be32_add_cpu(&agi->agi_freecount, 1);
  1105. xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
  1106. down_read(&mp->m_peraglock);
  1107. mp->m_perag[agno].pagi_freecount++;
  1108. up_read(&mp->m_peraglock);
  1109. xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
  1110. }
  1111. #ifdef DEBUG
  1112. if (cur->bc_nlevels == 1) {
  1113. int freecount = 0;
  1114. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  1115. goto error0;
  1116. do {
  1117. error = xfs_inobt_get_rec(cur, &rec, &i);
  1118. if (error)
  1119. goto error0;
  1120. if (i) {
  1121. freecount += rec.ir_freecount;
  1122. if ((error = xfs_btree_increment(cur, 0, &i)))
  1123. goto error0;
  1124. }
  1125. } while (i == 1);
  1126. ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
  1127. XFS_FORCED_SHUTDOWN(mp));
  1128. }
  1129. #endif
  1130. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  1131. return 0;
  1132. error0:
  1133. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  1134. return error;
  1135. }
  1136. /*
  1137. * Return the location of the inode in imap, for mapping it into a buffer.
  1138. */
  1139. int
  1140. xfs_imap(
  1141. xfs_mount_t *mp, /* file system mount structure */
  1142. xfs_trans_t *tp, /* transaction pointer */
  1143. xfs_ino_t ino, /* inode to locate */
  1144. struct xfs_imap *imap, /* location map structure */
  1145. uint flags) /* flags for inode btree lookup */
  1146. {
  1147. xfs_agblock_t agbno; /* block number of inode in the alloc group */
  1148. xfs_agino_t agino; /* inode number within alloc group */
  1149. xfs_agnumber_t agno; /* allocation group number */
  1150. int blks_per_cluster; /* num blocks per inode cluster */
  1151. xfs_agblock_t chunk_agbno; /* first block in inode chunk */
  1152. xfs_agblock_t cluster_agbno; /* first block in inode cluster */
  1153. int error; /* error code */
  1154. int offset; /* index of inode in its buffer */
  1155. int offset_agbno; /* blks from chunk start to inode */
  1156. ASSERT(ino != NULLFSINO);
  1157. /*
  1158. * Split up the inode number into its parts.
  1159. */
  1160. agno = XFS_INO_TO_AGNO(mp, ino);
  1161. agino = XFS_INO_TO_AGINO(mp, ino);
  1162. agbno = XFS_AGINO_TO_AGBNO(mp, agino);
  1163. if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
  1164. ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
  1165. #ifdef DEBUG
  1166. /* no diagnostics for bulkstat, ino comes from userspace */
  1167. if (flags & XFS_IGET_BULKSTAT)
  1168. return XFS_ERROR(EINVAL);
  1169. if (agno >= mp->m_sb.sb_agcount) {
  1170. xfs_fs_cmn_err(CE_ALERT, mp,
  1171. "xfs_imap: agno (%d) >= "
  1172. "mp->m_sb.sb_agcount (%d)",
  1173. agno, mp->m_sb.sb_agcount);
  1174. }
  1175. if (agbno >= mp->m_sb.sb_agblocks) {
  1176. xfs_fs_cmn_err(CE_ALERT, mp,
  1177. "xfs_imap: agbno (0x%llx) >= "
  1178. "mp->m_sb.sb_agblocks (0x%lx)",
  1179. (unsigned long long) agbno,
  1180. (unsigned long) mp->m_sb.sb_agblocks);
  1181. }
  1182. if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
  1183. xfs_fs_cmn_err(CE_ALERT, mp,
  1184. "xfs_imap: ino (0x%llx) != "
  1185. "XFS_AGINO_TO_INO(mp, agno, agino) "
  1186. "(0x%llx)",
  1187. ino, XFS_AGINO_TO_INO(mp, agno, agino));
  1188. }
  1189. xfs_stack_trace();
  1190. #endif /* DEBUG */
  1191. return XFS_ERROR(EINVAL);
  1192. }
  1193. /*
  1194. * If the inode cluster size is the same as the blocksize or
  1195. * smaller we get to the buffer by simple arithmetics.
  1196. */
  1197. if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) {
  1198. offset = XFS_INO_TO_OFFSET(mp, ino);
  1199. ASSERT(offset < mp->m_sb.sb_inopblock);
  1200. imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
  1201. imap->im_len = XFS_FSB_TO_BB(mp, 1);
  1202. imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
  1203. return 0;
  1204. }
  1205. blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
  1206. /*
  1207. * If we get a block number passed from bulkstat we can use it to
  1208. * find the buffer easily.
  1209. */
  1210. if (imap->im_blkno) {
  1211. offset = XFS_INO_TO_OFFSET(mp, ino);
  1212. ASSERT(offset < mp->m_sb.sb_inopblock);
  1213. cluster_agbno = xfs_daddr_to_agbno(mp, imap->im_blkno);
  1214. offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock;
  1215. imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
  1216. imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
  1217. return 0;
  1218. }
  1219. /*
  1220. * If the inode chunks are aligned then use simple maths to
  1221. * find the location. Otherwise we have to do a btree
  1222. * lookup to find the location.
  1223. */
  1224. if (mp->m_inoalign_mask) {
  1225. offset_agbno = agbno & mp->m_inoalign_mask;
  1226. chunk_agbno = agbno - offset_agbno;
  1227. } else {
  1228. xfs_btree_cur_t *cur; /* inode btree cursor */
  1229. xfs_inobt_rec_incore_t chunk_rec;
  1230. xfs_buf_t *agbp; /* agi buffer */
  1231. int i; /* temp state */
  1232. down_read(&mp->m_peraglock);
  1233. error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
  1234. up_read(&mp->m_peraglock);
  1235. if (error) {
  1236. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
  1237. "xfs_ialloc_read_agi() returned "
  1238. "error %d, agno %d",
  1239. error, agno);
  1240. return error;
  1241. }
  1242. cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
  1243. error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i);
  1244. if (error) {
  1245. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
  1246. "xfs_inobt_lookup_le() failed");
  1247. goto error0;
  1248. }
  1249. error = xfs_inobt_get_rec(cur, &chunk_rec, &i);
  1250. if (error) {
  1251. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
  1252. "xfs_inobt_get_rec() failed");
  1253. goto error0;
  1254. }
  1255. if (i == 0) {
  1256. #ifdef DEBUG
  1257. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
  1258. "xfs_inobt_get_rec() failed");
  1259. #endif /* DEBUG */
  1260. error = XFS_ERROR(EINVAL);
  1261. }
  1262. error0:
  1263. xfs_trans_brelse(tp, agbp);
  1264. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  1265. if (error)
  1266. return error;
  1267. chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino);
  1268. offset_agbno = agbno - chunk_agbno;
  1269. }
  1270. ASSERT(agbno >= chunk_agbno);
  1271. cluster_agbno = chunk_agbno +
  1272. ((offset_agbno / blks_per_cluster) * blks_per_cluster);
  1273. offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
  1274. XFS_INO_TO_OFFSET(mp, ino);
  1275. imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
  1276. imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
  1277. imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
  1278. /*
  1279. * If the inode number maps to a block outside the bounds
  1280. * of the file system then return NULL rather than calling
  1281. * read_buf and panicing when we get an error from the
  1282. * driver.
  1283. */
  1284. if ((imap->im_blkno + imap->im_len) >
  1285. XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
  1286. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
  1287. "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > "
  1288. " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)",
  1289. (unsigned long long) imap->im_blkno,
  1290. (unsigned long long) imap->im_len,
  1291. XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
  1292. return XFS_ERROR(EINVAL);
  1293. }
  1294. return 0;
  1295. }
  1296. /*
  1297. * Compute and fill in value of m_in_maxlevels.
  1298. */
  1299. void
  1300. xfs_ialloc_compute_maxlevels(
  1301. xfs_mount_t *mp) /* file system mount structure */
  1302. {
  1303. int level;
  1304. uint maxblocks;
  1305. uint maxleafents;
  1306. int minleafrecs;
  1307. int minnoderecs;
  1308. maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >>
  1309. XFS_INODES_PER_CHUNK_LOG;
  1310. minleafrecs = mp->m_alloc_mnr[0];
  1311. minnoderecs = mp->m_alloc_mnr[1];
  1312. maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
  1313. for (level = 1; maxblocks > 1; level++)
  1314. maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
  1315. mp->m_in_maxlevels = level;
  1316. }
  1317. /*
  1318. * Log specified fields for the ag hdr (inode section)
  1319. */
  1320. void
  1321. xfs_ialloc_log_agi(
  1322. xfs_trans_t *tp, /* transaction pointer */
  1323. xfs_buf_t *bp, /* allocation group header buffer */
  1324. int fields) /* bitmask of fields to log */
  1325. {
  1326. int first; /* first byte number */
  1327. int last; /* last byte number */
  1328. static const short offsets[] = { /* field starting offsets */
  1329. /* keep in sync with bit definitions */
  1330. offsetof(xfs_agi_t, agi_magicnum),
  1331. offsetof(xfs_agi_t, agi_versionnum),
  1332. offsetof(xfs_agi_t, agi_seqno),
  1333. offsetof(xfs_agi_t, agi_length),
  1334. offsetof(xfs_agi_t, agi_count),
  1335. offsetof(xfs_agi_t, agi_root),
  1336. offsetof(xfs_agi_t, agi_level),
  1337. offsetof(xfs_agi_t, agi_freecount),
  1338. offsetof(xfs_agi_t, agi_newino),
  1339. offsetof(xfs_agi_t, agi_dirino),
  1340. offsetof(xfs_agi_t, agi_unlinked),
  1341. sizeof(xfs_agi_t)
  1342. };
  1343. #ifdef DEBUG
  1344. xfs_agi_t *agi; /* allocation group header */
  1345. agi = XFS_BUF_TO_AGI(bp);
  1346. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  1347. #endif
  1348. /*
  1349. * Compute byte offsets for the first and last fields.
  1350. */
  1351. xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last);
  1352. /*
  1353. * Log the allocation group inode header buffer.
  1354. */
  1355. xfs_trans_log_buf(tp, bp, first, last);
  1356. }
  1357. #ifdef DEBUG
  1358. STATIC void
  1359. xfs_check_agi_unlinked(
  1360. struct xfs_agi *agi)
  1361. {
  1362. int i;
  1363. for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
  1364. ASSERT(agi->agi_unlinked[i]);
  1365. }
  1366. #else
  1367. #define xfs_check_agi_unlinked(agi)
  1368. #endif
  1369. /*
  1370. * Read in the allocation group header (inode allocation section)
  1371. */
  1372. int
  1373. xfs_read_agi(
  1374. struct xfs_mount *mp, /* file system mount structure */
  1375. struct xfs_trans *tp, /* transaction pointer */
  1376. xfs_agnumber_t agno, /* allocation group number */
  1377. struct xfs_buf **bpp) /* allocation group hdr buf */
  1378. {
  1379. struct xfs_agi *agi; /* allocation group header */
  1380. int agi_ok; /* agi is consistent */
  1381. int error;
  1382. ASSERT(agno != NULLAGNUMBER);
  1383. error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
  1384. XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
  1385. XFS_FSS_TO_BB(mp, 1), 0, bpp);
  1386. if (error)
  1387. return error;
  1388. ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp));
  1389. agi = XFS_BUF_TO_AGI(*bpp);
  1390. /*
  1391. * Validate the magic number of the agi block.
  1392. */
  1393. agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
  1394. XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) &&
  1395. be32_to_cpu(agi->agi_seqno) == agno;
  1396. if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
  1397. XFS_RANDOM_IALLOC_READ_AGI))) {
  1398. XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW,
  1399. mp, agi);
  1400. xfs_trans_brelse(tp, *bpp);
  1401. return XFS_ERROR(EFSCORRUPTED);
  1402. }
  1403. XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGI, XFS_AGI_REF);
  1404. xfs_check_agi_unlinked(agi);
  1405. return 0;
  1406. }
  1407. int
  1408. xfs_ialloc_read_agi(
  1409. struct xfs_mount *mp, /* file system mount structure */
  1410. struct xfs_trans *tp, /* transaction pointer */
  1411. xfs_agnumber_t agno, /* allocation group number */
  1412. struct xfs_buf **bpp) /* allocation group hdr buf */
  1413. {
  1414. struct xfs_agi *agi; /* allocation group header */
  1415. struct xfs_perag *pag; /* per allocation group data */
  1416. int error;
  1417. error = xfs_read_agi(mp, tp, agno, bpp);
  1418. if (error)
  1419. return error;
  1420. agi = XFS_BUF_TO_AGI(*bpp);
  1421. pag = &mp->m_perag[agno];
  1422. if (!pag->pagi_init) {
  1423. pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
  1424. pag->pagi_count = be32_to_cpu(agi->agi_count);
  1425. pag->pagi_init = 1;
  1426. }
  1427. /*
  1428. * It's possible for these to be out of sync if
  1429. * we are in the middle of a forced shutdown.
  1430. */
  1431. ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
  1432. XFS_FORCED_SHUTDOWN(mp));
  1433. return 0;
  1434. }
  1435. /*
  1436. * Read in the agi to initialise the per-ag data in the mount structure
  1437. */
  1438. int
  1439. xfs_ialloc_pagi_init(
  1440. xfs_mount_t *mp, /* file system mount structure */
  1441. xfs_trans_t *tp, /* transaction pointer */
  1442. xfs_agnumber_t agno) /* allocation group number */
  1443. {
  1444. xfs_buf_t *bp = NULL;
  1445. int error;
  1446. error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
  1447. if (error)
  1448. return error;
  1449. if (bp)
  1450. xfs_trans_brelse(tp, bp);
  1451. return 0;
  1452. }