xfs_ialloc.c 44 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527
  1. /*
  2. * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
  3. * All Rights Reserved.
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU General Public License as
  7. * published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it would be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write the Free Software Foundation,
  16. * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "xfs.h"
  19. #include "xfs_fs.h"
  20. #include "xfs_types.h"
  21. #include "xfs_bit.h"
  22. #include "xfs_log.h"
  23. #include "xfs_inum.h"
  24. #include "xfs_trans.h"
  25. #include "xfs_sb.h"
  26. #include "xfs_ag.h"
  27. #include "xfs_dir2.h"
  28. #include "xfs_dmapi.h"
  29. #include "xfs_mount.h"
  30. #include "xfs_bmap_btree.h"
  31. #include "xfs_alloc_btree.h"
  32. #include "xfs_ialloc_btree.h"
  33. #include "xfs_dir2_sf.h"
  34. #include "xfs_attr_sf.h"
  35. #include "xfs_dinode.h"
  36. #include "xfs_inode.h"
  37. #include "xfs_btree.h"
  38. #include "xfs_ialloc.h"
  39. #include "xfs_alloc.h"
  40. #include "xfs_rtalloc.h"
  41. #include "xfs_error.h"
  42. #include "xfs_bmap.h"
  43. /*
  44. * Log specified fields for the inode given by bp and off.
  45. */
  46. STATIC void
  47. xfs_ialloc_log_di(
  48. xfs_trans_t *tp, /* transaction pointer */
  49. xfs_buf_t *bp, /* inode buffer */
  50. int off, /* index of inode in buffer */
  51. int fields) /* bitmask of fields to log */
  52. {
  53. int first; /* first byte number */
  54. int ioffset; /* off in bytes */
  55. int last; /* last byte number */
  56. xfs_mount_t *mp; /* mount point structure */
  57. static const short offsets[] = { /* field offsets */
  58. /* keep in sync with bits */
  59. offsetof(xfs_dinode_core_t, di_magic),
  60. offsetof(xfs_dinode_core_t, di_mode),
  61. offsetof(xfs_dinode_core_t, di_version),
  62. offsetof(xfs_dinode_core_t, di_format),
  63. offsetof(xfs_dinode_core_t, di_onlink),
  64. offsetof(xfs_dinode_core_t, di_uid),
  65. offsetof(xfs_dinode_core_t, di_gid),
  66. offsetof(xfs_dinode_core_t, di_nlink),
  67. offsetof(xfs_dinode_core_t, di_projid),
  68. offsetof(xfs_dinode_core_t, di_pad),
  69. offsetof(xfs_dinode_core_t, di_atime),
  70. offsetof(xfs_dinode_core_t, di_mtime),
  71. offsetof(xfs_dinode_core_t, di_ctime),
  72. offsetof(xfs_dinode_core_t, di_size),
  73. offsetof(xfs_dinode_core_t, di_nblocks),
  74. offsetof(xfs_dinode_core_t, di_extsize),
  75. offsetof(xfs_dinode_core_t, di_nextents),
  76. offsetof(xfs_dinode_core_t, di_anextents),
  77. offsetof(xfs_dinode_core_t, di_forkoff),
  78. offsetof(xfs_dinode_core_t, di_aformat),
  79. offsetof(xfs_dinode_core_t, di_dmevmask),
  80. offsetof(xfs_dinode_core_t, di_dmstate),
  81. offsetof(xfs_dinode_core_t, di_flags),
  82. offsetof(xfs_dinode_core_t, di_gen),
  83. offsetof(xfs_dinode_t, di_next_unlinked),
  84. offsetof(xfs_dinode_t, di_u),
  85. offsetof(xfs_dinode_t, di_a),
  86. sizeof(xfs_dinode_t)
  87. };
  88. ASSERT(offsetof(xfs_dinode_t, di_core) == 0);
  89. ASSERT((fields & (XFS_DI_U|XFS_DI_A)) == 0);
  90. mp = tp->t_mountp;
  91. /*
  92. * Get the inode-relative first and last bytes for these fields
  93. */
  94. xfs_btree_offsets(fields, offsets, XFS_DI_NUM_BITS, &first, &last);
  95. /*
  96. * Convert to buffer offsets and log it.
  97. */
  98. ioffset = off << mp->m_sb.sb_inodelog;
  99. first += ioffset;
  100. last += ioffset;
  101. xfs_trans_log_buf(tp, bp, first, last);
  102. }
  103. /*
  104. * Allocation group level functions.
  105. */
  106. static inline int
  107. xfs_ialloc_cluster_alignment(
  108. xfs_alloc_arg_t *args)
  109. {
  110. if (xfs_sb_version_hasalign(&args->mp->m_sb) &&
  111. args->mp->m_sb.sb_inoalignmt >=
  112. XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp)))
  113. return args->mp->m_sb.sb_inoalignmt;
  114. return 1;
  115. }
  116. /*
  117. * Lookup the record equal to ino in the btree given by cur.
  118. */
  119. STATIC int /* error */
  120. xfs_inobt_lookup_eq(
  121. struct xfs_btree_cur *cur, /* btree cursor */
  122. xfs_agino_t ino, /* starting inode of chunk */
  123. __int32_t fcnt, /* free inode count */
  124. xfs_inofree_t free, /* free inode mask */
  125. int *stat) /* success/failure */
  126. {
  127. cur->bc_rec.i.ir_startino = ino;
  128. cur->bc_rec.i.ir_freecount = fcnt;
  129. cur->bc_rec.i.ir_free = free;
  130. return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
  131. }
  132. /*
  133. * Lookup the first record greater than or equal to ino
  134. * in the btree given by cur.
  135. */
  136. int /* error */
  137. xfs_inobt_lookup_ge(
  138. struct xfs_btree_cur *cur, /* btree cursor */
  139. xfs_agino_t ino, /* starting inode of chunk */
  140. __int32_t fcnt, /* free inode count */
  141. xfs_inofree_t free, /* free inode mask */
  142. int *stat) /* success/failure */
  143. {
  144. cur->bc_rec.i.ir_startino = ino;
  145. cur->bc_rec.i.ir_freecount = fcnt;
  146. cur->bc_rec.i.ir_free = free;
  147. return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
  148. }
  149. /*
  150. * Lookup the first record less than or equal to ino
  151. * in the btree given by cur.
  152. */
  153. int /* error */
  154. xfs_inobt_lookup_le(
  155. struct xfs_btree_cur *cur, /* btree cursor */
  156. xfs_agino_t ino, /* starting inode of chunk */
  157. __int32_t fcnt, /* free inode count */
  158. xfs_inofree_t free, /* free inode mask */
  159. int *stat) /* success/failure */
  160. {
  161. cur->bc_rec.i.ir_startino = ino;
  162. cur->bc_rec.i.ir_freecount = fcnt;
  163. cur->bc_rec.i.ir_free = free;
  164. return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
  165. }
  166. /*
  167. * Update the record referred to by cur to the value given
  168. * by [ino, fcnt, free].
  169. * This either works (return 0) or gets an EFSCORRUPTED error.
  170. */
  171. STATIC int /* error */
  172. xfs_inobt_update(
  173. struct xfs_btree_cur *cur, /* btree cursor */
  174. xfs_agino_t ino, /* starting inode of chunk */
  175. __int32_t fcnt, /* free inode count */
  176. xfs_inofree_t free) /* free inode mask */
  177. {
  178. union xfs_btree_rec rec;
  179. rec.inobt.ir_startino = cpu_to_be32(ino);
  180. rec.inobt.ir_freecount = cpu_to_be32(fcnt);
  181. rec.inobt.ir_free = cpu_to_be64(free);
  182. return xfs_btree_update(cur, &rec);
  183. }
  184. /*
  185. * Allocate new inodes in the allocation group specified by agbp.
  186. * Return 0 for success, else error code.
  187. */
  188. STATIC int /* error code or 0 */
  189. xfs_ialloc_ag_alloc(
  190. xfs_trans_t *tp, /* transaction pointer */
  191. xfs_buf_t *agbp, /* alloc group buffer */
  192. int *alloc)
  193. {
  194. xfs_agi_t *agi; /* allocation group header */
  195. xfs_alloc_arg_t args; /* allocation argument structure */
  196. int blks_per_cluster; /* fs blocks per inode cluster */
  197. xfs_btree_cur_t *cur; /* inode btree cursor */
  198. xfs_daddr_t d; /* disk addr of buffer */
  199. xfs_agnumber_t agno;
  200. int error;
  201. xfs_buf_t *fbuf; /* new free inodes' buffer */
  202. xfs_dinode_t *free; /* new free inode structure */
  203. int i; /* inode counter */
  204. int j; /* block counter */
  205. int nbufs; /* num bufs of new inodes */
  206. xfs_agino_t newino; /* new first inode's number */
  207. xfs_agino_t newlen; /* new number of inodes */
  208. int ninodes; /* num inodes per buf */
  209. xfs_agino_t thisino; /* current inode number, for loop */
  210. int version; /* inode version number to use */
  211. int isaligned = 0; /* inode allocation at stripe unit */
  212. /* boundary */
  213. unsigned int gen;
  214. args.tp = tp;
  215. args.mp = tp->t_mountp;
  216. /*
  217. * Locking will ensure that we don't have two callers in here
  218. * at one time.
  219. */
  220. newlen = XFS_IALLOC_INODES(args.mp);
  221. if (args.mp->m_maxicount &&
  222. args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
  223. return XFS_ERROR(ENOSPC);
  224. args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
  225. /*
  226. * First try to allocate inodes contiguous with the last-allocated
  227. * chunk of inodes. If the filesystem is striped, this will fill
  228. * an entire stripe unit with inodes.
  229. */
  230. agi = XFS_BUF_TO_AGI(agbp);
  231. newino = be32_to_cpu(agi->agi_newino);
  232. args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
  233. XFS_IALLOC_BLOCKS(args.mp);
  234. if (likely(newino != NULLAGINO &&
  235. (args.agbno < be32_to_cpu(agi->agi_length)))) {
  236. args.fsbno = XFS_AGB_TO_FSB(args.mp,
  237. be32_to_cpu(agi->agi_seqno), args.agbno);
  238. args.type = XFS_ALLOCTYPE_THIS_BNO;
  239. args.mod = args.total = args.wasdel = args.isfl =
  240. args.userdata = args.minalignslop = 0;
  241. args.prod = 1;
  242. /*
  243. * We need to take into account alignment here to ensure that
  244. * we don't modify the free list if we fail to have an exact
  245. * block. If we don't have an exact match, and every oher
  246. * attempt allocation attempt fails, we'll end up cancelling
  247. * a dirty transaction and shutting down.
  248. *
  249. * For an exact allocation, alignment must be 1,
  250. * however we need to take cluster alignment into account when
  251. * fixing up the freelist. Use the minalignslop field to
  252. * indicate that extra blocks might be required for alignment,
  253. * but not to use them in the actual exact allocation.
  254. */
  255. args.alignment = 1;
  256. args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;
  257. /* Allow space for the inode btree to split. */
  258. args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
  259. if ((error = xfs_alloc_vextent(&args)))
  260. return error;
  261. } else
  262. args.fsbno = NULLFSBLOCK;
  263. if (unlikely(args.fsbno == NULLFSBLOCK)) {
  264. /*
  265. * Set the alignment for the allocation.
  266. * If stripe alignment is turned on then align at stripe unit
  267. * boundary.
  268. * If the cluster size is smaller than a filesystem block
  269. * then we're doing I/O for inodes in filesystem block size
  270. * pieces, so don't need alignment anyway.
  271. */
  272. isaligned = 0;
  273. if (args.mp->m_sinoalign) {
  274. ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
  275. args.alignment = args.mp->m_dalign;
  276. isaligned = 1;
  277. } else
  278. args.alignment = xfs_ialloc_cluster_alignment(&args);
  279. /*
  280. * Need to figure out where to allocate the inode blocks.
  281. * Ideally they should be spaced out through the a.g.
  282. * For now, just allocate blocks up front.
  283. */
  284. args.agbno = be32_to_cpu(agi->agi_root);
  285. args.fsbno = XFS_AGB_TO_FSB(args.mp,
  286. be32_to_cpu(agi->agi_seqno), args.agbno);
  287. /*
  288. * Allocate a fixed-size extent of inodes.
  289. */
  290. args.type = XFS_ALLOCTYPE_NEAR_BNO;
  291. args.mod = args.total = args.wasdel = args.isfl =
  292. args.userdata = args.minalignslop = 0;
  293. args.prod = 1;
  294. /*
  295. * Allow space for the inode btree to split.
  296. */
  297. args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
  298. if ((error = xfs_alloc_vextent(&args)))
  299. return error;
  300. }
  301. /*
  302. * If stripe alignment is turned on, then try again with cluster
  303. * alignment.
  304. */
  305. if (isaligned && args.fsbno == NULLFSBLOCK) {
  306. args.type = XFS_ALLOCTYPE_NEAR_BNO;
  307. args.agbno = be32_to_cpu(agi->agi_root);
  308. args.fsbno = XFS_AGB_TO_FSB(args.mp,
  309. be32_to_cpu(agi->agi_seqno), args.agbno);
  310. args.alignment = xfs_ialloc_cluster_alignment(&args);
  311. if ((error = xfs_alloc_vextent(&args)))
  312. return error;
  313. }
  314. if (args.fsbno == NULLFSBLOCK) {
  315. *alloc = 0;
  316. return 0;
  317. }
  318. ASSERT(args.len == args.minlen);
  319. /*
  320. * Convert the results.
  321. */
  322. newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
  323. /*
  324. * Loop over the new block(s), filling in the inodes.
  325. * For small block sizes, manipulate the inodes in buffers
  326. * which are multiples of the blocks size.
  327. */
  328. if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) {
  329. blks_per_cluster = 1;
  330. nbufs = (int)args.len;
  331. ninodes = args.mp->m_sb.sb_inopblock;
  332. } else {
  333. blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) /
  334. args.mp->m_sb.sb_blocksize;
  335. nbufs = (int)args.len / blks_per_cluster;
  336. ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock;
  337. }
  338. /*
  339. * Figure out what version number to use in the inodes we create.
  340. * If the superblock version has caught up to the one that supports
  341. * the new inode format, then use the new inode version. Otherwise
  342. * use the old version so that old kernels will continue to be
  343. * able to use the file system.
  344. */
  345. if (xfs_sb_version_hasnlink(&args.mp->m_sb))
  346. version = XFS_DINODE_VERSION_2;
  347. else
  348. version = XFS_DINODE_VERSION_1;
  349. /*
  350. * Seed the new inode cluster with a random generation number. This
  351. * prevents short-term reuse of generation numbers if a chunk is
  352. * freed and then immediately reallocated. We use random numbers
  353. * rather than a linear progression to prevent the next generation
  354. * number from being easily guessable.
  355. */
  356. gen = random32();
  357. for (j = 0; j < nbufs; j++) {
  358. /*
  359. * Get the block.
  360. */
  361. d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno),
  362. args.agbno + (j * blks_per_cluster));
  363. fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d,
  364. args.mp->m_bsize * blks_per_cluster,
  365. XFS_BUF_LOCK);
  366. ASSERT(fbuf);
  367. ASSERT(!XFS_BUF_GETERROR(fbuf));
  368. /*
  369. * Set initial values for the inodes in this buffer.
  370. */
  371. xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog);
  372. for (i = 0; i < ninodes; i++) {
  373. free = XFS_MAKE_IPTR(args.mp, fbuf, i);
  374. free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
  375. free->di_core.di_version = version;
  376. free->di_core.di_gen = cpu_to_be32(gen);
  377. free->di_next_unlinked = cpu_to_be32(NULLAGINO);
  378. xfs_ialloc_log_di(tp, fbuf, i,
  379. XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
  380. }
  381. xfs_trans_inode_alloc_buf(tp, fbuf);
  382. }
  383. be32_add_cpu(&agi->agi_count, newlen);
  384. be32_add_cpu(&agi->agi_freecount, newlen);
  385. agno = be32_to_cpu(agi->agi_seqno);
  386. down_read(&args.mp->m_peraglock);
  387. args.mp->m_perag[agno].pagi_freecount += newlen;
  388. up_read(&args.mp->m_peraglock);
  389. agi->agi_newino = cpu_to_be32(newino);
  390. /*
  391. * Insert records describing the new inode chunk into the btree.
  392. */
  393. cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno);
  394. for (thisino = newino;
  395. thisino < newino + newlen;
  396. thisino += XFS_INODES_PER_CHUNK) {
  397. if ((error = xfs_inobt_lookup_eq(cur, thisino,
  398. XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) {
  399. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  400. return error;
  401. }
  402. ASSERT(i == 0);
  403. if ((error = xfs_btree_insert(cur, &i))) {
  404. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  405. return error;
  406. }
  407. ASSERT(i == 1);
  408. }
  409. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  410. /*
  411. * Log allocation group header fields
  412. */
  413. xfs_ialloc_log_agi(tp, agbp,
  414. XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
  415. /*
  416. * Modify/log superblock values for inode count and inode free count.
  417. */
  418. xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
  419. xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
  420. *alloc = 1;
  421. return 0;
  422. }
  423. STATIC_INLINE xfs_agnumber_t
  424. xfs_ialloc_next_ag(
  425. xfs_mount_t *mp)
  426. {
  427. xfs_agnumber_t agno;
  428. spin_lock(&mp->m_agirotor_lock);
  429. agno = mp->m_agirotor;
  430. if (++mp->m_agirotor == mp->m_maxagi)
  431. mp->m_agirotor = 0;
  432. spin_unlock(&mp->m_agirotor_lock);
  433. return agno;
  434. }
  435. /*
  436. * Select an allocation group to look for a free inode in, based on the parent
  437. * inode and then mode. Return the allocation group buffer.
  438. */
  439. STATIC xfs_buf_t * /* allocation group buffer */
  440. xfs_ialloc_ag_select(
  441. xfs_trans_t *tp, /* transaction pointer */
  442. xfs_ino_t parent, /* parent directory inode number */
  443. mode_t mode, /* bits set to indicate file type */
  444. int okalloc) /* ok to allocate more space */
  445. {
  446. xfs_buf_t *agbp; /* allocation group header buffer */
  447. xfs_agnumber_t agcount; /* number of ag's in the filesystem */
  448. xfs_agnumber_t agno; /* current ag number */
  449. int flags; /* alloc buffer locking flags */
  450. xfs_extlen_t ineed; /* blocks needed for inode allocation */
  451. xfs_extlen_t longest = 0; /* longest extent available */
  452. xfs_mount_t *mp; /* mount point structure */
  453. int needspace; /* file mode implies space allocated */
  454. xfs_perag_t *pag; /* per allocation group data */
  455. xfs_agnumber_t pagno; /* parent (starting) ag number */
  456. /*
  457. * Files of these types need at least one block if length > 0
  458. * (and they won't fit in the inode, but that's hard to figure out).
  459. */
  460. needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
  461. mp = tp->t_mountp;
  462. agcount = mp->m_maxagi;
  463. if (S_ISDIR(mode))
  464. pagno = xfs_ialloc_next_ag(mp);
  465. else {
  466. pagno = XFS_INO_TO_AGNO(mp, parent);
  467. if (pagno >= agcount)
  468. pagno = 0;
  469. }
  470. ASSERT(pagno < agcount);
  471. /*
  472. * Loop through allocation groups, looking for one with a little
  473. * free space in it. Note we don't look for free inodes, exactly.
  474. * Instead, we include whether there is a need to allocate inodes
  475. * to mean that blocks must be allocated for them,
  476. * if none are currently free.
  477. */
  478. agno = pagno;
  479. flags = XFS_ALLOC_FLAG_TRYLOCK;
  480. down_read(&mp->m_peraglock);
  481. for (;;) {
  482. pag = &mp->m_perag[agno];
  483. if (!pag->pagi_init) {
  484. if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
  485. agbp = NULL;
  486. goto nextag;
  487. }
  488. } else
  489. agbp = NULL;
  490. if (!pag->pagi_inodeok) {
  491. xfs_ialloc_next_ag(mp);
  492. goto unlock_nextag;
  493. }
  494. /*
  495. * Is there enough free space for the file plus a block
  496. * of inodes (if we need to allocate some)?
  497. */
  498. ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp);
  499. if (ineed && !pag->pagf_init) {
  500. if (agbp == NULL &&
  501. xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
  502. agbp = NULL;
  503. goto nextag;
  504. }
  505. (void)xfs_alloc_pagf_init(mp, tp, agno, flags);
  506. }
  507. if (!ineed || pag->pagf_init) {
  508. if (ineed && !(longest = pag->pagf_longest))
  509. longest = pag->pagf_flcount > 0;
  510. if (!ineed ||
  511. (pag->pagf_freeblks >= needspace + ineed &&
  512. longest >= ineed &&
  513. okalloc)) {
  514. if (agbp == NULL &&
  515. xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
  516. agbp = NULL;
  517. goto nextag;
  518. }
  519. up_read(&mp->m_peraglock);
  520. return agbp;
  521. }
  522. }
  523. unlock_nextag:
  524. if (agbp)
  525. xfs_trans_brelse(tp, agbp);
  526. nextag:
  527. /*
  528. * No point in iterating over the rest, if we're shutting
  529. * down.
  530. */
  531. if (XFS_FORCED_SHUTDOWN(mp)) {
  532. up_read(&mp->m_peraglock);
  533. return NULL;
  534. }
  535. agno++;
  536. if (agno >= agcount)
  537. agno = 0;
  538. if (agno == pagno) {
  539. if (flags == 0) {
  540. up_read(&mp->m_peraglock);
  541. return NULL;
  542. }
  543. flags = 0;
  544. }
  545. }
  546. }
  547. /*
  548. * Visible inode allocation functions.
  549. */
  550. /*
  551. * Allocate an inode on disk.
  552. * Mode is used to tell whether the new inode will need space, and whether
  553. * it is a directory.
  554. *
  555. * The arguments IO_agbp and alloc_done are defined to work within
  556. * the constraint of one allocation per transaction.
  557. * xfs_dialloc() is designed to be called twice if it has to do an
  558. * allocation to make more free inodes. On the first call,
  559. * IO_agbp should be set to NULL. If an inode is available,
  560. * i.e., xfs_dialloc() did not need to do an allocation, an inode
  561. * number is returned. In this case, IO_agbp would be set to the
  562. * current ag_buf and alloc_done set to false.
  563. * If an allocation needed to be done, xfs_dialloc would return
  564. * the current ag_buf in IO_agbp and set alloc_done to true.
  565. * The caller should then commit the current transaction, allocate a new
  566. * transaction, and call xfs_dialloc() again, passing in the previous
  567. * value of IO_agbp. IO_agbp should be held across the transactions.
  568. * Since the agbp is locked across the two calls, the second call is
  569. * guaranteed to have a free inode available.
  570. *
  571. * Once we successfully pick an inode its number is returned and the
  572. * on-disk data structures are updated. The inode itself is not read
  573. * in, since doing so would break ordering constraints with xfs_reclaim.
  574. */
  575. int
  576. xfs_dialloc(
  577. xfs_trans_t *tp, /* transaction pointer */
  578. xfs_ino_t parent, /* parent inode (directory) */
  579. mode_t mode, /* mode bits for new inode */
  580. int okalloc, /* ok to allocate more space */
  581. xfs_buf_t **IO_agbp, /* in/out ag header's buffer */
  582. boolean_t *alloc_done, /* true if we needed to replenish
  583. inode freelist */
  584. xfs_ino_t *inop) /* inode number allocated */
  585. {
  586. xfs_agnumber_t agcount; /* number of allocation groups */
  587. xfs_buf_t *agbp; /* allocation group header's buffer */
  588. xfs_agnumber_t agno; /* allocation group number */
  589. xfs_agi_t *agi; /* allocation group header structure */
  590. xfs_btree_cur_t *cur; /* inode allocation btree cursor */
  591. int error; /* error return value */
  592. int i; /* result code */
  593. int ialloced; /* inode allocation status */
  594. int noroom = 0; /* no space for inode blk allocation */
  595. xfs_ino_t ino; /* fs-relative inode to be returned */
  596. /* REFERENCED */
  597. int j; /* result code */
  598. xfs_mount_t *mp; /* file system mount structure */
  599. int offset; /* index of inode in chunk */
  600. xfs_agino_t pagino; /* parent's a.g. relative inode # */
  601. xfs_agnumber_t pagno; /* parent's allocation group number */
  602. xfs_inobt_rec_incore_t rec; /* inode allocation record */
  603. xfs_agnumber_t tagno; /* testing allocation group number */
  604. xfs_btree_cur_t *tcur; /* temp cursor */
  605. xfs_inobt_rec_incore_t trec; /* temp inode allocation record */
  606. if (*IO_agbp == NULL) {
  607. /*
  608. * We do not have an agbp, so select an initial allocation
  609. * group for inode allocation.
  610. */
  611. agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
  612. /*
  613. * Couldn't find an allocation group satisfying the
  614. * criteria, give up.
  615. */
  616. if (!agbp) {
  617. *inop = NULLFSINO;
  618. return 0;
  619. }
  620. agi = XFS_BUF_TO_AGI(agbp);
  621. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  622. } else {
  623. /*
  624. * Continue where we left off before. In this case, we
  625. * know that the allocation group has free inodes.
  626. */
  627. agbp = *IO_agbp;
  628. agi = XFS_BUF_TO_AGI(agbp);
  629. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  630. ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
  631. }
  632. mp = tp->t_mountp;
  633. agcount = mp->m_sb.sb_agcount;
  634. agno = be32_to_cpu(agi->agi_seqno);
  635. tagno = agno;
  636. pagno = XFS_INO_TO_AGNO(mp, parent);
  637. pagino = XFS_INO_TO_AGINO(mp, parent);
  638. /*
  639. * If we have already hit the ceiling of inode blocks then clear
  640. * okalloc so we scan all available agi structures for a free
  641. * inode.
  642. */
  643. if (mp->m_maxicount &&
  644. mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
  645. noroom = 1;
  646. okalloc = 0;
  647. }
  648. /*
  649. * Loop until we find an allocation group that either has free inodes
  650. * or in which we can allocate some inodes. Iterate through the
  651. * allocation groups upward, wrapping at the end.
  652. */
  653. *alloc_done = B_FALSE;
  654. while (!agi->agi_freecount) {
  655. /*
  656. * Don't do anything if we're not supposed to allocate
  657. * any blocks, just go on to the next ag.
  658. */
  659. if (okalloc) {
  660. /*
  661. * Try to allocate some new inodes in the allocation
  662. * group.
  663. */
  664. if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) {
  665. xfs_trans_brelse(tp, agbp);
  666. if (error == ENOSPC) {
  667. *inop = NULLFSINO;
  668. return 0;
  669. } else
  670. return error;
  671. }
  672. if (ialloced) {
  673. /*
  674. * We successfully allocated some inodes, return
  675. * the current context to the caller so that it
  676. * can commit the current transaction and call
  677. * us again where we left off.
  678. */
  679. ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
  680. *alloc_done = B_TRUE;
  681. *IO_agbp = agbp;
  682. *inop = NULLFSINO;
  683. return 0;
  684. }
  685. }
  686. /*
  687. * If it failed, give up on this ag.
  688. */
  689. xfs_trans_brelse(tp, agbp);
  690. /*
  691. * Go on to the next ag: get its ag header.
  692. */
  693. nextag:
  694. if (++tagno == agcount)
  695. tagno = 0;
  696. if (tagno == agno) {
  697. *inop = NULLFSINO;
  698. return noroom ? ENOSPC : 0;
  699. }
  700. down_read(&mp->m_peraglock);
  701. if (mp->m_perag[tagno].pagi_inodeok == 0) {
  702. up_read(&mp->m_peraglock);
  703. goto nextag;
  704. }
  705. error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
  706. up_read(&mp->m_peraglock);
  707. if (error)
  708. goto nextag;
  709. agi = XFS_BUF_TO_AGI(agbp);
  710. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  711. }
  712. /*
  713. * Here with an allocation group that has a free inode.
  714. * Reset agno since we may have chosen a new ag in the
  715. * loop above.
  716. */
  717. agno = tagno;
  718. *IO_agbp = NULL;
  719. cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
  720. /*
  721. * If pagino is 0 (this is the root inode allocation) use newino.
  722. * This must work because we've just allocated some.
  723. */
  724. if (!pagino)
  725. pagino = be32_to_cpu(agi->agi_newino);
  726. #ifdef DEBUG
  727. if (cur->bc_nlevels == 1) {
  728. int freecount = 0;
  729. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  730. goto error0;
  731. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  732. do {
  733. if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
  734. &rec.ir_freecount, &rec.ir_free, &i)))
  735. goto error0;
  736. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  737. freecount += rec.ir_freecount;
  738. if ((error = xfs_btree_increment(cur, 0, &i)))
  739. goto error0;
  740. } while (i == 1);
  741. ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
  742. XFS_FORCED_SHUTDOWN(mp));
  743. }
  744. #endif
  745. /*
  746. * If in the same a.g. as the parent, try to get near the parent.
  747. */
  748. if (pagno == agno) {
  749. if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i)))
  750. goto error0;
  751. if (i != 0 &&
  752. (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
  753. &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
  754. j == 1 &&
  755. rec.ir_freecount > 0) {
  756. /*
  757. * Found a free inode in the same chunk
  758. * as parent, done.
  759. */
  760. }
  761. /*
  762. * In the same a.g. as parent, but parent's chunk is full.
  763. */
  764. else {
  765. int doneleft; /* done, to the left */
  766. int doneright; /* done, to the right */
  767. if (error)
  768. goto error0;
  769. ASSERT(i == 1);
  770. ASSERT(j == 1);
  771. /*
  772. * Duplicate the cursor, search left & right
  773. * simultaneously.
  774. */
  775. if ((error = xfs_btree_dup_cursor(cur, &tcur)))
  776. goto error0;
  777. /*
  778. * Search left with tcur, back up 1 record.
  779. */
  780. if ((error = xfs_btree_decrement(tcur, 0, &i)))
  781. goto error1;
  782. doneleft = !i;
  783. if (!doneleft) {
  784. if ((error = xfs_inobt_get_rec(tcur,
  785. &trec.ir_startino,
  786. &trec.ir_freecount,
  787. &trec.ir_free, &i)))
  788. goto error1;
  789. XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
  790. }
  791. /*
  792. * Search right with cur, go forward 1 record.
  793. */
  794. if ((error = xfs_btree_increment(cur, 0, &i)))
  795. goto error1;
  796. doneright = !i;
  797. if (!doneright) {
  798. if ((error = xfs_inobt_get_rec(cur,
  799. &rec.ir_startino,
  800. &rec.ir_freecount,
  801. &rec.ir_free, &i)))
  802. goto error1;
  803. XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
  804. }
  805. /*
  806. * Loop until we find the closest inode chunk
  807. * with a free one.
  808. */
  809. while (!doneleft || !doneright) {
  810. int useleft; /* using left inode
  811. chunk this time */
  812. /*
  813. * Figure out which block is closer,
  814. * if both are valid.
  815. */
  816. if (!doneleft && !doneright)
  817. useleft =
  818. pagino -
  819. (trec.ir_startino +
  820. XFS_INODES_PER_CHUNK - 1) <
  821. rec.ir_startino - pagino;
  822. else
  823. useleft = !doneleft;
  824. /*
  825. * If checking the left, does it have
  826. * free inodes?
  827. */
  828. if (useleft && trec.ir_freecount) {
  829. /*
  830. * Yes, set it up as the chunk to use.
  831. */
  832. rec = trec;
  833. xfs_btree_del_cursor(cur,
  834. XFS_BTREE_NOERROR);
  835. cur = tcur;
  836. break;
  837. }
  838. /*
  839. * If checking the right, does it have
  840. * free inodes?
  841. */
  842. if (!useleft && rec.ir_freecount) {
  843. /*
  844. * Yes, it's already set up.
  845. */
  846. xfs_btree_del_cursor(tcur,
  847. XFS_BTREE_NOERROR);
  848. break;
  849. }
  850. /*
  851. * If used the left, get another one
  852. * further left.
  853. */
  854. if (useleft) {
  855. if ((error = xfs_btree_decrement(tcur, 0,
  856. &i)))
  857. goto error1;
  858. doneleft = !i;
  859. if (!doneleft) {
  860. if ((error = xfs_inobt_get_rec(
  861. tcur,
  862. &trec.ir_startino,
  863. &trec.ir_freecount,
  864. &trec.ir_free, &i)))
  865. goto error1;
  866. XFS_WANT_CORRUPTED_GOTO(i == 1,
  867. error1);
  868. }
  869. }
  870. /*
  871. * If used the right, get another one
  872. * further right.
  873. */
  874. else {
  875. if ((error = xfs_btree_increment(cur, 0,
  876. &i)))
  877. goto error1;
  878. doneright = !i;
  879. if (!doneright) {
  880. if ((error = xfs_inobt_get_rec(
  881. cur,
  882. &rec.ir_startino,
  883. &rec.ir_freecount,
  884. &rec.ir_free, &i)))
  885. goto error1;
  886. XFS_WANT_CORRUPTED_GOTO(i == 1,
  887. error1);
  888. }
  889. }
  890. }
  891. ASSERT(!doneleft || !doneright);
  892. }
  893. }
  894. /*
  895. * In a different a.g. from the parent.
  896. * See if the most recently allocated block has any free.
  897. */
  898. else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
  899. if ((error = xfs_inobt_lookup_eq(cur,
  900. be32_to_cpu(agi->agi_newino), 0, 0, &i)))
  901. goto error0;
  902. if (i == 1 &&
  903. (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
  904. &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
  905. j == 1 &&
  906. rec.ir_freecount > 0) {
  907. /*
  908. * The last chunk allocated in the group still has
  909. * a free inode.
  910. */
  911. }
  912. /*
  913. * None left in the last group, search the whole a.g.
  914. */
  915. else {
  916. if (error)
  917. goto error0;
  918. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  919. goto error0;
  920. ASSERT(i == 1);
  921. for (;;) {
  922. if ((error = xfs_inobt_get_rec(cur,
  923. &rec.ir_startino,
  924. &rec.ir_freecount, &rec.ir_free,
  925. &i)))
  926. goto error0;
  927. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  928. if (rec.ir_freecount > 0)
  929. break;
  930. if ((error = xfs_btree_increment(cur, 0, &i)))
  931. goto error0;
  932. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  933. }
  934. }
  935. }
  936. offset = XFS_IALLOC_FIND_FREE(&rec.ir_free);
  937. ASSERT(offset >= 0);
  938. ASSERT(offset < XFS_INODES_PER_CHUNK);
  939. ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
  940. XFS_INODES_PER_CHUNK) == 0);
  941. ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
  942. XFS_INOBT_CLR_FREE(&rec, offset);
  943. rec.ir_freecount--;
  944. if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
  945. rec.ir_free)))
  946. goto error0;
  947. be32_add_cpu(&agi->agi_freecount, -1);
  948. xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
  949. down_read(&mp->m_peraglock);
  950. mp->m_perag[tagno].pagi_freecount--;
  951. up_read(&mp->m_peraglock);
  952. #ifdef DEBUG
  953. if (cur->bc_nlevels == 1) {
  954. int freecount = 0;
  955. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  956. goto error0;
  957. do {
  958. if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
  959. &rec.ir_freecount, &rec.ir_free, &i)))
  960. goto error0;
  961. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  962. freecount += rec.ir_freecount;
  963. if ((error = xfs_btree_increment(cur, 0, &i)))
  964. goto error0;
  965. } while (i == 1);
  966. ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
  967. XFS_FORCED_SHUTDOWN(mp));
  968. }
  969. #endif
  970. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  971. xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
  972. *inop = ino;
  973. return 0;
  974. error1:
  975. xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
  976. error0:
  977. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  978. return error;
  979. }
  980. /*
  981. * Free disk inode. Carefully avoids touching the incore inode, all
  982. * manipulations incore are the caller's responsibility.
  983. * The on-disk inode is not changed by this operation, only the
  984. * btree (free inode mask) is changed.
  985. */
  986. int
  987. xfs_difree(
  988. xfs_trans_t *tp, /* transaction pointer */
  989. xfs_ino_t inode, /* inode to be freed */
  990. xfs_bmap_free_t *flist, /* extents to free */
  991. int *delete, /* set if inode cluster was deleted */
  992. xfs_ino_t *first_ino) /* first inode in deleted cluster */
  993. {
  994. /* REFERENCED */
  995. xfs_agblock_t agbno; /* block number containing inode */
  996. xfs_buf_t *agbp; /* buffer containing allocation group header */
  997. xfs_agino_t agino; /* inode number relative to allocation group */
  998. xfs_agnumber_t agno; /* allocation group number */
  999. xfs_agi_t *agi; /* allocation group header */
  1000. xfs_btree_cur_t *cur; /* inode btree cursor */
  1001. int error; /* error return value */
  1002. int i; /* result code */
  1003. int ilen; /* inodes in an inode cluster */
  1004. xfs_mount_t *mp; /* mount structure for filesystem */
  1005. int off; /* offset of inode in inode chunk */
  1006. xfs_inobt_rec_incore_t rec; /* btree record */
  1007. mp = tp->t_mountp;
  1008. /*
  1009. * Break up inode number into its components.
  1010. */
  1011. agno = XFS_INO_TO_AGNO(mp, inode);
  1012. if (agno >= mp->m_sb.sb_agcount) {
  1013. cmn_err(CE_WARN,
  1014. "xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.",
  1015. agno, mp->m_sb.sb_agcount, mp->m_fsname);
  1016. ASSERT(0);
  1017. return XFS_ERROR(EINVAL);
  1018. }
  1019. agino = XFS_INO_TO_AGINO(mp, inode);
  1020. if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
  1021. cmn_err(CE_WARN,
  1022. "xfs_difree: inode != XFS_AGINO_TO_INO() "
  1023. "(%llu != %llu) on %s. Returning EINVAL.",
  1024. (unsigned long long)inode,
  1025. (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino),
  1026. mp->m_fsname);
  1027. ASSERT(0);
  1028. return XFS_ERROR(EINVAL);
  1029. }
  1030. agbno = XFS_AGINO_TO_AGBNO(mp, agino);
  1031. if (agbno >= mp->m_sb.sb_agblocks) {
  1032. cmn_err(CE_WARN,
  1033. "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.",
  1034. agbno, mp->m_sb.sb_agblocks, mp->m_fsname);
  1035. ASSERT(0);
  1036. return XFS_ERROR(EINVAL);
  1037. }
  1038. /*
  1039. * Get the allocation group header.
  1040. */
  1041. down_read(&mp->m_peraglock);
  1042. error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
  1043. up_read(&mp->m_peraglock);
  1044. if (error) {
  1045. cmn_err(CE_WARN,
  1046. "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.",
  1047. error, mp->m_fsname);
  1048. return error;
  1049. }
  1050. agi = XFS_BUF_TO_AGI(agbp);
  1051. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  1052. ASSERT(agbno < be32_to_cpu(agi->agi_length));
  1053. /*
  1054. * Initialize the cursor.
  1055. */
  1056. cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
  1057. #ifdef DEBUG
  1058. if (cur->bc_nlevels == 1) {
  1059. int freecount = 0;
  1060. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  1061. goto error0;
  1062. do {
  1063. if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
  1064. &rec.ir_freecount, &rec.ir_free, &i)))
  1065. goto error0;
  1066. if (i) {
  1067. freecount += rec.ir_freecount;
  1068. if ((error = xfs_btree_increment(cur, 0, &i)))
  1069. goto error0;
  1070. }
  1071. } while (i == 1);
  1072. ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
  1073. XFS_FORCED_SHUTDOWN(mp));
  1074. }
  1075. #endif
  1076. /*
  1077. * Look for the entry describing this inode.
  1078. */
  1079. if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) {
  1080. cmn_err(CE_WARN,
  1081. "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.",
  1082. error, mp->m_fsname);
  1083. goto error0;
  1084. }
  1085. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  1086. if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount,
  1087. &rec.ir_free, &i))) {
  1088. cmn_err(CE_WARN,
  1089. "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.",
  1090. error, mp->m_fsname);
  1091. goto error0;
  1092. }
  1093. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  1094. /*
  1095. * Get the offset in the inode chunk.
  1096. */
  1097. off = agino - rec.ir_startino;
  1098. ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
  1099. ASSERT(!XFS_INOBT_IS_FREE(&rec, off));
  1100. /*
  1101. * Mark the inode free & increment the count.
  1102. */
  1103. XFS_INOBT_SET_FREE(&rec, off);
  1104. rec.ir_freecount++;
  1105. /*
  1106. * When an inode cluster is free, it becomes eligible for removal
  1107. */
  1108. if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
  1109. (rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
  1110. *delete = 1;
  1111. *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
  1112. /*
  1113. * Remove the inode cluster from the AGI B+Tree, adjust the
  1114. * AGI and Superblock inode counts, and mark the disk space
  1115. * to be freed when the transaction is committed.
  1116. */
  1117. ilen = XFS_IALLOC_INODES(mp);
  1118. be32_add_cpu(&agi->agi_count, -ilen);
  1119. be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
  1120. xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
  1121. down_read(&mp->m_peraglock);
  1122. mp->m_perag[agno].pagi_freecount -= ilen - 1;
  1123. up_read(&mp->m_peraglock);
  1124. xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
  1125. xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
  1126. if ((error = xfs_inobt_delete(cur, &i))) {
  1127. cmn_err(CE_WARN, "xfs_difree: xfs_inobt_delete returned an error %d on %s.\n",
  1128. error, mp->m_fsname);
  1129. goto error0;
  1130. }
  1131. xfs_bmap_add_free(XFS_AGB_TO_FSB(mp,
  1132. agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)),
  1133. XFS_IALLOC_BLOCKS(mp), flist, mp);
  1134. } else {
  1135. *delete = 0;
  1136. if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) {
  1137. cmn_err(CE_WARN,
  1138. "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.",
  1139. error, mp->m_fsname);
  1140. goto error0;
  1141. }
  1142. /*
  1143. * Change the inode free counts and log the ag/sb changes.
  1144. */
  1145. be32_add_cpu(&agi->agi_freecount, 1);
  1146. xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
  1147. down_read(&mp->m_peraglock);
  1148. mp->m_perag[agno].pagi_freecount++;
  1149. up_read(&mp->m_peraglock);
  1150. xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
  1151. }
  1152. #ifdef DEBUG
  1153. if (cur->bc_nlevels == 1) {
  1154. int freecount = 0;
  1155. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  1156. goto error0;
  1157. do {
  1158. if ((error = xfs_inobt_get_rec(cur,
  1159. &rec.ir_startino,
  1160. &rec.ir_freecount,
  1161. &rec.ir_free, &i)))
  1162. goto error0;
  1163. if (i) {
  1164. freecount += rec.ir_freecount;
  1165. if ((error = xfs_btree_increment(cur, 0, &i)))
  1166. goto error0;
  1167. }
  1168. } while (i == 1);
  1169. ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
  1170. XFS_FORCED_SHUTDOWN(mp));
  1171. }
  1172. #endif
  1173. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  1174. return 0;
  1175. error0:
  1176. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  1177. return error;
  1178. }
  1179. /*
  1180. * Return the location of the inode in bno/off, for mapping it into a buffer.
  1181. */
  1182. /*ARGSUSED*/
  1183. int
  1184. xfs_dilocate(
  1185. xfs_mount_t *mp, /* file system mount structure */
  1186. xfs_trans_t *tp, /* transaction pointer */
  1187. xfs_ino_t ino, /* inode to locate */
  1188. xfs_fsblock_t *bno, /* output: block containing inode */
  1189. int *len, /* output: num blocks in inode cluster */
  1190. int *off, /* output: index in block of inode */
  1191. uint flags) /* flags concerning inode lookup */
  1192. {
  1193. xfs_agblock_t agbno; /* block number of inode in the alloc group */
  1194. xfs_buf_t *agbp; /* agi buffer */
  1195. xfs_agino_t agino; /* inode number within alloc group */
  1196. xfs_agnumber_t agno; /* allocation group number */
  1197. int blks_per_cluster; /* num blocks per inode cluster */
  1198. xfs_agblock_t chunk_agbno; /* first block in inode chunk */
  1199. xfs_agino_t chunk_agino; /* first agino in inode chunk */
  1200. __int32_t chunk_cnt; /* count of free inodes in chunk */
  1201. xfs_inofree_t chunk_free; /* mask of free inodes in chunk */
  1202. xfs_agblock_t cluster_agbno; /* first block in inode cluster */
  1203. xfs_btree_cur_t *cur; /* inode btree cursor */
  1204. int error; /* error code */
  1205. int i; /* temp state */
  1206. int offset; /* index of inode in its buffer */
  1207. int offset_agbno; /* blks from chunk start to inode */
  1208. ASSERT(ino != NULLFSINO);
  1209. /*
  1210. * Split up the inode number into its parts.
  1211. */
  1212. agno = XFS_INO_TO_AGNO(mp, ino);
  1213. agino = XFS_INO_TO_AGINO(mp, ino);
  1214. agbno = XFS_AGINO_TO_AGBNO(mp, agino);
  1215. if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
  1216. ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
  1217. #ifdef DEBUG
  1218. /* no diagnostics for bulkstat, ino comes from userspace */
  1219. if (flags & XFS_IMAP_BULKSTAT)
  1220. return XFS_ERROR(EINVAL);
  1221. if (agno >= mp->m_sb.sb_agcount) {
  1222. xfs_fs_cmn_err(CE_ALERT, mp,
  1223. "xfs_dilocate: agno (%d) >= "
  1224. "mp->m_sb.sb_agcount (%d)",
  1225. agno, mp->m_sb.sb_agcount);
  1226. }
  1227. if (agbno >= mp->m_sb.sb_agblocks) {
  1228. xfs_fs_cmn_err(CE_ALERT, mp,
  1229. "xfs_dilocate: agbno (0x%llx) >= "
  1230. "mp->m_sb.sb_agblocks (0x%lx)",
  1231. (unsigned long long) agbno,
  1232. (unsigned long) mp->m_sb.sb_agblocks);
  1233. }
  1234. if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
  1235. xfs_fs_cmn_err(CE_ALERT, mp,
  1236. "xfs_dilocate: ino (0x%llx) != "
  1237. "XFS_AGINO_TO_INO(mp, agno, agino) "
  1238. "(0x%llx)",
  1239. ino, XFS_AGINO_TO_INO(mp, agno, agino));
  1240. }
  1241. xfs_stack_trace();
  1242. #endif /* DEBUG */
  1243. return XFS_ERROR(EINVAL);
  1244. }
  1245. if ((mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) ||
  1246. !(flags & XFS_IMAP_LOOKUP)) {
  1247. offset = XFS_INO_TO_OFFSET(mp, ino);
  1248. ASSERT(offset < mp->m_sb.sb_inopblock);
  1249. *bno = XFS_AGB_TO_FSB(mp, agno, agbno);
  1250. *off = offset;
  1251. *len = 1;
  1252. return 0;
  1253. }
  1254. blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
  1255. if (*bno != NULLFSBLOCK) {
  1256. offset = XFS_INO_TO_OFFSET(mp, ino);
  1257. ASSERT(offset < mp->m_sb.sb_inopblock);
  1258. cluster_agbno = XFS_FSB_TO_AGBNO(mp, *bno);
  1259. *off = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
  1260. offset;
  1261. *len = blks_per_cluster;
  1262. return 0;
  1263. }
  1264. if (mp->m_inoalign_mask) {
  1265. offset_agbno = agbno & mp->m_inoalign_mask;
  1266. chunk_agbno = agbno - offset_agbno;
  1267. } else {
  1268. down_read(&mp->m_peraglock);
  1269. error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
  1270. up_read(&mp->m_peraglock);
  1271. if (error) {
  1272. #ifdef DEBUG
  1273. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
  1274. "xfs_ialloc_read_agi() returned "
  1275. "error %d, agno %d",
  1276. error, agno);
  1277. #endif /* DEBUG */
  1278. return error;
  1279. }
  1280. cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
  1281. if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) {
  1282. #ifdef DEBUG
  1283. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
  1284. "xfs_inobt_lookup_le() failed");
  1285. #endif /* DEBUG */
  1286. goto error0;
  1287. }
  1288. if ((error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt,
  1289. &chunk_free, &i))) {
  1290. #ifdef DEBUG
  1291. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
  1292. "xfs_inobt_get_rec() failed");
  1293. #endif /* DEBUG */
  1294. goto error0;
  1295. }
  1296. if (i == 0) {
  1297. #ifdef DEBUG
  1298. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
  1299. "xfs_inobt_get_rec() failed");
  1300. #endif /* DEBUG */
  1301. error = XFS_ERROR(EINVAL);
  1302. }
  1303. xfs_trans_brelse(tp, agbp);
  1304. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  1305. if (error)
  1306. return error;
  1307. chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino);
  1308. offset_agbno = agbno - chunk_agbno;
  1309. }
  1310. ASSERT(agbno >= chunk_agbno);
  1311. cluster_agbno = chunk_agbno +
  1312. ((offset_agbno / blks_per_cluster) * blks_per_cluster);
  1313. offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
  1314. XFS_INO_TO_OFFSET(mp, ino);
  1315. *bno = XFS_AGB_TO_FSB(mp, agno, cluster_agbno);
  1316. *off = offset;
  1317. *len = blks_per_cluster;
  1318. return 0;
  1319. error0:
  1320. xfs_trans_brelse(tp, agbp);
  1321. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  1322. return error;
  1323. }
  1324. /*
  1325. * Compute and fill in value of m_in_maxlevels.
  1326. */
  1327. void
  1328. xfs_ialloc_compute_maxlevels(
  1329. xfs_mount_t *mp) /* file system mount structure */
  1330. {
  1331. int level;
  1332. uint maxblocks;
  1333. uint maxleafents;
  1334. int minleafrecs;
  1335. int minnoderecs;
  1336. maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >>
  1337. XFS_INODES_PER_CHUNK_LOG;
  1338. minleafrecs = mp->m_alloc_mnr[0];
  1339. minnoderecs = mp->m_alloc_mnr[1];
  1340. maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
  1341. for (level = 1; maxblocks > 1; level++)
  1342. maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
  1343. mp->m_in_maxlevels = level;
  1344. }
  1345. /*
  1346. * Log specified fields for the ag hdr (inode section)
  1347. */
  1348. void
  1349. xfs_ialloc_log_agi(
  1350. xfs_trans_t *tp, /* transaction pointer */
  1351. xfs_buf_t *bp, /* allocation group header buffer */
  1352. int fields) /* bitmask of fields to log */
  1353. {
  1354. int first; /* first byte number */
  1355. int last; /* last byte number */
  1356. static const short offsets[] = { /* field starting offsets */
  1357. /* keep in sync with bit definitions */
  1358. offsetof(xfs_agi_t, agi_magicnum),
  1359. offsetof(xfs_agi_t, agi_versionnum),
  1360. offsetof(xfs_agi_t, agi_seqno),
  1361. offsetof(xfs_agi_t, agi_length),
  1362. offsetof(xfs_agi_t, agi_count),
  1363. offsetof(xfs_agi_t, agi_root),
  1364. offsetof(xfs_agi_t, agi_level),
  1365. offsetof(xfs_agi_t, agi_freecount),
  1366. offsetof(xfs_agi_t, agi_newino),
  1367. offsetof(xfs_agi_t, agi_dirino),
  1368. offsetof(xfs_agi_t, agi_unlinked),
  1369. sizeof(xfs_agi_t)
  1370. };
  1371. #ifdef DEBUG
  1372. xfs_agi_t *agi; /* allocation group header */
  1373. agi = XFS_BUF_TO_AGI(bp);
  1374. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  1375. #endif
  1376. /*
  1377. * Compute byte offsets for the first and last fields.
  1378. */
  1379. xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last);
  1380. /*
  1381. * Log the allocation group inode header buffer.
  1382. */
  1383. xfs_trans_log_buf(tp, bp, first, last);
  1384. }
  1385. /*
  1386. * Read in the allocation group header (inode allocation section)
  1387. */
  1388. int
  1389. xfs_ialloc_read_agi(
  1390. xfs_mount_t *mp, /* file system mount structure */
  1391. xfs_trans_t *tp, /* transaction pointer */
  1392. xfs_agnumber_t agno, /* allocation group number */
  1393. xfs_buf_t **bpp) /* allocation group hdr buf */
  1394. {
  1395. xfs_agi_t *agi; /* allocation group header */
  1396. int agi_ok; /* agi is consistent */
  1397. xfs_buf_t *bp; /* allocation group hdr buf */
  1398. xfs_perag_t *pag; /* per allocation group data */
  1399. int error;
  1400. ASSERT(agno != NULLAGNUMBER);
  1401. error = xfs_trans_read_buf(
  1402. mp, tp, mp->m_ddev_targp,
  1403. XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
  1404. XFS_FSS_TO_BB(mp, 1), 0, &bp);
  1405. if (error)
  1406. return error;
  1407. ASSERT(bp && !XFS_BUF_GETERROR(bp));
  1408. /*
  1409. * Validate the magic number of the agi block.
  1410. */
  1411. agi = XFS_BUF_TO_AGI(bp);
  1412. agi_ok =
  1413. be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
  1414. XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
  1415. if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
  1416. XFS_RANDOM_IALLOC_READ_AGI))) {
  1417. XFS_CORRUPTION_ERROR("xfs_ialloc_read_agi", XFS_ERRLEVEL_LOW,
  1418. mp, agi);
  1419. xfs_trans_brelse(tp, bp);
  1420. return XFS_ERROR(EFSCORRUPTED);
  1421. }
  1422. pag = &mp->m_perag[agno];
  1423. if (!pag->pagi_init) {
  1424. pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
  1425. pag->pagi_count = be32_to_cpu(agi->agi_count);
  1426. pag->pagi_init = 1;
  1427. } else {
  1428. /*
  1429. * It's possible for these to be out of sync if
  1430. * we are in the middle of a forced shutdown.
  1431. */
  1432. ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
  1433. XFS_FORCED_SHUTDOWN(mp));
  1434. }
  1435. #ifdef DEBUG
  1436. {
  1437. int i;
  1438. for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
  1439. ASSERT(agi->agi_unlinked[i]);
  1440. }
  1441. #endif
  1442. XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGI, XFS_AGI_REF);
  1443. *bpp = bp;
  1444. return 0;
  1445. }
  1446. /*
  1447. * Read in the agi to initialise the per-ag data in the mount structure
  1448. */
  1449. int
  1450. xfs_ialloc_pagi_init(
  1451. xfs_mount_t *mp, /* file system mount structure */
  1452. xfs_trans_t *tp, /* transaction pointer */
  1453. xfs_agnumber_t agno) /* allocation group number */
  1454. {
  1455. xfs_buf_t *bp = NULL;
  1456. int error;
  1457. error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
  1458. if (error)
  1459. return error;
  1460. if (bp)
  1461. xfs_trans_brelse(tp, bp);
  1462. return 0;
  1463. }