xfs_ialloc.c 40 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386
  1. /*
  2. * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
  3. * All Rights Reserved.
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU General Public License as
  7. * published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it would be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write the Free Software Foundation,
  16. * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "xfs.h"
  19. #include "xfs_fs.h"
  20. #include "xfs_types.h"
  21. #include "xfs_bit.h"
  22. #include "xfs_log.h"
  23. #include "xfs_inum.h"
  24. #include "xfs_trans.h"
  25. #include "xfs_sb.h"
  26. #include "xfs_ag.h"
  27. #include "xfs_dir.h"
  28. #include "xfs_dir2.h"
  29. #include "xfs_dmapi.h"
  30. #include "xfs_mount.h"
  31. #include "xfs_bmap_btree.h"
  32. #include "xfs_alloc_btree.h"
  33. #include "xfs_ialloc_btree.h"
  34. #include "xfs_dir_sf.h"
  35. #include "xfs_dir2_sf.h"
  36. #include "xfs_attr_sf.h"
  37. #include "xfs_dinode.h"
  38. #include "xfs_inode.h"
  39. #include "xfs_btree.h"
  40. #include "xfs_ialloc.h"
  41. #include "xfs_alloc.h"
  42. #include "xfs_rtalloc.h"
  43. #include "xfs_error.h"
  44. #include "xfs_bmap.h"
  45. /*
  46. * Log specified fields for the inode given by bp and off.
  47. */
  48. STATIC void
  49. xfs_ialloc_log_di(
  50. xfs_trans_t *tp, /* transaction pointer */
  51. xfs_buf_t *bp, /* inode buffer */
  52. int off, /* index of inode in buffer */
  53. int fields) /* bitmask of fields to log */
  54. {
  55. int first; /* first byte number */
  56. int ioffset; /* off in bytes */
  57. int last; /* last byte number */
  58. xfs_mount_t *mp; /* mount point structure */
  59. static const short offsets[] = { /* field offsets */
  60. /* keep in sync with bits */
  61. offsetof(xfs_dinode_core_t, di_magic),
  62. offsetof(xfs_dinode_core_t, di_mode),
  63. offsetof(xfs_dinode_core_t, di_version),
  64. offsetof(xfs_dinode_core_t, di_format),
  65. offsetof(xfs_dinode_core_t, di_onlink),
  66. offsetof(xfs_dinode_core_t, di_uid),
  67. offsetof(xfs_dinode_core_t, di_gid),
  68. offsetof(xfs_dinode_core_t, di_nlink),
  69. offsetof(xfs_dinode_core_t, di_projid),
  70. offsetof(xfs_dinode_core_t, di_pad),
  71. offsetof(xfs_dinode_core_t, di_atime),
  72. offsetof(xfs_dinode_core_t, di_mtime),
  73. offsetof(xfs_dinode_core_t, di_ctime),
  74. offsetof(xfs_dinode_core_t, di_size),
  75. offsetof(xfs_dinode_core_t, di_nblocks),
  76. offsetof(xfs_dinode_core_t, di_extsize),
  77. offsetof(xfs_dinode_core_t, di_nextents),
  78. offsetof(xfs_dinode_core_t, di_anextents),
  79. offsetof(xfs_dinode_core_t, di_forkoff),
  80. offsetof(xfs_dinode_core_t, di_aformat),
  81. offsetof(xfs_dinode_core_t, di_dmevmask),
  82. offsetof(xfs_dinode_core_t, di_dmstate),
  83. offsetof(xfs_dinode_core_t, di_flags),
  84. offsetof(xfs_dinode_core_t, di_gen),
  85. offsetof(xfs_dinode_t, di_next_unlinked),
  86. offsetof(xfs_dinode_t, di_u),
  87. offsetof(xfs_dinode_t, di_a),
  88. sizeof(xfs_dinode_t)
  89. };
  90. ASSERT(offsetof(xfs_dinode_t, di_core) == 0);
  91. ASSERT((fields & (XFS_DI_U|XFS_DI_A)) == 0);
  92. mp = tp->t_mountp;
  93. /*
  94. * Get the inode-relative first and last bytes for these fields
  95. */
  96. xfs_btree_offsets(fields, offsets, XFS_DI_NUM_BITS, &first, &last);
  97. /*
  98. * Convert to buffer offsets and log it.
  99. */
  100. ioffset = off << mp->m_sb.sb_inodelog;
  101. first += ioffset;
  102. last += ioffset;
  103. xfs_trans_log_buf(tp, bp, first, last);
  104. }
  105. /*
  106. * Allocation group level functions.
  107. */
  108. /*
  109. * Allocate new inodes in the allocation group specified by agbp.
  110. * Return 0 for success, else error code.
  111. */
  112. STATIC int /* error code or 0 */
  113. xfs_ialloc_ag_alloc(
  114. xfs_trans_t *tp, /* transaction pointer */
  115. xfs_buf_t *agbp, /* alloc group buffer */
  116. int *alloc)
  117. {
  118. xfs_agi_t *agi; /* allocation group header */
  119. xfs_alloc_arg_t args; /* allocation argument structure */
  120. int blks_per_cluster; /* fs blocks per inode cluster */
  121. xfs_btree_cur_t *cur; /* inode btree cursor */
  122. xfs_daddr_t d; /* disk addr of buffer */
  123. int error;
  124. xfs_buf_t *fbuf; /* new free inodes' buffer */
  125. xfs_dinode_t *free; /* new free inode structure */
  126. int i; /* inode counter */
  127. int j; /* block counter */
  128. int nbufs; /* num bufs of new inodes */
  129. xfs_agino_t newino; /* new first inode's number */
  130. xfs_agino_t newlen; /* new number of inodes */
  131. int ninodes; /* num inodes per buf */
  132. xfs_agino_t thisino; /* current inode number, for loop */
  133. int version; /* inode version number to use */
  134. int isaligned; /* inode allocation at stripe unit */
  135. /* boundary */
  136. xfs_dinode_core_t dic; /* a dinode_core to copy to new */
  137. /* inodes */
  138. args.tp = tp;
  139. args.mp = tp->t_mountp;
  140. /*
  141. * Locking will ensure that we don't have two callers in here
  142. * at one time.
  143. */
  144. newlen = XFS_IALLOC_INODES(args.mp);
  145. if (args.mp->m_maxicount &&
  146. args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
  147. return XFS_ERROR(ENOSPC);
  148. args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
  149. /*
  150. * Set the alignment for the allocation.
  151. * If stripe alignment is turned on then align at stripe unit
  152. * boundary.
  153. * If the cluster size is smaller than a filesystem block
  154. * then we're doing I/O for inodes in filesystem block size pieces,
  155. * so don't need alignment anyway.
  156. */
  157. isaligned = 0;
  158. if (args.mp->m_sinoalign) {
  159. ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
  160. args.alignment = args.mp->m_dalign;
  161. isaligned = 1;
  162. } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
  163. args.mp->m_sb.sb_inoalignmt >=
  164. XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
  165. args.alignment = args.mp->m_sb.sb_inoalignmt;
  166. else
  167. args.alignment = 1;
  168. agi = XFS_BUF_TO_AGI(agbp);
  169. /*
  170. * Need to figure out where to allocate the inode blocks.
  171. * Ideally they should be spaced out through the a.g.
  172. * For now, just allocate blocks up front.
  173. */
  174. args.agbno = be32_to_cpu(agi->agi_root);
  175. args.fsbno = XFS_AGB_TO_FSB(args.mp, be32_to_cpu(agi->agi_seqno),
  176. args.agbno);
  177. /*
  178. * Allocate a fixed-size extent of inodes.
  179. */
  180. args.type = XFS_ALLOCTYPE_NEAR_BNO;
  181. args.mod = args.total = args.wasdel = args.isfl = args.userdata =
  182. args.minalignslop = 0;
  183. args.prod = 1;
  184. /*
  185. * Allow space for the inode btree to split.
  186. */
  187. args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
  188. if ((error = xfs_alloc_vextent(&args)))
  189. return error;
  190. /*
  191. * If stripe alignment is turned on, then try again with cluster
  192. * alignment.
  193. */
  194. if (isaligned && args.fsbno == NULLFSBLOCK) {
  195. args.type = XFS_ALLOCTYPE_NEAR_BNO;
  196. args.agbno = be32_to_cpu(agi->agi_root);
  197. args.fsbno = XFS_AGB_TO_FSB(args.mp,
  198. be32_to_cpu(agi->agi_seqno), args.agbno);
  199. if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
  200. args.mp->m_sb.sb_inoalignmt >=
  201. XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
  202. args.alignment = args.mp->m_sb.sb_inoalignmt;
  203. else
  204. args.alignment = 1;
  205. if ((error = xfs_alloc_vextent(&args)))
  206. return error;
  207. }
  208. if (args.fsbno == NULLFSBLOCK) {
  209. *alloc = 0;
  210. return 0;
  211. }
  212. ASSERT(args.len == args.minlen);
  213. /*
  214. * Convert the results.
  215. */
  216. newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
  217. /*
  218. * Loop over the new block(s), filling in the inodes.
  219. * For small block sizes, manipulate the inodes in buffers
  220. * which are multiples of the blocks size.
  221. */
  222. if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) {
  223. blks_per_cluster = 1;
  224. nbufs = (int)args.len;
  225. ninodes = args.mp->m_sb.sb_inopblock;
  226. } else {
  227. blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) /
  228. args.mp->m_sb.sb_blocksize;
  229. nbufs = (int)args.len / blks_per_cluster;
  230. ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock;
  231. }
  232. /*
  233. * Figure out what version number to use in the inodes we create.
  234. * If the superblock version has caught up to the one that supports
  235. * the new inode format, then use the new inode version. Otherwise
  236. * use the old version so that old kernels will continue to be
  237. * able to use the file system.
  238. */
  239. if (XFS_SB_VERSION_HASNLINK(&args.mp->m_sb))
  240. version = XFS_DINODE_VERSION_2;
  241. else
  242. version = XFS_DINODE_VERSION_1;
  243. memset(&dic, 0, sizeof(xfs_dinode_core_t));
  244. INT_SET(dic.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC);
  245. INT_SET(dic.di_version, ARCH_CONVERT, version);
  246. for (j = 0; j < nbufs; j++) {
  247. /*
  248. * Get the block.
  249. */
  250. d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno),
  251. args.agbno + (j * blks_per_cluster));
  252. fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d,
  253. args.mp->m_bsize * blks_per_cluster,
  254. XFS_BUF_LOCK);
  255. ASSERT(fbuf);
  256. ASSERT(!XFS_BUF_GETERROR(fbuf));
  257. /*
  258. * Loop over the inodes in this buffer.
  259. */
  260. for (i = 0; i < ninodes; i++) {
  261. free = XFS_MAKE_IPTR(args.mp, fbuf, i);
  262. memcpy(&(free->di_core), &dic, sizeof(xfs_dinode_core_t));
  263. INT_SET(free->di_next_unlinked, ARCH_CONVERT, NULLAGINO);
  264. xfs_ialloc_log_di(tp, fbuf, i,
  265. XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
  266. }
  267. xfs_trans_inode_alloc_buf(tp, fbuf);
  268. }
  269. be32_add(&agi->agi_count, newlen);
  270. be32_add(&agi->agi_freecount, newlen);
  271. down_read(&args.mp->m_peraglock);
  272. args.mp->m_perag[be32_to_cpu(agi->agi_seqno)].pagi_freecount += newlen;
  273. up_read(&args.mp->m_peraglock);
  274. agi->agi_newino = cpu_to_be32(newino);
  275. /*
  276. * Insert records describing the new inode chunk into the btree.
  277. */
  278. cur = xfs_btree_init_cursor(args.mp, tp, agbp,
  279. be32_to_cpu(agi->agi_seqno),
  280. XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
  281. for (thisino = newino;
  282. thisino < newino + newlen;
  283. thisino += XFS_INODES_PER_CHUNK) {
  284. if ((error = xfs_inobt_lookup_eq(cur, thisino,
  285. XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) {
  286. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  287. return error;
  288. }
  289. ASSERT(i == 0);
  290. if ((error = xfs_inobt_insert(cur, &i))) {
  291. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  292. return error;
  293. }
  294. ASSERT(i == 1);
  295. }
  296. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  297. /*
  298. * Log allocation group header fields
  299. */
  300. xfs_ialloc_log_agi(tp, agbp,
  301. XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
  302. /*
  303. * Modify/log superblock values for inode count and inode free count.
  304. */
  305. xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
  306. xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
  307. *alloc = 1;
  308. return 0;
  309. }
  310. STATIC __inline xfs_agnumber_t
  311. xfs_ialloc_next_ag(
  312. xfs_mount_t *mp)
  313. {
  314. xfs_agnumber_t agno;
  315. spin_lock(&mp->m_agirotor_lock);
  316. agno = mp->m_agirotor;
  317. if (++mp->m_agirotor == mp->m_maxagi)
  318. mp->m_agirotor = 0;
  319. spin_unlock(&mp->m_agirotor_lock);
  320. return agno;
  321. }
  322. /*
  323. * Select an allocation group to look for a free inode in, based on the parent
  324. * inode and then mode. Return the allocation group buffer.
  325. */
  326. STATIC xfs_buf_t * /* allocation group buffer */
  327. xfs_ialloc_ag_select(
  328. xfs_trans_t *tp, /* transaction pointer */
  329. xfs_ino_t parent, /* parent directory inode number */
  330. mode_t mode, /* bits set to indicate file type */
  331. int okalloc) /* ok to allocate more space */
  332. {
  333. xfs_buf_t *agbp; /* allocation group header buffer */
  334. xfs_agnumber_t agcount; /* number of ag's in the filesystem */
  335. xfs_agnumber_t agno; /* current ag number */
  336. int flags; /* alloc buffer locking flags */
  337. xfs_extlen_t ineed; /* blocks needed for inode allocation */
  338. xfs_extlen_t longest = 0; /* longest extent available */
  339. xfs_mount_t *mp; /* mount point structure */
  340. int needspace; /* file mode implies space allocated */
  341. xfs_perag_t *pag; /* per allocation group data */
  342. xfs_agnumber_t pagno; /* parent (starting) ag number */
  343. /*
  344. * Files of these types need at least one block if length > 0
  345. * (and they won't fit in the inode, but that's hard to figure out).
  346. */
  347. needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
  348. mp = tp->t_mountp;
  349. agcount = mp->m_maxagi;
  350. if (S_ISDIR(mode))
  351. pagno = xfs_ialloc_next_ag(mp);
  352. else {
  353. pagno = XFS_INO_TO_AGNO(mp, parent);
  354. if (pagno >= agcount)
  355. pagno = 0;
  356. }
  357. ASSERT(pagno < agcount);
  358. /*
  359. * Loop through allocation groups, looking for one with a little
  360. * free space in it. Note we don't look for free inodes, exactly.
  361. * Instead, we include whether there is a need to allocate inodes
  362. * to mean that blocks must be allocated for them,
  363. * if none are currently free.
  364. */
  365. agno = pagno;
  366. flags = XFS_ALLOC_FLAG_TRYLOCK;
  367. down_read(&mp->m_peraglock);
  368. for (;;) {
  369. pag = &mp->m_perag[agno];
  370. if (!pag->pagi_init) {
  371. if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
  372. agbp = NULL;
  373. goto nextag;
  374. }
  375. } else
  376. agbp = NULL;
  377. if (!pag->pagi_inodeok) {
  378. xfs_ialloc_next_ag(mp);
  379. goto unlock_nextag;
  380. }
  381. /*
  382. * Is there enough free space for the file plus a block
  383. * of inodes (if we need to allocate some)?
  384. */
  385. ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp);
  386. if (ineed && !pag->pagf_init) {
  387. if (agbp == NULL &&
  388. xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
  389. agbp = NULL;
  390. goto nextag;
  391. }
  392. (void)xfs_alloc_pagf_init(mp, tp, agno, flags);
  393. }
  394. if (!ineed || pag->pagf_init) {
  395. if (ineed && !(longest = pag->pagf_longest))
  396. longest = pag->pagf_flcount > 0;
  397. if (!ineed ||
  398. (pag->pagf_freeblks >= needspace + ineed &&
  399. longest >= ineed &&
  400. okalloc)) {
  401. if (agbp == NULL &&
  402. xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
  403. agbp = NULL;
  404. goto nextag;
  405. }
  406. up_read(&mp->m_peraglock);
  407. return agbp;
  408. }
  409. }
  410. unlock_nextag:
  411. if (agbp)
  412. xfs_trans_brelse(tp, agbp);
  413. nextag:
  414. /*
  415. * No point in iterating over the rest, if we're shutting
  416. * down.
  417. */
  418. if (XFS_FORCED_SHUTDOWN(mp)) {
  419. up_read(&mp->m_peraglock);
  420. return (xfs_buf_t *)0;
  421. }
  422. agno++;
  423. if (agno >= agcount)
  424. agno = 0;
  425. if (agno == pagno) {
  426. if (flags == 0) {
  427. up_read(&mp->m_peraglock);
  428. return (xfs_buf_t *)0;
  429. }
  430. flags = 0;
  431. }
  432. }
  433. }
  434. /*
  435. * Visible inode allocation functions.
  436. */
  437. /*
  438. * Allocate an inode on disk.
  439. * Mode is used to tell whether the new inode will need space, and whether
  440. * it is a directory.
  441. *
  442. * The arguments IO_agbp and alloc_done are defined to work within
  443. * the constraint of one allocation per transaction.
  444. * xfs_dialloc() is designed to be called twice if it has to do an
  445. * allocation to make more free inodes. On the first call,
  446. * IO_agbp should be set to NULL. If an inode is available,
  447. * i.e., xfs_dialloc() did not need to do an allocation, an inode
  448. * number is returned. In this case, IO_agbp would be set to the
  449. * current ag_buf and alloc_done set to false.
  450. * If an allocation needed to be done, xfs_dialloc would return
  451. * the current ag_buf in IO_agbp and set alloc_done to true.
  452. * The caller should then commit the current transaction, allocate a new
  453. * transaction, and call xfs_dialloc() again, passing in the previous
  454. * value of IO_agbp. IO_agbp should be held across the transactions.
  455. * Since the agbp is locked across the two calls, the second call is
  456. * guaranteed to have a free inode available.
  457. *
  458. * Once we successfully pick an inode its number is returned and the
  459. * on-disk data structures are updated. The inode itself is not read
  460. * in, since doing so would break ordering constraints with xfs_reclaim.
  461. */
  462. int
  463. xfs_dialloc(
  464. xfs_trans_t *tp, /* transaction pointer */
  465. xfs_ino_t parent, /* parent inode (directory) */
  466. mode_t mode, /* mode bits for new inode */
  467. int okalloc, /* ok to allocate more space */
  468. xfs_buf_t **IO_agbp, /* in/out ag header's buffer */
  469. boolean_t *alloc_done, /* true if we needed to replenish
  470. inode freelist */
  471. xfs_ino_t *inop) /* inode number allocated */
  472. {
  473. xfs_agnumber_t agcount; /* number of allocation groups */
  474. xfs_buf_t *agbp; /* allocation group header's buffer */
  475. xfs_agnumber_t agno; /* allocation group number */
  476. xfs_agi_t *agi; /* allocation group header structure */
  477. xfs_btree_cur_t *cur; /* inode allocation btree cursor */
  478. int error; /* error return value */
  479. int i; /* result code */
  480. int ialloced; /* inode allocation status */
  481. int noroom = 0; /* no space for inode blk allocation */
  482. xfs_ino_t ino; /* fs-relative inode to be returned */
  483. /* REFERENCED */
  484. int j; /* result code */
  485. xfs_mount_t *mp; /* file system mount structure */
  486. int offset; /* index of inode in chunk */
  487. xfs_agino_t pagino; /* parent's a.g. relative inode # */
  488. xfs_agnumber_t pagno; /* parent's allocation group number */
  489. xfs_inobt_rec_t rec; /* inode allocation record */
  490. xfs_agnumber_t tagno; /* testing allocation group number */
  491. xfs_btree_cur_t *tcur; /* temp cursor */
  492. xfs_inobt_rec_t trec; /* temp inode allocation record */
  493. if (*IO_agbp == NULL) {
  494. /*
  495. * We do not have an agbp, so select an initial allocation
  496. * group for inode allocation.
  497. */
  498. agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
  499. /*
  500. * Couldn't find an allocation group satisfying the
  501. * criteria, give up.
  502. */
  503. if (!agbp) {
  504. *inop = NULLFSINO;
  505. return 0;
  506. }
  507. agi = XFS_BUF_TO_AGI(agbp);
  508. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  509. } else {
  510. /*
  511. * Continue where we left off before. In this case, we
  512. * know that the allocation group has free inodes.
  513. */
  514. agbp = *IO_agbp;
  515. agi = XFS_BUF_TO_AGI(agbp);
  516. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  517. ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
  518. }
  519. mp = tp->t_mountp;
  520. agcount = mp->m_sb.sb_agcount;
  521. agno = be32_to_cpu(agi->agi_seqno);
  522. tagno = agno;
  523. pagno = XFS_INO_TO_AGNO(mp, parent);
  524. pagino = XFS_INO_TO_AGINO(mp, parent);
  525. /*
  526. * If we have already hit the ceiling of inode blocks then clear
  527. * okalloc so we scan all available agi structures for a free
  528. * inode.
  529. */
  530. if (mp->m_maxicount &&
  531. mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
  532. noroom = 1;
  533. okalloc = 0;
  534. }
  535. /*
  536. * Loop until we find an allocation group that either has free inodes
  537. * or in which we can allocate some inodes. Iterate through the
  538. * allocation groups upward, wrapping at the end.
  539. */
  540. *alloc_done = B_FALSE;
  541. while (!agi->agi_freecount) {
  542. /*
  543. * Don't do anything if we're not supposed to allocate
  544. * any blocks, just go on to the next ag.
  545. */
  546. if (okalloc) {
  547. /*
  548. * Try to allocate some new inodes in the allocation
  549. * group.
  550. */
  551. if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) {
  552. xfs_trans_brelse(tp, agbp);
  553. if (error == ENOSPC) {
  554. *inop = NULLFSINO;
  555. return 0;
  556. } else
  557. return error;
  558. }
  559. if (ialloced) {
  560. /*
  561. * We successfully allocated some inodes, return
  562. * the current context to the caller so that it
  563. * can commit the current transaction and call
  564. * us again where we left off.
  565. */
  566. ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
  567. *alloc_done = B_TRUE;
  568. *IO_agbp = agbp;
  569. *inop = NULLFSINO;
  570. return 0;
  571. }
  572. }
  573. /*
  574. * If it failed, give up on this ag.
  575. */
  576. xfs_trans_brelse(tp, agbp);
  577. /*
  578. * Go on to the next ag: get its ag header.
  579. */
  580. nextag:
  581. if (++tagno == agcount)
  582. tagno = 0;
  583. if (tagno == agno) {
  584. *inop = NULLFSINO;
  585. return noroom ? ENOSPC : 0;
  586. }
  587. down_read(&mp->m_peraglock);
  588. if (mp->m_perag[tagno].pagi_inodeok == 0) {
  589. up_read(&mp->m_peraglock);
  590. goto nextag;
  591. }
  592. error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
  593. up_read(&mp->m_peraglock);
  594. if (error)
  595. goto nextag;
  596. agi = XFS_BUF_TO_AGI(agbp);
  597. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  598. }
  599. /*
  600. * Here with an allocation group that has a free inode.
  601. * Reset agno since we may have chosen a new ag in the
  602. * loop above.
  603. */
  604. agno = tagno;
  605. *IO_agbp = NULL;
  606. cur = xfs_btree_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno),
  607. XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
  608. /*
  609. * If pagino is 0 (this is the root inode allocation) use newino.
  610. * This must work because we've just allocated some.
  611. */
  612. if (!pagino)
  613. pagino = be32_to_cpu(agi->agi_newino);
  614. #ifdef DEBUG
  615. if (cur->bc_nlevels == 1) {
  616. int freecount = 0;
  617. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  618. goto error0;
  619. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  620. do {
  621. if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
  622. &rec.ir_freecount, &rec.ir_free, &i)))
  623. goto error0;
  624. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  625. freecount += rec.ir_freecount;
  626. if ((error = xfs_inobt_increment(cur, 0, &i)))
  627. goto error0;
  628. } while (i == 1);
  629. ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
  630. XFS_FORCED_SHUTDOWN(mp));
  631. }
  632. #endif
  633. /*
  634. * If in the same a.g. as the parent, try to get near the parent.
  635. */
  636. if (pagno == agno) {
  637. if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i)))
  638. goto error0;
  639. if (i != 0 &&
  640. (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
  641. &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
  642. j == 1 &&
  643. rec.ir_freecount > 0) {
  644. /*
  645. * Found a free inode in the same chunk
  646. * as parent, done.
  647. */
  648. }
  649. /*
  650. * In the same a.g. as parent, but parent's chunk is full.
  651. */
  652. else {
  653. int doneleft; /* done, to the left */
  654. int doneright; /* done, to the right */
  655. if (error)
  656. goto error0;
  657. ASSERT(i == 1);
  658. ASSERT(j == 1);
  659. /*
  660. * Duplicate the cursor, search left & right
  661. * simultaneously.
  662. */
  663. if ((error = xfs_btree_dup_cursor(cur, &tcur)))
  664. goto error0;
  665. /*
  666. * Search left with tcur, back up 1 record.
  667. */
  668. if ((error = xfs_inobt_decrement(tcur, 0, &i)))
  669. goto error1;
  670. doneleft = !i;
  671. if (!doneleft) {
  672. if ((error = xfs_inobt_get_rec(tcur,
  673. &trec.ir_startino,
  674. &trec.ir_freecount,
  675. &trec.ir_free, &i)))
  676. goto error1;
  677. XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
  678. }
  679. /*
  680. * Search right with cur, go forward 1 record.
  681. */
  682. if ((error = xfs_inobt_increment(cur, 0, &i)))
  683. goto error1;
  684. doneright = !i;
  685. if (!doneright) {
  686. if ((error = xfs_inobt_get_rec(cur,
  687. &rec.ir_startino,
  688. &rec.ir_freecount,
  689. &rec.ir_free, &i)))
  690. goto error1;
  691. XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
  692. }
  693. /*
  694. * Loop until we find the closest inode chunk
  695. * with a free one.
  696. */
  697. while (!doneleft || !doneright) {
  698. int useleft; /* using left inode
  699. chunk this time */
  700. /*
  701. * Figure out which block is closer,
  702. * if both are valid.
  703. */
  704. if (!doneleft && !doneright)
  705. useleft =
  706. pagino -
  707. (trec.ir_startino +
  708. XFS_INODES_PER_CHUNK - 1) <
  709. rec.ir_startino - pagino;
  710. else
  711. useleft = !doneleft;
  712. /*
  713. * If checking the left, does it have
  714. * free inodes?
  715. */
  716. if (useleft && trec.ir_freecount) {
  717. /*
  718. * Yes, set it up as the chunk to use.
  719. */
  720. rec = trec;
  721. xfs_btree_del_cursor(cur,
  722. XFS_BTREE_NOERROR);
  723. cur = tcur;
  724. break;
  725. }
  726. /*
  727. * If checking the right, does it have
  728. * free inodes?
  729. */
  730. if (!useleft && rec.ir_freecount) {
  731. /*
  732. * Yes, it's already set up.
  733. */
  734. xfs_btree_del_cursor(tcur,
  735. XFS_BTREE_NOERROR);
  736. break;
  737. }
  738. /*
  739. * If used the left, get another one
  740. * further left.
  741. */
  742. if (useleft) {
  743. if ((error = xfs_inobt_decrement(tcur, 0,
  744. &i)))
  745. goto error1;
  746. doneleft = !i;
  747. if (!doneleft) {
  748. if ((error = xfs_inobt_get_rec(
  749. tcur,
  750. &trec.ir_startino,
  751. &trec.ir_freecount,
  752. &trec.ir_free, &i)))
  753. goto error1;
  754. XFS_WANT_CORRUPTED_GOTO(i == 1,
  755. error1);
  756. }
  757. }
  758. /*
  759. * If used the right, get another one
  760. * further right.
  761. */
  762. else {
  763. if ((error = xfs_inobt_increment(cur, 0,
  764. &i)))
  765. goto error1;
  766. doneright = !i;
  767. if (!doneright) {
  768. if ((error = xfs_inobt_get_rec(
  769. cur,
  770. &rec.ir_startino,
  771. &rec.ir_freecount,
  772. &rec.ir_free, &i)))
  773. goto error1;
  774. XFS_WANT_CORRUPTED_GOTO(i == 1,
  775. error1);
  776. }
  777. }
  778. }
  779. ASSERT(!doneleft || !doneright);
  780. }
  781. }
  782. /*
  783. * In a different a.g. from the parent.
  784. * See if the most recently allocated block has any free.
  785. */
  786. else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
  787. if ((error = xfs_inobt_lookup_eq(cur,
  788. be32_to_cpu(agi->agi_newino), 0, 0, &i)))
  789. goto error0;
  790. if (i == 1 &&
  791. (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
  792. &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
  793. j == 1 &&
  794. rec.ir_freecount > 0) {
  795. /*
  796. * The last chunk allocated in the group still has
  797. * a free inode.
  798. */
  799. }
  800. /*
  801. * None left in the last group, search the whole a.g.
  802. */
  803. else {
  804. if (error)
  805. goto error0;
  806. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  807. goto error0;
  808. ASSERT(i == 1);
  809. for (;;) {
  810. if ((error = xfs_inobt_get_rec(cur,
  811. &rec.ir_startino,
  812. &rec.ir_freecount, &rec.ir_free,
  813. &i)))
  814. goto error0;
  815. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  816. if (rec.ir_freecount > 0)
  817. break;
  818. if ((error = xfs_inobt_increment(cur, 0, &i)))
  819. goto error0;
  820. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  821. }
  822. }
  823. }
  824. offset = XFS_IALLOC_FIND_FREE(&rec.ir_free);
  825. ASSERT(offset >= 0);
  826. ASSERT(offset < XFS_INODES_PER_CHUNK);
  827. ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
  828. XFS_INODES_PER_CHUNK) == 0);
  829. ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
  830. XFS_INOBT_CLR_FREE(&rec, offset);
  831. rec.ir_freecount--;
  832. if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
  833. rec.ir_free)))
  834. goto error0;
  835. be32_add(&agi->agi_freecount, -1);
  836. xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
  837. down_read(&mp->m_peraglock);
  838. mp->m_perag[tagno].pagi_freecount--;
  839. up_read(&mp->m_peraglock);
  840. #ifdef DEBUG
  841. if (cur->bc_nlevels == 1) {
  842. int freecount = 0;
  843. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  844. goto error0;
  845. do {
  846. if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
  847. &rec.ir_freecount, &rec.ir_free, &i)))
  848. goto error0;
  849. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  850. freecount += rec.ir_freecount;
  851. if ((error = xfs_inobt_increment(cur, 0, &i)))
  852. goto error0;
  853. } while (i == 1);
  854. ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
  855. XFS_FORCED_SHUTDOWN(mp));
  856. }
  857. #endif
  858. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  859. xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
  860. *inop = ino;
  861. return 0;
  862. error1:
  863. xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
  864. error0:
  865. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  866. return error;
  867. }
  868. /*
  869. * Free disk inode. Carefully avoids touching the incore inode, all
  870. * manipulations incore are the caller's responsibility.
  871. * The on-disk inode is not changed by this operation, only the
  872. * btree (free inode mask) is changed.
  873. */
  874. int
  875. xfs_difree(
  876. xfs_trans_t *tp, /* transaction pointer */
  877. xfs_ino_t inode, /* inode to be freed */
  878. xfs_bmap_free_t *flist, /* extents to free */
  879. int *delete, /* set if inode cluster was deleted */
  880. xfs_ino_t *first_ino) /* first inode in deleted cluster */
  881. {
  882. /* REFERENCED */
  883. xfs_agblock_t agbno; /* block number containing inode */
  884. xfs_buf_t *agbp; /* buffer containing allocation group header */
  885. xfs_agino_t agino; /* inode number relative to allocation group */
  886. xfs_agnumber_t agno; /* allocation group number */
  887. xfs_agi_t *agi; /* allocation group header */
  888. xfs_btree_cur_t *cur; /* inode btree cursor */
  889. int error; /* error return value */
  890. int i; /* result code */
  891. int ilen; /* inodes in an inode cluster */
  892. xfs_mount_t *mp; /* mount structure for filesystem */
  893. int off; /* offset of inode in inode chunk */
  894. xfs_inobt_rec_t rec; /* btree record */
  895. mp = tp->t_mountp;
  896. /*
  897. * Break up inode number into its components.
  898. */
  899. agno = XFS_INO_TO_AGNO(mp, inode);
  900. if (agno >= mp->m_sb.sb_agcount) {
  901. cmn_err(CE_WARN,
  902. "xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.",
  903. agno, mp->m_sb.sb_agcount, mp->m_fsname);
  904. ASSERT(0);
  905. return XFS_ERROR(EINVAL);
  906. }
  907. agino = XFS_INO_TO_AGINO(mp, inode);
  908. if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
  909. cmn_err(CE_WARN,
  910. "xfs_difree: inode != XFS_AGINO_TO_INO() "
  911. "(%llu != %llu) on %s. Returning EINVAL.",
  912. (unsigned long long)inode,
  913. (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino),
  914. mp->m_fsname);
  915. ASSERT(0);
  916. return XFS_ERROR(EINVAL);
  917. }
  918. agbno = XFS_AGINO_TO_AGBNO(mp, agino);
  919. if (agbno >= mp->m_sb.sb_agblocks) {
  920. cmn_err(CE_WARN,
  921. "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.",
  922. agbno, mp->m_sb.sb_agblocks, mp->m_fsname);
  923. ASSERT(0);
  924. return XFS_ERROR(EINVAL);
  925. }
  926. /*
  927. * Get the allocation group header.
  928. */
  929. down_read(&mp->m_peraglock);
  930. error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
  931. up_read(&mp->m_peraglock);
  932. if (error) {
  933. cmn_err(CE_WARN,
  934. "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.",
  935. error, mp->m_fsname);
  936. return error;
  937. }
  938. agi = XFS_BUF_TO_AGI(agbp);
  939. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  940. ASSERT(agbno < be32_to_cpu(agi->agi_length));
  941. /*
  942. * Initialize the cursor.
  943. */
  944. cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO,
  945. (xfs_inode_t *)0, 0);
  946. #ifdef DEBUG
  947. if (cur->bc_nlevels == 1) {
  948. int freecount = 0;
  949. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  950. goto error0;
  951. do {
  952. if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
  953. &rec.ir_freecount, &rec.ir_free, &i)))
  954. goto error0;
  955. if (i) {
  956. freecount += rec.ir_freecount;
  957. if ((error = xfs_inobt_increment(cur, 0, &i)))
  958. goto error0;
  959. }
  960. } while (i == 1);
  961. ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
  962. XFS_FORCED_SHUTDOWN(mp));
  963. }
  964. #endif
  965. /*
  966. * Look for the entry describing this inode.
  967. */
  968. if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) {
  969. cmn_err(CE_WARN,
  970. "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.",
  971. error, mp->m_fsname);
  972. goto error0;
  973. }
  974. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  975. if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount,
  976. &rec.ir_free, &i))) {
  977. cmn_err(CE_WARN,
  978. "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.",
  979. error, mp->m_fsname);
  980. goto error0;
  981. }
  982. XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
  983. /*
  984. * Get the offset in the inode chunk.
  985. */
  986. off = agino - rec.ir_startino;
  987. ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
  988. ASSERT(!XFS_INOBT_IS_FREE(&rec, off));
  989. /*
  990. * Mark the inode free & increment the count.
  991. */
  992. XFS_INOBT_SET_FREE(&rec, off);
  993. rec.ir_freecount++;
  994. /*
  995. * When an inode cluster is free, it becomes elgible for removal
  996. */
  997. if ((mp->m_flags & XFS_MOUNT_IDELETE) &&
  998. (rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
  999. *delete = 1;
  1000. *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
  1001. /*
  1002. * Remove the inode cluster from the AGI B+Tree, adjust the
  1003. * AGI and Superblock inode counts, and mark the disk space
  1004. * to be freed when the transaction is committed.
  1005. */
  1006. ilen = XFS_IALLOC_INODES(mp);
  1007. be32_add(&agi->agi_count, -ilen);
  1008. be32_add(&agi->agi_freecount, -(ilen - 1));
  1009. xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
  1010. down_read(&mp->m_peraglock);
  1011. mp->m_perag[agno].pagi_freecount -= ilen - 1;
  1012. up_read(&mp->m_peraglock);
  1013. xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
  1014. xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
  1015. if ((error = xfs_inobt_delete(cur, &i))) {
  1016. cmn_err(CE_WARN, "xfs_difree: xfs_inobt_delete returned an error %d on %s.\n",
  1017. error, mp->m_fsname);
  1018. goto error0;
  1019. }
  1020. xfs_bmap_add_free(XFS_AGB_TO_FSB(mp,
  1021. agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)),
  1022. XFS_IALLOC_BLOCKS(mp), flist, mp);
  1023. } else {
  1024. *delete = 0;
  1025. if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) {
  1026. cmn_err(CE_WARN,
  1027. "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.",
  1028. error, mp->m_fsname);
  1029. goto error0;
  1030. }
  1031. /*
  1032. * Change the inode free counts and log the ag/sb changes.
  1033. */
  1034. be32_add(&agi->agi_freecount, 1);
  1035. xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
  1036. down_read(&mp->m_peraglock);
  1037. mp->m_perag[agno].pagi_freecount++;
  1038. up_read(&mp->m_peraglock);
  1039. xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
  1040. }
  1041. #ifdef DEBUG
  1042. if (cur->bc_nlevels == 1) {
  1043. int freecount = 0;
  1044. if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
  1045. goto error0;
  1046. do {
  1047. if ((error = xfs_inobt_get_rec(cur,
  1048. &rec.ir_startino,
  1049. &rec.ir_freecount,
  1050. &rec.ir_free, &i)))
  1051. goto error0;
  1052. if (i) {
  1053. freecount += rec.ir_freecount;
  1054. if ((error = xfs_inobt_increment(cur, 0, &i)))
  1055. goto error0;
  1056. }
  1057. } while (i == 1);
  1058. ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
  1059. XFS_FORCED_SHUTDOWN(mp));
  1060. }
  1061. #endif
  1062. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  1063. return 0;
  1064. error0:
  1065. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  1066. return error;
  1067. }
  1068. /*
  1069. * Return the location of the inode in bno/off, for mapping it into a buffer.
  1070. */
  1071. /*ARGSUSED*/
  1072. int
  1073. xfs_dilocate(
  1074. xfs_mount_t *mp, /* file system mount structure */
  1075. xfs_trans_t *tp, /* transaction pointer */
  1076. xfs_ino_t ino, /* inode to locate */
  1077. xfs_fsblock_t *bno, /* output: block containing inode */
  1078. int *len, /* output: num blocks in inode cluster */
  1079. int *off, /* output: index in block of inode */
  1080. uint flags) /* flags concerning inode lookup */
  1081. {
  1082. xfs_agblock_t agbno; /* block number of inode in the alloc group */
  1083. xfs_buf_t *agbp; /* agi buffer */
  1084. xfs_agino_t agino; /* inode number within alloc group */
  1085. xfs_agnumber_t agno; /* allocation group number */
  1086. int blks_per_cluster; /* num blocks per inode cluster */
  1087. xfs_agblock_t chunk_agbno; /* first block in inode chunk */
  1088. xfs_agino_t chunk_agino; /* first agino in inode chunk */
  1089. __int32_t chunk_cnt; /* count of free inodes in chunk */
  1090. xfs_inofree_t chunk_free; /* mask of free inodes in chunk */
  1091. xfs_agblock_t cluster_agbno; /* first block in inode cluster */
  1092. xfs_btree_cur_t *cur; /* inode btree cursor */
  1093. int error; /* error code */
  1094. int i; /* temp state */
  1095. int offset; /* index of inode in its buffer */
  1096. int offset_agbno; /* blks from chunk start to inode */
  1097. ASSERT(ino != NULLFSINO);
  1098. /*
  1099. * Split up the inode number into its parts.
  1100. */
  1101. agno = XFS_INO_TO_AGNO(mp, ino);
  1102. agino = XFS_INO_TO_AGINO(mp, ino);
  1103. agbno = XFS_AGINO_TO_AGBNO(mp, agino);
  1104. if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
  1105. ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
  1106. #ifdef DEBUG
  1107. if (agno >= mp->m_sb.sb_agcount) {
  1108. xfs_fs_cmn_err(CE_ALERT, mp,
  1109. "xfs_dilocate: agno (%d) >= "
  1110. "mp->m_sb.sb_agcount (%d)",
  1111. agno, mp->m_sb.sb_agcount);
  1112. }
  1113. if (agbno >= mp->m_sb.sb_agblocks) {
  1114. xfs_fs_cmn_err(CE_ALERT, mp,
  1115. "xfs_dilocate: agbno (0x%llx) >= "
  1116. "mp->m_sb.sb_agblocks (0x%lx)",
  1117. (unsigned long long) agbno,
  1118. (unsigned long) mp->m_sb.sb_agblocks);
  1119. }
  1120. if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
  1121. xfs_fs_cmn_err(CE_ALERT, mp,
  1122. "xfs_dilocate: ino (0x%llx) != "
  1123. "XFS_AGINO_TO_INO(mp, agno, agino) "
  1124. "(0x%llx)",
  1125. ino, XFS_AGINO_TO_INO(mp, agno, agino));
  1126. }
  1127. #endif /* DEBUG */
  1128. return XFS_ERROR(EINVAL);
  1129. }
  1130. if ((mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) ||
  1131. !(flags & XFS_IMAP_LOOKUP)) {
  1132. offset = XFS_INO_TO_OFFSET(mp, ino);
  1133. ASSERT(offset < mp->m_sb.sb_inopblock);
  1134. *bno = XFS_AGB_TO_FSB(mp, agno, agbno);
  1135. *off = offset;
  1136. *len = 1;
  1137. return 0;
  1138. }
  1139. blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
  1140. if (*bno != NULLFSBLOCK) {
  1141. offset = XFS_INO_TO_OFFSET(mp, ino);
  1142. ASSERT(offset < mp->m_sb.sb_inopblock);
  1143. cluster_agbno = XFS_FSB_TO_AGBNO(mp, *bno);
  1144. *off = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
  1145. offset;
  1146. *len = blks_per_cluster;
  1147. return 0;
  1148. }
  1149. if (mp->m_inoalign_mask) {
  1150. offset_agbno = agbno & mp->m_inoalign_mask;
  1151. chunk_agbno = agbno - offset_agbno;
  1152. } else {
  1153. down_read(&mp->m_peraglock);
  1154. error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
  1155. up_read(&mp->m_peraglock);
  1156. if (error) {
  1157. #ifdef DEBUG
  1158. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
  1159. "xfs_ialloc_read_agi() returned "
  1160. "error %d, agno %d",
  1161. error, agno);
  1162. #endif /* DEBUG */
  1163. return error;
  1164. }
  1165. cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO,
  1166. (xfs_inode_t *)0, 0);
  1167. if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) {
  1168. #ifdef DEBUG
  1169. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
  1170. "xfs_inobt_lookup_le() failed");
  1171. #endif /* DEBUG */
  1172. goto error0;
  1173. }
  1174. if ((error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt,
  1175. &chunk_free, &i))) {
  1176. #ifdef DEBUG
  1177. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
  1178. "xfs_inobt_get_rec() failed");
  1179. #endif /* DEBUG */
  1180. goto error0;
  1181. }
  1182. if (i == 0) {
  1183. #ifdef DEBUG
  1184. xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
  1185. "xfs_inobt_get_rec() failed");
  1186. #endif /* DEBUG */
  1187. error = XFS_ERROR(EINVAL);
  1188. }
  1189. xfs_trans_brelse(tp, agbp);
  1190. xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
  1191. if (error)
  1192. return error;
  1193. chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino);
  1194. offset_agbno = agbno - chunk_agbno;
  1195. }
  1196. ASSERT(agbno >= chunk_agbno);
  1197. cluster_agbno = chunk_agbno +
  1198. ((offset_agbno / blks_per_cluster) * blks_per_cluster);
  1199. offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
  1200. XFS_INO_TO_OFFSET(mp, ino);
  1201. *bno = XFS_AGB_TO_FSB(mp, agno, cluster_agbno);
  1202. *off = offset;
  1203. *len = blks_per_cluster;
  1204. return 0;
  1205. error0:
  1206. xfs_trans_brelse(tp, agbp);
  1207. xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
  1208. return error;
  1209. }
  1210. /*
  1211. * Compute and fill in value of m_in_maxlevels.
  1212. */
  1213. void
  1214. xfs_ialloc_compute_maxlevels(
  1215. xfs_mount_t *mp) /* file system mount structure */
  1216. {
  1217. int level;
  1218. uint maxblocks;
  1219. uint maxleafents;
  1220. int minleafrecs;
  1221. int minnoderecs;
  1222. maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >>
  1223. XFS_INODES_PER_CHUNK_LOG;
  1224. minleafrecs = mp->m_alloc_mnr[0];
  1225. minnoderecs = mp->m_alloc_mnr[1];
  1226. maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
  1227. for (level = 1; maxblocks > 1; level++)
  1228. maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
  1229. mp->m_in_maxlevels = level;
  1230. }
  1231. /*
  1232. * Log specified fields for the ag hdr (inode section)
  1233. */
  1234. void
  1235. xfs_ialloc_log_agi(
  1236. xfs_trans_t *tp, /* transaction pointer */
  1237. xfs_buf_t *bp, /* allocation group header buffer */
  1238. int fields) /* bitmask of fields to log */
  1239. {
  1240. int first; /* first byte number */
  1241. int last; /* last byte number */
  1242. static const short offsets[] = { /* field starting offsets */
  1243. /* keep in sync with bit definitions */
  1244. offsetof(xfs_agi_t, agi_magicnum),
  1245. offsetof(xfs_agi_t, agi_versionnum),
  1246. offsetof(xfs_agi_t, agi_seqno),
  1247. offsetof(xfs_agi_t, agi_length),
  1248. offsetof(xfs_agi_t, agi_count),
  1249. offsetof(xfs_agi_t, agi_root),
  1250. offsetof(xfs_agi_t, agi_level),
  1251. offsetof(xfs_agi_t, agi_freecount),
  1252. offsetof(xfs_agi_t, agi_newino),
  1253. offsetof(xfs_agi_t, agi_dirino),
  1254. offsetof(xfs_agi_t, agi_unlinked),
  1255. sizeof(xfs_agi_t)
  1256. };
  1257. #ifdef DEBUG
  1258. xfs_agi_t *agi; /* allocation group header */
  1259. agi = XFS_BUF_TO_AGI(bp);
  1260. ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
  1261. #endif
  1262. /*
  1263. * Compute byte offsets for the first and last fields.
  1264. */
  1265. xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last);
  1266. /*
  1267. * Log the allocation group inode header buffer.
  1268. */
  1269. xfs_trans_log_buf(tp, bp, first, last);
  1270. }
  1271. /*
  1272. * Read in the allocation group header (inode allocation section)
  1273. */
  1274. int
  1275. xfs_ialloc_read_agi(
  1276. xfs_mount_t *mp, /* file system mount structure */
  1277. xfs_trans_t *tp, /* transaction pointer */
  1278. xfs_agnumber_t agno, /* allocation group number */
  1279. xfs_buf_t **bpp) /* allocation group hdr buf */
  1280. {
  1281. xfs_agi_t *agi; /* allocation group header */
  1282. int agi_ok; /* agi is consistent */
  1283. xfs_buf_t *bp; /* allocation group hdr buf */
  1284. xfs_perag_t *pag; /* per allocation group data */
  1285. int error;
  1286. ASSERT(agno != NULLAGNUMBER);
  1287. error = xfs_trans_read_buf(
  1288. mp, tp, mp->m_ddev_targp,
  1289. XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
  1290. XFS_FSS_TO_BB(mp, 1), 0, &bp);
  1291. if (error)
  1292. return error;
  1293. ASSERT(bp && !XFS_BUF_GETERROR(bp));
  1294. /*
  1295. * Validate the magic number of the agi block.
  1296. */
  1297. agi = XFS_BUF_TO_AGI(bp);
  1298. agi_ok =
  1299. be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
  1300. XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
  1301. if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
  1302. XFS_RANDOM_IALLOC_READ_AGI))) {
  1303. XFS_CORRUPTION_ERROR("xfs_ialloc_read_agi", XFS_ERRLEVEL_LOW,
  1304. mp, agi);
  1305. xfs_trans_brelse(tp, bp);
  1306. return XFS_ERROR(EFSCORRUPTED);
  1307. }
  1308. pag = &mp->m_perag[agno];
  1309. if (!pag->pagi_init) {
  1310. pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
  1311. pag->pagi_init = 1;
  1312. } else {
  1313. /*
  1314. * It's possible for these to be out of sync if
  1315. * we are in the middle of a forced shutdown.
  1316. */
  1317. ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
  1318. XFS_FORCED_SHUTDOWN(mp));
  1319. }
  1320. #ifdef DEBUG
  1321. {
  1322. int i;
  1323. for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
  1324. ASSERT(agi->agi_unlinked[i]);
  1325. }
  1326. #endif
  1327. XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGI, XFS_AGI_REF);
  1328. *bpp = bp;
  1329. return 0;
  1330. }