xfs_inode_item.c 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898
  1. /*
  2. * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
  3. * All Rights Reserved.
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU General Public License as
  7. * published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it would be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write the Free Software Foundation,
  16. * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "xfs.h"
  19. #include "xfs_fs.h"
  20. #include "xfs_types.h"
  21. #include "xfs_bit.h"
  22. #include "xfs_log.h"
  23. #include "xfs_inum.h"
  24. #include "xfs_trans.h"
  25. #include "xfs_sb.h"
  26. #include "xfs_ag.h"
  27. #include "xfs_mount.h"
  28. #include "xfs_trans_priv.h"
  29. #include "xfs_bmap_btree.h"
  30. #include "xfs_dinode.h"
  31. #include "xfs_inode.h"
  32. #include "xfs_inode_item.h"
  33. #include "xfs_error.h"
  34. #include "xfs_trace.h"
  35. kmem_zone_t *xfs_ili_zone; /* inode log item zone */
  36. static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
  37. {
  38. return container_of(lip, struct xfs_inode_log_item, ili_item);
  39. }
  40. /*
  41. * This returns the number of iovecs needed to log the given inode item.
  42. *
  43. * We need one iovec for the inode log format structure, one for the
  44. * inode core, and possibly one for the inode data/extents/b-tree root
  45. * and one for the inode attribute data/extents/b-tree root.
  46. */
  47. STATIC uint
  48. xfs_inode_item_size(
  49. struct xfs_log_item *lip)
  50. {
  51. struct xfs_inode_log_item *iip = INODE_ITEM(lip);
  52. struct xfs_inode *ip = iip->ili_inode;
  53. uint nvecs = 2;
  54. switch (ip->i_d.di_format) {
  55. case XFS_DINODE_FMT_EXTENTS:
  56. if ((iip->ili_fields & XFS_ILOG_DEXT) &&
  57. ip->i_d.di_nextents > 0 &&
  58. ip->i_df.if_bytes > 0)
  59. nvecs++;
  60. break;
  61. case XFS_DINODE_FMT_BTREE:
  62. if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
  63. ip->i_df.if_broot_bytes > 0)
  64. nvecs++;
  65. break;
  66. case XFS_DINODE_FMT_LOCAL:
  67. if ((iip->ili_fields & XFS_ILOG_DDATA) &&
  68. ip->i_df.if_bytes > 0)
  69. nvecs++;
  70. break;
  71. case XFS_DINODE_FMT_DEV:
  72. case XFS_DINODE_FMT_UUID:
  73. break;
  74. default:
  75. ASSERT(0);
  76. break;
  77. }
  78. if (!XFS_IFORK_Q(ip))
  79. return nvecs;
  80. /*
  81. * Log any necessary attribute data.
  82. */
  83. switch (ip->i_d.di_aformat) {
  84. case XFS_DINODE_FMT_EXTENTS:
  85. if ((iip->ili_fields & XFS_ILOG_AEXT) &&
  86. ip->i_d.di_anextents > 0 &&
  87. ip->i_afp->if_bytes > 0)
  88. nvecs++;
  89. break;
  90. case XFS_DINODE_FMT_BTREE:
  91. if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
  92. ip->i_afp->if_broot_bytes > 0)
  93. nvecs++;
  94. break;
  95. case XFS_DINODE_FMT_LOCAL:
  96. if ((iip->ili_fields & XFS_ILOG_ADATA) &&
  97. ip->i_afp->if_bytes > 0)
  98. nvecs++;
  99. break;
  100. default:
  101. ASSERT(0);
  102. break;
  103. }
  104. return nvecs;
  105. }
  106. /*
  107. * xfs_inode_item_format_extents - convert in-core extents to on-disk form
  108. *
  109. * For either the data or attr fork in extent format, we need to endian convert
  110. * the in-core extent as we place them into the on-disk inode. In this case, we
  111. * need to do this conversion before we write the extents into the log. Because
  112. * we don't have the disk inode to write into here, we allocate a buffer and
  113. * format the extents into it via xfs_iextents_copy(). We free the buffer in
  114. * the unlock routine after the copy for the log has been made.
  115. *
  116. * In the case of the data fork, the in-core and on-disk fork sizes can be
  117. * different due to delayed allocation extents. We only log on-disk extents
  118. * here, so always use the physical fork size to determine the size of the
  119. * buffer we need to allocate.
  120. */
  121. STATIC void
  122. xfs_inode_item_format_extents(
  123. struct xfs_inode *ip,
  124. struct xfs_log_iovec *vecp,
  125. int whichfork,
  126. int type)
  127. {
  128. xfs_bmbt_rec_t *ext_buffer;
  129. ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP);
  130. if (whichfork == XFS_DATA_FORK)
  131. ip->i_itemp->ili_extents_buf = ext_buffer;
  132. else
  133. ip->i_itemp->ili_aextents_buf = ext_buffer;
  134. vecp->i_addr = ext_buffer;
  135. vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork);
  136. vecp->i_type = type;
  137. }
  138. /*
  139. * This is called to fill in the vector of log iovecs for the
  140. * given inode log item. It fills the first item with an inode
  141. * log format structure, the second with the on-disk inode structure,
  142. * and a possible third and/or fourth with the inode data/extents/b-tree
  143. * root and inode attributes data/extents/b-tree root.
  144. */
  145. STATIC void
  146. xfs_inode_item_format(
  147. struct xfs_log_item *lip,
  148. struct xfs_log_iovec *vecp)
  149. {
  150. struct xfs_inode_log_item *iip = INODE_ITEM(lip);
  151. struct xfs_inode *ip = iip->ili_inode;
  152. uint nvecs;
  153. size_t data_bytes;
  154. xfs_mount_t *mp;
  155. vecp->i_addr = &iip->ili_format;
  156. vecp->i_len = sizeof(xfs_inode_log_format_t);
  157. vecp->i_type = XLOG_REG_TYPE_IFORMAT;
  158. vecp++;
  159. nvecs = 1;
  160. vecp->i_addr = &ip->i_d;
  161. vecp->i_len = sizeof(struct xfs_icdinode);
  162. vecp->i_type = XLOG_REG_TYPE_ICORE;
  163. vecp++;
  164. nvecs++;
  165. /*
  166. * If this is really an old format inode, then we need to
  167. * log it as such. This means that we have to copy the link
  168. * count from the new field to the old. We don't have to worry
  169. * about the new fields, because nothing trusts them as long as
  170. * the old inode version number is there. If the superblock already
  171. * has a new version number, then we don't bother converting back.
  172. */
  173. mp = ip->i_mount;
  174. ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
  175. if (ip->i_d.di_version == 1) {
  176. if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
  177. /*
  178. * Convert it back.
  179. */
  180. ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
  181. ip->i_d.di_onlink = ip->i_d.di_nlink;
  182. } else {
  183. /*
  184. * The superblock version has already been bumped,
  185. * so just make the conversion to the new inode
  186. * format permanent.
  187. */
  188. ip->i_d.di_version = 2;
  189. ip->i_d.di_onlink = 0;
  190. memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
  191. }
  192. }
  193. switch (ip->i_d.di_format) {
  194. case XFS_DINODE_FMT_EXTENTS:
  195. iip->ili_fields &=
  196. ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
  197. XFS_ILOG_DEV | XFS_ILOG_UUID);
  198. if ((iip->ili_fields & XFS_ILOG_DEXT) &&
  199. ip->i_d.di_nextents > 0 &&
  200. ip->i_df.if_bytes > 0) {
  201. ASSERT(ip->i_df.if_u1.if_extents != NULL);
  202. ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0);
  203. ASSERT(iip->ili_extents_buf == NULL);
  204. #ifdef XFS_NATIVE_HOST
  205. if (ip->i_d.di_nextents == ip->i_df.if_bytes /
  206. (uint)sizeof(xfs_bmbt_rec_t)) {
  207. /*
  208. * There are no delayed allocation
  209. * extents, so just point to the
  210. * real extents array.
  211. */
  212. vecp->i_addr = ip->i_df.if_u1.if_extents;
  213. vecp->i_len = ip->i_df.if_bytes;
  214. vecp->i_type = XLOG_REG_TYPE_IEXT;
  215. } else
  216. #endif
  217. {
  218. xfs_inode_item_format_extents(ip, vecp,
  219. XFS_DATA_FORK, XLOG_REG_TYPE_IEXT);
  220. }
  221. ASSERT(vecp->i_len <= ip->i_df.if_bytes);
  222. iip->ili_format.ilf_dsize = vecp->i_len;
  223. vecp++;
  224. nvecs++;
  225. } else {
  226. iip->ili_fields &= ~XFS_ILOG_DEXT;
  227. }
  228. break;
  229. case XFS_DINODE_FMT_BTREE:
  230. iip->ili_fields &=
  231. ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
  232. XFS_ILOG_DEV | XFS_ILOG_UUID);
  233. if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
  234. ip->i_df.if_broot_bytes > 0) {
  235. ASSERT(ip->i_df.if_broot != NULL);
  236. vecp->i_addr = ip->i_df.if_broot;
  237. vecp->i_len = ip->i_df.if_broot_bytes;
  238. vecp->i_type = XLOG_REG_TYPE_IBROOT;
  239. vecp++;
  240. nvecs++;
  241. iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
  242. } else {
  243. ASSERT(!(iip->ili_fields &
  244. XFS_ILOG_DBROOT));
  245. #ifdef XFS_TRANS_DEBUG
  246. if (iip->ili_root_size > 0) {
  247. ASSERT(iip->ili_root_size ==
  248. ip->i_df.if_broot_bytes);
  249. ASSERT(memcmp(iip->ili_orig_root,
  250. ip->i_df.if_broot,
  251. iip->ili_root_size) == 0);
  252. } else {
  253. ASSERT(ip->i_df.if_broot_bytes == 0);
  254. }
  255. #endif
  256. iip->ili_fields &= ~XFS_ILOG_DBROOT;
  257. }
  258. break;
  259. case XFS_DINODE_FMT_LOCAL:
  260. iip->ili_fields &=
  261. ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
  262. XFS_ILOG_DEV | XFS_ILOG_UUID);
  263. if ((iip->ili_fields & XFS_ILOG_DDATA) &&
  264. ip->i_df.if_bytes > 0) {
  265. ASSERT(ip->i_df.if_u1.if_data != NULL);
  266. ASSERT(ip->i_d.di_size > 0);
  267. vecp->i_addr = ip->i_df.if_u1.if_data;
  268. /*
  269. * Round i_bytes up to a word boundary.
  270. * The underlying memory is guaranteed to
  271. * to be there by xfs_idata_realloc().
  272. */
  273. data_bytes = roundup(ip->i_df.if_bytes, 4);
  274. ASSERT((ip->i_df.if_real_bytes == 0) ||
  275. (ip->i_df.if_real_bytes == data_bytes));
  276. vecp->i_len = (int)data_bytes;
  277. vecp->i_type = XLOG_REG_TYPE_ILOCAL;
  278. vecp++;
  279. nvecs++;
  280. iip->ili_format.ilf_dsize = (unsigned)data_bytes;
  281. } else {
  282. iip->ili_fields &= ~XFS_ILOG_DDATA;
  283. }
  284. break;
  285. case XFS_DINODE_FMT_DEV:
  286. iip->ili_fields &=
  287. ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
  288. XFS_ILOG_DEXT | XFS_ILOG_UUID);
  289. if (iip->ili_fields & XFS_ILOG_DEV) {
  290. iip->ili_format.ilf_u.ilfu_rdev =
  291. ip->i_df.if_u2.if_rdev;
  292. }
  293. break;
  294. case XFS_DINODE_FMT_UUID:
  295. iip->ili_fields &=
  296. ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
  297. XFS_ILOG_DEXT | XFS_ILOG_DEV);
  298. if (iip->ili_fields & XFS_ILOG_UUID) {
  299. iip->ili_format.ilf_u.ilfu_uuid =
  300. ip->i_df.if_u2.if_uuid;
  301. }
  302. break;
  303. default:
  304. ASSERT(0);
  305. break;
  306. }
  307. /*
  308. * If there are no attributes associated with the file, then we're done.
  309. */
  310. if (!XFS_IFORK_Q(ip)) {
  311. iip->ili_fields &=
  312. ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
  313. goto out;
  314. }
  315. switch (ip->i_d.di_aformat) {
  316. case XFS_DINODE_FMT_EXTENTS:
  317. iip->ili_fields &=
  318. ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
  319. if ((iip->ili_fields & XFS_ILOG_AEXT) &&
  320. ip->i_d.di_anextents > 0 &&
  321. ip->i_afp->if_bytes > 0) {
  322. ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==
  323. ip->i_d.di_anextents);
  324. ASSERT(ip->i_afp->if_u1.if_extents != NULL);
  325. #ifdef XFS_NATIVE_HOST
  326. /*
  327. * There are not delayed allocation extents
  328. * for attributes, so just point at the array.
  329. */
  330. vecp->i_addr = ip->i_afp->if_u1.if_extents;
  331. vecp->i_len = ip->i_afp->if_bytes;
  332. vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
  333. #else
  334. ASSERT(iip->ili_aextents_buf == NULL);
  335. xfs_inode_item_format_extents(ip, vecp,
  336. XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT);
  337. #endif
  338. iip->ili_format.ilf_asize = vecp->i_len;
  339. vecp++;
  340. nvecs++;
  341. } else {
  342. iip->ili_fields &= ~XFS_ILOG_AEXT;
  343. }
  344. break;
  345. case XFS_DINODE_FMT_BTREE:
  346. iip->ili_fields &=
  347. ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
  348. if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
  349. ip->i_afp->if_broot_bytes > 0) {
  350. ASSERT(ip->i_afp->if_broot != NULL);
  351. vecp->i_addr = ip->i_afp->if_broot;
  352. vecp->i_len = ip->i_afp->if_broot_bytes;
  353. vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT;
  354. vecp++;
  355. nvecs++;
  356. iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
  357. } else {
  358. iip->ili_fields &= ~XFS_ILOG_ABROOT;
  359. }
  360. break;
  361. case XFS_DINODE_FMT_LOCAL:
  362. iip->ili_fields &=
  363. ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
  364. if ((iip->ili_fields & XFS_ILOG_ADATA) &&
  365. ip->i_afp->if_bytes > 0) {
  366. ASSERT(ip->i_afp->if_u1.if_data != NULL);
  367. vecp->i_addr = ip->i_afp->if_u1.if_data;
  368. /*
  369. * Round i_bytes up to a word boundary.
  370. * The underlying memory is guaranteed to
  371. * to be there by xfs_idata_realloc().
  372. */
  373. data_bytes = roundup(ip->i_afp->if_bytes, 4);
  374. ASSERT((ip->i_afp->if_real_bytes == 0) ||
  375. (ip->i_afp->if_real_bytes == data_bytes));
  376. vecp->i_len = (int)data_bytes;
  377. vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL;
  378. vecp++;
  379. nvecs++;
  380. iip->ili_format.ilf_asize = (unsigned)data_bytes;
  381. } else {
  382. iip->ili_fields &= ~XFS_ILOG_ADATA;
  383. }
  384. break;
  385. default:
  386. ASSERT(0);
  387. break;
  388. }
  389. out:
  390. /*
  391. * Now update the log format that goes out to disk from the in-core
  392. * values. We always write the inode core to make the arithmetic
  393. * games in recovery easier, which isn't a big deal as just about any
  394. * transaction would dirty it anyway.
  395. */
  396. iip->ili_format.ilf_fields = XFS_ILOG_CORE |
  397. (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
  398. iip->ili_format.ilf_size = nvecs;
  399. }
  400. /*
  401. * This is called to pin the inode associated with the inode log
  402. * item in memory so it cannot be written out.
  403. */
  404. STATIC void
  405. xfs_inode_item_pin(
  406. struct xfs_log_item *lip)
  407. {
  408. struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;
  409. ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
  410. trace_xfs_inode_pin(ip, _RET_IP_);
  411. atomic_inc(&ip->i_pincount);
  412. }
  413. /*
  414. * This is called to unpin the inode associated with the inode log
  415. * item which was previously pinned with a call to xfs_inode_item_pin().
  416. *
  417. * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
  418. */
  419. STATIC void
  420. xfs_inode_item_unpin(
  421. struct xfs_log_item *lip,
  422. int remove)
  423. {
  424. struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode;
  425. trace_xfs_inode_unpin(ip, _RET_IP_);
  426. ASSERT(atomic_read(&ip->i_pincount) > 0);
  427. if (atomic_dec_and_test(&ip->i_pincount))
  428. wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
  429. }
  430. STATIC uint
  431. xfs_inode_item_push(
  432. struct xfs_log_item *lip,
  433. struct list_head *buffer_list)
  434. {
  435. struct xfs_inode_log_item *iip = INODE_ITEM(lip);
  436. struct xfs_inode *ip = iip->ili_inode;
  437. struct xfs_buf *bp = NULL;
  438. uint rval = XFS_ITEM_SUCCESS;
  439. int error;
  440. if (xfs_ipincount(ip) > 0)
  441. return XFS_ITEM_PINNED;
  442. if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
  443. return XFS_ITEM_LOCKED;
  444. /*
  445. * Re-check the pincount now that we stabilized the value by
  446. * taking the ilock.
  447. */
  448. if (xfs_ipincount(ip) > 0) {
  449. rval = XFS_ITEM_PINNED;
  450. goto out_unlock;
  451. }
  452. /*
  453. * Someone else is already flushing the inode. Nothing we can do
  454. * here but wait for the flush to finish and remove the item from
  455. * the AIL.
  456. */
  457. if (!xfs_iflock_nowait(ip)) {
  458. rval = XFS_ITEM_FLUSHING;
  459. goto out_unlock;
  460. }
  461. /*
  462. * Stale inode items should force out the iclog.
  463. */
  464. if (ip->i_flags & XFS_ISTALE) {
  465. xfs_ifunlock(ip);
  466. xfs_iunlock(ip, XFS_ILOCK_SHARED);
  467. return XFS_ITEM_PINNED;
  468. }
  469. ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
  470. ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
  471. spin_unlock(&lip->li_ailp->xa_lock);
  472. error = xfs_iflush(ip, &bp);
  473. if (!error) {
  474. if (!xfs_buf_delwri_queue(bp, buffer_list))
  475. rval = XFS_ITEM_FLUSHING;
  476. xfs_buf_relse(bp);
  477. }
  478. spin_lock(&lip->li_ailp->xa_lock);
  479. out_unlock:
  480. xfs_iunlock(ip, XFS_ILOCK_SHARED);
  481. return rval;
  482. }
  483. /*
  484. * Unlock the inode associated with the inode log item.
  485. * Clear the fields of the inode and inode log item that
  486. * are specific to the current transaction. If the
  487. * hold flags is set, do not unlock the inode.
  488. */
  489. STATIC void
  490. xfs_inode_item_unlock(
  491. struct xfs_log_item *lip)
  492. {
  493. struct xfs_inode_log_item *iip = INODE_ITEM(lip);
  494. struct xfs_inode *ip = iip->ili_inode;
  495. unsigned short lock_flags;
  496. ASSERT(ip->i_itemp != NULL);
  497. ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
  498. /*
  499. * If the inode needed a separate buffer with which to log
  500. * its extents, then free it now.
  501. */
  502. if (iip->ili_extents_buf != NULL) {
  503. ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS);
  504. ASSERT(ip->i_d.di_nextents > 0);
  505. ASSERT(iip->ili_fields & XFS_ILOG_DEXT);
  506. ASSERT(ip->i_df.if_bytes > 0);
  507. kmem_free(iip->ili_extents_buf);
  508. iip->ili_extents_buf = NULL;
  509. }
  510. if (iip->ili_aextents_buf != NULL) {
  511. ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS);
  512. ASSERT(ip->i_d.di_anextents > 0);
  513. ASSERT(iip->ili_fields & XFS_ILOG_AEXT);
  514. ASSERT(ip->i_afp->if_bytes > 0);
  515. kmem_free(iip->ili_aextents_buf);
  516. iip->ili_aextents_buf = NULL;
  517. }
  518. lock_flags = iip->ili_lock_flags;
  519. iip->ili_lock_flags = 0;
  520. if (lock_flags)
  521. xfs_iunlock(ip, lock_flags);
  522. }
  523. /*
  524. * This is called to find out where the oldest active copy of the inode log
  525. * item in the on disk log resides now that the last log write of it completed
  526. * at the given lsn. Since we always re-log all dirty data in an inode, the
  527. * latest copy in the on disk log is the only one that matters. Therefore,
  528. * simply return the given lsn.
  529. *
  530. * If the inode has been marked stale because the cluster is being freed, we
  531. * don't want to (re-)insert this inode into the AIL. There is a race condition
  532. * where the cluster buffer may be unpinned before the inode is inserted into
  533. * the AIL during transaction committed processing. If the buffer is unpinned
  534. * before the inode item has been committed and inserted, then it is possible
  535. * for the buffer to be written and IO completes before the inode is inserted
  536. * into the AIL. In that case, we'd be inserting a clean, stale inode into the
  537. * AIL which will never get removed. It will, however, get reclaimed which
  538. * triggers an assert in xfs_inode_free() complaining about freein an inode
  539. * still in the AIL.
  540. *
  541. * To avoid this, just unpin the inode directly and return a LSN of -1 so the
  542. * transaction committed code knows that it does not need to do any further
  543. * processing on the item.
  544. */
  545. STATIC xfs_lsn_t
  546. xfs_inode_item_committed(
  547. struct xfs_log_item *lip,
  548. xfs_lsn_t lsn)
  549. {
  550. struct xfs_inode_log_item *iip = INODE_ITEM(lip);
  551. struct xfs_inode *ip = iip->ili_inode;
  552. if (xfs_iflags_test(ip, XFS_ISTALE)) {
  553. xfs_inode_item_unpin(lip, 0);
  554. return -1;
  555. }
  556. return lsn;
  557. }
  558. /*
  559. * XXX rcc - this one really has to do something. Probably needs
  560. * to stamp in a new field in the incore inode.
  561. */
  562. STATIC void
  563. xfs_inode_item_committing(
  564. struct xfs_log_item *lip,
  565. xfs_lsn_t lsn)
  566. {
  567. INODE_ITEM(lip)->ili_last_lsn = lsn;
  568. }
  569. /*
  570. * This is the ops vector shared by all buf log items.
  571. */
  572. static const struct xfs_item_ops xfs_inode_item_ops = {
  573. .iop_size = xfs_inode_item_size,
  574. .iop_format = xfs_inode_item_format,
  575. .iop_pin = xfs_inode_item_pin,
  576. .iop_unpin = xfs_inode_item_unpin,
  577. .iop_unlock = xfs_inode_item_unlock,
  578. .iop_committed = xfs_inode_item_committed,
  579. .iop_push = xfs_inode_item_push,
  580. .iop_committing = xfs_inode_item_committing
  581. };
  582. /*
  583. * Initialize the inode log item for a newly allocated (in-core) inode.
  584. */
  585. void
  586. xfs_inode_item_init(
  587. struct xfs_inode *ip,
  588. struct xfs_mount *mp)
  589. {
  590. struct xfs_inode_log_item *iip;
  591. ASSERT(ip->i_itemp == NULL);
  592. iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
  593. iip->ili_inode = ip;
  594. xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
  595. &xfs_inode_item_ops);
  596. iip->ili_format.ilf_type = XFS_LI_INODE;
  597. iip->ili_format.ilf_ino = ip->i_ino;
  598. iip->ili_format.ilf_blkno = ip->i_imap.im_blkno;
  599. iip->ili_format.ilf_len = ip->i_imap.im_len;
  600. iip->ili_format.ilf_boffset = ip->i_imap.im_boffset;
  601. }
  602. /*
  603. * Free the inode log item and any memory hanging off of it.
  604. */
  605. void
  606. xfs_inode_item_destroy(
  607. xfs_inode_t *ip)
  608. {
  609. #ifdef XFS_TRANS_DEBUG
  610. if (ip->i_itemp->ili_root_size != 0) {
  611. kmem_free(ip->i_itemp->ili_orig_root);
  612. }
  613. #endif
  614. kmem_zone_free(xfs_ili_zone, ip->i_itemp);
  615. }
  616. /*
  617. * This is the inode flushing I/O completion routine. It is called
  618. * from interrupt level when the buffer containing the inode is
  619. * flushed to disk. It is responsible for removing the inode item
  620. * from the AIL if it has not been re-logged, and unlocking the inode's
  621. * flush lock.
  622. *
  623. * To reduce AIL lock traffic as much as possible, we scan the buffer log item
  624. * list for other inodes that will run this function. We remove them from the
  625. * buffer list so we can process all the inode IO completions in one AIL lock
  626. * traversal.
  627. */
  628. void
  629. xfs_iflush_done(
  630. struct xfs_buf *bp,
  631. struct xfs_log_item *lip)
  632. {
  633. struct xfs_inode_log_item *iip;
  634. struct xfs_log_item *blip;
  635. struct xfs_log_item *next;
  636. struct xfs_log_item *prev;
  637. struct xfs_ail *ailp = lip->li_ailp;
  638. int need_ail = 0;
  639. /*
  640. * Scan the buffer IO completions for other inodes being completed and
  641. * attach them to the current inode log item.
  642. */
  643. blip = bp->b_fspriv;
  644. prev = NULL;
  645. while (blip != NULL) {
  646. if (lip->li_cb != xfs_iflush_done) {
  647. prev = blip;
  648. blip = blip->li_bio_list;
  649. continue;
  650. }
  651. /* remove from list */
  652. next = blip->li_bio_list;
  653. if (!prev) {
  654. bp->b_fspriv = next;
  655. } else {
  656. prev->li_bio_list = next;
  657. }
  658. /* add to current list */
  659. blip->li_bio_list = lip->li_bio_list;
  660. lip->li_bio_list = blip;
  661. /*
  662. * while we have the item, do the unlocked check for needing
  663. * the AIL lock.
  664. */
  665. iip = INODE_ITEM(blip);
  666. if (iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn)
  667. need_ail++;
  668. blip = next;
  669. }
  670. /* make sure we capture the state of the initial inode. */
  671. iip = INODE_ITEM(lip);
  672. if (iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn)
  673. need_ail++;
  674. /*
  675. * We only want to pull the item from the AIL if it is
  676. * actually there and its location in the log has not
  677. * changed since we started the flush. Thus, we only bother
  678. * if the ili_logged flag is set and the inode's lsn has not
  679. * changed. First we check the lsn outside
  680. * the lock since it's cheaper, and then we recheck while
  681. * holding the lock before removing the inode from the AIL.
  682. */
  683. if (need_ail) {
  684. struct xfs_log_item *log_items[need_ail];
  685. int i = 0;
  686. spin_lock(&ailp->xa_lock);
  687. for (blip = lip; blip; blip = blip->li_bio_list) {
  688. iip = INODE_ITEM(blip);
  689. if (iip->ili_logged &&
  690. blip->li_lsn == iip->ili_flush_lsn) {
  691. log_items[i++] = blip;
  692. }
  693. ASSERT(i <= need_ail);
  694. }
  695. /* xfs_trans_ail_delete_bulk() drops the AIL lock. */
  696. xfs_trans_ail_delete_bulk(ailp, log_items, i,
  697. SHUTDOWN_CORRUPT_INCORE);
  698. }
  699. /*
  700. * clean up and unlock the flush lock now we are done. We can clear the
  701. * ili_last_fields bits now that we know that the data corresponding to
  702. * them is safely on disk.
  703. */
  704. for (blip = lip; blip; blip = next) {
  705. next = blip->li_bio_list;
  706. blip->li_bio_list = NULL;
  707. iip = INODE_ITEM(blip);
  708. iip->ili_logged = 0;
  709. iip->ili_last_fields = 0;
  710. xfs_ifunlock(iip->ili_inode);
  711. }
  712. }
  713. /*
  714. * This is the inode flushing abort routine. It is called from xfs_iflush when
  715. * the filesystem is shutting down to clean up the inode state. It is
  716. * responsible for removing the inode item from the AIL if it has not been
  717. * re-logged, and unlocking the inode's flush lock.
  718. */
  719. void
  720. xfs_iflush_abort(
  721. xfs_inode_t *ip,
  722. bool stale)
  723. {
  724. xfs_inode_log_item_t *iip = ip->i_itemp;
  725. if (iip) {
  726. struct xfs_ail *ailp = iip->ili_item.li_ailp;
  727. if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
  728. spin_lock(&ailp->xa_lock);
  729. if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
  730. /* xfs_trans_ail_delete() drops the AIL lock. */
  731. xfs_trans_ail_delete(ailp, &iip->ili_item,
  732. stale ?
  733. SHUTDOWN_LOG_IO_ERROR :
  734. SHUTDOWN_CORRUPT_INCORE);
  735. } else
  736. spin_unlock(&ailp->xa_lock);
  737. }
  738. iip->ili_logged = 0;
  739. /*
  740. * Clear the ili_last_fields bits now that we know that the
  741. * data corresponding to them is safely on disk.
  742. */
  743. iip->ili_last_fields = 0;
  744. /*
  745. * Clear the inode logging fields so no more flushes are
  746. * attempted.
  747. */
  748. iip->ili_fields = 0;
  749. }
  750. /*
  751. * Release the inode's flush lock since we're done with it.
  752. */
  753. xfs_ifunlock(ip);
  754. }
  755. void
  756. xfs_istale_done(
  757. struct xfs_buf *bp,
  758. struct xfs_log_item *lip)
  759. {
  760. xfs_iflush_abort(INODE_ITEM(lip)->ili_inode, true);
  761. }
  762. /*
  763. * convert an xfs_inode_log_format struct from either 32 or 64 bit versions
  764. * (which can have different field alignments) to the native version
  765. */
  766. int
  767. xfs_inode_item_format_convert(
  768. xfs_log_iovec_t *buf,
  769. xfs_inode_log_format_t *in_f)
  770. {
  771. if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) {
  772. xfs_inode_log_format_32_t *in_f32 = buf->i_addr;
  773. in_f->ilf_type = in_f32->ilf_type;
  774. in_f->ilf_size = in_f32->ilf_size;
  775. in_f->ilf_fields = in_f32->ilf_fields;
  776. in_f->ilf_asize = in_f32->ilf_asize;
  777. in_f->ilf_dsize = in_f32->ilf_dsize;
  778. in_f->ilf_ino = in_f32->ilf_ino;
  779. /* copy biggest field of ilf_u */
  780. memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
  781. in_f32->ilf_u.ilfu_uuid.__u_bits,
  782. sizeof(uuid_t));
  783. in_f->ilf_blkno = in_f32->ilf_blkno;
  784. in_f->ilf_len = in_f32->ilf_len;
  785. in_f->ilf_boffset = in_f32->ilf_boffset;
  786. return 0;
  787. } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){
  788. xfs_inode_log_format_64_t *in_f64 = buf->i_addr;
  789. in_f->ilf_type = in_f64->ilf_type;
  790. in_f->ilf_size = in_f64->ilf_size;
  791. in_f->ilf_fields = in_f64->ilf_fields;
  792. in_f->ilf_asize = in_f64->ilf_asize;
  793. in_f->ilf_dsize = in_f64->ilf_dsize;
  794. in_f->ilf_ino = in_f64->ilf_ino;
  795. /* copy biggest field of ilf_u */
  796. memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
  797. in_f64->ilf_u.ilfu_uuid.__u_bits,
  798. sizeof(uuid_t));
  799. in_f->ilf_blkno = in_f64->ilf_blkno;
  800. in_f->ilf_len = in_f64->ilf_len;
  801. in_f->ilf_boffset = in_f64->ilf_boffset;
  802. return 0;
  803. }
  804. return EFSCORRUPTED;
  805. }