xfs_inode_item.c 29 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081
  1. /*
  2. * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
  3. * All Rights Reserved.
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU General Public License as
  7. * published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it would be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write the Free Software Foundation,
  16. * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "xfs.h"
  19. #include "xfs_fs.h"
  20. #include "xfs_types.h"
  21. #include "xfs_bit.h"
  22. #include "xfs_log.h"
  23. #include "xfs_inum.h"
  24. #include "xfs_trans.h"
  25. #include "xfs_buf_item.h"
  26. #include "xfs_sb.h"
  27. #include "xfs_ag.h"
  28. #include "xfs_dir.h"
  29. #include "xfs_dir2.h"
  30. #include "xfs_dmapi.h"
  31. #include "xfs_mount.h"
  32. #include "xfs_trans_priv.h"
  33. #include "xfs_bmap_btree.h"
  34. #include "xfs_alloc_btree.h"
  35. #include "xfs_ialloc_btree.h"
  36. #include "xfs_dir_sf.h"
  37. #include "xfs_dir2_sf.h"
  38. #include "xfs_attr_sf.h"
  39. #include "xfs_dinode.h"
  40. #include "xfs_inode.h"
  41. #include "xfs_inode_item.h"
  42. #include "xfs_btree.h"
  43. #include "xfs_ialloc.h"
  44. #include "xfs_rw.h"
  45. kmem_zone_t *xfs_ili_zone; /* inode log item zone */
  46. /*
  47. * This returns the number of iovecs needed to log the given inode item.
  48. *
  49. * We need one iovec for the inode log format structure, one for the
  50. * inode core, and possibly one for the inode data/extents/b-tree root
  51. * and one for the inode attribute data/extents/b-tree root.
  52. */
  53. STATIC uint
  54. xfs_inode_item_size(
  55. xfs_inode_log_item_t *iip)
  56. {
  57. uint nvecs;
  58. xfs_inode_t *ip;
  59. ip = iip->ili_inode;
  60. nvecs = 2;
  61. /*
  62. * Only log the data/extents/b-tree root if there is something
  63. * left to log.
  64. */
  65. iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
  66. switch (ip->i_d.di_format) {
  67. case XFS_DINODE_FMT_EXTENTS:
  68. iip->ili_format.ilf_fields &=
  69. ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
  70. XFS_ILOG_DEV | XFS_ILOG_UUID);
  71. if ((iip->ili_format.ilf_fields & XFS_ILOG_DEXT) &&
  72. (ip->i_d.di_nextents > 0) &&
  73. (ip->i_df.if_bytes > 0)) {
  74. ASSERT(ip->i_df.if_u1.if_extents != NULL);
  75. nvecs++;
  76. } else {
  77. iip->ili_format.ilf_fields &= ~XFS_ILOG_DEXT;
  78. }
  79. break;
  80. case XFS_DINODE_FMT_BTREE:
  81. ASSERT(ip->i_df.if_ext_max ==
  82. XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
  83. iip->ili_format.ilf_fields &=
  84. ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
  85. XFS_ILOG_DEV | XFS_ILOG_UUID);
  86. if ((iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) &&
  87. (ip->i_df.if_broot_bytes > 0)) {
  88. ASSERT(ip->i_df.if_broot != NULL);
  89. nvecs++;
  90. } else {
  91. ASSERT(!(iip->ili_format.ilf_fields &
  92. XFS_ILOG_DBROOT));
  93. #ifdef XFS_TRANS_DEBUG
  94. if (iip->ili_root_size > 0) {
  95. ASSERT(iip->ili_root_size ==
  96. ip->i_df.if_broot_bytes);
  97. ASSERT(memcmp(iip->ili_orig_root,
  98. ip->i_df.if_broot,
  99. iip->ili_root_size) == 0);
  100. } else {
  101. ASSERT(ip->i_df.if_broot_bytes == 0);
  102. }
  103. #endif
  104. iip->ili_format.ilf_fields &= ~XFS_ILOG_DBROOT;
  105. }
  106. break;
  107. case XFS_DINODE_FMT_LOCAL:
  108. iip->ili_format.ilf_fields &=
  109. ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |
  110. XFS_ILOG_DEV | XFS_ILOG_UUID);
  111. if ((iip->ili_format.ilf_fields & XFS_ILOG_DDATA) &&
  112. (ip->i_df.if_bytes > 0)) {
  113. ASSERT(ip->i_df.if_u1.if_data != NULL);
  114. ASSERT(ip->i_d.di_size > 0);
  115. nvecs++;
  116. } else {
  117. iip->ili_format.ilf_fields &= ~XFS_ILOG_DDATA;
  118. }
  119. break;
  120. case XFS_DINODE_FMT_DEV:
  121. iip->ili_format.ilf_fields &=
  122. ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
  123. XFS_ILOG_DEXT | XFS_ILOG_UUID);
  124. break;
  125. case XFS_DINODE_FMT_UUID:
  126. iip->ili_format.ilf_fields &=
  127. ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
  128. XFS_ILOG_DEXT | XFS_ILOG_DEV);
  129. break;
  130. default:
  131. ASSERT(0);
  132. break;
  133. }
  134. /*
  135. * If there are no attributes associated with this file,
  136. * then there cannot be anything more to log.
  137. * Clear all attribute-related log flags.
  138. */
  139. if (!XFS_IFORK_Q(ip)) {
  140. iip->ili_format.ilf_fields &=
  141. ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
  142. return nvecs;
  143. }
  144. /*
  145. * Log any necessary attribute data.
  146. */
  147. switch (ip->i_d.di_aformat) {
  148. case XFS_DINODE_FMT_EXTENTS:
  149. iip->ili_format.ilf_fields &=
  150. ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
  151. if ((iip->ili_format.ilf_fields & XFS_ILOG_AEXT) &&
  152. (ip->i_d.di_anextents > 0) &&
  153. (ip->i_afp->if_bytes > 0)) {
  154. ASSERT(ip->i_afp->if_u1.if_extents != NULL);
  155. nvecs++;
  156. } else {
  157. iip->ili_format.ilf_fields &= ~XFS_ILOG_AEXT;
  158. }
  159. break;
  160. case XFS_DINODE_FMT_BTREE:
  161. iip->ili_format.ilf_fields &=
  162. ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
  163. if ((iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) &&
  164. (ip->i_afp->if_broot_bytes > 0)) {
  165. ASSERT(ip->i_afp->if_broot != NULL);
  166. nvecs++;
  167. } else {
  168. iip->ili_format.ilf_fields &= ~XFS_ILOG_ABROOT;
  169. }
  170. break;
  171. case XFS_DINODE_FMT_LOCAL:
  172. iip->ili_format.ilf_fields &=
  173. ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
  174. if ((iip->ili_format.ilf_fields & XFS_ILOG_ADATA) &&
  175. (ip->i_afp->if_bytes > 0)) {
  176. ASSERT(ip->i_afp->if_u1.if_data != NULL);
  177. nvecs++;
  178. } else {
  179. iip->ili_format.ilf_fields &= ~XFS_ILOG_ADATA;
  180. }
  181. break;
  182. default:
  183. ASSERT(0);
  184. break;
  185. }
  186. return nvecs;
  187. }
  188. /*
  189. * This is called to fill in the vector of log iovecs for the
  190. * given inode log item. It fills the first item with an inode
  191. * log format structure, the second with the on-disk inode structure,
  192. * and a possible third and/or fourth with the inode data/extents/b-tree
  193. * root and inode attributes data/extents/b-tree root.
  194. */
  195. STATIC void
  196. xfs_inode_item_format(
  197. xfs_inode_log_item_t *iip,
  198. xfs_log_iovec_t *log_vector)
  199. {
  200. uint nvecs;
  201. xfs_log_iovec_t *vecp;
  202. xfs_inode_t *ip;
  203. size_t data_bytes;
  204. xfs_bmbt_rec_t *ext_buffer;
  205. int nrecs;
  206. xfs_mount_t *mp;
  207. ip = iip->ili_inode;
  208. vecp = log_vector;
  209. vecp->i_addr = (xfs_caddr_t)&iip->ili_format;
  210. vecp->i_len = sizeof(xfs_inode_log_format_t);
  211. XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT);
  212. vecp++;
  213. nvecs = 1;
  214. /*
  215. * Clear i_update_core if the timestamps (or any other
  216. * non-transactional modification) need flushing/logging
  217. * and we're about to log them with the rest of the core.
  218. *
  219. * This is the same logic as xfs_iflush() but this code can't
  220. * run at the same time as xfs_iflush because we're in commit
  221. * processing here and so we have the inode lock held in
  222. * exclusive mode. Although it doesn't really matter
  223. * for the timestamps if both routines were to grab the
  224. * timestamps or not. That would be ok.
  225. *
  226. * We clear i_update_core before copying out the data.
  227. * This is for coordination with our timestamp updates
  228. * that don't hold the inode lock. They will always
  229. * update the timestamps BEFORE setting i_update_core,
  230. * so if we clear i_update_core after they set it we
  231. * are guaranteed to see their updates to the timestamps
  232. * either here. Likewise, if they set it after we clear it
  233. * here, we'll see it either on the next commit of this
  234. * inode or the next time the inode gets flushed via
  235. * xfs_iflush(). This depends on strongly ordered memory
  236. * semantics, but we have that. We use the SYNCHRONIZE
  237. * macro to make sure that the compiler does not reorder
  238. * the i_update_core access below the data copy below.
  239. */
  240. if (ip->i_update_core) {
  241. ip->i_update_core = 0;
  242. SYNCHRONIZE();
  243. }
  244. /*
  245. * We don't have to worry about re-ordering here because
  246. * the update_size field is protected by the inode lock
  247. * and we have that held in exclusive mode.
  248. */
  249. if (ip->i_update_size)
  250. ip->i_update_size = 0;
  251. vecp->i_addr = (xfs_caddr_t)&ip->i_d;
  252. vecp->i_len = sizeof(xfs_dinode_core_t);
  253. XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
  254. vecp++;
  255. nvecs++;
  256. iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
  257. /*
  258. * If this is really an old format inode, then we need to
  259. * log it as such. This means that we have to copy the link
  260. * count from the new field to the old. We don't have to worry
  261. * about the new fields, because nothing trusts them as long as
  262. * the old inode version number is there. If the superblock already
  263. * has a new version number, then we don't bother converting back.
  264. */
  265. mp = ip->i_mount;
  266. ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 ||
  267. XFS_SB_VERSION_HASNLINK(&mp->m_sb));
  268. if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
  269. if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
  270. /*
  271. * Convert it back.
  272. */
  273. ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
  274. ip->i_d.di_onlink = ip->i_d.di_nlink;
  275. } else {
  276. /*
  277. * The superblock version has already been bumped,
  278. * so just make the conversion to the new inode
  279. * format permanent.
  280. */
  281. ip->i_d.di_version = XFS_DINODE_VERSION_2;
  282. ip->i_d.di_onlink = 0;
  283. memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
  284. }
  285. }
  286. switch (ip->i_d.di_format) {
  287. case XFS_DINODE_FMT_EXTENTS:
  288. ASSERT(!(iip->ili_format.ilf_fields &
  289. (XFS_ILOG_DDATA | XFS_ILOG_DBROOT |
  290. XFS_ILOG_DEV | XFS_ILOG_UUID)));
  291. if (iip->ili_format.ilf_fields & XFS_ILOG_DEXT) {
  292. ASSERT(ip->i_df.if_bytes > 0);
  293. ASSERT(ip->i_df.if_u1.if_extents != NULL);
  294. ASSERT(ip->i_d.di_nextents > 0);
  295. ASSERT(iip->ili_extents_buf == NULL);
  296. nrecs = ip->i_df.if_bytes /
  297. (uint)sizeof(xfs_bmbt_rec_t);
  298. ASSERT(nrecs > 0);
  299. #ifdef XFS_NATIVE_HOST
  300. if (nrecs == ip->i_d.di_nextents) {
  301. /*
  302. * There are no delayed allocation
  303. * extents, so just point to the
  304. * real extents array.
  305. */
  306. vecp->i_addr =
  307. (char *)(ip->i_df.if_u1.if_extents);
  308. vecp->i_len = ip->i_df.if_bytes;
  309. XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
  310. } else
  311. #endif
  312. {
  313. /*
  314. * There are delayed allocation extents
  315. * in the inode, or we need to convert
  316. * the extents to on disk format.
  317. * Use xfs_iextents_copy()
  318. * to copy only the real extents into
  319. * a separate buffer. We'll free the
  320. * buffer in the unlock routine.
  321. */
  322. ext_buffer = kmem_alloc(ip->i_df.if_bytes,
  323. KM_SLEEP);
  324. iip->ili_extents_buf = ext_buffer;
  325. vecp->i_addr = (xfs_caddr_t)ext_buffer;
  326. vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
  327. XFS_DATA_FORK);
  328. XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
  329. }
  330. ASSERT(vecp->i_len <= ip->i_df.if_bytes);
  331. iip->ili_format.ilf_dsize = vecp->i_len;
  332. vecp++;
  333. nvecs++;
  334. }
  335. break;
  336. case XFS_DINODE_FMT_BTREE:
  337. ASSERT(!(iip->ili_format.ilf_fields &
  338. (XFS_ILOG_DDATA | XFS_ILOG_DEXT |
  339. XFS_ILOG_DEV | XFS_ILOG_UUID)));
  340. if (iip->ili_format.ilf_fields & XFS_ILOG_DBROOT) {
  341. ASSERT(ip->i_df.if_broot_bytes > 0);
  342. ASSERT(ip->i_df.if_broot != NULL);
  343. vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot;
  344. vecp->i_len = ip->i_df.if_broot_bytes;
  345. XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT);
  346. vecp++;
  347. nvecs++;
  348. iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
  349. }
  350. break;
  351. case XFS_DINODE_FMT_LOCAL:
  352. ASSERT(!(iip->ili_format.ilf_fields &
  353. (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
  354. XFS_ILOG_DEV | XFS_ILOG_UUID)));
  355. if (iip->ili_format.ilf_fields & XFS_ILOG_DDATA) {
  356. ASSERT(ip->i_df.if_bytes > 0);
  357. ASSERT(ip->i_df.if_u1.if_data != NULL);
  358. ASSERT(ip->i_d.di_size > 0);
  359. vecp->i_addr = (xfs_caddr_t)ip->i_df.if_u1.if_data;
  360. /*
  361. * Round i_bytes up to a word boundary.
  362. * The underlying memory is guaranteed to
  363. * to be there by xfs_idata_realloc().
  364. */
  365. data_bytes = roundup(ip->i_df.if_bytes, 4);
  366. ASSERT((ip->i_df.if_real_bytes == 0) ||
  367. (ip->i_df.if_real_bytes == data_bytes));
  368. vecp->i_len = (int)data_bytes;
  369. XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL);
  370. vecp++;
  371. nvecs++;
  372. iip->ili_format.ilf_dsize = (unsigned)data_bytes;
  373. }
  374. break;
  375. case XFS_DINODE_FMT_DEV:
  376. ASSERT(!(iip->ili_format.ilf_fields &
  377. (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
  378. XFS_ILOG_DDATA | XFS_ILOG_UUID)));
  379. if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
  380. iip->ili_format.ilf_u.ilfu_rdev =
  381. ip->i_df.if_u2.if_rdev;
  382. }
  383. break;
  384. case XFS_DINODE_FMT_UUID:
  385. ASSERT(!(iip->ili_format.ilf_fields &
  386. (XFS_ILOG_DBROOT | XFS_ILOG_DEXT |
  387. XFS_ILOG_DDATA | XFS_ILOG_DEV)));
  388. if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
  389. iip->ili_format.ilf_u.ilfu_uuid =
  390. ip->i_df.if_u2.if_uuid;
  391. }
  392. break;
  393. default:
  394. ASSERT(0);
  395. break;
  396. }
  397. /*
  398. * If there are no attributes associated with the file,
  399. * then we're done.
  400. * Assert that no attribute-related log flags are set.
  401. */
  402. if (!XFS_IFORK_Q(ip)) {
  403. ASSERT(nvecs == iip->ili_item.li_desc->lid_size);
  404. iip->ili_format.ilf_size = nvecs;
  405. ASSERT(!(iip->ili_format.ilf_fields &
  406. (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
  407. return;
  408. }
  409. switch (ip->i_d.di_aformat) {
  410. case XFS_DINODE_FMT_EXTENTS:
  411. ASSERT(!(iip->ili_format.ilf_fields &
  412. (XFS_ILOG_ADATA | XFS_ILOG_ABROOT)));
  413. if (iip->ili_format.ilf_fields & XFS_ILOG_AEXT) {
  414. ASSERT(ip->i_afp->if_bytes > 0);
  415. ASSERT(ip->i_afp->if_u1.if_extents != NULL);
  416. ASSERT(ip->i_d.di_anextents > 0);
  417. #ifdef DEBUG
  418. nrecs = ip->i_afp->if_bytes /
  419. (uint)sizeof(xfs_bmbt_rec_t);
  420. #endif
  421. ASSERT(nrecs > 0);
  422. ASSERT(nrecs == ip->i_d.di_anextents);
  423. #ifdef XFS_NATIVE_HOST
  424. /*
  425. * There are not delayed allocation extents
  426. * for attributes, so just point at the array.
  427. */
  428. vecp->i_addr = (char *)(ip->i_afp->if_u1.if_extents);
  429. vecp->i_len = ip->i_afp->if_bytes;
  430. #else
  431. ASSERT(iip->ili_aextents_buf == NULL);
  432. /*
  433. * Need to endian flip before logging
  434. */
  435. ext_buffer = kmem_alloc(ip->i_afp->if_bytes,
  436. KM_SLEEP);
  437. iip->ili_aextents_buf = ext_buffer;
  438. vecp->i_addr = (xfs_caddr_t)ext_buffer;
  439. vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
  440. XFS_ATTR_FORK);
  441. #endif
  442. XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT);
  443. iip->ili_format.ilf_asize = vecp->i_len;
  444. vecp++;
  445. nvecs++;
  446. }
  447. break;
  448. case XFS_DINODE_FMT_BTREE:
  449. ASSERT(!(iip->ili_format.ilf_fields &
  450. (XFS_ILOG_ADATA | XFS_ILOG_AEXT)));
  451. if (iip->ili_format.ilf_fields & XFS_ILOG_ABROOT) {
  452. ASSERT(ip->i_afp->if_broot_bytes > 0);
  453. ASSERT(ip->i_afp->if_broot != NULL);
  454. vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot;
  455. vecp->i_len = ip->i_afp->if_broot_bytes;
  456. XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT);
  457. vecp++;
  458. nvecs++;
  459. iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
  460. }
  461. break;
  462. case XFS_DINODE_FMT_LOCAL:
  463. ASSERT(!(iip->ili_format.ilf_fields &
  464. (XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
  465. if (iip->ili_format.ilf_fields & XFS_ILOG_ADATA) {
  466. ASSERT(ip->i_afp->if_bytes > 0);
  467. ASSERT(ip->i_afp->if_u1.if_data != NULL);
  468. vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_u1.if_data;
  469. /*
  470. * Round i_bytes up to a word boundary.
  471. * The underlying memory is guaranteed to
  472. * to be there by xfs_idata_realloc().
  473. */
  474. data_bytes = roundup(ip->i_afp->if_bytes, 4);
  475. ASSERT((ip->i_afp->if_real_bytes == 0) ||
  476. (ip->i_afp->if_real_bytes == data_bytes));
  477. vecp->i_len = (int)data_bytes;
  478. XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL);
  479. vecp++;
  480. nvecs++;
  481. iip->ili_format.ilf_asize = (unsigned)data_bytes;
  482. }
  483. break;
  484. default:
  485. ASSERT(0);
  486. break;
  487. }
  488. ASSERT(nvecs == iip->ili_item.li_desc->lid_size);
  489. iip->ili_format.ilf_size = nvecs;
  490. }
  491. /*
  492. * This is called to pin the inode associated with the inode log
  493. * item in memory so it cannot be written out. Do this by calling
  494. * xfs_ipin() to bump the pin count in the inode while holding the
  495. * inode pin lock.
  496. */
  497. STATIC void
  498. xfs_inode_item_pin(
  499. xfs_inode_log_item_t *iip)
  500. {
  501. ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE));
  502. xfs_ipin(iip->ili_inode);
  503. }
  504. /*
  505. * This is called to unpin the inode associated with the inode log
  506. * item which was previously pinned with a call to xfs_inode_item_pin().
  507. * Just call xfs_iunpin() on the inode to do this.
  508. */
  509. /* ARGSUSED */
  510. STATIC void
  511. xfs_inode_item_unpin(
  512. xfs_inode_log_item_t *iip,
  513. int stale)
  514. {
  515. xfs_iunpin(iip->ili_inode);
  516. }
  517. /* ARGSUSED */
  518. STATIC void
  519. xfs_inode_item_unpin_remove(
  520. xfs_inode_log_item_t *iip,
  521. xfs_trans_t *tp)
  522. {
  523. xfs_iunpin(iip->ili_inode);
  524. }
  525. /*
  526. * This is called to attempt to lock the inode associated with this
  527. * inode log item, in preparation for the push routine which does the actual
  528. * iflush. Don't sleep on the inode lock or the flush lock.
  529. *
  530. * If the flush lock is already held, indicating that the inode has
  531. * been or is in the process of being flushed, then (ideally) we'd like to
  532. * see if the inode's buffer is still incore, and if so give it a nudge.
  533. * We delay doing so until the pushbuf routine, though, to avoid holding
  534. * the AIL lock across a call to the blackhole which is the buffercache.
  535. * Also we don't want to sleep in any device strategy routines, which can happen
  536. * if we do the subsequent bawrite in here.
  537. */
  538. STATIC uint
  539. xfs_inode_item_trylock(
  540. xfs_inode_log_item_t *iip)
  541. {
  542. register xfs_inode_t *ip;
  543. ip = iip->ili_inode;
  544. if (xfs_ipincount(ip) > 0) {
  545. return XFS_ITEM_PINNED;
  546. }
  547. if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
  548. return XFS_ITEM_LOCKED;
  549. }
  550. if (!xfs_iflock_nowait(ip)) {
  551. /*
  552. * If someone else isn't already trying to push the inode
  553. * buffer, we get to do it.
  554. */
  555. if (iip->ili_pushbuf_flag == 0) {
  556. iip->ili_pushbuf_flag = 1;
  557. #ifdef DEBUG
  558. iip->ili_push_owner = get_thread_id();
  559. #endif
  560. /*
  561. * Inode is left locked in shared mode.
  562. * Pushbuf routine gets to unlock it.
  563. */
  564. return XFS_ITEM_PUSHBUF;
  565. } else {
  566. /*
  567. * We hold the AIL_LOCK, so we must specify the
  568. * NONOTIFY flag so that we won't double trip.
  569. */
  570. xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
  571. return XFS_ITEM_FLUSHING;
  572. }
  573. /* NOTREACHED */
  574. }
  575. /* Stale items should force out the iclog */
  576. if (ip->i_flags & XFS_ISTALE) {
  577. xfs_ifunlock(ip);
  578. xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
  579. return XFS_ITEM_PINNED;
  580. }
  581. #ifdef DEBUG
  582. if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
  583. ASSERT(iip->ili_format.ilf_fields != 0);
  584. ASSERT(iip->ili_logged == 0);
  585. ASSERT(iip->ili_item.li_flags & XFS_LI_IN_AIL);
  586. }
  587. #endif
  588. return XFS_ITEM_SUCCESS;
  589. }
  590. /*
  591. * Unlock the inode associated with the inode log item.
  592. * Clear the fields of the inode and inode log item that
  593. * are specific to the current transaction. If the
  594. * hold flags is set, do not unlock the inode.
  595. */
  596. STATIC void
  597. xfs_inode_item_unlock(
  598. xfs_inode_log_item_t *iip)
  599. {
  600. uint hold;
  601. uint iolocked;
  602. uint lock_flags;
  603. xfs_inode_t *ip;
  604. ASSERT(iip != NULL);
  605. ASSERT(iip->ili_inode->i_itemp != NULL);
  606. ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE));
  607. ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
  608. XFS_ILI_IOLOCKED_EXCL)) ||
  609. ismrlocked(&(iip->ili_inode->i_iolock), MR_UPDATE));
  610. ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
  611. XFS_ILI_IOLOCKED_SHARED)) ||
  612. ismrlocked(&(iip->ili_inode->i_iolock), MR_ACCESS));
  613. /*
  614. * Clear the transaction pointer in the inode.
  615. */
  616. ip = iip->ili_inode;
  617. ip->i_transp = NULL;
  618. /*
  619. * If the inode needed a separate buffer with which to log
  620. * its extents, then free it now.
  621. */
  622. if (iip->ili_extents_buf != NULL) {
  623. ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS);
  624. ASSERT(ip->i_d.di_nextents > 0);
  625. ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT);
  626. ASSERT(ip->i_df.if_bytes > 0);
  627. kmem_free(iip->ili_extents_buf, ip->i_df.if_bytes);
  628. iip->ili_extents_buf = NULL;
  629. }
  630. if (iip->ili_aextents_buf != NULL) {
  631. ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS);
  632. ASSERT(ip->i_d.di_anextents > 0);
  633. ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT);
  634. ASSERT(ip->i_afp->if_bytes > 0);
  635. kmem_free(iip->ili_aextents_buf, ip->i_afp->if_bytes);
  636. iip->ili_aextents_buf = NULL;
  637. }
  638. /*
  639. * Figure out if we should unlock the inode or not.
  640. */
  641. hold = iip->ili_flags & XFS_ILI_HOLD;
  642. /*
  643. * Before clearing out the flags, remember whether we
  644. * are holding the inode's IO lock.
  645. */
  646. iolocked = iip->ili_flags & XFS_ILI_IOLOCKED_ANY;
  647. /*
  648. * Clear out the fields of the inode log item particular
  649. * to the current transaction.
  650. */
  651. iip->ili_ilock_recur = 0;
  652. iip->ili_iolock_recur = 0;
  653. iip->ili_flags = 0;
  654. /*
  655. * Unlock the inode if XFS_ILI_HOLD was not set.
  656. */
  657. if (!hold) {
  658. lock_flags = XFS_ILOCK_EXCL;
  659. if (iolocked & XFS_ILI_IOLOCKED_EXCL) {
  660. lock_flags |= XFS_IOLOCK_EXCL;
  661. } else if (iolocked & XFS_ILI_IOLOCKED_SHARED) {
  662. lock_flags |= XFS_IOLOCK_SHARED;
  663. }
  664. xfs_iput(iip->ili_inode, lock_flags);
  665. }
  666. }
  667. /*
  668. * This is called to find out where the oldest active copy of the
  669. * inode log item in the on disk log resides now that the last log
  670. * write of it completed at the given lsn. Since we always re-log
  671. * all dirty data in an inode, the latest copy in the on disk log
  672. * is the only one that matters. Therefore, simply return the
  673. * given lsn.
  674. */
  675. /*ARGSUSED*/
  676. STATIC xfs_lsn_t
  677. xfs_inode_item_committed(
  678. xfs_inode_log_item_t *iip,
  679. xfs_lsn_t lsn)
  680. {
  681. return (lsn);
  682. }
  683. /*
  684. * The transaction with the inode locked has aborted. The inode
  685. * must not be dirty within the transaction (unless we're forcibly
  686. * shutting down). We simply unlock just as if the transaction
  687. * had been cancelled.
  688. */
  689. STATIC void
  690. xfs_inode_item_abort(
  691. xfs_inode_log_item_t *iip)
  692. {
  693. xfs_inode_item_unlock(iip);
  694. return;
  695. }
  696. /*
  697. * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK
  698. * failed to get the inode flush lock but did get the inode locked SHARED.
  699. * Here we're trying to see if the inode buffer is incore, and if so whether it's
  700. * marked delayed write. If that's the case, we'll initiate a bawrite on that
  701. * buffer to expedite the process.
  702. *
  703. * We aren't holding the AIL_LOCK (or the flush lock) when this gets called,
  704. * so it is inherently race-y.
  705. */
  706. STATIC void
  707. xfs_inode_item_pushbuf(
  708. xfs_inode_log_item_t *iip)
  709. {
  710. xfs_inode_t *ip;
  711. xfs_mount_t *mp;
  712. xfs_buf_t *bp;
  713. uint dopush;
  714. ip = iip->ili_inode;
  715. ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS));
  716. /*
  717. * The ili_pushbuf_flag keeps others from
  718. * trying to duplicate our effort.
  719. */
  720. ASSERT(iip->ili_pushbuf_flag != 0);
  721. ASSERT(iip->ili_push_owner == get_thread_id());
  722. /*
  723. * If flushlock isn't locked anymore, chances are that the
  724. * inode flush completed and the inode was taken off the AIL.
  725. * So, just get out.
  726. */
  727. if ((valusema(&(ip->i_flock)) > 0) ||
  728. ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
  729. iip->ili_pushbuf_flag = 0;
  730. xfs_iunlock(ip, XFS_ILOCK_SHARED);
  731. return;
  732. }
  733. mp = ip->i_mount;
  734. bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno,
  735. iip->ili_format.ilf_len, XFS_INCORE_TRYLOCK);
  736. if (bp != NULL) {
  737. if (XFS_BUF_ISDELAYWRITE(bp)) {
  738. /*
  739. * We were racing with iflush because we don't hold
  740. * the AIL_LOCK or the flush lock. However, at this point,
  741. * we have the buffer, and we know that it's dirty.
  742. * So, it's possible that iflush raced with us, and
  743. * this item is already taken off the AIL.
  744. * If not, we can flush it async.
  745. */
  746. dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
  747. (valusema(&(ip->i_flock)) <= 0));
  748. iip->ili_pushbuf_flag = 0;
  749. xfs_iunlock(ip, XFS_ILOCK_SHARED);
  750. xfs_buftrace("INODE ITEM PUSH", bp);
  751. if (XFS_BUF_ISPINNED(bp)) {
  752. xfs_log_force(mp, (xfs_lsn_t)0,
  753. XFS_LOG_FORCE);
  754. }
  755. if (dopush) {
  756. xfs_bawrite(mp, bp);
  757. } else {
  758. xfs_buf_relse(bp);
  759. }
  760. } else {
  761. iip->ili_pushbuf_flag = 0;
  762. xfs_iunlock(ip, XFS_ILOCK_SHARED);
  763. xfs_buf_relse(bp);
  764. }
  765. return;
  766. }
  767. /*
  768. * We have to be careful about resetting pushbuf flag too early (above).
  769. * Even though in theory we can do it as soon as we have the buflock,
  770. * we don't want others to be doing work needlessly. They'll come to
  771. * this function thinking that pushing the buffer is their
  772. * responsibility only to find that the buffer is still locked by
  773. * another doing the same thing
  774. */
  775. iip->ili_pushbuf_flag = 0;
  776. xfs_iunlock(ip, XFS_ILOCK_SHARED);
  777. return;
  778. }
  779. /*
  780. * This is called to asynchronously write the inode associated with this
  781. * inode log item out to disk. The inode will already have been locked by
  782. * a successful call to xfs_inode_item_trylock().
  783. */
  784. STATIC void
  785. xfs_inode_item_push(
  786. xfs_inode_log_item_t *iip)
  787. {
  788. xfs_inode_t *ip;
  789. ip = iip->ili_inode;
  790. ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS));
  791. ASSERT(valusema(&(ip->i_flock)) <= 0);
  792. /*
  793. * Since we were able to lock the inode's flush lock and
  794. * we found it on the AIL, the inode must be dirty. This
  795. * is because the inode is removed from the AIL while still
  796. * holding the flush lock in xfs_iflush_done(). Thus, if
  797. * we found it in the AIL and were able to obtain the flush
  798. * lock without sleeping, then there must not have been
  799. * anyone in the process of flushing the inode.
  800. */
  801. ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) ||
  802. iip->ili_format.ilf_fields != 0);
  803. /*
  804. * Write out the inode. The completion routine ('iflush_done') will
  805. * pull it from the AIL, mark it clean, unlock the flush lock.
  806. */
  807. (void) xfs_iflush(ip, XFS_IFLUSH_ASYNC);
  808. xfs_iunlock(ip, XFS_ILOCK_SHARED);
  809. return;
  810. }
  811. /*
  812. * XXX rcc - this one really has to do something. Probably needs
  813. * to stamp in a new field in the incore inode.
  814. */
  815. /* ARGSUSED */
  816. STATIC void
  817. xfs_inode_item_committing(
  818. xfs_inode_log_item_t *iip,
  819. xfs_lsn_t lsn)
  820. {
  821. iip->ili_last_lsn = lsn;
  822. return;
  823. }
  824. /*
  825. * This is the ops vector shared by all buf log items.
  826. */
  827. STATIC struct xfs_item_ops xfs_inode_item_ops = {
  828. .iop_size = (uint(*)(xfs_log_item_t*))xfs_inode_item_size,
  829. .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
  830. xfs_inode_item_format,
  831. .iop_pin = (void(*)(xfs_log_item_t*))xfs_inode_item_pin,
  832. .iop_unpin = (void(*)(xfs_log_item_t*, int))xfs_inode_item_unpin,
  833. .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
  834. xfs_inode_item_unpin_remove,
  835. .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_inode_item_trylock,
  836. .iop_unlock = (void(*)(xfs_log_item_t*))xfs_inode_item_unlock,
  837. .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
  838. xfs_inode_item_committed,
  839. .iop_push = (void(*)(xfs_log_item_t*))xfs_inode_item_push,
  840. .iop_abort = (void(*)(xfs_log_item_t*))xfs_inode_item_abort,
  841. .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_inode_item_pushbuf,
  842. .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
  843. xfs_inode_item_committing
  844. };
  845. /*
  846. * Initialize the inode log item for a newly allocated (in-core) inode.
  847. */
  848. void
  849. xfs_inode_item_init(
  850. xfs_inode_t *ip,
  851. xfs_mount_t *mp)
  852. {
  853. xfs_inode_log_item_t *iip;
  854. ASSERT(ip->i_itemp == NULL);
  855. iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
  856. iip->ili_item.li_type = XFS_LI_INODE;
  857. iip->ili_item.li_ops = &xfs_inode_item_ops;
  858. iip->ili_item.li_mountp = mp;
  859. iip->ili_inode = ip;
  860. /*
  861. We have zeroed memory. No need ...
  862. iip->ili_extents_buf = NULL;
  863. iip->ili_pushbuf_flag = 0;
  864. */
  865. iip->ili_format.ilf_type = XFS_LI_INODE;
  866. iip->ili_format.ilf_ino = ip->i_ino;
  867. iip->ili_format.ilf_blkno = ip->i_blkno;
  868. iip->ili_format.ilf_len = ip->i_len;
  869. iip->ili_format.ilf_boffset = ip->i_boffset;
  870. }
  871. /*
  872. * Free the inode log item and any memory hanging off of it.
  873. */
  874. void
  875. xfs_inode_item_destroy(
  876. xfs_inode_t *ip)
  877. {
  878. #ifdef XFS_TRANS_DEBUG
  879. if (ip->i_itemp->ili_root_size != 0) {
  880. kmem_free(ip->i_itemp->ili_orig_root,
  881. ip->i_itemp->ili_root_size);
  882. }
  883. #endif
  884. kmem_zone_free(xfs_ili_zone, ip->i_itemp);
  885. }
  886. /*
  887. * This is the inode flushing I/O completion routine. It is called
  888. * from interrupt level when the buffer containing the inode is
  889. * flushed to disk. It is responsible for removing the inode item
  890. * from the AIL if it has not been re-logged, and unlocking the inode's
  891. * flush lock.
  892. */
  893. /*ARGSUSED*/
  894. void
  895. xfs_iflush_done(
  896. xfs_buf_t *bp,
  897. xfs_inode_log_item_t *iip)
  898. {
  899. xfs_inode_t *ip;
  900. SPLDECL(s);
  901. ip = iip->ili_inode;
  902. /*
  903. * We only want to pull the item from the AIL if it is
  904. * actually there and its location in the log has not
  905. * changed since we started the flush. Thus, we only bother
  906. * if the ili_logged flag is set and the inode's lsn has not
  907. * changed. First we check the lsn outside
  908. * the lock since it's cheaper, and then we recheck while
  909. * holding the lock before removing the inode from the AIL.
  910. */
  911. if (iip->ili_logged &&
  912. (iip->ili_item.li_lsn == iip->ili_flush_lsn)) {
  913. AIL_LOCK(ip->i_mount, s);
  914. if (iip->ili_item.li_lsn == iip->ili_flush_lsn) {
  915. /*
  916. * xfs_trans_delete_ail() drops the AIL lock.
  917. */
  918. xfs_trans_delete_ail(ip->i_mount,
  919. (xfs_log_item_t*)iip, s);
  920. } else {
  921. AIL_UNLOCK(ip->i_mount, s);
  922. }
  923. }
  924. iip->ili_logged = 0;
  925. /*
  926. * Clear the ili_last_fields bits now that we know that the
  927. * data corresponding to them is safely on disk.
  928. */
  929. iip->ili_last_fields = 0;
  930. /*
  931. * Release the inode's flush lock since we're done with it.
  932. */
  933. xfs_ifunlock(ip);
  934. return;
  935. }
  936. /*
  937. * This is the inode flushing abort routine. It is called
  938. * from xfs_iflush when the filesystem is shutting down to clean
  939. * up the inode state.
  940. * It is responsible for removing the inode item
  941. * from the AIL if it has not been re-logged, and unlocking the inode's
  942. * flush lock.
  943. */
  944. void
  945. xfs_iflush_abort(
  946. xfs_inode_t *ip)
  947. {
  948. xfs_inode_log_item_t *iip;
  949. xfs_mount_t *mp;
  950. SPLDECL(s);
  951. iip = ip->i_itemp;
  952. mp = ip->i_mount;
  953. if (iip) {
  954. if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
  955. AIL_LOCK(mp, s);
  956. if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
  957. /*
  958. * xfs_trans_delete_ail() drops the AIL lock.
  959. */
  960. xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip,
  961. s);
  962. } else
  963. AIL_UNLOCK(mp, s);
  964. }
  965. iip->ili_logged = 0;
  966. /*
  967. * Clear the ili_last_fields bits now that we know that the
  968. * data corresponding to them is safely on disk.
  969. */
  970. iip->ili_last_fields = 0;
  971. /*
  972. * Clear the inode logging fields so no more flushes are
  973. * attempted.
  974. */
  975. iip->ili_format.ilf_fields = 0;
  976. }
  977. /*
  978. * Release the inode's flush lock since we're done with it.
  979. */
  980. xfs_ifunlock(ip);
  981. }
  982. void
  983. xfs_istale_done(
  984. xfs_buf_t *bp,
  985. xfs_inode_log_item_t *iip)
  986. {
  987. xfs_iflush_abort(iip->ili_inode);
  988. }