xfs_iget.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060
  1. /*
  2. * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
  3. *
  4. * This program is free software; you can redistribute it and/or modify it
  5. * under the terms of version 2 of the GNU General Public License as
  6. * published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it would be useful, but
  9. * WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. *
  12. * Further, this software is distributed without any warranty that it is
  13. * free of the rightful claim of any third person regarding infringement
  14. * or the like. Any license provided herein, whether implied or
  15. * otherwise, applies only to this software file. Patent licenses, if
  16. * any, provided herein do not apply to combinations of this program with
  17. * other software, or any other product whatsoever.
  18. *
  19. * You should have received a copy of the GNU General Public License along
  20. * with this program; if not, write the Free Software Foundation, Inc., 59
  21. * Temple Place - Suite 330, Boston MA 02111-1307, USA.
  22. *
  23. * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
  24. * Mountain View, CA 94043, or:
  25. *
  26. * http://www.sgi.com
  27. *
  28. * For further information regarding this notice, see:
  29. *
  30. * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
  31. */
  32. #include "xfs.h"
  33. #include "xfs_macros.h"
  34. #include "xfs_types.h"
  35. #include "xfs_inum.h"
  36. #include "xfs_log.h"
  37. #include "xfs_trans.h"
  38. #include "xfs_sb.h"
  39. #include "xfs_ag.h"
  40. #include "xfs_dir.h"
  41. #include "xfs_dir2.h"
  42. #include "xfs_dmapi.h"
  43. #include "xfs_mount.h"
  44. #include "xfs_alloc_btree.h"
  45. #include "xfs_bmap_btree.h"
  46. #include "xfs_ialloc_btree.h"
  47. #include "xfs_btree.h"
  48. #include "xfs_ialloc.h"
  49. #include "xfs_attr_sf.h"
  50. #include "xfs_dir_sf.h"
  51. #include "xfs_dir2_sf.h"
  52. #include "xfs_dinode.h"
  53. #include "xfs_inode.h"
  54. #include "xfs_quota.h"
  55. #include "xfs_utils.h"
  56. #include "xfs_bit.h"
/*
 * Initialize the inode hash table for the newly mounted file system.
 * Choose an initial table size based on user specified value, else
 * use a simple algorithm using the maximum number of inodes as an
 * indicator for table size, and clamp it between one and some large
 * number of pages.
 */
void
xfs_ihash_init(xfs_mount_t *mp)
{
	__uint64_t	icount;
	uint		i, flags = KM_SLEEP | KM_MAYFAIL;

	if (!mp->m_ihsize) {
		/*
		 * No user-specified size: estimate the maximum inode
		 * count (either the configured cap or every data block
		 * packed with inodes) and size the table at roughly
		 * 2^((log2(icount)+1)/2) buckets, at least 256.
		 */
		icount = mp->m_maxicount ? mp->m_maxicount :
			 (mp->m_sb.sb_dblocks << mp->m_sb.sb_inopblog);
		mp->m_ihsize = 1 << max_t(uint, 8,
					(xfs_highbit64(icount) + 1) / 2);
		/* Clamp the table to at most 64 pages worth of buckets. */
		mp->m_ihsize = min_t(uint, mp->m_ihsize,
					(64 * NBPP) / sizeof(xfs_ihash_t));
	}

	/*
	 * Try the allocation with KM_MAYFAIL so large requests can fail
	 * gracefully; halve the table on each failure.  Once the request
	 * fits in a single page, switch to plain KM_SLEEP so the loop is
	 * guaranteed to terminate with a successful allocation.
	 */
	while (!(mp->m_ihash = (xfs_ihash_t *)kmem_zalloc(mp->m_ihsize *
						sizeof(xfs_ihash_t), flags))) {
		if ((mp->m_ihsize >>= 1) <= NBPP)
			flags = KM_SLEEP;
	}
	for (i = 0; i < mp->m_ihsize; i++) {
		rwlock_init(&(mp->m_ihash[i].ih_lock));
	}
}
  86. /*
  87. * Free up structures allocated by xfs_ihash_init, at unmount time.
  88. */
  89. void
  90. xfs_ihash_free(xfs_mount_t *mp)
  91. {
  92. kmem_free(mp->m_ihash, mp->m_ihsize*sizeof(xfs_ihash_t));
  93. mp->m_ihash = NULL;
  94. }
/*
 * Initialize the inode cluster hash table for the newly mounted file system.
 * Its size is derived from the ihash table size.
 */
void
xfs_chash_init(xfs_mount_t *mp)
{
	uint	i;

	/*
	 * Scale the ihash size down by the number of inodes per cluster
	 * (cluster bytes >> inode-size log), then clamp the result to
	 * the range [1, m_ihsize].
	 */
	mp->m_chsize = max_t(uint, 1, mp->m_ihsize /
			 (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog));
	mp->m_chsize = min_t(uint, mp->m_chsize, mp->m_ihsize);
	/* KM_SLEEP: block until the (zeroed) table can be allocated. */
	mp->m_chash = (xfs_chash_t *)kmem_zalloc(mp->m_chsize
						 * sizeof(xfs_chash_t),
						 KM_SLEEP);
	for (i = 0; i < mp->m_chsize; i++) {
		spinlock_init(&mp->m_chash[i].ch_lock,"xfshash");
	}
}
  113. /*
  114. * Free up structures allocated by xfs_chash_init, at unmount time.
  115. */
  116. void
  117. xfs_chash_free(xfs_mount_t *mp)
  118. {
  119. int i;
  120. for (i = 0; i < mp->m_chsize; i++) {
  121. spinlock_destroy(&mp->m_chash[i].ch_lock);
  122. }
  123. kmem_free(mp->m_chash, mp->m_chsize*sizeof(xfs_chash_t));
  124. mp->m_chash = NULL;
  125. }
/*
 * Try to move an inode to the front of its hash list if possible
 * (and if its not there already). Called right after obtaining
 * the list version number and then dropping the read_lock on the
 * hash list in question (which is done right after looking up the
 * inode in question...).
 */
STATIC void
xfs_ihash_promote(
	xfs_ihash_t	*ih,
	xfs_inode_t	*ip,
	ulong		version)
{
	xfs_inode_t	*iq;

	/*
	 * Only bother if ip is not already the list head, and only if
	 * the write lock can be taken without blocking -- promotion is
	 * purely an optimization, so it is fine to skip it.
	 */
	if ((ip->i_prevp != &ih->ih_next) && write_trylock(&ih->ih_lock)) {
		/*
		 * The chain version must not have moved since the caller
		 * sampled it under the read lock; otherwise ip may have
		 * been repositioned (or removed) and we must not touch
		 * the links.
		 */
		if (likely(version == ih->ih_version)) {
			/* remove from list */
			if ((iq = ip->i_next)) {
				iq->i_prevp = ip->i_prevp;
			}
			*ip->i_prevp = iq;
			/* insert at list head */
			/*
			 * ih->ih_next is non-NULL here: ip was not the
			 * head, so at least one other inode is still on
			 * the chain after ip's removal.
			 */
			iq = ih->ih_next;
			iq->i_prevp = &ip->i_next;
			ip->i_next = iq;
			ip->i_prevp = &ih->ih_next;
			ih->ih_next = ip;
		}
		write_unlock(&ih->ih_lock);
	}
}
/*
 * Look up an inode by number in the given file system.
 * The inode is looked up in the hash table for the file system
 * represented by the mount point parameter mp. Each bucket of
 * the hash table is guarded by an individual semaphore.
 *
 * If the inode is found in the hash table, its corresponding vnode
 * is obtained with a call to vn_get(). This call takes care of
 * coordination with the reclamation of the inode and vnode. Note
 * that the vmap structure is filled in while holding the hash lock.
 * This gives us the state of the inode/vnode when we found it and
 * is used for coordination in vn_get().
 *
 * If it is not in core, read it in from the file system's device and
 * add the inode into the hash table.
 *
 * The inode is locked according to the value of the lock_flags parameter.
 * This flag parameter indicates how and if the inode's IO lock and inode lock
 * should be taken.
 *
 * mp -- the mount point structure for the current file system. It points
 * to the inode hash table.
 * tp -- a pointer to the current transaction if there is one. This is
 * simply passed through to the xfs_iread() call.
 * ino -- the number of the inode desired. This is the unique identifier
 * within the file system for the inode being requested.
 * lock_flags -- flags indicating how to lock the inode. See the comment
 * for xfs_ilock() for a list of valid values.
 * bno -- the block number starting the buffer containing the inode,
 * if known (as by bulkstat), else 0.
 *
 * Returns 0 on success, ENOENT for a free inode without IGET_CREATE,
 * or the error from xfs_iread().  On success *ipp points at the inode.
 */
STATIC int
xfs_iget_core(
	vnode_t		*vp,
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_ino_t	ino,
	uint		flags,
	uint		lock_flags,
	xfs_inode_t	**ipp,
	xfs_daddr_t	bno)
{
	xfs_ihash_t	*ih;
	xfs_inode_t	*ip;
	xfs_inode_t	*iq;
	vnode_t		*inode_vp;
	ulong		version;
	int		error;
	/* REFERENCED */
	xfs_chash_t	*ch;
	xfs_chashlist_t	*chl, *chlnew;
	SPLDECL(s);

	ih = XFS_IHASH(mp, ino);
again:
	read_lock(&ih->ih_lock);
	for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
		if (ip->i_ino == ino) {
			/*
			 * If INEW is set this inode is being set up
			 * we need to pause and try again.
			 */
			if (ip->i_flags & XFS_INEW) {
				read_unlock(&ih->ih_lock);
				delay(1);
				XFS_STATS_INC(xs_ig_frecycle);
				goto again;
			}

			inode_vp = XFS_ITOV_NULL(ip);
			if (inode_vp == NULL) {
				/*
				 * If IRECLAIM is set this inode is
				 * on its way out of the system,
				 * we need to pause and try again.
				 */
				if (ip->i_flags & XFS_IRECLAIM) {
					read_unlock(&ih->ih_lock);
					delay(1);
					XFS_STATS_INC(xs_ig_frecycle);
					goto again;
				}

				/*
				 * Reclaimable inode with no vnode yet:
				 * resurrect it.  Sample the chain version
				 * before dropping the read lock so the
				 * promote below can detect concurrent
				 * chain changes, then pull it off the
				 * mount's reclaim list.
				 */
				vn_trace_exit(vp, "xfs_iget.alloc",
					(inst_t *)__return_address);

				XFS_STATS_INC(xs_ig_found);

				ip->i_flags &= ~XFS_IRECLAIMABLE;
				version = ih->ih_version;
				read_unlock(&ih->ih_lock);
				xfs_ihash_promote(ih, ip, version);

				XFS_MOUNT_ILOCK(mp);
				list_del_init(&ip->i_reclaim);
				XFS_MOUNT_IUNLOCK(mp);

				goto finish_inode;

			} else if (vp != inode_vp) {
				struct inode *inode = LINVFS_GET_IP(inode_vp);

				/* The inode is being torn down, pause and
				 * try again.
				 */
				if (inode->i_state & (I_FREEING | I_CLEAR)) {
					read_unlock(&ih->ih_lock);
					delay(1);
					XFS_STATS_INC(xs_ig_frecycle);

					goto again;
				}
/* Chances are the other vnode (the one in the inode) is being torn
 * down right now, and we landed on top of it. Question is, what do
 * we do? Unhook the old inode and hook up the new one?
 */
				cmn_err(CE_PANIC,
			"xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
						inode_vp, vp);
			}

			/*
			 * Inode cache hit: if ip is not at the front of
			 * its hash chain, move it there now.
			 * Do this with the lock held for update, but
			 * do statistics after releasing the lock.
			 */
			version = ih->ih_version;
			read_unlock(&ih->ih_lock);
			xfs_ihash_promote(ih, ip, version);
			XFS_STATS_INC(xs_ig_found);

finish_inode:
			/*
			 * di_mode == 0 means the on-disk inode is free;
			 * only IGET_CREATE callers may claim it.
			 */
			if (ip->i_d.di_mode == 0) {
				if (!(flags & IGET_CREATE))
					return ENOENT;
				xfs_iocore_inode_reinit(ip);
			}

			if (lock_flags != 0)
				xfs_ilock(ip, lock_flags);

			ip->i_flags &= ~XFS_ISTALE;

			vn_trace_exit(vp, "xfs_iget.found",
						(inst_t *)__return_address);
			goto return_ip;
		}
	}

	/*
	 * Inode cache miss: save the hash chain version stamp and unlock
	 * the chain, so we don't deadlock in vn_alloc.
	 */
	XFS_STATS_INC(xs_ig_missed);

	version = ih->ih_version;

	read_unlock(&ih->ih_lock);

	/*
	 * Read the disk inode attributes into a new inode structure and get
	 * a new vnode for it. This should also initialize i_ino and i_mount.
	 */
	error = xfs_iread(mp, tp, ino, &ip, bno);
	if (error) {
		return error;
	}

	vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address);

	xfs_inode_lock_init(ip, vp);
	xfs_iocore_inode_init(ip);

	if (lock_flags != 0) {
		xfs_ilock(ip, lock_flags);
	}

	if ((ip->i_d.di_mode == 0) && !(flags & IGET_CREATE)) {
		xfs_idestroy(ip);
		return ENOENT;
	}

	/*
	 * Put ip on its hash chain, unless someone else hashed a duplicate
	 * after we released the hash lock.
	 */
	write_lock(&ih->ih_lock);

	if (ih->ih_version != version) {
		/*
		 * Chain changed while we slept in xfs_iread(): rescan for
		 * a duplicate; if found, throw away our copy and retry
		 * the whole lookup from the top.
		 */
		for (iq = ih->ih_next; iq != NULL; iq = iq->i_next) {
			if (iq->i_ino == ino) {
				write_unlock(&ih->ih_lock);
				xfs_idestroy(ip);

				XFS_STATS_INC(xs_ig_dup);
				goto again;
			}
		}
	}

	/*
	 * These values _must_ be set before releasing ihlock!
	 */
	ip->i_hash = ih;
	if ((iq = ih->ih_next)) {
		iq->i_prevp = &ip->i_next;
	}
	ip->i_next = iq;
	ip->i_prevp = &ih->ih_next;
	ih->ih_next = ip;
	ip->i_udquot = ip->i_gdquot = NULL;
	ih->ih_version++;
	/* INEW keeps concurrent lookups off the inode until it's ready. */
	ip->i_flags |= XFS_INEW;

	write_unlock(&ih->ih_lock);

	/*
	 * put ip on its cluster's hash chain
	 */
	ASSERT(ip->i_chash == NULL && ip->i_cprev == NULL &&
	       ip->i_cnext == NULL);

	chlnew = NULL;
	ch = XFS_CHASH(mp, ip->i_blkno);
 chlredo:
	s = mutex_spinlock(&ch->ch_lock);
	for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) {
		if (chl->chl_blkno == ip->i_blkno) {

			/* insert this inode into the doubly-linked list
			 * where chl points */
			if ((iq = chl->chl_ip)) {
				ip->i_cprev = iq->i_cprev;
				iq->i_cprev->i_cnext = ip;
				iq->i_cprev = ip;
				ip->i_cnext = iq;
			} else {
				ip->i_cnext = ip;
				ip->i_cprev = ip;
			}
			chl->chl_ip = ip;
			ip->i_chash = chl;
			break;
		}
	}

	/* no hash list found for this block; add a new hash list */
	if (chl == NULL)  {
		if (chlnew == NULL) {
			/*
			 * Can't allocate under a spinlock: drop it, do a
			 * blocking KM_SLEEP allocation, and redo the scan
			 * (another CPU may have added the list meanwhile,
			 * in which case chlnew is freed below).
			 */
			mutex_spinunlock(&ch->ch_lock, s);
			ASSERT(xfs_chashlist_zone != NULL);
			chlnew = (xfs_chashlist_t *)
					kmem_zone_alloc(xfs_chashlist_zone,
						KM_SLEEP);
			ASSERT(chlnew != NULL);
			goto chlredo;
		} else {
			ip->i_cnext = ip;
			ip->i_cprev = ip;
			ip->i_chash = chlnew;
			chlnew->chl_ip = ip;
			chlnew->chl_blkno = ip->i_blkno;
			chlnew->chl_next = ch->ch_list;
			ch->ch_list = chlnew;
			chlnew = NULL;
		}
	} else {
		if (chlnew != NULL) {
			kmem_zone_free(xfs_chashlist_zone, chlnew);
		}
	}

	mutex_spinunlock(&ch->ch_lock, s);

	/*
	 * Link ip to its mount and thread it on the mount's inode list.
	 */
	XFS_MOUNT_ILOCK(mp);
	if ((iq = mp->m_inodes)) {
		ASSERT(iq->i_mprev->i_mnext == iq);
		ip->i_mprev = iq->i_mprev;
		iq->i_mprev->i_mnext = ip;
		iq->i_mprev = ip;
		ip->i_mnext = iq;
	} else {
		ip->i_mnext = ip;
		ip->i_mprev = ip;
	}
	mp->m_inodes = ip;

	XFS_MOUNT_IUNLOCK(mp);

return_ip:
	ASSERT(ip->i_df.if_ext_max ==
	       XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));

	ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
	       ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));

	*ipp = ip;

	/*
	 * If we have a real type for an on-disk inode, we can set ops(&unlock)
	 * now. If it's a new inode being created, xfs_ialloc will handle it.
	 */
	VFS_INIT_VNODE(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1);

	return 0;
}
/*
 * The 'normal' internal xfs_iget, if needed it will
 * 'allocate', or 'get', the vnode.
 *
 * Returns 0 on success with *ipp set, EIO for a previously marked-bad
 * inode, ENOMEM if no Linux inode could be obtained, or the error
 * propagated from xfs_iget_core().
 */
int
xfs_iget(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_ino_t	ino,
	uint		flags,
	uint		lock_flags,
	xfs_inode_t	**ipp,
	xfs_daddr_t	bno)
{
	struct inode	*inode;
	vnode_t		*vp = NULL;
	int		error;

	XFS_STATS_INC(xs_ig_attempts);

	if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) {
		bhv_desc_t	*bdp;
		xfs_inode_t	*ip;
		int		newnode;

		vp = LINVFS_GET_VP(inode);
		if (inode->i_state & I_NEW) {
			/*
			 * Freshly allocated Linux inode: build the XFS
			 * side.  On failure mark the vnode bad so later
			 * lookups see it, unlock if still I_NEW, and drop
			 * the reference iget_locked() gave us.
			 */
inode_allocate:
			vn_initialize(inode);
			error = xfs_iget_core(vp, mp, tp, ino, flags,
					lock_flags, ipp, bno);
			if (error) {
				vn_mark_bad(vp);
				if (inode->i_state & I_NEW)
					unlock_new_inode(inode);
				iput(inode);
			}
		} else {
			/* Cached inode previously marked bad: reject it. */
			if (is_bad_inode(inode)) {
				iput(inode);
				return EIO;
			}

			/*
			 * Cache hit, but the XFS behavior may have been
			 * removed by a racing reclaim; if so, treat this
			 * like a fresh allocation.
			 */
			bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
			if (bdp == NULL) {
				XFS_STATS_INC(xs_ig_dup);
				goto inode_allocate;
			}
			ip = XFS_BHVTOI(bdp);
			if (lock_flags != 0)
				xfs_ilock(ip, lock_flags);
			/* di_mode == 0: free inode being recycled; reinit. */
			newnode = (ip->i_d.di_mode == 0);
			if (newnode)
				xfs_iocore_inode_reinit(ip);
			XFS_STATS_INC(xs_ig_found);
			*ipp = ip;
			error = 0;
		}
	} else
		error = ENOMEM;	/* If we got no inode we are out of memory */

	return error;
}
  485. /*
  486. * Do the setup for the various locks within the incore inode.
  487. */
  488. void
  489. xfs_inode_lock_init(
  490. xfs_inode_t *ip,
  491. vnode_t *vp)
  492. {
  493. mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
  494. "xfsino", (long)vp->v_number);
  495. mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", vp->v_number);
  496. init_waitqueue_head(&ip->i_ipin_wait);
  497. atomic_set(&ip->i_pincount, 0);
  498. init_sema(&ip->i_flock, 1, "xfsfino", vp->v_number);
  499. }
/*
 * Look for the inode corresponding to the given ino in the hash table.
 * If it is there and its i_transp pointer matches tp, return it.
 * Otherwise, return NULL.
 */
xfs_inode_t *
xfs_inode_incore(xfs_mount_t	*mp,
		 xfs_ino_t	ino,
		 xfs_trans_t	*tp)
{
	xfs_ihash_t	*ih;
	xfs_inode_t	*ip;
	ulong		version;

	ih = XFS_IHASH(mp, ino);
	read_lock(&ih->ih_lock);
	for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
		if (ip->i_ino == ino) {
			/*
			 * If we find it and tp matches, return it.
			 * Also move it to the front of the hash list
			 * if we find it and it is not already there.
			 * Otherwise break from the loop and return
			 * NULL.
			 */
			if (ip->i_transp == tp) {
				/*
				 * Sample the chain version before dropping
				 * the read lock; xfs_ihash_promote() uses
				 * it to detect concurrent chain changes.
				 */
				version = ih->ih_version;
				read_unlock(&ih->ih_lock);
				xfs_ihash_promote(ih, ip, version);
				return (ip);
			}
			break;
		}
	}
	read_unlock(&ih->ih_lock);
	return (NULL);
}
  536. /*
  537. * Decrement reference count of an inode structure and unlock it.
  538. *
  539. * ip -- the inode being released
  540. * lock_flags -- this parameter indicates the inode's locks to be
  541. * to be released. See the comment on xfs_iunlock() for a list
  542. * of valid values.
  543. */
  544. void
  545. xfs_iput(xfs_inode_t *ip,
  546. uint lock_flags)
  547. {
  548. vnode_t *vp = XFS_ITOV(ip);
  549. vn_trace_entry(vp, "xfs_iput", (inst_t *)__return_address);
  550. xfs_iunlock(ip, lock_flags);
  551. VN_RELE(vp);
  552. }
  553. /*
  554. * Special iput for brand-new inodes that are still locked
  555. */
  556. void
  557. xfs_iput_new(xfs_inode_t *ip,
  558. uint lock_flags)
  559. {
  560. vnode_t *vp = XFS_ITOV(ip);
  561. struct inode *inode = LINVFS_GET_IP(vp);
  562. vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address);
  563. if ((ip->i_d.di_mode == 0)) {
  564. ASSERT(!(ip->i_flags & XFS_IRECLAIMABLE));
  565. vn_mark_bad(vp);
  566. }
  567. if (inode->i_state & I_NEW)
  568. unlock_new_inode(inode);
  569. if (lock_flags)
  570. xfs_iunlock(ip, lock_flags);
  571. VN_RELE(vp);
  572. }
/*
 * This routine embodies the part of the reclaim code that pulls
 * the inode from the inode hash table and the mount structure's
 * inode list.
 * This should only be called from xfs_reclaim().
 */
void
xfs_ireclaim(xfs_inode_t *ip)
{
	vnode_t		*vp;

	/*
	 * Remove from old hash list and mount list.
	 */
	XFS_STATS_INC(xs_ig_reclaims);

	xfs_iextract(ip);

	/*
	 * Here we do a spurious inode lock in order to coordinate with
	 * xfs_sync(). This is because xfs_sync() references the inodes
	 * in the mount list without taking references on the corresponding
	 * vnodes. We make that OK here by ensuring that we wait until
	 * the inode is unlocked in xfs_sync() before we go ahead and
	 * free it. We get both the regular lock and the io lock because
	 * the xfs_sync() code may need to drop the regular one but will
	 * still hold the io lock.
	 */
	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);

	/*
	 * Release dquots (and their references) if any. An inode may escape
	 * xfs_inactive and get here via vn_alloc->vn_reclaim path.
	 */
	XFS_QM_DQDETACH(ip->i_mount, ip);

	/*
	 * Pull our behavior descriptor from the vnode chain.
	 * The vnode may already be gone (NULL) at this point.
	 */
	vp = XFS_ITOV_NULL(ip);
	if (vp) {
		vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
	}

	/*
	 * Free all memory associated with the inode.
	 * Note: the locks taken above die with the inode.
	 */
	xfs_idestroy(ip);
}
/*
 * This routine removes an about-to-be-destroyed inode from
 * all of the lists in which it is located with the exception
 * of the behavior chain.
 */
void
xfs_iextract(
	xfs_inode_t	*ip)
{
	xfs_ihash_t	*ih;
	xfs_inode_t	*iq;
	xfs_mount_t	*mp;
	xfs_chash_t	*ch;
	xfs_chashlist_t	*chl, *chm;
	SPLDECL(s);

	/*
	 * Unhook from the inode hash chain and bump the chain version
	 * so concurrent xfs_ihash_promote() callers notice the change.
	 */
	ih = ip->i_hash;
	write_lock(&ih->ih_lock);
	if ((iq = ip->i_next)) {
		iq->i_prevp = ip->i_prevp;
	}
	*ip->i_prevp = iq;
	ih->ih_version++;
	write_unlock(&ih->ih_lock);

	/*
	 * Remove from cluster hash list
	 *  1) delete the chashlist if this is the last inode on the chashlist
	 *  2) unchain from list of inodes
	 *  3) point chashlist->chl_ip to 'chl_next' if to this inode.
	 */
	mp = ip->i_mount;
	ch = XFS_CHASH(mp, ip->i_blkno);
	s = mutex_spinlock(&ch->ch_lock);

	if (ip->i_cnext == ip) {
		/* Last inode on chashlist */
		ASSERT(ip->i_cnext == ip && ip->i_cprev == ip);
		ASSERT(ip->i_chash != NULL);
		chm=NULL;
		/*
		 * Walk the singly-linked bucket list tracking the
		 * predecessor (chm) so the matching chashlist can be
		 * spliced out and freed.
		 */
		for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) {
			if (chl->chl_blkno == ip->i_blkno) {
				if (chm == NULL) {
					/* first item on the list */
					ch->ch_list = chl->chl_next;
				} else {
					chm->chl_next = chl->chl_next;
				}
				kmem_zone_free(xfs_chashlist_zone, chl);
				break;
			} else {
				ASSERT(chl->chl_ip != ip);
				chm = chl;
			}
		}
		/* The inode's chashlist must exist on this bucket. */
		ASSERT_ALWAYS(chl != NULL);
       } else {
		/* delete one inode from a non-empty list */
		iq = ip->i_cnext;
		iq->i_cprev = ip->i_cprev;
		ip->i_cprev->i_cnext = iq;
		if (ip->i_chash->chl_ip == ip) {
			ip->i_chash->chl_ip = iq;
		}
		/*
		 * Poison the cluster links with the caller's address so
		 * a use-after-extract shows who tore the inode down.
		 */
		ip->i_chash = __return_address;
		ip->i_cprev = __return_address;
		ip->i_cnext = __return_address;
	}
	mutex_spinunlock(&ch->ch_lock, s);

	/*
	 * Remove from mount's inode list (circular doubly-linked).
	 */
	XFS_MOUNT_ILOCK(mp);
	ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL));
	iq = ip->i_mnext;
	iq->i_mprev = ip->i_mprev;
	ip->i_mprev->i_mnext = iq;

	/*
	 * Fix up the head pointer if it points to the inode being deleted.
	 * ip == iq means ip was the only inode on the list.
	 */
	if (mp->m_inodes == ip) {
		if (ip == iq) {
			mp->m_inodes = NULL;
		} else {
			mp->m_inodes = iq;
		}
	}

	/* Deal with the deleted inodes list */
	list_del_init(&ip->i_reclaim);

	mp->m_ireclaims++;
	XFS_MOUNT_IUNLOCK(mp);
}
  705. /*
  706. * This is a wrapper routine around the xfs_ilock() routine
  707. * used to centralize some grungy code. It is used in places
  708. * that wish to lock the inode solely for reading the extents.
  709. * The reason these places can't just call xfs_ilock(SHARED)
  710. * is that the inode lock also guards to bringing in of the
  711. * extents from disk for a file in b-tree format. If the inode
  712. * is in b-tree format, then we need to lock the inode exclusively
  713. * until the extents are read in. Locking it exclusively all
  714. * the time would limit our parallelism unnecessarily, though.
  715. * What we do instead is check to see if the extents have been
  716. * read in yet, and only lock the inode exclusively if they
  717. * have not.
  718. *
  719. * The function returns a value which should be given to the
  720. * corresponding xfs_iunlock_map_shared(). This value is
  721. * the mode in which the lock was actually taken.
  722. */
  723. uint
  724. xfs_ilock_map_shared(
  725. xfs_inode_t *ip)
  726. {
  727. uint lock_mode;
  728. if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) &&
  729. ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) {
  730. lock_mode = XFS_ILOCK_EXCL;
  731. } else {
  732. lock_mode = XFS_ILOCK_SHARED;
  733. }
  734. xfs_ilock(ip, lock_mode);
  735. return lock_mode;
  736. }
  737. /*
  738. * This is simply the unlock routine to go with xfs_ilock_map_shared().
  739. * All it does is call xfs_iunlock() with the given lock_mode.
  740. */
  741. void
  742. xfs_iunlock_map_shared(
  743. xfs_inode_t *ip,
  744. unsigned int lock_mode)
  745. {
  746. xfs_iunlock(ip, lock_mode);
  747. }
/*
 * The xfs inode contains 2 locks: a multi-reader lock called the
 * i_iolock and a multi-reader lock called the i_lock.  This routine
 * allows either or both of the locks to be obtained.
 *
 * The 2 locks should always be ordered so that the IO lock is
 * obtained first in order to prevent deadlock.
 *
 * ip -- the inode being locked
 * lock_flags -- this parameter indicates the inode's locks
 *        to be locked.  It can be:
 *		XFS_IOLOCK_SHARED,
 *		XFS_IOLOCK_EXCL,
 *		XFS_ILOCK_SHARED,
 *		XFS_ILOCK_EXCL,
 *		XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
 *		XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
 *		XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
 *		XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
 */
void
xfs_ilock(xfs_inode_t	*ip,
	  uint		lock_flags)
{
	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~XFS_LOCK_MASK) == 0);

	/* IO lock first -- this ordering is the deadlock-avoidance rule. */
	if (lock_flags & XFS_IOLOCK_EXCL) {
		mrupdate(&ip->i_iolock);
	} else if (lock_flags & XFS_IOLOCK_SHARED) {
		mraccess(&ip->i_iolock);
	}
	if (lock_flags & XFS_ILOCK_EXCL) {
		mrupdate(&ip->i_lock);
	} else if (lock_flags & XFS_ILOCK_SHARED) {
		mraccess(&ip->i_lock);
	}
	xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address);
}
/*
 * This is just like xfs_ilock(), except that the caller
 * is guaranteed not to sleep.  It returns 1 if it gets
 * the requested locks and 0 otherwise.  If the IO lock is
 * obtained but the inode lock cannot be, then the IO lock
 * is dropped before returning.
 *
 * ip -- the inode being locked
 * lock_flags -- this parameter indicates the inode's locks to be
 *       to be locked.  See the comment for xfs_ilock() for a list
 *	 of valid values.
 *
 */
int
xfs_ilock_nowait(xfs_inode_t	*ip,
		 uint		lock_flags)
{
	int	iolocked;
	int	ilocked;

	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	ASSERT((lock_flags & ~XFS_LOCK_MASK) == 0);

	/* Same ordering as xfs_ilock(): IO lock before inode lock. */
	iolocked = 0;
	if (lock_flags & XFS_IOLOCK_EXCL) {
		iolocked = mrtryupdate(&ip->i_iolock);
		if (!iolocked) {
			return 0;
		}
	} else if (lock_flags & XFS_IOLOCK_SHARED) {
		iolocked = mrtryaccess(&ip->i_iolock);
		if (!iolocked) {
			return 0;
		}
	}
	if (lock_flags & XFS_ILOCK_EXCL) {
		ilocked = mrtryupdate(&ip->i_lock);
		if (!ilocked) {
			/* Back out the IO lock so we hold nothing on failure. */
			if (iolocked) {
				mrunlock(&ip->i_iolock);
			}
			return 0;
		}
	} else if (lock_flags & XFS_ILOCK_SHARED) {
		ilocked = mrtryaccess(&ip->i_lock);
		if (!ilocked) {
			if (iolocked) {
				mrunlock(&ip->i_iolock);
			}
			return 0;
		}
	}
	xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address);
	return 1;
}
/*
 * xfs_iunlock() is used to drop the inode locks acquired with
 * xfs_ilock() and xfs_ilock_nowait().  The caller must pass
 * in the flags given to xfs_ilock() or xfs_ilock_nowait() so
 * that we know which locks to drop.
 *
 * ip -- the inode being unlocked
 * lock_flags -- this parameter indicates the inode's locks to be
 *       to be unlocked.  See the comment for xfs_ilock() for a list
 *	 of valid values for this parameter.
 *
 */
void
xfs_iunlock(xfs_inode_t	*ip,
	    uint	lock_flags)
{
	/*
	 * You can't set both SHARED and EXCL for the same lock,
	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
	 */
	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
	/* XFS_IUNLOCK_NONOTIFY is additionally allowed here (see below). */
	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY)) == 0);
	ASSERT(lock_flags != 0);

	if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) {
		ASSERT(!(lock_flags & XFS_IOLOCK_SHARED) ||
		       (ismrlocked(&ip->i_iolock, MR_ACCESS)));
		ASSERT(!(lock_flags & XFS_IOLOCK_EXCL) ||
		       (ismrlocked(&ip->i_iolock, MR_UPDATE)));
		mrunlock(&ip->i_iolock);
	}

	if (lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) {
		ASSERT(!(lock_flags & XFS_ILOCK_SHARED) ||
		       (ismrlocked(&ip->i_lock, MR_ACCESS)));
		ASSERT(!(lock_flags & XFS_ILOCK_EXCL) ||
		       (ismrlocked(&ip->i_lock, MR_UPDATE)));
		mrunlock(&ip->i_lock);

		/*
		 * Let the AIL know that this item has been unlocked in case
		 * it is in the AIL and anyone is waiting on it.  Don't do
		 * this if the caller has asked us not to.
		 */
		if (!(lock_flags & XFS_IUNLOCK_NONOTIFY) &&
		     ip->i_itemp != NULL) {
			xfs_trans_unlocked_item(ip->i_mount,
						(xfs_log_item_t*)(ip->i_itemp));
		}
	}
	xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);
}
  908. /*
  909. * give up write locks. the i/o lock cannot be held nested
  910. * if it is being demoted.
  911. */
  912. void
  913. xfs_ilock_demote(xfs_inode_t *ip,
  914. uint lock_flags)
  915. {
  916. ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
  917. ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
  918. if (lock_flags & XFS_ILOCK_EXCL) {
  919. ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
  920. mrdemote(&ip->i_lock);
  921. }
  922. if (lock_flags & XFS_IOLOCK_EXCL) {
  923. ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
  924. mrdemote(&ip->i_iolock);
  925. }
  926. }
  927. /*
  928. * The following three routines simply manage the i_flock
  929. * semaphore embedded in the inode. This semaphore synchronizes
  930. * processes attempting to flush the in-core inode back to disk.
  931. */
  932. void
  933. xfs_iflock(xfs_inode_t *ip)
  934. {
  935. psema(&(ip->i_flock), PINOD|PLTWAIT);
  936. }
  937. int
  938. xfs_iflock_nowait(xfs_inode_t *ip)
  939. {
  940. return (cpsema(&(ip->i_flock)));
  941. }
  942. void
  943. xfs_ifunlock(xfs_inode_t *ip)
  944. {
  945. ASSERT(valusema(&(ip->i_flock)) <= 0);
  946. vsema(&(ip->i_flock));
  947. }