Browse Source

Merge branch 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6

* 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6: (62 commits)
  [XFS] add __init/__exit mark to specific init/cleanup functions
  [XFS] Fix oops in xfs_file_readdir()
  [XFS] kill xfs_root
  [XFS] keep i_nlink updated and use proper accessors
  [XFS] stop updating inode->i_blocks
  [XFS] Make xfs_ail_check check less by default
  [XFS] Move AIL pushing into it's own thread
  [XFS] use generic_permission
  [XFS] stop re-checking permissions in xfs_swapext
  [XFS] clean up xfs_swapext
  [XFS] remove permission check from xfs_change_file_space
  [XFS] prevent panic during log recovery due to bogus op_hdr length
  [XFS] Cleanup various fid related bits:
  [XFS] Fix xfs_lowbit64
  [XFS] Remove CFORK macros and use code directly in IFORK and DFORK macros.
  [XFS] kill superflous buffer locking (2nd attempt)
  [XFS] Use kernel-supplied "roundup_pow_of_two" for simplicity
  [XFS] Remove the BPCSHIFT and NB* based macros from XFS.
  [XFS] Remove bogus assert
  [XFS] optimize XFS_IS_REALTIME_INODE w/o realtime config
  ...
Linus Torvalds 17 years ago
parent
commit
0b61a2ba5d
85 changed files with 2250 additions and 3124 deletions
  1. 0 1
      fs/xfs/Makefile-linux-2.6
  2. 0 45
      fs/xfs/linux-2.6/spin.h
  3. 24 19
      fs/xfs/linux-2.6/xfs_aops.c
  4. 6 51
      fs/xfs/linux-2.6/xfs_buf.c
  5. 0 1
      fs/xfs/linux-2.6/xfs_buf.h
  6. 17 8
      fs/xfs/linux-2.6/xfs_export.c
  7. 1 2
      fs/xfs/linux-2.6/xfs_file.c
  8. 2 1
      fs/xfs/linux-2.6/xfs_globals.c
  9. 31 55
      fs/xfs/linux-2.6/xfs_ioctl.c
  10. 6 3
      fs/xfs/linux-2.6/xfs_ioctl32.c
  11. 136 34
      fs/xfs/linux-2.6/xfs_iops.c
  12. 3 31
      fs/xfs/linux-2.6/xfs_linux.h
  13. 46 76
      fs/xfs/linux-2.6/xfs_lrw.c
  14. 7 9
      fs/xfs/linux-2.6/xfs_lrw.h
  15. 549 23
      fs/xfs/linux-2.6/xfs_super.c
  16. 44 73
      fs/xfs/linux-2.6/xfs_vnode.c
  17. 32 30
      fs/xfs/linux-2.6/xfs_vnode.h
  18. 5 7
      fs/xfs/quota/xfs_dquot.c
  19. 0 5
      fs/xfs/quota/xfs_dquot.h
  20. 11 16
      fs/xfs/quota/xfs_dquot_item.c
  21. 6 8
      fs/xfs/quota/xfs_qm.c
  22. 3 3
      fs/xfs/quota/xfs_qm.h
  23. 8 11
      fs/xfs/quota/xfs_qm_syscalls.c
  24. 6 1
      fs/xfs/support/debug.c
  25. 2 6
      fs/xfs/support/ktrace.c
  26. 0 3
      fs/xfs/support/ktrace.h
  27. 1 1
      fs/xfs/support/uuid.c
  28. 1 1
      fs/xfs/xfs.h
  29. 2 28
      fs/xfs/xfs_acl.c
  30. 0 2
      fs/xfs/xfs_acl.h
  31. 1 1
      fs/xfs/xfs_ag.h
  32. 8 11
      fs/xfs/xfs_alloc.c
  33. 1 1
      fs/xfs/xfs_attr.c
  34. 3 5
      fs/xfs/xfs_attr_leaf.c
  35. 0 103
      fs/xfs/xfs_bit.c
  36. 22 5
      fs/xfs/xfs_bit.h
  37. 10 12
      fs/xfs/xfs_bmap.c
  38. 2 0
      fs/xfs/xfs_bmap.h
  39. 1 2
      fs/xfs/xfs_bmap_btree.c
  40. 2 0
      fs/xfs/xfs_btree.h
  41. 4 6
      fs/xfs/xfs_buf_item.c
  42. 2 0
      fs/xfs/xfs_buf_item.h
  43. 5 8
      fs/xfs/xfs_da_btree.c
  44. 1 0
      fs/xfs/xfs_da_btree.h
  45. 35 49
      fs/xfs/xfs_dfrag.c
  46. 27 55
      fs/xfs/xfs_dinode.h
  47. 2 1
      fs/xfs/xfs_dir2.c
  48. 0 31
      fs/xfs/xfs_error.c
  49. 2 0
      fs/xfs/xfs_error.h
  50. 9 12
      fs/xfs/xfs_extfree_item.c
  51. 1 1
      fs/xfs/xfs_filestream.c
  52. 7 3
      fs/xfs/xfs_fs.h
  53. 5 8
      fs/xfs/xfs_fsops.c
  54. 0 2
      fs/xfs/xfs_ialloc_btree.h
  55. 73 112
      fs/xfs/xfs_iget.c
  56. 54 171
      fs/xfs/xfs_inode.c
  57. 40 58
      fs/xfs/xfs_inode.h
  58. 14 12
      fs/xfs/xfs_inode_item.c
  59. 0 119
      fs/xfs/xfs_iocore.c
  60. 108 104
      fs/xfs/xfs_iomap.c
  61. 1 4
      fs/xfs/xfs_iomap.h
  62. 6 6
      fs/xfs/xfs_itable.c
  63. 147 156
      fs/xfs/xfs_log.c
  64. 2 1
      fs/xfs/xfs_log.h
  65. 38 58
      fs/xfs/xfs_log_priv.h
  66. 92 105
      fs/xfs/xfs_log_recover.c
  67. 194 150
      fs/xfs/xfs_mount.c
  68. 11 116
      fs/xfs/xfs_mount.h
  69. 32 22
      fs/xfs/xfs_mru_cache.c
  70. 3 4
      fs/xfs/xfs_qmops.c
  71. 5 4
      fs/xfs/xfs_rename.c
  72. 7 12
      fs/xfs/xfs_rtalloc.c
  73. 0 2
      fs/xfs/xfs_rtalloc.h
  74. 2 10
      fs/xfs/xfs_rw.h
  75. 3 4
      fs/xfs/xfs_trans.c
  76. 5 2
      fs/xfs/xfs_trans.h
  77. 220 120
      fs/xfs/xfs_trans_ail.c
  78. 1 0
      fs/xfs/xfs_trans_item.c
  79. 10 3
      fs/xfs/xfs_trans_priv.h
  80. 6 5
      fs/xfs/xfs_utils.c
  81. 0 2
      fs/xfs/xfs_utils.h
  82. 29 764
      fs/xfs/xfs_vfsops.c
  83. 1 8
      fs/xfs/xfs_vfsops.h
  84. 47 118
      fs/xfs/xfs_vnodeops.c
  85. 0 2
      fs/xfs/xfs_vnodeops.h

+ 0 - 1
fs/xfs/Makefile-linux-2.6

@@ -70,7 +70,6 @@ xfs-y				+= xfs_alloc.o \
 				   xfs_iget.o \
 				   xfs_inode.o \
 				   xfs_inode_item.o \
-				   xfs_iocore.o \
 				   xfs_iomap.o \
 				   xfs_itable.o \
 				   xfs_dfrag.o \

+ 0 - 45
fs/xfs/linux-2.6/spin.h

@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_SPIN_H__
-#define __XFS_SUPPORT_SPIN_H__
-
-#include <linux/sched.h>	/* preempt needs this */
-#include <linux/spinlock.h>
-
-/*
- * Map lock_t from IRIX to Linux spinlocks.
- *
- * We do not make use of lock_t from interrupt context, so we do not
- * have to worry about disabling interrupts at all (unlike IRIX).
- */
-
-typedef spinlock_t lock_t;
-
-#define SPLDECL(s)			unsigned long s
-#ifndef DEFINE_SPINLOCK
-#define DEFINE_SPINLOCK(s)		spinlock_t s = SPIN_LOCK_UNLOCKED
-#endif
-
-#define spinlock_init(lock, name)	spin_lock_init(lock)
-#define	spinlock_destroy(lock)
-#define mutex_spinlock(lock)		({ spin_lock(lock); 0; })
-#define mutex_spinunlock(lock, s)	do { spin_unlock(lock); (void)s; } while (0)
-#define nested_spinlock(lock)		spin_lock(lock)
-#define nested_spinunlock(lock)		spin_unlock(lock)
-
-#endif /* __XFS_SUPPORT_SPIN_H__ */

+ 24 - 19
fs/xfs/linux-2.6/xfs_aops.c

@@ -107,6 +107,18 @@ xfs_page_trace(
 #define xfs_page_trace(tag, inode, page, pgoff)
 #endif
 
+STATIC struct block_device *
+xfs_find_bdev_for_inode(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	if (XFS_IS_REALTIME_INODE(ip))
+		return mp->m_rtdev_targp->bt_bdev;
+	else
+		return mp->m_ddev_targp->bt_bdev;
+}
+
 /*
  * Schedule IO completion handling on a xfsdatad if this was
  * the final hold on this ioend. If we are asked to wait,
@@ -151,7 +163,7 @@ xfs_destroy_ioend(
 /*
  * Update on-disk file size now that data has been written to disk.
  * The current in-memory file size is i_size.  If a write is beyond
- * eof io_new_size will be the intended file size until i_size is
+ * eof i_new_size will be the intended file size until i_size is
  * updated.  If this write does not extend all the way to the valid
  * file size then restrict this update to the end of the write.
  */
@@ -173,7 +185,7 @@ xfs_setfilesize(
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 
-	isize = MAX(ip->i_size, ip->i_iocore.io_new_size);
+	isize = MAX(ip->i_size, ip->i_new_size);
 	isize = MIN(isize, bsize);
 
 	if (ip->i_d.di_size < isize) {
@@ -226,12 +238,13 @@ xfs_end_bio_unwritten(
 {
 	xfs_ioend_t		*ioend =
 		container_of(work, xfs_ioend_t, io_work);
+	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
 	xfs_off_t		offset = ioend->io_offset;
 	size_t			size = ioend->io_size;
 
 	if (likely(!ioend->io_error)) {
-		xfs_bmap(XFS_I(ioend->io_inode), offset, size,
-				BMAPI_UNWRITTEN, NULL, NULL);
+		if (!XFS_FORCED_SHUTDOWN(ip->i_mount))
+			xfs_iomap_write_unwritten(ip, offset, size);
 		xfs_setfilesize(ioend);
 	}
 	xfs_destroy_ioend(ioend);
@@ -304,7 +317,7 @@ xfs_map_blocks(
 	xfs_inode_t		*ip = XFS_I(inode);
 	int			error, nmaps = 1;
 
-	error = xfs_bmap(ip, offset, count,
+	error = xfs_iomap(ip, offset, count,
 				flags, mapp, &nmaps);
 	if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)))
 		xfs_iflags_set(ip, XFS_IMODIFIED);
@@ -1323,7 +1336,7 @@ __xfs_get_blocks(
 	offset = (xfs_off_t)iblock << inode->i_blkbits;
 	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
 	size = bh_result->b_size;
-	error = xfs_bmap(XFS_I(inode), offset, size,
+	error = xfs_iomap(XFS_I(inode), offset, size,
 			     create ? flags : BMAPI_READ, &iomap, &niomap);
 	if (error)
 		return -error;
@@ -1471,28 +1484,21 @@ xfs_vm_direct_IO(
 {
 	struct file	*file = iocb->ki_filp;
 	struct inode	*inode = file->f_mapping->host;
-	xfs_iomap_t	iomap;
-	int		maps = 1;
-	int		error;
+	struct block_device *bdev;
 	ssize_t		ret;
 
-	error = xfs_bmap(XFS_I(inode), offset, 0,
-				BMAPI_DEVICE, &iomap, &maps);
-	if (error)
-		return -error;
+	bdev = xfs_find_bdev_for_inode(XFS_I(inode));
 
 	if (rw == WRITE) {
 		iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
 		ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
-			iomap.iomap_target->bt_bdev,
-			iov, offset, nr_segs,
+			bdev, iov, offset, nr_segs,
 			xfs_get_blocks_direct,
 			xfs_end_io_direct);
 	} else {
 		iocb->private = xfs_alloc_ioend(inode, IOMAP_READ);
 		ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
-			iomap.iomap_target->bt_bdev,
-			iov, offset, nr_segs,
+			bdev, iov, offset, nr_segs,
 			xfs_get_blocks_direct,
 			xfs_end_io_direct);
 	}
@@ -1525,8 +1531,7 @@ xfs_vm_bmap(
 	struct inode		*inode = (struct inode *)mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
 
-	vn_trace_entry(XFS_I(inode), __FUNCTION__,
-			(inst_t *)__return_address);
+	xfs_itrace_entry(XFS_I(inode));
 	xfs_rwlock(ip, VRWLOCK_READ);
 	xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
 	xfs_rwunlock(ip, VRWLOCK_READ);

+ 6 - 51
fs/xfs/linux-2.6/xfs_buf.c

@@ -387,8 +387,6 @@ _xfs_buf_lookup_pages(
 		if (unlikely(page == NULL)) {
 			if (flags & XBF_READ_AHEAD) {
 				bp->b_page_count = i;
-				for (i = 0; i < bp->b_page_count; i++)
-					unlock_page(bp->b_pages[i]);
 				return -ENOMEM;
 			}
 
@@ -418,24 +416,17 @@ _xfs_buf_lookup_pages(
 		ASSERT(!PagePrivate(page));
 		if (!PageUptodate(page)) {
 			page_count--;
-			if (blocksize >= PAGE_CACHE_SIZE) {
-				if (flags & XBF_READ)
-					bp->b_locked = 1;
-			} else if (!PagePrivate(page)) {
+			if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) {
 				if (test_page_region(page, offset, nbytes))
 					page_count++;
 			}
 		}
 
+		unlock_page(page);
 		bp->b_pages[i] = page;
 		offset = 0;
 	}
 
-	if (!bp->b_locked) {
-		for (i = 0; i < bp->b_page_count; i++)
-			unlock_page(bp->b_pages[i]);
-	}
-
 	if (page_count == bp->b_page_count)
 		bp->b_flags |= XBF_DONE;
 
@@ -751,7 +742,6 @@ xfs_buf_associate_memory(
 		bp->b_pages[i] = mem_to_page((void *)pageaddr);
 		pageaddr += PAGE_CACHE_SIZE;
 	}
-	bp->b_locked = 0;
 
 	bp->b_count_desired = len;
 	bp->b_buffer_length = buflen;
@@ -1098,25 +1088,13 @@ xfs_buf_iostart(
 	return status;
 }
 
-STATIC_INLINE int
-_xfs_buf_iolocked(
-	xfs_buf_t		*bp)
-{
-	ASSERT(bp->b_flags & (XBF_READ | XBF_WRITE));
-	if (bp->b_flags & XBF_READ)
-		return bp->b_locked;
-	return 0;
-}
-
 STATIC_INLINE void
 _xfs_buf_ioend(
 	xfs_buf_t		*bp,
 	int			schedule)
 {
-	if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
-		bp->b_locked = 0;
+	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
 		xfs_buf_ioend(bp, schedule);
-	}
 }
 
 STATIC void
@@ -1147,10 +1125,6 @@ xfs_buf_bio_end_io(
 
 		if (--bvec >= bio->bi_io_vec)
 			prefetchw(&bvec->bv_page->flags);
-
-		if (_xfs_buf_iolocked(bp)) {
-			unlock_page(page);
-		}
 	} while (bvec >= bio->bi_io_vec);
 
 	_xfs_buf_ioend(bp, 1);
@@ -1161,13 +1135,12 @@ STATIC void
 _xfs_buf_ioapply(
 	xfs_buf_t		*bp)
 {
-	int			i, rw, map_i, total_nr_pages, nr_pages;
+	int			rw, map_i, total_nr_pages, nr_pages;
 	struct bio		*bio;
 	int			offset = bp->b_offset;
 	int			size = bp->b_count_desired;
 	sector_t		sector = bp->b_bn;
 	unsigned int		blocksize = bp->b_target->bt_bsize;
-	int			locking = _xfs_buf_iolocked(bp);
 
 	total_nr_pages = bp->b_page_count;
 	map_i = 0;
@@ -1190,7 +1163,7 @@ _xfs_buf_ioapply(
 	 * filesystem block size is not smaller than the page size.
 	 */
 	if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
-	    (bp->b_flags & XBF_READ) && locking &&
+	    (bp->b_flags & XBF_READ) &&
 	    (blocksize >= PAGE_CACHE_SIZE)) {
 		bio = bio_alloc(GFP_NOIO, 1);
 
@@ -1207,24 +1180,6 @@ _xfs_buf_ioapply(
 		goto submit_io;
 	}
 
-	/* Lock down the pages which we need to for the request */
-	if (locking && (bp->b_flags & XBF_WRITE) && (bp->b_locked == 0)) {
-		for (i = 0; size; i++) {
-			int		nbytes = PAGE_CACHE_SIZE - offset;
-			struct page	*page = bp->b_pages[i];
-
-			if (nbytes > size)
-				nbytes = size;
-
-			lock_page(page);
-
-			size -= nbytes;
-			offset = 0;
-		}
-		offset = bp->b_offset;
-		size = bp->b_count_desired;
-	}
-
 next_chunk:
 	atomic_inc(&bp->b_io_remaining);
 	nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
@@ -1571,7 +1526,7 @@ xfs_alloc_delwrite_queue(
 
 	INIT_LIST_HEAD(&btp->bt_list);
 	INIT_LIST_HEAD(&btp->bt_delwrite_queue);
-	spinlock_init(&btp->bt_delwrite_lock, "delwri_lock");
+	spin_lock_init(&btp->bt_delwrite_lock);
 	btp->bt_flags = 0;
 	btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
 	if (IS_ERR(btp->bt_task)) {

+ 0 - 1
fs/xfs/linux-2.6/xfs_buf.h

@@ -143,7 +143,6 @@ typedef struct xfs_buf {
 	void			*b_fspriv2;
 	void			*b_fspriv3;
 	unsigned short		b_error;	/* error code on I/O */
-	unsigned short		b_locked;	/* page array is locked */
 	unsigned int		b_page_count;	/* size of page array */
 	unsigned int		b_offset;	/* page offset in first page */
 	struct page		**b_pages;	/* array of page pointers */

+ 17 - 8
fs/xfs/linux-2.6/xfs_export.c

@@ -118,20 +118,29 @@ xfs_nfs_get_inode(
 	u64			ino,
 	u32			generation)
  {
-	xfs_fid_t		xfid;
-	bhv_vnode_t		*vp;
+ 	xfs_mount_t		*mp = XFS_M(sb);
+	xfs_inode_t		*ip;
 	int			error;
 
-	xfid.fid_len = sizeof(xfs_fid_t) - sizeof(xfid.fid_len);
-	xfid.fid_pad = 0;
-	xfid.fid_ino = ino;
-	xfid.fid_gen = generation;
+	/*
+	 * NFS can sometimes send requests for ino 0.  Fail them gracefully.
+	 */
+	if (ino == 0)
+		return ERR_PTR(-ESTALE);
 
-	error = xfs_vget(XFS_M(sb), &vp, &xfid);
+	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
 	if (error)
 		return ERR_PTR(-error);
+	if (!ip)
+		return ERR_PTR(-EIO);
+
+	if (!ip->i_d.di_mode || ip->i_d.di_gen != generation) {
+		xfs_iput_new(ip, XFS_ILOCK_SHARED);
+		return ERR_PTR(-ENOENT);
+	}
 
-	return vp ? vn_to_inode(vp) : NULL;
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	return ip->i_vnode;
 }
 
 STATIC struct dentry *

+ 1 - 2
fs/xfs/linux-2.6/xfs_file.c

@@ -350,8 +350,8 @@ xfs_file_readdir(
 
 		size = buf.used;
 		de = (struct hack_dirent *)buf.dirent;
-		curr_offset = de->offset /* & 0x7fffffff */;
 		while (size > 0) {
+			curr_offset = de->offset /* & 0x7fffffff */;
 			if (filldir(dirent, de->name, de->namlen,
 					curr_offset & 0x7fffffff,
 					de->ino, de->d_type)) {
@@ -362,7 +362,6 @@ xfs_file_readdir(
 				       sizeof(u64));
 			size -= reclen;
 			de = (struct hack_dirent *)((char *)de + reclen);
-			curr_offset = de->offset /* & 0x7fffffff */;
 		}
 	}
 

+ 2 - 1
fs/xfs/linux-2.6/xfs_globals.c

@@ -47,5 +47,6 @@ xfs_param_t xfs_params = {
 /*
  * Global system credential structure.
  */
-cred_t sys_cred_val, *sys_cred = &sys_cred_val;
+static cred_t sys_cred_val;
+cred_t *sys_cred = &sys_cred_val;
 

+ 31 - 55
fs/xfs/linux-2.6/xfs_ioctl.c

@@ -75,7 +75,6 @@ xfs_find_handle(
 	xfs_handle_t		handle;
 	xfs_fsop_handlereq_t	hreq;
 	struct inode		*inode;
-	bhv_vnode_t		*vp;
 
 	if (copy_from_user(&hreq, arg, sizeof(hreq)))
 		return -XFS_ERROR(EFAULT);
@@ -134,21 +133,16 @@ xfs_find_handle(
 		return -XFS_ERROR(EBADF);
 	}
 
-	/* we need the vnode */
-	vp = vn_from_inode(inode);
-
 	/* now we can grab the fsid */
 	memcpy(&handle.ha_fsid, XFS_I(inode)->i_mount->m_fixedfsid,
 			sizeof(xfs_fsid_t));
 	hsize = sizeof(xfs_fsid_t);
 
 	if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {
-		xfs_inode_t	*ip;
+		xfs_inode_t	*ip = XFS_I(inode);
 		int		lock_mode;
 
 		/* need to get access to the xfs_inode to read the generation */
-		ip = xfs_vtoi(vp);
-		ASSERT(ip);
 		lock_mode = xfs_ilock_map_shared(ip);
 
 		/* fill in fid section of handle from inode */
@@ -176,21 +170,19 @@ xfs_find_handle(
 
 
 /*
- * Convert userspace handle data into vnode (and inode).
- * We [ab]use the fact that all the fsop_handlereq ioctl calls
- * have a data structure argument whose first component is always
- * a xfs_fsop_handlereq_t, so we can cast to and from this type.
- * This allows us to optimise the copy_from_user calls and gives
- * a handy, shared routine.
+ * Convert userspace handle data into inode.
+ *
+ * We use the fact that all the fsop_handlereq ioctl calls have a data
+ * structure argument whose first component is always a xfs_fsop_handlereq_t,
+ * so we can pass that sub structure into this handy, shared routine.
  *
- * If no error, caller must always VN_RELE the returned vp.
+ * If no error, caller must always iput the returned inode.
  */
 STATIC int
 xfs_vget_fsop_handlereq(
 	xfs_mount_t		*mp,
 	struct inode		*parinode,	/* parent inode pointer    */
 	xfs_fsop_handlereq_t	*hreq,
-	bhv_vnode_t		**vp,
 	struct inode		**inode)
 {
 	void			__user *hanp;
@@ -199,8 +191,6 @@ xfs_vget_fsop_handlereq(
 	xfs_handle_t		*handlep;
 	xfs_handle_t		handle;
 	xfs_inode_t		*ip;
-	struct inode		*inodep;
-	bhv_vnode_t		*vpp;
 	xfs_ino_t		ino;
 	__u32			igen;
 	int			error;
@@ -241,7 +231,7 @@ xfs_vget_fsop_handlereq(
 	}
 
 	/*
-	 * Get the XFS inode, building a vnode to go with it.
+	 * Get the XFS inode, building a Linux inode to go with it.
 	 */
 	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
 	if (error)
@@ -253,12 +243,9 @@ xfs_vget_fsop_handlereq(
 		return XFS_ERROR(ENOENT);
 	}
 
-	vpp = XFS_ITOV(ip);
-	inodep = vn_to_inode(vpp);
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
-	*vp = vpp;
-	*inode = inodep;
+	*inode = XFS_ITOV(ip);
 	return 0;
 }
 
@@ -275,7 +262,6 @@ xfs_open_by_handle(
 	struct file		*filp;
 	struct inode		*inode;
 	struct dentry		*dentry;
-	bhv_vnode_t		*vp;
 	xfs_fsop_handlereq_t	hreq;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -283,7 +269,7 @@ xfs_open_by_handle(
 	if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
 		return -XFS_ERROR(EFAULT);
 
-	error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode);
+	error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode);
 	if (error)
 		return -error;
 
@@ -385,7 +371,6 @@ xfs_readlink_by_handle(
 {
 	struct inode		*inode;
 	xfs_fsop_handlereq_t	hreq;
-	bhv_vnode_t		*vp;
 	__u32			olen;
 	void			*link;
 	int			error;
@@ -395,7 +380,7 @@ xfs_readlink_by_handle(
 	if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
 		return -XFS_ERROR(EFAULT);
 
-	error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode);
+	error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode);
 	if (error)
 		return -error;
 
@@ -438,34 +423,32 @@ xfs_fssetdm_by_handle(
 	struct fsdmidata	fsd;
 	xfs_fsop_setdm_handlereq_t dmhreq;
 	struct inode		*inode;
-	bhv_vnode_t		*vp;
 
 	if (!capable(CAP_MKNOD))
 		return -XFS_ERROR(EPERM);
 	if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
 		return -XFS_ERROR(EFAULT);
 
-	error = xfs_vget_fsop_handlereq(mp, parinode, &dmhreq.hreq, &vp, &inode);
+	error = xfs_vget_fsop_handlereq(mp, parinode, &dmhreq.hreq, &inode);
 	if (error)
 		return -error;
 
 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
-		VN_RELE(vp);
-		return -XFS_ERROR(EPERM);
+		error = -XFS_ERROR(EPERM);
+		goto out;
 	}
 
 	if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
-		VN_RELE(vp);
-		return -XFS_ERROR(EFAULT);
+		error = -XFS_ERROR(EFAULT);
+		goto out;
 	}
 
-	error = xfs_set_dmattrs(xfs_vtoi(vp),
-			fsd.fsd_dmevmask, fsd.fsd_dmstate);
+	error = -xfs_set_dmattrs(XFS_I(inode), fsd.fsd_dmevmask,
+				 fsd.fsd_dmstate);
 
-	VN_RELE(vp);
-	if (error)
-		return -error;
-	return 0;
+ out:
+	iput(inode);
+	return error;
 }
 
 STATIC int
@@ -478,7 +461,6 @@ xfs_attrlist_by_handle(
 	attrlist_cursor_kern_t	*cursor;
 	xfs_fsop_attrlist_handlereq_t al_hreq;
 	struct inode		*inode;
-	bhv_vnode_t		*vp;
 	char			*kbuf;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -488,8 +470,7 @@ xfs_attrlist_by_handle(
 	if (al_hreq.buflen > XATTR_LIST_MAX)
 		return -XFS_ERROR(EINVAL);
 
-	error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq,
-			&vp, &inode);
+	error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, &inode);
 	if (error)
 		goto out;
 
@@ -509,7 +490,7 @@ xfs_attrlist_by_handle(
  out_kfree:
 	kfree(kbuf);
  out_vn_rele:
-	VN_RELE(vp);
+	iput(inode);
  out:
 	return -error;
 }
@@ -531,7 +512,7 @@ xfs_attrmulti_attr_get(
 	if (!kbuf)
 		return ENOMEM;
 
-	error = xfs_attr_get(XFS_I(inode), name, kbuf, len, flags, NULL);
+	error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags, NULL);
 	if (error)
 		goto out_kfree;
 
@@ -598,7 +579,6 @@ xfs_attrmulti_by_handle(
 	xfs_attr_multiop_t	*ops;
 	xfs_fsop_attrmulti_handlereq_t am_hreq;
 	struct inode		*inode;
-	bhv_vnode_t		*vp;
 	unsigned int		i, size;
 	char			*attr_name;
 
@@ -607,7 +587,7 @@ xfs_attrmulti_by_handle(
 	if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
 		return -XFS_ERROR(EFAULT);
 
-	error = xfs_vget_fsop_handlereq(mp, parinode, &am_hreq.hreq, &vp, &inode);
+	error = xfs_vget_fsop_handlereq(mp, parinode, &am_hreq.hreq, &inode);
 	if (error)
 		goto out;
 
@@ -666,7 +646,7 @@ xfs_attrmulti_by_handle(
  out_kfree_ops:
 	kfree(ops);
  out_vn_rele:
-	VN_RELE(vp);
+	iput(inode);
  out:
 	return -error;
 }
@@ -702,7 +682,6 @@ xfs_ioc_fsgeometry(
 
 STATIC int
 xfs_ioc_xattr(
-	bhv_vnode_t		*vp,
 	xfs_inode_t		*ip,
 	struct file		*filp,
 	unsigned int		cmd,
@@ -735,12 +714,10 @@ xfs_ioctl(
 	void			__user *arg)
 {
 	struct inode		*inode = filp->f_path.dentry->d_inode;
-	bhv_vnode_t		*vp = vn_from_inode(inode);
 	xfs_mount_t		*mp = ip->i_mount;
 	int			error;
 
-	vn_trace_entry(XFS_I(inode), "xfs_ioctl", (inst_t *)__return_address);
-
+	xfs_itrace_entry(XFS_I(inode));
 	switch (cmd) {
 
 	case XFS_IOC_ALLOCSP:
@@ -764,7 +741,7 @@ xfs_ioctl(
 	case XFS_IOC_DIOINFO: {
 		struct dioattr	da;
 		xfs_buftarg_t	*target =
-			(ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
+			XFS_IS_REALTIME_INODE(ip) ?
 			mp->m_rtdev_targp : mp->m_ddev_targp;
 
 		da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
@@ -796,7 +773,7 @@ xfs_ioctl(
 	case XFS_IOC_GETXFLAGS:
 	case XFS_IOC_SETXFLAGS:
 	case XFS_IOC_FSSETXATTR:
-		return xfs_ioc_xattr(vp, ip, filp, cmd, arg);
+		return xfs_ioc_xattr(ip, filp, cmd, arg);
 
 	case XFS_IOC_FSSETDM: {
 		struct fsdmidata	dmi;
@@ -1203,7 +1180,6 @@ xfs_ioc_fsgetxattr(
 
 STATIC int
 xfs_ioc_xattr(
-	bhv_vnode_t		*vp,
 	xfs_inode_t		*ip,
 	struct file		*filp,
 	unsigned int		cmd,
@@ -1237,7 +1213,7 @@ xfs_ioc_xattr(
 
 		error = xfs_setattr(ip, vattr, attr_flags, NULL);
 		if (likely(!error))
-			__vn_revalidate(vp, vattr);	/* update flags */
+			vn_revalidate(XFS_ITOV(ip));	/* update flags */
 		error = -error;
 		break;
 	}
@@ -1272,7 +1248,7 @@ xfs_ioc_xattr(
 
 		error = xfs_setattr(ip, vattr, attr_flags, NULL);
 		if (likely(!error))
-			__vn_revalidate(vp, vattr);	/* update flags */
+			vn_revalidate(XFS_ITOV(ip));	/* update flags */
 		error = -error;
 		break;
 	}

+ 6 - 3
fs/xfs/linux-2.6/xfs_ioctl32.c

@@ -44,6 +44,7 @@
 #include "xfs_error.h"
 #include "xfs_dfrag.h"
 #include "xfs_vnodeops.h"
+#include "xfs_ioctl32.h"
 
 #define  _NATIVE_IOC(cmd, type) \
 	  _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
@@ -379,9 +380,6 @@ xfs_compat_ioctl(
 	switch (cmd) {
 	case XFS_IOC_DIOINFO:
 	case XFS_IOC_FSGEOMETRY:
-	case XFS_IOC_GETVERSION:
-	case XFS_IOC_GETXFLAGS:
-	case XFS_IOC_SETXFLAGS:
 	case XFS_IOC_FSGETXATTR:
 	case XFS_IOC_FSSETXATTR:
 	case XFS_IOC_FSGETXATTRA:
@@ -407,6 +405,11 @@ xfs_compat_ioctl(
 	case XFS_IOC_ERROR_CLEARALL:
 		break;
 
+	case XFS_IOC32_GETXFLAGS:
+	case XFS_IOC32_SETXFLAGS:
+	case XFS_IOC32_GETVERSION:
+		cmd = _NATIVE_IOC(cmd, long);
+		break;
 #ifdef BROKEN_X86_ALIGNMENT
 	/* xfs_flock_t has wrong u32 vs u64 alignment */
 	case XFS_IOC_ALLOCSP_32:

+ 136 - 34
fs/xfs/linux-2.6/xfs_iops.c

@@ -52,6 +52,7 @@
 #include <linux/xattr.h>
 #include <linux/namei.h>
 #include <linux/security.h>
+#include <linux/falloc.h>
 
 /*
  * Bring the atime in the XFS inode uptodate.
@@ -70,6 +71,22 @@ xfs_synchronize_atime(
 	}
 }
 
+/*
+ * If the linux inode exists, mark it dirty.
+ * Used when commiting a dirty inode into a transaction so that
+ * the inode will get written back by the linux code
+ */
+void
+xfs_mark_inode_dirty_sync(
+	xfs_inode_t	*ip)
+{
+	bhv_vnode_t	*vp;
+
+	vp = XFS_ITOV_NULL(ip);
+	if (vp)
+		mark_inode_dirty_sync(vn_to_inode(vp));
+}
+
 /*
  * Change the requested timestamp in the given inode.
  * We don't lock across timestamp updates, and we don't log them but
@@ -184,10 +201,6 @@ xfs_validate_fields(
 	struct xfs_inode	*ip = XFS_I(inode);
 	loff_t size;
 
-	inode->i_nlink = ip->i_d.di_nlink;
-	inode->i_blocks =
-		XFS_FSB_TO_BB(ip->i_mount, ip->i_d.di_nblocks +
-					   ip->i_delayed_blks);
 	/* we're under i_sem so i_size can't change under us */
 	size = XFS_ISIZE(ip);
 	if (i_size_read(inode) != size)
@@ -541,13 +554,32 @@ xfs_vn_put_link(
 }
 
 #ifdef CONFIG_XFS_POSIX_ACL
+STATIC int
+xfs_check_acl(
+	struct inode		*inode,
+	int			mask)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	int			error;
+
+	xfs_itrace_entry(ip);
+
+	if (XFS_IFORK_Q(ip)) {
+		error = xfs_acl_iaccess(ip, mask, NULL);
+		if (error != -1)
+			return -error;
+	}
+
+	return -EAGAIN;
+}
+
 STATIC int
 xfs_vn_permission(
-	struct inode	*inode,
-	int		mode,
-	struct nameidata *nd)
+	struct inode		*inode,
+	int			mask,
+	struct nameidata	*nd)
 {
-	return -xfs_access(XFS_I(inode), mode << 6, NULL);
+	return generic_permission(inode, mask, xfs_check_acl);
 }
 #else
 #define xfs_vn_permission NULL
@@ -555,33 +587,61 @@ xfs_vn_permission(
 
 STATIC int
 xfs_vn_getattr(
-	struct vfsmount	*mnt,
-	struct dentry	*dentry,
-	struct kstat	*stat)
+	struct vfsmount		*mnt,
+	struct dentry		*dentry,
+	struct kstat		*stat)
 {
-	struct inode	*inode = dentry->d_inode;
-	bhv_vattr_t	vattr = { .va_mask = XFS_AT_STAT };
-	int		error;
-
-	error = xfs_getattr(XFS_I(inode), &vattr, ATTR_LAZY);
-	if (likely(!error)) {
-		stat->size = i_size_read(inode);
-		stat->dev = inode->i_sb->s_dev;
-		stat->rdev = (vattr.va_rdev == 0) ? 0 :
-				MKDEV(sysv_major(vattr.va_rdev) & 0x1ff,
-				      sysv_minor(vattr.va_rdev));
-		stat->mode = vattr.va_mode;
-		stat->nlink = vattr.va_nlink;
-		stat->uid = vattr.va_uid;
-		stat->gid = vattr.va_gid;
-		stat->ino = vattr.va_nodeid;
-		stat->atime = vattr.va_atime;
-		stat->mtime = vattr.va_mtime;
-		stat->ctime = vattr.va_ctime;
-		stat->blocks = vattr.va_nblocks;
-		stat->blksize = vattr.va_blocksize;
+	struct inode		*inode = dentry->d_inode;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+
+	xfs_itrace_entry(ip);
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	stat->size = XFS_ISIZE(ip);
+	stat->dev = inode->i_sb->s_dev;
+	stat->mode = ip->i_d.di_mode;
+	stat->nlink = ip->i_d.di_nlink;
+	stat->uid = ip->i_d.di_uid;
+	stat->gid = ip->i_d.di_gid;
+	stat->ino = ip->i_ino;
+#if XFS_BIG_INUMS
+	stat->ino += mp->m_inoadd;
+#endif
+	stat->atime = inode->i_atime;
+	stat->mtime.tv_sec = ip->i_d.di_mtime.t_sec;
+	stat->mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
+	stat->ctime.tv_sec = ip->i_d.di_ctime.t_sec;
+	stat->ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
+	stat->blocks =
+		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
+
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		stat->blksize = BLKDEV_IOSIZE;
+		stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
+				   sysv_minor(ip->i_df.if_u2.if_rdev));
+		break;
+	default:
+		if (XFS_IS_REALTIME_INODE(ip)) {
+			/*
+			 * If the file blocks are being allocated from a
+			 * realtime volume, then return the inode's realtime
+			 * extent size or the realtime volume's extent size.
+			 */
+			stat->blksize =
+				xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
+		} else
+			stat->blksize = xfs_preferred_iosize(mp);
+		stat->rdev = 0;
+		break;
 	}
-	return -error;
+
+	return 0;
 }
 
 STATIC int
@@ -636,7 +696,7 @@ xfs_vn_setattr(
 
 	error = xfs_setattr(XFS_I(inode), &vattr, flags, NULL);
 	if (likely(!error))
-		__vn_revalidate(vn_from_inode(inode), &vattr);
+		vn_revalidate(vn_from_inode(inode));
 	return -error;
 }
 
@@ -750,6 +810,47 @@ xfs_vn_removexattr(
 	return namesp->attr_remove(vp, attr, xflags);
 }
 
+STATIC long
+xfs_vn_fallocate(
+	struct inode	*inode,
+	int		mode,
+	loff_t		offset,
+	loff_t		len)
+{
+	long		error;
+	loff_t		new_size = 0;
+	xfs_flock64_t	bf;
+	xfs_inode_t	*ip = XFS_I(inode);
+
+	/* preallocation on directories not yet supported */
+	error = -ENODEV;
+	if (S_ISDIR(inode->i_mode))
+		goto out_error;
+
+	bf.l_whence = 0;
+	bf.l_start = offset;
+	bf.l_len = len;
+
+	xfs_ilock(ip, XFS_IOLOCK_EXCL);
+	error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
+						0, NULL, ATTR_NOLOCK);
+	if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
+	    offset + len > i_size_read(inode))
+		new_size = offset + len;
+
+	/* Change file size if needed */
+	if (new_size) {
+		bhv_vattr_t	va;
+
+		va.va_mask = XFS_AT_SIZE;
+		va.va_size = new_size;
+		error = xfs_setattr(ip, &va, ATTR_NOLOCK, NULL);
+	}
+
+	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+out_error:
+	return error;
+}
 
 const struct inode_operations xfs_inode_operations = {
 	.permission		= xfs_vn_permission,
@@ -760,6 +861,7 @@ const struct inode_operations xfs_inode_operations = {
 	.getxattr		= xfs_vn_getxattr,
 	.listxattr		= xfs_vn_listxattr,
 	.removexattr		= xfs_vn_removexattr,
+	.fallocate		= xfs_vn_fallocate,
 };
 
 const struct inode_operations xfs_dir_inode_operations = {

+ 3 - 31
fs/xfs/linux-2.6/xfs_linux.h

@@ -43,7 +43,6 @@
 
 #include <kmem.h>
 #include <mrlock.h>
-#include <spin.h>
 #include <sv.h>
 #include <mutex.h>
 #include <sema.h>
@@ -75,6 +74,7 @@
 #include <linux/notifier.h>
 #include <linux/delay.h>
 #include <linux/log2.h>
+#include <linux/spinlock.h>
 
 #include <asm/page.h>
 #include <asm/div64.h>
@@ -136,43 +136,19 @@
 #define current_restore_flags_nested(sp, f)	\
 		(current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
 
-#define NBPP		PAGE_SIZE
-#define NDPP		(1 << (PAGE_SHIFT - 9))
+#define spinlock_destroy(lock)
 
 #define NBBY		8		/* number of bits per byte */
-#define	NBPC		PAGE_SIZE	/* Number of bytes per click */
-#define	BPCSHIFT	PAGE_SHIFT	/* LOG2(NBPC) if exact */
 
 /*
  * Size of block device i/o is parameterized here.
  * Currently the system supports page-sized i/o.
  */
-#define	BLKDEV_IOSHIFT		BPCSHIFT
+#define	BLKDEV_IOSHIFT		PAGE_CACHE_SHIFT
 #define	BLKDEV_IOSIZE		(1<<BLKDEV_IOSHIFT)
 /* number of BB's per block device block */
 #define	BLKDEV_BB		BTOBB(BLKDEV_IOSIZE)
 
-/* bytes to clicks */
-#define	btoc(x)		(((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT)
-#define	btoct(x)	((__psunsigned_t)(x)>>BPCSHIFT)
-#define	btoc64(x)	(((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT)
-#define	btoct64(x)	((__uint64_t)(x)>>BPCSHIFT)
-
-/* off_t bytes to clicks */
-#define offtoc(x)       (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT)
-#define offtoct(x)      ((xfs_off_t)(x)>>BPCSHIFT)
-
-/* clicks to off_t bytes */
-#define	ctooff(x)	((xfs_off_t)(x)<<BPCSHIFT)
-
-/* clicks to bytes */
-#define	ctob(x)		((__psunsigned_t)(x)<<BPCSHIFT)
-#define btoct(x)        ((__psunsigned_t)(x)>>BPCSHIFT)
-#define	ctob64(x)	((__uint64_t)(x)<<BPCSHIFT)
-
-/* bytes to clicks */
-#define btoc(x)         (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT)
-
 #define ENOATTR		ENODATA		/* Attribute not found */
 #define EWRONGFS	EINVAL		/* Mount with wrong filesystem type */
 #define EFSCORRUPTED	EUCLEAN		/* Filesystem is corrupted */
@@ -205,10 +181,6 @@
 #define xfs_stack_trace()	dump_stack()
 #define xfs_itruncate_data(ip, off)	\
 	(-vmtruncate(vn_to_inode(XFS_ITOV(ip)), (off)))
-#define xfs_statvfs_fsid(statp, mp)	\
-	({ u64 id = huge_encode_dev((mp)->m_ddev_targp->bt_dev); \
-	   __kernel_fsid_t *fsid = &(statp)->f_fsid;	\
-	(fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); })
 
 
 /* Move the kernel do_div definition off to one side */

+ 46 - 76
fs/xfs/linux-2.6/xfs_lrw.c

@@ -58,14 +58,12 @@
 void
 xfs_rw_enter_trace(
 	int			tag,
-	xfs_iocore_t		*io,
+	xfs_inode_t		*ip,
 	void			*data,
 	size_t			segs,
 	loff_t			offset,
 	int			ioflags)
 {
-	xfs_inode_t	*ip = XFS_IO_INODE(io);
-
 	if (ip->i_rwtrace == NULL)
 		return;
 	ktrace_enter(ip->i_rwtrace,
@@ -78,8 +76,8 @@ xfs_rw_enter_trace(
 		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
 		(void *)((unsigned long)(offset & 0xffffffff)),
 		(void *)((unsigned long)ioflags),
-		(void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)),
-		(void *)((unsigned long)(io->io_new_size & 0xffffffff)),
+		(void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)),
+		(void *)((unsigned long)(ip->i_new_size & 0xffffffff)),
 		(void *)((unsigned long)current_pid()),
 		(void *)NULL,
 		(void *)NULL,
@@ -89,13 +87,12 @@ xfs_rw_enter_trace(
 
 void
 xfs_inval_cached_trace(
-	xfs_iocore_t	*io,
+	xfs_inode_t	*ip,
 	xfs_off_t	offset,
 	xfs_off_t	len,
 	xfs_off_t	first,
 	xfs_off_t	last)
 {
-	xfs_inode_t	*ip = XFS_IO_INODE(io);
 
 	if (ip->i_rwtrace == NULL)
 		return;
@@ -131,7 +128,7 @@ xfs_inval_cached_trace(
  */
 STATIC int
 xfs_iozero(
-	struct inode		*ip,	/* inode			*/
+	struct xfs_inode	*ip,	/* inode			*/
 	loff_t			pos,	/* offset in file		*/
 	size_t			count)	/* size of data to zero		*/
 {
@@ -139,7 +136,7 @@ xfs_iozero(
 	struct address_space	*mapping;
 	int			status;
 
-	mapping = ip->i_mapping;
+	mapping = ip->i_vnode->i_mapping;
 	do {
 		unsigned offset, bytes;
 		void *fsdata;
@@ -205,7 +202,7 @@ xfs_read(
 
 	if (unlikely(ioflags & IO_ISDIRECT)) {
 		xfs_buftarg_t	*target =
-			(ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
+			XFS_IS_REALTIME_INODE(ip) ?
 				mp->m_rtdev_targp : mp->m_ddev_targp;
 		if ((*offset & target->bt_smask) ||
 		    (size & target->bt_smask)) {
@@ -246,9 +243,8 @@ xfs_read(
 
 	if (unlikely(ioflags & IO_ISDIRECT)) {
 		if (VN_CACHED(vp))
-			ret = xfs_flushinval_pages(ip,
-					ctooff(offtoct(*offset)),
-					-1, FI_REMAPF_LOCKED);
+			ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
+						    -1, FI_REMAPF_LOCKED);
 		mutex_unlock(&inode->i_mutex);
 		if (ret) {
 			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -256,7 +252,7 @@ xfs_read(
 		}
 	}
 
-	xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
+	xfs_rw_enter_trace(XFS_READ_ENTER, ip,
 				(void *)iovp, segs, *offset, ioflags);
 
 	iocb->ki_pos = *offset;
@@ -301,7 +297,7 @@ xfs_splice_read(
 			return -error;
 		}
 	}
-	xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, &ip->i_iocore,
+	xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, ip,
 			   pipe, count, *ppos, ioflags);
 	ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
 	if (ret > 0)
@@ -323,7 +319,6 @@ xfs_splice_write(
 {
 	bhv_vnode_t		*vp = XFS_ITOV(ip);
 	xfs_mount_t		*mp = ip->i_mount;
-	xfs_iocore_t		*io = &ip->i_iocore;
 	ssize_t			ret;
 	struct inode		*inode = outfilp->f_mapping->host;
 	xfs_fsize_t		isize, new_size;
@@ -350,10 +345,10 @@ xfs_splice_write(
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	if (new_size > ip->i_size)
-		io->io_new_size = new_size;
+		ip->i_new_size = new_size;
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
-	xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore,
+	xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, ip,
 			   pipe, count, *ppos, ioflags);
 	ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
 	if (ret > 0)
@@ -370,9 +365,9 @@ xfs_splice_write(
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 
-	if (io->io_new_size) {
+	if (ip->i_new_size) {
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		io->io_new_size = 0;
+		ip->i_new_size = 0;
 		if (ip->i_d.di_size > ip->i_size)
 			ip->i_d.di_size = ip->i_size;
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -389,20 +384,19 @@ xfs_splice_write(
  */
 STATIC int				/* error (positive) */
 xfs_zero_last_block(
-	struct inode	*ip,
-	xfs_iocore_t	*io,
+	xfs_inode_t	*ip,
 	xfs_fsize_t	offset,
 	xfs_fsize_t	isize)
 {
 	xfs_fileoff_t	last_fsb;
-	xfs_mount_t	*mp = io->io_mount;
+	xfs_mount_t	*mp = ip->i_mount;
 	int		nimaps;
 	int		zero_offset;
 	int		zero_len;
 	int		error = 0;
 	xfs_bmbt_irec_t	imap;
 
-	ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
+	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
 
 	zero_offset = XFS_B_FSB_OFFSET(mp, isize);
 	if (zero_offset == 0) {
@@ -415,7 +409,7 @@ xfs_zero_last_block(
 
 	last_fsb = XFS_B_TO_FSBT(mp, isize);
 	nimaps = 1;
-	error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap,
+	error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
 			  &nimaps, NULL, NULL);
 	if (error) {
 		return error;
@@ -433,14 +427,14 @@ xfs_zero_last_block(
 	 * out sync.  We need to drop the ilock while we do this so we
 	 * don't deadlock when the buffer cache calls back to us.
 	 */
-	XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
 
 	zero_len = mp->m_sb.sb_blocksize - zero_offset;
 	if (isize + zero_len > offset)
 		zero_len = offset - isize;
 	error = xfs_iozero(ip, isize, zero_len);
 
-	XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+	xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
 	ASSERT(error >= 0);
 	return error;
 }
@@ -458,35 +452,33 @@ xfs_zero_last_block(
 
 int					/* error (positive) */
 xfs_zero_eof(
-	bhv_vnode_t	*vp,
-	xfs_iocore_t	*io,
+	xfs_inode_t	*ip,
 	xfs_off_t	offset,		/* starting I/O offset */
 	xfs_fsize_t	isize)		/* current inode size */
 {
-	struct inode	*ip = vn_to_inode(vp);
+	xfs_mount_t	*mp = ip->i_mount;
 	xfs_fileoff_t	start_zero_fsb;
 	xfs_fileoff_t	end_zero_fsb;
 	xfs_fileoff_t	zero_count_fsb;
 	xfs_fileoff_t	last_fsb;
 	xfs_fileoff_t	zero_off;
 	xfs_fsize_t	zero_len;
-	xfs_mount_t	*mp = io->io_mount;
 	int		nimaps;
 	int		error = 0;
 	xfs_bmbt_irec_t	imap;
 
-	ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
-	ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
+	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+	ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
 	ASSERT(offset > isize);
 
 	/*
 	 * First handle zeroing the block on which isize resides.
 	 * We only zero a part of that block so it is handled specially.
 	 */
-	error = xfs_zero_last_block(ip, io, offset, isize);
+	error = xfs_zero_last_block(ip, offset, isize);
 	if (error) {
-		ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
-		ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
+		ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+		ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
 		return error;
 	}
 
@@ -514,11 +506,11 @@ xfs_zero_eof(
 	while (start_zero_fsb <= end_zero_fsb) {
 		nimaps = 1;
 		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
-		error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb,
+		error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
 				  0, NULL, 0, &imap, &nimaps, NULL, NULL);
 		if (error) {
-			ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
-			ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
+			ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+			ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
 			return error;
 		}
 		ASSERT(nimaps > 0);
@@ -542,7 +534,7 @@ xfs_zero_eof(
 		 * Drop the inode lock while we're doing the I/O.
 		 * We'll still have the iolock to protect us.
 		 */
-		XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
 
 		zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
 		zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
@@ -558,14 +550,13 @@ xfs_zero_eof(
 		start_zero_fsb = imap.br_startoff + imap.br_blockcount;
 		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
 
-		XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+		xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
 	}
 
 	return 0;
 
 out_lock:
-
-	XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+	xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
 	ASSERT(error >= 0);
 	return error;
 }
@@ -587,7 +578,6 @@ xfs_write(
 	xfs_mount_t		*mp;
 	ssize_t			ret = 0, error = 0;
 	xfs_fsize_t		isize, new_size;
-	xfs_iocore_t		*io;
 	int			iolock;
 	int			eventsent = 0;
 	bhv_vrwlock_t		locktype;
@@ -607,8 +597,7 @@ xfs_write(
 	if (count == 0)
 		return 0;
 
-	io = &xip->i_iocore;
-	mp = io->io_mount;
+	mp = xip->i_mount;
 
 	xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);
 
@@ -667,7 +656,7 @@ start:
 
 	if (ioflags & IO_ISDIRECT) {
 		xfs_buftarg_t	*target =
-			(xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
+			XFS_IS_REALTIME_INODE(xip) ?
 				mp->m_rtdev_targp : mp->m_ddev_targp;
 
 		if ((pos & target->bt_smask) || (count & target->bt_smask)) {
@@ -688,7 +677,7 @@ start:
 
 	new_size = pos + count;
 	if (new_size > xip->i_size)
-		io->io_new_size = new_size;
+		xip->i_new_size = new_size;
 
 	if (likely(!(ioflags & IO_INVIS))) {
 		file_update_time(file);
@@ -706,7 +695,7 @@ start:
 	 */
 
 	if (pos > xip->i_size) {
-		error = xfs_zero_eof(vp, io, pos, xip->i_size);
+		error = xfs_zero_eof(xip, pos, xip->i_size);
 		if (error) {
 			xfs_iunlock(xip, XFS_ILOCK_EXCL);
 			goto out_unlock_internal;
@@ -740,10 +729,10 @@ retry:
 	if ((ioflags & IO_ISDIRECT)) {
 		if (VN_CACHED(vp)) {
 			WARN_ON(need_i_mutex == 0);
-			xfs_inval_cached_trace(io, pos, -1,
-					ctooff(offtoct(pos)), -1);
+			xfs_inval_cached_trace(xip, pos, -1,
+					(pos & PAGE_CACHE_MASK), -1);
 			error = xfs_flushinval_pages(xip,
-					ctooff(offtoct(pos)),
+					(pos & PAGE_CACHE_MASK),
 					-1, FI_REMAPF_LOCKED);
 			if (error)
 				goto out_unlock_internal;
@@ -751,7 +740,7 @@ retry:
 
 		if (need_i_mutex) {
 			/* demote the lock now the cached pages are gone */
-			XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
+			xfs_ilock_demote(xip, XFS_IOLOCK_EXCL);
 			mutex_unlock(&inode->i_mutex);
 
 			iolock = XFS_IOLOCK_SHARED;
@@ -759,7 +748,7 @@ retry:
 			need_i_mutex = 0;
 		}
 
- 		xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs,
+ 		xfs_rw_enter_trace(XFS_DIOWR_ENTER, xip, (void *)iovp, segs,
 				*offset, ioflags);
 		ret = generic_file_direct_write(iocb, iovp,
 				&segs, pos, offset, count, ocount);
@@ -779,7 +768,7 @@ retry:
 			goto relock;
 		}
 	} else {
-		xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs,
+		xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs,
 				*offset, ioflags);
 		ret = generic_file_buffered_write(iocb, iovp, segs,
 				pos, offset, count, ret);
@@ -843,9 +832,9 @@ retry:
 	}
 
  out_unlock_internal:
-	if (io->io_new_size) {
+	if (xip->i_new_size) {
 		xfs_ilock(xip, XFS_ILOCK_EXCL);
-		io->io_new_size = 0;
+		xip->i_new_size = 0;
 		/*
 		 * If this was a direct or synchronous I/O that failed (such
 		 * as ENOSPC) then part of the I/O may have been written to
@@ -894,25 +883,6 @@ xfs_bdstrat_cb(struct xfs_buf *bp)
 	}
 }
 
-
-int
-xfs_bmap(
-	xfs_inode_t	*ip,
-	xfs_off_t	offset,
-	ssize_t		count,
-	int		flags,
-	xfs_iomap_t	*iomapp,
-	int		*niomaps)
-{
-	xfs_iocore_t	*io = &ip->i_iocore;
-
-	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
-	ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
-	       ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));
-
-	return xfs_iomap(io, offset, count, flags, iomapp, niomaps);
-}
-
 /*
  * Wrapper around bdstrat so that we can stop data
  * from going to disk in case we are shutting down the filesystem.

+ 7 - 9
fs/xfs/linux-2.6/xfs_lrw.h

@@ -19,7 +19,6 @@
 #define __XFS_LRW_H__
 
 struct xfs_mount;
-struct xfs_iocore;
 struct xfs_inode;
 struct xfs_bmbt_irec;
 struct xfs_buf;
@@ -60,20 +59,19 @@ struct xfs_iomap;
 #define	XFS_IOMAP_UNWRITTEN	27
 #define XFS_SPLICE_READ_ENTER	28
 #define XFS_SPLICE_WRITE_ENTER	29
-extern void xfs_rw_enter_trace(int, struct xfs_iocore *,
-				void *, size_t, loff_t, int);
-extern void xfs_inval_cached_trace(struct xfs_iocore *,
-				xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t);
+extern void xfs_rw_enter_trace(int, struct xfs_inode *,
+		void *, size_t, loff_t, int);
+extern void xfs_inval_cached_trace(struct xfs_inode *,
+		xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t);
 #else
-#define xfs_rw_enter_trace(tag, io, data, size, offset, ioflags)
-#define xfs_inval_cached_trace(io, offset, len, first, last)
+#define xfs_rw_enter_trace(tag, ip, data, size, offset, ioflags)
+#define xfs_inval_cached_trace(ip, offset, len, first, last)
 #endif
 
 extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
 extern int xfs_bdstrat_cb(struct xfs_buf *);
 extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
 
-extern int xfs_zero_eof(struct inode *, struct xfs_iocore *, xfs_off_t,
-				xfs_fsize_t);
+extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
 
 #endif	/* __XFS_LRW_H__ */

+ 549 - 23
fs/xfs/linux-2.6/xfs_super.c

@@ -41,6 +41,7 @@
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_itable.h"
+#include "xfs_fsops.h"
 #include "xfs_rw.h"
 #include "xfs_acl.h"
 #include "xfs_attr.h"
@@ -49,6 +50,8 @@
 #include "xfs_vnodeops.h"
 #include "xfs_vfsops.h"
 #include "xfs_version.h"
+#include "xfs_log_priv.h"
+#include "xfs_trans_priv.h"
 
 #include <linux/namei.h>
 #include <linux/init.h>
@@ -87,6 +90,435 @@ xfs_args_allocate(
 	return args;
 }
 
+#define MNTOPT_LOGBUFS	"logbufs"	/* number of XFS log buffers */
+#define MNTOPT_LOGBSIZE	"logbsize"	/* size of XFS log buffers */
+#define MNTOPT_LOGDEV	"logdev"	/* log device */
+#define MNTOPT_RTDEV	"rtdev"		/* realtime I/O device */
+#define MNTOPT_BIOSIZE	"biosize"	/* log2 of preferred buffered io size */
+#define MNTOPT_WSYNC	"wsync"		/* safe-mode nfs compatible mount */
+#define MNTOPT_INO64	"ino64"		/* force inodes into 64-bit range */
+#define MNTOPT_NOALIGN	"noalign"	/* turn off stripe alignment */
+#define MNTOPT_SWALLOC	"swalloc"	/* turn on stripe width allocation */
+#define MNTOPT_SUNIT	"sunit"		/* data volume stripe unit */
+#define MNTOPT_SWIDTH	"swidth"	/* data volume stripe width */
+#define MNTOPT_NOUUID	"nouuid"	/* ignore filesystem UUID */
+#define MNTOPT_MTPT	"mtpt"		/* filesystem mount point */
+#define MNTOPT_GRPID	"grpid"		/* group-ID from parent directory */
+#define MNTOPT_NOGRPID	"nogrpid"	/* group-ID from current process */
+#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
+#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
+#define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
+#define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
+#define MNTOPT_BARRIER	"barrier"	/* use writer barriers for log write and
+					 * unwritten extent conversion */
+#define MNTOPT_NOBARRIER "nobarrier"	/* .. disable */
+#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
+#define MNTOPT_64BITINODE   "inode64"	/* inodes can be allocated anywhere */
+#define MNTOPT_IKEEP	"ikeep"		/* do not free empty inode clusters */
+#define MNTOPT_NOIKEEP	"noikeep"	/* free empty inode clusters */
+#define MNTOPT_LARGEIO	   "largeio"	/* report large I/O sizes in stat() */
+#define MNTOPT_NOLARGEIO   "nolargeio"	/* do not report large I/O sizes
+					 * in stat(). */
+#define MNTOPT_ATTR2	"attr2"		/* do use attr2 attribute format */
+#define MNTOPT_NOATTR2	"noattr2"	/* do not use attr2 attribute format */
+#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
+#define MNTOPT_QUOTA	"quota"		/* disk quotas (user) */
+#define MNTOPT_NOQUOTA	"noquota"	/* no quotas */
+#define MNTOPT_USRQUOTA	"usrquota"	/* user quota enabled */
+#define MNTOPT_GRPQUOTA	"grpquota"	/* group quota enabled */
+#define MNTOPT_PRJQUOTA	"prjquota"	/* project quota enabled */
+#define MNTOPT_UQUOTA	"uquota"	/* user quota (IRIX variant) */
+#define MNTOPT_GQUOTA	"gquota"	/* group quota (IRIX variant) */
+#define MNTOPT_PQUOTA	"pquota"	/* project quota (IRIX variant) */
+#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
+#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
+#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
+#define MNTOPT_QUOTANOENF  "qnoenforce"	/* same as uqnoenforce */
+#define MNTOPT_DMAPI	"dmapi"		/* DMI enabled (DMAPI / XDSM) */
+#define MNTOPT_XDSM	"xdsm"		/* DMI enabled (DMAPI / XDSM) */
+#define MNTOPT_DMI	"dmi"		/* DMI enabled (DMAPI / XDSM) */
+
+STATIC unsigned long
+suffix_strtoul(char *s, char **endp, unsigned int base)
+{
+	int	last, shift_left_factor = 0;
+	char	*value = s;
+
+	last = strlen(value) - 1;
+	if (value[last] == 'K' || value[last] == 'k') {
+		shift_left_factor = 10;
+		value[last] = '\0';
+	}
+	if (value[last] == 'M' || value[last] == 'm') {
+		shift_left_factor = 20;
+		value[last] = '\0';
+	}
+	if (value[last] == 'G' || value[last] == 'g') {
+		shift_left_factor = 30;
+		value[last] = '\0';
+	}
+
+	return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
+}
+
+STATIC int
+xfs_parseargs(
+	struct xfs_mount	*mp,
+	char			*options,
+	struct xfs_mount_args	*args,
+	int			update)
+{
+	char			*this_char, *value, *eov;
+	int			dsunit, dswidth, vol_dsunit, vol_dswidth;
+	int			iosize;
+	int			ikeep = 0;
+
+	args->flags |= XFSMNT_BARRIER;
+	args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
+
+	if (!options)
+		goto done;
+
+	iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0;
+
+	while ((this_char = strsep(&options, ",")) != NULL) {
+		if (!*this_char)
+			continue;
+		if ((value = strchr(this_char, '=')) != NULL)
+			*value++ = 0;
+
+		if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
+			if (!value || !*value) {
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			args->logbufs = simple_strtoul(value, &eov, 10);
+		} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
+			if (!value || !*value) {
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			args->logbufsize = suffix_strtoul(value, &eov, 10);
+		} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
+			if (!value || !*value) {
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			strncpy(args->logname, value, MAXNAMELEN);
+		} else if (!strcmp(this_char, MNTOPT_MTPT)) {
+			if (!value || !*value) {
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			strncpy(args->mtpt, value, MAXNAMELEN);
+		} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
+			if (!value || !*value) {
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			strncpy(args->rtname, value, MAXNAMELEN);
+		} else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
+			if (!value || !*value) {
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			iosize = simple_strtoul(value, &eov, 10);
+			args->flags |= XFSMNT_IOSIZE;
+			args->iosizelog = (uint8_t) iosize;
+		} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
+			if (!value || !*value) {
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			iosize = suffix_strtoul(value, &eov, 10);
+			args->flags |= XFSMNT_IOSIZE;
+			args->iosizelog = ffs(iosize) - 1;
+		} else if (!strcmp(this_char, MNTOPT_GRPID) ||
+			   !strcmp(this_char, MNTOPT_BSDGROUPS)) {
+			mp->m_flags |= XFS_MOUNT_GRPID;
+		} else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
+			   !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
+			mp->m_flags &= ~XFS_MOUNT_GRPID;
+		} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
+			args->flags |= XFSMNT_WSYNC;
+		} else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
+			args->flags |= XFSMNT_OSYNCISOSYNC;
+		} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
+			args->flags |= XFSMNT_NORECOVERY;
+		} else if (!strcmp(this_char, MNTOPT_INO64)) {
+			args->flags |= XFSMNT_INO64;
+#if !XFS_BIG_INUMS
+			cmn_err(CE_WARN,
+				"XFS: %s option not allowed on this system",
+				this_char);
+			return EINVAL;
+#endif
+		} else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
+			args->flags |= XFSMNT_NOALIGN;
+		} else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
+			args->flags |= XFSMNT_SWALLOC;
+		} else if (!strcmp(this_char, MNTOPT_SUNIT)) {
+			if (!value || !*value) {
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			dsunit = simple_strtoul(value, &eov, 10);
+		} else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
+			if (!value || !*value) {
+				cmn_err(CE_WARN,
+					"XFS: %s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			dswidth = simple_strtoul(value, &eov, 10);
+		} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
+			args->flags &= ~XFSMNT_32BITINODES;
+#if !XFS_BIG_INUMS
+			cmn_err(CE_WARN,
+				"XFS: %s option not allowed on this system",
+				this_char);
+			return EINVAL;
+#endif
+		} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
+			args->flags |= XFSMNT_NOUUID;
+		} else if (!strcmp(this_char, MNTOPT_BARRIER)) {
+			args->flags |= XFSMNT_BARRIER;
+		} else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
+			args->flags &= ~XFSMNT_BARRIER;
+		} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
+			ikeep = 1;
+			args->flags &= ~XFSMNT_IDELETE;
+		} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
+			args->flags |= XFSMNT_IDELETE;
+		} else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
+			args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE;
+		} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
+			args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
+		} else if (!strcmp(this_char, MNTOPT_ATTR2)) {
+			args->flags |= XFSMNT_ATTR2;
+		} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
+			args->flags &= ~XFSMNT_ATTR2;
+		} else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
+			args->flags2 |= XFSMNT2_FILESTREAMS;
+		} else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
+			args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA);
+			args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA);
+		} else if (!strcmp(this_char, MNTOPT_QUOTA) ||
+			   !strcmp(this_char, MNTOPT_UQUOTA) ||
+			   !strcmp(this_char, MNTOPT_USRQUOTA)) {
+			args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF;
+		} else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
+			   !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
+			args->flags |= XFSMNT_UQUOTA;
+			args->flags &= ~XFSMNT_UQUOTAENF;
+		} else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
+			   !strcmp(this_char, MNTOPT_PRJQUOTA)) {
+			args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF;
+		} else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
+			args->flags |= XFSMNT_PQUOTA;
+			args->flags &= ~XFSMNT_PQUOTAENF;
+		} else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
+			   !strcmp(this_char, MNTOPT_GRPQUOTA)) {
+			args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF;
+		} else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
+			args->flags |= XFSMNT_GQUOTA;
+			args->flags &= ~XFSMNT_GQUOTAENF;
+		} else if (!strcmp(this_char, MNTOPT_DMAPI)) {
+			args->flags |= XFSMNT_DMAPI;
+		} else if (!strcmp(this_char, MNTOPT_XDSM)) {
+			args->flags |= XFSMNT_DMAPI;
+		} else if (!strcmp(this_char, MNTOPT_DMI)) {
+			args->flags |= XFSMNT_DMAPI;
+		} else if (!strcmp(this_char, "ihashsize")) {
+			cmn_err(CE_WARN,
+	"XFS: ihashsize no longer used, option is deprecated.");
+		} else if (!strcmp(this_char, "osyncisdsync")) {
+			/* no-op, this is now the default */
+			cmn_err(CE_WARN,
+	"XFS: osyncisdsync is now the default, option is deprecated.");
+		} else if (!strcmp(this_char, "irixsgid")) {
+			cmn_err(CE_WARN,
+	"XFS: irixsgid is now a sysctl(2) variable, option is deprecated.");
+		} else {
+			cmn_err(CE_WARN,
+				"XFS: unknown mount option [%s].", this_char);
+			return EINVAL;
+		}
+	}
+
+	if (args->flags & XFSMNT_NORECOVERY) {
+		if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) {
+			cmn_err(CE_WARN,
+				"XFS: no-recovery mounts must be read-only.");
+			return EINVAL;
+		}
+	}
+
+	if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) {
+		cmn_err(CE_WARN,
+	"XFS: sunit and swidth options incompatible with the noalign option");
+		return EINVAL;
+	}
+
+	if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) {
+		cmn_err(CE_WARN,
+			"XFS: cannot mount with both project and group quota");
+		return EINVAL;
+	}
+
+	if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') {
+		printk("XFS: %s option needs the mount point option as well\n",
+			MNTOPT_DMAPI);
+		return EINVAL;
+	}
+
+	if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
+		cmn_err(CE_WARN,
+			"XFS: sunit and swidth must be specified together");
+		return EINVAL;
+	}
+
+	if (dsunit && (dswidth % dsunit != 0)) {
+		cmn_err(CE_WARN,
+	"XFS: stripe width (%d) must be a multiple of the stripe unit (%d)",
+			dswidth, dsunit);
+		return EINVAL;
+	}
+
+	/*
+	 * Applications using DMI filesystems often expect the
+	 * inode generation number to be monotonically increasing.
+	 * If we delete inode chunks we break this assumption, so
+	 * keep unused inode chunks on disk for DMI filesystems
+	 * until we come up with a better solution.
+	 * Note that if "ikeep" or "noikeep" mount options are
+	 * supplied, then they are honored.
+	 */
+	if (!(args->flags & XFSMNT_DMAPI) && !ikeep)
+		args->flags |= XFSMNT_IDELETE;
+
+	if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
+		if (dsunit) {
+			args->sunit = dsunit;
+			args->flags |= XFSMNT_RETERR;
+		} else {
+			args->sunit = vol_dsunit;
+		}
+		dswidth ? (args->swidth = dswidth) :
+			  (args->swidth = vol_dswidth);
+	} else {
+		args->sunit = args->swidth = 0;
+	}
+
+done:
+	if (args->flags & XFSMNT_32BITINODES)
+		mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+	if (args->flags2)
+		args->flags |= XFSMNT_FLAGS2;
+	return 0;
+}
+
+struct proc_xfs_info {
+	int	flag;
+	char	*str;
+};
+
+STATIC int
+xfs_showargs(
+	struct xfs_mount	*mp,
+	struct seq_file		*m)
+{
+	static struct proc_xfs_info xfs_info_set[] = {
+		/* the few simple ones we can get from the mount struct */
+		{ XFS_MOUNT_WSYNC,		"," MNTOPT_WSYNC },
+		{ XFS_MOUNT_INO64,		"," MNTOPT_INO64 },
+		{ XFS_MOUNT_NOALIGN,		"," MNTOPT_NOALIGN },
+		{ XFS_MOUNT_SWALLOC,		"," MNTOPT_SWALLOC },
+		{ XFS_MOUNT_NOUUID,		"," MNTOPT_NOUUID },
+		{ XFS_MOUNT_NORECOVERY,		"," MNTOPT_NORECOVERY },
+		{ XFS_MOUNT_OSYNCISOSYNC,	"," MNTOPT_OSYNCISOSYNC },
+		{ XFS_MOUNT_ATTR2,		"," MNTOPT_ATTR2 },
+		{ XFS_MOUNT_FILESTREAMS,	"," MNTOPT_FILESTREAM },
+		{ XFS_MOUNT_DMAPI,		"," MNTOPT_DMAPI },
+		{ XFS_MOUNT_GRPID,		"," MNTOPT_GRPID },
+		{ 0, NULL }
+	};
+	static struct proc_xfs_info xfs_info_unset[] = {
+		/* the few simple ones we can get from the mount struct */
+		{ XFS_MOUNT_IDELETE,		"," MNTOPT_IKEEP },
+		{ XFS_MOUNT_COMPAT_IOSIZE,	"," MNTOPT_LARGEIO },
+		{ XFS_MOUNT_BARRIER,		"," MNTOPT_NOBARRIER },
+		{ XFS_MOUNT_SMALL_INUMS,	"," MNTOPT_64BITINODE },
+		{ 0, NULL }
+	};
+	struct proc_xfs_info	*xfs_infop;
+
+	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
+		if (mp->m_flags & xfs_infop->flag)
+			seq_puts(m, xfs_infop->str);
+	}
+	for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
+		if (!(mp->m_flags & xfs_infop->flag))
+			seq_puts(m, xfs_infop->str);
+	}
+
+	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
+		seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
+				(int)(1 << mp->m_writeio_log) >> 10);
+
+	if (mp->m_logbufs > 0)
+		seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
+	if (mp->m_logbsize > 0)
+		seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
+
+	if (mp->m_logname)
+		seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
+	if (mp->m_rtname)
+		seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
+
+	if (mp->m_dalign > 0)
+		seq_printf(m, "," MNTOPT_SUNIT "=%d",
+				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
+	if (mp->m_swidth > 0)
+		seq_printf(m, "," MNTOPT_SWIDTH "=%d",
+				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
+
+	if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
+		seq_puts(m, "," MNTOPT_USRQUOTA);
+	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
+		seq_puts(m, "," MNTOPT_UQUOTANOENF);
+
+	if (mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
+		seq_puts(m, "," MNTOPT_PRJQUOTA);
+	else if (mp->m_qflags & XFS_PQUOTA_ACCT)
+		seq_puts(m, "," MNTOPT_PQUOTANOENF);
+
+	if (mp->m_qflags & (XFS_GQUOTA_ACCT|XFS_OQUOTA_ENFD))
+		seq_puts(m, "," MNTOPT_GRPQUOTA);
+	else if (mp->m_qflags & XFS_GQUOTA_ACCT)
+		seq_puts(m, "," MNTOPT_GQUOTANOENF);
+
+	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
+		seq_puts(m, "," MNTOPT_NOQUOTA);
+
+	return 0;
+}
 __uint64_t
 xfs_max_file_offset(
 	unsigned int		blockshift)
@@ -137,7 +569,7 @@ xfs_set_inodeops(
 		break;
 	case S_IFLNK:
 		inode->i_op = &xfs_symlink_inode_operations;
-		if (inode->i_blocks)
+		if (!(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE))
 			inode->i_mapping->a_ops = &xfs_address_space_operations;
 		break;
 	default:
@@ -174,8 +606,6 @@ xfs_revalidate_inode(
 
 	inode->i_generation = ip->i_d.di_gen;
 	i_size_write(inode, ip->i_d.di_size);
-	inode->i_blocks =
-		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
 	inode->i_atime.tv_sec	= ip->i_d.di_atime.t_sec;
 	inode->i_atime.tv_nsec	= ip->i_d.di_atime.t_nsec;
 	inode->i_mtime.tv_sec	= ip->i_d.di_mtime.t_sec;
@@ -334,6 +764,64 @@ xfs_blkdev_issue_flush(
 	blkdev_issue_flush(buftarg->bt_bdev, NULL);
 }
 
+/*
+ * XFS AIL push thread support
+ */
+void
+xfsaild_wakeup(
+	xfs_mount_t		*mp,
+	xfs_lsn_t		threshold_lsn)
+{
+	mp->m_ail.xa_target = threshold_lsn;
+	wake_up_process(mp->m_ail.xa_task);
+}
+
+int
+xfsaild(
+	void	*data)
+{
+	xfs_mount_t	*mp = (xfs_mount_t *)data;
+	xfs_lsn_t	last_pushed_lsn = 0;
+	long		tout = 0;
+
+	while (!kthread_should_stop()) {
+		if (tout)
+			schedule_timeout_interruptible(msecs_to_jiffies(tout));
+		tout = 1000;
+
+		/* swsusp */
+		try_to_freeze();
+
+		ASSERT(mp->m_log);
+		if (XFS_FORCED_SHUTDOWN(mp))
+			continue;
+
+		tout = xfsaild_push(mp, &last_pushed_lsn);
+	}
+
+	return 0;
+}	/* xfsaild */
+
+int
+xfsaild_start(
+	xfs_mount_t	*mp)
+{
+	mp->m_ail.xa_target = 0;
+	mp->m_ail.xa_task = kthread_run(xfsaild, mp, "xfsaild");
+	if (IS_ERR(mp->m_ail.xa_task))
+		return -PTR_ERR(mp->m_ail.xa_task);
+	return 0;
+}
+
+void
+xfsaild_stop(
+	xfs_mount_t	*mp)
+{
+	kthread_stop(mp->m_ail.xa_task);
+}
+
+
+
 STATIC struct inode *
 xfs_fs_alloc_inode(
 	struct super_block	*sb)
@@ -361,7 +849,7 @@ xfs_fs_inode_init_once(
 	inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
 }
 
-STATIC int
+STATIC int __init
 xfs_init_zones(void)
 {
 	xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
@@ -410,8 +898,7 @@ xfs_fs_write_inode(
 {
 	int			error = 0, flags = FLUSH_INODE;
 
-	vn_trace_entry(XFS_I(inode), __FUNCTION__,
-			(inst_t *)__return_address);
+	xfs_itrace_entry(XFS_I(inode));
 	if (sync) {
 		filemap_fdatawait(inode->i_mapping);
 		flags |= FLUSH_SYNC;
@@ -438,8 +925,7 @@ xfs_fs_clear_inode(
 	 * find an inode with di_mode == 0 but without IGET_CREATE set.
 	 */
 	if (ip) {
-		vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
-
+		xfs_itrace_entry(ip);
 		XFS_STATS_INC(vn_rele);
 		XFS_STATS_INC(vn_remove);
 		XFS_STATS_INC(vn_reclaim);
@@ -683,8 +1169,44 @@ xfs_fs_statfs(
 	struct dentry		*dentry,
 	struct kstatfs		*statp)
 {
-	return -xfs_statvfs(XFS_M(dentry->d_sb), statp,
-				vn_from_inode(dentry->d_inode));
+	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
+	xfs_sb_t		*sbp = &mp->m_sb;
+	__uint64_t		fakeinos, id;
+	xfs_extlen_t		lsize;
+
+	statp->f_type = XFS_SB_MAGIC;
+	statp->f_namelen = MAXNAMELEN - 1;
+
+	id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
+	statp->f_fsid.val[0] = (u32)id;
+	statp->f_fsid.val[1] = (u32)(id >> 32);
+
+	xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT);
+
+	spin_lock(&mp->m_sb_lock);
+	statp->f_bsize = sbp->sb_blocksize;
+	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
+	statp->f_blocks = sbp->sb_dblocks - lsize;
+	statp->f_bfree = statp->f_bavail =
+				sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+	fakeinos = statp->f_bfree << sbp->sb_inopblog;
+#if XFS_BIG_INUMS
+	fakeinos += mp->m_inoadd;
+#endif
+	statp->f_files =
+	    MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
+	if (mp->m_maxicount)
+#if XFS_BIG_INUMS
+		if (!mp->m_inoadd)
+#endif
+			statp->f_files = min_t(typeof(statp->f_files),
+						statp->f_files,
+						mp->m_maxicount);
+	statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+	spin_unlock(&mp->m_sb_lock);
+
+	XFS_QM_DQSTATVFS(XFS_I(dentry->d_inode), statp);
+	return 0;
 }
 
 STATIC int
@@ -704,11 +1226,19 @@ xfs_fs_remount(
 	return -error;
 }
 
+/*
+ * Second stage of a freeze. The data is already frozen so we only
+ * need to take care of themetadata. Once that's done write a dummy
+ * record to dirty the log in case of a crash while frozen.
+ */
 STATIC void
 xfs_fs_lockfs(
 	struct super_block	*sb)
 {
-	xfs_freeze(XFS_M(sb));
+	struct xfs_mount	*mp = XFS_M(sb);
+
+	xfs_attr_quiesce(mp);
+	xfs_fs_log_dummy(mp);
 }
 
 STATIC int
@@ -779,7 +1309,6 @@ xfs_fs_fill_super(
 	struct inode		*rootvp;
 	struct xfs_mount	*mp = NULL;
 	struct xfs_mount_args	*args = xfs_args_allocate(sb, silent);
-	struct kstatfs		statvfs;
 	int			error;
 
 	mp = xfs_mount_init();
@@ -807,21 +1336,19 @@ xfs_fs_fill_super(
 	if (error)
 		goto fail_vfsop;
 
-	error = xfs_statvfs(mp, &statvfs, NULL);
-	if (error)
-		goto fail_unmount;
-
 	sb->s_dirt = 1;
-	sb->s_magic = statvfs.f_type;
-	sb->s_blocksize = statvfs.f_bsize;
-	sb->s_blocksize_bits = ffs(statvfs.f_bsize) - 1;
+	sb->s_magic = XFS_SB_MAGIC;
+	sb->s_blocksize = mp->m_sb.sb_blocksize;
+	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
 	sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
 	sb->s_time_gran = 1;
 	set_posix_acl_flag(sb);
 
-	error = xfs_root(mp, &rootvp);
-	if (error)
+	rootvp = igrab(mp->m_rootip->i_vnode);
+	if (!rootvp) {
+		error = ENOENT;
 		goto fail_unmount;
+	}
 
 	sb->s_root = d_alloc_root(vn_to_inode(rootvp));
 	if (!sb->s_root) {
@@ -841,8 +1368,7 @@ xfs_fs_fill_super(
 		goto fail_vnrele;
 	}
 
-	vn_trace_exit(XFS_I(sb->s_root->d_inode), __FUNCTION__,
-			(inst_t *)__return_address);
+	xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
 
 	kmem_free(args, sizeof(*args));
 	return 0;

+ 44 - 73
fs/xfs/linux-2.6/xfs_vnode.c

@@ -40,7 +40,7 @@
 #define vptosync(v)             (&vsync[((unsigned long)v) % NVSYNC])
 static wait_queue_head_t vsync[NVSYNC];
 
-void
+void __init
 vn_init(void)
 {
 	int i;
@@ -82,84 +82,55 @@ vn_ioerror(
 		xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l);
 }
 
-bhv_vnode_t *
-vn_initialize(
-	struct inode	*inode)
-{
-	bhv_vnode_t	*vp = vn_from_inode(inode);
-
-	XFS_STATS_INC(vn_active);
-	XFS_STATS_INC(vn_alloc);
-
-	ASSERT(VN_CACHED(vp) == 0);
-
-	return vp;
-}
-
 /*
- * Revalidate the Linux inode from the vattr.
+ * Revalidate the Linux inode from the XFS inode.
  * Note: i_size _not_ updated; we must hold the inode
  * semaphore when doing that - callers responsibility.
  */
-void
-vn_revalidate_core(
-	bhv_vnode_t	*vp,
-	bhv_vattr_t	*vap)
+int
+vn_revalidate(
+	bhv_vnode_t		*vp)
 {
-	struct inode	*inode = vn_to_inode(vp);
-
-	inode->i_mode	    = vap->va_mode;
-	inode->i_nlink	    = vap->va_nlink;
-	inode->i_uid	    = vap->va_uid;
-	inode->i_gid	    = vap->va_gid;
-	inode->i_blocks	    = vap->va_nblocks;
-	inode->i_mtime	    = vap->va_mtime;
-	inode->i_ctime	    = vap->va_ctime;
-	if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
+	struct inode		*inode = vn_to_inode(vp);
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	unsigned long		xflags;
+
+	xfs_itrace_entry(ip);
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+	inode->i_mode	    = ip->i_d.di_mode;
+	inode->i_uid	    = ip->i_d.di_uid;
+	inode->i_gid	    = ip->i_d.di_gid;
+	inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
+	inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
+	inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
+	inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
+
+	xflags = xfs_ip2xflags(ip);
+	if (xflags & XFS_XFLAG_IMMUTABLE)
 		inode->i_flags |= S_IMMUTABLE;
 	else
 		inode->i_flags &= ~S_IMMUTABLE;
-	if (vap->va_xflags & XFS_XFLAG_APPEND)
+	if (xflags & XFS_XFLAG_APPEND)
 		inode->i_flags |= S_APPEND;
 	else
 		inode->i_flags &= ~S_APPEND;
-	if (vap->va_xflags & XFS_XFLAG_SYNC)
+	if (xflags & XFS_XFLAG_SYNC)
 		inode->i_flags |= S_SYNC;
 	else
 		inode->i_flags &= ~S_SYNC;
-	if (vap->va_xflags & XFS_XFLAG_NOATIME)
+	if (xflags & XFS_XFLAG_NOATIME)
 		inode->i_flags |= S_NOATIME;
 	else
 		inode->i_flags &= ~S_NOATIME;
-}
-
-/*
- * Revalidate the Linux inode from the vnode.
- */
-int
-__vn_revalidate(
-	bhv_vnode_t	*vp,
-	bhv_vattr_t	*vattr)
-{
-	int		error;
-
-	vn_trace_entry(xfs_vtoi(vp), __FUNCTION__, (inst_t *)__return_address);
-	vattr->va_mask = XFS_AT_STAT | XFS_AT_XFLAGS;
-	error = xfs_getattr(xfs_vtoi(vp), vattr, 0);
-	if (likely(!error)) {
-		vn_revalidate_core(vp, vattr);
-		xfs_iflags_clear(xfs_vtoi(vp), XFS_IMODIFIED);
-	}
-	return -error;
-}
-
-int
-vn_revalidate(
-	bhv_vnode_t	*vp)
-{
-	bhv_vattr_t	vattr;
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
-	return __vn_revalidate(vp, &vattr);
+	xfs_iflags_clear(ip, XFS_IMODIFIED);
+	return 0;
 }
 
 /*
@@ -179,7 +150,7 @@ vn_hold(
 	return vp;
 }
 
-#ifdef	XFS_VNODE_TRACE
+#ifdef	XFS_INODE_TRACE
 
 /*
  * Reference count of Linux inode if present, -1 if the xfs_inode
@@ -211,32 +182,32 @@ static inline int xfs_icount(struct xfs_inode *ip)
  * Vnode tracing code.
  */
 void
-vn_trace_entry(xfs_inode_t *ip, const char *func, inst_t *ra)
+_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra)
 {
-	KTRACE_ENTER(ip, VNODE_KTRACE_ENTRY, func, 0, ra);
+	KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra);
 }
 
 void
-vn_trace_exit(xfs_inode_t *ip, const char *func, inst_t *ra)
+_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra)
 {
-	KTRACE_ENTER(ip, VNODE_KTRACE_EXIT, func, 0, ra);
+	KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra);
 }
 
 void
-vn_trace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra)
+xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra)
 {
-	KTRACE_ENTER(ip, VNODE_KTRACE_HOLD, file, line, ra);
+	KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra);
 }
 
 void
-vn_trace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra)
+_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra)
 {
-	KTRACE_ENTER(ip, VNODE_KTRACE_REF, file, line, ra);
+	KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra);
 }
 
 void
-vn_trace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra)
+xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra)
 {
-	KTRACE_ENTER(ip, VNODE_KTRACE_RELE, file, line, ra);
+	KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra);
 }
-#endif	/* XFS_VNODE_TRACE */
+#endif	/* XFS_INODE_TRACE */

+ 32 - 30
fs/xfs/linux-2.6/xfs_vnode.h

@@ -187,10 +187,7 @@ typedef struct bhv_vattr {
 	(VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
 
 extern void	vn_init(void);
-extern bhv_vnode_t	*vn_initialize(struct inode *);
 extern int	vn_revalidate(bhv_vnode_t *);
-extern int	__vn_revalidate(bhv_vnode_t *, bhv_vattr_t *);
-extern void	vn_revalidate_core(bhv_vnode_t *, bhv_vattr_t *);
 
 /*
  * Yeah, these don't take vnode anymore at all, all this should be
@@ -210,12 +207,12 @@ static inline int vn_count(bhv_vnode_t *vp)
  */
 extern bhv_vnode_t	*vn_hold(bhv_vnode_t *);
 
-#if defined(XFS_VNODE_TRACE)
+#if defined(XFS_INODE_TRACE)
 #define VN_HOLD(vp)		\
 	((void)vn_hold(vp),	\
-	  vn_trace_hold(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address))
+	  xfs_itrace_hold(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address))
 #define VN_RELE(vp)		\
-	  (vn_trace_rele(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address), \
+	  (xfs_itrace_rele(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address), \
 	   iput(vn_to_inode(vp)))
 #else
 #define VN_HOLD(vp)		((void)vn_hold(vp))
@@ -238,11 +235,6 @@ static inline bhv_vnode_t *vn_grab(bhv_vnode_t *vp)
 /*
  * Dealing with bad inodes
  */
-static inline void vn_mark_bad(bhv_vnode_t *vp)
-{
-	make_bad_inode(vn_to_inode(vp));
-}
-
 static inline int VN_BAD(bhv_vnode_t *vp)
 {
 	return is_bad_inode(vn_to_inode(vp));
@@ -296,26 +288,36 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
 /*
  * Tracking vnode activity.
  */
-#if defined(XFS_VNODE_TRACE)
-
-#define	VNODE_TRACE_SIZE	16		/* number of trace entries */
-#define	VNODE_KTRACE_ENTRY	1
-#define	VNODE_KTRACE_EXIT	2
-#define	VNODE_KTRACE_HOLD	3
-#define	VNODE_KTRACE_REF	4
-#define	VNODE_KTRACE_RELE	5
-
-extern void vn_trace_entry(struct xfs_inode *, const char *, inst_t *);
-extern void vn_trace_exit(struct xfs_inode *, const char *, inst_t *);
-extern void vn_trace_hold(struct xfs_inode *, char *, int, inst_t *);
-extern void vn_trace_ref(struct xfs_inode *, char *, int, inst_t *);
-extern void vn_trace_rele(struct xfs_inode *, char *, int, inst_t *);
+#if defined(XFS_INODE_TRACE)
+
+#define	INODE_TRACE_SIZE	16		/* number of trace entries */
+#define	INODE_KTRACE_ENTRY	1
+#define	INODE_KTRACE_EXIT	2
+#define	INODE_KTRACE_HOLD	3
+#define	INODE_KTRACE_REF	4
+#define	INODE_KTRACE_RELE	5
+
+extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *);
+extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *);
+extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *);
+extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *);
+extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *);
+#define xfs_itrace_entry(ip)	\
+	_xfs_itrace_entry(ip, __FUNCTION__, (inst_t *)__return_address)
+#define xfs_itrace_exit(ip)	\
+	_xfs_itrace_exit(ip, __FUNCTION__, (inst_t *)__return_address)
+#define xfs_itrace_exit_tag(ip, tag)	\
+	_xfs_itrace_exit(ip, tag, (inst_t *)__return_address)
+#define xfs_itrace_ref(ip)	\
+	_xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address)
+
 #else
-#define	vn_trace_entry(a,b,c)
-#define	vn_trace_exit(a,b,c)
-#define	vn_trace_hold(a,b,c,d)
-#define	vn_trace_ref(a,b,c,d)
-#define	vn_trace_rele(a,b,c,d)
+#define	xfs_itrace_entry(a)
+#define	xfs_itrace_exit(a)
+#define	xfs_itrace_exit_tag(a, b)
+#define	xfs_itrace_hold(a, b, c, d)
+#define	xfs_itrace_ref(a)
+#define	xfs_itrace_rele(a, b, c, d)
 #endif
 
 #endif	/* __XFS_VNODE_H__ */

+ 5 - 7
fs/xfs/quota/xfs_dquot.c

@@ -1209,7 +1209,6 @@ xfs_qm_dqflush(
 	xfs_buf_t		*bp;
 	xfs_disk_dquot_t	*ddqp;
 	int			error;
-	SPLDECL(s);
 
 	ASSERT(XFS_DQ_IS_LOCKED(dqp));
 	ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
@@ -1270,9 +1269,9 @@ xfs_qm_dqflush(
 	mp = dqp->q_mount;
 
 	/* lsn is 64 bits */
-	AIL_LOCK(mp, s);
+	spin_lock(&mp->m_ail_lock);
 	dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn;
-	AIL_UNLOCK(mp, s);
+	spin_unlock(&mp->m_ail_lock);
 
 	/*
 	 * Attach an iodone routine so that we can remove this dquot from the
@@ -1318,7 +1317,6 @@ xfs_qm_dqflush_done(
 	xfs_dq_logitem_t	*qip)
 {
 	xfs_dquot_t		*dqp;
-	SPLDECL(s);
 
 	dqp = qip->qli_dquot;
 
@@ -1333,15 +1331,15 @@ xfs_qm_dqflush_done(
 	if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
 	    qip->qli_item.li_lsn == qip->qli_flush_lsn) {
 
-		AIL_LOCK(dqp->q_mount, s);
+		spin_lock(&dqp->q_mount->m_ail_lock);
 		/*
 		 * xfs_trans_delete_ail() drops the AIL lock.
 		 */
 		if (qip->qli_item.li_lsn == qip->qli_flush_lsn)
 			xfs_trans_delete_ail(dqp->q_mount,
-					     (xfs_log_item_t*)qip, s);
+					     (xfs_log_item_t*)qip);
 		else
-			AIL_UNLOCK(dqp->q_mount, s);
+			spin_unlock(&dqp->q_mount->m_ail_lock);
 	}
 
 	/*

+ 0 - 5
fs/xfs/quota/xfs_dquot.h

@@ -123,11 +123,6 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
 				   vsema(&((dqp)->q_flock)); \
 				   (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); }
 
-#define XFS_DQ_PINLOCK(dqp)	   mutex_spinlock( \
-				     &(XFS_DQ_TO_QINF(dqp)->qi_pinlock))
-#define XFS_DQ_PINUNLOCK(dqp, s)   mutex_spinunlock( \
-				     &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s)
-
 #define XFS_DQ_IS_FLUSH_LOCKED(dqp) (issemalocked(&((dqp)->q_flock)))
 #define XFS_DQ_IS_ON_FREELIST(dqp)  ((dqp)->dq_flnext != (dqp))
 #define XFS_DQ_IS_DIRTY(dqp)	((dqp)->dq_flags & XFS_DQ_DIRTY)

+ 11 - 16
fs/xfs/quota/xfs_dquot_item.c

@@ -94,14 +94,13 @@ STATIC void
 xfs_qm_dquot_logitem_pin(
 	xfs_dq_logitem_t *logitem)
 {
-	unsigned long	s;
 	xfs_dquot_t *dqp;
 
 	dqp = logitem->qli_dquot;
 	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	s = XFS_DQ_PINLOCK(dqp);
+	spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
 	dqp->q_pincount++;
-	XFS_DQ_PINUNLOCK(dqp, s);
+	spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
 }
 
 /*
@@ -115,17 +114,16 @@ xfs_qm_dquot_logitem_unpin(
 	xfs_dq_logitem_t *logitem,
 	int		  stale)
 {
-	unsigned long	s;
 	xfs_dquot_t *dqp;
 
 	dqp = logitem->qli_dquot;
 	ASSERT(dqp->q_pincount > 0);
-	s = XFS_DQ_PINLOCK(dqp);
+	spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
 	dqp->q_pincount--;
 	if (dqp->q_pincount == 0) {
 		sv_broadcast(&dqp->q_pinwait);
 	}
-	XFS_DQ_PINUNLOCK(dqp, s);
+	spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
 }
 
 /* ARGSUSED */
@@ -189,8 +187,6 @@ void
 xfs_qm_dqunpin_wait(
 	xfs_dquot_t	*dqp)
 {
-	SPLDECL(s);
-
 	ASSERT(XFS_DQ_IS_LOCKED(dqp));
 	if (dqp->q_pincount == 0) {
 		return;
@@ -200,9 +196,9 @@ xfs_qm_dqunpin_wait(
 	 * Give the log a push so we don't wait here too long.
 	 */
 	xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE);
-	s = XFS_DQ_PINLOCK(dqp);
+	spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
 	if (dqp->q_pincount == 0) {
-		XFS_DQ_PINUNLOCK(dqp, s);
+		spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
 		return;
 	}
 	sv_wait(&(dqp->q_pinwait), PINOD,
@@ -216,8 +212,8 @@ xfs_qm_dqunpin_wait(
  * If so, we want to push it out to help us take this item off the AIL as soon
  * as possible.
  *
- * We must not be holding the AIL_LOCK at this point. Calling incore() to
- * search the buffer cache can be a time consuming thing, and AIL_LOCK is a
+ * We must not be holding the AIL lock at this point. Calling incore() to
+ * search the buffer cache can be a time consuming thing, and AIL lock is a
  * spinlock.
  */
 STATIC void
@@ -322,7 +318,7 @@ xfs_qm_dquot_logitem_trylock(
 		 * want to do that now since we might sleep in the device
 		 * strategy routine.  We also don't want to grab the buffer lock
 		 * here because we'd like not to call into the buffer cache
-		 * while holding the AIL_LOCK.
+		 * while holding the AIL lock.
 		 * Make sure to only return PUSHBUF if we set pushbuf_flag
 		 * ourselves.  If someone else is doing it then we don't
 		 * want to go to the push routine and duplicate their efforts.
@@ -562,15 +558,14 @@ xfs_qm_qoffend_logitem_committed(
 	xfs_lsn_t lsn)
 {
 	xfs_qoff_logitem_t	*qfs;
-	SPLDECL(s);
 
 	qfs = qfe->qql_start_lip;
-	AIL_LOCK(qfs->qql_item.li_mountp,s);
+	spin_lock(&qfs->qql_item.li_mountp->m_ail_lock);
 	/*
 	 * Delete the qoff-start logitem from the AIL.
 	 * xfs_trans_delete_ail() drops the AIL lock.
 	 */
-	xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs, s);
+	xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs);
 	kmem_free(qfs, sizeof(xfs_qoff_logitem_t));
 	kmem_free(qfe, sizeof(xfs_qoff_logitem_t));
 	return (xfs_lsn_t)-1;

+ 6 - 8
fs/xfs/quota/xfs_qm.c

@@ -310,7 +310,6 @@ xfs_qm_mount_quotas(
 	xfs_mount_t	*mp,
 	int		mfsi_flags)
 {
-	unsigned long	s;
 	int		error = 0;
 	uint		sbf;
 
@@ -367,13 +366,13 @@ xfs_qm_mount_quotas(
 
  write_changes:
 	/*
-	 * We actually don't have to acquire the SB_LOCK at all.
+	 * We actually don't have to acquire the m_sb_lock at all.
 	 * This can only be called from mount, and that's single threaded. XXX
 	 */
-	s = XFS_SB_LOCK(mp);
+	spin_lock(&mp->m_sb_lock);
 	sbf = mp->m_sb.sb_qflags;
 	mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
-	XFS_SB_UNLOCK(mp, s);
+	spin_unlock(&mp->m_sb_lock);
 
 	if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
 		if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
@@ -1139,7 +1138,7 @@ xfs_qm_init_quotainfo(
 		return error;
 	}
 
-	spinlock_init(&qinf->qi_pinlock, "xfs_qinf_pin");
+	spin_lock_init(&qinf->qi_pinlock);
 	xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
 	qinf->qi_dqreclaims = 0;
 
@@ -1370,7 +1369,6 @@ xfs_qm_qino_alloc(
 {
 	xfs_trans_t	*tp;
 	int		error;
-	unsigned long	s;
 	int		committed;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
@@ -1402,7 +1400,7 @@ xfs_qm_qino_alloc(
 	 * sbfields arg may contain fields other than *QUOTINO;
 	 * VERSIONNUM for example.
 	 */
-	s = XFS_SB_LOCK(mp);
+	spin_lock(&mp->m_sb_lock);
 	if (flags & XFS_QMOPT_SBVERSION) {
 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
 		unsigned oldv = mp->m_sb.sb_versionnum;
@@ -1429,7 +1427,7 @@ xfs_qm_qino_alloc(
 		mp->m_sb.sb_uquotino = (*ip)->i_ino;
 	else
 		mp->m_sb.sb_gquotino = (*ip)->i_ino;
-	XFS_SB_UNLOCK(mp, s);
+	spin_unlock(&mp->m_sb_lock);
 	xfs_mod_sb(tp, sbfields);
 
 	if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {

+ 3 - 3
fs/xfs/quota/xfs_qm.h

@@ -52,8 +52,8 @@ extern kmem_zone_t	*qm_dqtrxzone;
 /*
  * Dquot hashtable constants/threshold values.
  */
-#define XFS_QM_HASHSIZE_LOW		(NBPP / sizeof(xfs_dqhash_t))
-#define XFS_QM_HASHSIZE_HIGH		((NBPP * 4) / sizeof(xfs_dqhash_t))
+#define XFS_QM_HASHSIZE_LOW		(PAGE_SIZE / sizeof(xfs_dqhash_t))
+#define XFS_QM_HASHSIZE_HIGH		((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))
 
 /*
  * This defines the unit of allocation of dquots.
@@ -106,7 +106,7 @@ typedef struct xfs_qm {
 typedef struct xfs_quotainfo {
 	xfs_inode_t	*qi_uquotaip;	 /* user quota inode */
 	xfs_inode_t	*qi_gquotaip;	 /* group quota inode */
-	lock_t		 qi_pinlock;	 /* dquot pinning mutex */
+	spinlock_t	 qi_pinlock;	 /* dquot pinning lock */
 	xfs_dqlist_t	 qi_dqlist;	 /* all dquots in filesys */
 	int		 qi_dqreclaims;	 /* a change here indicates
 					    a removal in the dqlist */

+ 8 - 11
fs/xfs/quota/xfs_qm_syscalls.c

@@ -200,7 +200,6 @@ xfs_qm_scall_quotaoff(
 	boolean_t		force)
 {
 	uint			dqtype;
-	unsigned long	s;
 	int			error;
 	uint			inactivate_flags;
 	xfs_qoff_logitem_t	*qoffstart;
@@ -237,9 +236,9 @@ xfs_qm_scall_quotaoff(
 	if ((flags & XFS_ALL_QUOTA_ACCT) == 0) {
 		mp->m_qflags &= ~(flags);
 
-		s = XFS_SB_LOCK(mp);
+		spin_lock(&mp->m_sb_lock);
 		mp->m_sb.sb_qflags = mp->m_qflags;
-		XFS_SB_UNLOCK(mp, s);
+		spin_unlock(&mp->m_sb_lock);
 		mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
 
 		/* XXX what to do if error ? Revert back to old vals incore ? */
@@ -415,7 +414,6 @@ xfs_qm_scall_quotaon(
 	uint		flags)
 {
 	int		error;
-	unsigned long	s;
 	uint		qf;
 	uint		accflags;
 	__int64_t	sbflags;
@@ -468,10 +466,10 @@ xfs_qm_scall_quotaon(
 	 * Change sb_qflags on disk but not incore mp->qflags
 	 * if this is the root filesystem.
 	 */
-	s = XFS_SB_LOCK(mp);
+	spin_lock(&mp->m_sb_lock);
 	qf = mp->m_sb.sb_qflags;
 	mp->m_sb.sb_qflags = qf | flags;
-	XFS_SB_UNLOCK(mp, s);
+	spin_unlock(&mp->m_sb_lock);
 
 	/*
 	 * There's nothing to change if it's the same.
@@ -815,7 +813,6 @@ xfs_qm_log_quotaoff(
 {
 	xfs_trans_t	       *tp;
 	int			error;
-	unsigned long	s;
 	xfs_qoff_logitem_t     *qoffi=NULL;
 	uint			oldsbqflag=0;
 
@@ -832,10 +829,10 @@ xfs_qm_log_quotaoff(
 	qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
 	xfs_trans_log_quotaoff_item(tp, qoffi);
 
-	s = XFS_SB_LOCK(mp);
+	spin_lock(&mp->m_sb_lock);
 	oldsbqflag = mp->m_sb.sb_qflags;
 	mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
-	XFS_SB_UNLOCK(mp, s);
+	spin_unlock(&mp->m_sb_lock);
 
 	xfs_mod_sb(tp, XFS_SB_QFLAGS);
 
@@ -854,9 +851,9 @@ error0:
 		 * No one else is modifying sb_qflags, so this is OK.
 		 * We still hold the quotaofflock.
 		 */
-		s = XFS_SB_LOCK(mp);
+		spin_lock(&mp->m_sb_lock);
 		mp->m_sb.sb_qflags = oldsbqflag;
-		XFS_SB_UNLOCK(mp, s);
+		spin_unlock(&mp->m_sb_lock);
 	}
 	*qoffstartp = qoffi;
 	return (error);

+ 6 - 1
fs/xfs/support/debug.c

@@ -17,7 +17,6 @@
  */
 #include <xfs.h>
 #include "debug.h"
-#include "spin.h"
 
 static char		message[1024];	/* keep it off the stack */
 static DEFINE_SPINLOCK(xfs_err_lock);
@@ -81,3 +80,9 @@ assfail(char *expr, char *file, int line)
 	printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line);
 	BUG();
 }
+
+void
+xfs_hex_dump(void *p, int length)
+{
+	print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1);
+}

+ 2 - 6
fs/xfs/support/ktrace.c

@@ -21,7 +21,7 @@ static kmem_zone_t *ktrace_hdr_zone;
 static kmem_zone_t *ktrace_ent_zone;
 static int          ktrace_zentries;
 
-void
+void __init
 ktrace_init(int zentries)
 {
 	ktrace_zentries = zentries;
@@ -36,7 +36,7 @@ ktrace_init(int zentries)
 	ASSERT(ktrace_ent_zone);
 }
 
-void
+void __exit
 ktrace_uninit(void)
 {
 	kmem_zone_destroy(ktrace_hdr_zone);
@@ -90,8 +90,6 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
 		return NULL;
 	}
 
-	spinlock_init(&(ktp->kt_lock), "kt_lock");
-
 	ktp->kt_entries  = ktep;
 	ktp->kt_nentries = nentries;
 	ktp->kt_index    = 0;
@@ -114,8 +112,6 @@ ktrace_free(ktrace_t *ktp)
 	if (ktp == (ktrace_t *)NULL)
 		return;
 
-	spinlock_destroy(&ktp->kt_lock);
-
 	/*
 	 * Special treatment for the Vnode trace buffer.
 	 */

+ 0 - 3
fs/xfs/support/ktrace.h

@@ -18,8 +18,6 @@
 #ifndef __XFS_SUPPORT_KTRACE_H__
 #define __XFS_SUPPORT_KTRACE_H__
 
-#include <spin.h>
-
 /*
  * Trace buffer entry structure.
  */
@@ -31,7 +29,6 @@ typedef struct ktrace_entry {
  * Trace buffer header structure.
  */
 typedef struct ktrace {
-	lock_t		kt_lock;	/* mutex to guard counters */
 	int		kt_nentries;	/* number of entries in trace buf */
 	int		kt_index;	/* current index in entries */
 	int		kt_rollover;

+ 1 - 1
fs/xfs/support/uuid.c

@@ -133,7 +133,7 @@ uuid_table_remove(uuid_t *uuid)
 	mutex_unlock(&uuid_monitor);
 }
 
-void
+void __init
 uuid_init(void)
 {
 	mutex_init(&uuid_monitor);

+ 1 - 1
fs/xfs/xfs.h

@@ -37,7 +37,7 @@
 #define XFS_LOG_TRACE 1
 #define XFS_RW_TRACE 1
 #define XFS_BUF_TRACE 1
-#define XFS_VNODE_TRACE 1
+#define XFS_INODE_TRACE 1
 #define XFS_FILESTREAMS_TRACE 1
 #endif
 

+ 2 - 28
fs/xfs/xfs_acl.c

@@ -391,32 +391,6 @@ xfs_acl_allow_set(
 	return error;
 }
 
-/*
- * The access control process to determine the access permission:
- *	if uid == file owner id, use the file owner bits.
- *	if gid == file owner group id, use the file group bits.
- *	scan ACL for a matching user or group, and use matched entry
- *	permission. Use total permissions of all matching group entries,
- *	until all acl entries are exhausted. The final permission produced
- *	by matching acl entry or entries needs to be & with group permission.
- *	if not owner, owning group, or matching entry in ACL, use file
- *	other bits.  
- */
-STATIC int
-xfs_acl_capability_check(
-	mode_t		mode,
-	cred_t		*cr)
-{
-	if ((mode & ACL_READ) && !capable_cred(cr, CAP_DAC_READ_SEARCH))
-		return EACCES;
-	if ((mode & ACL_WRITE) && !capable_cred(cr, CAP_DAC_OVERRIDE))
-		return EACCES;
-	if ((mode & ACL_EXECUTE) && !capable_cred(cr, CAP_DAC_OVERRIDE))
-		return EACCES;
-
-	return 0;
-}
-
 /*
  * Note: cr is only used here for the capability check if the ACL test fails.
  *       It is not used to find out the credentials uid or groups etc, as was
@@ -438,7 +412,6 @@ xfs_acl_access(
 
 	matched.ae_tag = 0;	/* Invalid type */
 	matched.ae_perm = 0;
-	md >>= 6;	/* Normalize the bits for comparison */
 
 	for (i = 0; i < fap->acl_cnt; i++) {
 		/*
@@ -520,7 +493,8 @@ xfs_acl_access(
 		break;
 	}
 
-	return xfs_acl_capability_check(md, cr);
+	/* EACCES tells generic_permission to check for capability overrides */
+	return EACCES;
 }
 
 /*

+ 0 - 2
fs/xfs/xfs_acl.h

@@ -75,7 +75,6 @@ extern int xfs_acl_vremove(bhv_vnode_t *, int);
 #define _ACL_GET_DEFAULT(pv,pd)	(xfs_acl_vtoacl(pv,NULL,pd) == 0)
 #define _ACL_ACCESS_EXISTS	xfs_acl_vhasacl_access
 #define _ACL_DEFAULT_EXISTS	xfs_acl_vhasacl_default
-#define _ACL_XFS_IACCESS(i,m,c) (XFS_IFORK_Q(i) ? xfs_acl_iaccess(i,m,c) : -1)
 
 #define _ACL_ALLOC(a)		((a) = kmem_zone_alloc(xfs_acl_zone, KM_SLEEP))
 #define _ACL_FREE(a)		((a)? kmem_zone_free(xfs_acl_zone, (a)):(void)0)
@@ -95,7 +94,6 @@ extern int xfs_acl_vremove(bhv_vnode_t *, int);
 #define _ACL_GET_DEFAULT(pv,pd)	(0)
 #define _ACL_ACCESS_EXISTS	(NULL)
 #define _ACL_DEFAULT_EXISTS	(NULL)
-#define _ACL_XFS_IACCESS(i,m,c) (-1)
 #endif
 
 #endif	/* __XFS_ACL_H__ */

+ 1 - 1
fs/xfs/xfs_ag.h

@@ -193,7 +193,7 @@ typedef struct xfs_perag
 	xfs_agino_t	pagi_count;	/* number of allocated inodes */
 	int		pagb_count;	/* pagb slots in use */
 #ifdef __KERNEL__
-	lock_t		pagb_lock;	/* lock for pagb_list */
+	spinlock_t	pagb_lock;	/* lock for pagb_list */
 #endif
 	xfs_perag_busy_t *pagb_list;	/* unstable blocks */
 	atomic_t        pagf_fstrms;    /* # of filestreams active in this AG */

+ 8 - 11
fs/xfs/xfs_alloc.c

@@ -2206,7 +2206,7 @@ xfs_alloc_read_agf(
 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
 		pag->pagf_levels[XFS_BTNUM_CNTi] =
 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
-		spinlock_init(&pag->pagb_lock, "xfspagb");
+		spin_lock_init(&pag->pagb_lock);
 		pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS *
 					sizeof(xfs_perag_busy_t), KM_SLEEP);
 		pag->pagf_init = 1;
@@ -2500,10 +2500,9 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
 	xfs_mount_t		*mp;
 	xfs_perag_busy_t	*bsy;
 	int			n;
-	SPLDECL(s);
 
 	mp = tp->t_mountp;
-	s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);
+	spin_lock(&mp->m_perag[agno].pagb_lock);
 
 	/* search pagb_list for an open slot */
 	for (bsy = mp->m_perag[agno].pagb_list, n = 0;
@@ -2533,7 +2532,7 @@ xfs_alloc_mark_busy(xfs_trans_t *tp,
 		xfs_trans_set_sync(tp);
 	}
 
-	mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
+	spin_unlock(&mp->m_perag[agno].pagb_lock);
 }
 
 void
@@ -2543,11 +2542,10 @@ xfs_alloc_clear_busy(xfs_trans_t *tp,
 {
 	xfs_mount_t		*mp;
 	xfs_perag_busy_t	*list;
-	SPLDECL(s);
 
 	mp = tp->t_mountp;
 
-	s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);
+	spin_lock(&mp->m_perag[agno].pagb_lock);
 	list = mp->m_perag[agno].pagb_list;
 
 	ASSERT(idx < XFS_PAGB_NUM_SLOTS);
@@ -2559,7 +2557,7 @@ xfs_alloc_clear_busy(xfs_trans_t *tp,
 		TRACE_UNBUSY("xfs_alloc_clear_busy", "missing", agno, idx, tp);
 	}
 
-	mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
+	spin_unlock(&mp->m_perag[agno].pagb_lock);
 }
 
 
@@ -2578,11 +2576,10 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 	xfs_agblock_t		uend, bend;
 	xfs_lsn_t		lsn;
 	int			cnt;
-	SPLDECL(s);
 
 	mp = tp->t_mountp;
 
-	s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);
+	spin_lock(&mp->m_perag[agno].pagb_lock);
 	cnt = mp->m_perag[agno].pagb_count;
 
 	uend = bno + len - 1;
@@ -2615,12 +2612,12 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
 	if (cnt) {
 		TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, n, tp);
 		lsn = bsy->busy_tp->t_commit_lsn;
-		mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
+		spin_unlock(&mp->m_perag[agno].pagb_lock);
 		xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
 	} else {
 		TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, n, tp);
 		n = -1;
-		mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
+		spin_unlock(&mp->m_perag[agno].pagb_lock);
 	}
 
 	return n;

+ 1 - 1
fs/xfs/xfs_attr.c

@@ -929,7 +929,7 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
  * This leaf block cannot have a "remote" value, we only call this routine
  * if bmap_one_block() says there is only one block (ie: no remote blks).
  */
-int
+STATIC int
 xfs_attr_leaf_addname(xfs_da_args_t *args)
 {
 	xfs_inode_t *dp;

+ 3 - 5
fs/xfs/xfs_attr_leaf.c

@@ -226,17 +226,15 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
 STATIC void
 xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
 {
-	unsigned long s;
-
 	if ((mp->m_flags & XFS_MOUNT_ATTR2) &&
 	    !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) {
-		s = XFS_SB_LOCK(mp);
+		spin_lock(&mp->m_sb_lock);
 		if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
 			XFS_SB_VERSION_ADDATTR2(&mp->m_sb);
-			XFS_SB_UNLOCK(mp, s);
+			spin_unlock(&mp->m_sb_lock);
 			xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
 		} else
-			XFS_SB_UNLOCK(mp, s);
+			spin_unlock(&mp->m_sb_lock);
 	}
 }
 

+ 0 - 103
fs/xfs/xfs_bit.c

@@ -25,109 +25,6 @@
  * XFS bit manipulation routines, used in non-realtime code.
  */
 
-#ifndef HAVE_ARCH_HIGHBIT
-/*
- * Index of high bit number in byte, -1 for none set, 0..7 otherwise.
- */
-static const char xfs_highbit[256] = {
-       -1, 0, 1, 1, 2, 2, 2, 2,			/* 00 .. 07 */
-	3, 3, 3, 3, 3, 3, 3, 3,			/* 08 .. 0f */
-	4, 4, 4, 4, 4, 4, 4, 4,			/* 10 .. 17 */
-	4, 4, 4, 4, 4, 4, 4, 4,			/* 18 .. 1f */
-	5, 5, 5, 5, 5, 5, 5, 5,			/* 20 .. 27 */
-	5, 5, 5, 5, 5, 5, 5, 5,			/* 28 .. 2f */
-	5, 5, 5, 5, 5, 5, 5, 5,			/* 30 .. 37 */
-	5, 5, 5, 5, 5, 5, 5, 5,			/* 38 .. 3f */
-	6, 6, 6, 6, 6, 6, 6, 6,			/* 40 .. 47 */
-	6, 6, 6, 6, 6, 6, 6, 6,			/* 48 .. 4f */
-	6, 6, 6, 6, 6, 6, 6, 6,			/* 50 .. 57 */
-	6, 6, 6, 6, 6, 6, 6, 6,			/* 58 .. 5f */
-	6, 6, 6, 6, 6, 6, 6, 6,			/* 60 .. 67 */
-	6, 6, 6, 6, 6, 6, 6, 6,			/* 68 .. 6f */
-	6, 6, 6, 6, 6, 6, 6, 6,			/* 70 .. 77 */
-	6, 6, 6, 6, 6, 6, 6, 6,			/* 78 .. 7f */
-	7, 7, 7, 7, 7, 7, 7, 7,			/* 80 .. 87 */
-	7, 7, 7, 7, 7, 7, 7, 7,			/* 88 .. 8f */
-	7, 7, 7, 7, 7, 7, 7, 7,			/* 90 .. 97 */
-	7, 7, 7, 7, 7, 7, 7, 7,			/* 98 .. 9f */
-	7, 7, 7, 7, 7, 7, 7, 7,			/* a0 .. a7 */
-	7, 7, 7, 7, 7, 7, 7, 7,			/* a8 .. af */
-	7, 7, 7, 7, 7, 7, 7, 7,			/* b0 .. b7 */
-	7, 7, 7, 7, 7, 7, 7, 7,			/* b8 .. bf */
-	7, 7, 7, 7, 7, 7, 7, 7,			/* c0 .. c7 */
-	7, 7, 7, 7, 7, 7, 7, 7,			/* c8 .. cf */
-	7, 7, 7, 7, 7, 7, 7, 7,			/* d0 .. d7 */
-	7, 7, 7, 7, 7, 7, 7, 7,			/* d8 .. df */
-	7, 7, 7, 7, 7, 7, 7, 7,			/* e0 .. e7 */
-	7, 7, 7, 7, 7, 7, 7, 7,			/* e8 .. ef */
-	7, 7, 7, 7, 7, 7, 7, 7,			/* f0 .. f7 */
-	7, 7, 7, 7, 7, 7, 7, 7,			/* f8 .. ff */
-};
-#endif
-
-/*
- * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set.
- */
-inline int
-xfs_highbit32(
-	__uint32_t	v)
-{
-#ifdef HAVE_ARCH_HIGHBIT
-	return highbit32(v);
-#else
-	int		i;
-
-	if (v & 0xffff0000)
-		if (v & 0xff000000)
-			i = 24;
-		else
-			i = 16;
-	else if (v & 0x0000ffff)
-		if (v & 0x0000ff00)
-			i = 8;
-		else
-			i = 0;
-	else
-		return -1;
-	return i + xfs_highbit[(v >> i) & 0xff];
-#endif
-}
-
-/*
- * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set.
- */
-int
-xfs_lowbit64(
-	__uint64_t	v)
-{
-	__uint32_t	w = (__uint32_t)v;
-	int		n = 0;
-
-	if (w) {	/* lower bits */
-		n = ffs(w);
-	} else {	/* upper bits */
-		w = (__uint32_t)(v >> 32);
-		if (w && (n = ffs(w)))
-			n += 32;
-	}
-	return n - 1;
-}
-
-/*
- * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set.
- */
-int
-xfs_highbit64(
-	__uint64_t	v)
-{
-	__uint32_t	h = (__uint32_t)(v >> 32);
-
-	if (h)
-		return xfs_highbit32(h) + 32;
-	return xfs_highbit32((__uint32_t)v);
-}
-
-
 /*
  * Return whether bitmap is empty.
  * Size is number of words in the bitmap, which is padded to word boundary

+ 22 - 5
fs/xfs/xfs_bit.h

@@ -47,13 +47,30 @@ static inline __uint64_t xfs_mask64lo(int n)
 }
 
 /* Get high bit set out of 32-bit argument, -1 if none set */
-extern int xfs_highbit32(__uint32_t v);
-
-/* Get low bit set out of 64-bit argument, -1 if none set */
-extern int xfs_lowbit64(__uint64_t v);
+static inline int xfs_highbit32(__uint32_t v)
+{
+	return fls(v) - 1;
+}
 
 /* Get high bit set out of 64-bit argument, -1 if none set */
-extern int xfs_highbit64(__uint64_t);
+static inline int xfs_highbit64(__uint64_t v)
+{
+	return fls64(v) - 1;
+}
+
+/* Get low bit set out of 32-bit argument, -1 if none set */
+static inline int xfs_lowbit32(__uint32_t v)
+{
+	__uint32_t t = v;
+	return (t) ? find_first_bit((unsigned long *)&t, 32) : -1;
+}
+
+/* Get low bit set out of 64-bit argument, -1 if none set */
+static inline int xfs_lowbit64(__uint64_t v)
+{
+	__uint64_t t = v;
+	return (t) ? find_first_bit((unsigned long *)&t, 64) : -1;
+}
 
 /* Return whether bitmap is empty (1 == empty) */
 extern int xfs_bitmap_empty(uint *map, uint size);

+ 10 - 12
fs/xfs/xfs_bmap.c

@@ -2830,11 +2830,11 @@ xfs_bmap_btalloc(
 		args.prod = align;
 		if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
 			args.mod = (xfs_extlen_t)(args.prod - args.mod);
-	} else if (mp->m_sb.sb_blocksize >= NBPP) {
+	} else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) {
 		args.prod = 1;
 		args.mod = 0;
 	} else {
-		args.prod = NBPP >> mp->m_sb.sb_blocklog;
+		args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog;
 		if ((args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod))))
 			args.mod = (xfs_extlen_t)(args.prod - args.mod);
 	}
@@ -2969,7 +2969,7 @@ STATIC int
 xfs_bmap_alloc(
 	xfs_bmalloca_t	*ap)		/* bmap alloc argument struct */
 {
-	if ((ap->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && ap->userdata)
+	if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata)
 		return xfs_bmap_rtalloc(ap);
 	return xfs_bmap_btalloc(ap);
 }
@@ -3096,8 +3096,7 @@ xfs_bmap_del_extent(
 		/*
 		 * Realtime allocation.  Free it and record di_nblocks update.
 		 */
-		if (whichfork == XFS_DATA_FORK &&
-		    (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
+		if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
 			xfs_fsblock_t	bno;
 			xfs_filblks_t	len;
 
@@ -3956,7 +3955,6 @@ xfs_bmap_add_attrfork(
 	xfs_bmap_free_t		flist;		/* freed extent records */
 	xfs_mount_t		*mp;		/* mount structure */
 	xfs_trans_t		*tp;		/* transaction pointer */
-	unsigned long		s;		/* spinlock spl value */
 	int			blks;		/* space reservation */
 	int			version = 1;	/* superblock attr version */
 	int			committed;	/* xaction was committed */
@@ -4053,7 +4051,7 @@ xfs_bmap_add_attrfork(
 	   (!XFS_SB_VERSION_HASATTR2(&mp->m_sb) && version == 2)) {
 		__int64_t sbfields = 0;
 
-		s = XFS_SB_LOCK(mp);
+		spin_lock(&mp->m_sb_lock);
 		if (!XFS_SB_VERSION_HASATTR(&mp->m_sb)) {
 			XFS_SB_VERSION_ADDATTR(&mp->m_sb);
 			sbfields |= XFS_SB_VERSIONNUM;
@@ -4063,10 +4061,10 @@ xfs_bmap_add_attrfork(
 			sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
 		}
 		if (sbfields) {
-			XFS_SB_UNLOCK(mp, s);
+			spin_unlock(&mp->m_sb_lock);
 			xfs_mod_sb(tp, sbfields);
 		} else
-			XFS_SB_UNLOCK(mp, s);
+			spin_unlock(&mp->m_sb_lock);
 	}
 	if ((error = xfs_bmap_finish(&tp, &flist, &committed)))
 		goto error2;
@@ -6394,7 +6392,7 @@ xfs_bmap_count_blocks(
  * Recursively walks each level of a btree
  * to count total fsblocks is use.
  */
-int                                     /* error */
+STATIC int                                     /* error */
 xfs_bmap_count_tree(
 	xfs_mount_t     *mp,            /* file system mount point */
 	xfs_trans_t     *tp,            /* transaction pointer */
@@ -6470,7 +6468,7 @@ xfs_bmap_count_tree(
 /*
  * Count leaf blocks given a range of extent records.
  */
-int
+STATIC int
 xfs_bmap_count_leaves(
 	xfs_ifork_t		*ifp,
 	xfs_extnum_t		idx,
@@ -6490,7 +6488,7 @@ xfs_bmap_count_leaves(
  * Count leaf blocks given a range of extent records originally
  * in btree format.
  */
-int
+STATIC int
 xfs_bmap_disk_count_leaves(
 	xfs_extnum_t		idx,
 	xfs_bmbt_block_t	*block,

+ 2 - 0
fs/xfs/xfs_bmap.h

@@ -25,6 +25,8 @@ struct xfs_inode;
 struct xfs_mount;
 struct xfs_trans;
 
+extern kmem_zone_t	*xfs_bmap_free_item_zone;
+
 /*
  * DELTA: describe a change to the in-core extent list.
  *

+ 1 - 2
fs/xfs/xfs_bmap_btree.c

@@ -2062,8 +2062,7 @@ xfs_bmbt_insert(
 				pcur->bc_private.b.allocated;
 			pcur->bc_private.b.allocated = 0;
 			ASSERT((cur->bc_private.b.firstblock != NULLFSBLOCK) ||
-			       (cur->bc_private.b.ip->i_d.di_flags &
-				XFS_DIFLAG_REALTIME));
+			       XFS_IS_REALTIME_INODE(cur->bc_private.b.ip));
 			cur->bc_private.b.firstblock =
 				pcur->bc_private.b.firstblock;
 			ASSERT(cur->bc_private.b.flist ==

+ 2 - 0
fs/xfs/xfs_btree.h

@@ -24,6 +24,8 @@ struct xfs_inode;
 struct xfs_mount;
 struct xfs_trans;
 
+extern kmem_zone_t	*xfs_btree_cur_zone;
+
 /*
  * This nonsense is to make -wlint happy.
  */

+ 4 - 6
fs/xfs/xfs_buf_item.c

@@ -378,7 +378,6 @@ xfs_buf_item_unpin(
 	xfs_mount_t	*mp;
 	xfs_buf_t	*bp;
 	int		freed;
-	SPLDECL(s);
 
 	bp = bip->bli_buf;
 	ASSERT(bp != NULL);
@@ -409,8 +408,8 @@ xfs_buf_item_unpin(
 			XFS_BUF_SET_FSPRIVATE(bp, NULL);
 			XFS_BUF_CLR_IODONE_FUNC(bp);
 		} else {
-			AIL_LOCK(mp,s);
-			xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip, s);
+			spin_lock(&mp->m_ail_lock);
+			xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
 			xfs_buf_item_relse(bp);
 			ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL);
 		}
@@ -1113,7 +1112,6 @@ xfs_buf_iodone(
 	xfs_buf_log_item_t	*bip)
 {
 	struct xfs_mount	*mp;
-	SPLDECL(s);
 
 	ASSERT(bip->bli_buf == bp);
 
@@ -1128,11 +1126,11 @@ xfs_buf_iodone(
 	 *
 	 * Either way, AIL is useless if we're forcing a shutdown.
 	 */
-	AIL_LOCK(mp,s);
+	spin_lock(&mp->m_ail_lock);
 	/*
 	 * xfs_trans_delete_ail() drops the AIL lock.
 	 */
-	xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip, s);
+	xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
 
 #ifdef XFS_TRANS_DEBUG
 	kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp));

+ 2 - 0
fs/xfs/xfs_buf_item.h

@@ -18,6 +18,8 @@
 #ifndef	__XFS_BUF_ITEM_H__
 #define	__XFS_BUF_ITEM_H__
 
+extern kmem_zone_t	*xfs_buf_item_zone;
+
 /*
  * This is the structure used to lay out a buf log item in the
  * log.  The data map describes which 128 byte chunks of the buffer

+ 5 - 8
fs/xfs/xfs_da_btree.c

@@ -2218,7 +2218,7 @@ xfs_da_state_free(xfs_da_state_t *state)
 
 #ifdef XFS_DABUF_DEBUG
 xfs_dabuf_t	*xfs_dabuf_global_list;
-lock_t		xfs_dabuf_global_lock;
+spinlock_t	xfs_dabuf_global_lock;
 #endif
 
 /*
@@ -2264,10 +2264,9 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
 	}
 #ifdef XFS_DABUF_DEBUG
 	{
-		SPLDECL(s);
 		xfs_dabuf_t	*p;
 
-		s = mutex_spinlock(&xfs_dabuf_global_lock);
+		spin_lock(&xfs_dabuf_global_lock);
 		for (p = xfs_dabuf_global_list; p; p = p->next) {
 			ASSERT(p->blkno != dabuf->blkno ||
 			       p->target != dabuf->target);
@@ -2277,7 +2276,7 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
 			xfs_dabuf_global_list->prev = dabuf;
 		dabuf->next = xfs_dabuf_global_list;
 		xfs_dabuf_global_list = dabuf;
-		mutex_spinunlock(&xfs_dabuf_global_lock, s);
+		spin_unlock(&xfs_dabuf_global_lock);
 	}
 #endif
 	return dabuf;
@@ -2319,16 +2318,14 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
 		kmem_free(dabuf->data, BBTOB(dabuf->bbcount));
 #ifdef XFS_DABUF_DEBUG
 	{
-		SPLDECL(s);
-
-		s = mutex_spinlock(&xfs_dabuf_global_lock);
+		spin_lock(&xfs_dabuf_global_lock);
 		if (dabuf->prev)
 			dabuf->prev->next = dabuf->next;
 		else
 			xfs_dabuf_global_list = dabuf->next;
 		if (dabuf->next)
 			dabuf->next->prev = dabuf->prev;
-		mutex_spinunlock(&xfs_dabuf_global_lock, s);
+		spin_unlock(&xfs_dabuf_global_lock);
 	}
 	memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
 #endif

+ 1 - 0
fs/xfs/xfs_da_btree.h

@@ -260,6 +260,7 @@ void xfs_da_binval(struct xfs_trans *tp, xfs_dabuf_t *dabuf);
 xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf);
 
 extern struct kmem_zone *xfs_da_state_zone;
+extern struct kmem_zone *xfs_dabuf_zone;
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_DA_BTREE_H__ */

+ 35 - 49
fs/xfs/xfs_dfrag.c

@@ -52,76 +52,72 @@ xfs_swapext(
 	xfs_swapext_t	__user *sxu)
 {
 	xfs_swapext_t	*sxp;
-	xfs_inode_t     *ip=NULL, *tip=NULL;
-	xfs_mount_t     *mp;
-	struct file	*fp = NULL, *tfp = NULL;
-	bhv_vnode_t	*vp, *tvp;
+	xfs_inode_t     *ip, *tip;
+	struct file	*file, *target_file;
 	int		error = 0;
 
 	sxp = kmem_alloc(sizeof(xfs_swapext_t), KM_MAYFAIL);
 	if (!sxp) {
 		error = XFS_ERROR(ENOMEM);
-		goto error0;
+		goto out;
 	}
 
 	if (copy_from_user(sxp, sxu, sizeof(xfs_swapext_t))) {
 		error = XFS_ERROR(EFAULT);
-		goto error0;
+		goto out_free_sxp;
 	}
 
 	/* Pull information for the target fd */
-	if (((fp = fget((int)sxp->sx_fdtarget)) == NULL) ||
-	    ((vp = vn_from_inode(fp->f_path.dentry->d_inode)) == NULL))  {
+	file = fget((int)sxp->sx_fdtarget);
+	if (!file) {
 		error = XFS_ERROR(EINVAL);
-		goto error0;
+		goto out_free_sxp;
 	}
 
-	ip = xfs_vtoi(vp);
-	if (ip == NULL) {
+	if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) {
 		error = XFS_ERROR(EBADF);
-		goto error0;
+		goto out_put_file;
 	}
 
-	if (((tfp = fget((int)sxp->sx_fdtmp)) == NULL) ||
-	    ((tvp = vn_from_inode(tfp->f_path.dentry->d_inode)) == NULL)) {
+	target_file = fget((int)sxp->sx_fdtmp);
+	if (!target_file) {
 		error = XFS_ERROR(EINVAL);
-		goto error0;
+		goto out_put_file;
 	}
 
-	tip = xfs_vtoi(tvp);
-	if (tip == NULL) {
+	if (!(target_file->f_mode & FMODE_WRITE) ||
+	    (target_file->f_flags & O_APPEND)) {
 		error = XFS_ERROR(EBADF);
-		goto error0;
+		goto out_put_target_file;
 	}
 
+	ip = XFS_I(file->f_path.dentry->d_inode);
+	tip = XFS_I(target_file->f_path.dentry->d_inode);
+
 	if (ip->i_mount != tip->i_mount) {
-		error =  XFS_ERROR(EINVAL);
-		goto error0;
+		error = XFS_ERROR(EINVAL);
+		goto out_put_target_file;
 	}
 
 	if (ip->i_ino == tip->i_ino) {
-		error =  XFS_ERROR(EINVAL);
-		goto error0;
+		error = XFS_ERROR(EINVAL);
+		goto out_put_target_file;
 	}
 
-	mp = ip->i_mount;
-
-	if (XFS_FORCED_SHUTDOWN(mp)) {
-		error =  XFS_ERROR(EIO);
-		goto error0;
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+		error = XFS_ERROR(EIO);
+		goto out_put_target_file;
 	}
 
-	error = XFS_SWAP_EXTENTS(mp, &ip->i_iocore, &tip->i_iocore, sxp);
-
- error0:
-	if (fp != NULL)
-		fput(fp);
-	if (tfp != NULL)
-		fput(tfp);
-
-	if (sxp != NULL)
-		kmem_free(sxp, sizeof(xfs_swapext_t));
+	error = xfs_swap_extents(ip, tip, sxp);
 
+ out_put_target_file:
+	fput(target_file);
+ out_put_file:
+	fput(file);
+ out_free_sxp:
+	kmem_free(sxp, sizeof(xfs_swapext_t));
+ out:
 	return error;
 }
 
@@ -169,15 +165,6 @@ xfs_swap_extents(
 	xfs_lock_inodes(ips, 2, 0, lock_flags);
 	locked = 1;
 
-	/* Check permissions */
-	error = xfs_iaccess(ip, S_IWUSR, NULL);
-	if (error)
-		goto error0;
-
-	error = xfs_iaccess(tip, S_IWUSR, NULL);
-	if (error)
-		goto error0;
-
 	/* Verify that both files have the same format */
 	if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
 		error = XFS_ERROR(EINVAL);
@@ -185,8 +172,7 @@ xfs_swap_extents(
 	}
 
 	/* Verify both files are either real-time or non-realtime */
-	if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) !=
-	    (tip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
+	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
 		error = XFS_ERROR(EINVAL);
 		goto error0;
 	}
@@ -199,7 +185,7 @@ xfs_swap_extents(
 	}
 
 	if (VN_CACHED(tvp) != 0) {
-		xfs_inval_cached_trace(&tip->i_iocore, 0, -1, 0, -1);
+		xfs_inval_cached_trace(tip, 0, -1, 0, -1);
 		error = xfs_flushinval_pages(tip, 0, -1,
 				FI_REMAPF_LOCKED);
 		if (error)

+ 27 - 55
fs/xfs/xfs_dinode.h

@@ -171,69 +171,35 @@ typedef enum xfs_dinode_fmt
 /*
  * Inode data & attribute fork sizes, per inode.
  */
-#define XFS_CFORK_Q(dcp)                    ((dcp)->di_forkoff != 0)
-#define	XFS_CFORK_Q_DISK(dcp)		    ((dcp)->di_forkoff != 0)
-
-#define XFS_CFORK_BOFF(dcp)                 ((int)((dcp)->di_forkoff << 3))
-#define	XFS_CFORK_BOFF_DISK(dcp)	    ((int)((dcp)->di_forkoff << 3))
-
-#define	XFS_CFORK_DSIZE_DISK(dcp,mp) \
-	(XFS_CFORK_Q_DISK(dcp) ? XFS_CFORK_BOFF_DISK(dcp) : XFS_LITINO(mp))
-#define XFS_CFORK_DSIZE(dcp,mp) \
-	(XFS_CFORK_Q(dcp) ? XFS_CFORK_BOFF(dcp) : XFS_LITINO(mp))
-
-#define	XFS_CFORK_ASIZE_DISK(dcp,mp) \
-	(XFS_CFORK_Q_DISK(dcp) ? XFS_LITINO(mp) - XFS_CFORK_BOFF_DISK(dcp) : 0)
-#define XFS_CFORK_ASIZE(dcp,mp) \
-	(XFS_CFORK_Q(dcp) ? XFS_LITINO(mp) - XFS_CFORK_BOFF(dcp) : 0)
-
-#define	XFS_CFORK_SIZE_DISK(dcp,mp,w) \
-	((w) == XFS_DATA_FORK ? \
-		XFS_CFORK_DSIZE_DISK(dcp, mp) : \
-	 	XFS_CFORK_ASIZE_DISK(dcp, mp))
-#define XFS_CFORK_SIZE(dcp,mp,w) \
-	((w) == XFS_DATA_FORK ? \
-		XFS_CFORK_DSIZE(dcp, mp) : XFS_CFORK_ASIZE(dcp, mp))
+#define XFS_DFORK_Q(dip)		((dip)->di_core.di_forkoff != 0)
+#define XFS_DFORK_BOFF(dip)		((int)((dip)->di_core.di_forkoff << 3))
 
 #define XFS_DFORK_DSIZE(dip,mp) \
-	XFS_CFORK_DSIZE_DISK(&(dip)->di_core, mp)
-#define XFS_DFORK_DSIZE_HOST(dip,mp) \
-	XFS_CFORK_DSIZE(&(dip)->di_core, mp)
+	(XFS_DFORK_Q(dip) ? \
+		XFS_DFORK_BOFF(dip) : \
+		XFS_LITINO(mp))
 #define XFS_DFORK_ASIZE(dip,mp) \
-	XFS_CFORK_ASIZE_DISK(&(dip)->di_core, mp)
-#define XFS_DFORK_ASIZE_HOST(dip,mp) \
-	XFS_CFORK_ASIZE(&(dip)->di_core, mp)
-#define	XFS_DFORK_SIZE(dip,mp,w) \
-	XFS_CFORK_SIZE_DISK(&(dip)->di_core, mp, w)
-#define	XFS_DFORK_SIZE_HOST(dip,mp,w) \
-	XFS_CFORK_SIZE(&(dip)->di_core, mp, w)
+	(XFS_DFORK_Q(dip) ? \
+		XFS_LITINO(mp) - XFS_DFORK_BOFF(dip) : \
+		0)
+#define XFS_DFORK_SIZE(dip,mp,w) \
+	((w) == XFS_DATA_FORK ? \
+		XFS_DFORK_DSIZE(dip, mp) : \
+		XFS_DFORK_ASIZE(dip, mp))
 
-#define	XFS_DFORK_Q(dip)                    XFS_CFORK_Q_DISK(&(dip)->di_core)
-#define	XFS_DFORK_BOFF(dip)		    XFS_CFORK_BOFF_DISK(&(dip)->di_core)
-#define	XFS_DFORK_DPTR(dip)		    ((dip)->di_u.di_c)
-#define	XFS_DFORK_APTR(dip)	\
+#define XFS_DFORK_DPTR(dip)		    ((dip)->di_u.di_c)
+#define XFS_DFORK_APTR(dip)	\
 	((dip)->di_u.di_c + XFS_DFORK_BOFF(dip))
-#define	XFS_DFORK_PTR(dip,w)	\
+#define XFS_DFORK_PTR(dip,w)	\
 	((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR(dip) : XFS_DFORK_APTR(dip))
-#define	XFS_CFORK_FORMAT(dcp,w)	\
-	((w) == XFS_DATA_FORK ? (dcp)->di_format : (dcp)->di_aformat)
-#define	XFS_CFORK_FMT_SET(dcp,w,n) \
+#define XFS_DFORK_FORMAT(dip,w) \
 	((w) == XFS_DATA_FORK ? \
-		((dcp)->di_format = (n)) : ((dcp)->di_aformat = (n)))
-#define	XFS_DFORK_FORMAT(dip,w) XFS_CFORK_FORMAT(&(dip)->di_core, w)
-
-#define	XFS_CFORK_NEXTENTS_DISK(dcp,w) \
+		(dip)->di_core.di_format : \
+		(dip)->di_core.di_aformat)
+#define XFS_DFORK_NEXTENTS(dip,w) \
 	((w) == XFS_DATA_FORK ? \
-	 	be32_to_cpu((dcp)->di_nextents) : \
-	 	be16_to_cpu((dcp)->di_anextents))
-#define XFS_CFORK_NEXTENTS(dcp,w) \
-	((w) == XFS_DATA_FORK ? (dcp)->di_nextents : (dcp)->di_anextents)
-#define	XFS_DFORK_NEXTENTS(dip,w) XFS_CFORK_NEXTENTS_DISK(&(dip)->di_core, w)
-#define	XFS_DFORK_NEXTENTS_HOST(dip,w) XFS_CFORK_NEXTENTS(&(dip)->di_core, w)
-
-#define	XFS_CFORK_NEXT_SET(dcp,w,n) \
-	((w) == XFS_DATA_FORK ? \
-		((dcp)->di_nextents = (n)) : ((dcp)->di_anextents = (n)))
+	 	be32_to_cpu((dip)->di_core.di_nextents) : \
+	 	be16_to_cpu((dip)->di_core.di_anextents))
 
 #define	XFS_BUF_TO_DINODE(bp)	((xfs_dinode_t *)XFS_BUF_PTR(bp))
 
@@ -273,6 +239,12 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_NODEFRAG      (1 << XFS_DIFLAG_NODEFRAG_BIT)
 #define XFS_DIFLAG_FILESTREAM    (1 << XFS_DIFLAG_FILESTREAM_BIT)
 
+#ifdef CONFIG_XFS_RT
+#define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME)
+#else
+#define XFS_IS_REALTIME_INODE(ip) (0)
+#endif
+
 #define XFS_DIFLAG_ANY \
 	(XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
 	 XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \

+ 2 - 1
fs/xfs/xfs_dir2.c

@@ -42,6 +42,7 @@
 #include "xfs_dir2_node.h"
 #include "xfs_dir2_trace.h"
 #include "xfs_error.h"
+#include "xfs_vnodeops.h"
 
 
 void
@@ -301,7 +302,7 @@ xfs_readdir(
 	int		rval;		/* return value */
 	int		v;		/* type-checking value */
 
-	vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(dp);
 
 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
 		return XFS_ERROR(EIO);

+ 0 - 31
fs/xfs/xfs_error.c

@@ -230,37 +230,6 @@ xfs_error_report(
 	}
 }
 
-STATIC void
-xfs_hex_dump(void *p, int length)
-{
-	__uint8_t *uip = (__uint8_t*)p;
-	int	i;
-	char	sbuf[128], *s;
-
-	s = sbuf;
-	*s = '\0';
-	for (i=0; i<length; i++, uip++) {
-		if ((i % 16) == 0) {
-			if (*s != '\0')
-				cmn_err(CE_ALERT, "%s\n", sbuf);
-			s = sbuf;
-			sprintf(s, "0x%x: ", i);
-			while( *s != '\0')
-				s++;
-		}
-		sprintf(s, "%02x ", *uip);
-
-		/*
-		 * the kernel sprintf is a void; user sprintf returns
-		 * the sprintf'ed string's length.  Find the new end-
-		 * of-string
-		 */
-		while( *s != '\0')
-			s++;
-	}
-	cmn_err(CE_ALERT, "%s\n", sbuf);
-}
-
 void
 xfs_corruption_error(
 	char		*tag,

+ 2 - 0
fs/xfs/xfs_error.h

@@ -174,6 +174,8 @@ extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
 /* PRINTFLIKE3 */
 extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
 
+extern void xfs_hex_dump(void *p, int length);
+
 #define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \
 	xfs_fs_cmn_err(level, mp, fmt "  Unmount and run xfs_repair.", ## args)
 

+ 9 - 12
fs/xfs/xfs_extfree_item.c

@@ -110,19 +110,18 @@ STATIC void
 xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
 {
 	xfs_mount_t	*mp;
-	SPLDECL(s);
 
 	mp = efip->efi_item.li_mountp;
-	AIL_LOCK(mp, s);
+	spin_lock(&mp->m_ail_lock);
 	if (efip->efi_flags & XFS_EFI_CANCELED) {
 		/*
 		 * xfs_trans_delete_ail() drops the AIL lock.
 		 */
-		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
+		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip);
 		xfs_efi_item_free(efip);
 	} else {
 		efip->efi_flags |= XFS_EFI_COMMITTED;
-		AIL_UNLOCK(mp, s);
+		spin_unlock(&mp->m_ail_lock);
 	}
 }
 
@@ -138,10 +137,9 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
 {
 	xfs_mount_t	*mp;
 	xfs_log_item_desc_t	*lidp;
-	SPLDECL(s);
 
 	mp = efip->efi_item.li_mountp;
-	AIL_LOCK(mp, s);
+	spin_lock(&mp->m_ail_lock);
 	if (efip->efi_flags & XFS_EFI_CANCELED) {
 		/*
 		 * free the xaction descriptor pointing to this item
@@ -152,11 +150,11 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
 		 * pull the item off the AIL.
 		 * xfs_trans_delete_ail() drops the AIL lock.
 		 */
-		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
+		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip);
 		xfs_efi_item_free(efip);
 	} else {
 		efip->efi_flags |= XFS_EFI_COMMITTED;
-		AIL_UNLOCK(mp, s);
+		spin_unlock(&mp->m_ail_lock);
 	}
 }
 
@@ -350,13 +348,12 @@ xfs_efi_release(xfs_efi_log_item_t	*efip,
 {
 	xfs_mount_t	*mp;
 	int		extents_left;
-	SPLDECL(s);
 
 	mp = efip->efi_item.li_mountp;
 	ASSERT(efip->efi_next_extent > 0);
 	ASSERT(efip->efi_flags & XFS_EFI_COMMITTED);
 
-	AIL_LOCK(mp, s);
+	spin_lock(&mp->m_ail_lock);
 	ASSERT(efip->efi_next_extent >= nextents);
 	efip->efi_next_extent -= nextents;
 	extents_left = efip->efi_next_extent;
@@ -364,10 +361,10 @@ xfs_efi_release(xfs_efi_log_item_t	*efip,
 		/*
 		 * xfs_trans_delete_ail() drops the AIL lock.
 		 */
-		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
+		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip);
 		xfs_efi_item_free(efip);
 	} else {
-		AIL_UNLOCK(mp, s);
+		spin_unlock(&mp->m_ail_lock);
 	}
 }
 

+ 1 - 1
fs/xfs/xfs_filestream.c

@@ -348,7 +348,7 @@ _xfs_filestream_update_ag(
 }
 
 /* xfs_fstrm_free_func(): callback for freeing cached stream items. */
-void
+STATIC void
 xfs_fstrm_free_func(
 	unsigned long	ino,
 	void		*data)

+ 7 - 3
fs/xfs/xfs_fs.h

@@ -419,9 +419,13 @@ typedef struct xfs_handle {
 /*
  * ioctl commands that are used by Linux filesystems
  */
-#define XFS_IOC_GETXFLAGS	_IOR('f', 1, long)
-#define XFS_IOC_SETXFLAGS	_IOW('f', 2, long)
-#define XFS_IOC_GETVERSION	_IOR('v', 1, long)
+#define XFS_IOC_GETXFLAGS	FS_IOC_GETFLAGS
+#define XFS_IOC_SETXFLAGS	FS_IOC_SETFLAGS
+#define XFS_IOC_GETVERSION	FS_IOC_GETVERSION
+/* 32-bit compat counterparts */
+#define XFS_IOC32_GETXFLAGS	FS_IOC32_GETFLAGS
+#define XFS_IOC32_SETXFLAGS	FS_IOC32_SETFLAGS
+#define XFS_IOC32_GETVERSION	FS_IOC32_GETVERSION
 
 /*
  * ioctl commands that replace IRIX fcntl()'s

+ 5 - 8
fs/xfs/xfs_fsops.c

@@ -462,15 +462,13 @@ xfs_fs_counts(
 	xfs_mount_t		*mp,
 	xfs_fsop_counts_t	*cnt)
 {
-	unsigned long	s;
-
 	xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT);
-	s = XFS_SB_LOCK(mp);
+	spin_lock(&mp->m_sb_lock);
 	cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
 	cnt->freertx = mp->m_sb.sb_frextents;
 	cnt->freeino = mp->m_sb.sb_ifree;
 	cnt->allocino = mp->m_sb.sb_icount;
-	XFS_SB_UNLOCK(mp, s);
+	spin_unlock(&mp->m_sb_lock);
 	return 0;
 }
 
@@ -497,7 +495,6 @@ xfs_reserve_blocks(
 {
 	__int64_t		lcounter, delta, fdblks_delta;
 	__uint64_t		request;
-	unsigned long		s;
 
 	/* If inval is null, report current values and return */
 	if (inval == (__uint64_t *)NULL) {
@@ -515,7 +512,7 @@ xfs_reserve_blocks(
 	 * problem. we needto work out if we are freeing or allocation
 	 * blocks first, then we can do the modification as necessary.
 	 *
-	 * We do this under the XFS_SB_LOCK so that if we are near
+	 * We do this under the m_sb_lock so that if we are near
 	 * ENOSPC, we will hold out any changes while we work out
 	 * what to do. This means that the amount of free space can
 	 * change while we do this, so we need to retry if we end up
@@ -526,7 +523,7 @@ xfs_reserve_blocks(
 	 * enabled, disabled or even compiled in....
 	 */
 retry:
-	s = XFS_SB_LOCK(mp);
+	spin_lock(&mp->m_sb_lock);
 	xfs_icsb_sync_counters_flags(mp, XFS_ICSB_SB_LOCKED);
 
 	/*
@@ -569,7 +566,7 @@ out:
 		outval->resblks = mp->m_resblks;
 		outval->resblks_avail = mp->m_resblks_avail;
 	}
-	XFS_SB_UNLOCK(mp, s);
+	spin_unlock(&mp->m_sb_lock);
 
 	if (fdblks_delta) {
 		/*

+ 0 - 2
fs/xfs/xfs_ialloc_btree.h

@@ -81,8 +81,6 @@ typedef	struct xfs_btree_sblock xfs_inobt_block_t;
 #define	XFS_INOBT_MASK(i)		((xfs_inofree_t)1 << (i))
 #define	XFS_INOBT_IS_FREE(rp,i)		\
 		(((rp)->ir_free & XFS_INOBT_MASK(i)) != 0)
-#define	XFS_INOBT_IS_FREE_DISK(rp,i)	\
-		((be64_to_cpu((rp)->ir_free) & XFS_INOBT_MASK(i)) != 0)
 #define	XFS_INOBT_SET_FREE(rp,i)	((rp)->ir_free |= XFS_INOBT_MASK(i))
 #define	XFS_INOBT_CLR_FREE(rp,i)	((rp)->ir_free &= ~XFS_INOBT_MASK(i))
 

+ 73 - 112
fs/xfs/xfs_iget.c

@@ -65,7 +65,7 @@
  */
 STATIC int
 xfs_iget_core(
-	bhv_vnode_t	*vp,
+	struct inode	*inode,
 	xfs_mount_t	*mp,
 	xfs_trans_t	*tp,
 	xfs_ino_t	ino,
@@ -74,9 +74,9 @@ xfs_iget_core(
 	xfs_inode_t	**ipp,
 	xfs_daddr_t	bno)
 {
+	struct inode	*old_inode;
 	xfs_inode_t	*ip;
 	xfs_inode_t	*iq;
-	bhv_vnode_t	*inode_vp;
 	int		error;
 	xfs_icluster_t	*icl, *new_icl = NULL;
 	unsigned long	first_index, mask;
@@ -111,8 +111,8 @@ again:
 			goto again;
 		}
 
-		inode_vp = XFS_ITOV_NULL(ip);
-		if (inode_vp == NULL) {
+		old_inode = ip->i_vnode;
+		if (old_inode == NULL) {
 			/*
 			 * If IRECLAIM is set this inode is
 			 * on its way out of the system,
@@ -140,28 +140,9 @@ again:
 				return ENOENT;
 			}
 
-			/*
-			 * There may be transactions sitting in the
-			 * incore log buffers or being flushed to disk
-			 * at this time.  We can't clear the
-			 * XFS_IRECLAIMABLE flag until these
-			 * transactions have hit the disk, otherwise we
-			 * will void the guarantee the flag provides
-			 * xfs_iunpin()
-			 */
-			if (xfs_ipincount(ip)) {
-				read_unlock(&pag->pag_ici_lock);
-				xfs_log_force(mp, 0,
-					XFS_LOG_FORCE|XFS_LOG_SYNC);
-				XFS_STATS_INC(xs_ig_frecycle);
-				goto again;
-			}
-
-			vn_trace_exit(ip, "xfs_iget.alloc",
-				(inst_t *)__return_address);
+			xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
 
 			XFS_STATS_INC(xs_ig_found);
-
 			xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
 			read_unlock(&pag->pag_ici_lock);
 
@@ -171,13 +152,11 @@ again:
 
 			goto finish_inode;
 
-		} else if (vp != inode_vp) {
-			struct inode *inode = vn_to_inode(inode_vp);
-
+		} else if (inode != old_inode) {
 			/* The inode is being torn down, pause and
 			 * try again.
 			 */
-			if (inode->i_state & (I_FREEING | I_CLEAR)) {
+			if (old_inode->i_state & (I_FREEING | I_CLEAR)) {
 				read_unlock(&pag->pag_ici_lock);
 				delay(1);
 				XFS_STATS_INC(xs_ig_frecycle);
@@ -190,7 +169,7 @@ again:
 */
 			cmn_err(CE_PANIC,
 		"xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
-					inode_vp, vp);
+					old_inode, inode);
 		}
 
 		/*
@@ -200,20 +179,16 @@ again:
 		XFS_STATS_INC(xs_ig_found);
 
 finish_inode:
-		if (ip->i_d.di_mode == 0) {
-			if (!(flags & XFS_IGET_CREATE)) {
-				xfs_put_perag(mp, pag);
-				return ENOENT;
-			}
-			xfs_iocore_inode_reinit(ip);
+		if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
+			xfs_put_perag(mp, pag);
+			return ENOENT;
 		}
 
 		if (lock_flags != 0)
 			xfs_ilock(ip, lock_flags);
 
 		xfs_iflags_clear(ip, XFS_ISTALE);
-		vn_trace_exit(ip, "xfs_iget.found",
-					(inst_t *)__return_address);
+		xfs_itrace_exit_tag(ip, "xfs_iget.found");
 		goto return_ip;
 	}
 
@@ -234,10 +209,16 @@ finish_inode:
 		return error;
 	}
 
-	vn_trace_exit(ip, "xfs_iget.alloc", (inst_t *)__return_address);
+	xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
+
+
+	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+		     "xfsino", ip->i_ino);
+	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+	init_waitqueue_head(&ip->i_ipin_wait);
+	atomic_set(&ip->i_pincount, 0);
+	initnsema(&ip->i_flock, 1, "xfsfino");
 
-	xfs_inode_lock_init(ip, vp);
-	xfs_iocore_inode_init(ip);
 	if (lock_flags)
 		xfs_ilock(ip, lock_flags);
 
@@ -333,9 +314,6 @@ finish_inode:
 	ASSERT(ip->i_df.if_ext_max ==
 	       XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
 
-	ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
-	       ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));
-
 	xfs_iflags_set(ip, XFS_IMODIFIED);
 	*ipp = ip;
 
@@ -343,7 +321,7 @@ finish_inode:
 	 * If we have a real type for an on-disk inode, we can set ops(&unlock)
 	 * now.	 If it's a new inode being created, xfs_ialloc will handle it.
 	 */
-	xfs_initialize_vnode(mp, vp, ip);
+	xfs_initialize_vnode(mp, inode, ip);
 	return 0;
 }
 
@@ -363,69 +341,58 @@ xfs_iget(
 	xfs_daddr_t	bno)
 {
 	struct inode	*inode;
-	bhv_vnode_t	*vp = NULL;
+	xfs_inode_t	*ip;
 	int		error;
 
 	XFS_STATS_INC(xs_ig_attempts);
 
 retry:
 	inode = iget_locked(mp->m_super, ino);
-	if (inode) {
-		xfs_inode_t	*ip;
-
-		vp = vn_from_inode(inode);
-		if (inode->i_state & I_NEW) {
-			vn_initialize(inode);
-			error = xfs_iget_core(vp, mp, tp, ino, flags,
-					lock_flags, ipp, bno);
-			if (error) {
-				vn_mark_bad(vp);
-				if (inode->i_state & I_NEW)
-					unlock_new_inode(inode);
-				iput(inode);
-			}
-		} else {
-			/*
-			 * If the inode is not fully constructed due to
-			 * filehandle mismatches wait for the inode to go
-			 * away and try again.
-			 *
-			 * iget_locked will call __wait_on_freeing_inode
-			 * to wait for the inode to go away.
-			 */
-			if (is_bad_inode(inode) ||
-			    ((ip = xfs_vtoi(vp)) == NULL)) {
-				iput(inode);
-				delay(1);
-				goto retry;
-			}
-
-			if (lock_flags != 0)
-				xfs_ilock(ip, lock_flags);
-			XFS_STATS_INC(xs_ig_found);
-			*ipp = ip;
-			error = 0;
+	if (!inode)
+		/* If we got no inode we are out of memory */
+		return ENOMEM;
+
+	if (inode->i_state & I_NEW) {
+		XFS_STATS_INC(vn_active);
+		XFS_STATS_INC(vn_alloc);
+
+		error = xfs_iget_core(inode, mp, tp, ino, flags,
+				lock_flags, ipp, bno);
+		if (error) {
+			make_bad_inode(inode);
+			if (inode->i_state & I_NEW)
+				unlock_new_inode(inode);
+			iput(inode);
 		}
-	} else
-		error = ENOMEM;	/* If we got no inode we are out of memory */
+		return error;
+	}
 
-	return error;
-}
+	/*
+	 * If the inode is not fully constructed due to
+	 * filehandle mismatches wait for the inode to go
+	 * away and try again.
+	 *
+	 * iget_locked will call __wait_on_freeing_inode
+	 * to wait for the inode to go away.
+	 */
+	if (is_bad_inode(inode)) {
+		iput(inode);
+		delay(1);
+		goto retry;
+	}
 
-/*
- * Do the setup for the various locks within the incore inode.
- */
-void
-xfs_inode_lock_init(
-	xfs_inode_t	*ip,
-	bhv_vnode_t	*vp)
-{
-	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
-		     "xfsino", ip->i_ino);
-	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
-	init_waitqueue_head(&ip->i_ipin_wait);
-	atomic_set(&ip->i_pincount, 0);
-	initnsema(&ip->i_flock, 1, "xfsfino");
+	ip = XFS_I(inode);
+	if (!ip) {
+		iput(inode);
+		delay(1);
+		goto retry;
+	}
+
+	if (lock_flags != 0)
+		xfs_ilock(ip, lock_flags);
+	XFS_STATS_INC(xs_ig_found);
+	*ipp = ip;
+	return 0;
 }
 
 /*
@@ -465,11 +432,9 @@ void
 xfs_iput(xfs_inode_t	*ip,
 	 uint		lock_flags)
 {
-	bhv_vnode_t	*vp = XFS_ITOV(ip);
-
-	vn_trace_entry(ip, "xfs_iput", (inst_t *)__return_address);
+	xfs_itrace_entry(ip);
 	xfs_iunlock(ip, lock_flags);
-	VN_RELE(vp);
+	IRELE(ip);
 }
 
 /*
@@ -479,20 +444,19 @@ void
 xfs_iput_new(xfs_inode_t	*ip,
 	     uint		lock_flags)
 {
-	bhv_vnode_t	*vp = XFS_ITOV(ip);
-	struct inode	*inode = vn_to_inode(vp);
+	struct inode	*inode = ip->i_vnode;
 
-	vn_trace_entry(ip, "xfs_iput_new", (inst_t *)__return_address);
+	xfs_itrace_entry(ip);
 
 	if ((ip->i_d.di_mode == 0)) {
 		ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
-		vn_mark_bad(vp);
+		make_bad_inode(inode);
 	}
 	if (inode->i_state & I_NEW)
 		unlock_new_inode(inode);
 	if (lock_flags)
 		xfs_iunlock(ip, lock_flags);
-	VN_RELE(vp);
+	IRELE(ip);
 }
 
 
@@ -505,8 +469,6 @@ xfs_iput_new(xfs_inode_t	*ip,
 void
 xfs_ireclaim(xfs_inode_t *ip)
 {
-	bhv_vnode_t	*vp;
-
 	/*
 	 * Remove from old hash list and mount list.
 	 */
@@ -535,9 +497,8 @@ xfs_ireclaim(xfs_inode_t *ip)
 	/*
 	 * Pull our behavior descriptor from the vnode chain.
 	 */
-	vp = XFS_ITOV_NULL(ip);
-	if (vp) {
-		vn_to_inode(vp)->i_private = NULL;
+	if (ip->i_vnode) {
+		ip->i_vnode->i_private = NULL;
 		ip->i_vnode = NULL;
 	}
 

+ 54 - 171
fs/xfs/xfs_inode.c

@@ -15,6 +15,8 @@
  * along with this program; if not, write the Free Software Foundation,
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
+#include <linux/log2.h>
+
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_types.h"
@@ -826,15 +828,17 @@ xfs_ip2xflags(
 	xfs_icdinode_t		*dic = &ip->i_d;
 
 	return _xfs_dic2xflags(dic->di_flags) |
-				(XFS_CFORK_Q(dic) ? XFS_XFLAG_HASATTR : 0);
+				(XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
 }
 
 uint
 xfs_dic2xflags(
-	xfs_dinode_core_t	*dic)
+	xfs_dinode_t		*dip)
 {
+	xfs_dinode_core_t	*dic = &dip->di_core;
+
 	return _xfs_dic2xflags(be16_to_cpu(dic->di_flags)) |
-				(XFS_CFORK_Q_DISK(dic) ? XFS_XFLAG_HASATTR : 0);
+				(XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
 }
 
 /*
@@ -884,8 +888,8 @@ xfs_iread(
 	 * Initialize inode's trace buffers.
 	 * Do this before xfs_iformat in case it adds entries.
 	 */
-#ifdef	XFS_VNODE_TRACE
-	ip->i_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP);
+#ifdef	XFS_INODE_TRACE
+	ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_SLEEP);
 #endif
 #ifdef XFS_BMAP_TRACE
 	ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP);
@@ -1220,10 +1224,8 @@ xfs_ialloc(
 					ip->i_d.di_extsize = pip->i_d.di_extsize;
 				}
 			} else if ((mode & S_IFMT) == S_IFREG) {
-				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
+				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
 					di_flags |= XFS_DIFLAG_REALTIME;
-					ip->i_iocore.io_flags |= XFS_IOCORE_RT;
-				}
 				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
 					di_flags |= XFS_DIFLAG_EXTSIZE;
 					ip->i_d.di_extsize = pip->i_d.di_extsize;
@@ -1298,7 +1300,10 @@ xfs_isize_check(
 	if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
 		return;
 
-	if (ip->i_d.di_flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_EXTSIZE))
+	if (XFS_IS_REALTIME_INODE(ip))
+		return;
+
+	if (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
 		return;
 
 	nimaps = 2;
@@ -1711,7 +1716,7 @@ xfs_itruncate_finish(
 		 * runs.
 		 */
 		XFS_BMAP_INIT(&free_list, &first_block);
-		error = XFS_BUNMAPI(mp, ntp, &ip->i_iocore,
+		error = xfs_bunmapi(ntp, ip,
 				    first_unmap_block, unmap_len,
 				    XFS_BMAPI_AFLAG(fork) |
 				      (sync ? 0 : XFS_BMAPI_ASYNC),
@@ -1844,8 +1849,6 @@ xfs_igrow_start(
 	xfs_fsize_t	new_size,
 	cred_t		*credp)
 {
-	int		error;
-
 	ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
 	ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
 	ASSERT(new_size > ip->i_size);
@@ -1855,9 +1858,7 @@ xfs_igrow_start(
 	 * xfs_write_file() beyond the end of the file
 	 * and any blocks between the old and new file sizes.
 	 */
-	error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size,
-			     ip->i_size);
-	return error;
+	return xfs_zero_eof(ip, new_size, ip->i_size);
 }
 
 /*
@@ -1959,24 +1960,6 @@ xfs_iunlink(
 	ASSERT(agi->agi_unlinked[bucket_index]);
 	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
 
-	error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
-	if (error)
-		return error;
-
-	/*
-	 * Clear the on-disk di_nlink. This is to prevent xfs_bulkstat
-	 * from picking up this inode when it is reclaimed (its incore state
-	 * initialzed but not flushed to disk yet). The in-core di_nlink is
-	 * already cleared in xfs_droplink() and a corresponding transaction
-	 * logged. The hack here just synchronizes the in-core to on-disk
-	 * di_nlink value in advance before the actual inode sync to disk.
-	 * This is OK because the inode is already unlinked and would never
-	 * change its di_nlink again for this inode generation.
-	 * This is a temporary hack that would require a proper fix
-	 * in the future.
-	 */
-	dip->di_core.di_nlink = 0;
-
 	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) {
 		/*
 		 * There is already another inode in the bucket we need
@@ -1984,6 +1967,10 @@ xfs_iunlink(
 		 * Here we put the head pointer into our next pointer,
 		 * and then we fall through to point the head at us.
 		 */
+		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+		if (error)
+			return error;
+
 		ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO);
 		/* both on-disk, don't endian flip twice */
 		dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
@@ -2209,7 +2196,6 @@ xfs_ifree_cluster(
 	xfs_inode_log_item_t	*iip;
 	xfs_log_item_t		*lip;
 	xfs_perag_t		*pag = xfs_get_perag(mp, inum);
-	SPLDECL(s);
 
 	if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
 		blks_per_cluster = 1;
@@ -2311,9 +2297,9 @@ xfs_ifree_cluster(
 				iip = (xfs_inode_log_item_t *)lip;
 				ASSERT(iip->ili_logged == 1);
 				lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
-				AIL_LOCK(mp,s);
+				spin_lock(&mp->m_ail_lock);
 				iip->ili_flush_lsn = iip->ili_item.li_lsn;
-				AIL_UNLOCK(mp, s);
+				spin_unlock(&mp->m_ail_lock);
 				xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
 				pre_flushed++;
 			}
@@ -2334,9 +2320,9 @@ xfs_ifree_cluster(
 			iip->ili_last_fields = iip->ili_format.ilf_fields;
 			iip->ili_format.ilf_fields = 0;
 			iip->ili_logged = 1;
-			AIL_LOCK(mp,s);
+			spin_lock(&mp->m_ail_lock);
 			iip->ili_flush_lsn = iip->ili_item.li_lsn;
-			AIL_UNLOCK(mp, s);
+			spin_unlock(&mp->m_ail_lock);
 
 			xfs_buf_attach_iodone(bp,
 				(void(*)(xfs_buf_t*,xfs_log_item_t*))
@@ -2374,6 +2360,8 @@ xfs_ifree(
 	int			error;
 	int			delete;
 	xfs_ino_t		first_ino;
+	xfs_dinode_t    	*dip;
+	xfs_buf_t       	*ibp;
 
 	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
 	ASSERT(ip->i_transp == tp);
@@ -2409,8 +2397,27 @@ xfs_ifree(
 	 * by reincarnations of this inode.
 	 */
 	ip->i_d.di_gen++;
+
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
+	error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0);
+	if (error)
+		return error;
+
+        /*
+	* Clear the on-disk di_mode. This is to prevent xfs_bulkstat
+	* from picking up this inode when it is reclaimed (its incore state
+	* initialzed but not flushed to disk yet). The in-core di_mode is
+	* already cleared  and a corresponding transaction logged.
+	* The hack here just synchronizes the in-core to on-disk
+	* di_mode value in advance before the actual inode sync to disk.
+	* This is OK because the inode is already unlinked and would never
+	* change its di_mode again for this inode generation.
+	* This is a temporary hack that would require a proper fix
+	* in the future.
+	*/
+	dip->di_core.di_mode = 0;
+
 	if (delete) {
 		xfs_ifree_cluster(ip, tp, first_ino);
 	}
@@ -2735,7 +2742,6 @@ void
 xfs_idestroy(
 	xfs_inode_t	*ip)
 {
-
 	switch (ip->i_d.di_mode & S_IFMT) {
 	case S_IFREG:
 	case S_IFDIR:
@@ -2749,7 +2755,7 @@ xfs_idestroy(
 	mrfree(&ip->i_iolock);
 	freesema(&ip->i_flock);
 
-#ifdef XFS_VNODE_TRACE
+#ifdef XFS_INODE_TRACE
 	ktrace_free(ip->i_trace);
 #endif
 #ifdef XFS_BMAP_TRACE
@@ -2775,16 +2781,15 @@ xfs_idestroy(
 		 */
 		xfs_mount_t	*mp = ip->i_mount;
 		xfs_log_item_t	*lip = &ip->i_itemp->ili_item;
-		int		s;
 
 		ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
 				       XFS_FORCED_SHUTDOWN(ip->i_mount));
 		if (lip->li_flags & XFS_LI_IN_AIL) {
-			AIL_LOCK(mp, s);
+			spin_lock(&mp->m_ail_lock);
 			if (lip->li_flags & XFS_LI_IN_AIL)
-				xfs_trans_delete_ail(mp, lip, s);
+				xfs_trans_delete_ail(mp, lip);
 			else
-				AIL_UNLOCK(mp, s);
+				spin_unlock(&mp->m_ail_lock);
 		}
 		xfs_inode_item_destroy(ip);
 	}
@@ -2816,40 +2821,8 @@ xfs_iunpin(
 {
 	ASSERT(atomic_read(&ip->i_pincount) > 0);
 
-	if (atomic_dec_and_lock(&ip->i_pincount, &ip->i_flags_lock)) {
-
-		/*
-		 * If the inode is currently being reclaimed, the link between
-		 * the bhv_vnode and the xfs_inode will be broken after the
-		 * XFS_IRECLAIM* flag is set. Hence, if these flags are not
-		 * set, then we can move forward and mark the linux inode dirty
-		 * knowing that it is still valid as it won't freed until after
-		 * the bhv_vnode<->xfs_inode link is broken in xfs_reclaim. The
-		 * i_flags_lock is used to synchronise the setting of the
-		 * XFS_IRECLAIM* flags and the breaking of the link, and so we
-		 * can execute atomically w.r.t to reclaim by holding this lock
-		 * here.
-		 *
-		 * However, we still need to issue the unpin wakeup call as the
-		 * inode reclaim may be blocked waiting for the inode to become
-		 * unpinned.
-		 */
-
-		if (!__xfs_iflags_test(ip, XFS_IRECLAIM|XFS_IRECLAIMABLE)) {
-			bhv_vnode_t	*vp = XFS_ITOV_NULL(ip);
-			struct inode *inode = NULL;
-
-			BUG_ON(vp == NULL);
-			inode = vn_to_inode(vp);
-			BUG_ON(inode->i_state & I_CLEAR);
-
-			/* make sync come back and flush this inode */
-			if (!(inode->i_state & (I_NEW|I_FREEING)))
-				mark_inode_dirty_sync(inode);
-		}
-		spin_unlock(&ip->i_flags_lock);
+	if (atomic_dec_and_test(&ip->i_pincount))
 		wake_up(&ip->i_ipin_wait);
-	}
 }
 
 /*
@@ -3338,7 +3311,6 @@ xfs_iflush_int(
 #ifdef XFS_TRANS_DEBUG
 	int			first;
 #endif
-	SPLDECL(s);
 
 	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
 	ASSERT(issemalocked(&(ip->i_flock)));
@@ -3533,9 +3505,9 @@ xfs_iflush_int(
 		iip->ili_logged = 1;
 
 		ASSERT(sizeof(xfs_lsn_t) == 8);	/* don't lock if it shrinks */
-		AIL_LOCK(mp,s);
+		spin_lock(&mp->m_ail_lock);
 		iip->ili_flush_lsn = iip->ili_item.li_lsn;
-		AIL_UNLOCK(mp, s);
+		spin_unlock(&mp->m_ail_lock);
 
 		/*
 		 * Attach the function xfs_iflush_done to the inode's
@@ -3611,95 +3583,6 @@ xfs_iflush_all(
 	XFS_MOUNT_IUNLOCK(mp);
 }
 
-/*
- * xfs_iaccess: check accessibility of inode for mode.
- */
-int
-xfs_iaccess(
-	xfs_inode_t	*ip,
-	mode_t		mode,
-	cred_t		*cr)
-{
-	int		error;
-	mode_t		orgmode = mode;
-	struct inode	*inode = vn_to_inode(XFS_ITOV(ip));
-
-	if (mode & S_IWUSR) {
-		umode_t		imode = inode->i_mode;
-
-		if (IS_RDONLY(inode) &&
-		    (S_ISREG(imode) || S_ISDIR(imode) || S_ISLNK(imode)))
-			return XFS_ERROR(EROFS);
-
-		if (IS_IMMUTABLE(inode))
-			return XFS_ERROR(EACCES);
-	}
-
-	/*
-	 * If there's an Access Control List it's used instead of
-	 * the mode bits.
-	 */
-	if ((error = _ACL_XFS_IACCESS(ip, mode, cr)) != -1)
-		return error ? XFS_ERROR(error) : 0;
-
-	if (current_fsuid(cr) != ip->i_d.di_uid) {
-		mode >>= 3;
-		if (!in_group_p((gid_t)ip->i_d.di_gid))
-			mode >>= 3;
-	}
-
-	/*
-	 * If the DACs are ok we don't need any capability check.
-	 */
-	if ((ip->i_d.di_mode & mode) == mode)
-		return 0;
-	/*
-	 * Read/write DACs are always overridable.
-	 * Executable DACs are overridable if at least one exec bit is set.
-	 */
-	if (!(orgmode & S_IXUSR) ||
-	    (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
-		if (capable_cred(cr, CAP_DAC_OVERRIDE))
-			return 0;
-
-	if ((orgmode == S_IRUSR) ||
-	    (S_ISDIR(inode->i_mode) && (!(orgmode & S_IWUSR)))) {
-		if (capable_cred(cr, CAP_DAC_READ_SEARCH))
-			return 0;
-#ifdef	NOISE
-		cmn_err(CE_NOTE, "Ick: mode=%o, orgmode=%o", mode, orgmode);
-#endif	/* NOISE */
-		return XFS_ERROR(EACCES);
-	}
-	return XFS_ERROR(EACCES);
-}
-
-/*
- * xfs_iroundup: round up argument to next power of two
- */
-uint
-xfs_iroundup(
-	uint	v)
-{
-	int i;
-	uint m;
-
-	if ((v & (v - 1)) == 0)
-		return v;
-	ASSERT((v & 0x80000000) == 0);
-	if ((v & (v + 1)) == 0)
-		return v + 1;
-	for (i = 0, m = 1; i < 31; i++, m <<= 1) {
-		if (v & m)
-			continue;
-		v |= m;
-		if ((v & (v + 1)) == 0)
-			return v + 1;
-	}
-	ASSERT(0);
-	return( 0 );
-}
-
 #ifdef XFS_ILOCK_TRACE
 ktrace_t	*xfs_ilock_trace_buf;
 
@@ -4206,7 +4089,7 @@ xfs_iext_realloc_direct(
 			return;
 		}
 		if (!is_power_of_2(new_size)){
-			rnew_size = xfs_iroundup(new_size);
+			rnew_size = roundup_pow_of_two(new_size);
 		}
 		if (rnew_size != ifp->if_real_bytes) {
 			ifp->if_u1.if_extents =
@@ -4229,7 +4112,7 @@ xfs_iext_realloc_direct(
 	else {
 		new_size += ifp->if_bytes;
 		if (!is_power_of_2(new_size)) {
-			rnew_size = xfs_iroundup(new_size);
+			rnew_size = roundup_pow_of_two(new_size);
 		}
 		xfs_iext_inline_to_direct(ifp, rnew_size);
 	}

+ 40 - 58
fs/xfs/xfs_inode.h

@@ -132,45 +132,6 @@ typedef struct dm_attrs_s {
 	__uint16_t	da_pad;		/* DMIG extra padding */
 } dm_attrs_t;
 
-typedef struct xfs_iocore {
-	void			*io_obj;	/* pointer to container
-						 * inode or dcxvn structure */
-	struct xfs_mount	*io_mount;	/* fs mount struct ptr */
-#ifdef DEBUG
-	mrlock_t		*io_lock;	/* inode IO lock */
-	mrlock_t		*io_iolock;	/* inode IO lock */
-#endif
-
-	/* I/O state */
-	xfs_fsize_t		io_new_size;	/* sz when write completes */
-
-	/* Miscellaneous state. */
-	unsigned int		io_flags;	/* IO related flags */
-
-	/* DMAPI state */
-	dm_attrs_t		io_dmattrs;
-
-} xfs_iocore_t;
-
-#define        io_dmevmask     io_dmattrs.da_dmevmask
-#define        io_dmstate      io_dmattrs.da_dmstate
-
-#define XFS_IO_INODE(io)	((xfs_inode_t *) ((io)->io_obj))
-#define XFS_IO_DCXVN(io)	((dcxvn_t *) ((io)->io_obj))
-
-/*
- * Flags in the flags field
- */
-
-#define XFS_IOCORE_RT		0x1
-
-/*
- * xfs_iocore prototypes
- */
-
-extern void xfs_iocore_inode_init(struct xfs_inode *);
-extern void xfs_iocore_inode_reinit(struct xfs_inode *);
-
 /*
  * This is the xfs inode cluster structure.  This structure is used by
  * xfs_iflush to find inodes that share a cluster and can be flushed to disk at
@@ -181,7 +142,7 @@ typedef struct xfs_icluster {
 	xfs_daddr_t		icl_blkno;	/* starting block number of
 						 * the cluster */
 	struct xfs_buf		*icl_buf;	/* the inode buffer */
-	lock_t			icl_lock;	/* inode list lock */
+	spinlock_t		icl_lock;	/* inode list lock */
 } xfs_icluster_t;
 
 /*
@@ -283,9 +244,6 @@ typedef struct xfs_inode {
 	struct xfs_inode	**i_refcache;	/* ptr to entry in ref cache */
 	struct xfs_inode	*i_release;	/* inode to unref */
 #endif
-	/* I/O state */
-	xfs_iocore_t		i_iocore;	/* I/O core */
-
 	/* Miscellaneous state. */
 	unsigned short		i_flags;	/* see defined flags below */
 	unsigned char		i_update_core;	/* timestamps/size is dirty */
@@ -298,9 +256,10 @@ typedef struct xfs_inode {
 	struct hlist_node	i_cnode;	/* cluster link node */
 
 	xfs_fsize_t		i_size;		/* in-memory size */
+	xfs_fsize_t		i_new_size;	/* size when write completes */
 	atomic_t		i_iocount;	/* outstanding I/O count */
 	/* Trace buffers per inode. */
-#ifdef XFS_VNODE_TRACE
+#ifdef XFS_INODE_TRACE
 	struct ktrace		*i_trace;	/* general inode trace */
 #endif
 #ifdef XFS_BMAP_TRACE
@@ -382,17 +341,42 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
 /*
  * Fork handling.
  */
-#define	XFS_IFORK_PTR(ip,w)		\
-	((w) == XFS_DATA_FORK ? &(ip)->i_df : (ip)->i_afp)
-#define	XFS_IFORK_Q(ip)			XFS_CFORK_Q(&(ip)->i_d)
-#define	XFS_IFORK_DSIZE(ip)		XFS_CFORK_DSIZE(&ip->i_d, ip->i_mount)
-#define	XFS_IFORK_ASIZE(ip)		XFS_CFORK_ASIZE(&ip->i_d, ip->i_mount)
-#define	XFS_IFORK_SIZE(ip,w)		XFS_CFORK_SIZE(&ip->i_d, ip->i_mount, w)
-#define	XFS_IFORK_FORMAT(ip,w)		XFS_CFORK_FORMAT(&ip->i_d, w)
-#define	XFS_IFORK_FMT_SET(ip,w,n)	XFS_CFORK_FMT_SET(&ip->i_d, w, n)
-#define	XFS_IFORK_NEXTENTS(ip,w)	XFS_CFORK_NEXTENTS(&ip->i_d, w)
-#define	XFS_IFORK_NEXT_SET(ip,w,n)	XFS_CFORK_NEXT_SET(&ip->i_d, w, n)
 
+#define XFS_IFORK_Q(ip)			((ip)->i_d.di_forkoff != 0)
+#define XFS_IFORK_BOFF(ip)		((int)((ip)->i_d.di_forkoff << 3))
+
+#define XFS_IFORK_PTR(ip,w)		\
+	((w) == XFS_DATA_FORK ? \
+		&(ip)->i_df : \
+		(ip)->i_afp)
+#define XFS_IFORK_DSIZE(ip) \
+	(XFS_IFORK_Q(ip) ? \
+		XFS_IFORK_BOFF(ip) : \
+		XFS_LITINO((ip)->i_mount))
+#define XFS_IFORK_ASIZE(ip) \
+	(XFS_IFORK_Q(ip) ? \
+		XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \
+		0)
+#define XFS_IFORK_SIZE(ip,w) \
+	((w) == XFS_DATA_FORK ? \
+		XFS_IFORK_DSIZE(ip) : \
+		XFS_IFORK_ASIZE(ip))
+#define XFS_IFORK_FORMAT(ip,w) \
+	((w) == XFS_DATA_FORK ? \
+		(ip)->i_d.di_format : \
+		(ip)->i_d.di_aformat)
+#define XFS_IFORK_FMT_SET(ip,w,n) \
+	((w) == XFS_DATA_FORK ? \
+		((ip)->i_d.di_format = (n)) : \
+		((ip)->i_d.di_aformat = (n)))
+#define XFS_IFORK_NEXTENTS(ip,w) \
+	((w) == XFS_DATA_FORK ? \
+		(ip)->i_d.di_nextents : \
+		(ip)->i_d.di_anextents)
+#define XFS_IFORK_NEXT_SET(ip,w,n) \
+	((w) == XFS_DATA_FORK ? \
+		((ip)->i_d.di_nextents = (n)) : \
+		((ip)->i_d.di_anextents = (n)))
 
 #ifdef __KERNEL__
 
@@ -509,7 +493,6 @@ void		xfs_ihash_init(struct xfs_mount *);
 void		xfs_ihash_free(struct xfs_mount *);
 xfs_inode_t	*xfs_inode_incore(struct xfs_mount *, xfs_ino_t,
 				  struct xfs_trans *);
-void            xfs_inode_lock_init(xfs_inode_t *, bhv_vnode_t *);
 int		xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
 			 uint, uint, xfs_inode_t **, xfs_daddr_t);
 void		xfs_iput(xfs_inode_t *, uint);
@@ -545,7 +528,7 @@ void		xfs_dinode_to_disk(struct xfs_dinode_core *,
 				   struct xfs_icdinode *);
 
 uint		xfs_ip2xflags(struct xfs_inode *);
-uint		xfs_dic2xflags(struct xfs_dinode_core *);
+uint		xfs_dic2xflags(struct xfs_dinode *);
 int		xfs_ifree(struct xfs_trans *, xfs_inode_t *,
 			   struct xfs_bmap_free *);
 int		xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
@@ -567,13 +550,12 @@ void		xfs_iunpin(xfs_inode_t *);
 int		xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int);
 int		xfs_iflush(xfs_inode_t *, uint);
 void		xfs_iflush_all(struct xfs_mount *);
-int		xfs_iaccess(xfs_inode_t *, mode_t, cred_t *);
-uint		xfs_iroundup(uint);
 void		xfs_ichgtime(xfs_inode_t *, int);
 xfs_fsize_t	xfs_file_last_byte(xfs_inode_t *);
 void		xfs_lock_inodes(xfs_inode_t **, int, int, uint);
 
 void		xfs_synchronize_atime(xfs_inode_t *);
+void		xfs_mark_inode_dirty_sync(xfs_inode_t *);
 
 xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t);
 void		xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t,

+ 14 - 12
fs/xfs/xfs_inode_item.c

@@ -274,6 +274,11 @@ xfs_inode_item_format(
 	 */
 	xfs_synchronize_atime(ip);
 
+	/*
+	 * make sure the linux inode is dirty
+	 */
+	xfs_mark_inode_dirty_sync(ip);
+
 	vecp->i_addr = (xfs_caddr_t)&ip->i_d;
 	vecp->i_len  = sizeof(xfs_dinode_core_t);
 	XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
@@ -615,7 +620,7 @@ xfs_inode_item_trylock(
 			return XFS_ITEM_PUSHBUF;
 		} else {
 			/*
-			 * We hold the AIL_LOCK, so we must specify the
+			 * We hold the AIL lock, so we must specify the
 			 * NONOTIFY flag so that we won't double trip.
 			 */
 			xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
@@ -749,7 +754,7 @@ xfs_inode_item_committed(
  * marked delayed write. If that's the case, we'll initiate a bawrite on that
  * buffer to expedite the process.
  *
- * We aren't holding the AIL_LOCK (or the flush lock) when this gets called,
+ * We aren't holding the AIL lock (or the flush lock) when this gets called,
  * so it is inherently race-y.
  */
 STATIC void
@@ -792,7 +797,7 @@ xfs_inode_item_pushbuf(
 		if (XFS_BUF_ISDELAYWRITE(bp)) {
 			/*
 			 * We were racing with iflush because we don't hold
-			 * the AIL_LOCK or the flush lock. However, at this point,
+			 * the AIL lock or the flush lock. However, at this point,
 			 * we have the buffer, and we know that it's dirty.
 			 * So, it's possible that iflush raced with us, and
 			 * this item is already taken off the AIL.
@@ -968,7 +973,6 @@ xfs_iflush_done(
 	xfs_inode_log_item_t	*iip)
 {
 	xfs_inode_t	*ip;
-	SPLDECL(s);
 
 	ip = iip->ili_inode;
 
@@ -983,15 +987,15 @@ xfs_iflush_done(
 	 */
 	if (iip->ili_logged &&
 	    (iip->ili_item.li_lsn == iip->ili_flush_lsn)) {
-		AIL_LOCK(ip->i_mount, s);
+		spin_lock(&ip->i_mount->m_ail_lock);
 		if (iip->ili_item.li_lsn == iip->ili_flush_lsn) {
 			/*
 			 * xfs_trans_delete_ail() drops the AIL lock.
 			 */
 			xfs_trans_delete_ail(ip->i_mount,
-					     (xfs_log_item_t*)iip, s);
+					     (xfs_log_item_t*)iip);
 		} else {
-			AIL_UNLOCK(ip->i_mount, s);
+			spin_unlock(&ip->i_mount->m_ail_lock);
 		}
 	}
 
@@ -1025,21 +1029,19 @@ xfs_iflush_abort(
 {
 	xfs_inode_log_item_t	*iip;
 	xfs_mount_t		*mp;
-	SPLDECL(s);
 
 	iip = ip->i_itemp;
 	mp = ip->i_mount;
 	if (iip) {
 		if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
-			AIL_LOCK(mp, s);
+			spin_lock(&mp->m_ail_lock);
 			if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
 				/*
 				 * xfs_trans_delete_ail() drops the AIL lock.
 				 */
-				xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip,
-					s);
+				xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip);
 			} else
-				AIL_UNLOCK(mp, s);
+				spin_unlock(&mp->m_ail_lock);
 		}
 		iip->ili_logged = 0;
 		/*

+ 0 - 119
fs/xfs/xfs_iocore.c

@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dfrag.h"
-#include "xfs_dmapi.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_itable.h"
-#include "xfs_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_ialloc.h"
-#include "xfs_bmap.h"
-#include "xfs_error.h"
-#include "xfs_rw.h"
-#include "xfs_quota.h"
-#include "xfs_trans_space.h"
-#include "xfs_iomap.h"
-
-
-STATIC xfs_fsize_t
-xfs_size_fn(
-	xfs_inode_t		*ip)
-{
-	return XFS_ISIZE(ip);
-}
-
-STATIC int
-xfs_ioinit(
-	struct xfs_mount	*mp,
-	struct xfs_mount_args	*mntargs,
-	int			flags)
-{
-	return xfs_mountfs(mp, flags);
-}
-
-xfs_ioops_t	xfs_iocore_xfs = {
-	.xfs_ioinit		= (xfs_ioinit_t) xfs_ioinit,
-	.xfs_bmapi_func		= (xfs_bmapi_t) xfs_bmapi,
-	.xfs_bunmapi_func	= (xfs_bunmapi_t) xfs_bunmapi,
-	.xfs_bmap_eof_func	= (xfs_bmap_eof_t) xfs_bmap_eof,
-	.xfs_iomap_write_direct =
-			(xfs_iomap_write_direct_t) xfs_iomap_write_direct,
-	.xfs_iomap_write_delay =
-			(xfs_iomap_write_delay_t) xfs_iomap_write_delay,
-	.xfs_iomap_write_allocate =
-			(xfs_iomap_write_allocate_t) xfs_iomap_write_allocate,
-	.xfs_iomap_write_unwritten =
-			(xfs_iomap_write_unwritten_t) xfs_iomap_write_unwritten,
-	.xfs_ilock		= (xfs_lock_t) xfs_ilock,
-	.xfs_lck_map_shared	= (xfs_lck_map_shared_t) xfs_ilock_map_shared,
-	.xfs_ilock_demote	= (xfs_lock_demote_t) xfs_ilock_demote,
-	.xfs_ilock_nowait	= (xfs_lock_nowait_t) xfs_ilock_nowait,
-	.xfs_unlock		= (xfs_unlk_t) xfs_iunlock,
-	.xfs_size_func		= (xfs_size_t) xfs_size_fn,
-	.xfs_iodone		= (xfs_iodone_t) fs_noerr,
-	.xfs_swap_extents_func	= (xfs_swap_extents_t) xfs_swap_extents,
-};
-
-void
-xfs_iocore_inode_reinit(
-	xfs_inode_t	*ip)
-{
-	xfs_iocore_t	*io = &ip->i_iocore;
-
-	io->io_flags = 0;
-	if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)
-		io->io_flags |= XFS_IOCORE_RT;
-	io->io_dmevmask = ip->i_d.di_dmevmask;
-	io->io_dmstate = ip->i_d.di_dmstate;
-}
-
-void
-xfs_iocore_inode_init(
-	xfs_inode_t	*ip)
-{
-	xfs_iocore_t	*io = &ip->i_iocore;
-	xfs_mount_t	*mp = ip->i_mount;
-
-	io->io_mount = mp;
-#ifdef DEBUG
-	io->io_lock = &ip->i_lock;
-	io->io_iolock = &ip->i_iolock;
-#endif
-
-	io->io_obj = (void *)ip;
-
-	xfs_iocore_inode_reinit(ip);
-}

+ 108 - 104
fs/xfs/xfs_iomap.c

@@ -53,12 +53,10 @@
 void
 xfs_iomap_enter_trace(
 	int		tag,
-	xfs_iocore_t	*io,
+	xfs_inode_t	*ip,
 	xfs_off_t	offset,
 	ssize_t		count)
 {
-	xfs_inode_t	*ip = XFS_IO_INODE(io);
-
 	if (!ip->i_rwtrace)
 		return;
 
@@ -70,8 +68,8 @@ xfs_iomap_enter_trace(
 		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
 		(void *)((unsigned long)(offset & 0xffffffff)),
 		(void *)((unsigned long)count),
-		(void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)),
-		(void *)((unsigned long)(io->io_new_size & 0xffffffff)),
+		(void *)((unsigned long)((ip->i_new_size >> 32) & 0xffffffff)),
+		(void *)((unsigned long)(ip->i_new_size & 0xffffffff)),
 		(void *)((unsigned long)current_pid()),
 		(void *)NULL,
 		(void *)NULL,
@@ -84,15 +82,13 @@ xfs_iomap_enter_trace(
 void
 xfs_iomap_map_trace(
 	int		tag,
-	xfs_iocore_t	*io,
+	xfs_inode_t	*ip,
 	xfs_off_t	offset,
 	ssize_t		count,
 	xfs_iomap_t	*iomapp,
 	xfs_bmbt_irec_t	*imapp,
 	int		flags)
 {
-	xfs_inode_t	*ip = XFS_IO_INODE(io);
-
 	if (!ip->i_rwtrace)
 		return;
 
@@ -126,7 +122,7 @@ xfs_iomap_map_trace(
 
 STATIC int
 xfs_imap_to_bmap(
-	xfs_iocore_t	*io,
+	xfs_inode_t	*ip,
 	xfs_off_t	offset,
 	xfs_bmbt_irec_t *imap,
 	xfs_iomap_t	*iomapp,
@@ -134,11 +130,10 @@ xfs_imap_to_bmap(
 	int		iomaps,			/* Number of iomap entries */
 	int		flags)
 {
-	xfs_mount_t	*mp;
+	xfs_mount_t	*mp = ip->i_mount;
 	int		pbm;
 	xfs_fsblock_t	start_block;
 
-	mp = io->io_mount;
 
 	for (pbm = 0; imaps && pbm < iomaps; imaps--, iomapp++, imap++, pbm++) {
 		iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff);
@@ -146,7 +141,7 @@ xfs_imap_to_bmap(
 		iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount);
 		iomapp->iomap_flags = flags;
 
-		if (io->io_flags & XFS_IOCORE_RT) {
+		if (XFS_IS_REALTIME_INODE(ip)) {
 			iomapp->iomap_flags |= IOMAP_REALTIME;
 			iomapp->iomap_target = mp->m_rtdev_targp;
 		} else {
@@ -160,7 +155,7 @@ xfs_imap_to_bmap(
 			iomapp->iomap_bn = IOMAP_DADDR_NULL;
 			iomapp->iomap_flags |= IOMAP_DELAY;
 		} else {
-			iomapp->iomap_bn = XFS_FSB_TO_DB_IO(io, start_block);
+			iomapp->iomap_bn = XFS_FSB_TO_DB(ip, start_block);
 			if (ISUNWRITTEN(imap))
 				iomapp->iomap_flags |= IOMAP_UNWRITTEN;
 		}
@@ -172,14 +167,14 @@ xfs_imap_to_bmap(
 
 int
 xfs_iomap(
-	xfs_iocore_t	*io,
+	xfs_inode_t	*ip,
 	xfs_off_t	offset,
 	ssize_t		count,
 	int		flags,
 	xfs_iomap_t	*iomapp,
 	int		*niomaps)
 {
-	xfs_mount_t	*mp = io->io_mount;
+	xfs_mount_t	*mp = ip->i_mount;
 	xfs_fileoff_t	offset_fsb, end_fsb;
 	int		error = 0;
 	int		lockmode = 0;
@@ -188,45 +183,37 @@ xfs_iomap(
 	int		bmapi_flags = 0;
 	int		iomap_flags = 0;
 
+	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
+
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	switch (flags &
-		(BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE |
-		 BMAPI_UNWRITTEN | BMAPI_DEVICE)) {
+	switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) {
 	case BMAPI_READ:
-		xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count);
-		lockmode = XFS_LCK_MAP_SHARED(mp, io);
+		xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, ip, offset, count);
+		lockmode = xfs_ilock_map_shared(ip);
 		bmapi_flags = XFS_BMAPI_ENTIRE;
 		break;
 	case BMAPI_WRITE:
-		xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count);
+		xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count);
 		lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
 		if (flags & BMAPI_IGNSTATE)
 			bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
-		XFS_ILOCK(mp, io, lockmode);
+		xfs_ilock(ip, lockmode);
 		break;
 	case BMAPI_ALLOCATE:
-		xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, io, offset, count);
+		xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count);
 		lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD;
 		bmapi_flags = XFS_BMAPI_ENTIRE;
+
 		/* Attempt non-blocking lock */
 		if (flags & BMAPI_TRYLOCK) {
-			if (!XFS_ILOCK_NOWAIT(mp, io, lockmode))
+			if (!xfs_ilock_nowait(ip, lockmode))
 				return XFS_ERROR(EAGAIN);
 		} else {
-			XFS_ILOCK(mp, io, lockmode);
+			xfs_ilock(ip, lockmode);
 		}
 		break;
-	case BMAPI_UNWRITTEN:
-		goto phase2;
-	case BMAPI_DEVICE:
-		lockmode = XFS_LCK_MAP_SHARED(mp, io);
-		iomapp->iomap_target = io->io_flags & XFS_IOCORE_RT ?
-			mp->m_rtdev_targp : mp->m_ddev_targp;
-		error = 0;
-		*niomaps = 1;
-		goto out;
 	default:
 		BUG();
 	}
@@ -237,7 +224,7 @@ xfs_iomap(
 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
-	error = XFS_BMAPI(mp, NULL, io, offset_fsb,
+	error = xfs_bmapi(NULL, ip, offset_fsb,
 			(xfs_filblks_t)(end_fsb - offset_fsb),
 			bmapi_flags,  NULL, 0, &imap,
 			&nimaps, NULL, NULL);
@@ -245,54 +232,48 @@ xfs_iomap(
 	if (error)
 		goto out;
 
-phase2:
-	switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE|BMAPI_UNWRITTEN)) {
+	switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
 	case BMAPI_WRITE:
 		/* If we found an extent, return it */
 		if (nimaps &&
 		    (imap.br_startblock != HOLESTARTBLOCK) &&
 		    (imap.br_startblock != DELAYSTARTBLOCK)) {
-			xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,
+			xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip,
 					offset, count, iomapp, &imap, flags);
 			break;
 		}
 
 		if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) {
-			error = XFS_IOMAP_WRITE_DIRECT(mp, io, offset,
-					count, flags, &imap, &nimaps, nimaps);
+			error = xfs_iomap_write_direct(ip, offset, count, flags,
+						       &imap, &nimaps, nimaps);
 		} else {
-			error = XFS_IOMAP_WRITE_DELAY(mp, io, offset, count,
-					flags, &imap, &nimaps);
+			error = xfs_iomap_write_delay(ip, offset, count, flags,
+						      &imap, &nimaps);
 		}
 		if (!error) {
-			xfs_iomap_map_trace(XFS_IOMAP_ALLOC_MAP, io,
+			xfs_iomap_map_trace(XFS_IOMAP_ALLOC_MAP, ip,
 					offset, count, iomapp, &imap, flags);
 		}
 		iomap_flags = IOMAP_NEW;
 		break;
 	case BMAPI_ALLOCATE:
 		/* If we found an extent, return it */
-		XFS_IUNLOCK(mp, io, lockmode);
+		xfs_iunlock(ip, lockmode);
 		lockmode = 0;
 
 		if (nimaps && !ISNULLSTARTBLOCK(imap.br_startblock)) {
-			xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,
+			xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip,
 					offset, count, iomapp, &imap, flags);
 			break;
 		}
 
-		error = XFS_IOMAP_WRITE_ALLOCATE(mp, io, offset, count,
+		error = xfs_iomap_write_allocate(ip, offset, count,
 						 &imap, &nimaps);
 		break;
-	case BMAPI_UNWRITTEN:
-		lockmode = 0;
-		error = XFS_IOMAP_WRITE_UNWRITTEN(mp, io, offset, count);
-		nimaps = 0;
-		break;
 	}
 
 	if (nimaps) {
-		*niomaps = xfs_imap_to_bmap(io, offset, &imap,
+		*niomaps = xfs_imap_to_bmap(ip, offset, &imap,
 					    iomapp, nimaps, *niomaps, iomap_flags);
 	} else if (niomaps) {
 		*niomaps = 0;
@@ -300,14 +281,15 @@ phase2:
 
 out:
 	if (lockmode)
-		XFS_IUNLOCK(mp, io, lockmode);
+		xfs_iunlock(ip, lockmode);
 	return XFS_ERROR(error);
 }
 
+
 STATIC int
 xfs_iomap_eof_align_last_fsb(
 	xfs_mount_t	*mp,
-	xfs_iocore_t	*io,
+	xfs_inode_t	*ip,
 	xfs_fsize_t	isize,
 	xfs_extlen_t	extsize,
 	xfs_fileoff_t	*last_fsb)
@@ -316,7 +298,7 @@ xfs_iomap_eof_align_last_fsb(
 	xfs_extlen_t	align;
 	int		eof, error;
 
-	if (io->io_flags & XFS_IOCORE_RT)
+	if (XFS_IS_REALTIME_INODE(ip))
 		;
 	/*
 	 * If mounted with the "-o swalloc" option, roundup the allocation
@@ -347,7 +329,7 @@ xfs_iomap_eof_align_last_fsb(
 	}
 
 	if (new_last_fsb) {
-		error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
+		error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
 		if (error)
 			return error;
 		if (eof)
@@ -416,7 +398,6 @@ xfs_iomap_write_direct(
 	int		found)
 {
 	xfs_mount_t	*mp = ip->i_mount;
-	xfs_iocore_t	*io = &ip->i_iocore;
 	xfs_fileoff_t	offset_fsb;
 	xfs_fileoff_t	last_fsb;
 	xfs_filblks_t	count_fsb, resaligned;
@@ -446,13 +427,13 @@ xfs_iomap_write_direct(
 	extsz = xfs_get_extsz_hint(ip);
 
 	isize = ip->i_size;
-	if (io->io_new_size > isize)
-		isize = io->io_new_size;
+	if (ip->i_new_size > isize)
+		isize = ip->i_new_size;
 
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
 	if ((offset + count) > isize) {
-		error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
+		error = xfs_iomap_eof_align_last_fsb(mp, ip, isize, extsz,
 							&last_fsb);
 		if (error)
 			goto error_out;
@@ -519,7 +500,7 @@ xfs_iomap_write_direct(
 	 */
 	XFS_BMAP_INIT(&free_list, &firstfsb);
 	nimaps = 1;
-	error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb, bmapi_flag,
+	error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag,
 		&firstfsb, 0, &imap, &nimaps, &free_list, NULL);
 	if (error)
 		goto error0;
@@ -542,7 +523,8 @@ xfs_iomap_write_direct(
 		goto error_out;
 	}
 
-	if (unlikely(!imap.br_startblock && !(io->io_flags & XFS_IOCORE_RT))) {
+	if (unlikely(!imap.br_startblock &&
+		     !(XFS_IS_REALTIME_INODE(ip)))) {
 		error = xfs_cmn_err_fsblock_zero(ip, &imap);
 		goto error_out;
 	}
@@ -577,7 +559,7 @@ error_out:
 STATIC int
 xfs_iomap_eof_want_preallocate(
 	xfs_mount_t	*mp,
-	xfs_iocore_t	*io,
+	xfs_inode_t	*ip,
 	xfs_fsize_t	isize,
 	xfs_off_t	offset,
 	size_t		count,
@@ -604,7 +586,7 @@ xfs_iomap_eof_want_preallocate(
 	while (count_fsb > 0) {
 		imaps = nimaps;
 		firstblock = NULLFSBLOCK;
-		error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb, 0,
+		error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0,
 				  &firstblock, 0, imap, &imaps, NULL, NULL);
 		if (error)
 			return error;
@@ -630,7 +612,6 @@ xfs_iomap_write_delay(
 	int		*nmaps)
 {
 	xfs_mount_t	*mp = ip->i_mount;
-	xfs_iocore_t	*io = &ip->i_iocore;
 	xfs_fileoff_t	offset_fsb;
 	xfs_fileoff_t	last_fsb;
 	xfs_off_t	aligned_offset;
@@ -658,10 +639,10 @@ xfs_iomap_write_delay(
 
 retry:
 	isize = ip->i_size;
-	if (io->io_new_size > isize)
-		isize = io->io_new_size;
+	if (ip->i_new_size > isize)
+		isize = ip->i_new_size;
 
-	error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count,
+	error = xfs_iomap_eof_want_preallocate(mp, ip, isize, offset, count,
 				ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
 	if (error)
 		return error;
@@ -675,7 +656,7 @@ retry:
 	}
 
 	if (prealloc || extsz) {
-		error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
+		error = xfs_iomap_eof_align_last_fsb(mp, ip, isize, extsz,
 							&last_fsb);
 		if (error)
 			return error;
@@ -683,7 +664,7 @@ retry:
 
 	nimaps = XFS_WRITE_IMAPS;
 	firstblock = NULLFSBLOCK;
-	error = XFS_BMAPI(mp, NULL, io, offset_fsb,
+	error = xfs_bmapi(NULL, ip, offset_fsb,
 			  (xfs_filblks_t)(last_fsb - offset_fsb),
 			  XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
 			  XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
@@ -697,7 +678,7 @@ retry:
 	 */
 	if (nimaps == 0) {
 		xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
-					io, offset, count);
+					ip, offset, count);
 		if (xfs_flush_space(ip, &fsynced, &ioflag))
 			return XFS_ERROR(ENOSPC);
 
@@ -705,7 +686,8 @@ retry:
 		goto retry;
 	}
 
-	if (unlikely(!imap[0].br_startblock && !(io->io_flags & XFS_IOCORE_RT)))
+	if (unlikely(!imap[0].br_startblock &&
+		     !(XFS_IS_REALTIME_INODE(ip))))
 		return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
 
 	*ret_imap = imap[0];
@@ -720,6 +702,9 @@ retry:
  * the originating callers request.
  *
  * Called without a lock on the inode.
+ *
+ * We no longer bother to look at the incoming map - all we have to
+ * guarantee is that whatever we allocate fills the required range.
  */
 int
 xfs_iomap_write_allocate(
@@ -730,15 +715,14 @@ xfs_iomap_write_allocate(
 	int		*retmap)
 {
 	xfs_mount_t	*mp = ip->i_mount;
-	xfs_iocore_t    *io = &ip->i_iocore;
 	xfs_fileoff_t	offset_fsb, last_block;
 	xfs_fileoff_t	end_fsb, map_start_fsb;
 	xfs_fsblock_t	first_block;
 	xfs_bmap_free_t	free_list;
 	xfs_filblks_t	count_fsb;
-	xfs_bmbt_irec_t	imap[XFS_STRAT_WRITE_IMAPS];
+	xfs_bmbt_irec_t	imap;
 	xfs_trans_t	*tp;
-	int		i, nimaps, committed;
+	int		nimaps, committed;
 	int		error = 0;
 	int		nres;
 
@@ -785,13 +769,38 @@ xfs_iomap_write_allocate(
 
 			XFS_BMAP_INIT(&free_list, &first_block);
 
-			nimaps = XFS_STRAT_WRITE_IMAPS;
 			/*
-			 * Ensure we don't go beyond eof - it is possible
-			 * the extents changed since we did the read call,
-			 * we dropped the ilock in the interim.
+			 * it is possible that the extents have changed since
+			 * we did the read call as we dropped the ilock for a
+			 * while. We have to be careful about truncates or hole
+			 * punchs here - we are not allowed to allocate
+			 * non-delalloc blocks here.
+			 *
+			 * The only protection against truncation is the pages
+			 * for the range we are being asked to convert are
+			 * locked and hence a truncate will block on them
+			 * first.
+			 *
+			 * As a result, if we go beyond the range we really
+			 * need and hit an delalloc extent boundary followed by
+			 * a hole while we have excess blocks in the map, we
+			 * will fill the hole incorrectly and overrun the
+			 * transaction reservation.
+			 *
+			 * Using a single map prevents this as we are forced to
+			 * check each map we look for overlap with the desired
+			 * range and abort as soon as we find it. Also, given
+			 * that we only return a single map, having one beyond
+			 * what we can return is probably a bit silly.
+			 *
+			 * We also need to check that we don't go beyond EOF;
+			 * this is a truncate optimisation as a truncate sets
+			 * the new file size before block on the pages we
+			 * currently have locked under writeback. Because they
+			 * are about to be tossed, we don't need to write them
+			 * back....
 			 */
-
+			nimaps = 1;
 			end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
 			xfs_bmap_last_offset(NULL, ip, &last_block,
 				XFS_DATA_FORK);
@@ -805,9 +814,9 @@ xfs_iomap_write_allocate(
 			}
 
 			/* Go get the actual blocks */
-			error = XFS_BMAPI(mp, tp, io, map_start_fsb, count_fsb,
+			error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
 					XFS_BMAPI_WRITE, &first_block, 1,
-					imap, &nimaps, &free_list, NULL);
+					&imap, &nimaps, &free_list, NULL);
 			if (error)
 				goto trans_cancel;
 
@@ -826,27 +835,24 @@ xfs_iomap_write_allocate(
 		 * See if we were able to allocate an extent that
 		 * covers at least part of the callers request
 		 */
-		for (i = 0; i < nimaps; i++) {
-			if (unlikely(!imap[i].br_startblock &&
-				     !(io->io_flags & XFS_IOCORE_RT)))
-				return xfs_cmn_err_fsblock_zero(ip, &imap[i]);
-			if ((offset_fsb >= imap[i].br_startoff) &&
-			    (offset_fsb < (imap[i].br_startoff +
-					   imap[i].br_blockcount))) {
-				*map = imap[i];
-				*retmap = 1;
-				XFS_STATS_INC(xs_xstrat_quick);
-				return 0;
-			}
-			count_fsb -= imap[i].br_blockcount;
+		if (unlikely(!imap.br_startblock &&
+			     XFS_IS_REALTIME_INODE(ip)))
+			return xfs_cmn_err_fsblock_zero(ip, &imap);
+		if ((offset_fsb >= imap.br_startoff) &&
+		    (offset_fsb < (imap.br_startoff +
+				   imap.br_blockcount))) {
+			*map = imap;
+			*retmap = 1;
+			XFS_STATS_INC(xs_xstrat_quick);
+			return 0;
 		}
 
-		/* So far we have not mapped the requested part of the
+		/*
+		 * So far we have not mapped the requested part of the
 		 * file, just surrounding data, try again.
 		 */
-		nimaps--;
-		map_start_fsb = imap[nimaps].br_startoff +
-				imap[nimaps].br_blockcount;
+		count_fsb -= imap.br_blockcount;
+		map_start_fsb = imap.br_startoff + imap.br_blockcount;
 	}
 
 trans_cancel:
@@ -864,7 +870,6 @@ xfs_iomap_write_unwritten(
 	size_t		count)
 {
 	xfs_mount_t	*mp = ip->i_mount;
-	xfs_iocore_t    *io = &ip->i_iocore;
 	xfs_fileoff_t	offset_fsb;
 	xfs_filblks_t	count_fsb;
 	xfs_filblks_t	numblks_fsb;
@@ -877,8 +882,7 @@ xfs_iomap_write_unwritten(
 	int		committed;
 	int		error;
 
-	xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN,
-				&ip->i_iocore, offset, count);
+	xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, ip, offset, count);
 
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
@@ -912,7 +916,7 @@ xfs_iomap_write_unwritten(
 		 */
 		XFS_BMAP_INIT(&free_list, &firstfsb);
 		nimaps = 1;
-		error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb,
+		error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
 				  XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
 				  1, &imap, &nimaps, &free_list, NULL);
 		if (error)
@@ -928,7 +932,7 @@ xfs_iomap_write_unwritten(
 			return XFS_ERROR(error);
 
 		if (unlikely(!imap.br_startblock &&
-			     !(io->io_flags & XFS_IOCORE_RT)))
+			     !(XFS_IS_REALTIME_INODE(ip))))
 			return xfs_cmn_err_fsblock_zero(ip, &imap);
 
 		if ((numblks_fsb = imap.br_blockcount) == 0) {

+ 1 - 4
fs/xfs/xfs_iomap.h

@@ -36,14 +36,12 @@ typedef enum {
 	BMAPI_READ = (1 << 0),		/* read extents */
 	BMAPI_WRITE = (1 << 1),		/* create extents */
 	BMAPI_ALLOCATE = (1 << 2),	/* delayed allocate to real extents */
-	BMAPI_UNWRITTEN  = (1 << 3),	/* unwritten extents to real extents */
 	/* modifiers */
 	BMAPI_IGNSTATE = (1 << 4),	/* ignore unwritten state on read */
 	BMAPI_DIRECT = (1 << 5),	/* direct instead of buffered write */
 	BMAPI_MMAP = (1 << 6),		/* allocate for mmap write */
 	BMAPI_SYNC = (1 << 7),		/* sync write to flush delalloc space */
 	BMAPI_TRYLOCK = (1 << 8),	/* non-blocking request */
-	BMAPI_DEVICE = (1 << 9),	/* we only want to know the device */
 } bmapi_flags_t;
 
 
@@ -73,11 +71,10 @@ typedef struct xfs_iomap {
 	iomap_flags_t		iomap_flags;
 } xfs_iomap_t;
 
-struct xfs_iocore;
 struct xfs_inode;
 struct xfs_bmbt_irec;
 
-extern int xfs_iomap(struct xfs_iocore *, xfs_off_t, ssize_t, int,
+extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int,
 		     struct xfs_iomap *, int *);
 extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
 				  int, struct xfs_bmbt_irec *, int *, int);

+ 6 - 6
fs/xfs/xfs_itable.c

@@ -170,7 +170,7 @@ xfs_bulkstat_one_dinode(
 	buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec);
 	buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec);
 	buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec);
-	buf->bs_xflags = xfs_dic2xflags(dic);
+	buf->bs_xflags = xfs_dic2xflags(dip);
 	buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog;
 	buf->bs_extents = be32_to_cpu(dic->di_nextents);
 	buf->bs_gen = be32_to_cpu(dic->di_gen);
@@ -291,7 +291,7 @@ xfs_bulkstat_use_dinode(
 	dip = (xfs_dinode_t *)
 			xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog);
 	/*
-	 * Check the buffer containing the on-disk inode for di_nlink == 0.
+	 * Check the buffer containing the on-disk inode for di_mode == 0.
 	 * This is to prevent xfs_bulkstat from picking up just reclaimed
 	 * inodes that have their in-core state initialized but not flushed
 	 * to disk yet. This is a temporary hack that would require a proper
@@ -299,7 +299,7 @@ xfs_bulkstat_use_dinode(
 	 */
 	if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC ||
 	    !XFS_DINODE_GOOD_VERSION(dip->di_core.di_version) ||
-	    !dip->di_core.di_nlink)
+	    !dip->di_core.di_mode)
 		return 0;
 	if (flags & BULKSTAT_FG_QUICK) {
 		*dipp = dip;
@@ -307,7 +307,7 @@ xfs_bulkstat_use_dinode(
 	}
 	/* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
 	aformat = dip->di_core.di_aformat;
-	if ((XFS_CFORK_Q(&dip->di_core) == 0) ||
+	if ((XFS_DFORK_Q(dip) == 0) ||
 	    (aformat == XFS_DINODE_FMT_LOCAL) ||
 	    (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_core.di_anextents)) {
 		*dipp = dip;
@@ -399,7 +399,7 @@ xfs_bulkstat(
 		(XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
 	nimask = ~(nicluster - 1);
 	nbcluster = nicluster >> mp->m_sb.sb_inopblog;
-	irbuf = kmem_zalloc_greedy(&irbsize, NBPC, NBPC * 4,
+	irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4,
 				   KM_SLEEP | KM_MAYFAIL | KM_LARGE);
 	nirbuf = irbsize / sizeof(*irbuf);
 
@@ -830,7 +830,7 @@ xfs_inumbers(
 	agino = XFS_INO_TO_AGINO(mp, ino);
 	left = *count;
 	*count = 0;
-	bcount = MIN(left, (int)(NBPP / sizeof(*buffer)));
+	bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer)));
 	buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP);
 	error = bufidx = 0;
 	cur = NULL;

File diff suppressed because it is too large
+ 147 - 156
fs/xfs/xfs_log.c


+ 2 - 1
fs/xfs/xfs_log.h

@@ -22,8 +22,9 @@
 
 #define CYCLE_LSN(lsn) ((uint)((lsn)>>32))
 #define BLOCK_LSN(lsn) ((uint)(lsn))
+
 /* this is used in a spot where we might otherwise double-endian-flip */
-#define CYCLE_LSN_DISK(lsn) (((uint *)&(lsn))[0])
+#define CYCLE_LSN_DISK(lsn) (((__be32 *)&(lsn))[0])
 
 #ifdef __KERNEL__
 /*

+ 38 - 58
fs/xfs/xfs_log_priv.h

@@ -55,32 +55,21 @@ struct xfs_mount;
 	BTOBB(XLOG_MAX_ICLOGS << (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? \
 	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
 
-/*
- *  set lsns
- */
 
-#define ASSIGN_ANY_LSN_HOST(lsn,cycle,block)  \
-    { \
-	(lsn) = ((xfs_lsn_t)(cycle)<<32)|(block); \
-    }
-#define ASSIGN_ANY_LSN_DISK(lsn,cycle,block)  \
-    { \
-	INT_SET(((uint *)&(lsn))[0], ARCH_CONVERT, (cycle)); \
-	INT_SET(((uint *)&(lsn))[1], ARCH_CONVERT, (block)); \
-    }
-#define ASSIGN_LSN(lsn,log) \
-    ASSIGN_ANY_LSN_DISK(lsn,(log)->l_curr_cycle,(log)->l_curr_block);
-
-#define XLOG_SET(f,b)		(((f) & (b)) == (b))
-
-#define GET_CYCLE(ptr, arch) \
-    (INT_GET(*(uint *)(ptr), arch) == XLOG_HEADER_MAGIC_NUM ? \
-	 INT_GET(*((uint *)(ptr)+1), arch) : \
-	 INT_GET(*(uint *)(ptr), arch) \
-    )
+static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block)
+{
+	return ((xfs_lsn_t)cycle << 32) | block;
+}
 
-#define BLK_AVG(blk1, blk2)	((blk1+blk2) >> 1)
+static inline uint xlog_get_cycle(char *ptr)
+{
+	if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM)
+		return be32_to_cpu(*((__be32 *)ptr + 1));
+	else
+		return be32_to_cpu(*(__be32 *)ptr);
+}
 
+#define BLK_AVG(blk1, blk2)	((blk1+blk2) >> 1)
 
 #ifdef __KERNEL__
 
@@ -96,19 +85,10 @@ struct xfs_mount;
  *
  * this has endian issues, of course.
  */
-
-#ifndef XFS_NATIVE_HOST
-#define GET_CLIENT_ID(i,arch) \
-    ((i) & 0xff)
-#else
-#define GET_CLIENT_ID(i,arch) \
-    ((i) >> 24)
-#endif
-
-#define GRANT_LOCK(log)		mutex_spinlock(&(log)->l_grant_lock)
-#define GRANT_UNLOCK(log, s)	mutex_spinunlock(&(log)->l_grant_lock, s)
-#define LOG_LOCK(log)		mutex_spinlock(&(log)->l_icloglock)
-#define LOG_UNLOCK(log, s)	mutex_spinunlock(&(log)->l_icloglock, s)
+static inline uint xlog_get_client_id(__be32 i)
+{
+	return be32_to_cpu(i) >> 24;
+}
 
 #define xlog_panic(args...)	cmn_err(CE_PANIC, ## args)
 #define xlog_exit(args...)	cmn_err(CE_PANIC, ## args)
@@ -285,11 +265,11 @@ typedef struct xlog_ticket {
 
 
 typedef struct xlog_op_header {
-	xlog_tid_t oh_tid;	/* transaction id of operation	:  4 b */
-	int	   oh_len;	/* bytes in data region		:  4 b */
-	__uint8_t  oh_clientid;	/* who sent me this		:  1 b */
-	__uint8_t  oh_flags;	/*				:  1 b */
-	ushort	   oh_res2;	/* 32 bit align			:  2 b */
+	__be32	   oh_tid;	/* transaction id of operation	:  4 b */
+	__be32	   oh_len;	/* bytes in data region		:  4 b */
+	__u8	   oh_clientid;	/* who sent me this		:  1 b */
+	__u8	   oh_flags;	/*				:  1 b */
+	__u16	   oh_res2;	/* 32 bit align			:  2 b */
 } xlog_op_header_t;
 
 
@@ -307,25 +287,25 @@ typedef struct xlog_op_header {
 #endif
 
 typedef struct xlog_rec_header {
-	uint	  h_magicno;	/* log record (LR) identifier		:  4 */
-	uint	  h_cycle;	/* write cycle of log			:  4 */
-	int	  h_version;	/* LR version				:  4 */
-	int	  h_len;	/* len in bytes; should be 64-bit aligned: 4 */
-	xfs_lsn_t h_lsn;	/* lsn of this LR			:  8 */
-	xfs_lsn_t h_tail_lsn;	/* lsn of 1st LR w/ buffers not committed: 8 */
-	uint	  h_chksum;	/* may not be used; non-zero if used	:  4 */
-	int	  h_prev_block; /* block number to previous LR		:  4 */
-	int	  h_num_logops;	/* number of log operations in this LR	:  4 */
-	uint	  h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
+	__be32	  h_magicno;	/* log record (LR) identifier		:  4 */
+	__be32	  h_cycle;	/* write cycle of log			:  4 */
+	__be32	  h_version;	/* LR version				:  4 */
+	__be32	  h_len;	/* len in bytes; should be 64-bit aligned: 4 */
+	__be64	  h_lsn;	/* lsn of this LR			:  8 */
+	__be64	  h_tail_lsn;	/* lsn of 1st LR w/ buffers not committed: 8 */
+	__be32	  h_chksum;	/* may not be used; non-zero if used	:  4 */
+	__be32	  h_prev_block; /* block number to previous LR		:  4 */
+	__be32	  h_num_logops;	/* number of log operations in this LR	:  4 */
+	__be32	  h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
 	/* new fields */
-	int       h_fmt;        /* format of log record                 :  4 */
-	uuid_t    h_fs_uuid;    /* uuid of FS                           : 16 */
-	int       h_size;	/* iclog size				:  4 */
+	__be32    h_fmt;        /* format of log record                 :  4 */
+	uuid_t	  h_fs_uuid;    /* uuid of FS                           : 16 */
+	__be32	  h_size;	/* iclog size				:  4 */
 } xlog_rec_header_t;
 
 typedef struct xlog_rec_ext_header {
-	uint	  xh_cycle;	/* write cycle of log			: 4 */
-	uint	  xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /*	: 256 */
+	__be32	  xh_cycle;	/* write cycle of log			: 4 */
+	__be32	  xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /*	: 256 */
 } xlog_rec_ext_header_t;
 
 #ifdef __KERNEL__
@@ -415,7 +395,7 @@ typedef struct log {
 	xlog_ticket_t		*l_unmount_free;/* kmem_free these addresses */
 	xlog_ticket_t		*l_tail;        /* free list of tickets */
 	xlog_in_core_t		*l_iclog;       /* head log queue	*/
-	lock_t			l_icloglock;    /* grab to change iclog state */
+	spinlock_t		l_icloglock;    /* grab to change iclog state */
 	xfs_lsn_t		l_tail_lsn;     /* lsn of 1st LR with unflushed
 						 * buffers */
 	xfs_lsn_t		l_last_sync_lsn;/* lsn of last LR on disk */
@@ -439,7 +419,7 @@ typedef struct log {
 	char			*l_iclog_bak[XLOG_MAX_ICLOGS];
 
 	/* The following block of fields are changed while holding grant_lock */
-	lock_t			l_grant_lock;
+	spinlock_t		l_grant_lock;
 	xlog_ticket_t		*l_reserve_headq;
 	xlog_ticket_t		*l_write_headq;
 	int			l_grant_reserve_cycle;

+ 92 - 105
fs/xfs/xfs_log_recover.c

@@ -198,7 +198,7 @@ xlog_header_check_dump(
 	cmn_err(CE_DEBUG, "    log : uuid = ");
 	for (b = 0; b < 16; b++)
 		cmn_err(CE_DEBUG, "%02x",((uchar_t *)&head->h_fs_uuid)[b]);
-	cmn_err(CE_DEBUG, ", fmt = %d\n", INT_GET(head->h_fmt, ARCH_CONVERT));
+	cmn_err(CE_DEBUG, ", fmt = %d\n", be32_to_cpu(head->h_fmt));
 }
 #else
 #define xlog_header_check_dump(mp, head)
@@ -212,14 +212,14 @@ xlog_header_check_recover(
 	xfs_mount_t		*mp,
 	xlog_rec_header_t	*head)
 {
-	ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
+	ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM);
 
 	/*
 	 * IRIX doesn't write the h_fmt field and leaves it zeroed
 	 * (XLOG_FMT_UNKNOWN). This stops us from trying to recover
 	 * a dirty log created in IRIX.
 	 */
-	if (unlikely(INT_GET(head->h_fmt, ARCH_CONVERT) != XLOG_FMT)) {
+	if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) {
 		xlog_warn(
 	"XFS: dirty log written in incompatible format - can't recover");
 		xlog_header_check_dump(mp, head);
@@ -245,7 +245,7 @@ xlog_header_check_mount(
 	xfs_mount_t		*mp,
 	xlog_rec_header_t	*head)
 {
-	ASSERT(INT_GET(head->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM);
+	ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM);
 
 	if (uuid_is_nil(&head->h_fs_uuid)) {
 		/*
@@ -293,7 +293,7 @@ xlog_recover_iodone(
  * Note that the algorithm can not be perfect because the disk will not
  * necessarily be perfect.
  */
-int
+STATIC int
 xlog_find_cycle_start(
 	xlog_t		*log,
 	xfs_buf_t	*bp,
@@ -311,7 +311,7 @@ xlog_find_cycle_start(
 		if ((error = xlog_bread(log, mid_blk, 1, bp)))
 			return error;
 		offset = xlog_align(log, mid_blk, 1, bp);
-		mid_cycle = GET_CYCLE(offset, ARCH_CONVERT);
+		mid_cycle = xlog_get_cycle(offset);
 		if (mid_cycle == cycle) {
 			*last_blk = mid_blk;
 			/* last_half_cycle == mid_cycle */
@@ -371,7 +371,7 @@ xlog_find_verify_cycle(
 
 		buf = xlog_align(log, i, bcount, bp);
 		for (j = 0; j < bcount; j++) {
-			cycle = GET_CYCLE(buf, ARCH_CONVERT);
+			cycle = xlog_get_cycle(buf);
 			if (cycle == stop_on_cycle_no) {
 				*new_blk = i+j;
 				goto out;
@@ -447,8 +447,7 @@ xlog_find_verify_log_record(
 
 		head = (xlog_rec_header_t *)offset;
 
-		if (XLOG_HEADER_MAGIC_NUM ==
-		    INT_GET(head->h_magicno, ARCH_CONVERT))
+		if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno))
 			break;
 
 		if (!smallmem)
@@ -480,7 +479,7 @@ xlog_find_verify_log_record(
 	 * record do we update last_blk.
 	 */
 	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
-		uint	h_size = INT_GET(head->h_size, ARCH_CONVERT);
+		uint	h_size = be32_to_cpu(head->h_size);
 
 		xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
 		if (h_size % XLOG_HEADER_CYCLE_SIZE)
@@ -489,8 +488,8 @@ xlog_find_verify_log_record(
 		xhdrs = 1;
 	}
 
-	if (*last_blk - i + extra_bblks
-			!= BTOBB(INT_GET(head->h_len, ARCH_CONVERT)) + xhdrs)
+	if (*last_blk - i + extra_bblks !=
+	    BTOBB(be32_to_cpu(head->h_len)) + xhdrs)
 		*last_blk = i;
 
 out:
@@ -550,13 +549,13 @@ xlog_find_head(
 	if ((error = xlog_bread(log, 0, 1, bp)))
 		goto bp_err;
 	offset = xlog_align(log, 0, 1, bp);
-	first_half_cycle = GET_CYCLE(offset, ARCH_CONVERT);
+	first_half_cycle = xlog_get_cycle(offset);
 
 	last_blk = head_blk = log_bbnum - 1;	/* get cycle # of last block */
 	if ((error = xlog_bread(log, last_blk, 1, bp)))
 		goto bp_err;
 	offset = xlog_align(log, last_blk, 1, bp);
-	last_half_cycle = GET_CYCLE(offset, ARCH_CONVERT);
+	last_half_cycle = xlog_get_cycle(offset);
 	ASSERT(last_half_cycle != 0);
 
 	/*
@@ -808,7 +807,7 @@ xlog_find_tail(
 		if ((error = xlog_bread(log, 0, 1, bp)))
 			goto bread_err;
 		offset = xlog_align(log, 0, 1, bp);
-		if (GET_CYCLE(offset, ARCH_CONVERT) == 0) {
+		if (xlog_get_cycle(offset) == 0) {
 			*tail_blk = 0;
 			/* leave all other log inited values alone */
 			goto exit;
@@ -823,8 +822,7 @@ xlog_find_tail(
 		if ((error = xlog_bread(log, i, 1, bp)))
 			goto bread_err;
 		offset = xlog_align(log, i, 1, bp);
-		if (XLOG_HEADER_MAGIC_NUM ==
-		    INT_GET(*(uint *)offset, ARCH_CONVERT)) {
+		if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) {
 			found = 1;
 			break;
 		}
@@ -841,7 +839,7 @@ xlog_find_tail(
 				goto bread_err;
 			offset = xlog_align(log, i, 1, bp);
 			if (XLOG_HEADER_MAGIC_NUM ==
-			    INT_GET(*(uint*)offset, ARCH_CONVERT)) {
+			    be32_to_cpu(*(__be32 *)offset)) {
 				found = 2;
 				break;
 			}
@@ -855,7 +853,7 @@ xlog_find_tail(
 
 	/* find blk_no of tail of log */
 	rhead = (xlog_rec_header_t *)offset;
-	*tail_blk = BLOCK_LSN(INT_GET(rhead->h_tail_lsn, ARCH_CONVERT));
+	*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
 
 	/*
 	 * Reset log values according to the state of the log when we
@@ -869,11 +867,11 @@ xlog_find_tail(
 	 */
 	log->l_prev_block = i;
 	log->l_curr_block = (int)*head_blk;
-	log->l_curr_cycle = INT_GET(rhead->h_cycle, ARCH_CONVERT);
+	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
 	if (found == 2)
 		log->l_curr_cycle++;
-	log->l_tail_lsn = INT_GET(rhead->h_tail_lsn, ARCH_CONVERT);
-	log->l_last_sync_lsn = INT_GET(rhead->h_lsn, ARCH_CONVERT);
+	log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn);
+	log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn);
 	log->l_grant_reserve_cycle = log->l_curr_cycle;
 	log->l_grant_reserve_bytes = BBTOB(log->l_curr_block);
 	log->l_grant_write_cycle = log->l_curr_cycle;
@@ -891,8 +889,8 @@ xlog_find_tail(
 	 * unmount record rather than the block after it.
 	 */
 	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
-		int	h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
-		int	h_version = INT_GET(rhead->h_version, ARCH_CONVERT);
+		int	h_size = be32_to_cpu(rhead->h_size);
+		int	h_version = be32_to_cpu(rhead->h_version);
 
 		if ((h_version & XLOG_VERSION_2) &&
 		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
@@ -906,10 +904,10 @@ xlog_find_tail(
 		hblks = 1;
 	}
 	after_umount_blk = (i + hblks + (int)
-		BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT))) % log->l_logBBsize;
+		BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize;
 	tail_lsn = log->l_tail_lsn;
 	if (*head_blk == after_umount_blk &&
-	    INT_GET(rhead->h_num_logops, ARCH_CONVERT) == 1) {
+	    be32_to_cpu(rhead->h_num_logops) == 1) {
 		umount_data_blk = (i + hblks) % log->l_logBBsize;
 		if ((error = xlog_bread(log, umount_data_blk, 1, bp))) {
 			goto bread_err;
@@ -922,10 +920,12 @@ xlog_find_tail(
 			 * log records will point recovery to after the
 			 * current unmount record.
 			 */
-			ASSIGN_ANY_LSN_HOST(log->l_tail_lsn, log->l_curr_cycle,
-					after_umount_blk);
-			ASSIGN_ANY_LSN_HOST(log->l_last_sync_lsn, log->l_curr_cycle,
-					after_umount_blk);
+			log->l_tail_lsn =
+				xlog_assign_lsn(log->l_curr_cycle,
+						after_umount_blk);
+			log->l_last_sync_lsn =
+				xlog_assign_lsn(log->l_curr_cycle,
+						after_umount_blk);
 			*tail_blk = after_umount_blk;
 
 			/*
@@ -986,7 +986,7 @@ exit:
  *	-1 => use *blk_no as the first block of the log
  *	>0 => error has occurred
  */
-int
+STATIC int
 xlog_find_zeroed(
 	xlog_t		*log,
 	xfs_daddr_t	*blk_no)
@@ -1007,7 +1007,7 @@ xlog_find_zeroed(
 	if ((error = xlog_bread(log, 0, 1, bp)))
 		goto bp_err;
 	offset = xlog_align(log, 0, 1, bp);
-	first_cycle = GET_CYCLE(offset, ARCH_CONVERT);
+	first_cycle = xlog_get_cycle(offset);
 	if (first_cycle == 0) {		/* completely zeroed log */
 		*blk_no = 0;
 		xlog_put_bp(bp);
@@ -1018,7 +1018,7 @@ xlog_find_zeroed(
 	if ((error = xlog_bread(log, log_bbnum-1, 1, bp)))
 		goto bp_err;
 	offset = xlog_align(log, log_bbnum-1, 1, bp);
-	last_cycle = GET_CYCLE(offset, ARCH_CONVERT);
+	last_cycle = xlog_get_cycle(offset);
 	if (last_cycle != 0) {		/* log completely written to */
 		xlog_put_bp(bp);
 		return 0;
@@ -1098,13 +1098,13 @@ xlog_add_record(
 	xlog_rec_header_t	*recp = (xlog_rec_header_t *)buf;
 
 	memset(buf, 0, BBSIZE);
-	INT_SET(recp->h_magicno, ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM);
-	INT_SET(recp->h_cycle, ARCH_CONVERT, cycle);
-	INT_SET(recp->h_version, ARCH_CONVERT,
+	recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
+	recp->h_cycle = cpu_to_be32(cycle);
+	recp->h_version = cpu_to_be32(
 			XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1);
-	ASSIGN_ANY_LSN_DISK(recp->h_lsn, cycle, block);
-	ASSIGN_ANY_LSN_DISK(recp->h_tail_lsn, tail_cycle, tail_block);
-	INT_SET(recp->h_fmt, ARCH_CONVERT, XLOG_FMT);
+	recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block));
+	recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block));
+	recp->h_fmt = cpu_to_be32(XLOG_FMT);
 	memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t));
 }
 
@@ -2211,7 +2211,7 @@ xlog_recover_do_buffer_trans(
 	 * overlap with future reads of those inodes.
 	 */
 	if (XFS_DINODE_MAGIC ==
-	    INT_GET(*((__uint16_t *)(xfs_buf_offset(bp, 0))), ARCH_CONVERT) &&
+	    be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
 	    (XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize,
 			(__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) {
 		XFS_BUF_STALE(bp);
@@ -2581,8 +2581,7 @@ xlog_recover_do_dquot_trans(
 	/*
 	 * This type of quotas was turned off, so ignore this record.
 	 */
-	type = INT_GET(recddq->d_flags, ARCH_CONVERT) &
-			(XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
+	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
 	ASSERT(type);
 	if (log->l_quotaoffs_flag & type)
 		return (0);
@@ -2660,7 +2659,6 @@ xlog_recover_do_efi_trans(
 	xfs_mount_t		*mp;
 	xfs_efi_log_item_t	*efip;
 	xfs_efi_log_format_t	*efi_formatp;
-	SPLDECL(s);
 
 	if (pass == XLOG_RECOVER_PASS1) {
 		return 0;
@@ -2678,11 +2676,11 @@ xlog_recover_do_efi_trans(
 	efip->efi_next_extent = efi_formatp->efi_nextents;
 	efip->efi_flags |= XFS_EFI_COMMITTED;
 
-	AIL_LOCK(mp,s);
+	spin_lock(&mp->m_ail_lock);
 	/*
 	 * xfs_trans_update_ail() drops the AIL lock.
 	 */
-	xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn, s);
+	xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn);
 	return 0;
 }
 
@@ -2707,7 +2705,6 @@ xlog_recover_do_efd_trans(
 	xfs_log_item_t		*lip;
 	int			gen;
 	__uint64_t		efi_id;
-	SPLDECL(s);
 
 	if (pass == XLOG_RECOVER_PASS1) {
 		return;
@@ -2725,7 +2722,7 @@ xlog_recover_do_efd_trans(
 	 * in the AIL.
 	 */
 	mp = log->l_mp;
-	AIL_LOCK(mp,s);
+	spin_lock(&mp->m_ail_lock);
 	lip = xfs_trans_first_ail(mp, &gen);
 	while (lip != NULL) {
 		if (lip->li_type == XFS_LI_EFI) {
@@ -2735,22 +2732,14 @@ xlog_recover_do_efd_trans(
 				 * xfs_trans_delete_ail() drops the
 				 * AIL lock.
 				 */
-				xfs_trans_delete_ail(mp, lip, s);
-				break;
+				xfs_trans_delete_ail(mp, lip);
+				xfs_efi_item_free(efip);
+				return;
 			}
 		}
 		lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
 	}
-
-	/*
-	 * If we found it, then free it up.  If it wasn't there, it
-	 * must have been overwritten in the log.  Oh well.
-	 */
-	if (lip != NULL) {
-		xfs_efi_item_free(efip);
-	} else {
-		AIL_UNLOCK(mp, s);
-	}
+	spin_unlock(&mp->m_ail_lock);
 }
 
 /*
@@ -2897,8 +2886,8 @@ xlog_recover_process_data(
 	unsigned long		hash;
 	uint			flags;
 
-	lp = dp + INT_GET(rhead->h_len, ARCH_CONVERT);
-	num_logops = INT_GET(rhead->h_num_logops, ARCH_CONVERT);
+	lp = dp + be32_to_cpu(rhead->h_len);
+	num_logops = be32_to_cpu(rhead->h_num_logops);
 
 	/* check the log format matches our own - else we can't recover */
 	if (xlog_header_check_recover(log->l_mp, rhead))
@@ -2915,15 +2904,20 @@ xlog_recover_process_data(
 			ASSERT(0);
 			return (XFS_ERROR(EIO));
 		}
-		tid = INT_GET(ohead->oh_tid, ARCH_CONVERT);
+		tid = be32_to_cpu(ohead->oh_tid);
 		hash = XLOG_RHASH(tid);
 		trans = xlog_recover_find_tid(rhash[hash], tid);
 		if (trans == NULL) {		   /* not found; add new tid */
 			if (ohead->oh_flags & XLOG_START_TRANS)
 				xlog_recover_new_tid(&rhash[hash], tid,
-					INT_GET(rhead->h_lsn, ARCH_CONVERT));
+					be64_to_cpu(rhead->h_lsn));
 		} else {
-			ASSERT(dp+INT_GET(ohead->oh_len, ARCH_CONVERT) <= lp);
+			if (dp + be32_to_cpu(ohead->oh_len) > lp) {
+				xlog_warn(
+			"XFS: xlog_recover_process_data: bad length");
+				WARN_ON(1);
+				return (XFS_ERROR(EIO));
+			}
 			flags = ohead->oh_flags & ~XLOG_END_TRANS;
 			if (flags & XLOG_WAS_CONT_TRANS)
 				flags &= ~XLOG_CONTINUE_TRANS;
@@ -2937,8 +2931,7 @@ xlog_recover_process_data(
 				break;
 			case XLOG_WAS_CONT_TRANS:
 				error = xlog_recover_add_to_cont_trans(trans,
-						dp, INT_GET(ohead->oh_len,
-							ARCH_CONVERT));
+						dp, be32_to_cpu(ohead->oh_len));
 				break;
 			case XLOG_START_TRANS:
 				xlog_warn(
@@ -2949,8 +2942,7 @@ xlog_recover_process_data(
 			case 0:
 			case XLOG_CONTINUE_TRANS:
 				error = xlog_recover_add_to_trans(trans,
-						dp, INT_GET(ohead->oh_len,
-							ARCH_CONVERT));
+						dp, be32_to_cpu(ohead->oh_len));
 				break;
 			default:
 				xlog_warn(
@@ -2962,7 +2954,7 @@ xlog_recover_process_data(
 			if (error)
 				return error;
 		}
-		dp += INT_GET(ohead->oh_len, ARCH_CONVERT);
+		dp += be32_to_cpu(ohead->oh_len);
 		num_logops--;
 	}
 	return 0;
@@ -3075,10 +3067,9 @@ xlog_recover_process_efis(
 	xfs_efi_log_item_t	*efip;
 	int			gen;
 	xfs_mount_t		*mp;
-	SPLDECL(s);
 
 	mp = log->l_mp;
-	AIL_LOCK(mp,s);
+	spin_lock(&mp->m_ail_lock);
 
 	lip = xfs_trans_first_ail(mp, &gen);
 	while (lip != NULL) {
@@ -3099,12 +3090,12 @@ xlog_recover_process_efis(
 			continue;
 		}
 
-		AIL_UNLOCK(mp, s);
+		spin_unlock(&mp->m_ail_lock);
 		xlog_recover_process_efi(mp, efip);
-		AIL_LOCK(mp,s);
+		spin_lock(&mp->m_ail_lock);
 		lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
 	}
-	AIL_UNLOCK(mp, s);
+	spin_unlock(&mp->m_ail_lock);
 }
 
 /*
@@ -3315,16 +3306,16 @@ xlog_pack_data_checksum(
 	int		size)
 {
 	int		i;
-	uint		*up;
+	__be32		*up;
 	uint		chksum = 0;
 
-	up = (uint *)iclog->ic_datap;
+	up = (__be32 *)iclog->ic_datap;
 	/* divide length by 4 to get # words */
 	for (i = 0; i < (size >> 2); i++) {
-		chksum ^= INT_GET(*up, ARCH_CONVERT);
+		chksum ^= be32_to_cpu(*up);
 		up++;
 	}
-	INT_SET(iclog->ic_header.h_chksum, ARCH_CONVERT, chksum);
+	iclog->ic_header.h_chksum = cpu_to_be32(chksum);
 }
 #else
 #define xlog_pack_data_checksum(log, iclog, size)
@@ -3341,7 +3332,7 @@ xlog_pack_data(
 {
 	int			i, j, k;
 	int			size = iclog->ic_offset + roundoff;
-	uint			cycle_lsn;
+	__be32			cycle_lsn;
 	xfs_caddr_t		dp;
 	xlog_in_core_2_t	*xhdr;
 
@@ -3352,8 +3343,8 @@ xlog_pack_data(
 	dp = iclog->ic_datap;
 	for (i = 0; i < BTOBB(size) &&
 		i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
-		iclog->ic_header.h_cycle_data[i] = *(uint *)dp;
-		*(uint *)dp = cycle_lsn;
+		iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp;
+		*(__be32 *)dp = cycle_lsn;
 		dp += BBSIZE;
 	}
 
@@ -3362,8 +3353,8 @@ xlog_pack_data(
 		for ( ; i < BTOBB(size); i++) {
 			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
 			k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
-			xhdr[j].hic_xheader.xh_cycle_data[k] = *(uint *)dp;
-			*(uint *)dp = cycle_lsn;
+			xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp;
+			*(__be32 *)dp = cycle_lsn;
 			dp += BBSIZE;
 		}
 
@@ -3380,21 +3371,21 @@ xlog_unpack_data_checksum(
 	xfs_caddr_t		dp,
 	xlog_t			*log)
 {
-	uint			*up = (uint *)dp;
+	__be32			*up = (__be32 *)dp;
 	uint			chksum = 0;
 	int			i;
 
 	/* divide length by 4 to get # words */
-	for (i=0; i < INT_GET(rhead->h_len, ARCH_CONVERT) >> 2; i++) {
-		chksum ^= INT_GET(*up, ARCH_CONVERT);
+	for (i=0; i < be32_to_cpu(rhead->h_len) >> 2; i++) {
+		chksum ^= be32_to_cpu(*up);
 		up++;
 	}
-	if (chksum != INT_GET(rhead->h_chksum, ARCH_CONVERT)) {
+	if (chksum != be32_to_cpu(rhead->h_chksum)) {
 	    if (rhead->h_chksum ||
 		((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) {
 		    cmn_err(CE_DEBUG,
 			"XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n",
-			    INT_GET(rhead->h_chksum, ARCH_CONVERT), chksum);
+			    be32_to_cpu(rhead->h_chksum), chksum);
 		    cmn_err(CE_DEBUG,
 "XFS: Disregard message if filesystem was created with non-DEBUG kernel");
 		    if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
@@ -3418,18 +3409,18 @@ xlog_unpack_data(
 	int			i, j, k;
 	xlog_in_core_2_t	*xhdr;
 
-	for (i = 0; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)) &&
+	for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
 		  i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
-		*(uint *)dp = *(uint *)&rhead->h_cycle_data[i];
+		*(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
 		dp += BBSIZE;
 	}
 
 	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
 		xhdr = (xlog_in_core_2_t *)rhead;
-		for ( ; i < BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT)); i++) {
+		for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
 			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
 			k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
-			*(uint *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
+			*(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
 			dp += BBSIZE;
 		}
 	}
@@ -3445,24 +3436,21 @@ xlog_valid_rec_header(
 {
 	int			hlen;
 
-	if (unlikely(
-	    (INT_GET(rhead->h_magicno, ARCH_CONVERT) !=
-			XLOG_HEADER_MAGIC_NUM))) {
+	if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) {
 		XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
 				XFS_ERRLEVEL_LOW, log->l_mp);
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 	if (unlikely(
 	    (!rhead->h_version ||
-	    (INT_GET(rhead->h_version, ARCH_CONVERT) &
-			(~XLOG_VERSION_OKBITS)) != 0))) {
+	    (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) {
 		xlog_warn("XFS: %s: unrecognised log version (%d).",
-			__FUNCTION__, INT_GET(rhead->h_version, ARCH_CONVERT));
+			__FUNCTION__, be32_to_cpu(rhead->h_version));
 		return XFS_ERROR(EIO);
 	}
 
 	/* LR body must have data or it wouldn't have been written */
-	hlen = INT_GET(rhead->h_len, ARCH_CONVERT);
+	hlen = be32_to_cpu(rhead->h_len);
 	if (unlikely( hlen <= 0 || hlen > INT_MAX )) {
 		XFS_ERROR_REPORT("xlog_valid_rec_header(2)",
 				XFS_ERRLEVEL_LOW, log->l_mp);
@@ -3522,9 +3510,8 @@ xlog_do_recovery_pass(
 		error = xlog_valid_rec_header(log, rhead, tail_blk);
 		if (error)
 			goto bread_err1;
-		h_size = INT_GET(rhead->h_size, ARCH_CONVERT);
-		if ((INT_GET(rhead->h_version, ARCH_CONVERT)
-				& XLOG_VERSION_2) &&
+		h_size = be32_to_cpu(rhead->h_size);
+		if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) &&
 		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
 			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
 			if (h_size % XLOG_HEADER_CYCLE_SIZE)
@@ -3561,7 +3548,7 @@ xlog_do_recovery_pass(
 				goto bread_err2;
 
 			/* blocks in data section */
-			bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
+			bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
 			error = xlog_bread(log, blk_no + hblks, bblks, dbp);
 			if (error)
 				goto bread_err2;
@@ -3636,7 +3623,7 @@ xlog_do_recovery_pass(
 			if (error)
 				goto bread_err2;
 
-			bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
+			bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
 			blk_no += hblks;
 
 			/* Read in data for log record */
@@ -3707,7 +3694,7 @@ xlog_do_recovery_pass(
 			error = xlog_valid_rec_header(log, rhead, blk_no);
 			if (error)
 				goto bread_err2;
-			bblks = (int)BTOBB(INT_GET(rhead->h_len, ARCH_CONVERT));
+			bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
 			if ((error = xlog_bread(log, blk_no+hblks, bblks, dbp)))
 				goto bread_err2;
 			offset = xlog_align(log, blk_no+hblks, bblks, dbp);

+ 194 - 150
fs/xfs/xfs_mount.c

@@ -136,15 +136,9 @@ xfs_mount_init(void)
 		mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
 	}
 
-	AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
-	spinlock_init(&mp->m_sb_lock, "xfs_sb");
+	spin_lock_init(&mp->m_sb_lock);
 	mutex_init(&mp->m_ilock);
 	mutex_init(&mp->m_growlock);
-	/*
-	 * Initialize the AIL.
-	 */
-	xfs_trans_ail_init(mp);
-
 	atomic_set(&mp->m_active_trans, 0);
 
 	return mp;
@@ -171,7 +165,7 @@ xfs_mount_free(
 			  sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
 	}
 
-	AIL_LOCK_DESTROY(&mp->m_ail_lock);
+	spinlock_destroy(&mp->m_ail_lock);
 	spinlock_destroy(&mp->m_sb_lock);
 	mutex_destroy(&mp->m_ilock);
 	mutex_destroy(&mp->m_growlock);
@@ -616,7 +610,7 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
 	int	i;
 
 	mp->m_agfrotor = mp->m_agirotor = 0;
-	spinlock_init(&mp->m_agirotor_lock, "m_agirotor_lock");
+	spin_lock_init(&mp->m_agirotor_lock);
 	mp->m_maxagi = mp->m_sb.sb_agcount;
 	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
 	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
@@ -696,7 +690,6 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
 	uint64_t	bfreelst = 0;
 	uint64_t	btree = 0;
 	int		error;
-	int		s;
 
 	for (index = 0; index < agcount; index++) {
 		/*
@@ -721,11 +714,11 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
 	/*
 	 * Overwrite incore superblock counters with just-read data
 	 */
-	s = XFS_SB_LOCK(mp);
+	spin_lock(&mp->m_sb_lock);
 	sbp->sb_ifree = ifree;
 	sbp->sb_icount = ialloc;
 	sbp->sb_fdblocks = bfree + bfreelst + btree;
-	XFS_SB_UNLOCK(mp, s);
+	spin_unlock(&mp->m_sb_lock);
 
 	/* Fixup the per-cpu counters as well. */
 	xfs_icsb_reinit_counters(mp);
@@ -734,49 +727,13 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
 }
 
 /*
- * xfs_mountfs
- *
- * This function does the following on an initial mount of a file system:
- *	- reads the superblock from disk and init the mount struct
- *	- if we're a 32-bit kernel, do a size check on the superblock
- *		so we don't mount terabyte filesystems
- *	- init mount struct realtime fields
- *	- allocate inode hash table for fs
- *	- init directory manager
- *	- perform recovery and init the log manager
+ * Update alignment values based on mount options and sb values
  */
-int
-xfs_mountfs(
-	xfs_mount_t	*mp,
-	int		mfsi_flags)
+STATIC int
+xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags)
 {
-	xfs_buf_t	*bp;
 	xfs_sb_t	*sbp = &(mp->m_sb);
-	xfs_inode_t	*rip;
-	bhv_vnode_t	*rvp = NULL;
-	int		readio_log, writeio_log;
-	xfs_daddr_t	d;
-	__uint64_t	resblks;
-	__int64_t	update_flags;
-	uint		quotamount, quotaflags;
-	int		agno;
-	int		uuid_mounted = 0;
-	int		error = 0;
 
-	if (mp->m_sb_bp == NULL) {
-		if ((error = xfs_readsb(mp, mfsi_flags))) {
-			return error;
-		}
-	}
-	xfs_mount_common(mp, sbp);
-
-	/*
-	 * Check if sb_agblocks is aligned at stripe boundary
-	 * If sb_agblocks is NOT aligned turn off m_dalign since
-	 * allocator alignment is within an ag, therefore ag has
-	 * to be aligned at stripe boundary.
-	 */
-	update_flags = 0LL;
 	if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) {
 		/*
 		 * If stripe unit and stripe width are not multiples
@@ -787,8 +744,7 @@ xfs_mountfs(
 			if (mp->m_flags & XFS_MOUNT_RETERR) {
 				cmn_err(CE_WARN,
 					"XFS: alignment check 1 failed");
-				error = XFS_ERROR(EINVAL);
-				goto error1;
+				return XFS_ERROR(EINVAL);
 			}
 			mp->m_dalign = mp->m_swidth = 0;
 		} else {
@@ -798,8 +754,7 @@ xfs_mountfs(
 			mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
 			if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
 				if (mp->m_flags & XFS_MOUNT_RETERR) {
-					error = XFS_ERROR(EINVAL);
-					goto error1;
+					return XFS_ERROR(EINVAL);
 				}
 				xfs_fs_cmn_err(CE_WARN, mp,
 "stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)",
@@ -816,8 +771,7 @@ xfs_mountfs(
 "stripe alignment turned off: sunit(%d) less than bsize(%d)",
                                         	mp->m_dalign,
 						mp->m_blockmask +1);
-					error = XFS_ERROR(EINVAL);
-					goto error1;
+					return XFS_ERROR(EINVAL);
 				}
 				mp->m_swidth = 0;
 			}
@@ -830,11 +784,11 @@ xfs_mountfs(
 		if (XFS_SB_VERSION_HASDALIGN(sbp)) {
 			if (sbp->sb_unit != mp->m_dalign) {
 				sbp->sb_unit = mp->m_dalign;
-				update_flags |= XFS_SB_UNIT;
+				*update_flags |= XFS_SB_UNIT;
 			}
 			if (sbp->sb_width != mp->m_swidth) {
 				sbp->sb_width = mp->m_swidth;
-				update_flags |= XFS_SB_WIDTH;
+				*update_flags |= XFS_SB_WIDTH;
 			}
 		}
 	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
@@ -843,49 +797,45 @@ xfs_mountfs(
 			mp->m_swidth = sbp->sb_width;
 	}
 
-	xfs_alloc_compute_maxlevels(mp);
-	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
-	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
-	xfs_ialloc_compute_maxlevels(mp);
+	return 0;
+}
 
-	if (sbp->sb_imax_pct) {
-		__uint64_t	icount;
+/*
+ * Set the maximum inode count for this filesystem
+ */
+STATIC void
+xfs_set_maxicount(xfs_mount_t *mp)
+{
+	xfs_sb_t	*sbp = &(mp->m_sb);
+	__uint64_t	icount;
 
-		/* Make sure the maximum inode count is a multiple of the
-		 * units we allocate inodes in.
+	if (sbp->sb_imax_pct) {
+		/*
+		 * Make sure the maximum inode count is a multiple
+		 * of the units we allocate inodes in.
 		 */
-
 		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
 		do_div(icount, 100);
 		do_div(icount, mp->m_ialloc_blks);
 		mp->m_maxicount = (icount * mp->m_ialloc_blks)  <<
 				   sbp->sb_inopblog;
-	} else
+	} else {
 		mp->m_maxicount = 0;
-
-	mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);
-
-	/*
-	 * XFS uses the uuid from the superblock as the unique
-	 * identifier for fsid.  We can not use the uuid from the volume
-	 * since a single partition filesystem is identical to a single
-	 * partition volume/filesystem.
-	 */
-	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
-	    (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
-		if (xfs_uuid_mount(mp)) {
-			error = XFS_ERROR(EINVAL);
-			goto error1;
-		}
-		uuid_mounted=1;
 	}
+}
+
+/*
+ * Set the default minimum read and write sizes unless
+ * already specified in a mount option.
+ * We use smaller I/O sizes when the file system
+ * is being used for NFS service (wsync mount option).
+ */
+STATIC void
+xfs_set_rw_sizes(xfs_mount_t *mp)
+{
+	xfs_sb_t	*sbp = &(mp->m_sb);
+	int		readio_log, writeio_log;
 
-	/*
-	 * Set the default minimum read and write sizes unless
-	 * already specified in a mount option.
-	 * We use smaller I/O sizes when the file system
-	 * is being used for NFS service (wsync mount option).
-	 */
 	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
 		if (mp->m_flags & XFS_MOUNT_WSYNC) {
 			readio_log = XFS_WSYNC_READIO_LOG;
@@ -911,17 +861,14 @@ xfs_mountfs(
 		mp->m_writeio_log = writeio_log;
 	}
 	mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
+}
 
-	/*
-	 * Set the inode cluster size.
-	 * This may still be overridden by the file system
-	 * block size if it is larger than the chosen cluster size.
-	 */
-	mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
-
-	/*
-	 * Set whether we're using inode alignment.
-	 */
+/*
+ * Set whether we're using inode alignment.
+ */
+STATIC void
+xfs_set_inoalignment(xfs_mount_t *mp)
+{
 	if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
 	    mp->m_sb.sb_inoalignmt >=
 	    XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
@@ -937,14 +884,22 @@ xfs_mountfs(
 		mp->m_sinoalign = mp->m_dalign;
 	else
 		mp->m_sinoalign = 0;
-	/*
-	 * Check that the data (and log if separate) are an ok size.
-	 */
+}
+
+/*
+ * Check that the data (and log if separate) are an ok size.
+ */
+STATIC int
+xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags)
+{
+	xfs_buf_t	*bp;
+	xfs_daddr_t	d;
+	int		error;
+
 	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
 	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
 		cmn_err(CE_WARN, "XFS: size check 1 failed");
-		error = XFS_ERROR(E2BIG);
-		goto error1;
+		return XFS_ERROR(E2BIG);
 	}
 	error = xfs_read_buf(mp, mp->m_ddev_targp,
 			     d - XFS_FSS_TO_BB(mp, 1),
@@ -953,10 +908,9 @@ xfs_mountfs(
 		xfs_buf_relse(bp);
 	} else {
 		cmn_err(CE_WARN, "XFS: size check 2 failed");
-		if (error == ENOSPC) {
+		if (error == ENOSPC)
 			error = XFS_ERROR(E2BIG);
-		}
-		goto error1;
+		return error;
 	}
 
 	if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) &&
@@ -964,8 +918,7 @@ xfs_mountfs(
 		d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
 		if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
 			cmn_err(CE_WARN, "XFS: size check 3 failed");
-			error = XFS_ERROR(E2BIG);
-			goto error1;
+			return XFS_ERROR(E2BIG);
 		}
 		error = xfs_read_buf(mp, mp->m_logdev_targp,
 				     d - XFS_FSB_TO_BB(mp, 1),
@@ -974,17 +927,111 @@ xfs_mountfs(
 			xfs_buf_relse(bp);
 		} else {
 			cmn_err(CE_WARN, "XFS: size check 3 failed");
-			if (error == ENOSPC) {
+			if (error == ENOSPC)
 				error = XFS_ERROR(E2BIG);
-			}
+			return error;
+		}
+	}
+	return 0;
+}
+
+/*
+ * xfs_mountfs
+ *
+ * This function does the following on an initial mount of a file system:
+ *	- reads the superblock from disk and init the mount struct
+ *	- if we're a 32-bit kernel, do a size check on the superblock
+ *		so we don't mount terabyte filesystems
+ *	- init mount struct realtime fields
+ *	- allocate inode hash table for fs
+ *	- init directory manager
+ *	- perform recovery and init the log manager
+ */
+int
+xfs_mountfs(
+	xfs_mount_t	*mp,
+	int		mfsi_flags)
+{
+	xfs_sb_t	*sbp = &(mp->m_sb);
+	xfs_inode_t	*rip;
+	bhv_vnode_t	*rvp = NULL;
+	__uint64_t	resblks;
+	__int64_t	update_flags = 0LL;
+	uint		quotamount, quotaflags;
+	int		agno;
+	int		uuid_mounted = 0;
+	int		error = 0;
+
+	if (mp->m_sb_bp == NULL) {
+		error = xfs_readsb(mp, mfsi_flags);
+		if (error)
+			return error;
+	}
+	xfs_mount_common(mp, sbp);
+
+	/*
+	 * Check if sb_agblocks is aligned at stripe boundary
+	 * If sb_agblocks is NOT aligned turn off m_dalign since
+	 * allocator alignment is within an ag, therefore ag has
+	 * to be aligned at stripe boundary.
+	 */
+	error = xfs_update_alignment(mp, mfsi_flags, &update_flags);
+	if (error)
+		goto error1;
+
+	xfs_alloc_compute_maxlevels(mp);
+	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
+	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
+	xfs_ialloc_compute_maxlevels(mp);
+
+	xfs_set_maxicount(mp);
+
+	mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);
+
+	/*
+	 * XFS uses the uuid from the superblock as the unique
+	 * identifier for fsid.  We can not use the uuid from the volume
+	 * since a single partition filesystem is identical to a single
+	 * partition volume/filesystem.
+	 */
+	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
+	    (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
+		if (xfs_uuid_mount(mp)) {
+			error = XFS_ERROR(EINVAL);
 			goto error1;
 		}
+		uuid_mounted=1;
 	}
 
+	/*
+	 * Set the minimum read and write sizes
+	 */
+	xfs_set_rw_sizes(mp);
+
+	/*
+	 * Set the inode cluster size.
+	 * This may still be overridden by the file system
+	 * block size if it is larger than the chosen cluster size.
+	 */
+	mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
+
+	/*
+	 * Set inode alignment fields
+	 */
+	xfs_set_inoalignment(mp);
+
+	/*
+	 * Check that the data (and log if separate) are an ok size.
+	 */
+	error = xfs_check_sizes(mp, mfsi_flags);
+	if (error)
+		goto error1;
+
 	/*
 	 * Initialize realtime fields in the mount structure
 	 */
-	if ((error = xfs_rtmount_init(mp))) {
+	error = xfs_rtmount_init(mp);
+	if (error) {
 		cmn_err(CE_WARN, "XFS: RT mount failed");
 		goto error1;
 	}
@@ -1102,7 +1149,8 @@ xfs_mountfs(
 	/*
 	 * Initialize realtime inode pointers in the mount structure
 	 */
-	if ((error = xfs_rtmount_inodes(mp))) {
+	error = xfs_rtmount_inodes(mp);
+	if (error) {
 		/*
 		 * Free up the root inode.
 		 */
@@ -1120,7 +1168,8 @@ xfs_mountfs(
 	/*
 	 * Initialise the XFS quota management subsystem for this mount
 	 */
-	if ((error = XFS_QM_INIT(mp, &quotamount, &quotaflags)))
+	error = XFS_QM_INIT(mp, &quotamount, &quotaflags);
+	if (error)
 		goto error4;
 
 	/*
@@ -1137,7 +1186,8 @@ xfs_mountfs(
 	/*
 	 * Complete the quota initialisation, post-log-replay component.
 	 */
-	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
+	error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags);
+	if (error)
 		goto error4;
 
 	/*
@@ -1255,7 +1305,6 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 #if defined(DEBUG) || defined(INDUCE_IO_ERROR)
 	xfs_errortag_clearall(mp, 0);
 #endif
-	XFS_IODONE(mp);
 	xfs_mount_free(mp);
 	return 0;
 }
@@ -1441,7 +1490,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
  * Fields are not allowed to dip below zero, so if the delta would
  * do this do not apply it and return EINVAL.
  *
- * The SB_LOCK must be held when this routine is called.
+ * The m_sb_lock must be held when this routine is called.
  */
 int
 xfs_mod_incore_sb_unlocked(
@@ -1606,7 +1655,7 @@ xfs_mod_incore_sb_unlocked(
 /*
  * xfs_mod_incore_sb() is used to change a field in the in-core
  * superblock structure by the specified delta.  This modification
- * is protected by the SB_LOCK.  Just use the xfs_mod_incore_sb_unlocked()
+ * is protected by the m_sb_lock.  Just use the xfs_mod_incore_sb_unlocked()
  * routine to do the work.
  */
 int
@@ -1616,7 +1665,6 @@ xfs_mod_incore_sb(
 	int64_t		delta,
 	int		rsvd)
 {
-	unsigned long	s;
 	int	status;
 
 	/* check for per-cpu counters */
@@ -1633,9 +1681,9 @@ xfs_mod_incore_sb(
 		/* FALLTHROUGH */
 #endif
 	default:
-		s = XFS_SB_LOCK(mp);
+		spin_lock(&mp->m_sb_lock);
 		status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-		XFS_SB_UNLOCK(mp, s);
+		spin_unlock(&mp->m_sb_lock);
 		break;
 	}
 
@@ -1656,7 +1704,6 @@ xfs_mod_incore_sb(
 int
 xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
 {
-	unsigned long	s;
 	int		status=0;
 	xfs_mod_sb_t	*msbp;
 
@@ -1664,10 +1711,10 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
 	 * Loop through the array of mod structures and apply each
 	 * individually.  If any fail, then back out all those
 	 * which have already been applied.  Do all of this within
-	 * the scope of the SB_LOCK so that all of the changes will
+	 * the scope of the m_sb_lock so that all of the changes will
 	 * be atomic.
 	 */
-	s = XFS_SB_LOCK(mp);
+	spin_lock(&mp->m_sb_lock);
 	msbp = &msb[0];
 	for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
 		/*
@@ -1681,11 +1728,11 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
 		case XFS_SBS_IFREE:
 		case XFS_SBS_FDBLOCKS:
 			if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
-				XFS_SB_UNLOCK(mp, s);
+				spin_unlock(&mp->m_sb_lock);
 				status = xfs_icsb_modify_counters(mp,
 							msbp->msb_field,
 							msbp->msb_delta, rsvd);
-				s = XFS_SB_LOCK(mp);
+				spin_lock(&mp->m_sb_lock);
 				break;
 			}
 			/* FALLTHROUGH */
@@ -1719,12 +1766,12 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
 			case XFS_SBS_IFREE:
 			case XFS_SBS_FDBLOCKS:
 				if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
-					XFS_SB_UNLOCK(mp, s);
+					spin_unlock(&mp->m_sb_lock);
 					status = xfs_icsb_modify_counters(mp,
 							msbp->msb_field,
 							-(msbp->msb_delta),
 							rsvd);
-					s = XFS_SB_LOCK(mp);
+					spin_lock(&mp->m_sb_lock);
 					break;
 				}
 				/* FALLTHROUGH */
@@ -1740,7 +1787,7 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
 			msbp--;
 		}
 	}
-	XFS_SB_UNLOCK(mp, s);
+	spin_unlock(&mp->m_sb_lock);
 	return status;
 }
 
@@ -1888,12 +1935,12 @@ xfs_mount_log_sbunit(
  *
  * Locking rules:
  *
- * 	1. XFS_SB_LOCK() before picking up per-cpu locks
+ * 	1. m_sb_lock before picking up per-cpu locks
  * 	2. per-cpu locks always picked up via for_each_online_cpu() order
- * 	3. accurate counter sync requires XFS_SB_LOCK + per cpu locks
+ * 	3. accurate counter sync requires m_sb_lock + per cpu locks
  * 	4. modifying per-cpu counters requires holding per-cpu lock
- * 	5. modifying global counters requires holding XFS_SB_LOCK
- *	6. enabling or disabling a counter requires holding the XFS_SB_LOCK
+ * 	5. modifying global counters requires holding m_sb_lock
+ *	6. enabling or disabling a counter requires holding the m_sb_lock 
  *	   and _none_ of the per-cpu locks.
  *
  * Disabled counters are only ever re-enabled by a balance operation
@@ -1920,7 +1967,6 @@ xfs_icsb_cpu_notify(
 {
 	xfs_icsb_cnts_t *cntp;
 	xfs_mount_t	*mp;
-	int		s;
 
 	mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
 	cntp = (xfs_icsb_cnts_t *)
@@ -1946,7 +1992,7 @@ xfs_icsb_cpu_notify(
 		 * count into the total on the global superblock and
 		 * re-enable the counters. */
 		xfs_icsb_lock(mp);
-		s = XFS_SB_LOCK(mp);
+		spin_lock(&mp->m_sb_lock);
 		xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
 		xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
 		xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
@@ -1963,7 +2009,7 @@ xfs_icsb_cpu_notify(
 					 XFS_ICSB_SB_LOCKED, 0);
 		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS,
 					 XFS_ICSB_SB_LOCKED, 0);
-		XFS_SB_UNLOCK(mp, s);
+		spin_unlock(&mp->m_sb_lock);
 		xfs_icsb_unlock(mp);
 		break;
 	}
@@ -2194,11 +2240,10 @@ xfs_icsb_sync_counters_flags(
 	int		flags)
 {
 	xfs_icsb_cnts_t	cnt;
-	int		s;
 
 	/* Pass 1: lock all counters */
 	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
-		s = XFS_SB_LOCK(mp);
+		spin_lock(&mp->m_sb_lock);
 
 	xfs_icsb_count(mp, &cnt, flags);
 
@@ -2211,7 +2256,7 @@ xfs_icsb_sync_counters_flags(
 		mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
 
 	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
-		XFS_SB_UNLOCK(mp, s);
+		spin_unlock(&mp->m_sb_lock);
 }
 
 /*
@@ -2252,11 +2297,10 @@ xfs_icsb_balance_counter(
 {
 	uint64_t	count, resid;
 	int		weight = num_online_cpus();
-	int		s;
 	uint64_t	min = (uint64_t)min_per_cpu;
 
 	if (!(flags & XFS_ICSB_SB_LOCKED))
-		s = XFS_SB_LOCK(mp);
+		spin_lock(&mp->m_sb_lock);
 
 	/* disable counter and sync counter */
 	xfs_icsb_disable_counter(mp, field);
@@ -2290,10 +2334,10 @@ xfs_icsb_balance_counter(
 	xfs_icsb_enable_counter(mp, field, count, resid);
 out:
 	if (!(flags & XFS_ICSB_SB_LOCKED))
-		XFS_SB_UNLOCK(mp, s);
+		spin_unlock(&mp->m_sb_lock);
 }
 
-int
+STATIC int
 xfs_icsb_modify_counters(
 	xfs_mount_t	*mp,
 	xfs_sb_field_t	field,
@@ -2302,7 +2346,7 @@ xfs_icsb_modify_counters(
 {
 	xfs_icsb_cnts_t	*icsbp;
 	long long	lcounter;	/* long counter for 64 bit fields */
-	int		cpu, ret = 0, s;
+	int		cpu, ret = 0;
 
 	might_sleep();
 again:
@@ -2380,15 +2424,15 @@ slow_path:
 	 * running atomically here, we know a rebalance cannot
 	 * be in progress. Hence we can go straight to operating
 	 * on the global superblock. We do not call xfs_mod_incore_sb()
-	 * here even though we need to get the SB_LOCK. Doing so
+	 * here even though we need to get the m_sb_lock. Doing so
 	 * will cause us to re-enter this function and deadlock.
-	 * Hence we get the SB_LOCK ourselves and then call
+	 * Hence we get the m_sb_lock ourselves and then call
 	 * xfs_mod_incore_sb_unlocked() as the unlocked path operates
 	 * directly on the global counters.
 	 */
-	s = XFS_SB_LOCK(mp);
+	spin_lock(&mp->m_sb_lock);
 	ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-	XFS_SB_UNLOCK(mp, s);
+	spin_unlock(&mp->m_sb_lock);
 
 	/*
 	 * Now that we've modified the global superblock, we

+ 11 - 116
fs/xfs/xfs_mount.h

@@ -56,20 +56,12 @@ struct cred;
 struct log;
 struct xfs_mount_args;
 struct xfs_inode;
-struct xfs_iocore;
 struct xfs_bmbt_irec;
 struct xfs_bmap_free;
 struct xfs_extdelta;
 struct xfs_swapext;
 struct xfs_mru_cache;
 
-#define	AIL_LOCK_T		lock_t
-#define	AIL_LOCKINIT(x,y)	spinlock_init(x,y)
-#define	AIL_LOCK_DESTROY(x)	spinlock_destroy(x)
-#define	AIL_LOCK(mp,s)		s=mutex_spinlock(&(mp)->m_ail_lock)
-#define	AIL_UNLOCK(mp,s)	mutex_spinunlock(&(mp)->m_ail_lock, s)
-
-
 /*
  * Prototypes and functions for the Data Migration subsystem.
  */
@@ -196,105 +188,6 @@ typedef struct xfs_qmops {
 #define XFS_QM_QUOTACTL(mp, cmd, id, addr) \
 	(*(mp)->m_qm_ops->xfs_quotactl)(mp, cmd, id, addr)
 
-
-/*
- * Prototypes and functions for I/O core modularization.
- */
-
-typedef int		(*xfs_ioinit_t)(struct xfs_mount *,
-				struct xfs_mount_args *, int);
-typedef int		(*xfs_bmapi_t)(struct xfs_trans *, void *,
-				xfs_fileoff_t, xfs_filblks_t, int,
-				xfs_fsblock_t *, xfs_extlen_t,
-				struct xfs_bmbt_irec *, int *,
-				struct xfs_bmap_free *, struct xfs_extdelta *);
-typedef int		(*xfs_bunmapi_t)(struct xfs_trans *,
-				void *, xfs_fileoff_t,
-				xfs_filblks_t, int, xfs_extnum_t,
-				xfs_fsblock_t *, struct xfs_bmap_free *,
-				struct xfs_extdelta *, int *);
-typedef int		(*xfs_bmap_eof_t)(void *, xfs_fileoff_t, int, int *);
-typedef int		(*xfs_iomap_write_direct_t)(
-				void *, xfs_off_t, size_t, int,
-				struct xfs_bmbt_irec *, int *, int);
-typedef int		(*xfs_iomap_write_delay_t)(
-				void *, xfs_off_t, size_t, int,
-				struct xfs_bmbt_irec *, int *);
-typedef int		(*xfs_iomap_write_allocate_t)(
-				void *, xfs_off_t, size_t,
-				struct xfs_bmbt_irec *, int *);
-typedef int		(*xfs_iomap_write_unwritten_t)(
-				void *, xfs_off_t, size_t);
-typedef uint		(*xfs_lck_map_shared_t)(void *);
-typedef void		(*xfs_lock_t)(void *, uint);
-typedef void		(*xfs_lock_demote_t)(void *, uint);
-typedef int		(*xfs_lock_nowait_t)(void *, uint);
-typedef void		(*xfs_unlk_t)(void *, unsigned int);
-typedef xfs_fsize_t	(*xfs_size_t)(void *);
-typedef xfs_fsize_t	(*xfs_iodone_t)(struct xfs_mount *);
-typedef int		(*xfs_swap_extents_t)(void *, void *,
-				struct xfs_swapext*);
-
-typedef struct xfs_ioops {
-	xfs_ioinit_t			xfs_ioinit;
-	xfs_bmapi_t			xfs_bmapi_func;
-	xfs_bunmapi_t			xfs_bunmapi_func;
-	xfs_bmap_eof_t			xfs_bmap_eof_func;
-	xfs_iomap_write_direct_t	xfs_iomap_write_direct;
-	xfs_iomap_write_delay_t		xfs_iomap_write_delay;
-	xfs_iomap_write_allocate_t	xfs_iomap_write_allocate;
-	xfs_iomap_write_unwritten_t	xfs_iomap_write_unwritten;
-	xfs_lock_t			xfs_ilock;
-	xfs_lck_map_shared_t		xfs_lck_map_shared;
-	xfs_lock_demote_t		xfs_ilock_demote;
-	xfs_lock_nowait_t		xfs_ilock_nowait;
-	xfs_unlk_t			xfs_unlock;
-	xfs_size_t			xfs_size_func;
-	xfs_iodone_t			xfs_iodone;
-	xfs_swap_extents_t		xfs_swap_extents_func;
-} xfs_ioops_t;
-
-#define XFS_IOINIT(mp, args, flags) \
-	(*(mp)->m_io_ops.xfs_ioinit)(mp, args, flags)
-#define XFS_BMAPI(mp, trans,io,bno,len,f,first,tot,mval,nmap,flist,delta) \
-	(*(mp)->m_io_ops.xfs_bmapi_func) \
-		(trans,(io)->io_obj,bno,len,f,first,tot,mval,nmap,flist,delta)
-#define XFS_BUNMAPI(mp, trans,io,bno,len,f,nexts,first,flist,delta,done) \
-	(*(mp)->m_io_ops.xfs_bunmapi_func) \
-		(trans,(io)->io_obj,bno,len,f,nexts,first,flist,delta,done)
-#define XFS_BMAP_EOF(mp, io, endoff, whichfork, eof) \
-	(*(mp)->m_io_ops.xfs_bmap_eof_func) \
-		((io)->io_obj, endoff, whichfork, eof)
-#define XFS_IOMAP_WRITE_DIRECT(mp, io, offset, count, flags, mval, nmap, found)\
-	(*(mp)->m_io_ops.xfs_iomap_write_direct) \
-		((io)->io_obj, offset, count, flags, mval, nmap, found)
-#define XFS_IOMAP_WRITE_DELAY(mp, io, offset, count, flags, mval, nmap) \
-	(*(mp)->m_io_ops.xfs_iomap_write_delay) \
-		((io)->io_obj, offset, count, flags, mval, nmap)
-#define XFS_IOMAP_WRITE_ALLOCATE(mp, io, offset, count, mval, nmap) \
-	(*(mp)->m_io_ops.xfs_iomap_write_allocate) \
-		((io)->io_obj, offset, count, mval, nmap)
-#define XFS_IOMAP_WRITE_UNWRITTEN(mp, io, offset, count) \
-	(*(mp)->m_io_ops.xfs_iomap_write_unwritten) \
-		((io)->io_obj, offset, count)
-#define XFS_LCK_MAP_SHARED(mp, io) \
-	(*(mp)->m_io_ops.xfs_lck_map_shared)((io)->io_obj)
-#define XFS_ILOCK(mp, io, mode) \
-	(*(mp)->m_io_ops.xfs_ilock)((io)->io_obj, mode)
-#define XFS_ILOCK_NOWAIT(mp, io, mode) \
-	(*(mp)->m_io_ops.xfs_ilock_nowait)((io)->io_obj, mode)
-#define XFS_IUNLOCK(mp, io, mode) \
-	(*(mp)->m_io_ops.xfs_unlock)((io)->io_obj, mode)
-#define XFS_ILOCK_DEMOTE(mp, io, mode) \
-	(*(mp)->m_io_ops.xfs_ilock_demote)((io)->io_obj, mode)
-#define XFS_SIZE(mp, io) \
-	(*(mp)->m_io_ops.xfs_size_func)((io)->io_obj)
-#define XFS_IODONE(mp) \
-	(*(mp)->m_io_ops.xfs_iodone)(mp)
-#define XFS_SWAP_EXTENTS(mp, io, tio, sxp) \
-	(*(mp)->m_io_ops.xfs_swap_extents_func) \
-		((io)->io_obj, (tio)->io_obj, sxp)
-
 #ifdef HAVE_PERCPU_SB
 
 /*
@@ -326,14 +219,20 @@ extern void	xfs_icsb_sync_counters_flags(struct xfs_mount *, int);
 #define xfs_icsb_sync_counters_flags(mp, flags)	do { } while (0)
 #endif
 
+typedef struct xfs_ail {
+	xfs_ail_entry_t		xa_ail;
+	uint			xa_gen;
+	struct task_struct	*xa_task;
+	xfs_lsn_t		xa_target;
+} xfs_ail_t;
+
 typedef struct xfs_mount {
 	struct super_block	*m_super;
 	xfs_tid_t		m_tid;		/* next unused tid for fs */
-	AIL_LOCK_T		m_ail_lock;	/* fs AIL mutex */
-	xfs_ail_entry_t		m_ail;		/* fs active log item list */
-	uint			m_ail_gen;	/* fs AIL generation count */
+	spinlock_t		m_ail_lock;	/* fs AIL mutex */
+	xfs_ail_t		m_ail;		/* fs active log item list */
 	xfs_sb_t		m_sb;		/* copy of fs superblock */
-	lock_t			m_sb_lock;	/* sb counter mutex */
+	spinlock_t		m_sb_lock;	/* sb counter lock */
 	struct xfs_buf		*m_sb_bp;	/* buffer for superblock */
 	char			*m_fsname;	/* filesystem name */
 	int			m_fsname_len;	/* strlen of fs name */
@@ -342,7 +241,7 @@ typedef struct xfs_mount {
 	int			m_bsize;	/* fs logical block size */
 	xfs_agnumber_t		m_agfrotor;	/* last ag where space found */
 	xfs_agnumber_t		m_agirotor;	/* last ag dir inode alloced */
-	lock_t			m_agirotor_lock;/* .. and lock protecting it */
+	spinlock_t		m_agirotor_lock;/* .. and lock protecting it */
 	xfs_agnumber_t		m_maxagi;	/* highest inode alloc group */
 	struct xfs_inode	*m_inodes;	/* active inode list */
 	struct list_head	m_del_inodes;	/* inodes to reclaim */
@@ -423,7 +322,6 @@ typedef struct xfs_mount {
 						 * hash table */
 	struct xfs_dmops	*m_dm_ops;	/* vector of DMI ops */
 	struct xfs_qmops	*m_qm_ops;	/* vector of XQM ops */
-	struct xfs_ioops	m_io_ops;	/* vector of I/O ops */
 	atomic_t		m_active_trans;	/* number trans frozen */
 #ifdef HAVE_PERCPU_SB
 	xfs_icsb_cnts_t		*m_sb_cnts;	/* per-cpu superblock counters */
@@ -610,8 +508,6 @@ typedef struct xfs_mod_sb {
 
 #define	XFS_MOUNT_ILOCK(mp)	mutex_lock(&((mp)->m_ilock))
 #define	XFS_MOUNT_IUNLOCK(mp)	mutex_unlock(&((mp)->m_ilock))
-#define	XFS_SB_LOCK(mp)		mutex_spinlock(&(mp)->m_sb_lock)
-#define	XFS_SB_UNLOCK(mp,s)	mutex_spinunlock(&(mp)->m_sb_lock,(s))
 
 extern xfs_mount_t *xfs_mount_init(void);
 extern void	xfs_mod_sb(xfs_trans_t *, __int64_t);
@@ -646,7 +542,6 @@ extern int	xfs_qmops_get(struct xfs_mount *, struct xfs_mount_args *);
 extern void	xfs_qmops_put(struct xfs_mount *);
 
 extern struct xfs_dmops xfs_dmcore_xfs;
-extern struct xfs_ioops xfs_iocore_xfs;
 
 extern int	xfs_init(void);
 extern void	xfs_cleanup(void);

+ 32 - 22
fs/xfs/xfs_mru_cache.c

@@ -225,10 +225,14 @@ _xfs_mru_cache_list_insert(
  * list need to be deleted.  For each element this involves removing it from the
  * data store, removing it from the reap list, calling the client's free
  * function and deleting the element from the element zone.
+ *
+ * We get called holding the mru->lock, which we drop and then reacquire.
+ * Sparse need special help with this to tell it we know what we are doing.
  */
 STATIC void
 _xfs_mru_cache_clear_reap_list(
-	xfs_mru_cache_t		*mru)
+	xfs_mru_cache_t		*mru) __releases(mru->lock) __acquires(mru->lock)
+
 {
 	xfs_mru_cache_elem_t	*elem, *next;
 	struct list_head	tmp;
@@ -245,7 +249,7 @@ _xfs_mru_cache_clear_reap_list(
 		 */
 		list_move(&elem->list_node, &tmp);
 	}
-	mutex_spinunlock(&mru->lock, 0);
+	spin_unlock(&mru->lock);
 
 	list_for_each_entry_safe(elem, next, &tmp, list_node) {
 
@@ -259,7 +263,7 @@ _xfs_mru_cache_clear_reap_list(
 		kmem_zone_free(xfs_mru_elem_zone, elem);
 	}
 
-	mutex_spinlock(&mru->lock);
+	spin_lock(&mru->lock);
 }
 
 /*
@@ -280,7 +284,7 @@ _xfs_mru_cache_reap(
 	if (!mru || !mru->lists)
 		return;
 
-	mutex_spinlock(&mru->lock);
+	spin_lock(&mru->lock);
 	next = _xfs_mru_cache_migrate(mru, jiffies);
 	_xfs_mru_cache_clear_reap_list(mru);
 
@@ -294,7 +298,7 @@ _xfs_mru_cache_reap(
 		queue_delayed_work(xfs_mru_reap_wq, &mru->work, next);
 	}
 
-	mutex_spinunlock(&mru->lock, 0);
+	spin_unlock(&mru->lock);
 }
 
 int
@@ -368,7 +372,7 @@ xfs_mru_cache_create(
 	 */
 	INIT_RADIX_TREE(&mru->store, GFP_ATOMIC);
 	INIT_LIST_HEAD(&mru->reap_list);
-	spinlock_init(&mru->lock, "xfs_mru_cache");
+	spin_lock_init(&mru->lock);
 	INIT_DELAYED_WORK(&mru->work, _xfs_mru_cache_reap);
 
 	mru->grp_time  = grp_time;
@@ -398,17 +402,17 @@ xfs_mru_cache_flush(
 	if (!mru || !mru->lists)
 		return;
 
-	mutex_spinlock(&mru->lock);
+	spin_lock(&mru->lock);
 	if (mru->queued) {
-		mutex_spinunlock(&mru->lock, 0);
+		spin_unlock(&mru->lock);
 		cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
-		mutex_spinlock(&mru->lock);
+		spin_lock(&mru->lock);
 	}
 
 	_xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time);
 	_xfs_mru_cache_clear_reap_list(mru);
 
-	mutex_spinunlock(&mru->lock, 0);
+	spin_unlock(&mru->lock);
 }
 
 void
@@ -454,13 +458,13 @@ xfs_mru_cache_insert(
 	elem->key = key;
 	elem->value = value;
 
-	mutex_spinlock(&mru->lock);
+	spin_lock(&mru->lock);
 
 	radix_tree_insert(&mru->store, key, elem);
 	radix_tree_preload_end();
 	_xfs_mru_cache_list_insert(mru, elem);
 
-	mutex_spinunlock(&mru->lock, 0);
+	spin_unlock(&mru->lock);
 
 	return 0;
 }
@@ -483,14 +487,14 @@ xfs_mru_cache_remove(
 	if (!mru || !mru->lists)
 		return NULL;
 
-	mutex_spinlock(&mru->lock);
+	spin_lock(&mru->lock);
 	elem = radix_tree_delete(&mru->store, key);
 	if (elem) {
 		value = elem->value;
 		list_del(&elem->list_node);
 	}
 
-	mutex_spinunlock(&mru->lock, 0);
+	spin_unlock(&mru->lock);
 
 	if (elem)
 		kmem_zone_free(xfs_mru_elem_zone, elem);
@@ -528,6 +532,10 @@ xfs_mru_cache_delete(
  *
  * If the element isn't found, this function returns NULL and the spinlock is
  * released.  xfs_mru_cache_done() should NOT be called when this occurs.
+ *
+ * Because sparse isn't smart enough to know about conditional lock return
+ * status, we need to help it get it right by annotating the path that does
+ * not release the lock.
  */
 void *
 xfs_mru_cache_lookup(
@@ -540,14 +548,14 @@ xfs_mru_cache_lookup(
 	if (!mru || !mru->lists)
 		return NULL;
 
-	mutex_spinlock(&mru->lock);
+	spin_lock(&mru->lock);
 	elem = radix_tree_lookup(&mru->store, key);
 	if (elem) {
 		list_del(&elem->list_node);
 		_xfs_mru_cache_list_insert(mru, elem);
-	}
-	else
-		mutex_spinunlock(&mru->lock, 0);
+		__release(mru_lock); /* help sparse not be stupid */
+	} else
+		spin_unlock(&mru->lock);
 
 	return elem ? elem->value : NULL;
 }
@@ -571,10 +579,12 @@ xfs_mru_cache_peek(
 	if (!mru || !mru->lists)
 		return NULL;
 
-	mutex_spinlock(&mru->lock);
+	spin_lock(&mru->lock);
 	elem = radix_tree_lookup(&mru->store, key);
 	if (!elem)
-		mutex_spinunlock(&mru->lock, 0);
+		spin_unlock(&mru->lock);
+	else
+		__release(mru_lock); /* help sparse not be stupid */
 
 	return elem ? elem->value : NULL;
 }
@@ -586,7 +596,7 @@ xfs_mru_cache_peek(
  */
 void
 xfs_mru_cache_done(
-	xfs_mru_cache_t	*mru)
+	xfs_mru_cache_t	*mru) __releases(mru->lock)
 {
-	mutex_spinunlock(&mru->lock, 0);
+	spin_unlock(&mru->lock);
 }

+ 3 - 4
fs/xfs/xfs_qmops.c

@@ -49,18 +49,17 @@ xfs_mount_reset_sbqflags(xfs_mount_t *mp)
 {
 	int			error;
 	xfs_trans_t		*tp;
-	unsigned long		s;
 
 	mp->m_qflags = 0;
 	/*
 	 * It is OK to look at sb_qflags here in mount path,
-	 * without SB_LOCK.
+	 * without m_sb_lock.
 	 */
 	if (mp->m_sb.sb_qflags == 0)
 		return 0;
-	s = XFS_SB_LOCK(mp);
+	spin_lock(&mp->m_sb_lock);
 	mp->m_sb.sb_qflags = 0;
-	XFS_SB_UNLOCK(mp, s);
+	spin_unlock(&mp->m_sb_lock);
 
 	/*
 	 * if the fs is readonly, let the incore superblock run

+ 5 - 4
fs/xfs/xfs_rename.c

@@ -39,6 +39,7 @@
 #include "xfs_refcache.h"
 #include "xfs_utils.h"
 #include "xfs_trans_space.h"
+#include "xfs_vnodeops.h"
 
 
 /*
@@ -118,7 +119,7 @@ xfs_lock_for_rename(
 	inum1 = ip1->i_ino;
 
 	ASSERT(ip1);
-	ITRACE(ip1);
+	xfs_itrace_ref(ip1);
 
 	/*
 	 * Unlock dp1 and lock dp2 if they are different.
@@ -141,7 +142,7 @@ xfs_lock_for_rename(
 		IRELE (ip1);
 		return error;
 	} else {
-		ITRACE(ip2);
+		xfs_itrace_ref(ip2);
 	}
 
 	/*
@@ -247,8 +248,8 @@ xfs_rename(
 	int		src_namelen = VNAMELEN(src_vname);
 	int		target_namelen = VNAMELEN(target_vname);
 
-	vn_trace_entry(src_dp, "xfs_rename", (inst_t *)__return_address);
-	vn_trace_entry(xfs_vtoi(target_dir_vp), "xfs_rename", (inst_t *)__return_address);
+	xfs_itrace_entry(src_dp);
+	xfs_itrace_entry(xfs_vtoi(target_dir_vp));
 
 	/*
 	 * Find the XFS behavior descriptor for the target directory

+ 7 - 12
fs/xfs/xfs_rtalloc.c

@@ -72,18 +72,6 @@ STATIC int xfs_rtmodify_summary(xfs_mount_t *, xfs_trans_t *, int,
  * Internal functions.
  */
 
-/*
- * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set.
- */
-STATIC int
-xfs_lowbit32(
-	__uint32_t	v)
-{
-	if (v)
-		return ffs(v) - 1;
-	return -1;
-}
-
 /*
  * Allocate space to the bitmap or summary file, and zero it, for growfs.
  */
@@ -444,6 +432,7 @@ xfs_rtallocate_extent_near(
 	}
 	bbno = XFS_BITTOBLOCK(mp, bno);
 	i = 0;
+	ASSERT(minlen != 0);
 	log2len = xfs_highbit32(minlen);
 	/*
 	 * Loop over all bitmap blocks (bbno + i is current block).
@@ -612,6 +601,8 @@ xfs_rtallocate_extent_size(
 	xfs_suminfo_t	sum;		/* summary information for extents */
 
 	ASSERT(minlen % prod == 0 && maxlen % prod == 0);
+	ASSERT(maxlen != 0);
+
 	/*
 	 * Loop over all the levels starting with maxlen.
 	 * At each level, look at all the bitmap blocks, to see if there
@@ -669,6 +660,9 @@ xfs_rtallocate_extent_size(
 		*rtblock = NULLRTBLOCK;
 		return 0;
 	}
+	ASSERT(minlen != 0);
+	ASSERT(maxlen != 0);
+
 	/*
 	 * Loop over sizes, from maxlen down to minlen.
 	 * This time, when we do the allocations, allow smaller ones
@@ -1954,6 +1948,7 @@ xfs_growfs_rt(
 				  nsbp->sb_blocksize * nsbp->sb_rextsize);
 		nsbp->sb_rextents = nsbp->sb_rblocks;
 		do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
+		ASSERT(nsbp->sb_rextents != 0);
 		nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents);
 		nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1;
 		nrsumsize =

+ 0 - 2
fs/xfs/xfs_rtalloc.h

@@ -21,8 +21,6 @@
 struct xfs_mount;
 struct xfs_trans;
 
-#define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME)
-
 /* Min and max rt extent sizes, specified in bytes */
 #define	XFS_MAX_RTEXTSIZE	(1024 * 1024 * 1024)	/* 1GB */
 #define	XFS_DFL_RTEXTSIZE	(64 * 1024)	        /* 64KB */

+ 2 - 10
fs/xfs/xfs_rw.h

@@ -32,18 +32,10 @@ struct xfs_mount;
 static inline xfs_daddr_t
 xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
 {
-	return (((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) ? \
+	return (XFS_IS_REALTIME_INODE(ip) ? \
 		 (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
 		 XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
 }
-#define XFS_FSB_TO_DB_IO(io,fsb) xfs_fsb_to_db_io(io,fsb)
-static inline xfs_daddr_t
-xfs_fsb_to_db_io(struct xfs_iocore *io, xfs_fsblock_t fsb)
-{
-	return (((io)->io_flags & XFS_IOCORE_RT) ? \
-		 XFS_FSB_TO_BB((io)->io_mount, (fsb)) : \
-		 XFS_FSB_TO_DADDR((io)->io_mount, (fsb)));
-}
 
 /*
  * Flags for xfs_free_eofblocks
@@ -61,7 +53,7 @@ xfs_get_extsz_hint(
 {
 	xfs_extlen_t	extsz;
 
-	if (unlikely(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
+	if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
 		extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
 				? ip->i_d.di_extsize
 				: ip->i_mount->m_sb.sb_rextsize;

+ 3 - 4
fs/xfs/xfs_trans.c

@@ -1322,7 +1322,6 @@ xfs_trans_chunk_committed(
 	xfs_lsn_t		item_lsn;
 	struct xfs_mount	*mp;
 	int			i;
-	SPLDECL(s);
 
 	lidp = licp->lic_descs;
 	for (i = 0; i < licp->lic_unused; i++, lidp++) {
@@ -1363,7 +1362,7 @@ xfs_trans_chunk_committed(
 		 * the test below.
 		 */
 		mp = lip->li_mountp;
-		AIL_LOCK(mp,s);
+		spin_lock(&mp->m_ail_lock);
 		if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
 			/*
 			 * This will set the item's lsn to item_lsn
@@ -1372,9 +1371,9 @@ xfs_trans_chunk_committed(
 			 *
 			 * xfs_trans_update_ail() drops the AIL lock.
 			 */
-			xfs_trans_update_ail(mp, lip, item_lsn, s);
+			xfs_trans_update_ail(mp, lip, item_lsn);
 		} else {
-			AIL_UNLOCK(mp, s);
+			spin_unlock(&mp->m_ail_lock);
 		}
 
 		/*

+ 5 - 2
fs/xfs/xfs_trans.h

@@ -992,8 +992,9 @@ int		_xfs_trans_commit(xfs_trans_t *,
 				  int *);
 #define xfs_trans_commit(tp, flags)	_xfs_trans_commit(tp, flags, NULL)
 void		xfs_trans_cancel(xfs_trans_t *, int);
-void		xfs_trans_ail_init(struct xfs_mount *);
-xfs_lsn_t	xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
+int		xfs_trans_ail_init(struct xfs_mount *);
+void		xfs_trans_ail_destroy(struct xfs_mount *);
+void		xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
 xfs_lsn_t	xfs_trans_tail_ail(struct xfs_mount *);
 void		xfs_trans_unlocked_item(struct xfs_mount *,
 					xfs_log_item_t *);
@@ -1001,6 +1002,8 @@ xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp,
 					xfs_agnumber_t ag,
 					xfs_extlen_t idx);
 
+extern kmem_zone_t	*xfs_trans_zone;
+
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_TRANS_H__ */

+ 220 - 120
fs/xfs/xfs_trans_ail.c

@@ -34,9 +34,9 @@ STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_entry_t *);
 STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_entry_t *, xfs_log_item_t *);
 
 #ifdef DEBUG
-STATIC void xfs_ail_check(xfs_ail_entry_t *);
+STATIC void xfs_ail_check(xfs_ail_entry_t *, xfs_log_item_t *);
 #else
-#define	xfs_ail_check(a)
+#define	xfs_ail_check(a,l)
 #endif /* DEBUG */
 
 
@@ -55,16 +55,15 @@ xfs_trans_tail_ail(
 {
 	xfs_lsn_t	lsn;
 	xfs_log_item_t	*lip;
-	SPLDECL(s);
 
-	AIL_LOCK(mp,s);
-	lip = xfs_ail_min(&(mp->m_ail));
+	spin_lock(&mp->m_ail_lock);
+	lip = xfs_ail_min(&(mp->m_ail.xa_ail));
 	if (lip == NULL) {
 		lsn = (xfs_lsn_t)0;
 	} else {
 		lsn = lip->li_lsn;
 	}
-	AIL_UNLOCK(mp, s);
+	spin_unlock(&mp->m_ail_lock);
 
 	return lsn;
 }
@@ -72,120 +71,185 @@ xfs_trans_tail_ail(
 /*
  * xfs_trans_push_ail
  *
- * This routine is called to move the tail of the AIL
- * forward.  It does this by trying to flush items in the AIL
- * whose lsns are below the given threshold_lsn.
+ * This routine is called to move the tail of the AIL forward.  It does this by
+ * trying to flush items in the AIL whose lsns are below the given
+ * threshold_lsn.
  *
- * The routine returns the lsn of the tail of the log.
+ * the push is run asynchronously in a separate thread, so we return the tail
+ * of the log right now instead of the tail after the push. This means we will
+ * either continue right away, or we will sleep waiting on the async thread to
+ * do it's work.
+ *
+ * We do this unlocked - we only need to know whether there is anything in the
+ * AIL at the time we are called. We don't need to access the contents of
+ * any of the objects, so the lock is not needed.
  */
-xfs_lsn_t
+void
 xfs_trans_push_ail(
 	xfs_mount_t		*mp,
 	xfs_lsn_t		threshold_lsn)
 {
-	xfs_lsn_t		lsn;
 	xfs_log_item_t		*lip;
-	int			gen;
-	int			restarts;
-	int			lock_result;
-	int			flush_log;
-	SPLDECL(s);
 
-#define	XFS_TRANS_PUSH_AIL_RESTARTS	1000
+	lip = xfs_ail_min(&mp->m_ail.xa_ail);
+	if (lip && !XFS_FORCED_SHUTDOWN(mp)) {
+		if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0)
+			xfsaild_wakeup(mp, threshold_lsn);
+	}
+}
+
+/*
+ * Return the item in the AIL with the current lsn.
+ * Return the current tree generation number for use
+ * in calls to xfs_trans_next_ail().
+ */
+STATIC xfs_log_item_t *
+xfs_trans_first_push_ail(
+	xfs_mount_t	*mp,
+	int		*gen,
+	xfs_lsn_t	lsn)
+{
+	xfs_log_item_t	*lip;
+
+	lip = xfs_ail_min(&(mp->m_ail.xa_ail));
+	*gen = (int)mp->m_ail.xa_gen;
+	if (lsn == 0)
+		return lip;
+
+	while (lip && (XFS_LSN_CMP(lip->li_lsn, lsn) < 0))
+		lip = lip->li_ail.ail_forw;
 
-	AIL_LOCK(mp,s);
-	lip = xfs_trans_first_ail(mp, &gen);
-	if (lip == NULL || XFS_FORCED_SHUTDOWN(mp)) {
+	return lip;
+}
+
+/*
+ * Function that does the work of pushing on the AIL
+ */
+long
+xfsaild_push(
+	xfs_mount_t	*mp,
+	xfs_lsn_t	*last_lsn)
+{
+	long		tout = 1000; /* milliseconds */
+	xfs_lsn_t	last_pushed_lsn = *last_lsn;
+	xfs_lsn_t	target =  mp->m_ail.xa_target;
+	xfs_lsn_t	lsn;
+	xfs_log_item_t	*lip;
+	int		gen;
+	int		restarts;
+	int		flush_log, count, stuck;
+
+#define	XFS_TRANS_PUSH_AIL_RESTARTS	10
+
+	spin_lock(&mp->m_ail_lock);
+	lip = xfs_trans_first_push_ail(mp, &gen, *last_lsn);
+	if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
 		/*
-		 * Just return if the AIL is empty.
+		 * AIL is empty or our push has reached the end.
 		 */
-		AIL_UNLOCK(mp, s);
-		return (xfs_lsn_t)0;
+		spin_unlock(&mp->m_ail_lock);
+		last_pushed_lsn = 0;
+		goto out;
 	}
 
 	XFS_STATS_INC(xs_push_ail);
 
 	/*
 	 * While the item we are looking at is below the given threshold
-	 * try to flush it out.  Make sure to limit the number of times
-	 * we allow xfs_trans_next_ail() to restart scanning from the
-	 * beginning of the list.  We'd like not to stop until we've at least
+	 * try to flush it out. We'd like not to stop until we've at least
 	 * tried to push on everything in the AIL with an LSN less than
-	 * the given threshold. However, we may give up before that if
-	 * we realize that we've been holding the AIL_LOCK for 'too long',
-	 * blocking interrupts. Currently, too long is < 500us roughly.
+	 * the given threshold.
+	 *
+	 * However, we will stop after a certain number of pushes and wait
+	 * for a reduced timeout to fire before pushing further. This
+	 * prevents use from spinning when we can't do anything or there is
+	 * lots of contention on the AIL lists.
 	 */
-	flush_log = 0;
-	restarts = 0;
-	while (((restarts < XFS_TRANS_PUSH_AIL_RESTARTS) &&
-		(XFS_LSN_CMP(lip->li_lsn, threshold_lsn) < 0))) {
+	tout = 10;
+	lsn = lip->li_lsn;
+	flush_log = stuck = count = restarts = 0;
+	while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
+		int	lock_result;
 		/*
-		 * If we can lock the item without sleeping, unlock
-		 * the AIL lock and flush the item.  Then re-grab the
-		 * AIL lock so we can look for the next item on the
-		 * AIL.  Since we unlock the AIL while we flush the
-		 * item, the next routine may start over again at the
-		 * the beginning of the list if anything has changed.
-		 * That is what the generation count is for.
+		 * If we can lock the item without sleeping, unlock the AIL
+		 * lock and flush the item.  Then re-grab the AIL lock so we
+		 * can look for the next item on the AIL. List changes are
+		 * handled by the AIL lookup functions internally
 		 *
-		 * If we can't lock the item, either its holder will flush
-		 * it or it is already being flushed or it is being relogged.
-		 * In any of these case it is being taken care of and we
-		 * can just skip to the next item in the list.
+		 * If we can't lock the item, either its holder will flush it
+		 * or it is already being flushed or it is being relogged.  In
+		 * any of these case it is being taken care of and we can just
+		 * skip to the next item in the list.
 		 */
 		lock_result = IOP_TRYLOCK(lip);
+		spin_unlock(&mp->m_ail_lock);
 		switch (lock_result) {
-		      case XFS_ITEM_SUCCESS:
-			AIL_UNLOCK(mp, s);
+		case XFS_ITEM_SUCCESS:
 			XFS_STATS_INC(xs_push_ail_success);
 			IOP_PUSH(lip);
-			AIL_LOCK(mp,s);
+			last_pushed_lsn = lsn;
 			break;
 
-		      case XFS_ITEM_PUSHBUF:
-			AIL_UNLOCK(mp, s);
+		case XFS_ITEM_PUSHBUF:
 			XFS_STATS_INC(xs_push_ail_pushbuf);
-#ifdef XFSRACEDEBUG
-			delay_for_intr();
-			delay(300);
-#endif
-			ASSERT(lip->li_ops->iop_pushbuf);
-			ASSERT(lip);
 			IOP_PUSHBUF(lip);
-			AIL_LOCK(mp,s);
+			last_pushed_lsn = lsn;
 			break;
 
-		      case XFS_ITEM_PINNED:
+		case XFS_ITEM_PINNED:
 			XFS_STATS_INC(xs_push_ail_pinned);
+			stuck++;
 			flush_log = 1;
 			break;
 
-		      case XFS_ITEM_LOCKED:
+		case XFS_ITEM_LOCKED:
 			XFS_STATS_INC(xs_push_ail_locked);
+			last_pushed_lsn = lsn;
+			stuck++;
 			break;
 
-		      case XFS_ITEM_FLUSHING:
+		case XFS_ITEM_FLUSHING:
 			XFS_STATS_INC(xs_push_ail_flushing);
+			last_pushed_lsn = lsn;
+			stuck++;
 			break;
 
-		      default:
+		default:
 			ASSERT(0);
 			break;
 		}
 
-		lip = xfs_trans_next_ail(mp, lip, &gen, &restarts);
-		if (lip == NULL) {
+		spin_lock(&mp->m_ail_lock);
+		/* should we bother continuing? */
+		if (XFS_FORCED_SHUTDOWN(mp))
+			break;
+		ASSERT(mp->m_log);
+
+		count++;
+
+		/*
+		 * Are there too many items we can't do anything with?
+		 * If we we are skipping too many items because we can't flush
+		 * them or they are already being flushed, we back off and
+		 * given them time to complete whatever operation is being
+		 * done. i.e. remove pressure from the AIL while we can't make
+		 * progress so traversals don't slow down further inserts and
+		 * removals to/from the AIL.
+		 *
+		 * The value of 100 is an arbitrary magic number based on
+		 * observation.
+		 */
+		if (stuck > 100)
 			break;
-		}
-		if (XFS_FORCED_SHUTDOWN(mp)) {
-			/*
-			 * Just return if we shut down during the last try.
-			 */
-			AIL_UNLOCK(mp, s);
-			return (xfs_lsn_t)0;
-		}
 
+		lip = xfs_trans_next_ail(mp, lip, &gen, &restarts);
+		if (lip == NULL)
+			break;
+		if (restarts > XFS_TRANS_PUSH_AIL_RESTARTS)
+			break;
+		lsn = lip->li_lsn;
 	}
+	spin_unlock(&mp->m_ail_lock);
 
 	if (flush_log) {
 		/*
@@ -193,22 +257,35 @@ xfs_trans_push_ail(
 		 * push out the log so it will become unpinned and
 		 * move forward in the AIL.
 		 */
-		AIL_UNLOCK(mp, s);
 		XFS_STATS_INC(xs_push_ail_flush);
 		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
-		AIL_LOCK(mp, s);
 	}
 
-	lip = xfs_ail_min(&(mp->m_ail));
-	if (lip == NULL) {
-		lsn = (xfs_lsn_t)0;
-	} else {
-		lsn = lip->li_lsn;
+	/*
+	 * We reached the target so wait a bit longer for I/O to complete and
+	 * remove pushed items from the AIL before we start the next scan from
+	 * the start of the AIL.
+	 */
+	if ((XFS_LSN_CMP(lsn, target) >= 0)) {
+		tout += 20;
+		last_pushed_lsn = 0;
+	} else if ((restarts > XFS_TRANS_PUSH_AIL_RESTARTS) ||
+		   (count && ((stuck * 100) / count > 90))) {
+		/*
+		 * Either there is a lot of contention on the AIL or we
+		 * are stuck due to operations in progress. "Stuck" in this
+		 * case is defined as >90% of the items we tried to push
+		 * were stuck.
+		 *
+		 * Backoff a bit more to allow some I/O to complete before
+		 * continuing from where we were.
+		 */
+		tout += 10;
 	}
-
-	AIL_UNLOCK(mp, s);
-	return lsn;
-}	/* xfs_trans_push_ail */
+out:
+	*last_lsn = last_pushed_lsn;
+	return tout;
+}	/* xfsaild_push */
 
 
 /*
@@ -249,7 +326,7 @@ xfs_trans_unlocked_item(
 	 * the call to xfs_log_move_tail() doesn't do anything if there's
 	 * not enough free space to wake people up so we're safe calling it.
 	 */
-	min_lip = xfs_ail_min(&mp->m_ail);
+	min_lip = xfs_ail_min(&mp->m_ail.xa_ail);
 
 	if (min_lip == lip)
 		xfs_log_move_tail(mp, 1);
@@ -269,21 +346,19 @@ xfs_trans_unlocked_item(
  * has changed.
  *
  * This function must be called with the AIL lock held.  The lock
- * is dropped before returning, so the caller must pass in the
- * cookie returned by AIL_LOCK.
+ * is dropped before returning.
  */
 void
 xfs_trans_update_ail(
 	xfs_mount_t	*mp,
 	xfs_log_item_t	*lip,
-	xfs_lsn_t	lsn,
-	unsigned long	s) __releases(mp->m_ail_lock)
+	xfs_lsn_t	lsn) __releases(mp->m_ail_lock)
 {
 	xfs_ail_entry_t		*ailp;
 	xfs_log_item_t		*dlip=NULL;
 	xfs_log_item_t		*mlip;	/* ptr to minimum lip */
 
-	ailp = &(mp->m_ail);
+	ailp = &(mp->m_ail.xa_ail);
 	mlip = xfs_ail_min(ailp);
 
 	if (lip->li_flags & XFS_LI_IN_AIL) {
@@ -296,14 +371,14 @@ xfs_trans_update_ail(
 	lip->li_lsn = lsn;
 
 	xfs_ail_insert(ailp, lip);
-	mp->m_ail_gen++;
+	mp->m_ail.xa_gen++;
 
 	if (mlip == dlip) {
-		mlip = xfs_ail_min(&(mp->m_ail));
-		AIL_UNLOCK(mp, s);
+		mlip = xfs_ail_min(&(mp->m_ail.xa_ail));
+		spin_unlock(&mp->m_ail_lock);
 		xfs_log_move_tail(mp, mlip->li_lsn);
 	} else {
-		AIL_UNLOCK(mp, s);
+		spin_unlock(&mp->m_ail_lock);
 	}
 
 
@@ -322,21 +397,19 @@ xfs_trans_update_ail(
  * has changed.
  *
  * This function must be called with the AIL lock held.  The lock
- * is dropped before returning, so the caller must pass in the
- * cookie returned by AIL_LOCK.
+ * is dropped before returning.
  */
 void
 xfs_trans_delete_ail(
 	xfs_mount_t	*mp,
-	xfs_log_item_t	*lip,
-	unsigned long	s) __releases(mp->m_ail_lock)
+	xfs_log_item_t	*lip) __releases(mp->m_ail_lock)
 {
 	xfs_ail_entry_t		*ailp;
 	xfs_log_item_t		*dlip;
 	xfs_log_item_t		*mlip;
 
 	if (lip->li_flags & XFS_LI_IN_AIL) {
-		ailp = &(mp->m_ail);
+		ailp = &(mp->m_ail.xa_ail);
 		mlip = xfs_ail_min(ailp);
 		dlip = xfs_ail_delete(ailp, lip);
 		ASSERT(dlip == lip);
@@ -344,14 +417,14 @@ xfs_trans_delete_ail(
 
 		lip->li_flags &= ~XFS_LI_IN_AIL;
 		lip->li_lsn = 0;
-		mp->m_ail_gen++;
+		mp->m_ail.xa_gen++;
 
 		if (mlip == dlip) {
-			mlip = xfs_ail_min(&(mp->m_ail));
-			AIL_UNLOCK(mp, s);
+			mlip = xfs_ail_min(&(mp->m_ail.xa_ail));
+			spin_unlock(&mp->m_ail_lock);
 			xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0));
 		} else {
-			AIL_UNLOCK(mp, s);
+			spin_unlock(&mp->m_ail_lock);
 		}
 	}
 	else {
@@ -360,12 +433,12 @@ xfs_trans_delete_ail(
 		 * serious trouble if we get to this stage.
 		 */
 		if (XFS_FORCED_SHUTDOWN(mp))
-			AIL_UNLOCK(mp, s);
+			spin_unlock(&mp->m_ail_lock);
 		else {
 			xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
 		"%s: attempting to delete a log item that is not in the AIL",
 					__FUNCTION__);
-			AIL_UNLOCK(mp, s);
+			spin_unlock(&mp->m_ail_lock);
 			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 		}
 	}
@@ -385,10 +458,10 @@ xfs_trans_first_ail(
 {
 	xfs_log_item_t	*lip;
 
-	lip = xfs_ail_min(&(mp->m_ail));
-	*gen = (int)mp->m_ail_gen;
+	lip = xfs_ail_min(&(mp->m_ail.xa_ail));
+	*gen = (int)mp->m_ail.xa_gen;
 
-	return (lip);
+	return lip;
 }
 
 /*
@@ -408,11 +481,11 @@ xfs_trans_next_ail(
 	xfs_log_item_t	*nlip;
 
 	ASSERT(mp && lip && gen);
-	if (mp->m_ail_gen == *gen) {
-		nlip = xfs_ail_next(&(mp->m_ail), lip);
+	if (mp->m_ail.xa_gen == *gen) {
+		nlip = xfs_ail_next(&(mp->m_ail.xa_ail), lip);
 	} else {
-		nlip = xfs_ail_min(&(mp->m_ail));
-		*gen = (int)mp->m_ail_gen;
+		nlip = xfs_ail_min(&(mp->m_ail).xa_ail);
+		*gen = (int)mp->m_ail.xa_gen;
 		if (restarts != NULL) {
 			XFS_STATS_INC(xs_push_ail_restarts);
 			(*restarts)++;
@@ -437,12 +510,20 @@ xfs_trans_next_ail(
 /*
  * Initialize the doubly linked list to point only to itself.
  */
-void
+int
 xfs_trans_ail_init(
 	xfs_mount_t	*mp)
 {
-	mp->m_ail.ail_forw = (xfs_log_item_t*)&(mp->m_ail);
-	mp->m_ail.ail_back = (xfs_log_item_t*)&(mp->m_ail);
+	mp->m_ail.xa_ail.ail_forw = (xfs_log_item_t*)&mp->m_ail.xa_ail;
+	mp->m_ail.xa_ail.ail_back = (xfs_log_item_t*)&mp->m_ail.xa_ail;
+	return xfsaild_start(mp);
+}
+
+void
+xfs_trans_ail_destroy(
+	xfs_mount_t	*mp)
+{
+	xfsaild_stop(mp);
 }
 
 /*
@@ -482,7 +563,7 @@ xfs_ail_insert(
 	next_lip->li_ail.ail_forw = lip;
 	lip->li_ail.ail_forw->li_ail.ail_back = lip;
 
-	xfs_ail_check(base);
+	xfs_ail_check(base, lip);
 	return;
 }
 
@@ -496,12 +577,12 @@ xfs_ail_delete(
 	xfs_log_item_t	*lip)
 /* ARGSUSED */
 {
+	xfs_ail_check(base, lip);
 	lip->li_ail.ail_forw->li_ail.ail_back = lip->li_ail.ail_back;
 	lip->li_ail.ail_back->li_ail.ail_forw = lip->li_ail.ail_forw;
 	lip->li_ail.ail_forw = NULL;
 	lip->li_ail.ail_back = NULL;
 
-	xfs_ail_check(base);
 	return lip;
 }
 
@@ -545,13 +626,13 @@ xfs_ail_next(
  */
 STATIC void
 xfs_ail_check(
-	xfs_ail_entry_t *base)
+	xfs_ail_entry_t *base,
+	xfs_log_item_t	*lip)
 {
-	xfs_log_item_t	*lip;
 	xfs_log_item_t	*prev_lip;
 
-	lip = base->ail_forw;
-	if (lip == (xfs_log_item_t*)base) {
+	prev_lip = base->ail_forw;
+	if (prev_lip == (xfs_log_item_t*)base) {
 		/*
 		 * Make sure the pointers are correct when the list
 		 * is empty.
@@ -560,10 +641,28 @@ xfs_ail_check(
 		return;
 	}
 
+	/*
+	 * Check the next and previous entries are valid.
+	 */
+	ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
+	prev_lip = lip->li_ail.ail_back;
+	if (prev_lip != (xfs_log_item_t*)base) {
+		ASSERT(prev_lip->li_ail.ail_forw == lip);
+		ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
+	}
+	prev_lip = lip->li_ail.ail_forw;
+	if (prev_lip != (xfs_log_item_t*)base) {
+		ASSERT(prev_lip->li_ail.ail_back == lip);
+		ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
+	}
+
+
+#ifdef XFS_TRANS_DEBUG
 	/*
 	 * Walk the list checking forward and backward pointers,
 	 * lsn ordering, and that every entry has the XFS_LI_IN_AIL
-	 * flag set.
+	 * flag set. This is really expensive, so only do it when
+	 * specifically debugging the transaction subsystem.
 	 */
 	prev_lip = (xfs_log_item_t*)base;
 	while (lip != (xfs_log_item_t*)base) {
@@ -578,5 +677,6 @@ xfs_ail_check(
 	}
 	ASSERT(lip == (xfs_log_item_t*)base);
 	ASSERT(base->ail_back == prev_lip);
+#endif /* XFS_TRANS_DEBUG */
 }
 #endif /* DEBUG */

+ 1 - 0
fs/xfs/xfs_trans_item.c

@@ -21,6 +21,7 @@
 #include "xfs_log.h"
 #include "xfs_inum.h"
 #include "xfs_trans.h"
+#include "xfs_trans_priv.h"
 
 STATIC int	xfs_trans_unlock_chunk(xfs_log_item_chunk_t *,
 					int, int, xfs_lsn_t);

+ 10 - 3
fs/xfs/xfs_trans_priv.h

@@ -47,15 +47,22 @@ xfs_log_busy_slot_t		*xfs_trans_add_busy(xfs_trans_t *tp,
  * From xfs_trans_ail.c
  */
 void			xfs_trans_update_ail(struct xfs_mount *mp,
-				     struct xfs_log_item *lip, xfs_lsn_t lsn,
-				     unsigned long s)
+				     struct xfs_log_item *lip, xfs_lsn_t lsn)
 				     __releases(mp->m_ail_lock);
 void			xfs_trans_delete_ail(struct xfs_mount *mp,
-				     struct xfs_log_item *lip, unsigned long s)
+				     struct xfs_log_item *lip)
 				     __releases(mp->m_ail_lock);
 struct xfs_log_item	*xfs_trans_first_ail(struct xfs_mount *, int *);
 struct xfs_log_item	*xfs_trans_next_ail(struct xfs_mount *,
 				     struct xfs_log_item *, int *, int *);
 
 
+/*
+ * AIL push thread support
+ */
+long	xfsaild_push(struct xfs_mount *, xfs_lsn_t *);
+void	xfsaild_wakeup(struct xfs_mount *, xfs_lsn_t);
+int	xfsaild_start(struct xfs_mount *);
+void	xfsaild_stop(struct xfs_mount *);
+
 #endif	/* __XFS_TRANS_PRIV_H__ */

+ 6 - 5
fs/xfs/xfs_utils.c

@@ -73,7 +73,7 @@ xfs_dir_lookup_int(
 {
 	int		error;
 
-	vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(dp);
 
 	error = xfs_dir_lookup(NULL, dp, VNAME(dentry), VNAMELEN(dentry), inum);
 	if (!error) {
@@ -302,6 +302,7 @@ xfs_droplink(
 
 	ASSERT (ip->i_d.di_nlink > 0);
 	ip->i_d.di_nlink--;
+	drop_nlink(ip->i_vnode);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
 	error = 0;
@@ -330,7 +331,6 @@ xfs_bump_ino_vers2(
 	xfs_inode_t	*ip)
 {
 	xfs_mount_t	*mp;
-	unsigned long		s;
 
 	ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE));
 	ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1);
@@ -340,13 +340,13 @@ xfs_bump_ino_vers2(
 	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
 	mp = tp->t_mountp;
 	if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
-		s = XFS_SB_LOCK(mp);
+		spin_lock(&mp->m_sb_lock);
 		if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
 			XFS_SB_VERSION_ADDNLINK(&mp->m_sb);
-			XFS_SB_UNLOCK(mp, s);
+			spin_unlock(&mp->m_sb_lock);
 			xfs_mod_sb(tp, XFS_SB_VERSIONNUM);
 		} else {
-			XFS_SB_UNLOCK(mp, s);
+			spin_unlock(&mp->m_sb_lock);
 		}
 	}
 	/* Caller must log the inode */
@@ -366,6 +366,7 @@ xfs_bumplink(
 
 	ASSERT(ip->i_d.di_nlink > 0);
 	ip->i_d.di_nlink++;
+	inc_nlink(ip->i_vnode);
 	if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) &&
 	    (ip->i_d.di_nlink > XFS_MAXLINK_1)) {
 		/*

+ 0 - 2
fs/xfs/xfs_utils.h

@@ -20,8 +20,6 @@
 
 #define IRELE(ip)	VN_RELE(XFS_ITOV(ip))
 #define IHOLD(ip)	VN_HOLD(XFS_ITOV(ip))
-#define	ITRACE(ip)	vn_trace_ref(ip, __FILE__, __LINE__, \
-				(inst_t *)__return_address)
 
 extern int xfs_get_dir_entry (bhv_vname_t *, xfs_inode_t **);
 extern int xfs_dir_lookup_int (xfs_inode_t *, uint, bhv_vname_t *, xfs_ino_t *,

+ 29 - 764
fs/xfs/xfs_vfsops.c

@@ -58,17 +58,12 @@
 #include "xfs_vfsops.h"
 
 
-int
+int __init
 xfs_init(void)
 {
-	extern kmem_zone_t	*xfs_bmap_free_item_zone;
-	extern kmem_zone_t	*xfs_btree_cur_zone;
-	extern kmem_zone_t	*xfs_trans_zone;
-	extern kmem_zone_t	*xfs_buf_item_zone;
-	extern kmem_zone_t	*xfs_dabuf_zone;
 #ifdef XFS_DABUF_DEBUG
-	extern lock_t	        xfs_dabuf_global_lock;
-	spinlock_init(&xfs_dabuf_global_lock, "xfsda");
+	extern spinlock_t        xfs_dabuf_global_lock;
+	spin_lock_init(&xfs_dabuf_global_lock);
 #endif
 
 	/*
@@ -152,18 +147,12 @@ xfs_init(void)
 	return 0;
 }
 
-void
+void __exit
 xfs_cleanup(void)
 {
-	extern kmem_zone_t	*xfs_bmap_free_item_zone;
-	extern kmem_zone_t	*xfs_btree_cur_zone;
 	extern kmem_zone_t	*xfs_inode_zone;
-	extern kmem_zone_t	*xfs_trans_zone;
-	extern kmem_zone_t	*xfs_da_state_zone;
-	extern kmem_zone_t	*xfs_dabuf_zone;
 	extern kmem_zone_t	*xfs_efd_zone;
 	extern kmem_zone_t	*xfs_efi_zone;
-	extern kmem_zone_t	*xfs_buf_item_zone;
 	extern kmem_zone_t	*xfs_icluster_zone;
 
 	xfs_cleanup_procfs();
@@ -449,8 +438,6 @@ xfs_mount(
 	if (error)
 		return error;
 
-	mp->m_io_ops = xfs_iocore_xfs;
-
 	if (args->flags & XFSMNT_QUIET)
 		flags |= XFS_MFSI_QUIET;
 
@@ -544,7 +531,7 @@ xfs_mount(
 	if ((error = xfs_filestream_mount(mp)))
 		goto error2;
 
-	error = XFS_IOINIT(mp, args, flags);
+	error = xfs_mountfs(mp, flags);
 	if (error)
 		goto error2;
 
@@ -694,7 +681,7 @@ xfs_quiesce_fs(
  * care of the metadata. New transactions are already blocked, so we need to
  * wait for any remaining transactions to drain out before proceding.
  */
-STATIC void
+void
 xfs_attr_quiesce(
 	xfs_mount_t	*mp)
 {
@@ -820,80 +807,6 @@ fscorrupt_out2:
 	return XFS_ERROR(EFSCORRUPTED);
 }
 
-/*
- * xfs_root extracts the root vnode from a vfs.
- *
- * vfsp -- the vfs struct for the desired file system
- * vpp  -- address of the caller's vnode pointer which should be
- *         set to the desired fs root vnode
- */
-int
-xfs_root(
-	xfs_mount_t	*mp,
-	bhv_vnode_t	**vpp)
-{
-	bhv_vnode_t	*vp;
-
-	vp = XFS_ITOV(mp->m_rootip);
-	VN_HOLD(vp);
-	*vpp = vp;
-	return 0;
-}
-
-/*
- * xfs_statvfs
- *
- * Fill in the statvfs structure for the given file system.  We use
- * the superblock lock in the mount structure to ensure a consistent
- * snapshot of the counters returned.
- */
-int
-xfs_statvfs(
-	xfs_mount_t	*mp,
-	bhv_statvfs_t	*statp,
-	bhv_vnode_t	*vp)
-{
-	__uint64_t	fakeinos;
-	xfs_extlen_t	lsize;
-	xfs_sb_t	*sbp;
-	unsigned long	s;
-
-	sbp = &(mp->m_sb);
-
-	statp->f_type = XFS_SB_MAGIC;
-
-	xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT);
-	s = XFS_SB_LOCK(mp);
-	statp->f_bsize = sbp->sb_blocksize;
-	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
-	statp->f_blocks = sbp->sb_dblocks - lsize;
-	statp->f_bfree = statp->f_bavail =
-				sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
-	fakeinos = statp->f_bfree << sbp->sb_inopblog;
-#if XFS_BIG_INUMS
-	fakeinos += mp->m_inoadd;
-#endif
-	statp->f_files =
-	    MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
-	if (mp->m_maxicount)
-#if XFS_BIG_INUMS
-		if (!mp->m_inoadd)
-#endif
-			statp->f_files = min_t(typeof(statp->f_files),
-						statp->f_files,
-						mp->m_maxicount);
-	statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
-	XFS_SB_UNLOCK(mp, s);
-
-	xfs_statvfs_fsid(statp, mp);
-	statp->f_namelen = MAXNAMELEN - 1;
-
-	if (vp)
-		XFS_QM_DQSTATVFS(xfs_vtoi(vp), statp);
-	return 0;
-}
-
-
 /*
  * xfs_sync flushes any pending I/O to file system vfsp.
  *
@@ -981,8 +894,6 @@ xfs_sync_inodes(
 	int             *bypassed)
 {
 	xfs_inode_t	*ip = NULL;
-	xfs_inode_t	*ip_next;
-	xfs_buf_t	*bp;
 	bhv_vnode_t	*vp = NULL;
 	int		error;
 	int		last_error;
@@ -992,7 +903,6 @@ xfs_sync_inodes(
 	boolean_t	mount_locked;
 	boolean_t	vnode_refed;
 	int		preempt;
-	xfs_dinode_t	*dip;
 	xfs_iptr_t	*ipointer;
 #ifdef DEBUG
 	boolean_t	ipointer_in = B_FALSE;
@@ -1045,6 +955,8 @@ xfs_sync_inodes(
 
 #define XFS_PREEMPT_MASK	0x7f
 
+	ASSERT(!(flags & SYNC_BDFLUSH));
+
 	if (bypassed)
 		*bypassed = 0;
 	if (mp->m_flags & XFS_MOUNT_RDONLY)
@@ -1057,7 +969,7 @@ xfs_sync_inodes(
 	ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP);
 
 	fflag = XFS_B_ASYNC;		/* default is don't wait */
-	if (flags & (SYNC_BDFLUSH | SYNC_DELWRI))
+	if (flags & SYNC_DELWRI)
 		fflag = XFS_B_DELWRI;
 	if (flags & SYNC_WAIT)
 		fflag = 0;		/* synchronous overrides all */
@@ -1146,24 +1058,6 @@ xfs_sync_inodes(
 			return 0;
 		}
 
-		/*
-		 * If this is just vfs_sync() or pflushd() calling
-		 * then we can skip inodes for which it looks like
-		 * there is nothing to do.  Since we don't have the
-		 * inode locked this is racy, but these are periodic
-		 * calls so it doesn't matter.  For the others we want
-		 * to know for sure, so we at least try to lock them.
-		 */
-		if (flags & SYNC_BDFLUSH) {
-			if (((ip->i_itemp == NULL) ||
-			     !(ip->i_itemp->ili_format.ilf_fields &
-			       XFS_ILOG_ALL)) &&
-			    (ip->i_update_core == 0)) {
-				ip = ip->i_mnext;
-				continue;
-			}
-		}
-
 		/*
 		 * Try to lock without sleeping.  We're out of order with
 		 * the inode list lock here, so if we fail we need to drop
@@ -1181,7 +1075,7 @@ xfs_sync_inodes(
 		 * it.
 		 */
 		if (xfs_ilock_nowait(ip, lock_flags) == 0) {
-			if ((flags & SYNC_BDFLUSH) || (vp == NULL)) {
+			if (vp == NULL) {
 				ip = ip->i_mnext;
 				continue;
 			}
@@ -1242,160 +1136,27 @@ xfs_sync_inodes(
 			xfs_ilock(ip, XFS_ILOCK_SHARED);
 		}
 
-		if (flags & SYNC_BDFLUSH) {
-			if ((flags & SYNC_ATTR) &&
-			    ((ip->i_update_core) ||
-			     ((ip->i_itemp != NULL) &&
-			      (ip->i_itemp->ili_format.ilf_fields != 0)))) {
-
-				/* Insert marker and drop lock if not already
-				 * done.
-				 */
-				if (mount_locked) {
-					IPOINTER_INSERT(ip, mp);
-				}
-
-				/*
-				 * We don't want the periodic flushing of the
-				 * inodes by vfs_sync() to interfere with
-				 * I/O to the file, especially read I/O
-				 * where it is only the access time stamp
-				 * that is being flushed out.  To prevent
-				 * long periods where we have both inode
-				 * locks held shared here while reading the
-				 * inode's buffer in from disk, we drop the
-				 * inode lock while reading in the inode
-				 * buffer.  We have to release the buffer
-				 * and reacquire the inode lock so that they
-				 * are acquired in the proper order (inode
-				 * locks first).  The buffer will go at the
-				 * end of the lru chain, though, so we can
-				 * expect it to still be there when we go
-				 * for it again in xfs_iflush().
-				 */
-				if ((xfs_ipincount(ip) == 0) &&
-				    xfs_iflock_nowait(ip)) {
-
-					xfs_ifunlock(ip);
-					xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-					error = xfs_itobp(mp, NULL, ip,
-							  &dip, &bp, 0, 0);
-					if (!error) {
-						xfs_buf_relse(bp);
-					} else {
-						/* Bailing out, remove the
-						 * marker and free it.
-						 */
-						XFS_MOUNT_ILOCK(mp);
-						IPOINTER_REMOVE(ip, mp);
-						XFS_MOUNT_IUNLOCK(mp);
-
-						ASSERT(!(lock_flags &
-							XFS_IOLOCK_SHARED));
-
-						kmem_free(ipointer,
-							sizeof(xfs_iptr_t));
-						return (0);
-					}
-
-					/*
-					 * Since we dropped the inode lock,
-					 * the inode may have been reclaimed.
-					 * Therefore, we reacquire the mount
-					 * lock and check to see if we were the
-					 * inode reclaimed. If this happened
-					 * then the ipointer marker will no
-					 * longer point back at us. In this
-					 * case, move ip along to the inode
-					 * after the marker, remove the marker
-					 * and continue.
-					 */
-					XFS_MOUNT_ILOCK(mp);
-					mount_locked = B_TRUE;
-
-					if (ip != ipointer->ip_mprev) {
-						IPOINTER_REMOVE(ip, mp);
-
-						ASSERT(!vnode_refed);
-						ASSERT(!(lock_flags &
-							XFS_IOLOCK_SHARED));
-						continue;
-					}
-
-					ASSERT(ip->i_mount == mp);
-
-					if (xfs_ilock_nowait(ip,
-						    XFS_ILOCK_SHARED) == 0) {
-						ASSERT(ip->i_mount == mp);
-						/*
-						 * We failed to reacquire
-						 * the inode lock without
-						 * sleeping, so just skip
-						 * the inode for now.  We
-						 * clear the ILOCK bit from
-						 * the lock_flags so that we
-						 * won't try to drop a lock
-						 * we don't hold below.
-						 */
-						lock_flags &= ~XFS_ILOCK_SHARED;
-						IPOINTER_REMOVE(ip_next, mp);
-					} else if ((xfs_ipincount(ip) == 0) &&
-						   xfs_iflock_nowait(ip)) {
-						ASSERT(ip->i_mount == mp);
-						/*
-						 * Since this is vfs_sync()
-						 * calling we only flush the
-						 * inode out if we can lock
-						 * it without sleeping and
-						 * it is not pinned.  Drop
-						 * the mount lock here so
-						 * that we don't hold it for
-						 * too long. We already have
-						 * a marker in the list here.
-						 */
-						XFS_MOUNT_IUNLOCK(mp);
-						mount_locked = B_FALSE;
-						error = xfs_iflush(ip,
-							   XFS_IFLUSH_DELWRI);
-					} else {
-						ASSERT(ip->i_mount == mp);
-						IPOINTER_REMOVE(ip_next, mp);
-					}
-				}
-
-			}
+		if ((flags & SYNC_ATTR) &&
+		    (ip->i_update_core ||
+		     (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) {
+			if (mount_locked)
+				IPOINTER_INSERT(ip, mp);
 
-		} else {
-			if ((flags & SYNC_ATTR) &&
-			    ((ip->i_update_core) ||
-			     ((ip->i_itemp != NULL) &&
-			      (ip->i_itemp->ili_format.ilf_fields != 0)))) {
-				if (mount_locked) {
-					IPOINTER_INSERT(ip, mp);
-				}
+			if (flags & SYNC_WAIT) {
+				xfs_iflock(ip);
+				error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
 
-				if (flags & SYNC_WAIT) {
-					xfs_iflock(ip);
-					error = xfs_iflush(ip,
-							   XFS_IFLUSH_SYNC);
-				} else {
-					/*
-					 * If we can't acquire the flush
-					 * lock, then the inode is already
-					 * being flushed so don't bother
-					 * waiting.  If we can lock it then
-					 * do a delwri flush so we can
-					 * combine multiple inode flushes
-					 * in each disk write.
-					 */
-					if (xfs_iflock_nowait(ip)) {
-						error = xfs_iflush(ip,
-							   XFS_IFLUSH_DELWRI);
-					}
-					else if (bypassed)
-						(*bypassed)++;
-				}
+			/*
+			 * If we can't acquire the flush lock, then the inode
+			 * is already being flushed so don't bother waiting.
+			 *
+			 * If we can lock it then do a delwri flush so we can
+			 * combine multiple inode flushes in each disk write.
+			 */
+			} else if (xfs_iflock_nowait(ip)) {
+				error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
+			} else if (bypassed) {
+				(*bypassed)++;
 			}
 		}
 
@@ -1627,499 +1388,3 @@ xfs_syncsub(
 
 	return XFS_ERROR(last_error);
 }
-
-/*
- * xfs_vget - called by DMAPI and NFSD to get vnode from file handle
- */
-int
-xfs_vget(
-	xfs_mount_t	*mp,
-	bhv_vnode_t	**vpp,
-	xfs_fid_t	*xfid)
-{
-	xfs_inode_t	*ip;
-	int		error;
-	xfs_ino_t	ino;
-	unsigned int	igen;
-
-	/*
-	 * Invalid.  Since handles can be created in user space and passed in
-	 * via gethandle(), this is not cause for a panic.
-	 */
-	if (xfid->fid_len != sizeof(*xfid) - sizeof(xfid->fid_len))
-		return XFS_ERROR(EINVAL);
-
-	ino  = xfid->fid_ino;
-	igen = xfid->fid_gen;
-
-	/*
-	 * NFS can sometimes send requests for ino 0.  Fail them gracefully.
-	 */
-	if (ino == 0)
-		return XFS_ERROR(ESTALE);
-
-	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
-	if (error) {
-		*vpp = NULL;
-		return error;
-	}
-
-	if (ip == NULL) {
-		*vpp = NULL;
-		return XFS_ERROR(EIO);
-	}
-
-	if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) {
-		xfs_iput_new(ip, XFS_ILOCK_SHARED);
-		*vpp = NULL;
-		return XFS_ERROR(ENOENT);
-	}
-
-	*vpp = XFS_ITOV(ip);
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-	return 0;
-}
-
-
-#define MNTOPT_LOGBUFS	"logbufs"	/* number of XFS log buffers */
-#define MNTOPT_LOGBSIZE	"logbsize"	/* size of XFS log buffers */
-#define MNTOPT_LOGDEV	"logdev"	/* log device */
-#define MNTOPT_RTDEV	"rtdev"		/* realtime I/O device */
-#define MNTOPT_BIOSIZE	"biosize"	/* log2 of preferred buffered io size */
-#define MNTOPT_WSYNC	"wsync"		/* safe-mode nfs compatible mount */
-#define MNTOPT_INO64	"ino64"		/* force inodes into 64-bit range */
-#define MNTOPT_NOALIGN	"noalign"	/* turn off stripe alignment */
-#define MNTOPT_SWALLOC	"swalloc"	/* turn on stripe width allocation */
-#define MNTOPT_SUNIT	"sunit"		/* data volume stripe unit */
-#define MNTOPT_SWIDTH	"swidth"	/* data volume stripe width */
-#define MNTOPT_NOUUID	"nouuid"	/* ignore filesystem UUID */
-#define MNTOPT_MTPT	"mtpt"		/* filesystem mount point */
-#define MNTOPT_GRPID	"grpid"		/* group-ID from parent directory */
-#define MNTOPT_NOGRPID	"nogrpid"	/* group-ID from current process */
-#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
-#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
-#define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
-#define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
-#define MNTOPT_BARRIER	"barrier"	/* use writer barriers for log write and
-					 * unwritten extent conversion */
-#define MNTOPT_NOBARRIER "nobarrier"	/* .. disable */
-#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
-#define MNTOPT_64BITINODE   "inode64"	/* inodes can be allocated anywhere */
-#define MNTOPT_IKEEP	"ikeep"		/* do not free empty inode clusters */
-#define MNTOPT_NOIKEEP	"noikeep"	/* free empty inode clusters */
-#define MNTOPT_LARGEIO	   "largeio"	/* report large I/O sizes in stat() */
-#define MNTOPT_NOLARGEIO   "nolargeio"	/* do not report large I/O sizes
-					 * in stat(). */
-#define MNTOPT_ATTR2	"attr2"		/* do use attr2 attribute format */
-#define MNTOPT_NOATTR2	"noattr2"	/* do not use attr2 attribute format */
-#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
-#define MNTOPT_QUOTA	"quota"		/* disk quotas (user) */
-#define MNTOPT_NOQUOTA	"noquota"	/* no quotas */
-#define MNTOPT_USRQUOTA	"usrquota"	/* user quota enabled */
-#define MNTOPT_GRPQUOTA	"grpquota"	/* group quota enabled */
-#define MNTOPT_PRJQUOTA	"prjquota"	/* project quota enabled */
-#define MNTOPT_UQUOTA	"uquota"	/* user quota (IRIX variant) */
-#define MNTOPT_GQUOTA	"gquota"	/* group quota (IRIX variant) */
-#define MNTOPT_PQUOTA	"pquota"	/* project quota (IRIX variant) */
-#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
-#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
-#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
-#define MNTOPT_QUOTANOENF  "qnoenforce"	/* same as uqnoenforce */
-#define MNTOPT_DMAPI	"dmapi"		/* DMI enabled (DMAPI / XDSM) */
-#define MNTOPT_XDSM	"xdsm"		/* DMI enabled (DMAPI / XDSM) */
-#define MNTOPT_DMI	"dmi"		/* DMI enabled (DMAPI / XDSM) */
-
-STATIC unsigned long
-suffix_strtoul(char *s, char **endp, unsigned int base)
-{
-	int	last, shift_left_factor = 0;
-	char	*value = s;
-
-	last = strlen(value) - 1;
-	if (value[last] == 'K' || value[last] == 'k') {
-		shift_left_factor = 10;
-		value[last] = '\0';
-	}
-	if (value[last] == 'M' || value[last] == 'm') {
-		shift_left_factor = 20;
-		value[last] = '\0';
-	}
-	if (value[last] == 'G' || value[last] == 'g') {
-		shift_left_factor = 30;
-		value[last] = '\0';
-	}
-
-	return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
-}
-
-int
-xfs_parseargs(
-	struct xfs_mount	*mp,
-	char			*options,
-	struct xfs_mount_args	*args,
-	int			update)
-{
-	char			*this_char, *value, *eov;
-	int			dsunit, dswidth, vol_dsunit, vol_dswidth;
-	int			iosize;
-	int			ikeep = 0;
-
-	args->flags |= XFSMNT_BARRIER;
-	args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
-
-	if (!options)
-		goto done;
-
-	iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0;
-
-	while ((this_char = strsep(&options, ",")) != NULL) {
-		if (!*this_char)
-			continue;
-		if ((value = strchr(this_char, '=')) != NULL)
-			*value++ = 0;
-
-		if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
-			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			args->logbufs = simple_strtoul(value, &eov, 10);
-		} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
-			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			args->logbufsize = suffix_strtoul(value, &eov, 10);
-		} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
-			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			strncpy(args->logname, value, MAXNAMELEN);
-		} else if (!strcmp(this_char, MNTOPT_MTPT)) {
-			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			strncpy(args->mtpt, value, MAXNAMELEN);
-		} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
-			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			strncpy(args->rtname, value, MAXNAMELEN);
-		} else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
-			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			iosize = simple_strtoul(value, &eov, 10);
-			args->flags |= XFSMNT_IOSIZE;
-			args->iosizelog = (uint8_t) iosize;
-		} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
-			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			iosize = suffix_strtoul(value, &eov, 10);
-			args->flags |= XFSMNT_IOSIZE;
-			args->iosizelog = ffs(iosize) - 1;
-		} else if (!strcmp(this_char, MNTOPT_GRPID) ||
-			   !strcmp(this_char, MNTOPT_BSDGROUPS)) {
-			mp->m_flags |= XFS_MOUNT_GRPID;
-		} else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
-			   !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
-			mp->m_flags &= ~XFS_MOUNT_GRPID;
-		} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
-			args->flags |= XFSMNT_WSYNC;
-		} else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
-			args->flags |= XFSMNT_OSYNCISOSYNC;
-		} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
-			args->flags |= XFSMNT_NORECOVERY;
-		} else if (!strcmp(this_char, MNTOPT_INO64)) {
-			args->flags |= XFSMNT_INO64;
-#if !XFS_BIG_INUMS
-			cmn_err(CE_WARN,
-				"XFS: %s option not allowed on this system",
-				this_char);
-			return EINVAL;
-#endif
-		} else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
-			args->flags |= XFSMNT_NOALIGN;
-		} else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
-			args->flags |= XFSMNT_SWALLOC;
-		} else if (!strcmp(this_char, MNTOPT_SUNIT)) {
-			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			dsunit = simple_strtoul(value, &eov, 10);
-		} else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
-			if (!value || !*value) {
-				cmn_err(CE_WARN,
-					"XFS: %s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			dswidth = simple_strtoul(value, &eov, 10);
-		} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
-			args->flags &= ~XFSMNT_32BITINODES;
-#if !XFS_BIG_INUMS
-			cmn_err(CE_WARN,
-				"XFS: %s option not allowed on this system",
-				this_char);
-			return EINVAL;
-#endif
-		} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
-			args->flags |= XFSMNT_NOUUID;
-		} else if (!strcmp(this_char, MNTOPT_BARRIER)) {
-			args->flags |= XFSMNT_BARRIER;
-		} else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
-			args->flags &= ~XFSMNT_BARRIER;
-		} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
-			ikeep = 1;
-			args->flags &= ~XFSMNT_IDELETE;
-		} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
-			args->flags |= XFSMNT_IDELETE;
-		} else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
-			args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE;
-		} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
-			args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
-		} else if (!strcmp(this_char, MNTOPT_ATTR2)) {
-			args->flags |= XFSMNT_ATTR2;
-		} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
-			args->flags &= ~XFSMNT_ATTR2;
-		} else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
-			args->flags2 |= XFSMNT2_FILESTREAMS;
-		} else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
-			args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA);
-			args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA);
-		} else if (!strcmp(this_char, MNTOPT_QUOTA) ||
-			   !strcmp(this_char, MNTOPT_UQUOTA) ||
-			   !strcmp(this_char, MNTOPT_USRQUOTA)) {
-			args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF;
-		} else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
-			   !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
-			args->flags |= XFSMNT_UQUOTA;
-			args->flags &= ~XFSMNT_UQUOTAENF;
-		} else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
-			   !strcmp(this_char, MNTOPT_PRJQUOTA)) {
-			args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF;
-		} else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
-			args->flags |= XFSMNT_PQUOTA;
-			args->flags &= ~XFSMNT_PQUOTAENF;
-		} else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
-			   !strcmp(this_char, MNTOPT_GRPQUOTA)) {
-			args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF;
-		} else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
-			args->flags |= XFSMNT_GQUOTA;
-			args->flags &= ~XFSMNT_GQUOTAENF;
-		} else if (!strcmp(this_char, MNTOPT_DMAPI)) {
-			args->flags |= XFSMNT_DMAPI;
-		} else if (!strcmp(this_char, MNTOPT_XDSM)) {
-			args->flags |= XFSMNT_DMAPI;
-		} else if (!strcmp(this_char, MNTOPT_DMI)) {
-			args->flags |= XFSMNT_DMAPI;
-		} else if (!strcmp(this_char, "ihashsize")) {
-			cmn_err(CE_WARN,
-	"XFS: ihashsize no longer used, option is deprecated.");
-		} else if (!strcmp(this_char, "osyncisdsync")) {
-			/* no-op, this is now the default */
-			cmn_err(CE_WARN,
-	"XFS: osyncisdsync is now the default, option is deprecated.");
-		} else if (!strcmp(this_char, "irixsgid")) {
-			cmn_err(CE_WARN,
-	"XFS: irixsgid is now a sysctl(2) variable, option is deprecated.");
-		} else {
-			cmn_err(CE_WARN,
-				"XFS: unknown mount option [%s].", this_char);
-			return EINVAL;
-		}
-	}
-
-	if (args->flags & XFSMNT_NORECOVERY) {
-		if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) {
-			cmn_err(CE_WARN,
-				"XFS: no-recovery mounts must be read-only.");
-			return EINVAL;
-		}
-	}
-
-	if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) {
-		cmn_err(CE_WARN,
-	"XFS: sunit and swidth options incompatible with the noalign option");
-		return EINVAL;
-	}
-
-	if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) {
-		cmn_err(CE_WARN,
-			"XFS: cannot mount with both project and group quota");
-		return EINVAL;
-	}
-
-	if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') {
-		printk("XFS: %s option needs the mount point option as well\n",
-			MNTOPT_DMAPI);
-		return EINVAL;
-	}
-
-	if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
-		cmn_err(CE_WARN,
-			"XFS: sunit and swidth must be specified together");
-		return EINVAL;
-	}
-
-	if (dsunit && (dswidth % dsunit != 0)) {
-		cmn_err(CE_WARN,
-	"XFS: stripe width (%d) must be a multiple of the stripe unit (%d)",
-			dswidth, dsunit);
-		return EINVAL;
-	}
-
-	/*
-	 * Applications using DMI filesystems often expect the
-	 * inode generation number to be monotonically increasing.
-	 * If we delete inode chunks we break this assumption, so
-	 * keep unused inode chunks on disk for DMI filesystems
-	 * until we come up with a better solution.
-	 * Note that if "ikeep" or "noikeep" mount options are
-	 * supplied, then they are honored.
-	 */
-	if (!(args->flags & XFSMNT_DMAPI) && !ikeep)
-		args->flags |= XFSMNT_IDELETE;
-
-	if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
-		if (dsunit) {
-			args->sunit = dsunit;
-			args->flags |= XFSMNT_RETERR;
-		} else {
-			args->sunit = vol_dsunit;
-		}
-		dswidth ? (args->swidth = dswidth) :
-			  (args->swidth = vol_dswidth);
-	} else {
-		args->sunit = args->swidth = 0;
-	}
-
-done:
-	if (args->flags & XFSMNT_32BITINODES)
-		mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-	if (args->flags2)
-		args->flags |= XFSMNT_FLAGS2;
-	return 0;
-}
-
-int
-xfs_showargs(
-	struct xfs_mount	*mp,
-	struct seq_file		*m)
-{
-	static struct proc_xfs_info {
-		int	flag;
-		char	*str;
-	} xfs_info[] = {
-		/* the few simple ones we can get from the mount struct */
-		{ XFS_MOUNT_WSYNC,		"," MNTOPT_WSYNC },
-		{ XFS_MOUNT_INO64,		"," MNTOPT_INO64 },
-		{ XFS_MOUNT_NOALIGN,		"," MNTOPT_NOALIGN },
-		{ XFS_MOUNT_SWALLOC,		"," MNTOPT_SWALLOC },
-		{ XFS_MOUNT_NOUUID,		"," MNTOPT_NOUUID },
-		{ XFS_MOUNT_NORECOVERY,		"," MNTOPT_NORECOVERY },
-		{ XFS_MOUNT_OSYNCISOSYNC,	"," MNTOPT_OSYNCISOSYNC },
-		{ 0, NULL }
-	};
-	struct proc_xfs_info	*xfs_infop;
-
-	for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) {
-		if (mp->m_flags & xfs_infop->flag)
-			seq_puts(m, xfs_infop->str);
-	}
-
-	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
-		seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
-				(int)(1 << mp->m_writeio_log) >> 10);
-
-	if (mp->m_logbufs > 0)
-		seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
-	if (mp->m_logbsize > 0)
-		seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
-
-	if (mp->m_logname)
-		seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
-	if (mp->m_rtname)
-		seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
-
-	if (mp->m_dalign > 0)
-		seq_printf(m, "," MNTOPT_SUNIT "=%d",
-				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
-	if (mp->m_swidth > 0)
-		seq_printf(m, "," MNTOPT_SWIDTH "=%d",
-				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
-
-	if (!(mp->m_flags & XFS_MOUNT_IDELETE))
-		seq_printf(m, "," MNTOPT_IKEEP);
-	if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE))
-		seq_printf(m, "," MNTOPT_LARGEIO);
-
-	if (!(mp->m_flags & XFS_MOUNT_SMALL_INUMS))
-		seq_printf(m, "," MNTOPT_64BITINODE);
-	if (mp->m_flags & XFS_MOUNT_GRPID)
-		seq_printf(m, "," MNTOPT_GRPID);
-
-	if (mp->m_qflags & XFS_UQUOTA_ACCT) {
-		if (mp->m_qflags & XFS_UQUOTA_ENFD)
-			seq_puts(m, "," MNTOPT_USRQUOTA);
-		else
-			seq_puts(m, "," MNTOPT_UQUOTANOENF);
-	}
-
-	if (mp->m_qflags & XFS_PQUOTA_ACCT) {
-		if (mp->m_qflags & XFS_OQUOTA_ENFD)
-			seq_puts(m, "," MNTOPT_PRJQUOTA);
-		else
-			seq_puts(m, "," MNTOPT_PQUOTANOENF);
-	}
-
-	if (mp->m_qflags & XFS_GQUOTA_ACCT) {
-		if (mp->m_qflags & XFS_OQUOTA_ENFD)
-			seq_puts(m, "," MNTOPT_GRPQUOTA);
-		else
-			seq_puts(m, "," MNTOPT_GQUOTANOENF);
-	}
-
-	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
-		seq_puts(m, "," MNTOPT_NOQUOTA);
-
-	if (mp->m_flags & XFS_MOUNT_DMAPI)
-		seq_puts(m, "," MNTOPT_DMAPI);
-	return 0;
-}
-
-/*
- * Second stage of a freeze. The data is already frozen so we only
- * need to take care of themetadata. Once that's done write a dummy
- * record to dirty the log in case of a crash while frozen.
- */
-void
-xfs_freeze(
-	xfs_mount_t	*mp)
-{
-	xfs_attr_quiesce(mp);
-	xfs_fs_log_dummy(mp);
-}

+ 1 - 8
fs/xfs/xfs_vfsops.h

@@ -13,16 +13,9 @@ int xfs_mount(struct xfs_mount *mp, struct xfs_mount_args *args,
 int xfs_unmount(struct xfs_mount *mp, int flags, struct cred *credp);
 int xfs_mntupdate(struct xfs_mount *mp, int *flags,
 		struct xfs_mount_args *args);
-int xfs_root(struct xfs_mount *mp, bhv_vnode_t **vpp);
-int xfs_statvfs(struct xfs_mount *mp, struct kstatfs *statp,
-		bhv_vnode_t *vp);
 int xfs_sync(struct xfs_mount *mp, int flags);
-int xfs_vget(struct xfs_mount *mp, bhv_vnode_t **vpp, struct xfs_fid *xfid);
-int xfs_parseargs(struct xfs_mount *mp, char *options,
-		struct xfs_mount_args *args, int update);
-int xfs_showargs(struct xfs_mount *mp, struct seq_file *m);
-void xfs_freeze(struct xfs_mount *mp);
 void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
 		int lnnum);
+void xfs_attr_quiesce(struct xfs_mount *mp);
 
 #endif /* _XFS_VFSOPS_H */

+ 47 - 118
fs/xfs/xfs_vnodeops.c

@@ -88,7 +88,7 @@ xfs_getattr(
 	bhv_vnode_t	*vp = XFS_ITOV(ip);
 	xfs_mount_t	*mp = ip->i_mount;
 
-	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(ip);
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
@@ -136,7 +136,7 @@ xfs_getattr(
 	default:
 		vap->va_rdev = 0;
 
-		if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
+		if (!(XFS_IS_REALTIME_INODE(ip))) {
 			vap->va_blocksize = xfs_preferred_iosize(mp);
 		} else {
 
@@ -228,7 +228,7 @@ xfs_setattr(
 	int			file_owner;
 	int			need_iolock = 1;
 
-	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(ip);
 
 	if (mp->m_flags & XFS_MOUNT_RDONLY)
 		return XFS_ERROR(EROFS);
@@ -508,7 +508,7 @@ xfs_setattr(
 		 */
 		if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
 		    (mask & XFS_AT_XFLAGS) &&
-		    (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) !=
+		    (XFS_IS_REALTIME_INODE(ip)) !=
 		    (vap->va_xflags & XFS_XFLAG_REALTIME)) {
 			code = XFS_ERROR(EINVAL);	/* EFBIG? */
 			goto error_return;
@@ -520,7 +520,7 @@ xfs_setattr(
 		if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) {
 			xfs_extlen_t	size;
 
-			if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
+			if (XFS_IS_REALTIME_INODE(ip) ||
 			    ((mask & XFS_AT_XFLAGS) &&
 			    (vap->va_xflags & XFS_XFLAG_REALTIME))) {
 				size = mp->m_sb.sb_rextsize <<
@@ -804,12 +804,8 @@ xfs_setattr(
 				if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
 					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
 			} else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
-				if (vap->va_xflags & XFS_XFLAG_REALTIME) {
+				if (vap->va_xflags & XFS_XFLAG_REALTIME)
 					di_flags |= XFS_DIFLAG_REALTIME;
-					ip->i_iocore.io_flags |= XFS_IOCORE_RT;
-				} else {
-					ip->i_iocore.io_flags &= ~XFS_IOCORE_RT;
-				}
 				if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
 					di_flags |= XFS_DIFLAG_EXTSIZE;
 			}
@@ -902,28 +898,6 @@ xfs_setattr(
 	return code;
 }
 
-
-/*
- * xfs_access
- * Null conversion from vnode mode bits to inode mode bits, as in efs.
- */
-int
-xfs_access(
-	xfs_inode_t	*ip,
-	int		mode,
-	cred_t		*credp)
-{
-	int		error;
-
-	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
-
-	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	error = xfs_iaccess(ip, mode, credp);
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-	return error;
-}
-
-
 /*
  * The maximum pathlen is 1024 bytes. Since the minimum file system
  * blocksize is 512 bytes, we can get a max of 2 extents back from
@@ -987,7 +961,7 @@ xfs_readlink(
 	int		pathlen;
 	int		error = 0;
 
-	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(ip);
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
@@ -1033,7 +1007,7 @@ xfs_fsync(
 	int		error;
 	int		log_flushed = 0, changed = 1;
 
-	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(ip);
 
 	ASSERT(start >= 0 && stop >= -1);
 
@@ -1149,7 +1123,7 @@ xfs_fsync(
 		 * If this inode is on the RT dev we need to flush that
 		 * cache as well.
 		 */
-		if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)
+		if (XFS_IS_REALTIME_INODE(ip))
 			xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
 	}
 
@@ -1188,7 +1162,7 @@ xfs_free_eofblocks(
 
 	nimaps = 1;
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	error = XFS_BMAPI(mp, NULL, &ip->i_iocore, end_fsb, map_len, 0,
+	error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0,
 			  NULL, 0, &imap, &nimaps, NULL, NULL);
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
@@ -1562,9 +1536,6 @@ xfs_release(
 			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
 			if (error)
 				return error;
-			/* Update linux inode block count after free above */
-			vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
-				ip->i_d.di_nblocks + ip->i_delayed_blks);
 		}
 	}
 
@@ -1592,7 +1563,7 @@ xfs_inactive(
 	int		error;
 	int		truncate;
 
-	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(ip);
 
 	/*
 	 * If the inode is already free, then there can be nothing
@@ -1638,9 +1609,6 @@ xfs_inactive(
 			error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK);
 			if (error)
 				return VN_INACTIVE_CACHE;
-			/* Update linux inode block count after free above */
-			vn_to_inode(vp)->i_blocks = XFS_FSB_TO_BB(mp,
-				ip->i_d.di_nblocks + ip->i_delayed_blks);
 		}
 		goto out;
 	}
@@ -1805,7 +1773,7 @@ xfs_lookup(
 	int			error;
 	uint			lock_mode;
 
-	vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(dp);
 
 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
 		return XFS_ERROR(EIO);
@@ -1814,7 +1782,7 @@ xfs_lookup(
 	error = xfs_dir_lookup_int(dp, lock_mode, dentry, &e_inum, &ip);
 	if (!error) {
 		*vpp = XFS_ITOV(ip);
-		ITRACE(ip);
+		xfs_itrace_ref(ip);
 	}
 	xfs_iunlock_map_shared(dp, lock_mode);
 	return error;
@@ -1848,7 +1816,7 @@ xfs_create(
 	int			namelen;
 
 	ASSERT(!*vpp);
-	vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(dp);
 
 	namelen = VNAMELEN(dentry);
 
@@ -1930,7 +1898,7 @@ xfs_create(
 			goto error_return;
 		goto abort_return;
 	}
-	ITRACE(ip);
+	xfs_itrace_ref(ip);
 
 	/*
 	 * At this point, we've gotten a newly allocated inode.
@@ -2098,7 +2066,7 @@ again:
 
 	e_inum = ip->i_ino;
 
-	ITRACE(ip);
+	xfs_itrace_ref(ip);
 
 	/*
 	 * We want to lock in increasing inum. Since we've already
@@ -2321,7 +2289,7 @@ xfs_remove(
 	uint			resblks;
 	int			namelen;
 
-	vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(dp);
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
@@ -2364,9 +2332,8 @@ xfs_remove(
 
 	dm_di_mode = ip->i_d.di_mode;
 
-	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
-
-	ITRACE(ip);
+	xfs_itrace_entry(ip);
+	xfs_itrace_ref(ip);
 
 	error = XFS_QM_DQATTACH(mp, dp, 0);
 	if (!error && dp != ip)
@@ -2498,8 +2465,7 @@ xfs_remove(
 	if (link_zero && xfs_inode_is_filestream(ip))
 		xfs_filestream_deassociate(ip);
 
-	vn_trace_exit(ip, __FUNCTION__, (inst_t *)__return_address);
-
+	xfs_itrace_exit(ip);
 	IRELE(ip);
 
 /*	Fall through to std_return with error = 0 */
@@ -2562,8 +2528,8 @@ xfs_link(
 	char			*target_name = VNAME(dentry);
 	int			target_namelen;
 
-	vn_trace_entry(tdp, __FUNCTION__, (inst_t *)__return_address);
-	vn_trace_entry(xfs_vtoi(src_vp), __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(tdp);
+	xfs_itrace_entry(xfs_vtoi(src_vp));
 
 	target_namelen = VNAMELEN(dentry);
 	ASSERT(!VN_ISDIR(src_vp));
@@ -2744,7 +2710,7 @@ xfs_mkdir(
 
 	/* Return through std_return after this point. */
 
-	vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(dp);
 
 	mp = dp->i_mount;
 	udqp = gdqp = NULL;
@@ -2810,7 +2776,7 @@ xfs_mkdir(
 			goto error_return;
 		goto abort_return;
 	}
-	ITRACE(cdp);
+	xfs_itrace_ref(cdp);
 
 	/*
 	 * Now we add the directory inode to the transaction.
@@ -2936,7 +2902,7 @@ xfs_rmdir(
 	int			last_cdp_link;
 	uint			resblks;
 
-	vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(dp);
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
@@ -3041,7 +3007,7 @@ xfs_rmdir(
 		VN_HOLD(dir_vp);
 	}
 
-	ITRACE(cdp);
+	xfs_itrace_ref(cdp);
 	xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);
 
 	ASSERT(cdp->i_d.di_nlink >= 2);
@@ -3189,8 +3155,7 @@ xfs_symlink(
 	ip = NULL;
 	tp = NULL;
 
-	vn_trace_entry(dp, __FUNCTION__, (inst_t *)__return_address);
-
+	xfs_itrace_entry(dp);
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
@@ -3317,7 +3282,7 @@ xfs_symlink(
 			goto error_return;
 		goto error1;
 	}
-	ITRACE(ip);
+	xfs_itrace_ref(ip);
 
 	/*
 	 * An error after we've joined dp to the transaction will result in the
@@ -3465,27 +3430,6 @@ std_return:
 	goto std_return;
 }
 
-
-int
-xfs_fid2(
-	xfs_inode_t	*ip,
-	xfs_fid_t	*xfid)
-{
-	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
-
-	xfid->fid_len = sizeof(xfs_fid_t) - sizeof(xfid->fid_len);
-	xfid->fid_pad = 0;
-	/*
-	 * use memcpy because the inode is a long long and there's no
-	 * assurance that xfid->fid_ino is properly aligned.
-	 */
-	memcpy(&xfid->fid_ino, &ip->i_ino, sizeof(xfid->fid_ino));
-	xfid->fid_gen = ip->i_d.di_gen;
-
-	return 0;
-}
-
-
 int
 xfs_rwlock(
 	xfs_inode_t	*ip,
@@ -3558,11 +3502,11 @@ xfs_inode_flush(
 		if (iip && iip->ili_last_lsn) {
 			xlog_t		*log = mp->m_log;
 			xfs_lsn_t	sync_lsn;
-			int		s, log_flags = XFS_LOG_FORCE;
+			int		log_flags = XFS_LOG_FORCE;
 
-			s = GRANT_LOCK(log);
+			spin_lock(&log->l_grant_lock);
 			sync_lsn = log->l_last_sync_lsn;
-			GRANT_UNLOCK(log, s);
+			spin_unlock(&log->l_grant_lock);
 
 			if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) {
 				if (flags & FLUSH_SYNC)
@@ -3637,8 +3581,8 @@ xfs_set_dmattrs(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
-	ip->i_iocore.io_dmevmask = ip->i_d.di_dmevmask = evmask;
-	ip->i_iocore.io_dmstate  = ip->i_d.di_dmstate  = state;
+	ip->i_d.di_dmevmask = evmask;
+	ip->i_d.di_dmstate  = state;
 
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 	IHOLD(ip);
@@ -3653,7 +3597,7 @@ xfs_reclaim(
 {
 	bhv_vnode_t	*vp = XFS_ITOV(ip);
 
-	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(ip);
 
 	ASSERT(!VN_MAPPED(vp));
 
@@ -3871,7 +3815,7 @@ xfs_alloc_file_space(
 	int			committed;
 	int			error;
 
-	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(ip);
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
@@ -3976,7 +3920,7 @@ retry:
 		 * Issue the xfs_bmapi() call to allocate the blocks
 		 */
 		XFS_BMAP_INIT(&free_list, &firstfsb);
-		error = XFS_BMAPI(mp, tp, &ip->i_iocore, startoffset_fsb,
+		error = xfs_bmapi(tp, ip, startoffset_fsb,
 				  allocatesize_fsb, bmapi_flag,
 				  &firstfsb, 0, imapp, &nimaps,
 				  &free_list, NULL);
@@ -4052,13 +3996,13 @@ xfs_zero_remaining_bytes(
 	int			error = 0;
 
 	bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize,
-				ip->i_d.di_flags & XFS_DIFLAG_REALTIME ?
+				XFS_IS_REALTIME_INODE(ip) ?
 				mp->m_rtdev_targp : mp->m_ddev_targp);
 
 	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
 		offset_fsb = XFS_B_TO_FSBT(mp, offset);
 		nimap = 1;
-		error = XFS_BMAPI(mp, NULL, &ip->i_iocore, offset_fsb, 1, 0,
+		error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0,
 			NULL, 0, &imap, &nimap, NULL, NULL);
 		if (error || nimap < 1)
 			break;
@@ -4141,7 +4085,7 @@ xfs_free_file_space(
 	vp = XFS_ITOV(ip);
 	mp = ip->i_mount;
 
-	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(ip);
 
 	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return error;
@@ -4149,7 +4093,7 @@ xfs_free_file_space(
 	error = 0;
 	if (len <= 0)	/* if nothing being freed */
 		return error;
-	rt = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);
+	rt = XFS_IS_REALTIME_INODE(ip);
 	startoffset_fsb	= XFS_B_TO_FSB(mp, offset);
 	end_dmi_offset = offset + len;
 	endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
@@ -4172,15 +4116,12 @@ xfs_free_file_space(
 		vn_iowait(ip);	/* wait for the completion of any pending DIOs */
 	}
 
-	rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, NBPP);
+	rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
 	ioffset = offset & ~(rounding - 1);
 
 	if (VN_CACHED(vp) != 0) {
-		xfs_inval_cached_trace(&ip->i_iocore, ioffset, -1,
-				ctooff(offtoct(ioffset)), -1);
-		error = xfs_flushinval_pages(ip,
-				ctooff(offtoct(ioffset)),
-				-1, FI_REMAPF_LOCKED);
+		xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1);
+		error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED);
 		if (error)
 			goto out_unlock_iolock;
 	}
@@ -4193,7 +4134,7 @@ xfs_free_file_space(
 	 */
 	if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
 		nimap = 1;
-		error = XFS_BMAPI(mp, NULL, &ip->i_iocore, startoffset_fsb,
+		error = xfs_bmapi(NULL, ip, startoffset_fsb,
 			1, 0, NULL, 0, &imap, &nimap, NULL, NULL);
 		if (error)
 			goto out_unlock_iolock;
@@ -4208,7 +4149,7 @@ xfs_free_file_space(
 				startoffset_fsb += mp->m_sb.sb_rextsize - mod;
 		}
 		nimap = 1;
-		error = XFS_BMAPI(mp, NULL, &ip->i_iocore, endoffset_fsb - 1,
+		error = xfs_bmapi(NULL, ip, endoffset_fsb - 1,
 			1, 0, NULL, 0, &imap, &nimap, NULL, NULL);
 		if (error)
 			goto out_unlock_iolock;
@@ -4284,7 +4225,7 @@ xfs_free_file_space(
 		 * issue the bunmapi() call to free the blocks
 		 */
 		XFS_BMAP_INIT(&free_list, &firstfsb);
-		error = XFS_BUNMAPI(mp, tp, &ip->i_iocore, startoffset_fsb,
+		error = xfs_bunmapi(tp, ip, startoffset_fsb,
 				  endoffset_fsb - startoffset_fsb,
 				  0, 2, &firstfsb, &free_list, NULL, &done);
 		if (error) {
@@ -4347,23 +4288,11 @@ xfs_change_file_space(
 	xfs_trans_t	*tp;
 	bhv_vattr_t	va;
 
-	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
+	xfs_itrace_entry(ip);
 
-	/*
-	 * must be a regular file and have write permission
-	 */
 	if (!S_ISREG(ip->i_d.di_mode))
 		return XFS_ERROR(EINVAL);
 
-	xfs_ilock(ip, XFS_ILOCK_SHARED);
-
-	if ((error = xfs_iaccess(ip, S_IWUSR, credp))) {
-		xfs_iunlock(ip, XFS_ILOCK_SHARED);
-		return error;
-	}
-
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
 	switch (bf->l_whence) {
 	case 0: /*SEEK_SET*/
 		break;

+ 0 - 2
fs/xfs/xfs_vnodeops.h

@@ -18,7 +18,6 @@ int xfs_open(struct xfs_inode *ip);
 int xfs_getattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags);
 int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags,
 		struct cred *credp);
-int xfs_access(struct xfs_inode *ip, int mode, struct cred *credp);
 int xfs_readlink(struct xfs_inode *ip, char *link);
 int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start,
 		xfs_off_t stop);
@@ -39,7 +38,6 @@ int xfs_readdir(struct xfs_inode	*dp, void *dirent, size_t bufsize,
 int xfs_symlink(struct xfs_inode *dp, bhv_vname_t *dentry,
 		char *target_path, mode_t mode, bhv_vnode_t **vpp,
 		struct cred *credp);
-int xfs_fid2(struct xfs_inode *ip, struct xfs_fid *xfid);
 int xfs_rwlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
 void xfs_rwunlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
 int xfs_inode_flush(struct xfs_inode *ip, int flags);

Some files were not shown because too many files changed in this diff