Переглянути джерело

Merge git://oss.sgi.com:8090/oss/git/xfs-2.6

Linus Torvalds 19 роки тому
батько
коміт
9f5974c873
58 змінених файлів з 2244 додано та 2137 видалено
  1. 438 373
      fs/xfs/linux-2.6/xfs_aops.c
  2. 10 0
      fs/xfs/linux-2.6/xfs_aops.h
  3. 343 397
      fs/xfs/linux-2.6/xfs_buf.c
  4. 286 410
      fs/xfs/linux-2.6/xfs_buf.h
  5. 2 4
      fs/xfs/linux-2.6/xfs_file.c
  6. 3 7
      fs/xfs/linux-2.6/xfs_ioctl.c
  7. 81 40
      fs/xfs/linux-2.6/xfs_iops.c
  8. 0 5
      fs/xfs/linux-2.6/xfs_iops.h
  9. 1 5
      fs/xfs/linux-2.6/xfs_linux.h
  10. 19 37
      fs/xfs/linux-2.6/xfs_lrw.c
  11. 1 1
      fs/xfs/linux-2.6/xfs_stats.c
  12. 9 9
      fs/xfs/linux-2.6/xfs_stats.h
  13. 11 8
      fs/xfs/linux-2.6/xfs_super.c
  14. 0 1
      fs/xfs/linux-2.6/xfs_vnode.c
  15. 19 0
      fs/xfs/linux-2.6/xfs_vnode.h
  16. 2 2
      fs/xfs/quota/xfs_dquot_item.c
  17. 11 7
      fs/xfs/quota/xfs_qm.c
  18. 25 35
      fs/xfs/support/debug.c
  19. 12 13
      fs/xfs/support/debug.h
  20. 14 9
      fs/xfs/support/uuid.c
  21. 19 3
      fs/xfs/xfs_arch.h
  22. 6 6
      fs/xfs/xfs_attr_leaf.c
  23. 43 36
      fs/xfs/xfs_attr_leaf.h
  24. 251 161
      fs/xfs/xfs_bmap.c
  25. 6 1
      fs/xfs/xfs_bmap.h
  26. 1 1
      fs/xfs/xfs_clnt.h
  27. 4 12
      fs/xfs/xfs_dfrag.c
  28. 17 5
      fs/xfs/xfs_dinode.h
  29. 1 1
      fs/xfs/xfs_dir.c
  30. 2 0
      fs/xfs/xfs_dir.h
  31. 0 3
      fs/xfs/xfs_dir2.h
  32. 34 30
      fs/xfs/xfs_dir_leaf.h
  33. 0 1
      fs/xfs/xfs_error.c
  34. 4 4
      fs/xfs/xfs_error.h
  35. 6 4
      fs/xfs/xfs_fs.h
  36. 26 0
      fs/xfs/xfs_fsops.c
  37. 1 0
      fs/xfs/xfs_fsops.h
  38. 1 4
      fs/xfs/xfs_iget.c
  39. 38 23
      fs/xfs/xfs_inode.c
  40. 4 0
      fs/xfs/xfs_inode.h
  41. 7 2
      fs/xfs/xfs_inode_item.c
  42. 234 191
      fs/xfs/xfs_iomap.c
  43. 3 2
      fs/xfs/xfs_itable.c
  44. 96 27
      fs/xfs/xfs_log.c
  45. 1 10
      fs/xfs/xfs_log.h
  46. 5 72
      fs/xfs/xfs_log_priv.h
  47. 6 6
      fs/xfs/xfs_log_recover.c
  48. 2 3
      fs/xfs/xfs_mount.c
  49. 1 2
      fs/xfs/xfs_mount.h
  50. 2 5
      fs/xfs/xfs_rename.c
  51. 4 5
      fs/xfs/xfs_rw.c
  52. 0 17
      fs/xfs/xfs_sb.h
  53. 8 6
      fs/xfs/xfs_trans.c
  54. 0 1
      fs/xfs/xfs_trans.h
  55. 3 6
      fs/xfs/xfs_utils.c
  56. 28 22
      fs/xfs/xfs_vfsops.c
  57. 91 102
      fs/xfs/xfs_vnodeops.c
  58. 2 0
      mm/swap.c

Різницю між файлами не показано, бо вона завелика
+ 438 - 373
fs/xfs/linux-2.6/xfs_aops.c


+ 10 - 0
fs/xfs/linux-2.6/xfs_aops.h

@@ -23,14 +23,24 @@ extern mempool_t *xfs_ioend_pool;
 
 typedef void (*xfs_ioend_func_t)(void *);
 
+/*
+ * xfs_ioend struct manages large extent writes for XFS.
+ * It can manage several multi-page bio's at once.
+ */
 typedef struct xfs_ioend {
+	struct xfs_ioend	*io_list;	/* next ioend in chain */
+	unsigned int		io_type;	/* delalloc / unwritten */
 	unsigned int		io_uptodate;	/* I/O status register */
 	atomic_t		io_remaining;	/* hold count */
 	struct vnode		*io_vnode;	/* file being written to */
 	struct buffer_head	*io_buffer_head;/* buffer linked list head */
+	struct buffer_head	*io_buffer_tail;/* buffer linked list tail */
 	size_t			io_size;	/* size of the extent */
 	xfs_off_t		io_offset;	/* offset in the file */
 	struct work_struct	io_work;	/* xfsdatad work queue */
 } xfs_ioend_t;
 
+extern struct address_space_operations linvfs_aops;
+extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
+
 #endif /* __XFS_IOPS_H__ */

Різницю між файлами не показано, бо вона завелика
+ 343 - 397
fs/xfs/linux-2.6/xfs_buf.c


+ 286 - 410
fs/xfs/linux-2.6/xfs_buf.h

@@ -32,44 +32,47 @@
  *	Base types
  */
 
-#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
-
-#define page_buf_ctob(pp)	((pp) * PAGE_CACHE_SIZE)
-#define page_buf_btoc(dd)	(((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)
-#define page_buf_btoct(dd)	((dd) >> PAGE_CACHE_SHIFT)
-#define page_buf_poff(aa)	((aa) & ~PAGE_CACHE_MASK)
-
-typedef enum page_buf_rw_e {
-	PBRW_READ = 1,			/* transfer into target memory */
-	PBRW_WRITE = 2,			/* transfer from target memory */
-	PBRW_ZERO = 3			/* Zero target memory */
-} page_buf_rw_t;
-
-
-typedef enum page_buf_flags_e {		/* pb_flags values */
-	PBF_READ = (1 << 0),	/* buffer intended for reading from device */
-	PBF_WRITE = (1 << 1),	/* buffer intended for writing to device   */
-	PBF_MAPPED = (1 << 2),  /* buffer mapped (pb_addr valid)           */
-	PBF_ASYNC = (1 << 4),   /* initiator will not wait for completion  */
-	PBF_DONE = (1 << 5),    /* all pages in the buffer uptodate	   */
-	PBF_DELWRI = (1 << 6),  /* buffer has dirty pages                  */
-	PBF_STALE = (1 << 7),	/* buffer has been staled, do not find it  */
-	PBF_FS_MANAGED = (1 << 8),  /* filesystem controls freeing memory  */
- 	PBF_ORDERED = (1 << 11),    /* use ordered writes		   */
-	PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead		   */
+#define XFS_BUF_DADDR_NULL	((xfs_daddr_t) (-1LL))
+
+#define xfs_buf_ctob(pp)	((pp) * PAGE_CACHE_SIZE)
+#define xfs_buf_btoc(dd)	(((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
+#define xfs_buf_btoct(dd)	((dd) >> PAGE_CACHE_SHIFT)
+#define xfs_buf_poff(aa)	((aa) & ~PAGE_CACHE_MASK)
+
+typedef enum {
+	XBRW_READ = 1,			/* transfer into target memory */
+	XBRW_WRITE = 2,			/* transfer from target memory */
+	XBRW_ZERO = 3,			/* Zero target memory */
+} xfs_buf_rw_t;
+
+typedef enum {
+	XBF_READ = (1 << 0),	/* buffer intended for reading from device */
+	XBF_WRITE = (1 << 1),	/* buffer intended for writing to device   */
+	XBF_MAPPED = (1 << 2),  /* buffer mapped (b_addr valid)            */
+	XBF_ASYNC = (1 << 4),   /* initiator will not wait for completion  */
+	XBF_DONE = (1 << 5),    /* all pages in the buffer uptodate	   */
+	XBF_DELWRI = (1 << 6),  /* buffer has dirty pages                  */
+	XBF_STALE = (1 << 7),	/* buffer has been staled, do not find it  */
+	XBF_FS_MANAGED = (1 << 8),  /* filesystem controls freeing memory  */
+ 	XBF_ORDERED = (1 << 11),    /* use ordered writes		   */
+	XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead		   */
 
 	/* flags used only as arguments to access routines */
-	PBF_LOCK = (1 << 14),       /* lock requested			   */
-	PBF_TRYLOCK = (1 << 15),    /* lock requested, but do not wait	   */
-	PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread	   */
+	XBF_LOCK = (1 << 14),       /* lock requested			   */
+	XBF_TRYLOCK = (1 << 15),    /* lock requested, but do not wait	   */
+	XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread	   */
 
 	/* flags used only internally */
-	_PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache		   */
-	_PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc()		   */
-	_PBF_RUN_QUEUES = (1 << 19),/* run block device task queue	   */
-	_PBF_DELWRI_Q = (1 << 21),   /* buffer on delwri queue		   */
-} page_buf_flags_t;
+	_XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache		   */
+	_XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc()		   */
+	_XBF_RUN_QUEUES = (1 << 19),/* run block device task queue	   */
+	_XBF_DELWRI_Q = (1 << 21),   /* buffer on delwri queue		   */
+} xfs_buf_flags_t;
 
+typedef enum {
+	XBT_FORCE_SLEEP = (0 << 1),
+	XBT_FORCE_FLUSH = (1 << 1),
+} xfs_buftarg_flags_t;
 
 typedef struct xfs_bufhash {
 	struct list_head	bh_list;
@@ -77,477 +80,350 @@ typedef struct xfs_bufhash {
 } xfs_bufhash_t;
 
 typedef struct xfs_buftarg {
-	dev_t			pbr_dev;
-	struct block_device	*pbr_bdev;
-	struct address_space	*pbr_mapping;
-	unsigned int		pbr_bsize;
-	unsigned int		pbr_sshift;
-	size_t			pbr_smask;
-
-	/* per-device buffer hash table */
+	dev_t			bt_dev;
+	struct block_device	*bt_bdev;
+	struct address_space	*bt_mapping;
+	unsigned int		bt_bsize;
+	unsigned int		bt_sshift;
+	size_t			bt_smask;
+
+	/* per device buffer hash table */
 	uint			bt_hashmask;
 	uint			bt_hashshift;
 	xfs_bufhash_t		*bt_hash;
+
+	/* per device delwri queue */
+	struct task_struct	*bt_task;
+	struct list_head	bt_list;
+	struct list_head	bt_delwrite_queue;
+	spinlock_t		bt_delwrite_lock;
+	unsigned long		bt_flags;
 } xfs_buftarg_t;
 
 /*
- *	xfs_buf_t:  Buffer structure for page cache-based buffers
+ *	xfs_buf_t:  Buffer structure for pagecache-based buffers
+ *
+ * This buffer structure is used by the pagecache buffer management routines
+ * to refer to an assembly of pages forming a logical buffer.
  *
- * This buffer structure is used by the page cache buffer management routines
- * to refer to an assembly of pages forming a logical buffer.  The actual I/O
- * is performed with buffer_head structures, as required by drivers.
- * 
- * The buffer structure is used on temporary basis only, and discarded when
- * released.  The real data storage is recorded in the page cache.  Metadata is
+ * The buffer structure is used on a temporary basis only, and discarded when
+ * released.  The real data storage is recorded in the pagecache. Buffers are
  * hashed to the block device on which the file system resides.
  */
 
 struct xfs_buf;
+typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
+typedef void (*xfs_buf_relse_t)(struct xfs_buf *);
+typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
 
-/* call-back function on I/O completion */
-typedef void (*page_buf_iodone_t)(struct xfs_buf *);
-/* call-back function on I/O completion */
-typedef void (*page_buf_relse_t)(struct xfs_buf *);
-/* pre-write function */
-typedef int (*page_buf_bdstrat_t)(struct xfs_buf *);
-
-#define PB_PAGES	2
+#define XB_PAGES	2
 
 typedef struct xfs_buf {
-	struct semaphore	pb_sema;	/* semaphore for lockables  */
-	unsigned long		pb_queuetime;	/* time buffer was queued   */
-	atomic_t		pb_pin_count;	/* pin count		    */
-	wait_queue_head_t	pb_waiters;	/* unpin waiters	    */
-	struct list_head	pb_list;
-	page_buf_flags_t	pb_flags;	/* status flags */
-	struct list_head	pb_hash_list;	/* hash table list */
-	xfs_bufhash_t		*pb_hash;	/* hash table list start */
-	xfs_buftarg_t		*pb_target;	/* buffer target (device) */
-	atomic_t		pb_hold;	/* reference count */
-	xfs_daddr_t		pb_bn;		/* block number for I/O */
-	loff_t			pb_file_offset;	/* offset in file */
-	size_t			pb_buffer_length; /* size of buffer in bytes */
-	size_t			pb_count_desired; /* desired transfer size */
-	void			*pb_addr;	/* virtual address of buffer */
-	struct work_struct	pb_iodone_work;
-	atomic_t		pb_io_remaining;/* #outstanding I/O requests */
-	page_buf_iodone_t	pb_iodone;	/* I/O completion function */
-	page_buf_relse_t	pb_relse;	/* releasing function */
-	page_buf_bdstrat_t	pb_strat;	/* pre-write function */
-	struct semaphore	pb_iodonesema;	/* Semaphore for I/O waiters */
-	void			*pb_fspriv;
-	void			*pb_fspriv2;
-	void			*pb_fspriv3;
-	unsigned short		pb_error;	/* error code on I/O */
- 	unsigned short		pb_locked;	/* page array is locked */
- 	unsigned int		pb_page_count;	/* size of page array */
-	unsigned int		pb_offset;	/* page offset in first page */
-	struct page		**pb_pages;	/* array of page pointers */
-	struct page		*pb_page_array[PB_PAGES]; /* inline pages */
-#ifdef PAGEBUF_LOCK_TRACKING
-	int			pb_last_holder;
+	struct semaphore	b_sema;		/* semaphore for lockables */
+	unsigned long		b_queuetime;	/* time buffer was queued */
+	atomic_t		b_pin_count;	/* pin count */
+	wait_queue_head_t	b_waiters;	/* unpin waiters */
+	struct list_head	b_list;
+	xfs_buf_flags_t		b_flags;	/* status flags */
+	struct list_head	b_hash_list;	/* hash table list */
+	xfs_bufhash_t		*b_hash;	/* hash table list start */
+	xfs_buftarg_t		*b_target;	/* buffer target (device) */
+	atomic_t		b_hold;		/* reference count */
+	xfs_daddr_t		b_bn;		/* block number for I/O */
+	xfs_off_t		b_file_offset;	/* offset in file */
+	size_t			b_buffer_length;/* size of buffer in bytes */
+	size_t			b_count_desired;/* desired transfer size */
+	void			*b_addr;	/* virtual address of buffer */
+	struct work_struct	b_iodone_work;
+	atomic_t		b_io_remaining;	/* #outstanding I/O requests */
+	xfs_buf_iodone_t	b_iodone;	/* I/O completion function */
+	xfs_buf_relse_t		b_relse;	/* releasing function */
+	xfs_buf_bdstrat_t	b_strat;	/* pre-write function */
+	struct semaphore	b_iodonesema;	/* Semaphore for I/O waiters */
+	void			*b_fspriv;
+	void			*b_fspriv2;
+	void			*b_fspriv3;
+	unsigned short		b_error;	/* error code on I/O */
+	unsigned short		b_locked;	/* page array is locked */
+	unsigned int		b_page_count;	/* size of page array */
+	unsigned int		b_offset;	/* page offset in first page */
+	struct page		**b_pages;	/* array of page pointers */
+	struct page		*b_page_array[XB_PAGES]; /* inline pages */
+#ifdef XFS_BUF_LOCK_TRACKING
+	int			b_last_holder;
 #endif
 } xfs_buf_t;
 
 
 /* Finding and Reading Buffers */
-
-extern xfs_buf_t *_pagebuf_find(	/* find buffer for block if	*/
-					/* the block is in memory	*/
-		xfs_buftarg_t *,	/* inode for block		*/
-		loff_t,			/* starting offset of range	*/
-		size_t,			/* length of range		*/
-		page_buf_flags_t,	/* PBF_LOCK			*/
-	        xfs_buf_t *);		/* newly allocated buffer	*/
-
+extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
+				xfs_buf_flags_t, xfs_buf_t *);
 #define xfs_incore(buftarg,blkno,len,lockit) \
-	_pagebuf_find(buftarg, blkno ,len, lockit, NULL)
-
-extern xfs_buf_t *xfs_buf_get_flags(	/* allocate a buffer		*/
-		xfs_buftarg_t *,	/* inode for buffer		*/
-		loff_t,			/* starting offset of range     */
-		size_t,			/* length of range              */
-		page_buf_flags_t);	/* PBF_LOCK, PBF_READ,		*/
-					/* PBF_ASYNC			*/
+	_xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
 
+extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t,
+				xfs_buf_flags_t);
 #define xfs_buf_get(target, blkno, len, flags) \
-	xfs_buf_get_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
-
-extern xfs_buf_t *xfs_buf_read_flags(	/* allocate and read a buffer	*/
-		xfs_buftarg_t *,	/* inode for buffer		*/
-		loff_t,			/* starting offset of range	*/
-		size_t,			/* length of range		*/
-		page_buf_flags_t);	/* PBF_LOCK, PBF_ASYNC		*/
+	xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
 
+extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t,
+				xfs_buf_flags_t);
 #define xfs_buf_read(target, blkno, len, flags) \
-	xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
-
-extern xfs_buf_t *pagebuf_get_empty(	/* allocate pagebuf struct with	*/
-					/*  no memory or disk address	*/
-		size_t len,
-		xfs_buftarg_t *);	/* mount point "fake" inode	*/
-
-extern xfs_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct	*/
-					/* without disk address		*/
-		size_t len,
-		xfs_buftarg_t *);	/* mount point "fake" inode	*/
-
-extern int pagebuf_associate_memory(
-		xfs_buf_t *,
-		void *,
-		size_t);
-
-extern void pagebuf_hold(		/* increment reference count	*/
-		xfs_buf_t *);		/* buffer to hold		*/
+	xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
 
-extern void pagebuf_readahead(		/* read ahead into cache	*/
-		xfs_buftarg_t  *,	/* target for buffer (or NULL)	*/
-		loff_t,			/* starting offset of range     */
-		size_t,			/* length of range              */
-		page_buf_flags_t);	/* additional read flags	*/
+extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
+extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
+extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
+extern void xfs_buf_hold(xfs_buf_t *);
+extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t,
+				xfs_buf_flags_t);
 
 /* Releasing Buffers */
-
-extern void pagebuf_free(		/* deallocate a buffer		*/
-		xfs_buf_t *);		/* buffer to deallocate		*/
-
-extern void pagebuf_rele(		/* release hold on a buffer	*/
-		xfs_buf_t *);		/* buffer to release		*/
+extern void xfs_buf_free(xfs_buf_t *);
+extern void xfs_buf_rele(xfs_buf_t *);
 
 /* Locking and Unlocking Buffers */
-
-extern int pagebuf_cond_lock(		/* lock buffer, if not locked	*/
-					/* (returns -EBUSY if locked)	*/
-		xfs_buf_t *);		/* buffer to lock		*/
-
-extern int pagebuf_lock_value(		/* return count on lock		*/
-		xfs_buf_t *);          /* buffer to check              */
-
-extern int pagebuf_lock(		/* lock buffer                  */
-		xfs_buf_t *);          /* buffer to lock               */
-
-extern void pagebuf_unlock(		/* unlock buffer		*/
-		xfs_buf_t *);		/* buffer to unlock		*/
+extern int xfs_buf_cond_lock(xfs_buf_t *);
+extern int xfs_buf_lock_value(xfs_buf_t *);
+extern void xfs_buf_lock(xfs_buf_t *);
+extern void xfs_buf_unlock(xfs_buf_t *);
 
 /* Buffer Read and Write Routines */
-
-extern void pagebuf_iodone(		/* mark buffer I/O complete	*/
-		xfs_buf_t *,		/* buffer to mark		*/
-		int);			/* run completion locally, or in
-					 * a helper thread.		*/
-
-extern void pagebuf_ioerror(		/* mark buffer in error	(or not) */
-		xfs_buf_t *,		/* buffer to mark		*/
-		int);			/* error to store (0 if none)	*/
-
-extern int pagebuf_iostart(		/* start I/O on a buffer	*/
-		xfs_buf_t *,		/* buffer to start		*/
-		page_buf_flags_t);	/* PBF_LOCK, PBF_ASYNC,		*/
-					/* PBF_READ, PBF_WRITE,		*/
-					/* PBF_DELWRI			*/
-
-extern int pagebuf_iorequest(		/* start real I/O		*/
-		xfs_buf_t *);		/* buffer to convey to device	*/
-
-extern int pagebuf_iowait(		/* wait for buffer I/O done	*/
-		xfs_buf_t *);		/* buffer to wait on		*/
-
-extern void pagebuf_iomove(		/* move data in/out of pagebuf	*/
-		xfs_buf_t *,		/* buffer to manipulate		*/
-		size_t,			/* starting buffer offset	*/
-		size_t,			/* length in buffer		*/
-		caddr_t,		/* data pointer			*/
-		page_buf_rw_t);		/* direction			*/
-
-static inline int pagebuf_iostrategy(xfs_buf_t *pb)
+extern void xfs_buf_ioend(xfs_buf_t *,	int);
+extern void xfs_buf_ioerror(xfs_buf_t *, int);
+extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t);
+extern int xfs_buf_iorequest(xfs_buf_t *);
+extern int xfs_buf_iowait(xfs_buf_t *);
+extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t,
+				xfs_buf_rw_t);
+
+static inline int xfs_buf_iostrategy(xfs_buf_t *bp)
 {
-	return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb);
+	return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp);
 }
 
-static inline int pagebuf_geterror(xfs_buf_t *pb)
+static inline int xfs_buf_geterror(xfs_buf_t *bp)
 {
-	return pb ? pb->pb_error : ENOMEM;
+	return bp ? bp->b_error : ENOMEM;
 }
 
 /* Buffer Utility Routines */
-
-extern caddr_t pagebuf_offset(		/* pointer at offset in buffer	*/
-		xfs_buf_t *,		/* buffer to offset into	*/
-		size_t);		/* offset			*/
+extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
 
 /* Pinning Buffer Storage in Memory */
-
-extern void pagebuf_pin(		/* pin buffer in memory		*/
-		xfs_buf_t *);		/* buffer to pin		*/
-
-extern void pagebuf_unpin(		/* unpin buffered data		*/
-		xfs_buf_t *);		/* buffer to unpin		*/
-
-extern int pagebuf_ispin(		/* check if buffer is pinned	*/
-		xfs_buf_t *);		/* buffer to check		*/
+extern void xfs_buf_pin(xfs_buf_t *);
+extern void xfs_buf_unpin(xfs_buf_t *);
+extern int xfs_buf_ispin(xfs_buf_t *);
 
 /* Delayed Write Buffer Routines */
-
-extern void pagebuf_delwri_dequeue(xfs_buf_t *);
+extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
 
 /* Buffer Daemon Setup Routines */
+extern int xfs_buf_init(void);
+extern void xfs_buf_terminate(void);
 
-extern int pagebuf_init(void);
-extern void pagebuf_terminate(void);
-
-
-#ifdef PAGEBUF_TRACE
-extern ktrace_t *pagebuf_trace_buf;
-extern void pagebuf_trace(
-		xfs_buf_t *,		/* buffer being traced		*/
-		char *,			/* description of operation	*/
-		void *,			/* arbitrary diagnostic value	*/
-		void *);		/* return address		*/
+#ifdef XFS_BUF_TRACE
+extern ktrace_t *xfs_buf_trace_buf;
+extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
 #else
-# define pagebuf_trace(pb, id, ptr, ra)	do { } while (0)
+#define xfs_buf_trace(bp,id,ptr,ra)	do { } while (0)
 #endif
 
-#define pagebuf_target_name(target)	\
-	({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; })
+#define xfs_buf_target_name(target)	\
+	({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })
 
 
+#define XFS_B_ASYNC		XBF_ASYNC
+#define XFS_B_DELWRI		XBF_DELWRI
+#define XFS_B_READ		XBF_READ
+#define XFS_B_WRITE		XBF_WRITE
+#define XFS_B_STALE		XBF_STALE
 
-/* These are just for xfs_syncsub... it sets an internal variable
- * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t
- */
-#define XFS_B_ASYNC		PBF_ASYNC
-#define XFS_B_DELWRI		PBF_DELWRI
-#define XFS_B_READ		PBF_READ
-#define XFS_B_WRITE		PBF_WRITE
-#define XFS_B_STALE		PBF_STALE
-
-#define XFS_BUF_TRYLOCK		PBF_TRYLOCK
-#define XFS_INCORE_TRYLOCK	PBF_TRYLOCK
-#define XFS_BUF_LOCK		PBF_LOCK
-#define XFS_BUF_MAPPED		PBF_MAPPED
-
-#define BUF_BUSY		PBF_DONT_BLOCK
-
-#define XFS_BUF_BFLAGS(x)	((x)->pb_flags)
-#define XFS_BUF_ZEROFLAGS(x)	\
-	((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI))
-
-#define XFS_BUF_STALE(x)	((x)->pb_flags |= XFS_B_STALE)
-#define XFS_BUF_UNSTALE(x)	((x)->pb_flags &= ~XFS_B_STALE)
-#define XFS_BUF_ISSTALE(x)	((x)->pb_flags & XFS_B_STALE)
-#define XFS_BUF_SUPER_STALE(x)	do {				\
-					XFS_BUF_STALE(x);	\
-					pagebuf_delwri_dequeue(x);	\
-					XFS_BUF_DONE(x);	\
-				} while (0)
+#define XFS_BUF_TRYLOCK		XBF_TRYLOCK
+#define XFS_INCORE_TRYLOCK	XBF_TRYLOCK
+#define XFS_BUF_LOCK		XBF_LOCK
+#define XFS_BUF_MAPPED		XBF_MAPPED
 
-#define XFS_BUF_MANAGE		PBF_FS_MANAGED
-#define XFS_BUF_UNMANAGE(x)	((x)->pb_flags &= ~PBF_FS_MANAGED)
-
-#define XFS_BUF_DELAYWRITE(x)	 ((x)->pb_flags |= PBF_DELWRI)
-#define XFS_BUF_UNDELAYWRITE(x)	 pagebuf_delwri_dequeue(x)
-#define XFS_BUF_ISDELAYWRITE(x)	 ((x)->pb_flags & PBF_DELWRI)
-
-#define XFS_BUF_ERROR(x,no)	 pagebuf_ioerror(x,no)
-#define XFS_BUF_GETERROR(x)	 pagebuf_geterror(x)
-#define XFS_BUF_ISERROR(x)	 (pagebuf_geterror(x)?1:0)
-
-#define XFS_BUF_DONE(x)		 ((x)->pb_flags |= PBF_DONE)
-#define XFS_BUF_UNDONE(x)	 ((x)->pb_flags &= ~PBF_DONE)
-#define XFS_BUF_ISDONE(x)	 ((x)->pb_flags & PBF_DONE)
-
-#define XFS_BUF_BUSY(x)		 do { } while (0)
-#define XFS_BUF_UNBUSY(x)	 do { } while (0)
-#define XFS_BUF_ISBUSY(x)	 (1)
-
-#define XFS_BUF_ASYNC(x)	 ((x)->pb_flags |= PBF_ASYNC)
-#define XFS_BUF_UNASYNC(x)	 ((x)->pb_flags &= ~PBF_ASYNC)
-#define XFS_BUF_ISASYNC(x)	 ((x)->pb_flags & PBF_ASYNC)
-
-#define XFS_BUF_ORDERED(x)	 ((x)->pb_flags |= PBF_ORDERED)
-#define XFS_BUF_UNORDERED(x)	 ((x)->pb_flags &= ~PBF_ORDERED)
-#define XFS_BUF_ISORDERED(x)	 ((x)->pb_flags & PBF_ORDERED)
-
-#define XFS_BUF_SHUT(x)		 printk("XFS_BUF_SHUT not implemented yet\n")
-#define XFS_BUF_UNSHUT(x)	 printk("XFS_BUF_UNSHUT not implemented yet\n")
-#define XFS_BUF_ISSHUT(x)	 (0)
-
-#define XFS_BUF_HOLD(x)		pagebuf_hold(x)
-#define XFS_BUF_READ(x)		((x)->pb_flags |= PBF_READ)
-#define XFS_BUF_UNREAD(x)	((x)->pb_flags &= ~PBF_READ)
-#define XFS_BUF_ISREAD(x)	((x)->pb_flags & PBF_READ)
-
-#define XFS_BUF_WRITE(x)	((x)->pb_flags |= PBF_WRITE)
-#define XFS_BUF_UNWRITE(x)	((x)->pb_flags &= ~PBF_WRITE)
-#define XFS_BUF_ISWRITE(x)	((x)->pb_flags & PBF_WRITE)
-
-#define XFS_BUF_ISUNINITIAL(x)	 (0)
-#define XFS_BUF_UNUNINITIAL(x)	 (0)
-
-#define XFS_BUF_BP_ISMAPPED(bp)	 1
-
-#define XFS_BUF_IODONE_FUNC(buf)	(buf)->pb_iodone
-#define XFS_BUF_SET_IODONE_FUNC(buf, func)	\
-			(buf)->pb_iodone = (func)
-#define XFS_BUF_CLR_IODONE_FUNC(buf)		\
-			(buf)->pb_iodone = NULL
-#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func)	\
-			(buf)->pb_strat = (func)
-#define XFS_BUF_CLR_BDSTRAT_FUNC(buf)		\
-			(buf)->pb_strat = NULL
-
-#define XFS_BUF_FSPRIVATE(buf, type)		\
-			((type)(buf)->pb_fspriv)
-#define XFS_BUF_SET_FSPRIVATE(buf, value)	\
-			(buf)->pb_fspriv = (void *)(value)
-#define XFS_BUF_FSPRIVATE2(buf, type)		\
-			((type)(buf)->pb_fspriv2)
-#define XFS_BUF_SET_FSPRIVATE2(buf, value)	\
-			(buf)->pb_fspriv2 = (void *)(value)
-#define XFS_BUF_FSPRIVATE3(buf, type)		\
-			((type)(buf)->pb_fspriv3)
-#define XFS_BUF_SET_FSPRIVATE3(buf, value)	\
-			(buf)->pb_fspriv3  = (void *)(value)
-#define XFS_BUF_SET_START(buf)
-
-#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \
-			(buf)->pb_relse = (value)
-
-#define XFS_BUF_PTR(bp)		(xfs_caddr_t)((bp)->pb_addr)
-
-static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
-{
-	if (bp->pb_flags & PBF_MAPPED)
-		return XFS_BUF_PTR(bp) + offset;
-	return (xfs_caddr_t) pagebuf_offset(bp, offset);
-}
+#define BUF_BUSY		XBF_DONT_BLOCK
+
+#define XFS_BUF_BFLAGS(bp)	((bp)->b_flags)
+#define XFS_BUF_ZEROFLAGS(bp)	\
+	((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI))
+
+#define XFS_BUF_STALE(bp)	((bp)->b_flags |= XFS_B_STALE)
+#define XFS_BUF_UNSTALE(bp)	((bp)->b_flags &= ~XFS_B_STALE)
+#define XFS_BUF_ISSTALE(bp)	((bp)->b_flags & XFS_B_STALE)
+#define XFS_BUF_SUPER_STALE(bp)	do {				\
+					XFS_BUF_STALE(bp);	\
+					xfs_buf_delwri_dequeue(bp);	\
+					XFS_BUF_DONE(bp);	\
+				} while (0)
 
-#define XFS_BUF_SET_PTR(bp, val, count)		\
-				pagebuf_associate_memory(bp, val, count)
-#define XFS_BUF_ADDR(bp)	((bp)->pb_bn)
-#define XFS_BUF_SET_ADDR(bp, blk)		\
-			((bp)->pb_bn = (xfs_daddr_t)(blk))
-#define XFS_BUF_OFFSET(bp)	((bp)->pb_file_offset)
-#define XFS_BUF_SET_OFFSET(bp, off)		\
-			((bp)->pb_file_offset = (off))
-#define XFS_BUF_COUNT(bp)	((bp)->pb_count_desired)
-#define XFS_BUF_SET_COUNT(bp, cnt)		\
-			((bp)->pb_count_desired = (cnt))
-#define XFS_BUF_SIZE(bp)	((bp)->pb_buffer_length)
-#define XFS_BUF_SET_SIZE(bp, cnt)		\
-			((bp)->pb_buffer_length = (cnt))
-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)
-#define XFS_BUF_SET_VTYPE(bp, type)
-#define XFS_BUF_SET_REF(bp, ref)
-
-#define XFS_BUF_ISPINNED(bp)	pagebuf_ispin(bp)
-
-#define XFS_BUF_VALUSEMA(bp)	pagebuf_lock_value(bp)
-#define XFS_BUF_CPSEMA(bp)	(pagebuf_cond_lock(bp) == 0)
-#define XFS_BUF_VSEMA(bp)	pagebuf_unlock(bp)
-#define XFS_BUF_PSEMA(bp,x)	pagebuf_lock(bp)
-#define XFS_BUF_V_IODONESEMA(bp) up(&bp->pb_iodonesema);
-
-/* setup the buffer target from a buftarg structure */
-#define XFS_BUF_SET_TARGET(bp, target)	\
-		(bp)->pb_target = (target)
-#define XFS_BUF_TARGET(bp)	((bp)->pb_target)
-#define XFS_BUFTARG_NAME(target)	\
-		pagebuf_target_name(target)
-
-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)
-#define XFS_BUF_SET_VTYPE(bp, type)
-#define XFS_BUF_SET_REF(bp, ref)
-
-static inline int	xfs_bawrite(void *mp, xfs_buf_t *bp)
+#define XFS_BUF_MANAGE		XBF_FS_MANAGED
+#define XFS_BUF_UNMANAGE(bp)	((bp)->b_flags &= ~XBF_FS_MANAGED)
+
+#define XFS_BUF_DELAYWRITE(bp)		((bp)->b_flags |= XBF_DELWRI)
+#define XFS_BUF_UNDELAYWRITE(bp)	xfs_buf_delwri_dequeue(bp)
+#define XFS_BUF_ISDELAYWRITE(bp)	((bp)->b_flags & XBF_DELWRI)
+
+#define XFS_BUF_ERROR(bp,no)	xfs_buf_ioerror(bp,no)
+#define XFS_BUF_GETERROR(bp)	xfs_buf_geterror(bp)
+#define XFS_BUF_ISERROR(bp)	(xfs_buf_geterror(bp) ? 1 : 0)
+
+#define XFS_BUF_DONE(bp)	((bp)->b_flags |= XBF_DONE)
+#define XFS_BUF_UNDONE(bp)	((bp)->b_flags &= ~XBF_DONE)
+#define XFS_BUF_ISDONE(bp)	((bp)->b_flags & XBF_DONE)
+
+#define XFS_BUF_BUSY(bp)	do { } while (0)
+#define XFS_BUF_UNBUSY(bp)	do { } while (0)
+#define XFS_BUF_ISBUSY(bp)	(1)
+
+#define XFS_BUF_ASYNC(bp)	((bp)->b_flags |= XBF_ASYNC)
+#define XFS_BUF_UNASYNC(bp)	((bp)->b_flags &= ~XBF_ASYNC)
+#define XFS_BUF_ISASYNC(bp)	((bp)->b_flags & XBF_ASYNC)
+
+#define XFS_BUF_ORDERED(bp)	((bp)->b_flags |= XBF_ORDERED)
+#define XFS_BUF_UNORDERED(bp)	((bp)->b_flags &= ~XBF_ORDERED)
+#define XFS_BUF_ISORDERED(bp)	((bp)->b_flags & XBF_ORDERED)
+
+#define XFS_BUF_SHUT(bp)	do { } while (0)
+#define XFS_BUF_UNSHUT(bp)	do { } while (0)
+#define XFS_BUF_ISSHUT(bp)	(0)
+
+#define XFS_BUF_HOLD(bp)	xfs_buf_hold(bp)
+#define XFS_BUF_READ(bp)	((bp)->b_flags |= XBF_READ)
+#define XFS_BUF_UNREAD(bp)	((bp)->b_flags &= ~XBF_READ)
+#define XFS_BUF_ISREAD(bp)	((bp)->b_flags & XBF_READ)
+
+#define XFS_BUF_WRITE(bp)	((bp)->b_flags |= XBF_WRITE)
+#define XFS_BUF_UNWRITE(bp)	((bp)->b_flags &= ~XBF_WRITE)
+#define XFS_BUF_ISWRITE(bp)	((bp)->b_flags & XBF_WRITE)
+
+#define XFS_BUF_ISUNINITIAL(bp)	(0)
+#define XFS_BUF_UNUNINITIAL(bp)	(0)
+
+#define XFS_BUF_BP_ISMAPPED(bp)	(1)
+
+#define XFS_BUF_IODONE_FUNC(bp)			((bp)->b_iodone)
+#define XFS_BUF_SET_IODONE_FUNC(bp, func)	((bp)->b_iodone = (func))
+#define XFS_BUF_CLR_IODONE_FUNC(bp)		((bp)->b_iodone = NULL)
+#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func)	((bp)->b_strat = (func))
+#define XFS_BUF_CLR_BDSTRAT_FUNC(bp)		((bp)->b_strat = NULL)
+
+#define XFS_BUF_FSPRIVATE(bp, type)		((type)(bp)->b_fspriv)
+#define XFS_BUF_SET_FSPRIVATE(bp, val)		((bp)->b_fspriv = (void*)(val))
+#define XFS_BUF_FSPRIVATE2(bp, type)		((type)(bp)->b_fspriv2)
+#define XFS_BUF_SET_FSPRIVATE2(bp, val)		((bp)->b_fspriv2 = (void*)(val))
+#define XFS_BUF_FSPRIVATE3(bp, type)		((type)(bp)->b_fspriv3)
+#define XFS_BUF_SET_FSPRIVATE3(bp, val)		((bp)->b_fspriv3 = (void*)(val))
+#define XFS_BUF_SET_START(bp)			do { } while (0)
+#define XFS_BUF_SET_BRELSE_FUNC(bp, func)	((bp)->b_relse = (func))
+
+#define XFS_BUF_PTR(bp)			(xfs_caddr_t)((bp)->b_addr)
+#define XFS_BUF_SET_PTR(bp, val, cnt)	xfs_buf_associate_memory(bp, val, cnt)
+#define XFS_BUF_ADDR(bp)		((bp)->b_bn)
+#define XFS_BUF_SET_ADDR(bp, bno)	((bp)->b_bn = (xfs_daddr_t)(bno))
+#define XFS_BUF_OFFSET(bp)		((bp)->b_file_offset)
+#define XFS_BUF_SET_OFFSET(bp, off)	((bp)->b_file_offset = (off))
+#define XFS_BUF_COUNT(bp)		((bp)->b_count_desired)
+#define XFS_BUF_SET_COUNT(bp, cnt)	((bp)->b_count_desired = (cnt))
+#define XFS_BUF_SIZE(bp)		((bp)->b_buffer_length)
+#define XFS_BUF_SET_SIZE(bp, cnt)	((bp)->b_buffer_length = (cnt))
+
+#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)	do { } while (0)
+#define XFS_BUF_SET_VTYPE(bp, type)		do { } while (0)
+#define XFS_BUF_SET_REF(bp, ref)		do { } while (0)
+
+#define XFS_BUF_ISPINNED(bp)	xfs_buf_ispin(bp)
+
+#define XFS_BUF_VALUSEMA(bp)	xfs_buf_lock_value(bp)
+#define XFS_BUF_CPSEMA(bp)	(xfs_buf_cond_lock(bp) == 0)
+#define XFS_BUF_VSEMA(bp)	xfs_buf_unlock(bp)
+#define XFS_BUF_PSEMA(bp,x)	xfs_buf_lock(bp)
+#define XFS_BUF_V_IODONESEMA(bp) up(&bp->b_iodonesema);
+
+#define XFS_BUF_SET_TARGET(bp, target)	((bp)->b_target = (target))
+#define XFS_BUF_TARGET(bp)		((bp)->b_target)
+#define XFS_BUFTARG_NAME(target)	xfs_buf_target_name(target)
+
+static inline int xfs_bawrite(void *mp, xfs_buf_t *bp)
 {
-	bp->pb_fspriv3 = mp;
-	bp->pb_strat = xfs_bdstrat_cb;
-	pagebuf_delwri_dequeue(bp);
-	return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES);
+	bp->b_fspriv3 = mp;
+	bp->b_strat = xfs_bdstrat_cb;
+	xfs_buf_delwri_dequeue(bp);
+	return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
 }
 
-static inline void	xfs_buf_relse(xfs_buf_t *bp)
+static inline void xfs_buf_relse(xfs_buf_t *bp)
 {
-	if (!bp->pb_relse)
-		pagebuf_unlock(bp);
-	pagebuf_rele(bp);
+	if (!bp->b_relse)
+		xfs_buf_unlock(bp);
+	xfs_buf_rele(bp);
 }
 
-#define xfs_bpin(bp)		pagebuf_pin(bp)
-#define xfs_bunpin(bp)		pagebuf_unpin(bp)
+#define xfs_bpin(bp)		xfs_buf_pin(bp)
+#define xfs_bunpin(bp)		xfs_buf_unpin(bp)
 
 #define xfs_buftrace(id, bp)	\
-	    pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
+	    xfs_buf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
 
-#define xfs_biodone(pb)		    \
-	    pagebuf_iodone(pb, 0)
+#define xfs_biodone(bp)		xfs_buf_ioend(bp, 0)
 
-#define xfs_biomove(pb, off, len, data, rw) \
-	    pagebuf_iomove((pb), (off), (len), (data), \
-		((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ)
+#define xfs_biomove(bp, off, len, data, rw) \
+	    xfs_buf_iomove((bp), (off), (len), (data), \
+		((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ)
 
-#define xfs_biozero(pb, off, len) \
-	    pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO)
+#define xfs_biozero(bp, off, len) \
+	    xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
 
 
-static inline int	XFS_bwrite(xfs_buf_t *pb)
+static inline int XFS_bwrite(xfs_buf_t *bp)
 {
-	int	iowait = (pb->pb_flags & PBF_ASYNC) == 0;
+	int	iowait = (bp->b_flags & XBF_ASYNC) == 0;
 	int	error = 0;
 
 	if (!iowait)
-		pb->pb_flags |= _PBF_RUN_QUEUES;
+		bp->b_flags |= _XBF_RUN_QUEUES;
 
-	pagebuf_delwri_dequeue(pb);
-	pagebuf_iostrategy(pb);
+	xfs_buf_delwri_dequeue(bp);
+	xfs_buf_iostrategy(bp);
 	if (iowait) {
-		error = pagebuf_iowait(pb);
-		xfs_buf_relse(pb);
+		error = xfs_buf_iowait(bp);
+		xfs_buf_relse(bp);
 	}
 	return error;
 }
 
-#define XFS_bdwrite(pb)		     \
-	    pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC)
+#define XFS_bdwrite(bp)		xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC)
 
 static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)
 {
-	bp->pb_strat = xfs_bdstrat_cb;
-	bp->pb_fspriv3 = mp;
-
-	return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC);
+	bp->b_strat = xfs_bdstrat_cb;
+	bp->b_fspriv3 = mp;
+	return xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC);
 }
 
-#define XFS_bdstrat(bp) pagebuf_iorequest(bp)
+#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
 
-#define xfs_iowait(pb)	pagebuf_iowait(pb)
+#define xfs_iowait(bp)	xfs_buf_iowait(bp)
 
 #define xfs_baread(target, rablkno, ralen)  \
-	pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK)
-
-#define xfs_buf_get_empty(len, target)	pagebuf_get_empty((len), (target))
-#define xfs_buf_get_noaddr(len, target)	pagebuf_get_no_daddr((len), (target))
-#define xfs_buf_free(bp)		pagebuf_free(bp)
+	xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)
 
 
 /*
  *	Handling of buftargs.
  */
-
 extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
 extern void xfs_free_buftarg(xfs_buftarg_t *, int);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
 extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
 extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
 
-#define xfs_getsize_buftarg(buftarg) \
-	block_size((buftarg)->pbr_bdev)
-#define xfs_readonly_buftarg(buftarg) \
-	bdev_read_only((buftarg)->pbr_bdev)
-#define xfs_binval(buftarg) \
-	xfs_flush_buftarg(buftarg, 1)
-#define XFS_bflush(buftarg) \
-	xfs_flush_buftarg(buftarg, 1)
+#define xfs_getsize_buftarg(buftarg)	block_size((buftarg)->bt_bdev)
+#define xfs_readonly_buftarg(buftarg)	bdev_read_only((buftarg)->bt_bdev)
+
+#define xfs_binval(buftarg)		xfs_flush_buftarg(buftarg, 1)
+#define XFS_bflush(buftarg)		xfs_flush_buftarg(buftarg, 1)
 
 #endif	/* __XFS_BUF_H__ */

+ 2 - 4
fs/xfs/linux-2.6/xfs_file.c

@@ -509,16 +509,14 @@ linvfs_open_exec(
 	vnode_t		*vp = LINVFS_GET_VP(inode);
 	xfs_mount_t	*mp = XFS_VFSTOM(vp->v_vfsp);
 	int		error = 0;
-	bhv_desc_t	*bdp;
 	xfs_inode_t	*ip;
 
 	if (vp->v_vfsp->vfs_flag & VFS_DMI) {
-		bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
-		if (!bdp) {
+		ip = xfs_vtoi(vp);
+		if (!ip) {
 			error = -EINVAL;
 			goto open_exec_out;
 		}
-		ip = XFS_BHVTOI(bdp);
 		if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) {
 			error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp,
 					       0, 0, 0, NULL);

+ 3 - 7
fs/xfs/linux-2.6/xfs_ioctl.c

@@ -146,13 +146,10 @@ xfs_find_handle(
 
 	if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {
 		xfs_inode_t	*ip;
-		bhv_desc_t	*bhv;
 		int		lock_mode;
 
 		/* need to get access to the xfs_inode to read the generation */
-		bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops);
-		ASSERT(bhv);
-		ip = XFS_BHVTOI(bhv);
+		ip = xfs_vtoi(vp);
 		ASSERT(ip);
 		lock_mode = xfs_ilock_map_shared(ip);
 
@@ -751,9 +748,8 @@ xfs_ioctl(
 			(ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
 			mp->m_rtdev_targp : mp->m_ddev_targp;
 
-		da.d_mem = da.d_miniosz = 1 << target->pbr_sshift;
-		/* The size dio will do in one go */
-		da.d_maxiosz = 64 * PAGE_CACHE_SIZE;
+		da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
+		da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
 
 		if (copy_to_user(arg, &da, sizeof(da)))
 			return -XFS_ERROR(EFAULT);

+ 81 - 40
fs/xfs/linux-2.6/xfs_iops.c

@@ -54,10 +54,45 @@
 #include <linux/capability.h>
 #include <linux/xattr.h>
 #include <linux/namei.h>
+#include <linux/security.h>
 
 #define IS_NOATIME(inode) ((inode->i_sb->s_flags & MS_NOATIME) ||	\
 	(S_ISDIR(inode->i_mode) && inode->i_sb->s_flags & MS_NODIRATIME))
 
+/*
+ * Get a XFS inode from a given vnode.
+ */
+xfs_inode_t *
+xfs_vtoi(
+	struct vnode	*vp)
+{
+	bhv_desc_t      *bdp;
+
+	bdp = bhv_lookup_range(VN_BHV_HEAD(vp),
+			VNODE_POSITION_XFS, VNODE_POSITION_XFS);
+	if (unlikely(bdp == NULL))
+		return NULL;
+	return XFS_BHVTOI(bdp);
+}
+
+/*
+ * Bring the atime in the XFS inode uptodate.
+ * Used before logging the inode to disk or when the Linux inode goes away.
+ */
+void
+xfs_synchronize_atime(
+	xfs_inode_t	*ip)
+{
+	vnode_t		*vp;
+
+	vp = XFS_ITOV_NULL(ip);
+	if (vp) {
+		struct inode *inode = &vp->v_inode;
+		ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
+		ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
+	}
+}
+
 /*
  * Change the requested timestamp in the given inode.
  * We don't lock across timestamp updates, and we don't log them but
@@ -77,23 +112,6 @@ xfs_ichgtime(
 	struct inode	*inode = LINVFS_GET_IP(XFS_ITOV(ip));
 	timespec_t	tv;
 
-	/*
-	 * We're not supposed to change timestamps in readonly-mounted
-	 * filesystems.  Throw it away if anyone asks us.
-	 */
-	if (unlikely(IS_RDONLY(inode)))
-		return;
-
-	/*
-	 * Don't update access timestamps on reads if mounted "noatime".
-	 * Throw it away if anyone asks us.
-	 */
-	if (unlikely(
-	    (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&
-	    (flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==
-			XFS_ICHGTIME_ACC))
-		return;
-
 	nanotime(&tv);
 	if (flags & XFS_ICHGTIME_MOD) {
 		inode->i_mtime = tv;
@@ -130,8 +148,6 @@ xfs_ichgtime(
  * Variant on the above which avoids querying the system clock
  * in situations where we know the Linux inode timestamps have
  * just been updated (and so we can update our inode cheaply).
- * We also skip the readonly and noatime checks here, they are
- * also catered for already.
  */
 void
 xfs_ichgtime_fast(
@@ -142,20 +158,16 @@ xfs_ichgtime_fast(
 	timespec_t	*tvp;
 
 	/*
-	 * We're not supposed to change timestamps in readonly-mounted
-	 * filesystems.  Throw it away if anyone asks us.
+	 * Atime updates for read() & friends are handled lazily now, and
+	 * explicit updates must go through xfs_ichgtime()
 	 */
-	if (unlikely(IS_RDONLY(inode)))
-		return;
+	ASSERT((flags & XFS_ICHGTIME_ACC) == 0);
 
 	/*
-	 * Don't update access timestamps on reads if mounted "noatime".
-	 * Throw it away if anyone asks us.
+	 * We're not supposed to change timestamps in readonly-mounted
+	 * filesystems.  Throw it away if anyone asks us.
 	 */
-	if (unlikely(
-	    (ip->i_mount->m_flags & XFS_MOUNT_NOATIME || IS_NOATIME(inode)) &&
-	    ((flags & (XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD|XFS_ICHGTIME_CHG)) ==
-			XFS_ICHGTIME_ACC)))
+	if (unlikely(IS_RDONLY(inode)))
 		return;
 
 	if (flags & XFS_ICHGTIME_MOD) {
@@ -163,11 +175,6 @@ xfs_ichgtime_fast(
 		ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
 		ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec;
 	}
-	if (flags & XFS_ICHGTIME_ACC) {
-		tvp = &inode->i_atime;
-		ip->i_d.di_atime.t_sec = (__int32_t)tvp->tv_sec;
-		ip->i_d.di_atime.t_nsec = (__int32_t)tvp->tv_nsec;
-	}
 	if (flags & XFS_ICHGTIME_CHG) {
 		tvp = &inode->i_ctime;
 		ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec;
@@ -213,6 +220,39 @@ validate_fields(
 	}
 }
 
+/*
+ * Hook in SELinux.  This is not quite correct yet, what we really need
+ * here (as we do for default ACLs) is a mechanism by which creation of
+ * these attrs can be journalled at inode creation time (along with the
+ * inode, of course, such that log replay can't cause these to be lost).
+ */
+STATIC int
+linvfs_init_security(
+	struct vnode	*vp,
+	struct inode	*dir)
+{
+	struct inode	*ip = LINVFS_GET_IP(vp);
+	size_t		length;
+	void		*value;
+	char		*name;
+	int		error;
+
+	error = security_inode_init_security(ip, dir, &name, &value, &length);
+	if (error) {
+		if (error == -EOPNOTSUPP)
+			return 0;
+		return -error;
+	}
+
+	VOP_ATTR_SET(vp, name, value, length, ATTR_SECURE, NULL, error);
+	if (!error)
+		VMODIFY(vp);
+
+	kfree(name);
+	kfree(value);
+	return error;
+}
+
 /*
  * Determine whether a process has a valid fs_struct (kernel daemons
  * like knfsd don't have an fs_struct).
@@ -278,6 +318,9 @@ linvfs_mknod(
 		break;
 	}
 
+	if (!error)
+		error = linvfs_init_security(vp, dir);
+
 	if (default_acl) {
 		if (!error) {
 			error = _ACL_INHERIT(vp, &va, default_acl);
@@ -294,8 +337,6 @@ linvfs_mknod(
 				teardown.d_inode = ip = LINVFS_GET_IP(vp);
 				teardown.d_name = dentry->d_name;
 
-				vn_mark_bad(vp);
-				
 				if (S_ISDIR(mode))
 					VOP_RMDIR(dvp, &teardown, NULL, err2);
 				else
@@ -506,7 +547,7 @@ linvfs_follow_link(
 	ASSERT(dentry);
 	ASSERT(nd);
 
-	link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL);
+	link = (char *)kmalloc(MAXPATHLEN+1, GFP_KERNEL);
 	if (!link) {
 		nd_set_link(nd, ERR_PTR(-ENOMEM));
 		return NULL;
@@ -522,12 +563,12 @@ linvfs_follow_link(
 	vp = LINVFS_GET_VP(dentry->d_inode);
 
 	iov.iov_base = link;
-	iov.iov_len = MAXNAMELEN;
+	iov.iov_len = MAXPATHLEN;
 
 	uio->uio_iov = &iov;
 	uio->uio_offset = 0;
 	uio->uio_segflg = UIO_SYSSPACE;
-	uio->uio_resid = MAXNAMELEN;
+	uio->uio_resid = MAXPATHLEN;
 	uio->uio_iovcnt = 1;
 
 	VOP_READLINK(vp, uio, 0, NULL, error);
@@ -535,7 +576,7 @@ linvfs_follow_link(
 		kfree(link);
 		link = ERR_PTR(-error);
 	} else {
-		link[MAXNAMELEN - uio->uio_resid] = '\0';
+		link[MAXPATHLEN - uio->uio_resid] = '\0';
 	}
 	kfree(uio);
 

+ 0 - 5
fs/xfs/linux-2.6/xfs_iops.h

@@ -26,11 +26,6 @@ extern struct file_operations linvfs_file_operations;
 extern struct file_operations linvfs_invis_file_operations;
 extern struct file_operations linvfs_dir_operations;
 
-extern struct address_space_operations linvfs_aops;
-
-extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
-extern void linvfs_unwritten_done(struct buffer_head *, int);
-
 extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *,
                         int, unsigned int, void __user *);
 

+ 1 - 5
fs/xfs/linux-2.6/xfs_linux.h

@@ -110,10 +110,6 @@
  * delalloc and these ondisk-uninitialised buffers.
  */
 BUFFER_FNS(PrivateStart, unwritten);
-static inline void set_buffer_unwritten_io(struct buffer_head *bh)
-{
-	bh->b_end_io = linvfs_unwritten_done;
-}
 
 #define restricted_chown	xfs_params.restrict_chown.val
 #define irix_sgid_inherit	xfs_params.sgid_inherit.val
@@ -232,7 +228,7 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
 #define xfs_itruncate_data(ip, off)	\
 	(-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
 #define xfs_statvfs_fsid(statp, mp)	\
-	({ u64 id = huge_encode_dev((mp)->m_dev);	\
+	({ u64 id = huge_encode_dev((mp)->m_ddev_targp->bt_dev); \
 	   __kernel_fsid_t *fsid = &(statp)->f_fsid;	\
 	(fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); })
 

+ 19 - 37
fs/xfs/linux-2.6/xfs_lrw.c

@@ -233,8 +233,8 @@ xfs_read(
 		xfs_buftarg_t	*target =
 			(ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
 				mp->m_rtdev_targp : mp->m_ddev_targp;
-		if ((*offset & target->pbr_smask) ||
-		    (size & target->pbr_smask)) {
+		if ((*offset & target->bt_smask) ||
+		    (size & target->bt_smask)) {
 			if (*offset == ip->i_d.di_size) {
 				return (0);
 			}
@@ -281,9 +281,6 @@ xfs_read(
 
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
-	if (likely(!(ioflags & IO_INVIS)))
-		xfs_ichgtime_fast(ip, inode, XFS_ICHGTIME_ACC);
-
 unlock_isem:
 	if (unlikely(ioflags & IO_ISDIRECT))
 		mutex_unlock(&inode->i_mutex);
@@ -346,9 +343,6 @@ xfs_sendfile(
 	if (ret > 0)
 		XFS_STATS_ADD(xs_read_bytes, ret);
 
-	if (likely(!(ioflags & IO_INVIS)))
-		xfs_ichgtime_fast(ip, LINVFS_GET_IP(vp), XFS_ICHGTIME_ACC);
-
 	return ret;
 }
 
@@ -362,7 +356,6 @@ STATIC int				/* error (positive) */
 xfs_zero_last_block(
 	struct inode	*ip,
 	xfs_iocore_t	*io,
-	xfs_off_t	offset,
 	xfs_fsize_t	isize,
 	xfs_fsize_t	end_size)
 {
@@ -371,19 +364,16 @@ xfs_zero_last_block(
 	int		nimaps;
 	int		zero_offset;
 	int		zero_len;
-	int		isize_fsb_offset;
 	int		error = 0;
 	xfs_bmbt_irec_t	imap;
 	loff_t		loff;
-	size_t		lsize;
 
 	ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
-	ASSERT(offset > isize);
 
 	mp = io->io_mount;
 
-	isize_fsb_offset = XFS_B_FSB_OFFSET(mp, isize);
-	if (isize_fsb_offset == 0) {
+	zero_offset = XFS_B_FSB_OFFSET(mp, isize);
+	if (zero_offset == 0) {
 		/*
 		 * There are no extra bytes in the last block on disk to
 		 * zero, so return.
@@ -413,10 +403,8 @@ xfs_zero_last_block(
 	 */
 	XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
 	loff = XFS_FSB_TO_B(mp, last_fsb);
-	lsize = XFS_FSB_TO_B(mp, 1);
 
-	zero_offset = isize_fsb_offset;
-	zero_len = mp->m_sb.sb_blocksize - isize_fsb_offset;
+	zero_len = mp->m_sb.sb_blocksize - zero_offset;
 
 	error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);
 
@@ -447,20 +435,17 @@ xfs_zero_eof(
 	struct inode	*ip = LINVFS_GET_IP(vp);
 	xfs_fileoff_t	start_zero_fsb;
 	xfs_fileoff_t	end_zero_fsb;
-	xfs_fileoff_t	prev_zero_fsb;
 	xfs_fileoff_t	zero_count_fsb;
 	xfs_fileoff_t	last_fsb;
 	xfs_extlen_t	buf_len_fsb;
-	xfs_extlen_t	prev_zero_count;
 	xfs_mount_t	*mp;
 	int		nimaps;
 	int		error = 0;
 	xfs_bmbt_irec_t	imap;
-	loff_t		loff;
-	size_t		lsize;
 
 	ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
 	ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
+	ASSERT(offset > isize);
 
 	mp = io->io_mount;
 
@@ -468,7 +453,7 @@ xfs_zero_eof(
 	 * First handle zeroing the block on which isize resides.
 	 * We only zero a part of that block so it is handled specially.
 	 */
-	error = xfs_zero_last_block(ip, io, offset, isize, end_size);
+	error = xfs_zero_last_block(ip, io, isize, end_size);
 	if (error) {
 		ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
 		ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
@@ -496,8 +481,6 @@ xfs_zero_eof(
 	}
 
 	ASSERT(start_zero_fsb <= end_zero_fsb);
-	prev_zero_fsb = NULLFILEOFF;
-	prev_zero_count = 0;
 	while (start_zero_fsb <= end_zero_fsb) {
 		nimaps = 1;
 		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
@@ -519,10 +502,7 @@ xfs_zero_eof(
 			 * that sits on a hole and sets the page as P_HOLE
 			 * and calls remapf if it is a mapped file.
 			 */
-			prev_zero_fsb = NULLFILEOFF;
-			prev_zero_count = 0;
-			start_zero_fsb = imap.br_startoff +
-					 imap.br_blockcount;
+			start_zero_fsb = imap.br_startoff + imap.br_blockcount;
 			ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
 			continue;
 		}
@@ -543,17 +523,15 @@ xfs_zero_eof(
 		 */
 		XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
 
-		loff = XFS_FSB_TO_B(mp, start_zero_fsb);
-		lsize = XFS_FSB_TO_B(mp, buf_len_fsb);
-
-		error = xfs_iozero(ip, loff, lsize, end_size);
+		error = xfs_iozero(ip,
+				   XFS_FSB_TO_B(mp, start_zero_fsb),
+				   XFS_FSB_TO_B(mp, buf_len_fsb),
+				   end_size);
 
 		if (error) {
 			goto out_lock;
 		}
 
-		prev_zero_fsb = start_zero_fsb;
-		prev_zero_count = buf_len_fsb;
 		start_zero_fsb = imap.br_startoff + buf_len_fsb;
 		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
 
@@ -640,7 +618,7 @@ xfs_write(
 			(xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
 				mp->m_rtdev_targp : mp->m_ddev_targp;
 
-		if ((pos & target->pbr_smask) || (count & target->pbr_smask))
+		if ((pos & target->bt_smask) || (count & target->bt_smask))
 			return XFS_ERROR(-EINVAL);
 
 		if (!VN_CACHED(vp) && pos < i_size_read(inode))
@@ -831,6 +809,10 @@ retry:
 		goto retry;
 	}
 
+	isize = i_size_read(inode);
+	if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
+		*offset = isize;
+
 	if (*offset > xip->i_d.di_size) {
 		xfs_ilock(xip, XFS_ILOCK_EXCL);
 		if (*offset > xip->i_d.di_size) {
@@ -956,7 +938,7 @@ xfs_bdstrat_cb(struct xfs_buf *bp)
 
 	mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
 	if (!XFS_FORCED_SHUTDOWN(mp)) {
-		pagebuf_iorequest(bp);
+		xfs_buf_iorequest(bp);
 		return 0;
 	} else {
 		xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
@@ -1009,7 +991,7 @@ xfsbdstrat(
 		 * if (XFS_BUF_IS_GRIO(bp)) {
 		 */
 
-		pagebuf_iorequest(bp);
+		xfs_buf_iorequest(bp);
 		return 0;
 	}
 

+ 1 - 1
fs/xfs/linux-2.6/xfs_stats.c

@@ -34,7 +34,7 @@ xfs_read_xfsstats(
 	__uint64_t	xs_write_bytes = 0;
 	__uint64_t	xs_read_bytes = 0;
 
-	static struct xstats_entry {
+	static const struct xstats_entry {
 		char	*desc;
 		int	endpoint;
 	} xstats[] = {

+ 9 - 9
fs/xfs/linux-2.6/xfs_stats.h

@@ -109,15 +109,15 @@ struct xfsstats {
 	__uint32_t		vn_remove;	/* # times vn_remove called */
 	__uint32_t		vn_free;	/* # times vn_free called */
 #define XFSSTAT_END_BUF			(XFSSTAT_END_VNODE_OPS+9)
-	__uint32_t		pb_get;
-	__uint32_t		pb_create;
-	__uint32_t		pb_get_locked;
-	__uint32_t		pb_get_locked_waited;
-	__uint32_t		pb_busy_locked;
-	__uint32_t		pb_miss_locked;
-	__uint32_t		pb_page_retries;
-	__uint32_t		pb_page_found;
-	__uint32_t		pb_get_read;
+	__uint32_t		xb_get;
+	__uint32_t		xb_create;
+	__uint32_t		xb_get_locked;
+	__uint32_t		xb_get_locked_waited;
+	__uint32_t		xb_busy_locked;
+	__uint32_t		xb_miss_locked;
+	__uint32_t		xb_page_retries;
+	__uint32_t		xb_page_found;
+	__uint32_t		xb_get_read;
 /* Extra precision counters */
 	__uint64_t		xs_xstrat_bytes;
 	__uint64_t		xs_write_bytes;

+ 11 - 8
fs/xfs/linux-2.6/xfs_super.c

@@ -306,13 +306,15 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
 		xfs_fs_cmn_err(CE_NOTE, mp,
 		  "Disabling barriers, not supported with external log device");
 		mp->m_flags &= ~XFS_MOUNT_BARRIER;
+		return;
 	}
 
-	if (mp->m_ddev_targp->pbr_bdev->bd_disk->queue->ordered ==
+	if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
 					QUEUE_ORDERED_NONE) {
 		xfs_fs_cmn_err(CE_NOTE, mp,
 		  "Disabling barriers, not supported by the underlying device");
 		mp->m_flags &= ~XFS_MOUNT_BARRIER;
+		return;
 	}
 
 	error = xfs_barrier_test(mp);
@@ -320,6 +322,7 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
 		xfs_fs_cmn_err(CE_NOTE, mp,
 		  "Disabling barriers, trial barrier write failed");
 		mp->m_flags &= ~XFS_MOUNT_BARRIER;
+		return;
 	}
 }
 
@@ -327,7 +330,7 @@ void
 xfs_blkdev_issue_flush(
 	xfs_buftarg_t		*buftarg)
 {
-	blkdev_issue_flush(buftarg->pbr_bdev, NULL);
+	blkdev_issue_flush(buftarg->bt_bdev, NULL);
 }
 
 STATIC struct inode *
@@ -576,7 +579,7 @@ xfssyncd(
 		timeleft = schedule_timeout_interruptible(timeleft);
 		/* swsusp */
 		try_to_freeze();
-		if (kthread_should_stop())
+		if (kthread_should_stop() && list_empty(&vfsp->vfs_sync_list))
 			break;
 
 		spin_lock(&vfsp->vfs_sync_lock);
@@ -966,9 +969,9 @@ init_xfs_fs( void )
 	if (error < 0)
 		goto undo_zones;
 
-	error = pagebuf_init();
+	error = xfs_buf_init();
 	if (error < 0)
-		goto undo_pagebuf;
+		goto undo_buffers;
 
 	vn_init();
 	xfs_init();
@@ -982,9 +985,9 @@ init_xfs_fs( void )
 	return 0;
 
 undo_register:
-	pagebuf_terminate();
+	xfs_buf_terminate();
 
-undo_pagebuf:
+undo_buffers:
 	linvfs_destroy_zones();
 
 undo_zones:
@@ -998,7 +1001,7 @@ exit_xfs_fs( void )
 	XFS_DM_EXIT(&xfs_fs_type);
 	unregister_filesystem(&xfs_fs_type);
 	xfs_cleanup();
-	pagebuf_terminate();
+	xfs_buf_terminate();
 	linvfs_destroy_zones();
 	ktrace_uninit();
 }

+ 0 - 1
fs/xfs/linux-2.6/xfs_vnode.c

@@ -106,7 +106,6 @@ vn_revalidate_core(
 	inode->i_blocks	    = vap->va_nblocks;
 	inode->i_mtime	    = vap->va_mtime;
 	inode->i_ctime	    = vap->va_ctime;
-	inode->i_atime	    = vap->va_atime;
 	inode->i_blksize    = vap->va_blocksize;
 	if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
 		inode->i_flags |= S_IMMUTABLE;

+ 19 - 0
fs/xfs/linux-2.6/xfs_vnode.h

@@ -565,6 +565,25 @@ static inline int VN_BAD(struct vnode *vp)
 	return is_bad_inode(LINVFS_GET_IP(vp));
 }
 
+/*
+ * Extracting atime values in various formats
+ */
+static inline void vn_atime_to_bstime(struct vnode *vp, xfs_bstime_t *bs_atime)
+{
+	bs_atime->tv_sec = vp->v_inode.i_atime.tv_sec;
+	bs_atime->tv_nsec = vp->v_inode.i_atime.tv_nsec;
+}
+
+static inline void vn_atime_to_timespec(struct vnode *vp, struct timespec *ts)
+{
+	*ts = vp->v_inode.i_atime;
+}
+
+static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
+{
+	*tt = vp->v_inode.i_atime.tv_sec;
+}
+
 /*
  * Some useful predicates.
  */

+ 2 - 2
fs/xfs/quota/xfs_dquot_item.c

@@ -239,7 +239,7 @@ xfs_qm_dquot_logitem_pushbuf(
 	 * trying to duplicate our effort.
 	 */
 	ASSERT(qip->qli_pushbuf_flag != 0);
-	ASSERT(qip->qli_push_owner == get_thread_id());
+	ASSERT(qip->qli_push_owner == current_pid());
 
 	/*
 	 * If flushlock isn't locked anymore, chances are that the
@@ -333,7 +333,7 @@ xfs_qm_dquot_logitem_trylock(
 			qip->qli_pushbuf_flag = 1;
 			ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno);
 #ifdef DEBUG
-			qip->qli_push_owner = get_thread_id();
+			qip->qli_push_owner = current_pid();
 #endif
 			/*
 			 * The dquot is left locked.

+ 11 - 7
fs/xfs/quota/xfs_qm.c

@@ -1392,11 +1392,12 @@ xfs_qm_qino_alloc(
 {
 	xfs_trans_t	*tp;
 	int		error;
-	unsigned long s;
+	unsigned long	s;
 	cred_t		zerocr;
+	xfs_inode_t	zeroino;
 	int		committed;
 
-	tp = xfs_trans_alloc(mp,XFS_TRANS_QM_QINOCREATE);
+	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
 	if ((error = xfs_trans_reserve(tp,
 				      XFS_QM_QINOCREATE_SPACE_RES(mp),
 				      XFS_CREATE_LOG_RES(mp), 0,
@@ -1406,8 +1407,9 @@ xfs_qm_qino_alloc(
 		return (error);
 	}
 	memset(&zerocr, 0, sizeof(zerocr));
+	memset(&zeroino, 0, sizeof(zeroino));
 
-	if ((error = xfs_dir_ialloc(&tp, mp->m_rootip, S_IFREG, 1, 0,
+	if ((error = xfs_dir_ialloc(&tp, &zeroino, S_IFREG, 1, 0,
 				   &zerocr, 0, 1, ip, &committed))) {
 		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
 				 XFS_TRANS_ABORT);
@@ -1918,9 +1920,7 @@ xfs_qm_quotacheck(
 	 * at this point (because we intentionally didn't in dqget_noattach).
 	 */
 	if (error) {
-		xfs_qm_dqpurge_all(mp,
-				   XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA|
-				   XFS_QMOPT_PQUOTA|XFS_QMOPT_QUOTAOFF);
+		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF);
 		goto error_return;
 	}
 	/*
@@ -2743,6 +2743,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
 		xfs_dqunlock(udqp);
 		ASSERT(ip->i_udquot == NULL);
 		ip->i_udquot = udqp;
+		ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp));
 		ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
 		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
 	}
@@ -2752,7 +2753,10 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
 		xfs_dqunlock(gdqp);
 		ASSERT(ip->i_gdquot == NULL);
 		ip->i_gdquot = gdqp;
-		ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id));
+		ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp));
+		ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ?
+			ip->i_d.di_gid : ip->i_d.di_projid) ==
+				be32_to_cpu(gdqp->q_core.d_id));
 		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
 	}
 }

+ 25 - 35
fs/xfs/support/debug.c

@@ -27,44 +27,11 @@ static DEFINE_SPINLOCK(xfs_err_lock);
 /* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */
 #define XFS_MAX_ERR_LEVEL	7
 #define XFS_ERR_MASK		((1 << 3) - 1)
-static char		*err_level[XFS_MAX_ERR_LEVEL+1] =
+static const char * const	err_level[XFS_MAX_ERR_LEVEL+1] =
 					{KERN_EMERG, KERN_ALERT, KERN_CRIT,
 					 KERN_ERR, KERN_WARNING, KERN_NOTICE,
 					 KERN_INFO, KERN_DEBUG};
 
-void
-assfail(char *a, char *f, int l)
-{
-    printk("XFS assertion failed: %s, file: %s, line: %d\n", a, f, l);
-    BUG();
-}
-
-#if ((defined(DEBUG) || defined(INDUCE_IO_ERRROR)) && !defined(NO_WANT_RANDOM))
-
-unsigned long
-random(void)
-{
-	static unsigned long	RandomValue = 1;
-	/* cycles pseudo-randomly through all values between 1 and 2^31 - 2 */
-	register long	rv = RandomValue;
-	register long	lo;
-	register long	hi;
-
-	hi = rv / 127773;
-	lo = rv % 127773;
-	rv = 16807 * lo - 2836 * hi;
-	if( rv <= 0 ) rv += 2147483647;
-	return( RandomValue = rv );
-}
-
-int
-get_thread_id(void)
-{
-	return current->pid;
-}
-
-#endif /* DEBUG || INDUCE_IO_ERRROR || !NO_WANT_RANDOM */
-
 void
 cmn_err(register int level, char *fmt, ...)
 {
@@ -90,7 +57,6 @@ cmn_err(register int level, char *fmt, ...)
 		BUG();
 }
 
-
 void
 icmn_err(register int level, char *fmt, va_list ap)
 {
@@ -109,3 +75,27 @@ icmn_err(register int level, char *fmt, va_list ap)
 	if (level == CE_PANIC)
 		BUG();
 }
+
+void
+assfail(char *expr, char *file, int line)
+{
+	printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line);
+	BUG();
+}
+
+#if ((defined(DEBUG) || defined(INDUCE_IO_ERRROR)) && !defined(NO_WANT_RANDOM))
+unsigned long random(void)
+{
+	static unsigned long	RandomValue = 1;
+	/* cycles pseudo-randomly through all values between 1 and 2^31 - 2 */
+	register long	rv = RandomValue;
+	register long	lo;
+	register long	hi;
+
+	hi = rv / 127773;
+	lo = rv % 127773;
+	rv = 16807 * lo - 2836 * hi;
+	if (rv <= 0) rv += 2147483647;
+	return RandomValue = rv;
+}
+#endif /* DEBUG || INDUCE_IO_ERRROR || !NO_WANT_RANDOM */

+ 12 - 13
fs/xfs/support/debug.h

@@ -31,24 +31,23 @@ extern void icmn_err(int, char *, va_list)
 	__attribute__ ((format (printf, 2, 0)));
 extern void cmn_err(int, char *, ...)
 	__attribute__ ((format (printf, 2, 3)));
+extern void assfail(char *expr, char *f, int l);
 
-#ifndef STATIC
-# define STATIC static
-#endif
+#define prdev(fmt,targ,args...) \
+	printk("Device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
 
-#ifdef DEBUG
-# define ASSERT(EX)	((EX) ? ((void)0) : assfail(#EX, __FILE__, __LINE__))
-#else
-# define ASSERT(x)	((void)0)
-#endif
+#define ASSERT_ALWAYS(expr)	\
+	(unlikely((expr) != 0) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
 
-extern void assfail(char *, char *, int);
-#ifdef DEBUG
+#ifndef DEBUG
+# define ASSERT(expr)	((void)0)
+#else
+# define ASSERT(expr)	ASSERT_ALWAYS(expr)
 extern unsigned long random(void);
-extern int get_thread_id(void);
 #endif
 
-#define ASSERT_ALWAYS(EX)  ((EX)?((void)0):assfail(#EX, __FILE__, __LINE__))
-#define	debug_stop_all_cpus(param)	/* param is "cpumask_t *" */
+#ifndef STATIC
+# define STATIC static
+#endif
 
 #endif  /* __XFS_SUPPORT_DEBUG_H__ */

+ 14 - 9
fs/xfs/support/uuid.c

@@ -27,6 +27,16 @@ uuid_init(void)
 	mutex_init(&uuid_monitor);
 }
 
+
+/* IRIX interpretation of an uuid_t */
+typedef struct {
+	__be32	uu_timelow;
+	__be16	uu_timemid;
+	__be16	uu_timehi;
+	__be16	uu_clockseq;
+	__be16	uu_node[3];
+} xfs_uu_t;
+
 /*
  * uuid_getnodeuniq - obtain the node unique fields of a UUID.
  *
@@ -36,16 +46,11 @@ uuid_init(void)
 void
 uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
 {
-	char	*uu = (char *)uuid;
-
-	/* on IRIX, this function assumes big-endian fields within
-	 * the uuid, so we use INT_GET to get the same result on
-	 * little-endian systems
-	 */
+	xfs_uu_t *uup = (xfs_uu_t *)uuid;
 
-	fsid[0] = (INT_GET(*(u_int16_t*)(uu+8), ARCH_CONVERT) << 16) +
-		   INT_GET(*(u_int16_t*)(uu+4), ARCH_CONVERT);
-	fsid[1] =  INT_GET(*(u_int32_t*)(uu  ), ARCH_CONVERT);
+	fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
+		   be16_to_cpu(uup->uu_timemid);
+	fsid[1] = be16_to_cpu(uup->uu_timelow);
 }
 
 void

+ 19 - 3
fs/xfs/xfs_arch.h

@@ -40,6 +40,22 @@
 #undef XFS_NATIVE_HOST
 #endif
 
+#ifdef XFS_NATIVE_HOST
+#define cpu_to_be16(val)	((__be16)(val))
+#define cpu_to_be32(val)	((__be32)(val))
+#define cpu_to_be64(val)	((__be64)(val))
+#define be16_to_cpu(val)	((__uint16_t)(val))
+#define be32_to_cpu(val)	((__uint32_t)(val))
+#define be64_to_cpu(val)	((__uint64_t)(val))
+#else
+#define cpu_to_be16(val)	(__swab16((__uint16_t)(val)))
+#define cpu_to_be32(val)	(__swab32((__uint32_t)(val)))
+#define cpu_to_be64(val)	(__swab64((__uint64_t)(val)))
+#define be16_to_cpu(val)	(__swab16((__be16)(val)))
+#define be32_to_cpu(val)	(__swab32((__be32)(val)))
+#define be64_to_cpu(val)	(__swab64((__be64)(val)))
+#endif
+
 #endif	/* __KERNEL__ */
 
 /* do we need conversion? */
@@ -186,7 +202,7 @@ static inline void be64_add(__be64 *a, __s64 b)
  */ 
 
 #define XFS_GET_DIR_INO4(di) \
-	(((u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
+	(((__u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
 
 #define XFS_PUT_DIR_INO4(from, di) \
 do { \
@@ -197,9 +213,9 @@ do { \
 } while (0)
 
 #define XFS_DI_HI(di) \
-	(((u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
+	(((__u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
 #define XFS_DI_LO(di) \
-	(((u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7]))
+	(((__u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7]))
 
 #define XFS_GET_DIR_INO8(di)        \
 	(((xfs_ino_t)XFS_DI_LO(di) & 0xffffffffULL) | \

+ 6 - 6
fs/xfs/xfs_attr_leaf.c

@@ -128,7 +128,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
 		return (offset >= minforkoff) ? minforkoff : 0;
 	}
 
-	if (unlikely(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) {
+	if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
 		if (bytes <= XFS_IFORK_ASIZE(dp))
 			return mp->m_attroffset >> 3;
 		return 0;
@@ -157,7 +157,7 @@ xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
 {
 	unsigned long s;
 
-	if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR) &&
+	if ((mp->m_flags & XFS_MOUNT_ATTR2) &&
 	    !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) {
 		s = XFS_SB_LOCK(mp);
 		if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
@@ -311,7 +311,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
 	 */
 	totsize -= size;
 	if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname &&
-	    !(mp->m_flags & XFS_MOUNT_COMPAT_ATTR)) {
+	    (mp->m_flags & XFS_MOUNT_ATTR2)) {
 		/*
 		 * Last attribute now removed, revert to original
 		 * inode format making all literal area available
@@ -330,7 +330,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
 		dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
 		ASSERT(dp->i_d.di_forkoff);
 		ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname ||
-			(mp->m_flags & XFS_MOUNT_COMPAT_ATTR));
+			!(mp->m_flags & XFS_MOUNT_ATTR2));
 		dp->i_afp->if_ext_max =
 			XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
 		dp->i_df.if_ext_max =
@@ -739,7 +739,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
 				+ name_loc->namelen
 				+ INT_GET(name_loc->valuelen, ARCH_CONVERT);
 	}
-	if (!(dp->i_mount->m_flags & XFS_MOUNT_COMPAT_ATTR) &&
+	if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
 	    (bytes == sizeof(struct xfs_attr_sf_hdr)))
 		return(-1);
 	return(xfs_attr_shortform_bytesfit(dp, bytes));
@@ -778,7 +778,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
 		goto out;
 
 	if (forkoff == -1) {
-		ASSERT(!(dp->i_mount->m_flags & XFS_MOUNT_COMPAT_ATTR));
+		ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
 
 		/*
 		 * Last attribute was removed, revert to original

+ 43 - 36
fs/xfs/xfs_attr_leaf.h

@@ -63,7 +63,7 @@ struct xfs_trans;
  * the leaf_entry.  The namespaces are independent only because we also look
  * at the namespace bit when we are looking for a matching attribute name.
  *
- * We also store a "incomplete" bit in the leaf_entry.  It shows that an
+ * We also store an "incomplete" bit in the leaf_entry.  It shows that an
  * attribute is in the middle of being created and should not be shown to
  * the user if we crash during the time that the bit is set.  We clear the
  * bit when we have finished setting up the attribute.  We do this because
@@ -72,42 +72,48 @@ struct xfs_trans;
  */
 #define XFS_ATTR_LEAF_MAPSIZE	3	/* how many freespace slots */
 
+typedef struct xfs_attr_leaf_map {	/* RLE map of free bytes */
+	__uint16_t	base;	 	/* base of free region */
+	__uint16_t	size;	  	/* length of free region */
+} xfs_attr_leaf_map_t;
+
+typedef struct xfs_attr_leaf_hdr {	/* constant-structure header block */
+	xfs_da_blkinfo_t info;		/* block type, links, etc. */
+	__uint16_t	count;		/* count of active leaf_entry's */
+	__uint16_t	usedbytes;	/* num bytes of names/values stored */
+	__uint16_t	firstused;	/* first used byte in name area */
+	__uint8_t	holes;		/* != 0 if blk needs compaction */
+	__uint8_t	pad1;
+	xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE];
+					/* N largest free regions */
+} xfs_attr_leaf_hdr_t;
+
+typedef struct xfs_attr_leaf_entry {	/* sorted on key, not name */
+	xfs_dahash_t	hashval;	/* hash value of name */
+	__uint16_t	nameidx;	/* index into buffer of name/value */
+	__uint8_t	flags;		/* LOCAL/ROOT/SECURE/INCOMPLETE flag */
+	__uint8_t	pad2;		/* unused pad byte */
+} xfs_attr_leaf_entry_t;
+
+typedef struct xfs_attr_leaf_name_local {
+	__uint16_t	valuelen;	/* number of bytes in value */
+	__uint8_t	namelen;	/* length of name bytes */
+	__uint8_t	nameval[1];	/* name/value bytes */
+} xfs_attr_leaf_name_local_t;
+
+typedef struct xfs_attr_leaf_name_remote {
+	xfs_dablk_t	valueblk;	/* block number of value bytes */
+	__uint32_t	valuelen;	/* number of bytes in value */
+	__uint8_t	namelen;	/* length of name bytes */
+	__uint8_t	name[1];	/* name bytes */
+} xfs_attr_leaf_name_remote_t;
+
 typedef struct xfs_attr_leafblock {
-	struct xfs_attr_leaf_hdr {	/* constant-structure header block */
-		xfs_da_blkinfo_t info;	/* block type, links, etc. */
-		__uint16_t count;	/* count of active leaf_entry's */
-		__uint16_t usedbytes;	/* num bytes of names/values stored */
-		__uint16_t firstused;	/* first used byte in name area */
-		__uint8_t  holes;	/* != 0 if blk needs compaction */
-		__uint8_t  pad1;
-		struct xfs_attr_leaf_map {	  /* RLE map of free bytes */
-			__uint16_t base;	  /* base of free region */
-			__uint16_t size;	  /* length of free region */
-		} freemap[XFS_ATTR_LEAF_MAPSIZE]; /* N largest free regions */
-	} hdr;
-	struct xfs_attr_leaf_entry {	/* sorted on key, not name */
-		xfs_dahash_t hashval;	/* hash value of name */
-		__uint16_t nameidx;	/* index into buffer of name/value */
-		__uint8_t flags;	/* LOCAL/ROOT/SECURE/INCOMPLETE flag */
-		__uint8_t pad2;		/* unused pad byte */
-	} entries[1];			/* variable sized array */
-	struct xfs_attr_leaf_name_local {
-		__uint16_t valuelen;	/* number of bytes in value */
-		__uint8_t namelen;	/* length of name bytes */
-		__uint8_t nameval[1];	/* name/value bytes */
-	} namelist;			/* grows from bottom of buf */
-	struct xfs_attr_leaf_name_remote {
-		xfs_dablk_t valueblk;	/* block number of value bytes */
-		__uint32_t valuelen;	/* number of bytes in value */
-		__uint8_t namelen;	/* length of name bytes */
-		__uint8_t name[1];	/* name bytes */
-	} valuelist;			/* grows from bottom of buf */
+	xfs_attr_leaf_hdr_t	hdr;	/* constant-structure header block */
+	xfs_attr_leaf_entry_t	entries[1];	/* sorted on key, not name */
+	xfs_attr_leaf_name_local_t namelist;	/* grows from bottom of buf */
+	xfs_attr_leaf_name_remote_t valuelist;	/* grows from bottom of buf */
 } xfs_attr_leafblock_t;
-typedef struct xfs_attr_leaf_hdr xfs_attr_leaf_hdr_t;
-typedef struct xfs_attr_leaf_map xfs_attr_leaf_map_t;
-typedef struct xfs_attr_leaf_entry xfs_attr_leaf_entry_t;
-typedef struct xfs_attr_leaf_name_local xfs_attr_leaf_name_local_t;
-typedef struct xfs_attr_leaf_name_remote xfs_attr_leaf_name_remote_t;
 
 /*
  * Flags used in the leaf_entry[i].flags field.
@@ -150,7 +156,8 @@ xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
 		(leafp))[INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT)];
 }
 
-#define XFS_ATTR_LEAF_NAME(leafp,idx)		xfs_attr_leaf_name(leafp,idx)
+#define XFS_ATTR_LEAF_NAME(leafp,idx)		\
+	xfs_attr_leaf_name(leafp,idx)
 static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
 {
 	return (&((char *)

+ 251 - 161
fs/xfs/xfs_bmap.c

@@ -2146,13 +2146,176 @@ xfs_bmap_add_extent_hole_real(
 	return 0; /* keep gcc quite */
 }
 
+/*
+ * Adjust the size of the new extent based on di_extsize and rt extsize.
+ */
+STATIC int
+xfs_bmap_extsize_align(
+	xfs_mount_t	*mp,
+	xfs_bmbt_irec_t	*gotp,		/* next extent pointer */
+	xfs_bmbt_irec_t	*prevp,		/* previous extent pointer */
+	xfs_extlen_t	extsz,		/* align to this extent size */
+	int		rt,		/* is this a realtime inode? */
+	int		eof,		/* is extent at end-of-file? */
+	int		delay,		/* creating delalloc extent? */
+	int		convert,	/* overwriting unwritten extent? */
+	xfs_fileoff_t	*offp,		/* in/out: aligned offset */
+	xfs_extlen_t	*lenp)		/* in/out: aligned length */
+{
+	xfs_fileoff_t	orig_off;	/* original offset */
+	xfs_extlen_t	orig_alen;	/* original length */
+	xfs_fileoff_t	orig_end;	/* original off+len */
+	xfs_fileoff_t	nexto;		/* next file offset */
+	xfs_fileoff_t	prevo;		/* previous file offset */
+	xfs_fileoff_t	align_off;	/* temp for offset */
+	xfs_extlen_t	align_alen;	/* temp for length */
+	xfs_extlen_t	temp;		/* temp for calculations */
+
+	if (convert)
+		return 0;
+
+	orig_off = align_off = *offp;
+	orig_alen = align_alen = *lenp;
+	orig_end = orig_off + orig_alen;
+
+	/*
+	 * If this request overlaps an existing extent, then don't
+	 * attempt to perform any additional alignment.
+	 */
+	if (!delay && !eof &&
+	    (orig_off >= gotp->br_startoff) &&
+	    (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
+		return 0;
+	}
+
+	/*
+	 * If the file offset is unaligned vs. the extent size
+	 * we need to align it.  This will be possible unless
+	 * the file was previously written with a kernel that didn't
+	 * perform this alignment, or if a truncate shot us in the
+	 * foot.
+	 */
+	temp = do_mod(orig_off, extsz);
+	if (temp) {
+		align_alen += temp;
+		align_off -= temp;
+	}
+	/*
+	 * Same adjustment for the end of the requested area.
+	 */
+	if ((temp = (align_alen % extsz))) {
+		align_alen += extsz - temp;
+	}
+	/*
+	 * If the previous block overlaps with this proposed allocation
+	 * then move the start forward without adjusting the length.
+	 */
+	if (prevp->br_startoff != NULLFILEOFF) {
+		if (prevp->br_startblock == HOLESTARTBLOCK)
+			prevo = prevp->br_startoff;
+		else
+			prevo = prevp->br_startoff + prevp->br_blockcount;
+	} else
+		prevo = 0;
+	if (align_off != orig_off && align_off < prevo)
+		align_off = prevo;
+	/*
+	 * If the next block overlaps with this proposed allocation
+	 * then move the start back without adjusting the length,
+	 * but not before offset 0.
+	 * This may of course make the start overlap previous block,
+	 * and if we hit the offset 0 limit then the next block
+	 * can still overlap too.
+	 */
+	if (!eof && gotp->br_startoff != NULLFILEOFF) {
+		if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
+		    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
+			nexto = gotp->br_startoff + gotp->br_blockcount;
+		else
+			nexto = gotp->br_startoff;
+	} else
+		nexto = NULLFILEOFF;
+	if (!eof &&
+	    align_off + align_alen != orig_end &&
+	    align_off + align_alen > nexto)
+		align_off = nexto > align_alen ? nexto - align_alen : 0;
+	/*
+	 * If we're now overlapping the next or previous extent that
+	 * means we can't fit an extsz piece in this hole.  Just move
+	 * the start forward to the first valid spot and set
+	 * the length so we hit the end.
+	 */
+	if (align_off != orig_off && align_off < prevo)
+		align_off = prevo;
+	if (align_off + align_alen != orig_end &&
+	    align_off + align_alen > nexto &&
+	    nexto != NULLFILEOFF) {
+		ASSERT(nexto > prevo);
+		align_alen = nexto - align_off;
+	}
+
+	/*
+	 * If realtime, and the result isn't a multiple of the realtime
+	 * extent size we need to remove blocks until it is.
+	 */
+	if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
+		/*
+		 * We're not covering the original request, or
+		 * we won't be able to once we fix the length.
+		 */
+		if (orig_off < align_off ||
+		    orig_end > align_off + align_alen ||
+		    align_alen - temp < orig_alen)
+			return XFS_ERROR(EINVAL);
+		/*
+		 * Try to fix it by moving the start up.
+		 */
+		if (align_off + temp <= orig_off) {
+			align_alen -= temp;
+			align_off += temp;
+		}
+		/*
+		 * Try to fix it by moving the end in.
+		 */
+		else if (align_off + align_alen - temp >= orig_end)
+			align_alen -= temp;
+		/*
+		 * Set the start to the minimum then trim the length.
+		 */
+		else {
+			align_alen -= orig_off - align_off;
+			align_off = orig_off;
+			align_alen -= align_alen % mp->m_sb.sb_rextsize;
+		}
+		/*
+		 * Result doesn't cover the request, fail it.
+		 */
+		if (orig_off < align_off || orig_end > align_off + align_alen)
+			return XFS_ERROR(EINVAL);
+	} else {
+		ASSERT(orig_off >= align_off);
+		ASSERT(orig_end <= align_off + align_alen);
+	}
+
+#ifdef DEBUG
+	if (!eof && gotp->br_startoff != NULLFILEOFF)
+		ASSERT(align_off + align_alen <= gotp->br_startoff);
+	if (prevp->br_startoff != NULLFILEOFF)
+		ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
+#endif
+
+	*lenp = align_alen;
+	*offp = align_off;
+	return 0;
+}
+
 #define XFS_ALLOC_GAP_UNITS	4
 
 /*
  * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
  * It figures out where to ask the underlying allocator to put the new extent.
  */
-STATIC int				/* error */
+STATIC int
 xfs_bmap_alloc(
 	xfs_bmalloca_t	*ap)		/* bmap alloc argument struct */
 {
@@ -2163,10 +2326,10 @@ xfs_bmap_alloc(
 	xfs_mount_t	*mp;		/* mount point structure */
 	int		nullfb;		/* true if ap->firstblock isn't set */
 	int		rt;		/* true if inode is realtime */
-#ifdef __KERNEL__
-	xfs_extlen_t	prod=0;		/* product factor for allocators */
-	xfs_extlen_t	ralen=0;	/* realtime allocation length */
-#endif
+	xfs_extlen_t	prod = 0;	/* product factor for allocators */
+	xfs_extlen_t	ralen = 0;	/* realtime allocation length */
+	xfs_extlen_t	align;		/* minimum allocation alignment */
+	xfs_rtblock_t	rtx;
 
 #define	ISVALID(x,y)	\
 	(rt ? \
@@ -2182,125 +2345,25 @@ xfs_bmap_alloc(
 	nullfb = ap->firstblock == NULLFSBLOCK;
 	rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
 	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
-#ifdef __KERNEL__
 	if (rt) {
-		xfs_extlen_t	extsz;		/* file extent size for rt */
-		xfs_fileoff_t	nexto;		/* next file offset */
-		xfs_extlen_t	orig_alen;	/* original ap->alen */
-		xfs_fileoff_t	orig_end;	/* original off+len */
-		xfs_fileoff_t	orig_off;	/* original ap->off */
-		xfs_extlen_t	mod_off;	/* modulus calculations */
-		xfs_fileoff_t	prevo;		/* previous file offset */
-		xfs_rtblock_t	rtx;		/* realtime extent number */
-		xfs_extlen_t	temp;		/* temp for rt calculations */
-
-		/*
-		 * Set prod to match the realtime extent size.
-		 */
-		if (!(extsz = ap->ip->i_d.di_extsize))
-			extsz = mp->m_sb.sb_rextsize;
-		prod = extsz / mp->m_sb.sb_rextsize;
-		orig_off = ap->off;
-		orig_alen = ap->alen;
-		orig_end = orig_off + orig_alen;
-		/*
-		 * If the file offset is unaligned vs. the extent size
-		 * we need to align it.  This will be possible unless
-		 * the file was previously written with a kernel that didn't
-		 * perform this alignment.
-		 */
-		mod_off = do_mod(orig_off, extsz);
-		if (mod_off) {
-			ap->alen += mod_off;
-			ap->off -= mod_off;
-		}
-		/*
-		 * Same adjustment for the end of the requested area.
-		 */
-		if ((temp = (ap->alen % extsz)))
-			ap->alen += extsz - temp;
-		/*
-		 * If the previous block overlaps with this proposed allocation
-		 * then move the start forward without adjusting the length.
-		 */
-		prevo =
-			ap->prevp->br_startoff == NULLFILEOFF ?
-				0 :
-				(ap->prevp->br_startoff +
-				 ap->prevp->br_blockcount);
-		if (ap->off != orig_off && ap->off < prevo)
-			ap->off = prevo;
-		/*
-		 * If the next block overlaps with this proposed allocation
-		 * then move the start back without adjusting the length,
-		 * but not before offset 0.
-		 * This may of course make the start overlap previous block,
-		 * and if we hit the offset 0 limit then the next block
-		 * can still overlap too.
-		 */
-		nexto = (ap->eof || ap->gotp->br_startoff == NULLFILEOFF) ?
-			NULLFILEOFF : ap->gotp->br_startoff;
-		if (!ap->eof &&
-		    ap->off + ap->alen != orig_end &&
-		    ap->off + ap->alen > nexto)
-			ap->off = nexto > ap->alen ? nexto - ap->alen : 0;
-		/*
-		 * If we're now overlapping the next or previous extent that
-		 * means we can't fit an extsz piece in this hole.  Just move
-		 * the start forward to the first valid spot and set
-		 * the length so we hit the end.
-		 */
-		if ((ap->off != orig_off && ap->off < prevo) ||
-		    (ap->off + ap->alen != orig_end &&
-		     ap->off + ap->alen > nexto)) {
-			ap->off = prevo;
-			ap->alen = nexto - prevo;
-		}
-		/*
-		 * If the result isn't a multiple of rtextents we need to
-		 * remove blocks until it is.
-		 */
-		if ((temp = (ap->alen % mp->m_sb.sb_rextsize))) {
-			/*
-			 * We're not covering the original request, or
-			 * we won't be able to once we fix the length.
-			 */
-			if (orig_off < ap->off ||
-			    orig_end > ap->off + ap->alen ||
-			    ap->alen - temp < orig_alen)
-				return XFS_ERROR(EINVAL);
-			/*
-			 * Try to fix it by moving the start up.
-			 */
-			if (ap->off + temp <= orig_off) {
-				ap->alen -= temp;
-				ap->off += temp;
-			}
-			/*
-			 * Try to fix it by moving the end in.
-			 */
-			else if (ap->off + ap->alen - temp >= orig_end)
-				ap->alen -= temp;
-			/*
-			 * Set the start to the minimum then trim the length.
-			 */
-			else {
-				ap->alen -= orig_off - ap->off;
-				ap->off = orig_off;
-				ap->alen -= ap->alen % mp->m_sb.sb_rextsize;
-			}
-			/*
-			 * Result doesn't cover the request, fail it.
-			 */
-			if (orig_off < ap->off || orig_end > ap->off + ap->alen)
-				return XFS_ERROR(EINVAL);
-		}
+		align = ap->ip->i_d.di_extsize ?
+			ap->ip->i_d.di_extsize : mp->m_sb.sb_rextsize;
+		/* Set prod to match the extent size */
+		prod = align / mp->m_sb.sb_rextsize;
+
+		error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
+						align, rt, ap->eof, 0,
+						ap->conv, &ap->off, &ap->alen);
+		if (error)
+			return error;
+		ASSERT(ap->alen);
 		ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);
+
 		/*
 		 * If the offset & length are not perfectly aligned
 		 * then kill prod, it will just get us in trouble.
 		 */
-		if (do_mod(ap->off, extsz) || ap->alen % extsz)
+		if (do_mod(ap->off, align) || ap->alen % align)
 			prod = 1;
 		/*
 		 * Set ralen to be the actual requested length in rtextents.
@@ -2326,15 +2389,24 @@ xfs_bmap_alloc(
 			ap->rval = rtx * mp->m_sb.sb_rextsize;
 		} else
 			ap->rval = 0;
+	} else {
+		align = (ap->userdata && ap->ip->i_d.di_extsize &&
+			(ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)) ?
+			ap->ip->i_d.di_extsize : 0;
+		if (unlikely(align)) {
+			error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
+							align, rt,
+							ap->eof, 0, ap->conv,
+							&ap->off, &ap->alen);
+			ASSERT(!error);
+			ASSERT(ap->alen);
+		}
+		if (nullfb)
+			ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
+		else
+			ap->rval = ap->firstblock;
 	}
-#else
-	if (rt)
-		ap->rval = 0;
-#endif	/* __KERNEL__ */
-	else if (nullfb)
-		ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
-	else
-		ap->rval = ap->firstblock;
+
 	/*
 	 * If allocating at eof, and there's a previous real block,
 	 * try to use it's last block as our starting point.
@@ -2598,11 +2670,12 @@ xfs_bmap_alloc(
 			args.total = ap->total;
 			args.minlen = ap->minlen;
 		}
-		if (ap->ip->i_d.di_extsize) {
+		if (unlikely(ap->userdata && ap->ip->i_d.di_extsize &&
+			    (ap->ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE))) {
 			args.prod = ap->ip->i_d.di_extsize;
 			if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
 				args.mod = (xfs_extlen_t)(args.prod - args.mod);
-		} else if (mp->m_sb.sb_blocksize >= NBPP) {
+		} else if (unlikely(mp->m_sb.sb_blocksize >= NBPP)) {
 			args.prod = 1;
 			args.mod = 0;
 		} else {
@@ -3580,14 +3653,16 @@ xfs_bmap_search_extents(
 
 	ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp,
 					  lastxp, gotp, prevp);
-	rt = ip->i_d.di_flags & XFS_DIFLAG_REALTIME;
-	if(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM)) {
+	rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
+	if (unlikely(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM))) {
                 cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld "
 			"start_block : %llx start_off : %llx blkcnt : %llx "
 			"extent-state : %x \n",
-			(ip->i_mount)->m_fsname,(long long)ip->i_ino,
-			gotp->br_startblock, gotp->br_startoff,
-			gotp->br_blockcount,gotp->br_state);
+			(ip->i_mount)->m_fsname, (long long)ip->i_ino,
+			(unsigned long long)gotp->br_startblock,
+			(unsigned long long)gotp->br_startoff,
+			(unsigned long long)gotp->br_blockcount,
+			gotp->br_state);
         }
         return ep;
 }
@@ -3875,7 +3950,7 @@ xfs_bmap_add_attrfork(
 		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
 		if (!ip->i_d.di_forkoff)
 			ip->i_d.di_forkoff = mp->m_attroffset >> 3;
-		else if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR))
+		else if (mp->m_flags & XFS_MOUNT_ATTR2)
 			version = 2;
 		break;
 	default:
@@ -4023,13 +4098,13 @@ xfs_bmap_compute_maxlevels(
 	 */
 	if (whichfork == XFS_DATA_FORK) {
 		maxleafents = MAXEXTNUM;
-		sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ?
-			mp->m_attroffset : XFS_BMDR_SPACE_CALC(MINDBTPTRS);
+		sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?
+			XFS_BMDR_SPACE_CALC(MINDBTPTRS) : mp->m_attroffset;
 	} else {
 		maxleafents = MAXAEXTNUM;
-		sz = (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) ?
-			mp->m_sb.sb_inodesize - mp->m_attroffset :
-			XFS_BMDR_SPACE_CALC(MINABTPTRS);
+		sz = (mp->m_flags & XFS_MOUNT_ATTR2) ?
+			XFS_BMDR_SPACE_CALC(MINABTPTRS) :
+			mp->m_sb.sb_inodesize - mp->m_attroffset;
 	}
 	maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
 	minleafrecs = mp->m_bmap_dmnr[0];
@@ -4418,8 +4493,8 @@ xfs_bmap_read_extents(
 		num_recs = be16_to_cpu(block->bb_numrecs);
 		if (unlikely(i + num_recs > room)) {
 			ASSERT(i + num_recs <= room);
-			xfs_fs_cmn_err(CE_WARN, ip->i_mount,
-				"corrupt dinode %Lu, (btree extents).  Unmount and run xfs_repair.",
+			xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+				"corrupt dinode %Lu, (btree extents).",
 				(unsigned long long) ip->i_ino);
 			XFS_ERROR_REPORT("xfs_bmap_read_extents(1)",
 					 XFS_ERRLEVEL_LOW,
@@ -4590,6 +4665,7 @@ xfs_bmapi(
 	char		contig;		/* allocation must be one extent */
 	char		delay;		/* this request is for delayed alloc */
 	char		exact;		/* don't do all of wasdelayed extent */
+	char		convert;	/* unwritten extent I/O completion */
 	xfs_bmbt_rec_t	*ep;		/* extent list entry pointer */
 	int		error;		/* error return */
 	xfs_bmbt_irec_t	got;		/* current extent list record */
@@ -4643,7 +4719,7 @@ xfs_bmapi(
 	}
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
-	rt = XFS_IS_REALTIME_INODE(ip);
+	rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	ASSERT(ifp->if_ext_max ==
 	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
@@ -4654,6 +4730,7 @@ xfs_bmapi(
 	delay = (flags & XFS_BMAPI_DELAY) != 0;
 	trim = (flags & XFS_BMAPI_ENTIRE) == 0;
 	userdata = (flags & XFS_BMAPI_METADATA) == 0;
+	convert = (flags & XFS_BMAPI_CONVERT) != 0;
 	exact = (flags & XFS_BMAPI_EXACT) != 0;
 	rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
 	contig = (flags & XFS_BMAPI_CONTIG) != 0;
@@ -4748,15 +4825,25 @@ xfs_bmapi(
 			}
 			minlen = contig ? alen : 1;
 			if (delay) {
-				xfs_extlen_t	extsz = 0;
+				xfs_extlen_t	extsz;
 
 				/* Figure out the extent size, adjust alen */
 				if (rt) {
 					if (!(extsz = ip->i_d.di_extsize))
 						extsz = mp->m_sb.sb_rextsize;
-					alen = roundup(alen, extsz);
-					extsz = alen / mp->m_sb.sb_rextsize;
+				} else {
+					extsz = ip->i_d.di_extsize;
 				}
+				if (extsz) {
+					error = xfs_bmap_extsize_align(mp,
+							&got, &prev, extsz,
+							rt, eof, delay, convert,
+							&aoff, &alen);
+					ASSERT(!error);
+				}
+
+				if (rt)
+					extsz = alen / mp->m_sb.sb_rextsize;
 
 				/*
 				 * Make a transaction-less quota reservation for
@@ -4785,32 +4872,33 @@ xfs_bmapi(
 					xfs_bmap_worst_indlen(ip, alen);
 				ASSERT(indlen > 0);
 
-				if (rt)
+				if (rt) {
 					error = xfs_mod_incore_sb(mp,
 							XFS_SBS_FREXTENTS,
 							-(extsz), rsvd);
-				else
+				} else {
 					error = xfs_mod_incore_sb(mp,
 							XFS_SBS_FDBLOCKS,
 							-(alen), rsvd);
+				}
 				if (!error) {
 					error = xfs_mod_incore_sb(mp,
 							XFS_SBS_FDBLOCKS,
 							-(indlen), rsvd);
-					if (error && rt) {
-						xfs_mod_incore_sb(ip->i_mount,
+					if (error && rt)
+						xfs_mod_incore_sb(mp,
 							XFS_SBS_FREXTENTS,
 							extsz, rsvd);
-					} else if (error) {
-						xfs_mod_incore_sb(ip->i_mount,
+					else if (error)
+						xfs_mod_incore_sb(mp,
 							XFS_SBS_FDBLOCKS,
 							alen, rsvd);
-					}
 				}
 
 				if (error) {
-					if (XFS_IS_QUOTA_ON(ip->i_mount))
+					if (XFS_IS_QUOTA_ON(mp))
 						/* unreserve the blocks now */
+						(void)
 						XFS_TRANS_UNRESERVE_QUOTA_NBLKS(
 							mp, NULL, ip,
 							(long)alen, 0, rt ?
@@ -4849,6 +4937,7 @@ xfs_bmapi(
 				bma.firstblock = *firstblock;
 				bma.alen = alen;
 				bma.off = aoff;
+				bma.conv = convert;
 				bma.wasdel = wasdelay;
 				bma.minlen = minlen;
 				bma.low = flist->xbf_low;
@@ -5270,8 +5359,7 @@ xfs_bunmapi(
 		return 0;
 	}
 	XFS_STATS_INC(xs_blk_unmap);
-	isrt = (whichfork == XFS_DATA_FORK) &&
-	       (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);
+	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
 	start = bno;
 	bno = start + len - 1;
 	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
@@ -5443,7 +5531,7 @@ xfs_bunmapi(
 		}
 		if (wasdel) {
 			ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);
-			/* Update realtim/data freespace, unreserve quota */
+			/* Update realtime/data freespace, unreserve quota */
 			if (isrt) {
 				xfs_filblks_t rtexts;
 
@@ -5451,14 +5539,14 @@ xfs_bunmapi(
 				do_div(rtexts, mp->m_sb.sb_rextsize);
 				xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
 						(int)rtexts, rsvd);
-				XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,
-					-((long)del.br_blockcount), 0,
+				(void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
+					NULL, ip, -((long)del.br_blockcount), 0,
 					XFS_QMOPT_RES_RTBLKS);
 			} else {
 				xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
 						(int)del.br_blockcount, rsvd);
-				XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, NULL, ip,
-					-((long)del.br_blockcount), 0,
+				(void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
+					NULL, ip, -((long)del.br_blockcount), 0,
 					XFS_QMOPT_RES_REGBLKS);
 			}
 			ip->i_delayed_blks -= del.br_blockcount;
@@ -5652,7 +5740,9 @@ xfs_getbmap(
 		   ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
 		return XFS_ERROR(EINVAL);
 	if (whichfork == XFS_DATA_FORK) {
-		if (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC) {
+		if ((ip->i_d.di_extsize && (ip->i_d.di_flags &
+				(XFS_DIFLAG_REALTIME|XFS_DIFLAG_EXTSIZE))) ||
+		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
 			prealloced = 1;
 			fixlen = XFS_MAXIOFFSET(mp);
 		} else {

+ 6 - 1
fs/xfs/xfs_bmap.h

@@ -62,6 +62,10 @@ typedef	struct xfs_bmap_free
 #define	XFS_BMAPI_IGSTATE	0x200	/* Ignore state - */
 					/* combine contig. space */
 #define	XFS_BMAPI_CONTIG	0x400	/* must allocate only one extent */
+/*	XFS_BMAPI_DIRECT_IO	0x800	*/
+#define XFS_BMAPI_CONVERT	0x1000	/* unwritten extent conversion - */
+					/* need write cache flushing and no */
+					/* additional allocation alignments */
 
 #define	XFS_BMAPI_AFLAG(w)	xfs_bmapi_aflag(w)
 static inline int xfs_bmapi_aflag(int w)
@@ -101,7 +105,8 @@ typedef struct xfs_bmalloca {
 	char			wasdel;	/* replacing a delayed allocation */
 	char			userdata;/* set if is user data */
 	char			low;	/* low on space, using seq'l ags */
-	char			aeof;   /* allocated space at eof */
+	char			aeof;	/* allocated space at eof */
+	char			conv;	/* overwriting unwritten extents */
 } xfs_bmalloca_t;
 
 #ifdef __KERNEL__

+ 1 - 1
fs/xfs/xfs_clnt.h

@@ -57,7 +57,7 @@ struct xfs_mount_args {
 /*
  * XFS mount option flags -- args->flags1
  */
-#define	XFSMNT_COMPAT_ATTR	0x00000001	/* do not use ATTR2 format */
+#define	XFSMNT_ATTR2		0x00000001	/* allow ATTR2 EA format */
 #define	XFSMNT_WSYNC		0x00000002	/* safe mode nfs mount
 						 * compatible */
 #define	XFSMNT_INO64		0x00000004	/* move inode numbers up

+ 4 - 12
fs/xfs/xfs_dfrag.c

@@ -60,8 +60,6 @@ xfs_swapext(
 	xfs_bstat_t	*sbp;
 	struct file	*fp = NULL, *tfp = NULL;
 	vnode_t		*vp, *tvp;
-	bhv_desc_t      *bdp, *tbdp;
-	vn_bhv_head_t   *bhp, *tbhp;
 	static uint	lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
 	int		ilf_fields, tilf_fields;
 	int		error = 0;
@@ -90,13 +88,10 @@ xfs_swapext(
 		goto error0;
 	}
 
-	bhp = VN_BHV_HEAD(vp);
-	bdp = vn_bhv_lookup(bhp, &xfs_vnodeops);
-	if (bdp == NULL) {
+	ip = xfs_vtoi(vp);
+	if (ip == NULL) {
 		error = XFS_ERROR(EBADF);
 		goto error0;
-	} else {
-		ip = XFS_BHVTOI(bdp);
 	}
 
 	if (((tfp = fget((int)sxp->sx_fdtmp)) == NULL) ||
@@ -105,13 +100,10 @@ xfs_swapext(
 		goto error0;
 	}
 
-	tbhp = VN_BHV_HEAD(tvp);
-	tbdp = vn_bhv_lookup(tbhp, &xfs_vnodeops);
-	if (tbdp == NULL) {
+	tip = xfs_vtoi(tvp);
+	if (tip == NULL) {
 		error = XFS_ERROR(EBADF);
 		goto error0;
-	} else {
-		tip = XFS_BHVTOI(tbdp);
 	}
 
 	if (ip->i_mount != tip->i_mount) {

+ 17 - 5
fs/xfs/xfs_dinode.h

@@ -199,10 +199,16 @@ typedef enum xfs_dinode_fmt
 
 #define XFS_DFORK_DSIZE(dip,mp) \
 	XFS_CFORK_DSIZE_DISK(&(dip)->di_core, mp)
+#define XFS_DFORK_DSIZE_HOST(dip,mp) \
+	XFS_CFORK_DSIZE(&(dip)->di_core, mp)
 #define XFS_DFORK_ASIZE(dip,mp) \
 	XFS_CFORK_ASIZE_DISK(&(dip)->di_core, mp)
+#define XFS_DFORK_ASIZE_HOST(dip,mp) \
+	XFS_CFORK_ASIZE(&(dip)->di_core, mp)
 #define	XFS_DFORK_SIZE(dip,mp,w) \
 	XFS_CFORK_SIZE_DISK(&(dip)->di_core, mp, w)
+#define	XFS_DFORK_SIZE_HOST(dip,mp,w) \
+	XFS_CFORK_SIZE(&(dip)->di_core, mp, w)
 
 #define	XFS_DFORK_Q(dip)                    XFS_CFORK_Q_DISK(&(dip)->di_core)
 #define	XFS_DFORK_BOFF(dip)		    XFS_CFORK_BOFF_DISK(&(dip)->di_core)
@@ -216,6 +222,7 @@ typedef enum xfs_dinode_fmt
 #define	XFS_CFORK_FMT_SET(dcp,w,n) \
 	((w) == XFS_DATA_FORK ? \
 		((dcp)->di_format = (n)) : ((dcp)->di_aformat = (n)))
+#define	XFS_DFORK_FORMAT(dip,w) XFS_CFORK_FORMAT(&(dip)->di_core, w)
 
 #define	XFS_CFORK_NEXTENTS_DISK(dcp,w) \
 	((w) == XFS_DATA_FORK ? \
@@ -223,13 +230,13 @@ typedef enum xfs_dinode_fmt
 	 	INT_GET((dcp)->di_anextents, ARCH_CONVERT))
 #define XFS_CFORK_NEXTENTS(dcp,w) \
 	((w) == XFS_DATA_FORK ? (dcp)->di_nextents : (dcp)->di_anextents)
+#define	XFS_DFORK_NEXTENTS(dip,w) XFS_CFORK_NEXTENTS_DISK(&(dip)->di_core, w)
+#define	XFS_DFORK_NEXTENTS_HOST(dip,w) XFS_CFORK_NEXTENTS(&(dip)->di_core, w)
 
 #define	XFS_CFORK_NEXT_SET(dcp,w,n) \
 	((w) == XFS_DATA_FORK ? \
 		((dcp)->di_nextents = (n)) : ((dcp)->di_anextents = (n)))
 
-#define	XFS_DFORK_NEXTENTS(dip,w) XFS_CFORK_NEXTENTS_DISK(&(dip)->di_core, w)
-
 #define	XFS_BUF_TO_DINODE(bp)	((xfs_dinode_t *)XFS_BUF_PTR(bp))
 
 /*
@@ -246,8 +253,10 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_NOATIME_BIT   6	/* do not update atime */
 #define XFS_DIFLAG_NODUMP_BIT    7	/* do not dump */
 #define XFS_DIFLAG_RTINHERIT_BIT 8	/* create with realtime bit set */
-#define XFS_DIFLAG_PROJINHERIT_BIT  9	/* create with parents projid */
-#define XFS_DIFLAG_NOSYMLINKS_BIT  10	/* disallow symlink creation */
+#define XFS_DIFLAG_PROJINHERIT_BIT   9	/* create with parents projid */
+#define XFS_DIFLAG_NOSYMLINKS_BIT   10	/* disallow symlink creation */
+#define XFS_DIFLAG_EXTSIZE_BIT      11	/* inode extent size allocator hint */
+#define XFS_DIFLAG_EXTSZINHERIT_BIT 12	/* inherit inode extent size */
 #define XFS_DIFLAG_REALTIME      (1 << XFS_DIFLAG_REALTIME_BIT)
 #define XFS_DIFLAG_PREALLOC      (1 << XFS_DIFLAG_PREALLOC_BIT)
 #define XFS_DIFLAG_NEWRTBM       (1 << XFS_DIFLAG_NEWRTBM_BIT)
@@ -259,11 +268,14 @@ typedef enum xfs_dinode_fmt
 #define XFS_DIFLAG_RTINHERIT     (1 << XFS_DIFLAG_RTINHERIT_BIT)
 #define XFS_DIFLAG_PROJINHERIT   (1 << XFS_DIFLAG_PROJINHERIT_BIT)
 #define XFS_DIFLAG_NOSYMLINKS    (1 << XFS_DIFLAG_NOSYMLINKS_BIT)
+#define XFS_DIFLAG_EXTSIZE       (1 << XFS_DIFLAG_EXTSIZE_BIT)
+#define XFS_DIFLAG_EXTSZINHERIT  (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
 
 #define XFS_DIFLAG_ANY \
 	(XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
 	 XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
 	 XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
-	 XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS)
+	 XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
+	 XFS_DIFLAG_EXTSZINHERIT)
 
 #endif	/* __XFS_DINODE_H__ */

+ 1 - 1
fs/xfs/xfs_dir.c

@@ -176,7 +176,7 @@ xfs_dir_mount(xfs_mount_t *mp)
 	uint shortcount, leafcount, count;
 
 	mp->m_dirversion = 1;
-	if (mp->m_flags & XFS_MOUNT_COMPAT_ATTR) {
+	if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
 		shortcount = (mp->m_attroffset -
 				(uint)sizeof(xfs_dir_sf_hdr_t)) /
 				 (uint)sizeof(xfs_dir_sf_entry_t);

+ 2 - 0
fs/xfs/xfs_dir.h

@@ -135,6 +135,8 @@ void	xfs_dir_startup(void);	/* called exactly once */
 	((mp)->m_dirops.xd_shortform_to_single(args))
 
 #define	XFS_DIR_IS_V1(mp)	((mp)->m_dirversion == 1)
+#define	XFS_DIR_IS_V2(mp)	((mp)->m_dirversion == 2)
 extern xfs_dirops_t xfsv1_dirops;
+extern xfs_dirops_t xfsv2_dirops;
 
 #endif	/* __XFS_DIR_H__ */

+ 0 - 3
fs/xfs/xfs_dir2.h

@@ -72,9 +72,6 @@ typedef struct xfs_dir2_put_args {
 	struct uio	*uio;		/* uio control structure */
 } xfs_dir2_put_args_t;
 
-#define	XFS_DIR_IS_V2(mp)	((mp)->m_dirversion == 2)
-extern xfs_dirops_t	xfsv2_dirops;
-
 /*
  * Other interfaces used by the rest of the dir v2 code.
  */

+ 34 - 30
fs/xfs/xfs_dir_leaf.h

@@ -67,34 +67,38 @@ struct xfs_trans;
  */
 #define XFS_DIR_LEAF_MAPSIZE	3	/* how many freespace slots */
 
+typedef struct xfs_dir_leaf_map {	/* RLE map of free bytes */
+	__uint16_t	base;	 	/* base of free region */
+	__uint16_t	size; 		/* run length of free region */
+} xfs_dir_leaf_map_t;
+
+typedef struct xfs_dir_leaf_hdr {	/* constant-structure header block */
+	xfs_da_blkinfo_t info;		/* block type, links, etc. */
+	__uint16_t	count;		/* count of active leaf_entry's */
+	__uint16_t	namebytes;	/* num bytes of name strings stored */
+	__uint16_t	firstused;	/* first used byte in name area */
+	__uint8_t	holes;		/* != 0 if blk needs compaction */
+	__uint8_t	pad1;
+	xfs_dir_leaf_map_t freemap[XFS_DIR_LEAF_MAPSIZE];
+} xfs_dir_leaf_hdr_t;
+
+typedef struct xfs_dir_leaf_entry {	/* sorted on key, not name */
+	xfs_dahash_t	hashval;	/* hash value of name */
+	__uint16_t	nameidx;	/* index into buffer of name */
+	__uint8_t	namelen;	/* length of name string */
+	__uint8_t	pad2;
+} xfs_dir_leaf_entry_t;
+
+typedef struct xfs_dir_leaf_name {
+	xfs_dir_ino_t	inumber;	/* inode number for this key */
+	__uint8_t	name[1];	/* name string itself */
+} xfs_dir_leaf_name_t;
+
 typedef struct xfs_dir_leafblock {
-	struct xfs_dir_leaf_hdr {	/* constant-structure header block */
-		xfs_da_blkinfo_t info;	/* block type, links, etc. */
-		__uint16_t count;	/* count of active leaf_entry's */
-		__uint16_t namebytes;	/* num bytes of name strings stored */
-		__uint16_t firstused;	/* first used byte in name area */
-		__uint8_t  holes;	/* != 0 if blk needs compaction */
-		__uint8_t  pad1;
-		struct xfs_dir_leaf_map {/* RLE map of free bytes */
-			__uint16_t base; /* base of free region */
-			__uint16_t size; /* run length of free region */
-		} freemap[XFS_DIR_LEAF_MAPSIZE]; /* N largest free regions */
-	} hdr;
-	struct xfs_dir_leaf_entry {	/* sorted on key, not name */
-		xfs_dahash_t hashval;	/* hash value of name */
-		__uint16_t nameidx;	/* index into buffer of name */
-		__uint8_t namelen;	/* length of name string */
-		__uint8_t pad2;
-	} entries[1];			/* var sized array */
-	struct xfs_dir_leaf_name {
-		xfs_dir_ino_t inumber;	/* inode number for this key */
-		__uint8_t name[1];	/* name string itself */
-	} namelist[1];			/* grows from bottom of buf */
+	xfs_dir_leaf_hdr_t	hdr;	/* constant-structure header block */
+	xfs_dir_leaf_entry_t	entries[1];	/* var sized array */
+	xfs_dir_leaf_name_t	namelist[1];	/* grows from bottom of buf */
 } xfs_dir_leafblock_t;
-typedef struct xfs_dir_leaf_hdr xfs_dir_leaf_hdr_t;
-typedef struct xfs_dir_leaf_map xfs_dir_leaf_map_t;
-typedef struct xfs_dir_leaf_entry xfs_dir_leaf_entry_t;
-typedef struct xfs_dir_leaf_name xfs_dir_leaf_name_t;
 
 /*
  * Length of name for which a 512-byte block filesystem
@@ -126,11 +130,10 @@ typedef union {
 #define	XFS_PUT_COOKIE(c,mp,bno,entry,hash)	\
 	((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash))
 
-typedef struct xfs_dir_put_args
-{
+typedef struct xfs_dir_put_args {
 	xfs_dircook_t	cook;		/* cookie of (next) entry */
 	xfs_intino_t	ino;		/* inode number */
-	struct xfs_dirent	*dbp;		/* buffer pointer */
+	struct xfs_dirent *dbp;		/* buffer pointer */
 	char		*name;		/* directory entry name */
 	int		namelen;	/* length of name */
 	int		done;		/* output: set if value was stored */
@@ -138,7 +141,8 @@ typedef struct xfs_dir_put_args
 	struct uio	*uio;		/* uio control structure */
 } xfs_dir_put_args_t;
 
-#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len)	xfs_dir_leaf_entsize_byname(len)
+#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len)	\
+	xfs_dir_leaf_entsize_byname(len)
 static inline int xfs_dir_leaf_entsize_byname(int len)
 {
 	return (uint)sizeof(xfs_dir_leaf_name_t)-1 + len;

+ 0 - 1
fs/xfs/xfs_error.c

@@ -54,7 +54,6 @@ xfs_error_trap(int e)
 		if (e != xfs_etrap[i])
 			continue;
 		cmn_err(CE_NOTE, "xfs_error_trap: error %d", e);
-		debug_stop_all_cpus((void *)-1LL);
 		BUG();
 		break;
 	}

+ 4 - 4
fs/xfs/xfs_error.h

@@ -18,9 +18,6 @@
 #ifndef	__XFS_ERROR_H__
 #define	__XFS_ERROR_H__
 
-#define prdev(fmt,targ,args...) \
-	printk("XFS: device %s - " fmt "\n", XFS_BUFTARG_NAME(targ), ## args)
-
 #define XFS_ERECOVER	1	/* Failure to recover log */
 #define XFS_ELOGSTAT	2	/* Failure to stat log in user space */
 #define XFS_ENOLOGSPACE	3	/* Reservation too large */
@@ -182,8 +179,11 @@ extern int xfs_errortag_clearall_umount(int64_t fsid, char *fsname, int loud);
 struct xfs_mount;
 /* PRINTFLIKE4 */
 extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
-			    char *fmt, ...);
+			char *fmt, ...);
 /* PRINTFLIKE3 */
 extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
 
+#define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \
+	xfs_fs_cmn_err(level, mp, fmt "  Unmount and run xfs_repair.", ## args)
+
 #endif	/* __XFS_ERROR_H__ */

+ 6 - 4
fs/xfs/xfs_fs.h

@@ -3,15 +3,15 @@
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
  *
  * This program is distributed in the hope that it would be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * GNU Lesser General Public License for more details.
  *
- * You should have received a copy of the GNU General Public License
+ * You should have received a copy of the GNU Lesser General Public License
  * along with this program; if not, write the Free Software Foundation,
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
@@ -65,6 +65,8 @@ struct fsxattr {
 #define XFS_XFLAG_RTINHERIT	0x00000100	/* create with rt bit set */
 #define XFS_XFLAG_PROJINHERIT	0x00000200	/* create with parents projid */
 #define XFS_XFLAG_NOSYMLINKS	0x00000400	/* disallow symlink creation */
+#define XFS_XFLAG_EXTSIZE	0x00000800	/* extent size allocator hint */
+#define XFS_XFLAG_EXTSZINHERIT	0x00001000	/* inherit inode extent size */
 #define XFS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this	*/
 
 /*

+ 26 - 0
fs/xfs/xfs_fsops.c

@@ -540,6 +540,32 @@ xfs_reserve_blocks(
 	return(0);
 }
 
+void
+xfs_fs_log_dummy(xfs_mount_t *mp)
+{
+	xfs_trans_t *tp;
+	xfs_inode_t *ip;
+
+
+	tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
+	atomic_inc(&mp->m_active_trans);
+	if (xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0)) {
+		xfs_trans_cancel(tp, 0);
+		return;
+	}
+
+	ip = mp->m_rootip;
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+	xfs_trans_ihold(tp, ip);
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	xfs_trans_set_sync(tp);
+	xfs_trans_commit(tp, 0, NULL);
+
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+}
+
 int
 xfs_fs_goingdown(
 	xfs_mount_t	*mp,

+ 1 - 0
fs/xfs/xfs_fsops.h

@@ -25,5 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
 extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
 				xfs_fsop_resblks_t *outval);
 extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
+extern void xfs_fs_log_dummy(xfs_mount_t *mp);
 
 #endif	/* __XFS_FSOPS_H__ */

+ 1 - 4
fs/xfs/xfs_iget.c

@@ -493,7 +493,6 @@ xfs_iget(
 
 retry:
 	if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) {
-		bhv_desc_t	*bdp;
 		xfs_inode_t	*ip;
 
 		vp = LINVFS_GET_VP(inode);
@@ -517,14 +516,12 @@ retry:
 			 * to wait for the inode to go away.
 			 */
 			if (is_bad_inode(inode) ||
-			    ((bdp = vn_bhv_lookup(VN_BHV_HEAD(vp),
-						  &xfs_vnodeops)) == NULL)) {
+			    ((ip = xfs_vtoi(vp)) == NULL)) {
 				iput(inode);
 				delay(1);
 				goto retry;
 			}
 
-			ip = XFS_BHVTOI(bdp);
 			if (lock_flags != 0)
 				xfs_ilock(ip, lock_flags);
 			XFS_STATS_INC(xs_ig_found);

+ 38 - 23
fs/xfs/xfs_inode.c

@@ -404,9 +404,8 @@ xfs_iformat(
 	    INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) +
 		INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) >
 	    INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT))) {
-		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
-			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu."
-			"  Unmount and run xfs_repair.",
+		xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
 			(unsigned long long)ip->i_ino,
 			(int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT)
 			    + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)),
@@ -418,9 +417,8 @@ xfs_iformat(
 	}
 
 	if (unlikely(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT) > ip->i_mount->m_sb.sb_inodesize)) {
-		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
-			"corrupt dinode %Lu, forkoff = 0x%x."
-			"  Unmount and run xfs_repair.",
+		xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+			"corrupt dinode %Lu, forkoff = 0x%x.",
 			(unsigned long long)ip->i_ino,
 			(int)(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT)));
 		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
@@ -451,8 +449,9 @@ xfs_iformat(
 			 * no local regular files yet
 			 */
 			if (unlikely((INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & S_IFMT) == S_IFREG)) {
-				xfs_fs_cmn_err(CE_WARN, ip->i_mount,
-					"corrupt inode (local format for regular file) %Lu.  Unmount and run xfs_repair.",
+				xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+					"corrupt inode %Lu "
+					"(local format for regular file).",
 					(unsigned long long) ip->i_ino);
 				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
 						     XFS_ERRLEVEL_LOW,
@@ -462,8 +461,9 @@ xfs_iformat(
 
 			di_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT);
 			if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
-				xfs_fs_cmn_err(CE_WARN, ip->i_mount,
-					"corrupt inode %Lu (bad size %Ld for local inode).  Unmount and run xfs_repair.",
+				xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+					"corrupt inode %Lu "
+					"(bad size %Ld for local inode).",
 					(unsigned long long) ip->i_ino,
 					(long long) di_size);
 				XFS_CORRUPTION_ERROR("xfs_iformat(5)",
@@ -551,8 +551,9 @@ xfs_iformat_local(
 	 * kmem_alloc() or memcpy() below.
 	 */
 	if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
-		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
-			"corrupt inode %Lu (bad size %d for local fork, size = %d).  Unmount and run xfs_repair.",
+		xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+			"corrupt inode %Lu "
+			"(bad size %d for local fork, size = %d).",
 			(unsigned long long) ip->i_ino, size,
 			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
 		XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
@@ -610,8 +611,8 @@ xfs_iformat_extents(
 	 * kmem_alloc() or memcpy() below.
 	 */
 	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
-		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
-			"corrupt inode %Lu ((a)extents = %d).  Unmount and run xfs_repair.",
+		xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+			"corrupt inode %Lu ((a)extents = %d).",
 			(unsigned long long) ip->i_ino, nex);
 		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
 				     ip->i_mount, dip);
@@ -692,8 +693,8 @@ xfs_iformat_btree(
 	    || XFS_BMDR_SPACE_CALC(nrecs) >
 			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
 	    || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
-		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
-			"corrupt inode %Lu (btree).  Unmount and run xfs_repair.",
+		xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
+			"corrupt inode %Lu (btree).",
 			(unsigned long long) ip->i_ino);
 		XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
 				 ip->i_mount);
@@ -809,6 +810,10 @@ _xfs_dic2xflags(
 			flags |= XFS_XFLAG_PROJINHERIT;
 		if (di_flags & XFS_DIFLAG_NOSYMLINKS)
 			flags |= XFS_XFLAG_NOSYMLINKS;
+		if (di_flags & XFS_DIFLAG_EXTSIZE)
+			flags |= XFS_XFLAG_EXTSIZE;
+		if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
+			flags |= XFS_XFLAG_EXTSZINHERIT;
 	}
 
 	return flags;
@@ -1192,11 +1197,19 @@ xfs_ialloc(
 			if ((mode & S_IFMT) == S_IFDIR) {
 				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
 					di_flags |= XFS_DIFLAG_RTINHERIT;
-			} else {
+				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+					ip->i_d.di_extsize = pip->i_d.di_extsize;
+				}
+			} else if ((mode & S_IFMT) == S_IFREG) {
 				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) {
 					di_flags |= XFS_DIFLAG_REALTIME;
 					ip->i_iocore.io_flags |= XFS_IOCORE_RT;
 				}
+				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+					di_flags |= XFS_DIFLAG_EXTSIZE;
+					ip->i_d.di_extsize = pip->i_d.di_extsize;
+				}
 			}
 			if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
 			    xfs_inherit_noatime)
@@ -1262,7 +1275,7 @@ xfs_isize_check(
 	if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
 		return;
 
-	if ( ip->i_d.di_flags & XFS_DIFLAG_REALTIME )
+	if (ip->i_d.di_flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_EXTSIZE))
 		return;
 
 	nimaps = 2;
@@ -1765,22 +1778,19 @@ xfs_igrow_start(
 	xfs_fsize_t	new_size,
 	cred_t		*credp)
 {
-	xfs_fsize_t	isize;
 	int		error;
 
 	ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
 	ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
 	ASSERT(new_size > ip->i_d.di_size);
 
-	error = 0;
-	isize = ip->i_d.di_size;
 	/*
 	 * Zero any pages that may have been created by
 	 * xfs_write_file() beyond the end of the file
 	 * and any blocks between the old and new file sizes.
 	 */
-	error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, isize,
-				new_size);
+	error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size,
+			     ip->i_d.di_size, new_size);
 	return error;
 }
 
@@ -3355,6 +3365,11 @@ xfs_iflush_int(
 	ip->i_update_core = 0;
 	SYNCHRONIZE();
 
+	/*
+	 * Make sure to get the latest atime from the Linux inode.
+	 */
+	xfs_synchronize_atime(ip);
+
 	if (XFS_TEST_ERROR(INT_GET(dip->di_core.di_magic,ARCH_CONVERT) != XFS_DINODE_MAGIC,
 			       mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
 		xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,

+ 4 - 0
fs/xfs/xfs_inode.h

@@ -436,6 +436,10 @@ void		xfs_ichgtime(xfs_inode_t *, int);
 xfs_fsize_t	xfs_file_last_byte(xfs_inode_t *);
 void		xfs_lock_inodes(xfs_inode_t **, int, int, uint);
 
+xfs_inode_t	*xfs_vtoi(struct vnode *vp);
+
+void		xfs_synchronize_atime(xfs_inode_t *);
+
 #define xfs_ipincount(ip)	((unsigned int) atomic_read(&ip->i_pincount))
 
 #ifdef DEBUG

+ 7 - 2
fs/xfs/xfs_inode_item.c

@@ -271,6 +271,11 @@ xfs_inode_item_format(
 	if (ip->i_update_size)
 		ip->i_update_size = 0;
 
+	/*
+	 * Make sure to get the latest atime from the Linux inode.
+	 */
+	xfs_synchronize_atime(ip);
+
 	vecp->i_addr = (xfs_caddr_t)&ip->i_d;
 	vecp->i_len  = sizeof(xfs_dinode_core_t);
 	XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
@@ -603,7 +608,7 @@ xfs_inode_item_trylock(
 		if (iip->ili_pushbuf_flag == 0) {
 			iip->ili_pushbuf_flag = 1;
 #ifdef DEBUG
-			iip->ili_push_owner = get_thread_id();
+			iip->ili_push_owner = current_pid();
 #endif
 			/*
 			 * Inode is left locked in shared mode.
@@ -782,7 +787,7 @@ xfs_inode_item_pushbuf(
 	 * trying to duplicate our effort.
 	 */
 	ASSERT(iip->ili_pushbuf_flag != 0);
-	ASSERT(iip->ili_push_owner == get_thread_id());
+	ASSERT(iip->ili_push_owner == current_pid());
 
 	/*
 	 * If flushlock isn't locked anymore, chances are that the

+ 234 - 191
fs/xfs/xfs_iomap.c

@@ -262,7 +262,7 @@ phase2:
 	case BMAPI_WRITE:
 		/* If we found an extent, return it */
 		if (nimaps &&
-		    (imap.br_startblock != HOLESTARTBLOCK) && 
+		    (imap.br_startblock != HOLESTARTBLOCK) &&
 		    (imap.br_startblock != DELAYSTARTBLOCK)) {
 			xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, io,
 					offset, count, iomapp, &imap, flags);
@@ -316,6 +316,58 @@ out:
 	return XFS_ERROR(error);
 }
 
+STATIC int
+xfs_iomap_eof_align_last_fsb(
+	xfs_mount_t	*mp,
+	xfs_iocore_t	*io,
+	xfs_fsize_t	isize,
+	xfs_extlen_t	extsize,
+	xfs_fileoff_t	*last_fsb)
+{
+	xfs_fileoff_t	new_last_fsb = 0;
+	xfs_extlen_t	align;
+	int		eof, error;
+
+	if (io->io_flags & XFS_IOCORE_RT)
+		;
+	/*
+	 * If mounted with the "-o swalloc" option, roundup the allocation
+	 * request to a stripe width boundary if the file size is >=
+	 * stripe width and we are allocating past the allocation eof.
+	 */
+	else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
+	        (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)))
+		new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
+	/*
+	 * Roundup the allocation request to a stripe unit (m_dalign) boundary
+	 * if the file size is >= stripe unit size, and we are allocating past
+	 * the allocation eof.
+	 */
+	else if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)))
+		new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
+
+	/*
+	 * Always round up the allocation request to an extent boundary
+	 * (when file on a real-time subvolume or has di_extsize hint).
+	 */
+	if (extsize) {
+		if (new_last_fsb)
+			align = roundup_64(new_last_fsb, extsize);
+		else
+			align = extsize;
+		new_last_fsb = roundup_64(*last_fsb, align);
+	}
+
+	if (new_last_fsb) {
+		error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
+		if (error)
+			return error;
+		if (eof)
+			*last_fsb = new_last_fsb;
+	}
+	return 0;
+}
+
 STATIC int
 xfs_flush_space(
 	xfs_inode_t	*ip,
@@ -362,19 +414,20 @@ xfs_iomap_write_direct(
 	xfs_iocore_t	*io = &ip->i_iocore;
 	xfs_fileoff_t	offset_fsb;
 	xfs_fileoff_t	last_fsb;
-	xfs_filblks_t	count_fsb;
+	xfs_filblks_t	count_fsb, resaligned;
 	xfs_fsblock_t	firstfsb;
+	xfs_extlen_t	extsz, temp;
+	xfs_fsize_t	isize;
 	int		nimaps;
-	int		error;
 	int		bmapi_flag;
 	int		quota_flag;
 	int		rt;
 	xfs_trans_t	*tp;
 	xfs_bmbt_irec_t imap;
 	xfs_bmap_free_t free_list;
-	xfs_filblks_t	qblocks, resblks;
+	uint		qblocks, resblks, resrtextents;
 	int		committed;
-	int		resrtextents;
+	int		error;
 
 	/*
 	 * Make sure that the dquots are there. This doesn't hold
@@ -384,37 +437,52 @@ xfs_iomap_write_direct(
 	if (error)
 		return XFS_ERROR(error);
 
-	offset_fsb = XFS_B_TO_FSBT(mp, offset);
-	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
-	count_fsb = last_fsb - offset_fsb;
-	if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) {
-		xfs_fileoff_t	map_last_fsb;
-
-		map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff;
-		if (map_last_fsb < last_fsb) {
-			last_fsb = map_last_fsb;
-			count_fsb = last_fsb - offset_fsb;
-		}
-		ASSERT(count_fsb > 0);
+	rt = XFS_IS_REALTIME_INODE(ip);
+	if (unlikely(rt)) {
+		if (!(extsz = ip->i_d.di_extsize))
+			extsz = mp->m_sb.sb_rextsize;
+	} else {
+		extsz = ip->i_d.di_extsize;
 	}
 
-	/*
-	 * Determine if reserving space on the data or realtime partition.
-	 */
-	if ((rt = XFS_IS_REALTIME_INODE(ip))) {
-		xfs_extlen_t	extsz;
+	isize = ip->i_d.di_size;
+	if (io->io_new_size > isize)
+		isize = io->io_new_size;
 
-		if (!(extsz = ip->i_d.di_extsize))
-			extsz = mp->m_sb.sb_rextsize;
-		resrtextents = qblocks = (count_fsb + extsz - 1);
-		do_div(resrtextents, mp->m_sb.sb_rextsize);
-		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
-		quota_flag = XFS_QMOPT_RES_RTBLKS;
+  	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+  	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
+	if ((offset + count) > isize) {
+		error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
+							&last_fsb);
+		if (error)
+			goto error_out;
 	} else {
-		resrtextents = 0;
-		resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb);
-		quota_flag = XFS_QMOPT_RES_REGBLKS;
+		if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
+			last_fsb = MIN(last_fsb, (xfs_fileoff_t)
+					ret_imap->br_blockcount +
+					ret_imap->br_startoff);
 	}
+	count_fsb = last_fsb - offset_fsb;
+	ASSERT(count_fsb > 0);
+
+	resaligned = count_fsb;
+	if (unlikely(extsz)) {
+		if ((temp = do_mod(offset_fsb, extsz)))
+			resaligned += temp;
+		if ((temp = do_mod(resaligned, extsz)))
+			resaligned += extsz - temp;
+	}
+
+	if (unlikely(rt)) {
+		resrtextents = qblocks = resaligned;
+		resrtextents /= mp->m_sb.sb_rextsize;
+  		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+  		quota_flag = XFS_QMOPT_RES_RTBLKS;
+  	} else {
+  		resrtextents = 0;
+		resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
+  		quota_flag = XFS_QMOPT_RES_REGBLKS;
+  	}
 
 	/*
 	 * Allocate and setup the transaction
@@ -425,7 +493,6 @@ xfs_iomap_write_direct(
 			XFS_WRITE_LOG_RES(mp), resrtextents,
 			XFS_TRANS_PERM_LOG_RES,
 			XFS_WRITE_LOG_COUNT);
-
 	/*
 	 * Check for running out of space, note: need lock to return
 	 */
@@ -435,20 +502,20 @@ xfs_iomap_write_direct(
 	if (error)
 		goto error_out;
 
-	if (XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag)) {
-		error = (EDQUOT);
+	error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
+					      qblocks, 0, quota_flag);
+	if (error)
 		goto error1;
-	}
 
-	bmapi_flag = XFS_BMAPI_WRITE;
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 	xfs_trans_ihold(tp, ip);
 
-	if (!(flags & BMAPI_MMAP) && (offset < ip->i_d.di_size || rt))
+	bmapi_flag = XFS_BMAPI_WRITE;
+	if ((flags & BMAPI_DIRECT) && (offset < ip->i_d.di_size || extsz))
 		bmapi_flag |= XFS_BMAPI_PREALLOC;
 
 	/*
-	 * Issue the bmapi() call to allocate the blocks
+	 * Issue the xfs_bmapi() call to allocate the blocks
 	 */
 	XFS_BMAP_INIT(&free_list, &firstfsb);
 	nimaps = 1;
@@ -483,8 +550,10 @@ xfs_iomap_write_direct(
                         "extent-state : %x \n",
                         (ip->i_mount)->m_fsname,
                         (long long)ip->i_ino,
-                        ret_imap->br_startblock, ret_imap->br_startoff,
-                        ret_imap->br_blockcount,ret_imap->br_state);
+                        (unsigned long long)ret_imap->br_startblock,
+			(unsigned long long)ret_imap->br_startoff,
+                        (unsigned long long)ret_imap->br_blockcount,
+			ret_imap->br_state);
         }
 	return 0;
 
@@ -500,6 +569,63 @@ error_out:
 	return XFS_ERROR(error);
 }
 
+/*
+ * If the caller is doing a write at the end of the file,
+ * then extend the allocation out to the file system's write
+ * iosize.  We clean up any extra space left over when the
+ * file is closed in xfs_inactive().
+ *
+ * For sync writes, we are flushing delayed allocate space to
+ * try to make additional space available for allocation near
+ * the filesystem full boundary - preallocation hurts in that
+ * situation, of course.
+ */
+STATIC int
+xfs_iomap_eof_want_preallocate(
+	xfs_mount_t	*mp,
+	xfs_iocore_t	*io,
+	xfs_fsize_t	isize,
+	xfs_off_t	offset,
+	size_t		count,
+	int		ioflag,
+	xfs_bmbt_irec_t *imap,
+	int		nimaps,
+	int		*prealloc)
+{
+	xfs_fileoff_t   start_fsb;
+	xfs_filblks_t   count_fsb;
+	xfs_fsblock_t	firstblock;
+	int		n, error, imaps;
+
+	*prealloc = 0;
+	if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize)
+		return 0;
+
+	/*
+	 * If there are any real blocks past eof, then don't
+	 * do any speculative allocation.
+	 */
+	start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
+	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+	while (count_fsb > 0) {
+		imaps = nimaps;
+		firstblock = NULLFSBLOCK;
+		error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
+				  0, &firstblock, 0, imap, &imaps, NULL);
+		if (error)
+			return error;
+		for (n = 0; n < imaps; n++) {
+			if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
+			    (imap[n].br_startblock != DELAYSTARTBLOCK))
+				return 0;
+			start_fsb += imap[n].br_blockcount;
+			count_fsb -= imap[n].br_blockcount;
+		}
+	}
+	*prealloc = 1;
+	return 0;
+}
+
 int
 xfs_iomap_write_delay(
 	xfs_inode_t	*ip,
@@ -513,13 +639,15 @@ xfs_iomap_write_delay(
 	xfs_iocore_t	*io = &ip->i_iocore;
 	xfs_fileoff_t	offset_fsb;
 	xfs_fileoff_t	last_fsb;
-	xfs_fsize_t	isize;
+	xfs_off_t	aligned_offset;
+	xfs_fileoff_t	ioalign;
 	xfs_fsblock_t	firstblock;
+	xfs_extlen_t	extsz;
+	xfs_fsize_t	isize;
 	int		nimaps;
-	int		error;
 	xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
-	int		aeof;
-	int		fsynced = 0;
+	int		prealloc, fsynced = 0;
+	int		error;
 
 	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
 
@@ -527,152 +655,57 @@ xfs_iomap_write_delay(
 	 * Make sure that the dquots are there. This doesn't hold
 	 * the ilock across a disk read.
 	 */
-
 	error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
 	if (error)
 		return XFS_ERROR(error);
 
+	if (XFS_IS_REALTIME_INODE(ip)) {
+		if (!(extsz = ip->i_d.di_extsize))
+			extsz = mp->m_sb.sb_rextsize;
+	} else {
+		extsz = ip->i_d.di_extsize;
+	}
+
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
 retry:
 	isize = ip->i_d.di_size;
-	if (io->io_new_size > isize) {
+	if (io->io_new_size > isize)
 		isize = io->io_new_size;
-	}
 
-	aeof = 0;
-	offset_fsb = XFS_B_TO_FSBT(mp, offset);
-	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
-	/*
-	 * If the caller is doing a write at the end of the file,
-	 * then extend the allocation (and the buffer used for the write)
-	 * out to the file system's write iosize.  We clean up any extra
-	 * space left over when the file is closed in xfs_inactive().
-	 *
-	 * For sync writes, we are flushing delayed allocate space to
-	 * try to make additional space available for allocation near
-	 * the filesystem full boundary - preallocation hurts in that
-	 * situation, of course.
-	 */
-	if (!(ioflag & BMAPI_SYNC) && ((offset + count) > ip->i_d.di_size)) {
-		xfs_off_t	aligned_offset;
-		xfs_filblks_t   count_fsb;
-		unsigned int	iosize;
-		xfs_fileoff_t	ioalign;
-		int		n;
-		xfs_fileoff_t   start_fsb;
+	error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count,
+				ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
+	if (error)
+		return error;
 
-		/*
-		 * If there are any real blocks past eof, then don't
-		 * do any speculative allocation.
-		 */
-		start_fsb = XFS_B_TO_FSBT(mp,
-					((xfs_ufsize_t)(offset + count - 1)));
-		count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
-		while (count_fsb > 0) {
-			nimaps = XFS_WRITE_IMAPS;
-			error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
-					0, &firstblock, 0, imap, &nimaps, NULL);
-			if (error) {
-				return error;
-			}
-			for (n = 0; n < nimaps; n++) {
-				if ( !(io->io_flags & XFS_IOCORE_RT)  && 
-					!imap[n].br_startblock) {
-					cmn_err(CE_PANIC,"Access to block "
-						"zero:  fs <%s> inode: %lld "
-						"start_block : %llx start_off "
-						": %llx blkcnt : %llx "
-						"extent-state : %x \n",
-						(ip->i_mount)->m_fsname,
-						(long long)ip->i_ino,
-						imap[n].br_startblock,
-						imap[n].br_startoff,
-						imap[n].br_blockcount,
-						imap[n].br_state);
-        			}
-				if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
-				    (imap[n].br_startblock != DELAYSTARTBLOCK)) {
-					goto write_map;
-				}
-				start_fsb += imap[n].br_blockcount;
-				count_fsb -= imap[n].br_blockcount;
-			}
-		}
-		iosize = mp->m_writeio_blocks;
+	if (prealloc) {
 		aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
 		ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
-		last_fsb = ioalign + iosize;
-		aeof = 1;
+		last_fsb = ioalign + mp->m_writeio_blocks;
+	} else {
+		last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
 	}
-write_map:
-	nimaps = XFS_WRITE_IMAPS;
-	firstblock = NULLFSBLOCK;
 
-	/*
-	 * If mounted with the "-o swalloc" option, roundup the allocation
-	 * request to a stripe width boundary if the file size is >=
-	 * stripe width and we are allocating past the allocation eof.
-	 */
-	if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_swidth 
-	    && (mp->m_flags & XFS_MOUNT_SWALLOC)
-	    && (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)) && aeof) {
-		int eof;
-		xfs_fileoff_t new_last_fsb;
-
-		new_last_fsb = roundup_64(last_fsb, mp->m_swidth);
-		error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
-		if (error) {
-			return error;
-		}
-		if (eof) {
-			last_fsb = new_last_fsb;
-		}
-	/*
-	 * Roundup the allocation request to a stripe unit (m_dalign) boundary
-	 * if the file size is >= stripe unit size, and we are allocating past
-	 * the allocation eof.
-	 */
-	} else if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_dalign &&
-		   (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)) && aeof) {
-		int eof;
-		xfs_fileoff_t new_last_fsb;
-		new_last_fsb = roundup_64(last_fsb, mp->m_dalign);
-		error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
-		if (error) {
-			return error;
-		}
-		if (eof) {
-			last_fsb = new_last_fsb;
-		}
-	/*
-	 * Round up the allocation request to a real-time extent boundary
-	 * if the file is on the real-time subvolume.
-	 */
-	} else if (io->io_flags & XFS_IOCORE_RT && aeof) {
-		int eof;
-		xfs_fileoff_t new_last_fsb;
-
-		new_last_fsb = roundup_64(last_fsb, mp->m_sb.sb_rextsize);
-		error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
-		if (error) {
+	if (prealloc || extsz) {
+		error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
+							&last_fsb);
+		if (error)
 			return error;
-		}
-		if (eof)
-			last_fsb = new_last_fsb;
 	}
+
+	nimaps = XFS_WRITE_IMAPS;
+	firstblock = NULLFSBLOCK;
 	error = xfs_bmapi(NULL, ip, offset_fsb,
 			  (xfs_filblks_t)(last_fsb - offset_fsb),
 			  XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
 			  XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
 			  &nimaps, NULL);
-	/*
-	 * This can be EDQUOT, if nimaps == 0
-	 */
-	if (error && (error != ENOSPC)) {
+	if (error && (error != ENOSPC))
 		return XFS_ERROR(error);
-	}
+
 	/*
 	 * If bmapi returned us nothing, and if we didn't get back EDQUOT,
-	 * then we must have run out of space.
+	 * then we must have run out of space - flush delalloc, and retry..
 	 */
 	if (nimaps == 0) {
 		xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
@@ -684,17 +717,21 @@ write_map:
 		goto retry;
 	}
 
-	*ret_imap = imap[0];
-	*nmaps = 1;
-	if ( !(io->io_flags & XFS_IOCORE_RT)  && !ret_imap->br_startblock) {
+	if (!(io->io_flags & XFS_IOCORE_RT)  && !ret_imap->br_startblock) {
 		cmn_err(CE_PANIC,"Access to block zero:  fs <%s> inode: %lld "
                         "start_block : %llx start_off : %llx blkcnt : %llx "
                         "extent-state : %x \n",
                         (ip->i_mount)->m_fsname,
                         (long long)ip->i_ino,
-                        ret_imap->br_startblock, ret_imap->br_startoff,
-                        ret_imap->br_blockcount,ret_imap->br_state);
+                        (unsigned long long)ret_imap->br_startblock,
+			(unsigned long long)ret_imap->br_startoff,
+                        (unsigned long long)ret_imap->br_blockcount,
+			ret_imap->br_state);
 	}
+
+	*ret_imap = imap[0];
+	*nmaps = 1;
+
 	return 0;
 }
 
@@ -820,17 +857,21 @@ xfs_iomap_write_allocate(
 		 */
 
 		for (i = 0; i < nimaps; i++) {
-			if ( !(io->io_flags & XFS_IOCORE_RT)  && 
-				!imap[i].br_startblock) {
+			if (!(io->io_flags & XFS_IOCORE_RT)  &&
+			    !imap[i].br_startblock) {
 				cmn_err(CE_PANIC,"Access to block zero:  "
 					"fs <%s> inode: %lld "
-					"start_block : %llx start_off : %llx " 
+					"start_block : %llx start_off : %llx "
 					"blkcnt : %llx extent-state : %x \n",
 					(ip->i_mount)->m_fsname,
 					(long long)ip->i_ino,
-					imap[i].br_startblock,
-					imap[i].br_startoff,
-				        imap[i].br_blockcount,imap[i].br_state);
+					(unsigned long long)
+						imap[i].br_startblock,
+					(unsigned long long)
+						imap[i].br_startoff,
+					(unsigned long long)
+				        	imap[i].br_blockcount,
+					imap[i].br_state);
                         }
 			if ((offset_fsb >= imap[i].br_startoff) &&
 			    (offset_fsb < (imap[i].br_startoff +
@@ -867,17 +908,17 @@ xfs_iomap_write_unwritten(
 {
 	xfs_mount_t	*mp = ip->i_mount;
 	xfs_iocore_t    *io = &ip->i_iocore;
-	xfs_trans_t	*tp;
 	xfs_fileoff_t	offset_fsb;
 	xfs_filblks_t	count_fsb;
 	xfs_filblks_t	numblks_fsb;
-	xfs_bmbt_irec_t	imap;
+	xfs_fsblock_t	firstfsb;
+	int		nimaps;
+	xfs_trans_t	*tp;
+	xfs_bmbt_irec_t imap;
+	xfs_bmap_free_t free_list;
+	uint		resblks;
 	int		committed;
 	int		error;
-	int		nres;
-	int		nimaps;
-	xfs_fsblock_t	firstfsb;
-	xfs_bmap_free_t	free_list;
 
 	xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN,
 				&ip->i_iocore, offset, count);
@@ -886,9 +927,9 @@ xfs_iomap_write_unwritten(
 	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
 	count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
 
-	do {
-		nres = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
 
+	do {
 		/*
 		 * set up a transaction to convert the range of extents
 		 * from unwritten to real. Do allocations in a loop until
@@ -896,7 +937,7 @@ xfs_iomap_write_unwritten(
 		 */
 
 		tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
-		error = xfs_trans_reserve(tp, nres,
+		error = xfs_trans_reserve(tp, resblks,
 				XFS_WRITE_LOG_RES(mp), 0,
 				XFS_TRANS_PERM_LOG_RES,
 				XFS_WRITE_LOG_COUNT);
@@ -915,7 +956,7 @@ xfs_iomap_write_unwritten(
 		XFS_BMAP_INIT(&free_list, &firstfsb);
 		nimaps = 1;
 		error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
-				  XFS_BMAPI_WRITE, &firstfsb,
+				  XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
 				  1, &imap, &nimaps, &free_list);
 		if (error)
 			goto error_on_bmapi_transaction;
@@ -929,15 +970,17 @@ xfs_iomap_write_unwritten(
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		if (error)
 			goto error0;
-		
+
 		if ( !(io->io_flags & XFS_IOCORE_RT)  && !imap.br_startblock) {
 			cmn_err(CE_PANIC,"Access to block zero:  fs <%s> "
 				"inode: %lld start_block : %llx start_off : "
 				"%llx blkcnt : %llx extent-state : %x \n",
 				(ip->i_mount)->m_fsname,
 				(long long)ip->i_ino,
-				imap.br_startblock,imap.br_startoff,
-				imap.br_blockcount,imap.br_state);
+				(unsigned long long)imap.br_startblock,
+				(unsigned long long)imap.br_startoff,
+				(unsigned long long)imap.br_blockcount,
+				imap.br_state);
         	}
 
 		if ((numblks_fsb = imap.br_blockcount) == 0) {

+ 3 - 2
fs/xfs/xfs_itable.c

@@ -56,6 +56,7 @@ xfs_bulkstat_one_iget(
 {
 	xfs_dinode_core_t *dic;		/* dinode core info pointer */
 	xfs_inode_t	*ip;		/* incore inode pointer */
+	vnode_t		*vp;
 	int		error;
 
 	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno);
@@ -72,6 +73,7 @@ xfs_bulkstat_one_iget(
 		goto out_iput;
 	}
 
+	vp = XFS_ITOV(ip);
 	dic = &ip->i_d;
 
 	/* xfs_iget returns the following without needing
@@ -84,8 +86,7 @@ xfs_bulkstat_one_iget(
 	buf->bs_uid = dic->di_uid;
 	buf->bs_gid = dic->di_gid;
 	buf->bs_size = dic->di_size;
-	buf->bs_atime.tv_sec = dic->di_atime.t_sec;
-	buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
+	vn_atime_to_bstime(vp, &buf->bs_atime);
 	buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
 	buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
 	buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;

+ 96 - 27
fs/xfs/xfs_log.c

@@ -178,6 +178,83 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
 #define	xlog_trace_iclog(iclog,state)
 #endif /* XFS_LOG_TRACE */
 
+
+static void
+xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
+{
+	if (*qp) {
+		tic->t_next	    = (*qp);
+		tic->t_prev	    = (*qp)->t_prev;
+		(*qp)->t_prev->t_next = tic;
+		(*qp)->t_prev	    = tic;
+	} else {
+		tic->t_prev = tic->t_next = tic;
+		*qp = tic;
+	}
+
+	tic->t_flags |= XLOG_TIC_IN_Q;
+}
+
+static void
+xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
+{
+	if (tic == tic->t_next) {
+		*qp = NULL;
+	} else {
+		*qp = tic->t_next;
+		tic->t_next->t_prev = tic->t_prev;
+		tic->t_prev->t_next = tic->t_next;
+	}
+
+	tic->t_next = tic->t_prev = NULL;
+	tic->t_flags &= ~XLOG_TIC_IN_Q;
+}
+
+static void
+xlog_grant_sub_space(struct log *log, int bytes)
+{
+	log->l_grant_write_bytes -= bytes;
+	if (log->l_grant_write_bytes < 0) {
+		log->l_grant_write_bytes += log->l_logsize;
+		log->l_grant_write_cycle--;
+	}
+
+	log->l_grant_reserve_bytes -= bytes;
+	if ((log)->l_grant_reserve_bytes < 0) {
+		log->l_grant_reserve_bytes += log->l_logsize;
+		log->l_grant_reserve_cycle--;
+	}
+
+}
+
+static void
+xlog_grant_add_space_write(struct log *log, int bytes)
+{
+	log->l_grant_write_bytes += bytes;
+	if (log->l_grant_write_bytes > log->l_logsize) {
+		log->l_grant_write_bytes -= log->l_logsize;
+		log->l_grant_write_cycle++;
+	}
+}
+
+static void
+xlog_grant_add_space_reserve(struct log *log, int bytes)
+{
+	log->l_grant_reserve_bytes += bytes;
+	if (log->l_grant_reserve_bytes > log->l_logsize) {
+		log->l_grant_reserve_bytes -= log->l_logsize;
+		log->l_grant_reserve_cycle++;
+	}
+}
+
+static inline void
+xlog_grant_add_space(struct log *log, int bytes)
+{
+	xlog_grant_add_space_write(log, bytes);
+	xlog_grant_add_space_reserve(log, bytes);
+}
+
+
 /*
  * NOTES:
  *
@@ -428,7 +505,7 @@ xfs_log_mount(xfs_mount_t	*mp,
 		if (readonly)
 			vfsp->vfs_flag &= ~VFS_RDONLY;
 
-		error = xlog_recover(mp->m_log, readonly);
+		error = xlog_recover(mp->m_log);
 
 		if (readonly)
 			vfsp->vfs_flag |= VFS_RDONLY;
@@ -1320,8 +1397,7 @@ xlog_sync(xlog_t		*log,
 
 	/* move grant heads by roundoff in sync */
 	s = GRANT_LOCK(log);
-	XLOG_GRANT_ADD_SPACE(log, roundoff, 'w');
-	XLOG_GRANT_ADD_SPACE(log, roundoff, 'r');
+	xlog_grant_add_space(log, roundoff);
 	GRANT_UNLOCK(log, s);
 
 	/* put cycle number in every block */
@@ -1515,7 +1591,6 @@ xlog_state_finish_copy(xlog_t		*log,
  * print out info relating to regions written which consume
  * the reservation
  */
-#if defined(XFS_LOG_RES_DEBUG)
 STATIC void
 xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
 {
@@ -1605,11 +1680,11 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
 			ticket->t_res_arr_sum, ticket->t_res_o_flow,
 			ticket->t_res_num_ophdrs, ophdr_spc,
 			ticket->t_res_arr_sum + 
-			  ticket->t_res_o_flow + ophdr_spc,
+			ticket->t_res_o_flow + ophdr_spc,
 			ticket->t_res_num);
 
 	for (i = 0; i < ticket->t_res_num; i++) {
-	   	uint r_type = ticket->t_res_arr[i].r_type; 
+		uint r_type = ticket->t_res_arr[i].r_type; 
 		cmn_err(CE_WARN,
 			    "region[%u]: %s - %u bytes\n",
 			    i, 
@@ -1618,9 +1693,6 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
 			    ticket->t_res_arr[i].r_len);
 	}
 }
-#else
-#define xlog_print_tic_res(mp, ticket)
-#endif
 
 /*
  * Write some region out to in-core log
@@ -2389,7 +2461,7 @@ xlog_grant_log_space(xlog_t	   *log,
 
 	/* something is already sleeping; insert new transaction at end */
 	if (log->l_reserve_headq) {
-		XLOG_INS_TICKETQ(log->l_reserve_headq, tic);
+		xlog_ins_ticketq(&log->l_reserve_headq, tic);
 		xlog_trace_loggrant(log, tic,
 				    "xlog_grant_log_space: sleep 1");
 		/*
@@ -2422,7 +2494,7 @@ redo:
 				     log->l_grant_reserve_bytes);
 	if (free_bytes < need_bytes) {
 		if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-			XLOG_INS_TICKETQ(log->l_reserve_headq, tic);
+			xlog_ins_ticketq(&log->l_reserve_headq, tic);
 		xlog_trace_loggrant(log, tic,
 				    "xlog_grant_log_space: sleep 2");
 		XFS_STATS_INC(xs_sleep_logspace);
@@ -2439,11 +2511,10 @@ redo:
 		s = GRANT_LOCK(log);
 		goto redo;
 	} else if (tic->t_flags & XLOG_TIC_IN_Q)
-		XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+		xlog_del_ticketq(&log->l_reserve_headq, tic);
 
 	/* we've got enough space */
-	XLOG_GRANT_ADD_SPACE(log, need_bytes, 'w');
-	XLOG_GRANT_ADD_SPACE(log, need_bytes, 'r');
+	xlog_grant_add_space(log, need_bytes);
 #ifdef DEBUG
 	tail_lsn = log->l_tail_lsn;
 	/*
@@ -2464,7 +2535,7 @@ redo:
 
  error_return:
 	if (tic->t_flags & XLOG_TIC_IN_Q)
-		XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+		xlog_del_ticketq(&log->l_reserve_headq, tic);
 	xlog_trace_loggrant(log, tic, "xlog_grant_log_space: err_ret");
 	/*
 	 * If we are failing, make sure the ticket doesn't have any
@@ -2533,7 +2604,7 @@ xlog_regrant_write_log_space(xlog_t	   *log,
 
 		if (ntic != log->l_write_headq) {
 			if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-				XLOG_INS_TICKETQ(log->l_write_headq, tic);
+				xlog_ins_ticketq(&log->l_write_headq, tic);
 
 			xlog_trace_loggrant(log, tic,
 				    "xlog_regrant_write_log_space: sleep 1");
@@ -2565,7 +2636,7 @@ redo:
 				     log->l_grant_write_bytes);
 	if (free_bytes < need_bytes) {
 		if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
-			XLOG_INS_TICKETQ(log->l_write_headq, tic);
+			xlog_ins_ticketq(&log->l_write_headq, tic);
 		XFS_STATS_INC(xs_sleep_logspace);
 		sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
 
@@ -2581,9 +2652,10 @@ redo:
 		s = GRANT_LOCK(log);
 		goto redo;
 	} else if (tic->t_flags & XLOG_TIC_IN_Q)
-		XLOG_DEL_TICKETQ(log->l_write_headq, tic);
+		xlog_del_ticketq(&log->l_write_headq, tic);
 
-	XLOG_GRANT_ADD_SPACE(log, need_bytes, 'w'); /* we've got enough space */
+	/* we've got enough space */
+	xlog_grant_add_space_write(log, need_bytes);
 #ifdef DEBUG
 	tail_lsn = log->l_tail_lsn;
 	if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
@@ -2600,7 +2672,7 @@ redo:
 
  error_return:
 	if (tic->t_flags & XLOG_TIC_IN_Q)
-		XLOG_DEL_TICKETQ(log->l_reserve_headq, tic);
+		xlog_del_ticketq(&log->l_reserve_headq, tic);
 	xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: err_ret");
 	/*
 	 * If we are failing, make sure the ticket doesn't have any
@@ -2633,8 +2705,7 @@ xlog_regrant_reserve_log_space(xlog_t	     *log,
 		ticket->t_cnt--;
 
 	s = GRANT_LOCK(log);
-	XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w');
-	XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
+	xlog_grant_sub_space(log, ticket->t_curr_res);
 	ticket->t_curr_res = ticket->t_unit_res;
 	XLOG_TIC_RESET_RES(ticket);
 	xlog_trace_loggrant(log, ticket,
@@ -2647,7 +2718,7 @@ xlog_regrant_reserve_log_space(xlog_t	     *log,
 		return;
 	}
 
-	XLOG_GRANT_ADD_SPACE(log, ticket->t_unit_res, 'r');
+	xlog_grant_add_space_reserve(log, ticket->t_unit_res);
 	xlog_trace_loggrant(log, ticket,
 			    "xlog_regrant_reserve_log_space: exit");
 	xlog_verify_grant_head(log, 0);
@@ -2683,8 +2754,7 @@ xlog_ungrant_log_space(xlog_t	     *log,
 	s = GRANT_LOCK(log);
 	xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: enter");
 
-	XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w');
-	XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
+	xlog_grant_sub_space(log, ticket->t_curr_res);
 
 	xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: sub current");
 
@@ -2693,8 +2763,7 @@ xlog_ungrant_log_space(xlog_t	     *log,
 	 */
 	if (ticket->t_cnt > 0) {
 		ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
-		XLOG_GRANT_SUB_SPACE(log, ticket->t_unit_res*ticket->t_cnt,'w');
-		XLOG_GRANT_SUB_SPACE(log, ticket->t_unit_res*ticket->t_cnt,'r');
+		xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
 	}
 
 	xlog_trace_loggrant(log, ticket, "xlog_ungrant_log_space: exit");

+ 1 - 10
fs/xfs/xfs_log.h

@@ -96,7 +96,6 @@ static inline xfs_lsn_t	_lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
 
 
 /* Region types for iovec's i_type */
-#if defined(XFS_LOG_RES_DEBUG)
 #define XLOG_REG_TYPE_BFORMAT		1
 #define XLOG_REG_TYPE_BCHUNK		2
 #define XLOG_REG_TYPE_EFI_FORMAT	3
@@ -117,21 +116,13 @@ static inline xfs_lsn_t	_lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
 #define XLOG_REG_TYPE_COMMIT		18
 #define XLOG_REG_TYPE_TRANSHDR		19
 #define XLOG_REG_TYPE_MAX		19
-#endif
 
-#if defined(XFS_LOG_RES_DEBUG)
 #define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t))
-#else
-#define XLOG_VEC_SET_TYPE(vecp, t)
-#endif
-
 
 typedef struct xfs_log_iovec {
 	xfs_caddr_t		i_addr;		/* beginning address of region */
 	int		i_len;		/* length in bytes of region */
-#if defined(XFS_LOG_RES_DEBUG)
- 	uint		i_type;		/* type of region */
-#endif
+	uint		i_type;		/* type of region */
 } xfs_log_iovec_t;
 
 typedef void* xfs_log_ticket_t;

+ 5 - 72
fs/xfs/xfs_log_priv.h

@@ -253,7 +253,6 @@ typedef __uint32_t xlog_tid_t;
 
 
 /* Ticket reservation region accounting */ 
-#if defined(XFS_LOG_RES_DEBUG)
 #define XLOG_TIC_LEN_MAX	15
 #define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \
 				(t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0)
@@ -278,15 +277,9 @@ typedef __uint32_t xlog_tid_t;
  * we don't care about.
  */
 typedef struct xlog_res {
-	uint	r_len;
-	uint	r_type;
+	uint	r_len;	/* region length		:4 */
+	uint	r_type;	/* region's transaction type	:4 */
 } xlog_res_t;
-#else
-#define XLOG_TIC_RESET_RES(t)
-#define XLOG_TIC_ADD_OPHDR(t)
-#define XLOG_TIC_ADD_REGION(t, len, type)
-#endif
-
 
 typedef struct xlog_ticket {
 	sv_t		   t_sema;	 /* sleep on this semaphore      : 20 */
@@ -301,14 +294,12 @@ typedef struct xlog_ticket {
 	char		   t_flags;	 /* properties of reservation	 : 1  */
 	uint		   t_trans_type; /* transaction type             : 4  */
 
-#if defined (XFS_LOG_RES_DEBUG)
         /* reservation array fields */
 	uint		   t_res_num;                    /* num in array : 4 */
-	xlog_res_t	   t_res_arr[XLOG_TIC_LEN_MAX];  /* array of res : X */ 
 	uint		   t_res_num_ophdrs;		 /* num op hdrs  : 4 */
 	uint		   t_res_arr_sum;		 /* array sum    : 4 */
 	uint		   t_res_o_flow;		 /* sum overflow : 4 */
-#endif
+	xlog_res_t	   t_res_arr[XLOG_TIC_LEN_MAX];  /* array of res : 8 * 15 */ 
 } xlog_ticket_t;
 
 #endif
@@ -494,71 +485,13 @@ typedef struct log {
 
 #define XLOG_FORCED_SHUTDOWN(log)	((log)->l_flags & XLOG_IO_ERROR)
 
-#define XLOG_GRANT_SUB_SPACE(log,bytes,type)				\
-    {									\
-	if (type == 'w') {						\
-		(log)->l_grant_write_bytes -= (bytes);			\
-		if ((log)->l_grant_write_bytes < 0) {			\
-			(log)->l_grant_write_bytes += (log)->l_logsize;	\
-			(log)->l_grant_write_cycle--;			\
-		}							\
-	} else {							\
-		(log)->l_grant_reserve_bytes -= (bytes);		\
-		if ((log)->l_grant_reserve_bytes < 0) {			\
-			(log)->l_grant_reserve_bytes += (log)->l_logsize;\
-			(log)->l_grant_reserve_cycle--;			\
-		}							\
-	 }								\
-    }
-#define XLOG_GRANT_ADD_SPACE(log,bytes,type)				\
-    {									\
-	if (type == 'w') {						\
-		(log)->l_grant_write_bytes += (bytes);			\
-		if ((log)->l_grant_write_bytes > (log)->l_logsize) {	\
-			(log)->l_grant_write_bytes -= (log)->l_logsize;	\
-			(log)->l_grant_write_cycle++;			\
-		}							\
-	} else {							\
-		(log)->l_grant_reserve_bytes += (bytes);		\
-		if ((log)->l_grant_reserve_bytes > (log)->l_logsize) {	\
-			(log)->l_grant_reserve_bytes -= (log)->l_logsize;\
-			(log)->l_grant_reserve_cycle++;			\
-		}							\
-	 }								\
-    }
-#define XLOG_INS_TICKETQ(q, tic)			\
-    {							\
-	if (q) {					\
-		(tic)->t_next	    = (q);		\
-		(tic)->t_prev	    = (q)->t_prev;	\
-		(q)->t_prev->t_next = (tic);		\
-		(q)->t_prev	    = (tic);		\
-	} else {					\
-		(tic)->t_prev = (tic)->t_next = (tic);	\
-		(q) = (tic);				\
-	}						\
-	(tic)->t_flags |= XLOG_TIC_IN_Q;		\
-    }
-#define XLOG_DEL_TICKETQ(q, tic)			\
-    {							\
-	if ((tic) == (tic)->t_next) {			\
-		(q) = NULL;				\
-	} else {					\
-		(q) = (tic)->t_next;			\
-		(tic)->t_next->t_prev = (tic)->t_prev;	\
-		(tic)->t_prev->t_next = (tic)->t_next;	\
-	}						\
-	(tic)->t_next = (tic)->t_prev = NULL;		\
-	(tic)->t_flags &= ~XLOG_TIC_IN_Q;		\
-    }
 
 /* common routines */
 extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
 extern int	 xlog_find_tail(xlog_t	*log,
 				xfs_daddr_t *head_blk,
-				xfs_daddr_t *tail_blk,
-				int readonly);
-extern int	 xlog_recover(xlog_t *log, int readonly);
+				xfs_daddr_t *tail_blk);
+extern int	 xlog_recover(xlog_t *log);
 extern int	 xlog_recover_finish(xlog_t *log, int mfsi_flags);
 extern void	 xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
 extern void	 xlog_recover_process_iunlinks(xlog_t *log);

+ 6 - 6
fs/xfs/xfs_log_recover.c

@@ -783,8 +783,7 @@ int
 xlog_find_tail(
 	xlog_t			*log,
 	xfs_daddr_t		*head_blk,
-	xfs_daddr_t		*tail_blk,
-	int			readonly)
+	xfs_daddr_t		*tail_blk)
 {
 	xlog_rec_header_t	*rhead;
 	xlog_op_header_t	*op_head;
@@ -2563,10 +2562,12 @@ xlog_recover_do_quotaoff_trans(
 
 	/*
 	 * The logitem format's flag tells us if this was user quotaoff,
-	 * group quotaoff or both.
+	 * group/project quotaoff or both.
 	 */
 	if (qoff_f->qf_flags & XFS_UQUOTA_ACCT)
 		log->l_quotaoffs_flag |= XFS_DQ_USER;
+	if (qoff_f->qf_flags & XFS_PQUOTA_ACCT)
+		log->l_quotaoffs_flag |= XFS_DQ_PROJ;
 	if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
 		log->l_quotaoffs_flag |= XFS_DQ_GROUP;
 
@@ -3890,14 +3891,13 @@ xlog_do_recover(
  */
 int
 xlog_recover(
-	xlog_t		*log,
-	int		readonly)
+	xlog_t		*log)
 {
 	xfs_daddr_t	head_blk, tail_blk;
 	int		error;
 
 	/* find the tail of the log */
-	if ((error = xlog_find_tail(log, &head_blk, &tail_blk, readonly)))
+	if ((error = xlog_find_tail(log, &head_blk, &tail_blk)))
 		return error;
 
 	if (tail_blk != head_blk) {

+ 2 - 3
fs/xfs/xfs_mount.c

@@ -51,7 +51,7 @@ STATIC int	xfs_uuid_mount(xfs_mount_t *);
 STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
 STATIC void	xfs_unmountfs_wait(xfs_mount_t *);
 
-static struct {
+static const struct {
     short offset;
     short type;     /* 0 = integer
 		* 1 = binary / string (no translation)
@@ -1077,8 +1077,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
 
 	xfs_iflush_all(mp);
 
-	XFS_QM_DQPURGEALL(mp,
-		XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA | XFS_QMOPT_UMOUNTING);
+	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
 
 	/*
 	 * Flush out the log synchronously so that we know for sure

+ 1 - 2
fs/xfs/xfs_mount.h

@@ -308,7 +308,6 @@ typedef struct xfs_mount {
 	xfs_buftarg_t		*m_ddev_targp;	/* saves taking the address */
 	xfs_buftarg_t		*m_logdev_targp;/* ptr to log device */
 	xfs_buftarg_t		*m_rtdev_targp;	/* ptr to rt device */
-#define m_dev		m_ddev_targp->pbr_dev
 	__uint8_t		m_dircook_elog;	/* log d-cookie entry bits */
 	__uint8_t		m_blkbit_log;	/* blocklog + NBBY */
 	__uint8_t		m_blkbb_log;	/* blocklog - BBSHIFT */
@@ -393,7 +392,7 @@ typedef struct xfs_mount {
 						   user */
 #define XFS_MOUNT_NOALIGN	(1ULL << 7)	/* turn off stripe alignment
 						   allocations */
-#define XFS_MOUNT_COMPAT_ATTR	(1ULL << 8)	/* do not use attr2 format */
+#define XFS_MOUNT_ATTR2		(1ULL << 8)	/* allow use of attr2 format */
 			     /*	(1ULL << 9)	-- currently unused */
 #define XFS_MOUNT_NORECOVERY	(1ULL << 10)	/* no recovery - dirty fs */
 #define XFS_MOUNT_SHARED	(1ULL << 11)	/* shared mount */

+ 2 - 5
fs/xfs/xfs_rename.c

@@ -243,7 +243,6 @@ xfs_rename(
 	xfs_inode_t	*inodes[4];
 	int		target_ip_dropped = 0;	/* dropped target_ip link? */
 	vnode_t		*src_dir_vp;
-	bhv_desc_t	*target_dir_bdp;
 	int		spaceres;
 	int		target_link_zero = 0;
 	int		num_inodes;
@@ -260,14 +259,12 @@ xfs_rename(
 	 * Find the XFS behavior descriptor for the target directory
 	 * vnode since it was not handed to us.
 	 */
-	target_dir_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(target_dir_vp),
-						&xfs_vnodeops);
-	if (target_dir_bdp == NULL) {
+	target_dp = xfs_vtoi(target_dir_vp);
+	if (target_dp == NULL) {
 		return XFS_ERROR(EXDEV);
 	}
 
 	src_dp = XFS_BHVTOI(src_dir_bdp);
-	target_dp = XFS_BHVTOI(target_dir_bdp);
 	mp = src_dp->i_mount;
 
 	if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_RENAME) ||

+ 4 - 5
fs/xfs/xfs_rw.c

@@ -238,6 +238,7 @@ xfs_bioerror_relse(
 	}
 	return (EIO);
 }
+
 /*
  * Prints out an ALERT message about I/O error.
  */
@@ -252,11 +253,9 @@ xfs_ioerror_alert(
  "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx"
  "       (\"%s\") error %d buf count %zd",
 		(!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
-		XFS_BUFTARG_NAME(bp->pb_target),
-		(__uint64_t)blkno,
-		func,
-		XFS_BUF_GETERROR(bp),
-		XFS_BUF_COUNT(bp));
+		XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
+		(__uint64_t)blkno, func,
+		XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
 }
 
 /*

+ 0 - 17
fs/xfs/xfs_sb.h

@@ -68,18 +68,6 @@ struct xfs_mount;
 	(XFS_SB_VERSION_NUMBITS | \
 	 XFS_SB_VERSION_OKREALFBITS | \
 	 XFS_SB_VERSION_OKSASHFBITS)
-#define XFS_SB_VERSION_MKFS(ia,dia,extflag,dirv2,na,sflag,morebits)	\
-	(((ia) || (dia) || (extflag) || (dirv2) || (na) || (sflag) || \
-	  (morebits)) ? \
-		(XFS_SB_VERSION_4 | \
-		 ((ia) ? XFS_SB_VERSION_ALIGNBIT : 0) | \
-		 ((dia) ? XFS_SB_VERSION_DALIGNBIT : 0) | \
-		 ((extflag) ? XFS_SB_VERSION_EXTFLGBIT : 0) | \
-		 ((dirv2) ? XFS_SB_VERSION_DIRV2BIT : 0) | \
-		 ((na) ? XFS_SB_VERSION_LOGV2BIT : 0) | \
-		 ((sflag) ? XFS_SB_VERSION_SECTORBIT : 0) | \
-		 ((morebits) ? XFS_SB_VERSION_MOREBITSBIT : 0)) : \
-		XFS_SB_VERSION_1)
 
 /*
  * There are two words to hold XFS "feature" bits: the original
@@ -105,11 +93,6 @@ struct xfs_mount;
 	(XFS_SB_VERSION2_OKREALFBITS |	\
 	 XFS_SB_VERSION2_OKSASHFBITS )
 
-/*
- * mkfs macro to set up sb_features2 word
- */
-#define	XFS_SB_VERSION2_MKFS(resvd1, sbcntr)	0
-
 typedef struct xfs_sb
 {
 	__uint32_t	sb_magicnum;	/* magic number == XFS_SB_MAGIC */

+ 8 - 6
fs/xfs/xfs_trans.c

@@ -1014,6 +1014,7 @@ xfs_trans_cancel(
 	xfs_log_item_t		*lip;
 	int			i;
 #endif
+	xfs_mount_t		*mp = tp->t_mountp;
 
 	/*
 	 * See if the caller is being too lazy to figure out if
@@ -1026,9 +1027,10 @@ xfs_trans_cancel(
 	 * filesystem.  This happens in paths where we detect
 	 * corruption and decide to give up.
 	 */
-	if ((tp->t_flags & XFS_TRANS_DIRTY) &&
-	    !XFS_FORCED_SHUTDOWN(tp->t_mountp))
-		xfs_force_shutdown(tp->t_mountp, XFS_CORRUPT_INCORE);
+	if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) {
+		XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
+		xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
+	}
 #ifdef DEBUG
 	if (!(flags & XFS_TRANS_ABORT)) {
 		licp = &(tp->t_items);
@@ -1040,7 +1042,7 @@ xfs_trans_cancel(
 				}
 
 				lip = lidp->lid_item;
-				if (!XFS_FORCED_SHUTDOWN(tp->t_mountp))
+				if (!XFS_FORCED_SHUTDOWN(mp))
 					ASSERT(!(lip->li_type == XFS_LI_EFD));
 			}
 			licp = licp->lic_next;
@@ -1048,7 +1050,7 @@ xfs_trans_cancel(
 	}
 #endif
 	xfs_trans_unreserve_and_mod_sb(tp);
-	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp);
+	XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
 
 	if (tp->t_ticket) {
 		if (flags & XFS_TRANS_RELEASE_LOG_RES) {
@@ -1057,7 +1059,7 @@ xfs_trans_cancel(
 		} else {
 			log_flags = 0;
 		}
-		xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags);
+		xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
 	}
 
 	/* mark this thread as no longer being in a transaction */

+ 0 - 1
fs/xfs/xfs_trans.h

@@ -973,7 +973,6 @@ void		xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
-void		xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
 void		xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);

+ 3 - 6
fs/xfs/xfs_utils.c

@@ -55,16 +55,13 @@ xfs_get_dir_entry(
 	xfs_inode_t	**ipp)
 {
 	vnode_t		*vp;
-	bhv_desc_t	*bdp;
 
 	vp = VNAME_TO_VNODE(dentry);
-	bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops);
-	if (!bdp) {
-		*ipp = NULL;
+
+	*ipp = xfs_vtoi(vp);
+	if (!*ipp)
 		return XFS_ERROR(ENOENT);
-	}
 	VN_HOLD(vp);
-	*ipp = XFS_BHVTOI(bdp);
 	return 0;
 }
 

+ 28 - 22
fs/xfs/xfs_vfsops.c

@@ -53,6 +53,7 @@
 #include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_clnt.h"
+#include "xfs_fsops.h"
 
 STATIC int xfs_sync(bhv_desc_t *, int, cred_t *);
 
@@ -290,8 +291,8 @@ xfs_start_flags(
 		mp->m_flags |= XFS_MOUNT_IDELETE;
 	if (ap->flags & XFSMNT_DIRSYNC)
 		mp->m_flags |= XFS_MOUNT_DIRSYNC;
-	if (ap->flags & XFSMNT_COMPAT_ATTR)
-		mp->m_flags |= XFS_MOUNT_COMPAT_ATTR;
+	if (ap->flags & XFSMNT_ATTR2)
+		mp->m_flags |= XFS_MOUNT_ATTR2;
 
 	if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
 		mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
@@ -312,6 +313,8 @@ xfs_start_flags(
 		mp->m_flags |= XFS_MOUNT_NOUUID;
 	if (ap->flags & XFSMNT_BARRIER)
 		mp->m_flags |= XFS_MOUNT_BARRIER;
+	else
+		mp->m_flags &= ~XFS_MOUNT_BARRIER;
 
 	return 0;
 }
@@ -330,10 +333,11 @@ xfs_finish_flags(
 
 	/* Fail a mount where the logbuf is smaller then the log stripe */
 	if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
-		if ((ap->logbufsize == -1) &&
+		if ((ap->logbufsize <= 0) &&
 		    (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
 			mp->m_logbsize = mp->m_sb.sb_logsunit;
-		} else if (ap->logbufsize < mp->m_sb.sb_logsunit) {
+		} else if (ap->logbufsize > 0 &&
+			   ap->logbufsize < mp->m_sb.sb_logsunit) {
 			cmn_err(CE_WARN,
 	"XFS: logbuf size must be greater than or equal to log stripe size");
 			return XFS_ERROR(EINVAL);
@@ -347,6 +351,10 @@ xfs_finish_flags(
 		}
 	}
 
+	if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
+		mp->m_flags |= XFS_MOUNT_ATTR2;
+	}
+
 	/*
 	 * prohibit r/w mounts of read-only filesystems
 	 */
@@ -382,10 +390,6 @@ xfs_finish_flags(
 			return XFS_ERROR(EINVAL);
 	}
 
-	if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
-		mp->m_flags &= ~XFS_MOUNT_COMPAT_ATTR;
-	}
-
 	return 0;
 }
 
@@ -504,13 +508,13 @@ xfs_mount(
 	if (error)
 		goto error2;
 
+	if ((mp->m_flags & XFS_MOUNT_BARRIER) && !(vfsp->vfs_flag & VFS_RDONLY))
+		xfs_mountfs_check_barriers(mp);
+
 	error = XFS_IOINIT(vfsp, args, flags);
 	if (error)
 		goto error2;
 
-	if ((args->flags & XFSMNT_BARRIER) &&
-	    !(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY))
-		xfs_mountfs_check_barriers(mp);
 	return 0;
 
 error2:
@@ -655,6 +659,11 @@ xfs_mntupdate(
 	else
 		mp->m_flags &= ~XFS_MOUNT_NOATIME;
 
+	if (args->flags & XFSMNT_BARRIER)
+		mp->m_flags |= XFS_MOUNT_BARRIER;
+	else
+		mp->m_flags &= ~XFS_MOUNT_BARRIER;
+
 	if ((vfsp->vfs_flag & VFS_RDONLY) &&
 	    !(*flags & MS_RDONLY)) {
 		vfsp->vfs_flag &= ~VFS_RDONLY;
@@ -1634,6 +1643,7 @@ xfs_vget(
 #define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
 #define MNTOPT_BARRIER	"barrier"	/* use writer barriers for log write and
 					 * unwritten extent conversion */
+#define MNTOPT_NOBARRIER "nobarrier"	/* .. disable */
 #define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
 #define MNTOPT_64BITINODE   "inode64"	/* inodes can be allocated anywhere */
 #define MNTOPT_IKEEP	"ikeep"		/* do not free empty inode clusters */
@@ -1680,7 +1690,6 @@ xfs_parseargs(
 	int			iosize;
 
 	args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
-	args->flags |= XFSMNT_COMPAT_ATTR;
 
 #if 0	/* XXX: off by default, until some remaining issues ironed out */
 	args->flags |= XFSMNT_IDELETE; /* default to on */
@@ -1806,6 +1815,8 @@ xfs_parseargs(
 			args->flags |= XFSMNT_NOUUID;
 		} else if (!strcmp(this_char, MNTOPT_BARRIER)) {
 			args->flags |= XFSMNT_BARRIER;
+		} else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
+			args->flags &= ~XFSMNT_BARRIER;
 		} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
 			args->flags &= ~XFSMNT_IDELETE;
 		} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
@@ -1815,9 +1826,9 @@ xfs_parseargs(
 		} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
 			args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
 		} else if (!strcmp(this_char, MNTOPT_ATTR2)) {
-			args->flags &= ~XFSMNT_COMPAT_ATTR;
+			args->flags |= XFSMNT_ATTR2;
 		} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
-			args->flags |= XFSMNT_COMPAT_ATTR;
+			args->flags &= ~XFSMNT_ATTR2;
 		} else if (!strcmp(this_char, "osyncisdsync")) {
 			/* no-op, this is now the default */
 printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
@@ -1892,7 +1903,6 @@ xfs_showargs(
 		{ XFS_MOUNT_NOUUID,		"," MNTOPT_NOUUID },
 		{ XFS_MOUNT_NORECOVERY,		"," MNTOPT_NORECOVERY },
 		{ XFS_MOUNT_OSYNCISOSYNC,	"," MNTOPT_OSYNCISOSYNC },
-		{ XFS_MOUNT_BARRIER,		"," MNTOPT_BARRIER },
 		{ XFS_MOUNT_IDELETE,		"," MNTOPT_NOIKEEP },
 		{ 0, NULL }
 	};
@@ -1914,33 +1924,28 @@ xfs_showargs(
 
 	if (mp->m_logbufs > 0)
 		seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
-
 	if (mp->m_logbsize > 0)
 		seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
 
 	if (mp->m_logname)
 		seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
-
 	if (mp->m_rtname)
 		seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
 
 	if (mp->m_dalign > 0)
 		seq_printf(m, "," MNTOPT_SUNIT "=%d",
 				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
-
 	if (mp->m_swidth > 0)
 		seq_printf(m, "," MNTOPT_SWIDTH "=%d",
 				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
 
-	if (!(mp->m_flags & XFS_MOUNT_COMPAT_ATTR))
-		seq_printf(m, "," MNTOPT_ATTR2);
-
 	if (!(mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE))
 		seq_printf(m, "," MNTOPT_LARGEIO);
+	if (mp->m_flags & XFS_MOUNT_BARRIER)
+		seq_printf(m, "," MNTOPT_BARRIER);
 
 	if (!(vfsp->vfs_flag & VFS_32BITINODES))
 		seq_printf(m, "," MNTOPT_64BITINODE);
-
 	if (vfsp->vfs_flag & VFS_GRPID)
 		seq_printf(m, "," MNTOPT_GRPID);
 
@@ -1959,6 +1964,7 @@ xfs_freeze(
 	/* Push the superblock and write an unmount record */
 	xfs_log_unmount_write(mp);
 	xfs_unmountfs_writesb(mp);
+	xfs_fs_log_dummy(mp);
 }
 
 

+ 91 - 102
fs/xfs/xfs_vnodeops.c

@@ -185,8 +185,7 @@ xfs_getattr(
 		break;
 	}
 
-	vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec;
-	vap->va_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
+	vn_atime_to_timespec(vp, &vap->va_atime);
 	vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
 	vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
 	vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
@@ -543,24 +542,6 @@ xfs_setattr(
 			goto error_return;
 		}
 
-		/*
-		 * Can't set extent size unless the file is marked, or
-		 * about to be marked as a realtime file.
-		 *
-		 * This check will be removed when fixed size extents
-		 * with buffered data writes is implemented.
-		 *
-		 */
-		if ((mask & XFS_AT_EXTSIZE)			&&
-		    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
-		     vap->va_extsize) &&
-		    (!((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
-		       ((mask & XFS_AT_XFLAGS) &&
-			(vap->va_xflags & XFS_XFLAG_REALTIME))))) {
-			code = XFS_ERROR(EINVAL);
-			goto error_return;
-		}
-
 		/*
 		 * Can't change realtime flag if any extents are allocated.
 		 */
@@ -823,13 +804,17 @@ xfs_setattr(
 					di_flags |= XFS_DIFLAG_RTINHERIT;
 				if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
 					di_flags |= XFS_DIFLAG_NOSYMLINKS;
-			} else {
+				if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
+					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+			} else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
 				if (vap->va_xflags & XFS_XFLAG_REALTIME) {
 					di_flags |= XFS_DIFLAG_REALTIME;
 					ip->i_iocore.io_flags |= XFS_IOCORE_RT;
 				} else {
 					ip->i_iocore.io_flags &= ~XFS_IOCORE_RT;
 				}
+				if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
+					di_flags |= XFS_DIFLAG_EXTSIZE;
 			}
 			ip->i_d.di_flags = di_flags;
 		}
@@ -999,10 +984,6 @@ xfs_readlink(
 		goto error_return;
 	}
 
-	if (!(ioflags & IO_INVIS)) {
-		xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
-	}
-
 	/*
 	 * See if the symlink is stored inline.
 	 */
@@ -1234,7 +1215,8 @@ xfs_inactive_free_eofblocks(
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
 	if (!error && (nimaps != 0) &&
-	    (imap.br_startblock != HOLESTARTBLOCK)) {
+	    (imap.br_startblock != HOLESTARTBLOCK ||
+	     ip->i_delayed_blks)) {
 		/*
 		 * Attach the dquots to the inode up front.
 		 */
@@ -1569,9 +1551,11 @@ xfs_release(
 
 	if (ip->i_d.di_nlink != 0) {
 		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
-		     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) &&
+		     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
+		       ip->i_delayed_blks > 0)) &&
 		     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
-		    (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)))) {
+		    (!(ip->i_d.di_flags &
+				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
 			if ((error = xfs_inactive_free_eofblocks(mp, ip)))
 				return (error);
 			/* Update linux inode block count after free above */
@@ -1628,7 +1612,8 @@ xfs_inactive(
 	 * only one with a reference to the inode.
 	 */
 	truncate = ((ip->i_d.di_nlink == 0) &&
-	    ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0)) &&
+            ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) ||
+             (ip->i_delayed_blks > 0)) &&
 	    ((ip->i_d.di_mode & S_IFMT) == S_IFREG));
 
 	mp = ip->i_mount;
@@ -1646,10 +1631,12 @@ xfs_inactive(
 
 	if (ip->i_d.di_nlink != 0) {
 		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
-		     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) &&
-		     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
-		    (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)) ||
-		     (ip->i_delayed_blks != 0))) {
+                     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
+                       ip->i_delayed_blks > 0)) &&
+		      (ip->i_df.if_flags & XFS_IFEXTENTS) &&
+		     (!(ip->i_d.di_flags &
+				(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
+		      (ip->i_delayed_blks != 0)))) {
 			if ((error = xfs_inactive_free_eofblocks(mp, ip)))
 				return (VN_INACTIVE_CACHE);
 			/* Update linux inode block count after free above */
@@ -2593,7 +2580,6 @@ xfs_link(
 	int			cancel_flags;
 	int			committed;
 	vnode_t			*target_dir_vp;
-	bhv_desc_t		*src_bdp;
 	int			resblks;
 	char			*target_name = VNAME(dentry);
 	int			target_namelen;
@@ -2606,8 +2592,7 @@ xfs_link(
 	if (VN_ISDIR(src_vp))
 		return XFS_ERROR(EPERM);
 
-	src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops);
-	sip = XFS_BHVTOI(src_bdp);
+	sip = xfs_vtoi(src_vp);
 	tdp = XFS_BHVTOI(target_dir_bdp);
 	mp = tdp->i_mount;
 	if (XFS_FORCED_SHUTDOWN(mp))
@@ -3240,7 +3225,6 @@ xfs_readdir(
 	xfs_trans_t	*tp = NULL;
 	int		error = 0;
 	uint		lock_mode;
-	xfs_off_t	start_offset;
 
 	vn_trace_entry(BHV_TO_VNODE(dir_bdp), __FUNCTION__,
 					       (inst_t *)__return_address);
@@ -3251,11 +3235,7 @@ xfs_readdir(
 	}
 
 	lock_mode = xfs_ilock_map_shared(dp);
-	start_offset = uiop->uio_offset;
 	error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp);
-	if (start_offset != uiop->uio_offset) {
-		xfs_ichgtime(dp, XFS_ICHGTIME_ACC);
-	}
 	xfs_iunlock_map_shared(dp, lock_mode);
 	return error;
 }
@@ -3832,7 +3812,12 @@ xfs_reclaim(
 	vn_iowait(vp);
 
 	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
-	ASSERT(VN_CACHED(vp) == 0);
+
+	/*
+	 * Make sure the atime in the XFS inode is correct before freeing the
+	 * Linux inode.
+	 */
+	xfs_synchronize_atime(ip);
 
 	/* If we have nothing to flush with this inode then complete the
 	 * teardown now, otherwise break the link between the xfs inode
@@ -4002,42 +3987,36 @@ xfs_alloc_file_space(
 	int			alloc_type,
 	int			attr_flags)
 {
+	xfs_mount_t		*mp = ip->i_mount;
+	xfs_off_t		count;
 	xfs_filblks_t		allocated_fsb;
 	xfs_filblks_t		allocatesize_fsb;
-	int			committed;
-	xfs_off_t		count;
-	xfs_filblks_t		datablocks;
-	int			error;
+	xfs_extlen_t		extsz, temp;
+	xfs_fileoff_t		startoffset_fsb;
 	xfs_fsblock_t		firstfsb;
-	xfs_bmap_free_t		free_list;
-	xfs_bmbt_irec_t		*imapp;
-	xfs_bmbt_irec_t		imaps[1];
-	xfs_mount_t		*mp;
-	int			numrtextents;
-	int			reccount;
-	uint			resblks;
+	int			nimaps;
+	int			bmapi_flag;
+	int			quota_flag;
 	int			rt;
-	int			rtextsize;
-	xfs_fileoff_t		startoffset_fsb;
 	xfs_trans_t		*tp;
-	int			xfs_bmapi_flags;
+	xfs_bmbt_irec_t		imaps[1], *imapp;
+	xfs_bmap_free_t		free_list;
+	uint			qblocks, resblks, resrtextents;
+	int			committed;
+	int			error;
 
 	vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
-	mp = ip->i_mount;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	/*
-	 * determine if this is a realtime file
-	 */
-	if ((rt = XFS_IS_REALTIME_INODE(ip)) != 0) {
-		if (ip->i_d.di_extsize)
-			rtextsize = ip->i_d.di_extsize;
-		else
-			rtextsize = mp->m_sb.sb_rextsize;
-	} else
-		rtextsize = 0;
+	rt = XFS_IS_REALTIME_INODE(ip);
+	if (unlikely(rt)) {
+		if (!(extsz = ip->i_d.di_extsize))
+			extsz = mp->m_sb.sb_rextsize;
+	} else {
+		extsz = ip->i_d.di_extsize;
+	}
 
 	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return error;
@@ -4048,8 +4027,8 @@ xfs_alloc_file_space(
 	count = len;
 	error = 0;
 	imapp = &imaps[0];
-	reccount = 1;
-	xfs_bmapi_flags = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
+	nimaps = 1;
+	bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
 	startoffset_fsb	= XFS_B_TO_FSBT(mp, offset);
 	allocatesize_fsb = XFS_B_TO_FSB(mp, count);
 
@@ -4070,43 +4049,51 @@ xfs_alloc_file_space(
 	}
 
 	/*
-	 * allocate file space until done or until there is an error
+	 * Allocate file space until done or until there is an error
 	 */
 retry:
 	while (allocatesize_fsb && !error) {
+		xfs_fileoff_t	s, e;
+
 		/*
-		 * determine if reserving space on
-		 * the data or realtime partition.
+		 * Determine space reservations for data/realtime.
 		 */
-		if (rt) {
-			xfs_fileoff_t s, e;
-
+		if (unlikely(extsz)) {
 			s = startoffset_fsb;
-			do_div(s, rtextsize);
-			s *= rtextsize;
-			e = roundup_64(startoffset_fsb + allocatesize_fsb,
-				rtextsize);
-			numrtextents = (int)(e - s) / mp->m_sb.sb_rextsize;
-			datablocks = 0;
+			do_div(s, extsz);
+			s *= extsz;
+			e = startoffset_fsb + allocatesize_fsb;
+			if ((temp = do_mod(startoffset_fsb, extsz)))
+				e += temp;
+			if ((temp = do_mod(e, extsz)))
+				e += extsz - temp;
+		} else {
+			s = 0;
+			e = allocatesize_fsb;
+		}
+
+		if (unlikely(rt)) {
+			resrtextents = qblocks = (uint)(e - s);
+			resrtextents /= mp->m_sb.sb_rextsize;
+			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+			quota_flag = XFS_QMOPT_RES_RTBLKS;
 		} else {
-			datablocks = allocatesize_fsb;
-			numrtextents = 0;
+			resrtextents = 0;
+			resblks = qblocks = \
+				XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s));
+			quota_flag = XFS_QMOPT_RES_REGBLKS;
 		}
 
 		/*
-		 * allocate and setup the transaction
+		 * Allocate and setup the transaction.
 		 */
 		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-		resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
-		error = xfs_trans_reserve(tp,
-					  resblks,
-					  XFS_WRITE_LOG_RES(mp),
-					  numrtextents,
+		error = xfs_trans_reserve(tp, resblks,
+					  XFS_WRITE_LOG_RES(mp), resrtextents,
 					  XFS_TRANS_PERM_LOG_RES,
 					  XFS_WRITE_LOG_COUNT);
-
 		/*
-		 * check for running out of space
+		 * Check for running out of space
 		 */
 		if (error) {
 			/*
@@ -4117,8 +4104,8 @@ retry:
 			break;
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
-				ip->i_udquot, ip->i_gdquot, resblks, 0, 0);
+		error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
+						      qblocks, 0, quota_flag);
 		if (error)
 			goto error1;
 
@@ -4126,19 +4113,19 @@ retry:
 		xfs_trans_ihold(tp, ip);
 
 		/*
-		 * issue the bmapi() call to allocate the blocks
+		 * Issue the xfs_bmapi() call to allocate the blocks
 		 */
 		XFS_BMAP_INIT(&free_list, &firstfsb);
 		error = xfs_bmapi(tp, ip, startoffset_fsb,
-				  allocatesize_fsb, xfs_bmapi_flags,
-				  &firstfsb, 0, imapp, &reccount,
+				  allocatesize_fsb, bmapi_flag,
+				  &firstfsb, 0, imapp, &nimaps,
 				  &free_list);
 		if (error) {
 			goto error0;
 		}
 
 		/*
-		 * complete the transaction
+		 * Complete the transaction
 		 */
 		error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
 		if (error) {
@@ -4153,7 +4140,7 @@ retry:
 
 		allocated_fsb = imapp->br_blockcount;
 
-		if (reccount == 0) {
+		if (nimaps == 0) {
 			error = XFS_ERROR(ENOSPC);
 			break;
 		}
@@ -4176,9 +4163,11 @@ dmapi_enospc_check:
 
 	return error;
 
- error0:
+error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
 	xfs_bmap_cancel(&free_list);
- error1:
+	XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+
+error1:	/* Just cancel transaction */
 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	goto dmapi_enospc_check;
@@ -4423,8 +4412,8 @@ xfs_free_file_space(
 		}
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
-				ip->i_udquot, ip->i_gdquot, resblks, 0, rt ?
-				XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+				ip->i_udquot, ip->i_gdquot, resblks, 0,
+				XFS_QMOPT_RES_REGBLKS);
 		if (error)
 			goto error1;
 

+ 2 - 0
mm/swap.c

@@ -384,6 +384,8 @@ unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
 	return pagevec_count(pvec);
 }
 
+EXPORT_SYMBOL(pagevec_lookup);
+
 unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
 		pgoff_t *index, int tag, unsigned nr_pages)
 {

Деякі файли не було показано, через те що забагато файлів було змінено