Эх сурвалжийг харах

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6: (87 commits)
  nilfs2: get rid of bd_mount_sem use from nilfs
  nilfs2: correct exclusion control in nilfs_remount function
  nilfs2: simplify remaining sget() use
  nilfs2: get rid of sget use for checking if current mount is present
  nilfs2: get rid of sget use for acquiring nilfs object
  nilfs2: remove meaningless EBUSY case from nilfs_get_sb function
  remove the call to ->write_super in __sync_filesystem
  nilfs2: call nilfs2_write_super from nilfs2_sync_fs
  jffs2: call jffs2_write_super from jffs2_sync_fs
  ufs: add ->sync_fs
  sysv: add ->sync_fs
  hfsplus: add ->sync_fs
  hfs: add ->sync_fs
  fat: add ->sync_fs
  ext2: add ->sync_fs
  exofs: add ->sync_fs
  bfs: add ->sync_fs
  affs: add ->sync_fs
  sanitize ->fsync() for affs
  repair bfs_write_inode(), switch bfs to simple_fsync()
  ...
Linus Torvalds 16 жил өмнө
parent
commit
4b4f1d0178
100 өөрчлөгдсөн 1267 нэмэгдсэн , 1100 устгасан
  1. 0 3
      arch/alpha/kernel/osf_sys.c
  2. 3 2
      drivers/ieee1394/dv1394.c
  3. 5 1
      drivers/ieee1394/ieee1394_core.h
  4. 5 0
      drivers/usb/core/inode.c
  5. 3 1
      fs/adfs/adfs.h
  6. 8 2
      fs/adfs/dir.c
  7. 17 0
      fs/adfs/dir_f.c
  8. 17 0
      fs/adfs/dir_fplus.c
  9. 1 1
      fs/adfs/file.c
  10. 2 2
      fs/adfs/inode.c
  11. 1 1
      fs/adfs/map.c
  12. 4 0
      fs/adfs/super.c
  13. 1 0
      fs/affs/affs.h
  14. 1 1
      fs/affs/dir.c
  15. 13 1
      fs/affs/file.c
  16. 39 15
      fs/affs/super.c
  17. 1 1
      fs/afs/mntpt.c
  18. 4 0
      fs/afs/super.c
  19. 2 3
      fs/autofs/dirhash.c
  20. 3 3
      fs/autofs4/autofs_i.h
  21. 60 135
      fs/autofs4/dev-ioctl.c
  22. 7 8
      fs/autofs4/expire.c
  23. 3 4
      fs/autofs4/root.c
  24. 4 1
      fs/befs/linuxvfs.c
  25. 4 4
      fs/bfs/dir.c
  26. 38 14
      fs/bfs/inode.c
  27. 12 7
      fs/block_dev.c
  28. 0 7
      fs/btrfs/inode.c
  29. 0 11
      fs/btrfs/super.c
  30. 3 1
      fs/cachefiles/interface.c
  31. 13 1
      fs/char_dev.c
  32. 1 1
      fs/cifs/cifs_dfs_ref.c
  33. 5 1
      fs/cifs/cifsfs.c
  34. 0 2
      fs/compat.c
  35. 3 4
      fs/dcache.c
  36. 5 0
      fs/ecryptfs/super.c
  37. 22 3
      fs/exofs/super.c
  38. 1 1
      fs/ext2/Makefile
  39. 1 1
      fs/ext2/dir.c
  40. 0 3
      fs/ext2/ext2.h
  41. 2 2
      fs/ext2/file.c
  42. 0 50
      fs/ext2/fsync.c
  43. 2 9
      fs/ext2/inode.c
  44. 43 17
      fs/ext2/super.c
  45. 1 2
      fs/ext3/balloc.c
  46. 1 2
      fs/ext3/ialloc.c
  47. 0 1
      fs/ext3/inode.c
  48. 0 2
      fs/ext3/resize.c
  49. 11 23
      fs/ext3/super.c
  50. 0 1
      fs/ext3/xattr.c
  51. 14 2
      fs/ext4/super.c
  52. 8 8
      fs/fat/dir.c
  53. 6 0
      fs/fat/fat.h
  54. 8 5
      fs/fat/fatent.c
  55. 13 1
      fs/fat/file.c
  56. 30 1
      fs/fat/inode.c
  57. 2 2
      fs/fat/namei_msdos.c
  58. 2 2
      fs/fat/namei_vfat.c
  59. 39 1
      fs/file_table.c
  60. 4 0
      fs/freevxfs/vxfs_super.c
  61. 24 68
      fs/fs-writeback.c
  62. 0 2
      fs/gfs2/log.c
  63. 3 12
      fs/gfs2/super.c
  64. 21 2
      fs/hfs/super.c
  65. 21 4
      fs/hfsplus/super.c
  66. 12 0
      fs/hpfs/super.c
  67. 1 1
      fs/inode.c
  68. 17 0
      fs/internal.h
  69. 5 0
      fs/isofs/inode.c
  70. 3 15
      fs/jffs2/fs.c
  71. 0 1
      fs/jffs2/os-linux.h
  72. 26 0
      fs/jffs2/super.c
  73. 23 4
      fs/jfs/super.c
  74. 25 0
      fs/libfs.c
  75. 1 1
      fs/minix/dir.c
  76. 1 19
      fs/minix/file.c
  77. 13 24
      fs/minix/inode.c
  78. 0 2
      fs/minix/minix.h
  79. 77 52
      fs/namei.c
  80. 139 188
      fs/namespace.c
  81. 4 0
      fs/ncpfs/inode.c
  82. 1 1
      fs/nfs/namespace.c
  83. 2 0
      fs/nfs/super.c
  84. 32 46
      fs/nfsd/export.c
  85. 27 27
      fs/nfsd/vfs.c
  86. 3 3
      fs/nilfs2/cpfile.c
  87. 1 0
      fs/nilfs2/sb.h
  88. 93 163
      fs/nilfs2/super.c
  89. 108 5
      fs/nilfs2/the_nilfs.c
  90. 21 2
      fs/nilfs2/the_nilfs.h
  91. 22 32
      fs/ntfs/super.c
  92. 8 14
      fs/ocfs2/super.c
  93. 1 16
      fs/omfs/file.c
  94. 2 2
      fs/open.c
  95. 25 0
      fs/proc/internal.h
  96. 1 0
      fs/proc/proc_devtree.c
  97. 1 1
      fs/qnx4/Makefile
  98. 1 6
      fs/qnx4/bitmap.c
  99. 2 7
      fs/qnx4/dir.c
  100. 2 3
      fs/qnx4/file.c

+ 0 - 3
arch/alpha/kernel/osf_sys.c

@@ -371,8 +371,6 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, char __user *, path,
 	int retval = -EINVAL;
 	char *name;
 
-	lock_kernel();
-
 	name = getname(path);
 	retval = PTR_ERR(name);
 	if (IS_ERR(name))
@@ -392,7 +390,6 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, char __user *, path,
 	}
 	putname(name);
  out:
-	unlock_kernel();
 	return retval;
 }
 

+ 3 - 2
drivers/ieee1394/dv1394.c

@@ -1789,12 +1789,13 @@ static int dv1394_open(struct inode *inode, struct file *file)
 	} else {
 		/* look up the card by ID */
 		unsigned long flags;
+		int idx = ieee1394_file_to_instance(file);
 
 		spin_lock_irqsave(&dv1394_cards_lock, flags);
 		if (!list_empty(&dv1394_cards)) {
 			struct video_card *p;
 			list_for_each_entry(p, &dv1394_cards, list) {
-				if ((p->id) == ieee1394_file_to_instance(file)) {
+				if ((p->id) == idx) {
 					video = p;
 					break;
 				}
@@ -1803,7 +1804,7 @@ static int dv1394_open(struct inode *inode, struct file *file)
 		spin_unlock_irqrestore(&dv1394_cards_lock, flags);
 
 		if (!video) {
-			debug_printk("dv1394: OHCI card %d not found", ieee1394_file_to_instance(file));
+			debug_printk("dv1394: OHCI card %d not found", idx);
 			return -ENODEV;
 		}
 

+ 5 - 1
drivers/ieee1394/ieee1394_core.h

@@ -5,6 +5,7 @@
 #include <linux/fs.h>
 #include <linux/list.h>
 #include <linux/types.h>
+#include <linux/cdev.h>
 #include <asm/atomic.h>
 
 #include "hosts.h"
@@ -155,7 +156,10 @@ void hpsb_packet_received(struct hpsb_host *host, quadlet_t *data, size_t size,
  */
 static inline unsigned char ieee1394_file_to_instance(struct file *file)
 {
-	return file->f_path.dentry->d_inode->i_cindex;
+	int idx = cdev_index(file->f_path.dentry->d_inode);
+	if (idx < 0)
+		idx = 0;
+	return idx;
 }
 
 extern int hpsb_disable_irm;

+ 5 - 0
drivers/usb/core/inode.c

@@ -39,6 +39,7 @@
 #include <linux/parser.h>
 #include <linux/notifier.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <asm/byteorder.h>
 #include "usb.h"
 #include "hcd.h"
@@ -265,9 +266,13 @@ static int remount(struct super_block *sb, int *flags, char *data)
 		return -EINVAL;
 	}
 
+	lock_kernel();
+
 	if (usbfs_mount && usbfs_mount->mnt_sb)
 		update_sb(usbfs_mount->mnt_sb);
 
+	unlock_kernel();
+
 	return 0;
 }
 

+ 3 - 1
fs/adfs/adfs.h

@@ -53,6 +53,7 @@ struct adfs_dir_ops {
 	int	(*update)(struct adfs_dir *dir, struct object_info *obj);
 	int	(*create)(struct adfs_dir *dir, struct object_info *obj);
 	int	(*remove)(struct adfs_dir *dir, struct object_info *obj);
+	int	(*sync)(struct adfs_dir *dir);
 	void	(*free)(struct adfs_dir *dir);
 };
 
@@ -90,7 +91,8 @@ extern const struct dentry_operations adfs_dentry_operations;
 extern struct adfs_dir_ops adfs_f_dir_ops;
 extern struct adfs_dir_ops adfs_fplus_dir_ops;
 
-extern int adfs_dir_update(struct super_block *sb, struct object_info *obj);
+extern int adfs_dir_update(struct super_block *sb, struct object_info *obj,
+			   int wait);
 
 /* file.c */
 extern const struct inode_operations adfs_file_inode_operations;

+ 8 - 2
fs/adfs/dir.c

@@ -83,7 +83,7 @@ out:
 }
 
 int
-adfs_dir_update(struct super_block *sb, struct object_info *obj)
+adfs_dir_update(struct super_block *sb, struct object_info *obj, int wait)
 {
 	int ret = -EINVAL;
 #ifdef CONFIG_ADFS_FS_RW
@@ -106,6 +106,12 @@ adfs_dir_update(struct super_block *sb, struct object_info *obj)
 	ret = ops->update(&dir, obj);
 	write_unlock(&adfs_dir_lock);
 
+	if (wait) {
+		int err = ops->sync(&dir);
+		if (!ret)
+			ret = err;
+	}
+
 	ops->free(&dir);
 out:
 #endif
@@ -199,7 +205,7 @@ const struct file_operations adfs_dir_operations = {
 	.read		= generic_read_dir,
 	.llseek		= generic_file_llseek,
 	.readdir	= adfs_readdir,
-	.fsync		= file_fsync,
+	.fsync		= simple_fsync,
 };
 
 static int

+ 17 - 0
fs/adfs/dir_f.c

@@ -437,6 +437,22 @@ bad_dir:
 #endif
 }
 
+static int
+adfs_f_sync(struct adfs_dir *dir)
+{
+	int err = 0;
+	int i;
+
+	for (i = dir->nr_buffers - 1; i >= 0; i--) {
+		struct buffer_head *bh = dir->bh[i];
+		sync_dirty_buffer(bh);
+		if (buffer_req(bh) && !buffer_uptodate(bh))
+			err = -EIO;
+	}
+
+	return err;
+}
+
 static void
 adfs_f_free(struct adfs_dir *dir)
 {
@@ -456,5 +472,6 @@ struct adfs_dir_ops adfs_f_dir_ops = {
 	.setpos		= adfs_f_setpos,
 	.getnext	= adfs_f_getnext,
 	.update		= adfs_f_update,
+	.sync		= adfs_f_sync,
 	.free		= adfs_f_free
 };

+ 17 - 0
fs/adfs/dir_fplus.c

@@ -161,6 +161,22 @@ out:
 	return ret;
 }
 
+static int
+adfs_fplus_sync(struct adfs_dir *dir)
+{
+	int err = 0;
+	int i;
+
+	for (i = dir->nr_buffers - 1; i >= 0; i--) {
+		struct buffer_head *bh = dir->bh[i];
+		sync_dirty_buffer(bh);
+		if (buffer_req(bh) && !buffer_uptodate(bh))
+			err = -EIO;
+	}
+
+	return err;
+}
+
 static void
 adfs_fplus_free(struct adfs_dir *dir)
 {
@@ -175,5 +191,6 @@ struct adfs_dir_ops adfs_fplus_dir_ops = {
 	.read		= adfs_fplus_read,
 	.setpos		= adfs_fplus_setpos,
 	.getnext	= adfs_fplus_getnext,
+	.sync		= adfs_fplus_sync,
 	.free		= adfs_fplus_free
 };

+ 1 - 1
fs/adfs/file.c

@@ -30,7 +30,7 @@ const struct file_operations adfs_file_operations = {
 	.read		= do_sync_read,
 	.aio_read	= generic_file_aio_read,
 	.mmap		= generic_file_mmap,
-	.fsync		= file_fsync,
+	.fsync		= simple_fsync,
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
 	.splice_read	= generic_file_splice_read,

+ 2 - 2
fs/adfs/inode.c

@@ -376,7 +376,7 @@ out:
  * The adfs-specific inode data has already been updated by
  * adfs_notify_change()
  */
-int adfs_write_inode(struct inode *inode, int unused)
+int adfs_write_inode(struct inode *inode, int wait)
 {
 	struct super_block *sb = inode->i_sb;
 	struct object_info obj;
@@ -391,7 +391,7 @@ int adfs_write_inode(struct inode *inode, int unused)
 	obj.attr	= ADFS_I(inode)->attr;
 	obj.size	= inode->i_size;
 
-	ret = adfs_dir_update(sb, &obj);
+	ret = adfs_dir_update(sb, &obj, wait);
 	unlock_kernel();
 	return ret;
 }

+ 1 - 1
fs/adfs/map.c

@@ -62,7 +62,7 @@ static DEFINE_RWLOCK(adfs_map_lock);
 #define GET_FRAG_ID(_map,_start,_idmask)				\
 	({								\
 		unsigned char *_m = _map + (_start >> 3);		\
-		u32 _frag = get_unaligned((u32 *)_m);			\
+		u32 _frag = get_unaligned_le32(_m);			\
 		_frag >>= (_start & 7);					\
 		_frag & _idmask;					\
 	})

+ 4 - 0
fs/adfs/super.c

@@ -132,11 +132,15 @@ static void adfs_put_super(struct super_block *sb)
 	int i;
 	struct adfs_sb_info *asb = ADFS_SB(sb);
 
+	lock_kernel();
+
 	for (i = 0; i < asb->s_map_size; i++)
 		brelse(asb->s_map[i].dm_bh);
 	kfree(asb->s_map);
 	kfree(asb);
 	sb->s_fs_info = NULL;
+
+	unlock_kernel();
 }
 
 static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt)

+ 1 - 0
fs/affs/affs.h

@@ -182,6 +182,7 @@ extern int			 affs_add_entry(struct inode *dir, struct inode *inode, struct dent
 
 void		affs_free_prealloc(struct inode *inode);
 extern void	affs_truncate(struct inode *);
+int		affs_file_fsync(struct file *, struct dentry *, int);
 
 /* dir.c */
 

+ 1 - 1
fs/affs/dir.c

@@ -21,7 +21,7 @@ const struct file_operations affs_dir_operations = {
 	.read		= generic_read_dir,
 	.llseek		= generic_file_llseek,
 	.readdir	= affs_readdir,
-	.fsync		= file_fsync,
+	.fsync		= affs_file_fsync,
 };
 
 /*

+ 13 - 1
fs/affs/file.c

@@ -34,7 +34,7 @@ const struct file_operations affs_file_operations = {
 	.mmap		= generic_file_mmap,
 	.open		= affs_file_open,
 	.release	= affs_file_release,
-	.fsync		= file_fsync,
+	.fsync		= affs_file_fsync,
 	.splice_read	= generic_file_splice_read,
 };
 
@@ -915,3 +915,15 @@ affs_truncate(struct inode *inode)
 	}
 	affs_free_prealloc(inode);
 }
+
+int affs_file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+{
+	struct inode * inode = dentry->d_inode;
+	int ret, err;
+
+	ret = write_inode_now(inode, 0);
+	err = sync_blockdev(inode->i_sb->s_bdev);
+	if (!ret)
+		ret = err;
+	return ret;
+}

+ 39 - 15
fs/affs/super.c

@@ -16,6 +16,7 @@
 #include <linux/parser.h>
 #include <linux/magic.h>
 #include <linux/sched.h>
+#include <linux/smp_lock.h>
 #include "affs.h"
 
 extern struct timezone sys_tz;
@@ -23,50 +24,68 @@ extern struct timezone sys_tz;
 static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int affs_remount (struct super_block *sb, int *flags, char *data);
 
+static void
+affs_commit_super(struct super_block *sb, int clean)
+{
+	struct affs_sb_info *sbi = AFFS_SB(sb);
+	struct buffer_head *bh = sbi->s_root_bh;
+	struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh);
+
+	tail->bm_flag = cpu_to_be32(clean);
+	secs_to_datestamp(get_seconds(), &tail->disk_change);
+	affs_fix_checksum(sb, bh);
+	mark_buffer_dirty(bh);
+}
+
 static void
 affs_put_super(struct super_block *sb)
 {
 	struct affs_sb_info *sbi = AFFS_SB(sb);
 	pr_debug("AFFS: put_super()\n");
 
-	if (!(sb->s_flags & MS_RDONLY)) {
-		AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag = cpu_to_be32(1);
-		secs_to_datestamp(get_seconds(),
-				  &AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->disk_change);
-		affs_fix_checksum(sb, sbi->s_root_bh);
-		mark_buffer_dirty(sbi->s_root_bh);
-	}
+	lock_kernel();
+
+	if (!(sb->s_flags & MS_RDONLY))
+		affs_commit_super(sb, 1);
 
 	kfree(sbi->s_prefix);
 	affs_free_bitmap(sb);
 	affs_brelse(sbi->s_root_bh);
 	kfree(sbi);
 	sb->s_fs_info = NULL;
-	return;
+
+	unlock_kernel();
 }
 
 static void
 affs_write_super(struct super_block *sb)
 {
 	int clean = 2;
-	struct affs_sb_info *sbi = AFFS_SB(sb);
 
+	lock_super(sb);
 	if (!(sb->s_flags & MS_RDONLY)) {
 		//	if (sbi->s_bitmap[i].bm_bh) {
 		//		if (buffer_dirty(sbi->s_bitmap[i].bm_bh)) {
 		//			clean = 0;
-		AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag = cpu_to_be32(clean);
-		secs_to_datestamp(get_seconds(),
-				  &AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->disk_change);
-		affs_fix_checksum(sb, sbi->s_root_bh);
-		mark_buffer_dirty(sbi->s_root_bh);
+		affs_commit_super(sb, clean);
 		sb->s_dirt = !clean;	/* redo until bitmap synced */
 	} else
 		sb->s_dirt = 0;
+	unlock_super(sb);
 
 	pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean);
 }
 
+static int
+affs_sync_fs(struct super_block *sb, int wait)
+{
+	lock_super(sb);
+	affs_commit_super(sb, 2);
+	sb->s_dirt = 0;
+	unlock_super(sb);
+	return 0;
+}
+
 static struct kmem_cache * affs_inode_cachep;
 
 static struct inode *affs_alloc_inode(struct super_block *sb)
@@ -124,6 +143,7 @@ static const struct super_operations affs_sops = {
 	.clear_inode	= affs_clear_inode,
 	.put_super	= affs_put_super,
 	.write_super	= affs_write_super,
+	.sync_fs	= affs_sync_fs,
 	.statfs		= affs_statfs,
 	.remount_fs	= affs_remount,
 	.show_options	= generic_show_options,
@@ -507,6 +527,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 		kfree(new_opts);
 		return -EINVAL;
 	}
+	lock_kernel();
 	replace_mount_options(sb, new_opts);
 
 	sbi->s_flags = mount_flags;
@@ -514,8 +535,10 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 	sbi->s_uid   = uid;
 	sbi->s_gid   = gid;
 
-	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
+		unlock_kernel();
 		return 0;
+	}
 	if (*flags & MS_RDONLY) {
 		sb->s_dirt = 1;
 		while (sb->s_dirt)
@@ -524,6 +547,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 	} else
 		res = affs_init_bitmap(sb, flags);
 
+	unlock_kernel();
 	return res;
 }
 

+ 1 - 1
fs/afs/mntpt.c

@@ -244,7 +244,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 	case -EBUSY:
 		/* someone else made a mount here whilst we were busy */
 		while (d_mountpoint(nd->path.dentry) &&
-		       follow_down(&nd->path.mnt, &nd->path.dentry))
+		       follow_down(&nd->path))
 			;
 		err = 0;
 	default:

+ 4 - 0
fs/afs/super.c

@@ -440,8 +440,12 @@ static void afs_put_super(struct super_block *sb)
 
 	_enter("");
 
+	lock_kernel();
+
 	afs_put_volume(as->volume);
 
+	unlock_kernel();
+
 	_leave("");
 }
 

+ 2 - 3
fs/autofs/dirhash.c

@@ -85,13 +85,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
 		}
 		path.mnt = mnt;
 		path_get(&path);
-		if (!follow_down(&path.mnt, &path.dentry)) {
+		if (!follow_down(&path)) {
 			path_put(&path);
 			DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
 			continue;
 		}
-		while (d_mountpoint(path.dentry) &&
-		       follow_down(&path.mnt, &path.dentry))
+		while (d_mountpoint(path.dentry) && follow_down(&path));
 			;
 		umount_ok = may_umount(path.mnt);
 		path_put(&path);

+ 3 - 3
fs/autofs4/autofs_i.h

@@ -223,12 +223,12 @@ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
 int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
 void autofs4_catatonic_mode(struct autofs_sb_info *);
 
-static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **dentry)
+static inline int autofs4_follow_mount(struct path *path)
 {
 	int res = 0;
 
-	while (d_mountpoint(*dentry)) {
-		int followed = follow_down(mnt, dentry);
+	while (d_mountpoint(path->dentry)) {
+		int followed = follow_down(path);
 		if (!followed)
 			break;
 		res = 1;

+ 60 - 135
fs/autofs4/dev-ioctl.c

@@ -192,77 +192,42 @@ static int autofs_dev_ioctl_protosubver(struct file *fp,
 	return 0;
 }
 
-/*
- * Walk down the mount stack looking for an autofs mount that
- * has the requested device number (aka. new_encode_dev(sb->s_dev).
- */
-static int autofs_dev_ioctl_find_super(struct nameidata *nd, dev_t devno)
+static int find_autofs_mount(const char *pathname,
+			     struct path *res,
+			     int test(struct path *path, void *data),
+			     void *data)
 {
-	struct dentry *dentry;
-	struct inode *inode;
-	struct super_block *sb;
-	dev_t s_dev;
-	unsigned int err;
-
+	struct path path;
+	int err = kern_path(pathname, 0, &path);
+	if (err)
+		return err;
 	err = -ENOENT;
-
-	/* Lookup the dentry name at the base of our mount point */
-	dentry = d_lookup(nd->path.dentry, &nd->last);
-	if (!dentry)
-		goto out;
-
-	dput(nd->path.dentry);
-	nd->path.dentry = dentry;
-
-	/* And follow the mount stack looking for our autofs mount */
-	while (follow_down(&nd->path.mnt, &nd->path.dentry)) {
-		inode = nd->path.dentry->d_inode;
-		if (!inode)
-			break;
-
-		sb = inode->i_sb;
-		s_dev = new_encode_dev(sb->s_dev);
-		if (devno == s_dev) {
-			if (sb->s_magic == AUTOFS_SUPER_MAGIC) {
+	while (path.dentry == path.mnt->mnt_root) {
+		if (path.mnt->mnt_sb->s_magic == AUTOFS_SUPER_MAGIC) {
+			if (test(&path, data)) {
+				path_get(&path);
+				if (!err) /* already found some */
+					path_put(res);
+				*res = path;
 				err = 0;
-				break;
 			}
 		}
+		if (!follow_up(&path))
+			break;
 	}
-out:
+	path_put(&path);
 	return err;
 }
 
-/*
- * Walk down the mount stack looking for an autofs mount that
- * has the requested mount type (ie. indirect, direct or offset).
- */
-static int autofs_dev_ioctl_find_sbi_type(struct nameidata *nd, unsigned int type)
+static int test_by_dev(struct path *path, void *p)
 {
-	struct dentry *dentry;
-	struct autofs_info *ino;
-	unsigned int err;
-
-	err = -ENOENT;
-
-	/* Lookup the dentry name at the base of our mount point */
-	dentry = d_lookup(nd->path.dentry, &nd->last);
-	if (!dentry)
-		goto out;
-
-	dput(nd->path.dentry);
-	nd->path.dentry = dentry;
+	return path->mnt->mnt_sb->s_dev == *(dev_t *)p;
+}
 
-	/* And follow the mount stack looking for our autofs mount */
-	while (follow_down(&nd->path.mnt, &nd->path.dentry)) {
-		ino = autofs4_dentry_ino(nd->path.dentry);
-		if (ino && ino->sbi->type & type) {
-			err = 0;
-			break;
-		}
-	}
-out:
-	return err;
+static int test_by_type(struct path *path, void *p)
+{
+	struct autofs_info *ino = autofs4_dentry_ino(path->dentry);
+	return ino && ino->sbi->type & *(unsigned *)p;
 }
 
 static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
@@ -283,31 +248,25 @@ static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
  * Open a file descriptor on the autofs mount point corresponding
  * to the given path and device number (aka. new_encode_dev(sb->s_dev)).
  */
-static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid)
+static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
 {
-	struct file *filp;
-	struct nameidata nd;
 	int err, fd;
 
 	fd = get_unused_fd();
 	if (likely(fd >= 0)) {
-		/* Get nameidata of the parent directory */
-		err = path_lookup(path, LOOKUP_PARENT, &nd);
+		struct file *filp;
+		struct path path;
+
+		err = find_autofs_mount(name, &path, test_by_dev, &devid);
 		if (err)
 			goto out;
 
 		/*
-		 * Search down, within the parent, looking for an
-		 * autofs super block that has the device number
+		 * Find autofs super block that has the device number
 		 * corresponding to the autofs fs we want to open.
 		 */
-		err = autofs_dev_ioctl_find_super(&nd, devid);
-		if (err) {
-			path_put(&nd.path);
-			goto out;
-		}
 
-		filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
+		filp = dentry_open(path.dentry, path.mnt, O_RDONLY,
 				   current_cred());
 		if (IS_ERR(filp)) {
 			err = PTR_ERR(filp);
@@ -340,7 +299,7 @@ static int autofs_dev_ioctl_openmount(struct file *fp,
 	param->ioctlfd = -1;
 
 	path = param->path;
-	devid = param->openmount.devid;
+	devid = new_decode_dev(param->openmount.devid);
 
 	err = 0;
 	fd = autofs_dev_ioctl_open_mountpoint(path, devid);
@@ -475,8 +434,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
 				      struct autofs_dev_ioctl *param)
 {
 	struct autofs_info *ino;
-	struct nameidata nd;
-	const char *path;
+	struct path path;
 	dev_t devid;
 	int err = -ENOENT;
 
@@ -485,32 +443,24 @@ static int autofs_dev_ioctl_requester(struct file *fp,
 		goto out;
 	}
 
-	path = param->path;
-	devid = new_encode_dev(sbi->sb->s_dev);
+	devid = sbi->sb->s_dev;
 
 	param->requester.uid = param->requester.gid = -1;
 
-	/* Get nameidata of the parent directory */
-	err = path_lookup(path, LOOKUP_PARENT, &nd);
+	err = find_autofs_mount(param->path, &path, test_by_dev, &devid);
 	if (err)
 		goto out;
 
-	err = autofs_dev_ioctl_find_super(&nd, devid);
-	if (err)
-		goto out_release;
-
-	ino = autofs4_dentry_ino(nd.path.dentry);
+	ino = autofs4_dentry_ino(path.dentry);
 	if (ino) {
 		err = 0;
-		autofs4_expire_wait(nd.path.dentry);
+		autofs4_expire_wait(path.dentry);
 		spin_lock(&sbi->fs_lock);
 		param->requester.uid = ino->uid;
 		param->requester.gid = ino->gid;
 		spin_unlock(&sbi->fs_lock);
 	}
-
-out_release:
-	path_put(&nd.path);
+	path_put(&path);
 out:
 	return err;
 }
@@ -569,8 +519,8 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
 					 struct autofs_sb_info *sbi,
 					 struct autofs_dev_ioctl *param)
 {
-	struct nameidata nd;
-	const char *path;
+	struct path path;
+	const char *name;
 	unsigned int type;
 	unsigned int devid, magic;
 	int err = -ENOENT;
@@ -580,71 +530,46 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
 		goto out;
 	}
 
-	path = param->path;
+	name = param->path;
 	type = param->ismountpoint.in.type;
 
 	param->ismountpoint.out.devid = devid = 0;
 	param->ismountpoint.out.magic = magic = 0;
 
 	if (!fp || param->ioctlfd == -1) {
-		if (autofs_type_any(type)) {
-			struct super_block *sb;
-
-			err = path_lookup(path, LOOKUP_FOLLOW, &nd);
-			if (err)
-				goto out;
-
-			sb = nd.path.dentry->d_sb;
-			devid = new_encode_dev(sb->s_dev);
-		} else {
-			struct autofs_info *ino;
-
-			err = path_lookup(path, LOOKUP_PARENT, &nd);
-			if (err)
-				goto out;
-
-			err = autofs_dev_ioctl_find_sbi_type(&nd, type);
-			if (err)
-				goto out_release;
-
-			ino = autofs4_dentry_ino(nd.path.dentry);
-			devid = autofs4_get_dev(ino->sbi);
-		}
-
+		if (autofs_type_any(type))
+			err = kern_path(name, LOOKUP_FOLLOW, &path);
+		else
+			err = find_autofs_mount(name, &path, test_by_type, &type);
+		if (err)
+			goto out;
+		devid = new_encode_dev(path.mnt->mnt_sb->s_dev);
 		err = 0;
-		if (nd.path.dentry->d_inode &&
-		    nd.path.mnt->mnt_root == nd.path.dentry) {
+		if (path.dentry->d_inode &&
+		    path.mnt->mnt_root == path.dentry) {
 			err = 1;
-			magic = nd.path.dentry->d_inode->i_sb->s_magic;
+			magic = path.dentry->d_inode->i_sb->s_magic;
 		}
 	} else {
-		dev_t dev = autofs4_get_dev(sbi);
+		dev_t dev = sbi->sb->s_dev;
 
-		err = path_lookup(path, LOOKUP_PARENT, &nd);
+		err = find_autofs_mount(name, &path, test_by_dev, &dev);
 		if (err)
 			goto out;
 
-		err = autofs_dev_ioctl_find_super(&nd, dev);
-		if (err)
-			goto out_release;
-
-		devid = dev;
+		devid = new_encode_dev(dev);
 
-		err = have_submounts(nd.path.dentry);
+		err = have_submounts(path.dentry);
 
-		if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) {
-			if (follow_down(&nd.path.mnt, &nd.path.dentry)) {
-				struct inode *inode = nd.path.dentry->d_inode;
-				magic = inode->i_sb->s_magic;
-			}
+		if (path.mnt->mnt_mountpoint != path.mnt->mnt_root) {
+			if (follow_down(&path))
+				magic = path.mnt->mnt_sb->s_magic;
 		}
 	}
 
 	param->ismountpoint.out.devid = devid;
 	param->ismountpoint.out.magic = magic;
-
-out_release:
-	path_put(&nd.path);
+	path_put(&path);
 out:
 	return err;
 }

+ 7 - 8
fs/autofs4/expire.c

@@ -48,19 +48,19 @@ static inline int autofs4_can_expire(struct dentry *dentry,
 static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 {
 	struct dentry *top = dentry;
+	struct path path = {.mnt = mnt, .dentry = dentry};
 	int status = 1;
 
 	DPRINTK("dentry %p %.*s",
 		dentry, (int)dentry->d_name.len, dentry->d_name.name);
 
-	mntget(mnt);
-	dget(dentry);
+	path_get(&path);
 
-	if (!follow_down(&mnt, &dentry))
+	if (!follow_down(&path))
 		goto done;
 
-	if (is_autofs4_dentry(dentry)) {
-		struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	if (is_autofs4_dentry(path.dentry)) {
+		struct autofs_sb_info *sbi = autofs4_sbi(path.dentry->d_sb);
 
 		/* This is an autofs submount, we can't expire it */
 		if (autofs_type_indirect(sbi->type))
@@ -70,7 +70,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 		 * Otherwise it's an offset mount and we need to check
 		 * if we can umount its mount, if there is one.
 		 */
-		if (!d_mountpoint(dentry)) {
+		if (!d_mountpoint(path.dentry)) {
 			status = 0;
 			goto done;
 		}
@@ -86,8 +86,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 	status = 0;
 done:
 	DPRINTK("returning = %d", status);
-	dput(dentry);
-	mntput(mnt);
+	path_put(&path);
 	return status;
 }
 

+ 3 - 4
fs/autofs4/root.c

@@ -181,7 +181,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 		nd->flags);
 	/*
 	 * For an expire of a covered direct or offset mount we need
-	 * to beeak out of follow_down() at the autofs mount trigger
+	 * to break out of follow_down() at the autofs mount trigger
 	 * (d_mounted--), so we can see the expiring flag, and manage
 	 * the blocking and following here until the expire is completed.
 	 */
@@ -190,7 +190,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 		if (ino->flags & AUTOFS_INF_EXPIRING) {
 			spin_unlock(&sbi->fs_lock);
 			/* Follow down to our covering mount. */
-			if (!follow_down(&nd->path.mnt, &nd->path.dentry))
+			if (!follow_down(&nd->path))
 				goto done;
 			goto follow;
 		}
@@ -230,8 +230,7 @@ follow:
 	 * to follow it.
 	 */
 	if (d_mountpoint(dentry)) {
-		if (!autofs4_follow_mount(&nd->path.mnt,
-					  &nd->path.dentry)) {
+		if (!autofs4_follow_mount(&nd->path)) {
 			status = -ENOENT;
 			goto out_error;
 		}

+ 4 - 1
fs/befs/linuxvfs.c

@@ -737,6 +737,8 @@ parse_options(char *options, befs_mount_options * opts)
 static void
 befs_put_super(struct super_block *sb)
 {
+	lock_kernel();
+
 	kfree(BEFS_SB(sb)->mount_opts.iocharset);
 	BEFS_SB(sb)->mount_opts.iocharset = NULL;
 
@@ -747,7 +749,8 @@ befs_put_super(struct super_block *sb)
 
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
-	return;
+
+	unlock_kernel();
 }
 
 /* Allocate private field of the superblock, fill it.

+ 4 - 4
fs/bfs/dir.c

@@ -79,7 +79,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
 const struct file_operations bfs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= bfs_readdir,
-	.fsync		= file_fsync,
+	.fsync		= simple_fsync,
 	.llseek		= generic_file_llseek,
 };
 
@@ -205,7 +205,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
 		inode->i_nlink = 1;
 	}
 	de->ino = 0;
-	mark_buffer_dirty(bh);
+	mark_buffer_dirty_inode(bh, dir);
 	dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
 	mark_inode_dirty(dir);
 	inode->i_ctime = dir->i_ctime;
@@ -267,7 +267,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		new_inode->i_ctime = CURRENT_TIME_SEC;
 		inode_dec_link_count(new_inode);
 	}
-	mark_buffer_dirty(old_bh);
+	mark_buffer_dirty_inode(old_bh, old_dir);
 	error = 0;
 
 end_rename:
@@ -320,7 +320,7 @@ static int bfs_add_entry(struct inode *dir, const unsigned char *name,
 				for (i = 0; i < BFS_NAMELEN; i++)
 					de->name[i] =
 						(i < namelen) ? name[i] : 0;
-				mark_buffer_dirty(bh);
+				mark_buffer_dirty_inode(bh, dir);
 				brelse(bh);
 				return 0;
 			}

+ 38 - 14
fs/bfs/inode.c

@@ -30,6 +30,7 @@ MODULE_LICENSE("GPL");
 #define dprintf(x...)
 #endif
 
+static void bfs_write_super(struct super_block *s);
 void dump_imap(const char *prefix, struct super_block *s);
 
 struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
@@ -97,14 +98,15 @@ error:
 	return ERR_PTR(-EIO);
 }
 
-static int bfs_write_inode(struct inode *inode, int unused)
+static int bfs_write_inode(struct inode *inode, int wait)
 {
+	struct bfs_sb_info *info = BFS_SB(inode->i_sb);
 	unsigned int ino = (u16)inode->i_ino;
         unsigned long i_sblock;
 	struct bfs_inode *di;
 	struct buffer_head *bh;
 	int block, off;
-	struct bfs_sb_info *info = BFS_SB(inode->i_sb);
+	int err = 0;
 
         dprintf("ino=%08x\n", ino);
 
@@ -145,9 +147,14 @@ static int bfs_write_inode(struct inode *inode, int unused)
 	di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
 
 	mark_buffer_dirty(bh);
+	if (wait) {
+		sync_dirty_buffer(bh);
+		if (buffer_req(bh) && !buffer_uptodate(bh))
+			err = -EIO;
+	}
 	brelse(bh);
 	mutex_unlock(&info->bfs_lock);
-	return 0;
+	return err;
 }
 
 static void bfs_delete_inode(struct inode *inode)
@@ -209,6 +216,26 @@ static void bfs_delete_inode(struct inode *inode)
 	clear_inode(inode);
 }
 
+static int bfs_sync_fs(struct super_block *sb, int wait)
+{
+	struct bfs_sb_info *info = BFS_SB(sb);
+
+	mutex_lock(&info->bfs_lock);
+	mark_buffer_dirty(info->si_sbh);
+	sb->s_dirt = 0;
+	mutex_unlock(&info->bfs_lock);
+
+	return 0;
+}
+
+static void bfs_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		bfs_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
+}
+
 static void bfs_put_super(struct super_block *s)
 {
 	struct bfs_sb_info *info = BFS_SB(s);
@@ -216,11 +243,18 @@ static void bfs_put_super(struct super_block *s)
 	if (!info)
 		return;
 
+	lock_kernel();
+
+	if (s->s_dirt)
+		bfs_write_super(s);
+
 	brelse(info->si_sbh);
 	mutex_destroy(&info->bfs_lock);
 	kfree(info->si_imap);
 	kfree(info);
 	s->s_fs_info = NULL;
+
+	unlock_kernel();
 }
 
 static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -240,17 +274,6 @@ static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static void bfs_write_super(struct super_block *s)
-{
-	struct bfs_sb_info *info = BFS_SB(s);
-
-	mutex_lock(&info->bfs_lock);
-	if (!(s->s_flags & MS_RDONLY))
-		mark_buffer_dirty(info->si_sbh);
-	s->s_dirt = 0;
-	mutex_unlock(&info->bfs_lock);
-}
-
 static struct kmem_cache *bfs_inode_cachep;
 
 static struct inode *bfs_alloc_inode(struct super_block *sb)
@@ -298,6 +321,7 @@ static const struct super_operations bfs_sops = {
 	.delete_inode	= bfs_delete_inode,
 	.put_super	= bfs_put_super,
 	.write_super	= bfs_write_super,
+	.sync_fs	= bfs_sync_fs,
 	.statfs		= bfs_statfs,
 };
 

+ 12 - 7
fs/block_dev.c

@@ -176,17 +176,22 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 				iov, offset, nr_segs, blkdev_get_blocks, NULL);
 }
 
+int __sync_blockdev(struct block_device *bdev, int wait)
+{
+	if (!bdev)
+		return 0;
+	if (!wait)
+		return filemap_flush(bdev->bd_inode->i_mapping);
+	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
+}
+
 /*
  * Write out and wait upon all the dirty data associated with a block
  * device via its mapping.  Does not take the superblock lock.
  */
 int sync_blockdev(struct block_device *bdev)
 {
-	int ret = 0;
-
-	if (bdev)
-		ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
-	return ret;
+	return __sync_blockdev(bdev, 1);
 }
 EXPORT_SYMBOL(sync_blockdev);
 
@@ -199,7 +204,7 @@ int fsync_bdev(struct block_device *bdev)
 {
 	struct super_block *sb = get_super(bdev);
 	if (sb) {
-		int res = fsync_super(sb);
+		int res = sync_filesystem(sb);
 		drop_super(sb);
 		return res;
 	}
@@ -241,7 +246,7 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 		sb->s_frozen = SB_FREEZE_WRITE;
 		smp_wmb();
 
-		__fsync_super(sb);
+		sync_filesystem(sb);
 
 		sb->s_frozen = SB_FREEZE_TRANS;
 		smp_wmb();

+ 0 - 7
fs/btrfs/inode.c

@@ -2322,7 +2322,6 @@ err:
 	btrfs_update_inode(trans, root, dir);
 	btrfs_drop_nlink(inode);
 	ret = btrfs_update_inode(trans, root, inode);
-	dir->i_sb->s_dirt = 1;
 out:
 	return ret;
 }
@@ -2806,7 +2805,6 @@ error:
 				      pending_del_nr);
 	}
 	btrfs_free_path(path);
-	inode->i_sb->s_dirt = 1;
 	return ret;
 }
 
@@ -3768,7 +3766,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 		init_special_inode(inode, inode->i_mode, rdev);
 		btrfs_update_inode(trans, root, inode);
 	}
-	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
 	btrfs_update_inode_block_group(trans, dir);
 out_unlock:
@@ -3833,7 +3830,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 		inode->i_op = &btrfs_file_inode_operations;
 		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
 	}
-	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
 	btrfs_update_inode_block_group(trans, dir);
 out_unlock:
@@ -3880,7 +3876,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 	if (err)
 		drop_inode = 1;
 
-	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, dir);
 	err = btrfs_update_inode(trans, root, inode);
 
@@ -3962,7 +3957,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	d_instantiate(dentry, inode);
 	drop_on_err = 0;
-	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
 	btrfs_update_inode_block_group(trans, dir);
 
@@ -4991,7 +4985,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 		inode->i_op = &btrfs_file_inode_operations;
 		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
 	}
-	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
 	btrfs_update_inode_block_group(trans, dir);
 	if (drop_inode)

+ 0 - 11
fs/btrfs/super.c

@@ -394,10 +394,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 	struct btrfs_root *root = btrfs_sb(sb);
 	int ret;
 
-	if (sb->s_flags & MS_RDONLY)
-		return 0;
-
-	sb->s_dirt = 0;
 	if (!wait) {
 		filemap_flush(root->fs_info->btree_inode->i_mapping);
 		return 0;
@@ -408,7 +404,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 
 	trans = btrfs_start_transaction(root, 1);
 	ret = btrfs_commit_transaction(trans, root);
-	sb->s_dirt = 0;
 	return ret;
 }
 
@@ -454,11 +449,6 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	return 0;
 }
 
-static void btrfs_write_super(struct super_block *sb)
-{
-	sb->s_dirt = 0;
-}
-
 static int btrfs_test_super(struct super_block *s, void *data)
 {
 	struct btrfs_fs_devices *test_fs_devices = data;
@@ -689,7 +679,6 @@ static int btrfs_unfreeze(struct super_block *sb)
 static struct super_operations btrfs_super_ops = {
 	.delete_inode	= btrfs_delete_inode,
 	.put_super	= btrfs_put_super,
-	.write_super	= btrfs_write_super,
 	.sync_fs	= btrfs_sync_fs,
 	.show_options	= btrfs_show_options,
 	.write_inode	= btrfs_write_inode,

+ 3 - 1
fs/cachefiles/interface.c

@@ -354,7 +354,9 @@ static void cachefiles_sync_cache(struct fscache_cache *_cache)
 	/* make sure all pages pinned by operations on behalf of the netfs are
 	 * written to disc */
 	cachefiles_begin_secure(cache, &saved_cred);
-	ret = fsync_super(cache->mnt->mnt_sb);
+	down_read(&cache->mnt->mnt_sb->s_umount);
+	ret = sync_filesystem(cache->mnt->mnt_sb);
+	up_read(&cache->mnt->mnt_sb->s_umount);
 	cachefiles_end_secure(cache, saved_cred);
 
 	if (ret == -EIO)

+ 13 - 1
fs/char_dev.c

@@ -375,7 +375,6 @@ static int chrdev_open(struct inode *inode, struct file *filp)
 		p = inode->i_cdev;
 		if (!p) {
 			inode->i_cdev = p = new;
-			inode->i_cindex = idx;
 			list_add(&inode->i_devices, &p->list);
 			new = NULL;
 		} else if (!cdev_get(p))
@@ -405,6 +404,18 @@ static int chrdev_open(struct inode *inode, struct file *filp)
 	return ret;
 }
 
+int cdev_index(struct inode *inode)
+{
+	int idx;
+	struct kobject *kobj;
+
+	kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
+	if (!kobj)
+		return -1;
+	kobject_put(kobj);
+	return idx;
+}
+
 void cd_forget(struct inode *inode)
 {
 	spin_lock(&cdev_lock);
@@ -557,6 +568,7 @@ EXPORT_SYMBOL(cdev_init);
 EXPORT_SYMBOL(cdev_alloc);
 EXPORT_SYMBOL(cdev_del);
 EXPORT_SYMBOL(cdev_add);
+EXPORT_SYMBOL(cdev_index);
 EXPORT_SYMBOL(register_chrdev);
 EXPORT_SYMBOL(unregister_chrdev);
 EXPORT_SYMBOL(directly_mappable_cdev_bdi);

+ 1 - 1
fs/cifs/cifs_dfs_ref.c

@@ -275,7 +275,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
 	case -EBUSY:
 		/* someone else made a mount here whilst we were busy */
 		while (d_mountpoint(nd->path.dentry) &&
-		       follow_down(&nd->path.mnt, &nd->path.dentry))
+		       follow_down(&nd->path))
 			;
 		err = 0;
 	default:

+ 5 - 1
fs/cifs/cifsfs.c

@@ -204,6 +204,9 @@ cifs_put_super(struct super_block *sb)
 		cFYI(1, ("Empty cifs superblock info passed to unmount"));
 		return;
 	}
+
+	lock_kernel();
+
 	rc = cifs_umount(sb, cifs_sb);
 	if (rc)
 		cERROR(1, ("cifs_umount failed with return code %d", rc));
@@ -216,7 +219,8 @@ cifs_put_super(struct super_block *sb)
 
 	unload_nls(cifs_sb->local_nls);
 	kfree(cifs_sb);
-	return;
+
+	unlock_kernel();
 }
 
 static int

+ 0 - 2
fs/compat.c

@@ -812,10 +812,8 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
 		}
 	}
 
-	lock_kernel();
 	retval = do_mount((char*)dev_page, dir_page, (char*)type_page,
 			flags, (void*)data_page);
-	unlock_kernel();
 
  out4:
 	free_page(data_page);

+ 3 - 4
fs/dcache.c

@@ -1910,7 +1910,7 @@ char *__d_path(const struct path *path, struct path *root,
 
 	spin_lock(&vfsmount_lock);
 	prepend(&end, &buflen, "\0", 1);
-	if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
+	if (d_unlinked(dentry) &&
 		(prepend(&end, &buflen, " (deleted)", 10) != 0))
 			goto Elong;
 
@@ -2035,7 +2035,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
 
 	spin_lock(&dcache_lock);
 	prepend(&end, &buflen, "\0", 1);
-	if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
+	if (d_unlinked(dentry) &&
 		(prepend(&end, &buflen, "//deleted", 9) != 0))
 			goto Elong;
 	if (buflen < 1)
@@ -2097,9 +2097,8 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 	read_unlock(&current->fs->lock);
 
 	error = -ENOENT;
-	/* Has the current directory has been unlinked? */
 	spin_lock(&dcache_lock);
-	if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) {
+	if (!d_unlinked(pwd.dentry)) {
 		unsigned long len;
 		struct path tmp = root;
 		char * cwd;

+ 5 - 0
fs/ecryptfs/super.c

@@ -27,6 +27,7 @@
 #include <linux/mount.h>
 #include <linux/key.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <linux/file.h>
 #include <linux/crypto.h>
 #include "ecryptfs_kernel.h"
@@ -120,9 +121,13 @@ static void ecryptfs_put_super(struct super_block *sb)
 {
 	struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb);
 
+	lock_kernel();
+
 	ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
 	kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
 	ecryptfs_set_superblock_private(sb, NULL);
+
+	unlock_kernel();
 }
 
 /**

+ 22 - 3
fs/exofs/super.c

@@ -200,20 +200,21 @@ static const struct export_operations exofs_export_ops;
 /*
  * Write the superblock to the OSD
  */
-static void exofs_write_super(struct super_block *sb)
+static int exofs_sync_fs(struct super_block *sb, int wait)
 {
 	struct exofs_sb_info *sbi;
 	struct exofs_fscb *fscb;
 	struct osd_request *or;
 	struct osd_obj_id obj;
-	int ret;
+	int ret = -ENOMEM;
 
 	fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL);
 	if (!fscb) {
 		EXOFS_ERR("exofs_write_super: memory allocation failed.\n");
-		return;
+		return -ENOMEM;
 	}
 
+	lock_super(sb);
 	lock_kernel();
 	sbi = sb->s_fs_info;
 	fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
@@ -246,7 +247,17 @@ out:
 	if (or)
 		osd_end_request(or);
 	unlock_kernel();
+	unlock_super(sb);
 	kfree(fscb);
+	return ret;
+}
+
+static void exofs_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		exofs_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
 }
 
 /*
@@ -258,6 +269,11 @@ static void exofs_put_super(struct super_block *sb)
 	int num_pend;
 	struct exofs_sb_info *sbi = sb->s_fs_info;
 
+	lock_kernel();
+
+	if (sb->s_dirt)
+		exofs_write_super(sb);
+
 	/* make sure there are no pending commands */
 	for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
 	     num_pend = atomic_read(&sbi->s_curr_pending)) {
@@ -271,6 +287,8 @@ static void exofs_put_super(struct super_block *sb)
 	osduld_put_device(sbi->s_dev);
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
+
+	unlock_kernel();
 }
 
 /*
@@ -484,6 +502,7 @@ static const struct super_operations exofs_sops = {
 	.delete_inode   = exofs_delete_inode,
 	.put_super      = exofs_put_super,
 	.write_super    = exofs_write_super,
+	.sync_fs	= exofs_sync_fs,
 	.statfs         = exofs_statfs,
 };
 

+ 1 - 1
fs/ext2/Makefile

@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_EXT2_FS) += ext2.o
 
-ext2-y := balloc.o dir.o file.o fsync.o ialloc.o inode.o \
+ext2-y := balloc.o dir.o file.o ialloc.o inode.o \
 	  ioctl.o namei.o super.o symlink.o
 
 ext2-$(CONFIG_EXT2_FS_XATTR)	 += xattr.o xattr_user.o xattr_trusted.o

+ 1 - 1
fs/ext2/dir.c

@@ -720,5 +720,5 @@ const struct file_operations ext2_dir_operations = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext2_compat_ioctl,
 #endif
-	.fsync		= ext2_sync_file,
+	.fsync		= simple_fsync,
 };

+ 0 - 3
fs/ext2/ext2.h

@@ -113,9 +113,6 @@ extern int ext2_empty_dir (struct inode *);
 extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
 extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *);
 
-/* fsync.c */
-extern int ext2_sync_file (struct file *, struct dentry *, int);
-
 /* ialloc.c */
 extern struct inode * ext2_new_inode (struct inode *, int);
 extern void ext2_free_inode (struct inode *);

+ 2 - 2
fs/ext2/file.c

@@ -55,7 +55,7 @@ const struct file_operations ext2_file_operations = {
 	.mmap		= generic_file_mmap,
 	.open		= generic_file_open,
 	.release	= ext2_release_file,
-	.fsync		= ext2_sync_file,
+	.fsync		= simple_fsync,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= generic_file_splice_write,
 };
@@ -72,7 +72,7 @@ const struct file_operations ext2_xip_file_operations = {
 	.mmap		= xip_file_mmap,
 	.open		= generic_file_open,
 	.release	= ext2_release_file,
-	.fsync		= ext2_sync_file,
+	.fsync		= simple_fsync,
 };
 #endif
 

+ 0 - 50
fs/ext2/fsync.c

@@ -1,50 +0,0 @@
-/*
- *  linux/fs/ext2/fsync.c
- *
- *  Copyright (C) 1993  Stephen Tweedie (sct@dcs.ed.ac.uk)
- *  from
- *  Copyright (C) 1992  Remy Card (card@masi.ibp.fr)
- *                      Laboratoire MASI - Institut Blaise Pascal
- *                      Universite Pierre et Marie Curie (Paris VI)
- *  from
- *  linux/fs/minix/truncate.c   Copyright (C) 1991, 1992  Linus Torvalds
- * 
- *  ext2fs fsync primitive
- *
- *  Big-endian to little-endian byte-swapping/bitmaps by
- *        David S. Miller (davem@caip.rutgers.edu), 1995
- * 
- *  Removed unnecessary code duplication for little endian machines
- *  and excessive __inline__s. 
- *        Andi Kleen, 1997
- *
- * Major simplications and cleanup - we only need to do the metadata, because
- * we can depend on generic_block_fdatasync() to sync the data blocks.
- */
-
-#include "ext2.h"
-#include <linux/buffer_head.h>		/* for sync_mapping_buffers() */
-
-
-/*
- *	File may be NULL when we are called. Perhaps we shouldn't
- *	even pass file to fsync ?
- */
-
-int ext2_sync_file(struct file *file, struct dentry *dentry, int datasync)
-{
-	struct inode *inode = dentry->d_inode;
-	int err;
-	int ret;
-
-	ret = sync_mapping_buffers(inode->i_mapping);
-	if (!(inode->i_state & I_DIRTY))
-		return ret;
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		return ret;
-
-	err = ext2_sync_inode(inode);
-	if (ret == 0)
-		ret = err;
-	return ret;
-}

+ 2 - 9
fs/ext2/inode.c

@@ -41,8 +41,6 @@ MODULE_AUTHOR("Remy Card and others");
 MODULE_DESCRIPTION("Second Extended Filesystem");
 MODULE_LICENSE("GPL");
 
-static int ext2_update_inode(struct inode * inode, int do_sync);
-
 /*
  * Test whether an inode is a fast symlink.
  */
@@ -66,7 +64,7 @@ void ext2_delete_inode (struct inode * inode)
 		goto no_delete;
 	EXT2_I(inode)->i_dtime	= get_seconds();
 	mark_inode_dirty(inode);
-	ext2_update_inode(inode, inode_needs_sync(inode));
+	ext2_write_inode(inode, inode_needs_sync(inode));
 
 	inode->i_size = 0;
 	if (inode->i_blocks)
@@ -1337,7 +1335,7 @@ bad_inode:
 	return ERR_PTR(ret);
 }
 
-static int ext2_update_inode(struct inode * inode, int do_sync)
+int ext2_write_inode(struct inode *inode, int do_sync)
 {
 	struct ext2_inode_info *ei = EXT2_I(inode);
 	struct super_block *sb = inode->i_sb;
@@ -1442,11 +1440,6 @@ static int ext2_update_inode(struct inode * inode, int do_sync)
 	return err;
 }
 
-int ext2_write_inode(struct inode *inode, int wait)
-{
-	return ext2_update_inode(inode, wait);
-}
-
 int ext2_sync_inode(struct inode *inode)
 {
 	struct writeback_control wbc = {

+ 43 - 17
fs/ext2/super.c

@@ -42,6 +42,7 @@ static void ext2_sync_super(struct super_block *sb,
 			    struct ext2_super_block *es);
 static int ext2_remount (struct super_block * sb, int * flags, char * data);
 static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
+static int ext2_sync_fs(struct super_block *sb, int wait);
 
 void ext2_error (struct super_block * sb, const char * function,
 		 const char * fmt, ...)
@@ -114,6 +115,11 @@ static void ext2_put_super (struct super_block * sb)
 	int i;
 	struct ext2_sb_info *sbi = EXT2_SB(sb);
 
+	lock_kernel();
+
+	if (sb->s_dirt)
+		ext2_write_super(sb);
+
 	ext2_xattr_put_super(sb);
 	if (!(sb->s_flags & MS_RDONLY)) {
 		struct ext2_super_block *es = sbi->s_es;
@@ -135,7 +141,7 @@ static void ext2_put_super (struct super_block * sb)
 	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 
-	return;
+	unlock_kernel();
 }
 
 static struct kmem_cache * ext2_inode_cachep;
@@ -304,6 +310,7 @@ static const struct super_operations ext2_sops = {
 	.delete_inode	= ext2_delete_inode,
 	.put_super	= ext2_put_super,
 	.write_super	= ext2_write_super,
+	.sync_fs	= ext2_sync_fs,
 	.statfs		= ext2_statfs,
 	.remount_fs	= ext2_remount,
 	.clear_inode	= ext2_clear_inode,
@@ -1127,25 +1134,36 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
  * set s_state to EXT2_VALID_FS after some corrections.
  */
 
-void ext2_write_super (struct super_block * sb)
+static int ext2_sync_fs(struct super_block *sb, int wait)
 {
-	struct ext2_super_block * es;
+	struct ext2_super_block *es = EXT2_SB(sb)->s_es;
+
 	lock_kernel();
-	if (!(sb->s_flags & MS_RDONLY)) {
-		es = EXT2_SB(sb)->s_es;
-
-		if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
-			ext2_debug ("setting valid to 0\n");
-			es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
-			es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
-			es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
-			es->s_mtime = cpu_to_le32(get_seconds());
-			ext2_sync_super(sb, es);
-		} else
-			ext2_commit_super (sb, es);
+	if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
+		ext2_debug("setting valid to 0\n");
+		es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
+		es->s_free_blocks_count =
+			cpu_to_le32(ext2_count_free_blocks(sb));
+		es->s_free_inodes_count =
+			cpu_to_le32(ext2_count_free_inodes(sb));
+		es->s_mtime = cpu_to_le32(get_seconds());
+		ext2_sync_super(sb, es);
+	} else {
+		ext2_commit_super(sb, es);
 	}
 	sb->s_dirt = 0;
 	unlock_kernel();
+
+	return 0;
+}
+
+
+void ext2_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		ext2_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
 }
 
 static int ext2_remount (struct super_block * sb, int * flags, char * data)
@@ -1157,6 +1175,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 	unsigned long old_sb_flags;
 	int err;
 
+	lock_kernel();
+
 	/* Store the old options */
 	old_sb_flags = sb->s_flags;
 	old_opts.s_mount_opt = sbi->s_mount_opt;
@@ -1192,12 +1212,16 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 		sbi->s_mount_opt &= ~EXT2_MOUNT_XIP;
 		sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP;
 	}
-	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
+		unlock_kernel();
 		return 0;
+	}
 	if (*flags & MS_RDONLY) {
 		if (le16_to_cpu(es->s_state) & EXT2_VALID_FS ||
-		    !(sbi->s_mount_state & EXT2_VALID_FS))
+		    !(sbi->s_mount_state & EXT2_VALID_FS)) {
+			unlock_kernel();
 			return 0;
+		}
 		/*
 		 * OK, we are remounting a valid rw partition rdonly, so set
 		 * the rdonly flag and then mark the partition as valid again.
@@ -1224,12 +1248,14 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 			sb->s_flags &= ~MS_RDONLY;
 	}
 	ext2_sync_super(sb, es);
+	unlock_kernel();
 	return 0;
 restore_opts:
 	sbi->s_mount_opt = old_opts.s_mount_opt;
 	sbi->s_resuid = old_opts.s_resuid;
 	sbi->s_resgid = old_opts.s_resgid;
 	sb->s_flags = old_sb_flags;
+	unlock_kernel();
 	return err;
 }
 

+ 1 - 2
fs/ext3/balloc.c

@@ -649,7 +649,7 @@ do_more:
 		count = overflow;
 		goto do_more;
 	}
-	sb->s_dirt = 1;
+
 error_return:
 	brelse(bitmap_bh);
 	ext3_std_error(sb, err);
@@ -1708,7 +1708,6 @@ allocated:
 	if (!fatal)
 		fatal = err;
 
-	sb->s_dirt = 1;
 	if (fatal)
 		goto out;
 

+ 1 - 2
fs/ext3/ialloc.c

@@ -181,7 +181,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
 	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
 	if (!fatal)
 		fatal = err;
-	sb->s_dirt = 1;
+
 error_return:
 	brelse(bitmap_bh);
 	ext3_std_error(sb, fatal);
@@ -537,7 +537,6 @@ got:
 	percpu_counter_dec(&sbi->s_freeinodes_counter);
 	if (S_ISDIR(mode))
 		percpu_counter_inc(&sbi->s_dirs_counter);
-	sb->s_dirt = 1;
 
 	inode->i_uid = current_fsuid();
 	if (test_opt (sb, GRPID))

+ 0 - 1
fs/ext3/inode.c

@@ -2960,7 +2960,6 @@ static int ext3_do_update_inode(handle_t *handle,
 				ext3_update_dynamic_rev(sb);
 				EXT3_SET_RO_COMPAT_FEATURE(sb,
 					EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
-				sb->s_dirt = 1;
 				handle->h_sync = 1;
 				err = ext3_journal_dirty_metadata(handle,
 						EXT3_SB(sb)->s_sbh);

+ 0 - 2
fs/ext3/resize.c

@@ -934,7 +934,6 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 			   EXT3_INODES_PER_GROUP(sb));
 
 	ext3_journal_dirty_metadata(handle, sbi->s_sbh);
-	sb->s_dirt = 1;
 
 exit_journal:
 	unlock_super(sb);
@@ -1066,7 +1065,6 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 	}
 	es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
 	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
-	sb->s_dirt = 1;
 	unlock_super(sb);
 	ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
 		   o_blocks_count + add);

+ 11 - 23
fs/ext3/super.c

@@ -67,7 +67,6 @@ static const char *ext3_decode_error(struct super_block * sb, int errno,
 static int ext3_remount (struct super_block * sb, int * flags, char * data);
 static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf);
 static int ext3_unfreeze(struct super_block *sb);
-static void ext3_write_super (struct super_block * sb);
 static int ext3_freeze(struct super_block *sb);
 
 /*
@@ -399,6 +398,8 @@ static void ext3_put_super (struct super_block * sb)
 	struct ext3_super_block *es = sbi->s_es;
 	int i, err;
 
+	lock_kernel();
+
 	ext3_xattr_put_super(sb);
 	err = journal_destroy(sbi->s_journal);
 	sbi->s_journal = NULL;
@@ -447,7 +448,8 @@ static void ext3_put_super (struct super_block * sb)
 	sb->s_fs_info = NULL;
 	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
-	return;
+
+	unlock_kernel();
 }
 
 static struct kmem_cache *ext3_inode_cachep;
@@ -761,7 +763,6 @@ static const struct super_operations ext3_sops = {
 	.dirty_inode	= ext3_dirty_inode,
 	.delete_inode	= ext3_delete_inode,
 	.put_super	= ext3_put_super,
-	.write_super	= ext3_write_super,
 	.sync_fs	= ext3_sync_fs,
 	.freeze_fs	= ext3_freeze,
 	.unfreeze_fs	= ext3_unfreeze,
@@ -1785,7 +1786,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 #else
 		es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
 #endif
-		sb->s_dirt = 1;
 	}
 
 	if (sbi->s_blocks_per_group > blocksize * 8) {
@@ -2265,7 +2265,6 @@ static int ext3_load_journal(struct super_block *sb,
 	if (journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
 		es->s_journal_dev = cpu_to_le32(journal_devnum);
-		sb->s_dirt = 1;
 
 		/* Make sure we flush the recovery flag to disk. */
 		ext3_commit_super(sb, es, 1);
@@ -2308,7 +2307,6 @@ static int ext3_create_journal(struct super_block * sb,
 	EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);
 
 	es->s_journal_inum = cpu_to_le32(journal_inum);
-	sb->s_dirt = 1;
 
 	/* Make sure we flush the recovery flag to disk. */
 	ext3_commit_super(sb, es, 1);
@@ -2354,7 +2352,6 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
 	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
 	    sb->s_flags & MS_RDONLY) {
 		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-		sb->s_dirt = 0;
 		ext3_commit_super(sb, es, 1);
 	}
 	unlock_super(sb);
@@ -2413,29 +2410,14 @@ int ext3_force_commit(struct super_block *sb)
 		return 0;
 
 	journal = EXT3_SB(sb)->s_journal;
-	sb->s_dirt = 0;
 	ret = ext3_journal_force_commit(journal);
 	return ret;
 }
 
-/*
- * Ext3 always journals updates to the superblock itself, so we don't
- * have to propagate any other updates to the superblock on disk at this
- * point.  (We can probably nuke this function altogether, and remove
- * any mention to sb->s_dirt in all of fs/ext3; eventual cleanup...)
- */
-static void ext3_write_super (struct super_block * sb)
-{
-	if (mutex_trylock(&sb->s_lock) != 0)
-		BUG();
-	sb->s_dirt = 0;
-}
-
 static int ext3_sync_fs(struct super_block *sb, int wait)
 {
 	tid_t target;
 
-	sb->s_dirt = 0;
 	if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
 		if (wait)
 			log_wait_commit(EXT3_SB(sb)->s_journal, target);
@@ -2451,7 +2433,6 @@ static int ext3_freeze(struct super_block *sb)
 {
 	int error = 0;
 	journal_t *journal;
-	sb->s_dirt = 0;
 
 	if (!(sb->s_flags & MS_RDONLY)) {
 		journal = EXT3_SB(sb)->s_journal;
@@ -2509,7 +2490,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
 	int i;
 #endif
 
+	lock_kernel();
+
 	/* Store the original options */
+	lock_super(sb);
 	old_sb_flags = sb->s_flags;
 	old_opts.s_mount_opt = sbi->s_mount_opt;
 	old_opts.s_resuid = sbi->s_resuid;
@@ -2617,6 +2601,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
 		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
 			kfree(old_opts.s_qf_names[i]);
 #endif
+	unlock_super(sb);
+	unlock_kernel();
 	return 0;
 restore_opts:
 	sb->s_flags = old_sb_flags;
@@ -2633,6 +2619,8 @@ restore_opts:
 		sbi->s_qf_names[i] = old_opts.s_qf_names[i];
 	}
 #endif
+	unlock_super(sb);
+	unlock_kernel();
 	return err;
 }
 

+ 0 - 1
fs/ext3/xattr.c

@@ -463,7 +463,6 @@ static void ext3_xattr_update_super_block(handle_t *handle,
 
 	if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) {
 		EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR);
-		sb->s_dirt = 1;
 		ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
 	}
 }

+ 14 - 2
fs/ext4/super.c

@@ -576,6 +576,11 @@ static void ext4_put_super(struct super_block *sb)
 	struct ext4_super_block *es = sbi->s_es;
 	int i, err;
 
+	lock_super(sb);
+	lock_kernel();
+	if (sb->s_dirt)
+		ext4_commit_super(sb, 1);
+
 	ext4_release_system_zone(sb);
 	ext4_mb_release(sb);
 	ext4_ext_release(sb);
@@ -642,8 +647,6 @@ static void ext4_put_super(struct super_block *sb)
 	unlock_super(sb);
 	kobject_put(&sbi->s_kobj);
 	wait_for_completion(&sbi->s_kobj_unregister);
-	lock_super(sb);
-	lock_kernel();
 	kfree(sbi->s_blockgroup_lock);
 	kfree(sbi);
 }
@@ -3333,7 +3336,9 @@ int ext4_force_commit(struct super_block *sb)
 
 static void ext4_write_super(struct super_block *sb)
 {
+	lock_super(sb);
 	ext4_commit_super(sb, 1);
+	unlock_super(sb);
 }
 
 static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -3417,7 +3422,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 	int i;
 #endif
 
+	lock_kernel();
+
 	/* Store the original options */
+	lock_super(sb);
 	old_sb_flags = sb->s_flags;
 	old_opts.s_mount_opt = sbi->s_mount_opt;
 	old_opts.s_resuid = sbi->s_resuid;
@@ -3551,6 +3559,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
 			kfree(old_opts.s_qf_names[i]);
 #endif
+	unlock_super(sb);
+	unlock_kernel();
 	return 0;
 
 restore_opts:
@@ -3570,6 +3580,8 @@ restore_opts:
 		sbi->s_qf_names[i] = old_opts.s_qf_names[i];
 	}
 #endif
+	unlock_super(sb);
+	unlock_kernel();
 	return err;
 }
 

+ 8 - 8
fs/fat/dir.c

@@ -840,7 +840,7 @@ const struct file_operations fat_dir_operations = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= fat_compat_dir_ioctl,
 #endif
-	.fsync		= file_fsync,
+	.fsync		= fat_file_fsync,
 };
 
 static int fat_get_short_entry(struct inode *dir, loff_t *pos,
@@ -967,7 +967,7 @@ static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
 			de++;
 			nr_slots--;
 		}
-		mark_buffer_dirty(bh);
+		mark_buffer_dirty_inode(bh, dir);
 		if (IS_DIRSYNC(dir))
 			err = sync_dirty_buffer(bh);
 		brelse(bh);
@@ -1001,7 +1001,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
 		de--;
 		nr_slots--;
 	}
-	mark_buffer_dirty(bh);
+	mark_buffer_dirty_inode(bh, dir);
 	if (IS_DIRSYNC(dir))
 		err = sync_dirty_buffer(bh);
 	brelse(bh);
@@ -1051,7 +1051,7 @@ static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used,
 		}
 		memset(bhs[n]->b_data, 0, sb->s_blocksize);
 		set_buffer_uptodate(bhs[n]);
-		mark_buffer_dirty(bhs[n]);
+		mark_buffer_dirty_inode(bhs[n], dir);
 
 		n++;
 		blknr++;
@@ -1131,7 +1131,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
 	de[0].size = de[1].size = 0;
 	memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de));
 	set_buffer_uptodate(bhs[0]);
-	mark_buffer_dirty(bhs[0]);
+	mark_buffer_dirty_inode(bhs[0], dir);
 
 	err = fat_zeroed_cluster(dir, blknr, 1, bhs, MAX_BUF_PER_PAGE);
 	if (err)
@@ -1193,7 +1193,7 @@ static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
 			slots += copy;
 			size -= copy;
 			set_buffer_uptodate(bhs[n]);
-			mark_buffer_dirty(bhs[n]);
+			mark_buffer_dirty_inode(bhs[n], dir);
 			if (!size)
 				break;
 			n++;
@@ -1293,7 +1293,7 @@ found:
 		for (i = 0; i < long_bhs; i++) {
 			int copy = min_t(int, sb->s_blocksize - offset, size);
 			memcpy(bhs[i]->b_data + offset, slots, copy);
-			mark_buffer_dirty(bhs[i]);
+			mark_buffer_dirty_inode(bhs[i], dir);
 			offset = 0;
 			slots += copy;
 			size -= copy;
@@ -1304,7 +1304,7 @@ found:
 			/* Fill the short name slot. */
 			int copy = min_t(int, sb->s_blocksize - offset, size);
 			memcpy(bhs[i]->b_data + offset, slots, copy);
-			mark_buffer_dirty(bhs[i]);
+			mark_buffer_dirty_inode(bhs[i], dir);
 			if (IS_DIRSYNC(dir))
 				err = sync_dirty_buffer(bhs[i]);
 		}

+ 6 - 0
fs/fat/fat.h

@@ -74,6 +74,7 @@ struct msdos_sb_info {
 
 	int fatent_shift;
 	struct fatent_operations *fatent_ops;
+	struct inode *fat_inode;
 
 	spinlock_t inode_hash_lock;
 	struct hlist_head inode_hashtable[FAT_HASH_SIZE];
@@ -251,6 +252,7 @@ struct fat_entry {
 	} u;
 	int nr_bhs;
 	struct buffer_head *bhs[2];
+	struct inode *fat_inode;
 };
 
 static inline void fatent_init(struct fat_entry *fatent)
@@ -259,6 +261,7 @@ static inline void fatent_init(struct fat_entry *fatent)
 	fatent->entry = 0;
 	fatent->u.ent32_p = NULL;
 	fatent->bhs[0] = fatent->bhs[1] = NULL;
+	fatent->fat_inode = NULL;
 }
 
 static inline void fatent_set_entry(struct fat_entry *fatent, int entry)
@@ -275,6 +278,7 @@ static inline void fatent_brelse(struct fat_entry *fatent)
 		brelse(fatent->bhs[i]);
 	fatent->nr_bhs = 0;
 	fatent->bhs[0] = fatent->bhs[1] = NULL;
+	fatent->fat_inode = NULL;
 }
 
 extern void fat_ent_access_init(struct super_block *sb);
@@ -296,6 +300,8 @@ extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
 extern void fat_truncate(struct inode *inode);
 extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		       struct kstat *stat);
+extern int fat_file_fsync(struct file *file, struct dentry *dentry,
+			  int datasync);
 
 /* fat/inode.c */
 extern void fat_attach(struct inode *inode, loff_t i_pos);

+ 8 - 5
fs/fat/fatent.c

@@ -73,6 +73,8 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent,
 	struct buffer_head **bhs = fatent->bhs;
 
 	WARN_ON(blocknr < MSDOS_SB(sb)->fat_start);
+	fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
+
 	bhs[0] = sb_bread(sb, blocknr);
 	if (!bhs[0])
 		goto err;
@@ -103,6 +105,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
 	struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops;
 
 	WARN_ON(blocknr < MSDOS_SB(sb)->fat_start);
+	fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
 	fatent->bhs[0] = sb_bread(sb, blocknr);
 	if (!fatent->bhs[0]) {
 		printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n",
@@ -167,9 +170,9 @@ static void fat12_ent_put(struct fat_entry *fatent, int new)
 	}
 	spin_unlock(&fat12_entry_lock);
 
-	mark_buffer_dirty(fatent->bhs[0]);
+	mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode);
 	if (fatent->nr_bhs == 2)
-		mark_buffer_dirty(fatent->bhs[1]);
+		mark_buffer_dirty_inode(fatent->bhs[1], fatent->fat_inode);
 }
 
 static void fat16_ent_put(struct fat_entry *fatent, int new)
@@ -178,7 +181,7 @@ static void fat16_ent_put(struct fat_entry *fatent, int new)
 		new = EOF_FAT16;
 
 	*fatent->u.ent16_p = cpu_to_le16(new);
-	mark_buffer_dirty(fatent->bhs[0]);
+	mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode);
 }
 
 static void fat32_ent_put(struct fat_entry *fatent, int new)
@@ -189,7 +192,7 @@ static void fat32_ent_put(struct fat_entry *fatent, int new)
 	WARN_ON(new & 0xf0000000);
 	new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff;
 	*fatent->u.ent32_p = cpu_to_le32(new);
-	mark_buffer_dirty(fatent->bhs[0]);
+	mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode);
 }
 
 static int fat12_ent_next(struct fat_entry *fatent)
@@ -381,7 +384,7 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs,
 			}
 			memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize);
 			set_buffer_uptodate(c_bh);
-			mark_buffer_dirty(c_bh);
+			mark_buffer_dirty_inode(c_bh, sbi->fat_inode);
 			if (sb->s_flags & MS_SYNCHRONOUS)
 				err = sync_dirty_buffer(c_bh);
 			brelse(c_bh);

+ 13 - 1
fs/fat/file.c

@@ -133,6 +133,18 @@ static int fat_file_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+int fat_file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+{
+	struct inode *inode = dentry->d_inode;
+	int res, err;
+
+	res = simple_fsync(filp, dentry, datasync);
+	err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping);
+
+	return res ? res : err;
+}
+
+
 const struct file_operations fat_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
@@ -142,7 +154,7 @@ const struct file_operations fat_file_operations = {
 	.mmap		= generic_file_mmap,
 	.release	= fat_file_release,
 	.ioctl		= fat_generic_ioctl,
-	.fsync		= file_fsync,
+	.fsync		= fat_file_fsync,
 	.splice_read	= generic_file_splice_read,
 };
 

+ 30 - 1
fs/fat/inode.c

@@ -441,16 +441,35 @@ static void fat_clear_inode(struct inode *inode)
 
 static void fat_write_super(struct super_block *sb)
 {
+	lock_super(sb);
 	sb->s_dirt = 0;
 
 	if (!(sb->s_flags & MS_RDONLY))
 		fat_clusters_flush(sb);
+	unlock_super(sb);
+}
+
+static int fat_sync_fs(struct super_block *sb, int wait)
+{
+	lock_super(sb);
+	fat_clusters_flush(sb);
+	sb->s_dirt = 0;
+	unlock_super(sb);
+
+	return 0;
 }
 
 static void fat_put_super(struct super_block *sb)
 {
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
 
+	lock_kernel();
+
+	if (sb->s_dirt)
+		fat_write_super(sb);
+
+	iput(sbi->fat_inode);
+
 	if (sbi->nls_disk) {
 		unload_nls(sbi->nls_disk);
 		sbi->nls_disk = NULL;
@@ -467,6 +486,8 @@ static void fat_put_super(struct super_block *sb)
 
 	sb->s_fs_info = NULL;
 	kfree(sbi);
+
+	unlock_kernel();
 }
 
 static struct kmem_cache *fat_inode_cachep;
@@ -632,6 +653,7 @@ static const struct super_operations fat_sops = {
 	.delete_inode	= fat_delete_inode,
 	.put_super	= fat_put_super,
 	.write_super	= fat_write_super,
+	.sync_fs	= fat_sync_fs,
 	.statfs		= fat_statfs,
 	.clear_inode	= fat_clear_inode,
 	.remount_fs	= fat_remount,
@@ -1174,7 +1196,7 @@ static int fat_read_root(struct inode *inode)
 int fat_fill_super(struct super_block *sb, void *data, int silent,
 		   const struct inode_operations *fs_dir_inode_ops, int isvfat)
 {
-	struct inode *root_inode = NULL;
+	struct inode *root_inode = NULL, *fat_inode = NULL;
 	struct buffer_head *bh;
 	struct fat_boot_sector *b;
 	struct msdos_sb_info *sbi;
@@ -1414,6 +1436,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
 	}
 
 	error = -ENOMEM;
+	fat_inode = new_inode(sb);
+	if (!fat_inode)
+		goto out_fail;
+	MSDOS_I(fat_inode)->i_pos = 0;
+	sbi->fat_inode = fat_inode;
 	root_inode = new_inode(sb);
 	if (!root_inode)
 		goto out_fail;
@@ -1439,6 +1466,8 @@ out_invalid:
 		       " on dev %s.\n", sb->s_id);
 
 out_fail:
+	if (fat_inode)
+		iput(fat_inode);
 	if (root_inode)
 		iput(root_inode);
 	if (sbi->nls_io)

+ 2 - 2
fs/fat/namei_msdos.c

@@ -544,7 +544,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name,
 		int start = MSDOS_I(new_dir)->i_logstart;
 		dotdot_de->start = cpu_to_le16(start);
 		dotdot_de->starthi = cpu_to_le16(start >> 16);
-		mark_buffer_dirty(dotdot_bh);
+		mark_buffer_dirty_inode(dotdot_bh, old_inode);
 		if (IS_DIRSYNC(new_dir)) {
 			err = sync_dirty_buffer(dotdot_bh);
 			if (err)
@@ -586,7 +586,7 @@ error_dotdot:
 		int start = MSDOS_I(old_dir)->i_logstart;
 		dotdot_de->start = cpu_to_le16(start);
 		dotdot_de->starthi = cpu_to_le16(start >> 16);
-		mark_buffer_dirty(dotdot_bh);
+		mark_buffer_dirty_inode(dotdot_bh, old_inode);
 		corrupt |= sync_dirty_buffer(dotdot_bh);
 	}
 error_inode:

+ 2 - 2
fs/fat/namei_vfat.c

@@ -965,7 +965,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
 		int start = MSDOS_I(new_dir)->i_logstart;
 		dotdot_de->start = cpu_to_le16(start);
 		dotdot_de->starthi = cpu_to_le16(start >> 16);
-		mark_buffer_dirty(dotdot_bh);
+		mark_buffer_dirty_inode(dotdot_bh, old_inode);
 		if (IS_DIRSYNC(new_dir)) {
 			err = sync_dirty_buffer(dotdot_bh);
 			if (err)
@@ -1009,7 +1009,7 @@ error_dotdot:
 		int start = MSDOS_I(old_dir)->i_logstart;
 		dotdot_de->start = cpu_to_le16(start);
 		dotdot_de->starthi = cpu_to_le16(start >> 16);
-		mark_buffer_dirty(dotdot_bh);
+		mark_buffer_dirty_inode(dotdot_bh, old_inode);
 		corrupt |= sync_dirty_buffer(dotdot_bh);
 	}
 error_inode:

+ 39 - 1
fs/file_table.c

@@ -214,7 +214,7 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
 	 */
 	if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
 		file_take_write(file);
-		error = mnt_want_write(mnt);
+		error = mnt_clone_write(mnt);
 		WARN_ON(error);
 	}
 	return error;
@@ -399,6 +399,44 @@ too_bad:
 	return 0;
 }
 
+/**
+ *	mark_files_ro - mark all files read-only
+ *	@sb: superblock in question
+ *
+ *	All files are marked read-only.  We don't care about pending
+ *	delete files so this should be used in 'force' mode only.
+ */
+void mark_files_ro(struct super_block *sb)
+{
+	struct file *f;
+
+retry:
+	file_list_lock();
+	list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
+		struct vfsmount *mnt;
+		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
+		       continue;
+		if (!file_count(f))
+			continue;
+		if (!(f->f_mode & FMODE_WRITE))
+			continue;
+		f->f_mode &= ~FMODE_WRITE;
+		if (file_check_writeable(f) != 0)
+			continue;
+		file_release_write(f);
+		mnt = mntget(f->f_path.mnt);
+		file_list_unlock();
+		/*
+		 * This can sleep, so we can't hold
+		 * the file_list_lock() spinlock.
+		 */
+		mnt_drop_write(mnt);
+		mntput(mnt);
+		goto retry;
+	}
+	file_list_unlock();
+}
+
 void __init files_init(unsigned long mempages)
 { 
 	int n; 

+ 4 - 0
fs/freevxfs/vxfs_super.c

@@ -80,12 +80,16 @@ vxfs_put_super(struct super_block *sbp)
 {
 	struct vxfs_sb_info	*infp = VXFS_SBI(sbp);
 
+	lock_kernel();
+
 	vxfs_put_fake_inode(infp->vsi_fship);
 	vxfs_put_fake_inode(infp->vsi_ilist);
 	vxfs_put_fake_inode(infp->vsi_stilist);
 
 	brelse(infp->vsi_bp);
 	kfree(infp);
+
+	unlock_kernel();
 }
 
 /**

+ 24 - 68
fs/fs-writeback.c

@@ -64,6 +64,28 @@ static void writeback_release(struct backing_dev_info *bdi)
 	clear_bit(BDI_pdflush, &bdi->state);
 }
 
+static noinline void block_dump___mark_inode_dirty(struct inode *inode)
+{
+	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
+		struct dentry *dentry;
+		const char *name = "?";
+
+		dentry = d_find_alias(inode);
+		if (dentry) {
+			spin_lock(&dentry->d_lock);
+			name = (const char *) dentry->d_name.name;
+		}
+		printk(KERN_DEBUG
+		       "%s(%d): dirtied inode %lu (%s) on %s\n",
+		       current->comm, task_pid_nr(current), inode->i_ino,
+		       name, inode->i_sb->s_id);
+		if (dentry) {
+			spin_unlock(&dentry->d_lock);
+			dput(dentry);
+		}
+	}
+}
+
 /**
  *	__mark_inode_dirty -	internal function
  *	@inode: inode to mark
@@ -114,23 +136,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 	if ((inode->i_state & flags) == flags)
 		return;
 
-	if (unlikely(block_dump)) {
-		struct dentry *dentry = NULL;
-		const char *name = "?";
-
-		if (!list_empty(&inode->i_dentry)) {
-			dentry = list_entry(inode->i_dentry.next,
-					    struct dentry, d_alias);
-			if (dentry && dentry->d_name.name)
-				name = (const char *) dentry->d_name.name;
-		}
-
-		if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev"))
-			printk(KERN_DEBUG
-			       "%s(%d): dirtied inode %lu (%s) on %s\n",
-			       current->comm, task_pid_nr(current), inode->i_ino,
-			       name, inode->i_sb->s_id);
-	}
+	if (unlikely(block_dump))
+		block_dump___mark_inode_dirty(inode);
 
 	spin_lock(&inode_lock);
 	if ((inode->i_state & flags) != flags) {
@@ -289,7 +296,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
 	int ret;
 
 	BUG_ON(inode->i_state & I_SYNC);
-	WARN_ON(inode->i_state & I_NEW);
 
 	/* Set I_SYNC, reset I_DIRTY */
 	dirty = inode->i_state & I_DIRTY;
@@ -314,7 +320,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
 	}
 
 	spin_lock(&inode_lock);
-	WARN_ON(inode->i_state & I_NEW);
 	inode->i_state &= ~I_SYNC;
 	if (!(inode->i_state & I_FREEING)) {
 		if (!(inode->i_state & I_DIRTY) &&
@@ -678,55 +683,6 @@ void sync_inodes_sb(struct super_block *sb, int wait)
 	sync_sb_inodes(sb, &wbc);
 }
 
-/**
- * sync_inodes - writes all inodes to disk
- * @wait: wait for completion
- *
- * sync_inodes() goes through each super block's dirty inode list, writes the
- * inodes out, waits on the writeout and puts the inodes back on the normal
- * list.
- *
- * This is for sys_sync().  fsync_dev() uses the same algorithm.  The subtle
- * part of the sync functions is that the blockdev "superblock" is processed
- * last.  This is because the write_inode() function of a typical fs will
- * perform no I/O, but will mark buffers in the blockdev mapping as dirty.
- * What we want to do is to perform all that dirtying first, and then write
- * back all those inode blocks via the blockdev mapping in one sweep.  So the
- * additional (somewhat redundant) sync_blockdev() calls here are to make
- * sure that really happens.  Because if we call sync_inodes_sb(wait=1) with
- * outstanding dirty inodes, the writeback goes block-at-a-time within the
- * filesystem's write_inode().  This is extremely slow.
- */
-static void __sync_inodes(int wait)
-{
-	struct super_block *sb;
-
-	spin_lock(&sb_lock);
-restart:
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (sb->s_root) {
-			sync_inodes_sb(sb, wait);
-			sync_blockdev(sb->s_bdev);
-		}
-		up_read(&sb->s_umount);
-		spin_lock(&sb_lock);
-		if (__put_super_and_need_restart(sb))
-			goto restart;
-	}
-	spin_unlock(&sb_lock);
-}
-
-void sync_inodes(int wait)
-{
-	__sync_inodes(0);
-
-	if (wait)
-		__sync_inodes(1);
-}
-
 /**
  * write_inode_now	-	write an inode to disk
  * @inode: inode to write to disk

+ 0 - 2
fs/gfs2/log.c

@@ -764,7 +764,6 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 	}
 	gfs2_log_unlock(sdp);
 
-	sdp->sd_vfs->s_dirt = 0;
 	up_write(&sdp->sd_log_flush_lock);
 
 	kfree(ai);
@@ -823,7 +822,6 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 	log_refund(sdp, tr);
 	buf_lo_incore_commit(sdp, tr);
 
-	sdp->sd_vfs->s_dirt = 1;
 	up_read(&sdp->sd_log_flush_lock);
 
 	gfs2_log_lock(sdp);

+ 3 - 12
fs/gfs2/super.c

@@ -719,6 +719,8 @@ static void gfs2_put_super(struct super_block *sb)
 	int error;
 	struct gfs2_jdesc *jd;
 
+	lock_kernel();
+
 	/*  Unfreeze the filesystem, if we need to  */
 
 	mutex_lock(&sdp->sd_freeze_lock);
@@ -785,17 +787,8 @@ restart:
 
 	/*  At this point, we're through participating in the lockspace  */
 	gfs2_sys_fs_del(sdp);
-}
-
-/**
- * gfs2_write_super
- * @sb: the superblock
- *
- */
 
-static void gfs2_write_super(struct super_block *sb)
-{
-	sb->s_dirt = 0;
+	unlock_kernel();
 }
 
 /**
@@ -807,7 +800,6 @@ static void gfs2_write_super(struct super_block *sb)
 
 static int gfs2_sync_fs(struct super_block *sb, int wait)
 {
-	sb->s_dirt = 0;
 	if (wait && sb->s_fs_info)
 		gfs2_log_flush(sb->s_fs_info, NULL);
 	return 0;
@@ -1324,7 +1316,6 @@ const struct super_operations gfs2_super_ops = {
 	.write_inode		= gfs2_write_inode,
 	.delete_inode		= gfs2_delete_inode,
 	.put_super		= gfs2_put_super,
-	.write_super		= gfs2_write_super,
 	.sync_fs		= gfs2_sync_fs,
 	.freeze_fs 		= gfs2_freeze,
 	.unfreeze_fs		= gfs2_unfreeze,

+ 21 - 2
fs/hfs/super.c

@@ -49,11 +49,23 @@ MODULE_LICENSE("GPL");
  */
 static void hfs_write_super(struct super_block *sb)
 {
+	lock_super(sb);
 	sb->s_dirt = 0;
-	if (sb->s_flags & MS_RDONLY)
-		return;
+
 	/* sync everything to the buffers */
+	if (!(sb->s_flags & MS_RDONLY))
+		hfs_mdb_commit(sb);
+	unlock_super(sb);
+}
+
+static int hfs_sync_fs(struct super_block *sb, int wait)
+{
+	lock_super(sb);
 	hfs_mdb_commit(sb);
+	sb->s_dirt = 0;
+	unlock_super(sb);
+
+	return 0;
 }
 
 /*
@@ -65,9 +77,15 @@ static void hfs_write_super(struct super_block *sb)
  */
 static void hfs_put_super(struct super_block *sb)
 {
+	lock_kernel();
+
+	if (sb->s_dirt)
+		hfs_write_super(sb);
 	hfs_mdb_close(sb);
 	/* release the MDB's resources */
 	hfs_mdb_put(sb);
+
+	unlock_kernel();
 }
 
 /*
@@ -164,6 +182,7 @@ static const struct super_operations hfs_super_operations = {
 	.clear_inode	= hfs_clear_inode,
 	.put_super	= hfs_put_super,
 	.write_super	= hfs_write_super,
+	.sync_fs	= hfs_sync_fs,
 	.statfs		= hfs_statfs,
 	.remount_fs     = hfs_remount,
 	.show_options	= hfs_show_options,

+ 21 - 4
fs/hfsplus/super.c

@@ -152,15 +152,14 @@ static void hfsplus_clear_inode(struct inode *inode)
 	}
 }
 
-static void hfsplus_write_super(struct super_block *sb)
+static int hfsplus_sync_fs(struct super_block *sb, int wait)
 {
 	struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
 
 	dprint(DBG_SUPER, "hfsplus_write_super\n");
+
+	lock_super(sb);
 	sb->s_dirt = 0;
-	if (sb->s_flags & MS_RDONLY)
-		/* warn? */
-		return;
 
 	vhdr->free_blocks = cpu_to_be32(HFSPLUS_SB(sb).free_blocks);
 	vhdr->next_alloc = cpu_to_be32(HFSPLUS_SB(sb).next_alloc);
@@ -192,6 +191,16 @@ static void hfsplus_write_super(struct super_block *sb)
 		}
 		HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP;
 	}
+	unlock_super(sb);
+	return 0;
+}
+
+static void hfsplus_write_super(struct super_block *sb)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		hfsplus_sync_fs(sb, 1);
+	else
+		sb->s_dirt = 0;
 }
 
 static void hfsplus_put_super(struct super_block *sb)
@@ -199,6 +208,11 @@ static void hfsplus_put_super(struct super_block *sb)
 	dprint(DBG_SUPER, "hfsplus_put_super\n");
 	if (!sb->s_fs_info)
 		return;
+
+	lock_kernel();
+
+	if (sb->s_dirt)
+		hfsplus_write_super(sb);
 	if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) {
 		struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
 
@@ -218,6 +232,8 @@ static void hfsplus_put_super(struct super_block *sb)
 		unload_nls(HFSPLUS_SB(sb).nls);
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
+
+	unlock_kernel();
 }
 
 static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -279,6 +295,7 @@ static const struct super_operations hfsplus_sops = {
 	.clear_inode	= hfsplus_clear_inode,
 	.put_super	= hfsplus_put_super,
 	.write_super	= hfsplus_write_super,
+	.sync_fs	= hfsplus_sync_fs,
 	.statfs		= hfsplus_statfs,
 	.remount_fs	= hfsplus_remount,
 	.show_options	= hfsplus_show_options,

+ 12 - 0
fs/hpfs/super.c

@@ -13,6 +13,7 @@
 #include <linux/statfs.h>
 #include <linux/magic.h>
 #include <linux/sched.h>
+#include <linux/smp_lock.h>
 
 /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
 
@@ -99,11 +100,16 @@ int hpfs_stop_cycles(struct super_block *s, int key, int *c1, int *c2,
 static void hpfs_put_super(struct super_block *s)
 {
 	struct hpfs_sb_info *sbi = hpfs_sb(s);
+
+	lock_kernel();
+
 	kfree(sbi->sb_cp_table);
 	kfree(sbi->sb_bmp_dir);
 	unmark_dirty(s);
 	s->s_fs_info = NULL;
 	kfree(sbi);
+
+	unlock_kernel();
 }
 
 unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno)
@@ -393,6 +399,8 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
 	
 	*flags |= MS_NOATIME;
 	
+	lock_kernel();
+	lock_super(s);
 	uid = sbi->sb_uid; gid = sbi->sb_gid;
 	umask = 0777 & ~sbi->sb_mode;
 	lowercase = sbi->sb_lowercase; conv = sbi->sb_conv;
@@ -425,9 +433,13 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
 
 	replace_mount_options(s, new_opts);
 
+	unlock_super(s);
+	unlock_kernel();
 	return 0;
 
 out_err:
+	unlock_super(s);
+	unlock_kernel();
 	kfree(new_opts);
 	return -EINVAL;
 }

+ 1 - 1
fs/inode.c

@@ -1422,7 +1422,7 @@ void file_update_time(struct file *file)
 	if (IS_NOCMTIME(inode))
 		return;
 
-	err = mnt_want_write(file->f_path.mnt);
+	err = mnt_want_write_file(file);
 	if (err)
 		return;
 

+ 17 - 0
fs/internal.h

@@ -25,6 +25,8 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
 	return sb == blockdev_superblock;
 }
 
+extern int __sync_blockdev(struct block_device *bdev, int wait);
+
 #else
 static inline void bdev_cache_init(void)
 {
@@ -34,6 +36,11 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
 {
 	return 0;
 }
+
+static inline int __sync_blockdev(struct block_device *bdev, int wait)
+{
+	return 0;
+}
 #endif
 
 /*
@@ -66,3 +73,13 @@ extern void __init mnt_init(void);
  * fs_struct.c
  */
 extern void chroot_fs_refs(struct path *, struct path *);
+
+/*
+ * file_table.c
+ */
+extern void mark_files_ro(struct super_block *);
+
+/*
+ * super.c
+ */
+extern int do_remount_sb(struct super_block *, int, void *, int);

+ 5 - 0
fs/isofs/inode.c

@@ -42,11 +42,16 @@ static int isofs_dentry_cmp_ms(struct dentry *dentry, struct qstr *a, struct qst
 static void isofs_put_super(struct super_block *sb)
 {
 	struct isofs_sb_info *sbi = ISOFS_SB(sb);
+
 #ifdef CONFIG_JOLIET
+	lock_kernel();
+
 	if (sbi->s_nls_iocharset) {
 		unload_nls(sbi->s_nls_iocharset);
 		sbi->s_nls_iocharset = NULL;
 	}
+
+	unlock_kernel();
 #endif
 
 	kfree(sbi);

+ 3 - 15
fs/jffs2/fs.c

@@ -20,6 +20,7 @@
 #include <linux/vmalloc.h>
 #include <linux/vfs.h>
 #include <linux/crc32.h>
+#include <linux/smp_lock.h>
 #include "nodelist.h"
 
 static int jffs2_flash_setup(struct jffs2_sb_info *c);
@@ -387,6 +388,7 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data)
 	   This also catches the case where it was stopped and this
 	   is just a remount to restart it.
 	   Flush the writebuffer, if neccecary, else we loose it */
+	lock_kernel();
 	if (!(sb->s_flags & MS_RDONLY)) {
 		jffs2_stop_garbage_collect_thread(c);
 		mutex_lock(&c->alloc_sem);
@@ -399,24 +401,10 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data)
 
 	*flags |= MS_NOATIME;
 
+	unlock_kernel();
 	return 0;
 }
 
-void jffs2_write_super (struct super_block *sb)
-{
-	struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
-	sb->s_dirt = 0;
-
-	if (sb->s_flags & MS_RDONLY)
-		return;
-
-	D1(printk(KERN_DEBUG "jffs2_write_super()\n"));
-	jffs2_garbage_collect_trigger(c);
-	jffs2_erase_pending_blocks(c, 0);
-	jffs2_flush_wbuf_gc(c, 0);
-}
-
-
 /* jffs2_new_inode: allocate a new inode and inocache, add it to the hash,
    fill in the raw_inode while you're at it. */
 struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri)

+ 0 - 1
fs/jffs2/os-linux.h

@@ -181,7 +181,6 @@ void jffs2_dirty_inode(struct inode *inode);
 struct inode *jffs2_new_inode (struct inode *dir_i, int mode,
 			       struct jffs2_raw_inode *ri);
 int jffs2_statfs (struct dentry *, struct kstatfs *);
-void jffs2_write_super (struct super_block *);
 int jffs2_remount_fs (struct super_block *, int *, char *);
 int jffs2_do_fill_super(struct super_block *sb, void *data, int silent);
 void jffs2_gc_release_inode(struct jffs2_sb_info *c,

+ 26 - 0
fs/jffs2/super.c

@@ -53,10 +53,29 @@ static void jffs2_i_init_once(void *foo)
 	inode_init_once(&f->vfs_inode);
 }
 
+static void jffs2_write_super(struct super_block *sb)
+{
+	struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
+
+	lock_super(sb);
+	sb->s_dirt = 0;
+
+	if (!(sb->s_flags & MS_RDONLY)) {
+		D1(printk(KERN_DEBUG "jffs2_write_super()\n"));
+		jffs2_garbage_collect_trigger(c);
+		jffs2_erase_pending_blocks(c, 0);
+		jffs2_flush_wbuf_gc(c, 0);
+	}
+
+	unlock_super(sb);
+}
+
 static int jffs2_sync_fs(struct super_block *sb, int wait)
 {
 	struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
 
+	jffs2_write_super(sb);
+
 	mutex_lock(&c->alloc_sem);
 	jffs2_flush_wbuf_pad(c);
 	mutex_unlock(&c->alloc_sem);
@@ -174,6 +193,11 @@ static void jffs2_put_super (struct super_block *sb)
 
 	D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n"));
 
+	lock_kernel();
+
+	if (sb->s_dirt)
+		jffs2_write_super(sb);
+
 	mutex_lock(&c->alloc_sem);
 	jffs2_flush_wbuf_pad(c);
 	mutex_unlock(&c->alloc_sem);
@@ -192,6 +216,8 @@ static void jffs2_put_super (struct super_block *sb)
 	if (c->mtd->sync)
 		c->mtd->sync(c->mtd);
 
+	unlock_kernel();
+
 	D1(printk(KERN_DEBUG "jffs2_put_super returning\n"));
 }
 

+ 23 - 4
fs/jfs/super.c

@@ -32,6 +32,7 @@
 #include <linux/crc32.h>
 #include <asm/uaccess.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
@@ -183,6 +184,9 @@ static void jfs_put_super(struct super_block *sb)
 	int rc;
 
 	jfs_info("In jfs_put_super");
+
+	lock_kernel();
+
 	rc = jfs_umount(sb);
 	if (rc)
 		jfs_err("jfs_umount failed with return code %d", rc);
@@ -195,6 +199,8 @@ static void jfs_put_super(struct super_block *sb)
 	sbi->direct_inode = NULL;
 
 	kfree(sbi);
+
+	unlock_kernel();
 }
 
 enum {
@@ -370,19 +376,24 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
 	s64 newLVSize = 0;
 	int rc = 0;
 	int flag = JFS_SBI(sb)->flag;
+	int ret;
 
 	if (!parse_options(data, sb, &newLVSize, &flag)) {
 		return -EINVAL;
 	}
+	lock_kernel();
 	if (newLVSize) {
 		if (sb->s_flags & MS_RDONLY) {
 			printk(KERN_ERR
 		  "JFS: resize requires volume to be mounted read-write\n");
+			unlock_kernel();
 			return -EROFS;
 		}
 		rc = jfs_extendfs(sb, newLVSize, 0);
-		if (rc)
+		if (rc) {
+			unlock_kernel();
 			return rc;
+		}
 	}
 
 	if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
@@ -393,23 +404,31 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
 		truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0);
 
 		JFS_SBI(sb)->flag = flag;
-		return jfs_mount_rw(sb, 1);
+		ret = jfs_mount_rw(sb, 1);
+		unlock_kernel();
+		return ret;
 	}
 	if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) {
 		rc = jfs_umount_rw(sb);
 		JFS_SBI(sb)->flag = flag;
+		unlock_kernel();
 		return rc;
 	}
 	if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY))
 		if (!(sb->s_flags & MS_RDONLY)) {
 			rc = jfs_umount_rw(sb);
-			if (rc)
+			if (rc) {
+				unlock_kernel();
 				return rc;
+			}
 			JFS_SBI(sb)->flag = flag;
-			return jfs_mount_rw(sb, 1);
+			ret = jfs_mount_rw(sb, 1);
+			unlock_kernel();
+			return ret;
 		}
 	JFS_SBI(sb)->flag = flag;
 
+	unlock_kernel();
 	return 0;
 }
 

+ 25 - 0
fs/libfs.c

@@ -9,6 +9,8 @@
 #include <linux/vfs.h>
 #include <linux/mutex.h>
 #include <linux/exportfs.h>
+#include <linux/writeback.h>
+#include <linux/buffer_head.h>
 
 #include <asm/uaccess.h>
 
@@ -807,6 +809,29 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
 }
 EXPORT_SYMBOL_GPL(generic_fh_to_parent);
 
+int simple_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = 0, /* metadata-only; caller takes care of data */
+	};
+	struct inode *inode = dentry->d_inode;
+	int err;
+	int ret;
+
+	ret = sync_mapping_buffers(inode->i_mapping);
+	if (!(inode->i_state & I_DIRTY))
+		return ret;
+	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+		return ret;
+
+	err = sync_inode(inode, &wbc);
+	if (ret == 0)
+		ret = err;
+	return ret;
+}
+EXPORT_SYMBOL(simple_fsync);
+
 EXPORT_SYMBOL(dcache_dir_close);
 EXPORT_SYMBOL(dcache_dir_lseek);
 EXPORT_SYMBOL(dcache_dir_open);

+ 1 - 1
fs/minix/dir.c

@@ -22,7 +22,7 @@ static int minix_readdir(struct file *, void *, filldir_t);
 const struct file_operations minix_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= minix_readdir,
-	.fsync		= minix_sync_file,
+	.fsync		= simple_fsync,
 };
 
 static inline void dir_put_page(struct page *page)

+ 1 - 19
fs/minix/file.c

@@ -6,15 +6,12 @@
  *  minix regular file handling primitives
  */
 
-#include <linux/buffer_head.h>		/* for fsync_inode_buffers() */
 #include "minix.h"
 
 /*
  * We have mostly NULLs here: the current defaults are OK for
  * the minix filesystem.
  */
-int minix_sync_file(struct file *, struct dentry *, int);
-
 const struct file_operations minix_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
@@ -22,7 +19,7 @@ const struct file_operations minix_file_operations = {
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
-	.fsync		= minix_sync_file,
+	.fsync		= simple_fsync,
 	.splice_read	= generic_file_splice_read,
 };
 
@@ -30,18 +27,3 @@ const struct inode_operations minix_file_inode_operations = {
 	.truncate	= minix_truncate,
 	.getattr	= minix_getattr,
 };
-
-int minix_sync_file(struct file * file, struct dentry *dentry, int datasync)
-{
-	struct inode *inode = dentry->d_inode;
-	int err;
-
-	err = sync_mapping_buffers(inode->i_mapping);
-	if (!(inode->i_state & I_DIRTY))
-		return err;
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		return err;
-	
-	err |= minix_sync_inode(inode);
-	return err ? -EIO : 0;
-}

+ 13 - 24
fs/minix/inode.c

@@ -35,6 +35,8 @@ static void minix_put_super(struct super_block *sb)
 	int i;
 	struct minix_sb_info *sbi = minix_sb(sb);
 
+	lock_kernel();
+
 	if (!(sb->s_flags & MS_RDONLY)) {
 		if (sbi->s_version != MINIX_V3)	 /* s_state is now out from V3 sb */
 			sbi->s_ms->s_state = sbi->s_mount_state;
@@ -49,7 +51,7 @@ static void minix_put_super(struct super_block *sb)
 	sb->s_fs_info = NULL;
 	kfree(sbi);
 
-	return;
+	unlock_kernel();
 }
 
 static struct kmem_cache * minix_inode_cachep;
@@ -554,38 +556,25 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode)
 	return bh;
 }
 
-static struct buffer_head *minix_update_inode(struct inode *inode)
-{
-	if (INODE_VERSION(inode) == MINIX_V1)
-		return V1_minix_update_inode(inode);
-	else
-		return V2_minix_update_inode(inode);
-}
-
-static int minix_write_inode(struct inode * inode, int wait)
-{
-	brelse(minix_update_inode(inode));
-	return 0;
-}
-
-int minix_sync_inode(struct inode * inode)
+static int minix_write_inode(struct inode *inode, int wait)
 {
 	int err = 0;
 	struct buffer_head *bh;
 
-	bh = minix_update_inode(inode);
-	if (bh && buffer_dirty(bh))
-	{
+	if (INODE_VERSION(inode) == MINIX_V1)
+		bh = V1_minix_update_inode(inode);
+	else
+		bh = V2_minix_update_inode(inode);
+	if (!bh)
+		return -EIO;
+	if (wait && buffer_dirty(bh)) {
 		sync_dirty_buffer(bh);
-		if (buffer_req(bh) && !buffer_uptodate(bh))
-		{
+		if (buffer_req(bh) && !buffer_uptodate(bh)) {
 			printk("IO error syncing minix inode [%s:%08lx]\n",
 				inode->i_sb->s_id, inode->i_ino);
-			err = -1;
+			err = -EIO;
 		}
 	}
-	else if (!bh)
-		err = -1;
 	brelse (bh);
 	return err;
 }

+ 0 - 2
fs/minix/minix.h

@@ -57,7 +57,6 @@ extern int __minix_write_begin(struct file *file, struct address_space *mapping,
 extern void V1_minix_truncate(struct inode *);
 extern void V2_minix_truncate(struct inode *);
 extern void minix_truncate(struct inode *);
-extern int minix_sync_inode(struct inode *);
 extern void minix_set_inode(struct inode *, dev_t);
 extern int V1_minix_get_block(struct inode *, long, struct buffer_head *, int);
 extern int V2_minix_get_block(struct inode *, long, struct buffer_head *, int);
@@ -72,7 +71,6 @@ extern int minix_empty_dir(struct inode*);
 extern void minix_set_link(struct minix_dir_entry*, struct page*, struct inode*);
 extern struct minix_dir_entry *minix_dotdot(struct inode*, struct page**);
 extern ino_t minix_inode_by_name(struct dentry*);
-extern int minix_sync_file(struct file *, struct dentry *, int);
 
 extern const struct inode_operations minix_file_inode_operations;
 extern const struct inode_operations minix_dir_inode_operations;

+ 77 - 52
fs/namei.c

@@ -552,6 +552,17 @@ static __always_inline int link_path_walk(const char *name, struct nameidata *nd
 	return result;
 }
 
+static __always_inline void set_root(struct nameidata *nd)
+{
+	if (!nd->root.mnt) {
+		struct fs_struct *fs = current->fs;
+		read_lock(&fs->lock);
+		nd->root = fs->root;
+		path_get(&nd->root);
+		read_unlock(&fs->lock);
+	}
+}
+
 static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
 {
 	int res = 0;
@@ -560,14 +571,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
 		goto fail;
 
 	if (*link == '/') {
-		struct fs_struct *fs = current->fs;
-
+		set_root(nd);
 		path_put(&nd->path);
-
-		read_lock(&fs->lock);
-		nd->path = fs->root;
-		path_get(&fs->root);
-		read_unlock(&fs->lock);
+		nd->path = nd->root;
+		path_get(&nd->root);
 	}
 
 	res = link_path_walk(link, nd);
@@ -668,23 +675,23 @@ loop:
 	return err;
 }
 
-int follow_up(struct vfsmount **mnt, struct dentry **dentry)
+int follow_up(struct path *path)
 {
 	struct vfsmount *parent;
 	struct dentry *mountpoint;
 	spin_lock(&vfsmount_lock);
-	parent=(*mnt)->mnt_parent;
-	if (parent == *mnt) {
+	parent = path->mnt->mnt_parent;
+	if (parent == path->mnt) {
 		spin_unlock(&vfsmount_lock);
 		return 0;
 	}
 	mntget(parent);
-	mountpoint=dget((*mnt)->mnt_mountpoint);
+	mountpoint = dget(path->mnt->mnt_mountpoint);
 	spin_unlock(&vfsmount_lock);
-	dput(*dentry);
-	*dentry = mountpoint;
-	mntput(*mnt);
-	*mnt = parent;
+	dput(path->dentry);
+	path->dentry = mountpoint;
+	mntput(path->mnt);
+	path->mnt = parent;
 	return 1;
 }
 
@@ -695,7 +702,7 @@ static int __follow_mount(struct path *path)
 {
 	int res = 0;
 	while (d_mountpoint(path->dentry)) {
-		struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry);
+		struct vfsmount *mounted = lookup_mnt(path);
 		if (!mounted)
 			break;
 		dput(path->dentry);
@@ -708,32 +715,32 @@ static int __follow_mount(struct path *path)
 	return res;
 }
 
-static void follow_mount(struct vfsmount **mnt, struct dentry **dentry)
+static void follow_mount(struct path *path)
 {
-	while (d_mountpoint(*dentry)) {
-		struct vfsmount *mounted = lookup_mnt(*mnt, *dentry);
+	while (d_mountpoint(path->dentry)) {
+		struct vfsmount *mounted = lookup_mnt(path);
 		if (!mounted)
 			break;
-		dput(*dentry);
-		mntput(*mnt);
-		*mnt = mounted;
-		*dentry = dget(mounted->mnt_root);
+		dput(path->dentry);
+		mntput(path->mnt);
+		path->mnt = mounted;
+		path->dentry = dget(mounted->mnt_root);
 	}
 }
 
 /* no need for dcache_lock, as serialization is taken care in
  * namespace.c
  */
-int follow_down(struct vfsmount **mnt, struct dentry **dentry)
+int follow_down(struct path *path)
 {
 	struct vfsmount *mounted;
 
-	mounted = lookup_mnt(*mnt, *dentry);
+	mounted = lookup_mnt(path);
 	if (mounted) {
-		dput(*dentry);
-		mntput(*mnt);
-		*mnt = mounted;
-		*dentry = dget(mounted->mnt_root);
+		dput(path->dentry);
+		mntput(path->mnt);
+		path->mnt = mounted;
+		path->dentry = dget(mounted->mnt_root);
 		return 1;
 	}
 	return 0;
@@ -741,19 +748,16 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry)
 
 static __always_inline void follow_dotdot(struct nameidata *nd)
 {
-	struct fs_struct *fs = current->fs;
+	set_root(nd);
 
 	while(1) {
 		struct vfsmount *parent;
 		struct dentry *old = nd->path.dentry;
 
-                read_lock(&fs->lock);
-		if (nd->path.dentry == fs->root.dentry &&
-		    nd->path.mnt == fs->root.mnt) {
-                        read_unlock(&fs->lock);
+		if (nd->path.dentry == nd->root.dentry &&
+		    nd->path.mnt == nd->root.mnt) {
 			break;
 		}
-                read_unlock(&fs->lock);
 		spin_lock(&dcache_lock);
 		if (nd->path.dentry != nd->path.mnt->mnt_root) {
 			nd->path.dentry = dget(nd->path.dentry->d_parent);
@@ -775,7 +779,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
 		mntput(nd->path.mnt);
 		nd->path.mnt = parent;
 	}
-	follow_mount(&nd->path.mnt, &nd->path.dentry);
+	follow_mount(&nd->path);
 }
 
 /*
@@ -1017,25 +1021,23 @@ static int path_walk(const char *name, struct nameidata *nd)
 	return link_path_walk(name, nd);
 }
 
-/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
-static int do_path_lookup(int dfd, const char *name,
-				unsigned int flags, struct nameidata *nd)
+static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
 {
 	int retval = 0;
 	int fput_needed;
 	struct file *file;
-	struct fs_struct *fs = current->fs;
 
 	nd->last_type = LAST_ROOT; /* if there are only slashes... */
 	nd->flags = flags;
 	nd->depth = 0;
+	nd->root.mnt = NULL;
 
 	if (*name=='/') {
-		read_lock(&fs->lock);
-		nd->path = fs->root;
-		path_get(&fs->root);
-		read_unlock(&fs->lock);
+		set_root(nd);
+		nd->path = nd->root;
+		path_get(&nd->root);
 	} else if (dfd == AT_FDCWD) {
+		struct fs_struct *fs = current->fs;
 		read_lock(&fs->lock);
 		nd->path = fs->pwd;
 		path_get(&fs->pwd);
@@ -1063,17 +1065,29 @@ static int do_path_lookup(int dfd, const char *name,
 
 		fput_light(file, fput_needed);
 	}
+	return 0;
 
-	retval = path_walk(name, nd);
+fput_fail:
+	fput_light(file, fput_needed);
+out_fail:
+	return retval;
+}
+
+/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
+static int do_path_lookup(int dfd, const char *name,
+				unsigned int flags, struct nameidata *nd)
+{
+	int retval = path_init(dfd, name, flags, nd);
+	if (!retval)
+		retval = path_walk(name, nd);
 	if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
 				nd->path.dentry->d_inode))
 		audit_inode(name, nd->path.dentry);
-out_fail:
+	if (nd->root.mnt) {
+		path_put(&nd->root);
+		nd->root.mnt = NULL;
+	}
 	return retval;
-
-fput_fail:
-	fput_light(file, fput_needed);
-	goto out_fail;
 }
 
 int path_lookup(const char *name, unsigned int flags,
@@ -1113,14 +1127,18 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
 	nd->path.dentry = dentry;
 	nd->path.mnt = mnt;
 	path_get(&nd->path);
+	nd->root = nd->path;
+	path_get(&nd->root);
 
 	retval = path_walk(name, nd);
 	if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
 				nd->path.dentry->d_inode))
 		audit_inode(name, nd->path.dentry);
 
-	return retval;
+	path_put(&nd->root);
+	nd->root.mnt = NULL;
 
+	return retval;
 }
 
 /**
@@ -1676,9 +1694,14 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	/*
 	 * Create - we need to know the parent.
 	 */
-	error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
+	error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
 	if (error)
 		return ERR_PTR(error);
+	error = path_walk(pathname, &nd);
+	if (error)
+		return ERR_PTR(error);
+	if (unlikely(!audit_dummy_context()))
+		audit_inode(pathname, nd.path.dentry);
 
 	/*
 	 * We have the parent and last component. First of all, check
@@ -1806,6 +1829,8 @@ exit:
 	if (!IS_ERR(nd.intent.open.file))
 		release_open_intent(&nd);
 exit_parent:
+	if (nd.root.mnt)
+		path_put(&nd.root);
 	path_put(&nd.path);
 	return ERR_PTR(error);
 

+ 139 - 188
fs/namespace.c

@@ -131,10 +131,20 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 		INIT_LIST_HEAD(&mnt->mnt_share);
 		INIT_LIST_HEAD(&mnt->mnt_slave_list);
 		INIT_LIST_HEAD(&mnt->mnt_slave);
-		atomic_set(&mnt->__mnt_writers, 0);
+#ifdef CONFIG_SMP
+		mnt->mnt_writers = alloc_percpu(int);
+		if (!mnt->mnt_writers)
+			goto out_free_devname;
+#else
+		mnt->mnt_writers = 0;
+#endif
 	}
 	return mnt;
 
+#ifdef CONFIG_SMP
+out_free_devname:
+	kfree(mnt->mnt_devname);
+#endif
 out_free_id:
 	mnt_free_id(mnt);
 out_free_cache:
@@ -171,65 +181,38 @@ int __mnt_is_readonly(struct vfsmount *mnt)
 }
 EXPORT_SYMBOL_GPL(__mnt_is_readonly);
 
-struct mnt_writer {
-	/*
-	 * If holding multiple instances of this lock, they
-	 * must be ordered by cpu number.
-	 */
-	spinlock_t lock;
-	struct lock_class_key lock_class; /* compiles out with !lockdep */
-	unsigned long count;
-	struct vfsmount *mnt;
-} ____cacheline_aligned_in_smp;
-static DEFINE_PER_CPU(struct mnt_writer, mnt_writers);
+static inline void inc_mnt_writers(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++;
+#else
+	mnt->mnt_writers++;
+#endif
+}
 
-static int __init init_mnt_writers(void)
+static inline void dec_mnt_writers(struct vfsmount *mnt)
 {
-	int cpu;
-	for_each_possible_cpu(cpu) {
-		struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
-		spin_lock_init(&writer->lock);
-		lockdep_set_class(&writer->lock, &writer->lock_class);
-		writer->count = 0;
-	}
-	return 0;
+#ifdef CONFIG_SMP
+	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--;
+#else
+	mnt->mnt_writers--;
+#endif
 }
-fs_initcall(init_mnt_writers);
 
-static void unlock_mnt_writers(void)
+static unsigned int count_mnt_writers(struct vfsmount *mnt)
 {
+#ifdef CONFIG_SMP
+	unsigned int count = 0;
 	int cpu;
-	struct mnt_writer *cpu_writer;
 
 	for_each_possible_cpu(cpu) {
-		cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_unlock(&cpu_writer->lock);
+		count += *per_cpu_ptr(mnt->mnt_writers, cpu);
 	}
-}
 
-static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
-{
-	if (!cpu_writer->mnt)
-		return;
-	/*
-	 * This is in case anyone ever leaves an invalid,
-	 * old ->mnt and a count of 0.
-	 */
-	if (!cpu_writer->count)
-		return;
-	atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers);
-	cpu_writer->count = 0;
-}
- /*
- * must hold cpu_writer->lock
- */
-static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
-					  struct vfsmount *mnt)
-{
-	if (cpu_writer->mnt == mnt)
-		return;
-	__clear_mnt_count(cpu_writer);
-	cpu_writer->mnt = mnt;
+	return count;
+#else
+	return mnt->mnt_writers;
+#endif
 }
 
 /*
@@ -253,74 +236,73 @@ static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
 int mnt_want_write(struct vfsmount *mnt)
 {
 	int ret = 0;
-	struct mnt_writer *cpu_writer;
 
-	cpu_writer = &get_cpu_var(mnt_writers);
-	spin_lock(&cpu_writer->lock);
+	preempt_disable();
+	inc_mnt_writers(mnt);
+	/*
+	 * The store to inc_mnt_writers must be visible before we pass
+	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
+	 * incremented count after it has set MNT_WRITE_HOLD.
+	 */
+	smp_mb();
+	while (mnt->mnt_flags & MNT_WRITE_HOLD)
+		cpu_relax();
+	/*
+	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
+	 * be set to match its requirements. So we must not load that until
+	 * MNT_WRITE_HOLD is cleared.
+	 */
+	smp_rmb();
 	if (__mnt_is_readonly(mnt)) {
+		dec_mnt_writers(mnt);
 		ret = -EROFS;
 		goto out;
 	}
-	use_cpu_writer_for_mount(cpu_writer, mnt);
-	cpu_writer->count++;
 out:
-	spin_unlock(&cpu_writer->lock);
-	put_cpu_var(mnt_writers);
+	preempt_enable();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(mnt_want_write);
 
-static void lock_mnt_writers(void)
-{
-	int cpu;
-	struct mnt_writer *cpu_writer;
-
-	for_each_possible_cpu(cpu) {
-		cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_lock(&cpu_writer->lock);
-		__clear_mnt_count(cpu_writer);
-		cpu_writer->mnt = NULL;
-	}
+/**
+ * mnt_clone_write - get write access to a mount
+ * @mnt: the mount on which to take a write
+ *
+ * This is effectively like mnt_want_write, except
+ * it must only be used to take an extra write reference
+ * on a mountpoint that we already know has a write reference
+ * on it. This allows some optimisation.
+ *
+ * After finished, mnt_drop_write must be called as usual to
+ * drop the reference.
+ */
+int mnt_clone_write(struct vfsmount *mnt)
+{
+	/* superblock may be r/o */
+	if (__mnt_is_readonly(mnt))
+		return -EROFS;
+	preempt_disable();
+	inc_mnt_writers(mnt);
+	preempt_enable();
+	return 0;
 }
+EXPORT_SYMBOL_GPL(mnt_clone_write);
 
-/*
- * These per-cpu write counts are not guaranteed to have
- * matched increments and decrements on any given cpu.
- * A file open()ed for write on one cpu and close()d on
- * another cpu will imbalance this count.  Make sure it
- * does not get too far out of whack.
+/**
+ * mnt_want_write_file - get write access to a file's mount
+ * @file: the file who's mount on which to take a write
+ *
+ * This is like mnt_want_write, but it takes a file and can
+ * do some optimisations if the file is open for write already
  */
-static void handle_write_count_underflow(struct vfsmount *mnt)
+int mnt_want_write_file(struct file *file)
 {
-	if (atomic_read(&mnt->__mnt_writers) >=
-	    MNT_WRITER_UNDERFLOW_LIMIT)
-		return;
-	/*
-	 * It isn't necessary to hold all of the locks
-	 * at the same time, but doing it this way makes
-	 * us share a lot more code.
-	 */
-	lock_mnt_writers();
-	/*
-	 * vfsmount_lock is for mnt_flags.
-	 */
-	spin_lock(&vfsmount_lock);
-	/*
-	 * If coalescing the per-cpu writer counts did not
-	 * get us back to a positive writer count, we have
-	 * a bug.
-	 */
-	if ((atomic_read(&mnt->__mnt_writers) < 0) &&
-	    !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
-		WARN(1, KERN_DEBUG "leak detected on mount(%p) writers "
-				"count: %d\n",
-			mnt, atomic_read(&mnt->__mnt_writers));
-		/* use the flag to keep the dmesg spam down */
-		mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
-	}
-	spin_unlock(&vfsmount_lock);
-	unlock_mnt_writers();
+	if (!(file->f_mode & FMODE_WRITE))
+		return mnt_want_write(file->f_path.mnt);
+	else
+		return mnt_clone_write(file->f_path.mnt);
 }
+EXPORT_SYMBOL_GPL(mnt_want_write_file);
 
 /**
  * mnt_drop_write - give up write access to a mount
@@ -332,37 +314,9 @@ static void handle_write_count_underflow(struct vfsmount *mnt)
  */
 void mnt_drop_write(struct vfsmount *mnt)
 {
-	int must_check_underflow = 0;
-	struct mnt_writer *cpu_writer;
-
-	cpu_writer = &get_cpu_var(mnt_writers);
-	spin_lock(&cpu_writer->lock);
-
-	use_cpu_writer_for_mount(cpu_writer, mnt);
-	if (cpu_writer->count > 0) {
-		cpu_writer->count--;
-	} else {
-		must_check_underflow = 1;
-		atomic_dec(&mnt->__mnt_writers);
-	}
-
-	spin_unlock(&cpu_writer->lock);
-	/*
-	 * Logically, we could call this each time,
-	 * but the __mnt_writers cacheline tends to
-	 * be cold, and makes this expensive.
-	 */
-	if (must_check_underflow)
-		handle_write_count_underflow(mnt);
-	/*
-	 * This could be done right after the spinlock
-	 * is taken because the spinlock keeps us on
-	 * the cpu, and disables preemption.  However,
-	 * putting it here bounds the amount that
-	 * __mnt_writers can underflow.  Without it,
-	 * we could theoretically wrap __mnt_writers.
-	 */
-	put_cpu_var(mnt_writers);
+	preempt_disable();
+	dec_mnt_writers(mnt);
+	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(mnt_drop_write);
 
@@ -370,24 +324,41 @@ static int mnt_make_readonly(struct vfsmount *mnt)
 {
 	int ret = 0;
 
-	lock_mnt_writers();
+	spin_lock(&vfsmount_lock);
+	mnt->mnt_flags |= MNT_WRITE_HOLD;
 	/*
-	 * With all the locks held, this value is stable
+	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
+	 * should be visible before we do.
 	 */
-	if (atomic_read(&mnt->__mnt_writers) > 0) {
-		ret = -EBUSY;
-		goto out;
-	}
+	smp_mb();
+
 	/*
-	 * nobody can do a successful mnt_want_write() with all
-	 * of the counts in MNT_DENIED_WRITE and the locks held.
+	 * With writers on hold, if this value is zero, then there are
+	 * definitely no active writers (although held writers may subsequently
+	 * increment the count, they'll have to wait, and decrement it after
+	 * seeing MNT_READONLY).
+	 *
+	 * It is OK to have counter incremented on one CPU and decremented on
+	 * another: the sum will add up correctly. The danger would be when we
+	 * sum up each counter, if we read a counter before it is incremented,
+	 * but then read another CPU's count which it has been subsequently
+	 * decremented from -- we would see more decrements than we should.
+	 * MNT_WRITE_HOLD protects against this scenario, because
+	 * mnt_want_write first increments count, then smp_mb, then spins on
+	 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
+	 * we're counting up here.
 	 */
-	spin_lock(&vfsmount_lock);
-	if (!ret)
+	if (count_mnt_writers(mnt) > 0)
+		ret = -EBUSY;
+	else
 		mnt->mnt_flags |= MNT_READONLY;
+	/*
+	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
+	 * that become unheld will see MNT_READONLY.
+	 */
+	smp_wmb();
+	mnt->mnt_flags &= ~MNT_WRITE_HOLD;
 	spin_unlock(&vfsmount_lock);
-out:
-	unlock_mnt_writers();
 	return ret;
 }
 
@@ -410,6 +381,9 @@ void free_vfsmnt(struct vfsmount *mnt)
 {
 	kfree(mnt->mnt_devname);
 	mnt_free_id(mnt);
+#ifdef CONFIG_SMP
+	free_percpu(mnt->mnt_writers);
+#endif
 	kmem_cache_free(mnt_cache, mnt);
 }
 
@@ -442,11 +416,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
  * lookup_mnt increments the ref count before returning
  * the vfsmount struct.
  */
-struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
+struct vfsmount *lookup_mnt(struct path *path)
 {
 	struct vfsmount *child_mnt;
 	spin_lock(&vfsmount_lock);
-	if ((child_mnt = __lookup_mnt(mnt, dentry, 1)))
+	if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
 		mntget(child_mnt);
 	spin_unlock(&vfsmount_lock);
 	return child_mnt;
@@ -604,38 +578,18 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
 
 static inline void __mntput(struct vfsmount *mnt)
 {
-	int cpu;
 	struct super_block *sb = mnt->mnt_sb;
-	/*
-	 * We don't have to hold all of the locks at the
-	 * same time here because we know that we're the
-	 * last reference to mnt and that no new writers
-	 * can come in.
-	 */
-	for_each_possible_cpu(cpu) {
-		struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
-		spin_lock(&cpu_writer->lock);
-		if (cpu_writer->mnt != mnt) {
-			spin_unlock(&cpu_writer->lock);
-			continue;
-		}
-		atomic_add(cpu_writer->count, &mnt->__mnt_writers);
-		cpu_writer->count = 0;
-		/*
-		 * Might as well do this so that no one
-		 * ever sees the pointer and expects
-		 * it to be valid.
-		 */
-		cpu_writer->mnt = NULL;
-		spin_unlock(&cpu_writer->lock);
-	}
 	/*
 	 * This probably indicates that somebody messed
 	 * up a mnt_want/drop_write() pair.  If this
 	 * happens, the filesystem was probably unable
 	 * to make r/w->r/o transitions.
 	 */
-	WARN_ON(atomic_read(&mnt->__mnt_writers));
+	/*
+	 * atomic_dec_and_lock() used to deal with ->mnt_count decrements
+	 * provides barriers, so count_mnt_writers() below is safe.  AV
+	 */
+	WARN_ON(count_mnt_writers(mnt));
 	dput(mnt->mnt_root);
 	free_vfsmnt(mnt);
 	deactivate_super(sb);
@@ -1106,11 +1060,8 @@ static int do_umount(struct vfsmount *mnt, int flags)
 		 * we just try to remount it readonly.
 		 */
 		down_write(&sb->s_umount);
-		if (!(sb->s_flags & MS_RDONLY)) {
-			lock_kernel();
+		if (!(sb->s_flags & MS_RDONLY))
 			retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
-			unlock_kernel();
-		}
 		up_write(&sb->s_umount);
 		return retval;
 	}
@@ -1253,11 +1204,11 @@ Enomem:
 	return NULL;
 }
 
-struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry)
+struct vfsmount *collect_mounts(struct path *path)
 {
 	struct vfsmount *tree;
 	down_write(&namespace_sem);
-	tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE);
+	tree = copy_tree(path->mnt, path->dentry, CL_COPY_ALL | CL_PRIVATE);
 	up_write(&namespace_sem);
 	return tree;
 }
@@ -1430,7 +1381,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
 		goto out_unlock;
 
 	err = -ENOENT;
-	if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry))
+	if (!d_unlinked(path->dentry))
 		err = attach_recursive_mnt(mnt, path, NULL);
 out_unlock:
 	mutex_unlock(&path->dentry->d_inode->i_mutex);
@@ -1601,7 +1552,7 @@ static int do_move_mount(struct path *path, char *old_name)
 
 	down_write(&namespace_sem);
 	while (d_mountpoint(path->dentry) &&
-	       follow_down(&path->mnt, &path->dentry))
+	       follow_down(path))
 		;
 	err = -EINVAL;
 	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
@@ -1612,7 +1563,7 @@ static int do_move_mount(struct path *path, char *old_name)
 	if (IS_DEADDIR(path->dentry->d_inode))
 		goto out1;
 
-	if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry))
+	if (d_unlinked(path->dentry))
 		goto out1;
 
 	err = -EINVAL;
@@ -1676,7 +1627,9 @@ static int do_new_mount(struct path *path, char *type, int flags,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+	lock_kernel();
 	mnt = do_kern_mount(type, flags, name, data);
+	unlock_kernel();
 	if (IS_ERR(mnt))
 		return PTR_ERR(mnt);
 
@@ -1695,10 +1648,10 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
 	down_write(&namespace_sem);
 	/* Something was mounted here while we slept */
 	while (d_mountpoint(path->dentry) &&
-	       follow_down(&path->mnt, &path->dentry))
+	       follow_down(path))
 		;
 	err = -EINVAL;
-	if (!check_mnt(path->mnt))
+	if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
 		goto unlock;
 
 	/* Refuse the same filesystem on the same mount point */
@@ -2092,10 +2045,8 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 	if (retval < 0)
 		goto out3;
 
-	lock_kernel();
 	retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
 			  flags, (void *)data_page);
-	unlock_kernel();
 	free_page(data_page);
 
 out3:
@@ -2175,9 +2126,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	error = -ENOENT;
 	if (IS_DEADDIR(new.dentry->d_inode))
 		goto out2;
-	if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry))
+	if (d_unlinked(new.dentry))
 		goto out2;
-	if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry))
+	if (d_unlinked(old.dentry))
 		goto out2;
 	error = -EBUSY;
 	if (new.mnt == root.mnt ||

+ 4 - 0
fs/ncpfs/inode.c

@@ -736,6 +736,8 @@ static void ncp_put_super(struct super_block *sb)
 {
 	struct ncp_server *server = NCP_SBP(sb);
 
+	lock_kernel();
+
 	ncp_lock_server(server);
 	ncp_disconnect(server);
 	ncp_unlock_server(server);
@@ -769,6 +771,8 @@ static void ncp_put_super(struct super_block *sb)
 	vfree(server->packet);
 	sb->s_fs_info = NULL;
 	kfree(server);
+
+	unlock_kernel();
 }
 
 static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf)

+ 1 - 1
fs/nfs/namespace.c

@@ -154,7 +154,7 @@ out_err:
 	goto out;
 out_follow:
 	while (d_mountpoint(nd->path.dentry) &&
-	       follow_down(&nd->path.mnt, &nd->path.dentry))
+	       follow_down(&nd->path))
 		;
 	err = 0;
 	goto out;

+ 2 - 0
fs/nfs/super.c

@@ -1813,6 +1813,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
 	if (data == NULL)
 		return -ENOMEM;
 
+	lock_kernel();
 	/* fill out struct with values from existing mount */
 	data->flags = nfss->flags;
 	data->rsize = nfss->rsize;
@@ -1837,6 +1838,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
 	error = nfs_compare_remount_data(nfss, data);
 out:
 	kfree(data);
+	unlock_kernel();
 	return error;
 }
 

+ 32 - 46
fs/nfsd/export.c

@@ -847,9 +847,8 @@ exp_get_fsid_key(svc_client *clp, int fsid)
 	return exp_find_key(clp, FSID_NUM, fsidv, NULL);
 }
 
-static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt,
-				   struct dentry *dentry,
-				   struct cache_req *reqp)
+static svc_export *exp_get_by_name(svc_client *clp, const struct path *path,
+				     struct cache_req *reqp)
 {
 	struct svc_export *exp, key;
 	int err;
@@ -858,8 +857,7 @@ static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt,
 		return ERR_PTR(-ENOENT);
 
 	key.ex_client = clp;
-	key.ex_path.mnt = mnt;
-	key.ex_path.dentry = dentry;
+	key.ex_path = *path;
 
 	exp = svc_export_lookup(&key);
 	if (exp == NULL)
@@ -873,24 +871,19 @@ static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt,
 /*
  * Find the export entry for a given dentry.
  */
-static struct svc_export *exp_parent(svc_client *clp, struct vfsmount *mnt,
-				     struct dentry *dentry,
-				     struct cache_req *reqp)
+static struct svc_export *exp_parent(svc_client *clp, struct path *path)
 {
-	svc_export *exp;
-
-	dget(dentry);
-	exp = exp_get_by_name(clp, mnt, dentry, reqp);
-
-	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
-		struct dentry *parent;
-
-		parent = dget_parent(dentry);
-		dput(dentry);
-		dentry = parent;
-		exp = exp_get_by_name(clp, mnt, dentry, reqp);
+	struct dentry *saved = dget(path->dentry);
+	svc_export *exp = exp_get_by_name(clp, path, NULL);
+
+	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
+		struct dentry *parent = dget_parent(path->dentry);
+		dput(path->dentry);
+		path->dentry = parent;
+		exp = exp_get_by_name(clp, path, NULL);
 	}
-	dput(dentry);
+	dput(path->dentry);
+	path->dentry = saved;
 	return exp;
 }
 
@@ -1018,7 +1011,7 @@ exp_export(struct nfsctl_export *nxp)
 		goto out_put_clp;
 	err = -EINVAL;
 
-	exp = exp_get_by_name(clp, path.mnt, path.dentry, NULL);
+	exp = exp_get_by_name(clp, &path, NULL);
 
 	memset(&new, 0, sizeof(new));
 
@@ -1135,7 +1128,7 @@ exp_unexport(struct nfsctl_export *nxp)
 		goto out_domain;
 
 	err = -EINVAL;
-	exp = exp_get_by_name(dom, path.mnt, path.dentry, NULL);
+	exp = exp_get_by_name(dom, &path, NULL);
 	path_put(&path);
 	if (IS_ERR(exp))
 		goto out_domain;
@@ -1177,7 +1170,7 @@ exp_rootfh(svc_client *clp, char *name, struct knfsd_fh *f, int maxsize)
 	dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n",
 		 name, path.dentry, clp->name,
 		 inode->i_sb->s_id, inode->i_ino);
-	exp = exp_parent(clp, path.mnt, path.dentry, NULL);
+	exp = exp_parent(clp, &path);
 	if (IS_ERR(exp)) {
 		err = PTR_ERR(exp);
 		goto out;
@@ -1207,7 +1200,7 @@ static struct svc_export *exp_find(struct auth_domain *clp, int fsid_type,
 	if (IS_ERR(ek))
 		return ERR_CAST(ek);
 
-	exp = exp_get_by_name(clp, ek->ek_path.mnt, ek->ek_path.dentry, reqp);
+	exp = exp_get_by_name(clp, &ek->ek_path, reqp);
 	cache_put(&ek->h, &svc_expkey_cache);
 
 	if (IS_ERR(exp))
@@ -1247,8 +1240,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
  * use exp_get_by_name() or exp_find().
  */
 struct svc_export *
-rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt,
-		struct dentry *dentry)
+rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path)
 {
 	struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
 
@@ -1256,8 +1248,7 @@ rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt,
 		goto gss;
 
 	/* First try the auth_unix client: */
-	exp = exp_get_by_name(rqstp->rq_client, mnt, dentry,
-						&rqstp->rq_chandle);
+	exp = exp_get_by_name(rqstp->rq_client, path, &rqstp->rq_chandle);
 	if (PTR_ERR(exp) == -ENOENT)
 		goto gss;
 	if (IS_ERR(exp))
@@ -1269,8 +1260,7 @@ gss:
 	/* Otherwise, try falling back on gss client */
 	if (rqstp->rq_gssclient == NULL)
 		return exp;
-	gssexp = exp_get_by_name(rqstp->rq_gssclient, mnt, dentry,
-						&rqstp->rq_chandle);
+	gssexp = exp_get_by_name(rqstp->rq_gssclient, path, &rqstp->rq_chandle);
 	if (PTR_ERR(gssexp) == -ENOENT)
 		return exp;
 	if (!IS_ERR(exp))
@@ -1309,23 +1299,19 @@ gss:
 }
 
 struct svc_export *
-rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt,
-		struct dentry *dentry)
+rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
 {
-	struct svc_export *exp;
-
-	dget(dentry);
-	exp = rqst_exp_get_by_name(rqstp, mnt, dentry);
-
-	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
-		struct dentry *parent;
-
-		parent = dget_parent(dentry);
-		dput(dentry);
-		dentry = parent;
-		exp = rqst_exp_get_by_name(rqstp, mnt, dentry);
+	struct dentry *saved = dget(path->dentry);
+	struct svc_export *exp = rqst_exp_get_by_name(rqstp, path);
+
+	while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
+		struct dentry *parent = dget_parent(path->dentry);
+		dput(path->dentry);
+		path->dentry = parent;
+		exp = rqst_exp_get_by_name(rqstp, path);
 	}
-	dput(dentry);
+	dput(path->dentry);
+	path->dentry = saved;
 	return exp;
 }
 

+ 27 - 27
fs/nfsd/vfs.c

@@ -101,36 +101,35 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
 {
 	struct svc_export *exp = *expp, *exp2 = NULL;
 	struct dentry *dentry = *dpp;
-	struct vfsmount *mnt = mntget(exp->ex_path.mnt);
-	struct dentry *mounts = dget(dentry);
+	struct path path = {.mnt = mntget(exp->ex_path.mnt),
+			    .dentry = dget(dentry)};
 	int err = 0;
 
-	while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts));
+	while (d_mountpoint(path.dentry) && follow_down(&path))
+		;
 
-	exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts);
+	exp2 = rqst_exp_get_by_name(rqstp, &path);
 	if (IS_ERR(exp2)) {
 		if (PTR_ERR(exp2) != -ENOENT)
 			err = PTR_ERR(exp2);
-		dput(mounts);
-		mntput(mnt);
+		path_put(&path);
 		goto out;
 	}
 	if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
 		/* successfully crossed mount point */
 		/*
-		 * This is subtle: dentry is *not* under mnt at this point.
-		 * The only reason we are safe is that original mnt is pinned
-		 * down by exp, so we should dput before putting exp.
+		 * This is subtle: path.dentry is *not* on path.mnt
+		 * at this point.  The only reason we are safe is that
+		 * original mnt is pinned down by exp, so we should
+		 * put path *before* putting exp
 		 */
-		dput(dentry);
-		*dpp = mounts;
-		exp_put(exp);
+		*dpp = path.dentry;
+		path.dentry = dentry;
 		*expp = exp2;
-	} else {
-		exp_put(exp2);
-		dput(mounts);
+		exp2 = exp;
 	}
-	mntput(mnt);
+	path_put(&path);
+	exp_put(exp2);
 out:
 	return err;
 }
@@ -169,28 +168,29 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
 			/* checking mountpoint crossing is very different when stepping up */
 			struct svc_export *exp2 = NULL;
 			struct dentry *dp;
-			struct vfsmount *mnt = mntget(exp->ex_path.mnt);
-			dentry = dget(dparent);
-			while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry))
+			struct path path = {.mnt = mntget(exp->ex_path.mnt),
+					    .dentry = dget(dparent)};
+
+			while (path.dentry == path.mnt->mnt_root &&
+			       follow_up(&path))
 				;
-			dp = dget_parent(dentry);
-			dput(dentry);
-			dentry = dp;
+			dp = dget_parent(path.dentry);
+			dput(path.dentry);
+			path.dentry = dp;
 
-			exp2 = rqst_exp_parent(rqstp, mnt, dentry);
+			exp2 = rqst_exp_parent(rqstp, &path);
 			if (PTR_ERR(exp2) == -ENOENT) {
-				dput(dentry);
 				dentry = dget(dparent);
 			} else if (IS_ERR(exp2)) {
 				host_err = PTR_ERR(exp2);
-				dput(dentry);
-				mntput(mnt);
+				path_put(&path);
 				goto out_nfserr;
 			} else {
+				dentry = dget(path.dentry);
 				exp_put(exp);
 				exp = exp2;
 			}
-			mntput(mnt);
+			path_put(&path);
 		}
 	} else {
 		fh_lock(fhp);

+ 3 - 3
fs/nilfs2/cpfile.c

@@ -864,11 +864,11 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
 	case NILFS_CHECKPOINT:
 		/*
 		 * Check for protecting existing snapshot mounts:
-		 * bd_mount_sem is used to make this operation atomic and
+		 * ns_mount_mutex is used to make this operation atomic and
 		 * exclusive with a new mount job.  Though it doesn't cover
 		 * umount, it's enough for the purpose.
 		 */
-		down(&nilfs->ns_bdev->bd_mount_sem);
+		mutex_lock(&nilfs->ns_mount_mutex);
 		if (nilfs_checkpoint_is_mounted(nilfs, cno, 1)) {
 			/* Current implementation does not have to protect
 			   plain read-only mounts since they are exclusive
@@ -877,7 +877,7 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
 			ret = -EBUSY;
 		} else
 			ret = nilfs_cpfile_clear_snapshot(cpfile, cno);
-		up(&nilfs->ns_bdev->bd_mount_sem);
+		mutex_unlock(&nilfs->ns_mount_mutex);
 		return ret;
 	case NILFS_SNAPSHOT:
 		return nilfs_cpfile_set_snapshot(cpfile, cno);

+ 1 - 0
fs/nilfs2/sb.h

@@ -60,6 +60,7 @@ struct nilfs_sb_info {
 	struct super_block *s_super;	/* reverse pointer to super_block */
 	struct the_nilfs *s_nilfs;
 	struct list_head s_list;	/* list head for nilfs->ns_supers */
+	atomic_t s_count;		/* reference count */
 
 	/* Segment constructor */
 	struct list_head s_dirty_files;	/* dirty files list */

+ 93 - 163
fs/nilfs2/super.c

@@ -65,9 +65,8 @@ MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem "
 		   "(NILFS)");
 MODULE_LICENSE("GPL");
 
+static void nilfs_write_super(struct super_block *sb);
 static int nilfs_remount(struct super_block *sb, int *flags, char *data);
-static int test_exclusive_mount(struct file_system_type *fs_type,
-				struct block_device *bdev, int flags);
 
 /**
  * nilfs_error() - report failure condition on a filesystem
@@ -315,6 +314,11 @@ static void nilfs_put_super(struct super_block *sb)
 	struct nilfs_sb_info *sbi = NILFS_SB(sb);
 	struct the_nilfs *nilfs = sbi->s_nilfs;
 
+	lock_kernel();
+
+	if (sb->s_dirt)
+		nilfs_write_super(sb);
+
 	nilfs_detach_segment_constructor(sbi);
 
 	if (!(sb->s_flags & MS_RDONLY)) {
@@ -323,12 +327,18 @@ static void nilfs_put_super(struct super_block *sb)
 		nilfs_commit_super(sbi, 1);
 		up_write(&nilfs->ns_sem);
 	}
+	down_write(&nilfs->ns_super_sem);
+	if (nilfs->ns_current == sbi)
+		nilfs->ns_current = NULL;
+	up_write(&nilfs->ns_super_sem);
 
 	nilfs_detach_checkpoint(sbi);
 	put_nilfs(sbi->s_nilfs);
 	sbi->s_super = NULL;
 	sb->s_fs_info = NULL;
-	kfree(sbi);
+	nilfs_put_sbinfo(sbi);
+
+	unlock_kernel();
 }
 
 /**
@@ -383,6 +393,8 @@ static int nilfs_sync_fs(struct super_block *sb, int wait)
 {
 	int err = 0;
 
+	nilfs_write_super(sb);
+
 	/* This function is called when super block should be written back */
 	if (wait)
 		err = nilfs_construct_segment(sb);
@@ -396,9 +408,9 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
 	struct buffer_head *bh_cp;
 	int err;
 
-	down_write(&nilfs->ns_sem);
+	down_write(&nilfs->ns_super_sem);
 	list_add(&sbi->s_list, &nilfs->ns_supers);
-	up_write(&nilfs->ns_sem);
+	up_write(&nilfs->ns_super_sem);
 
 	sbi->s_ifile = nilfs_mdt_new(
 		nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP);
@@ -436,9 +448,9 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
 	nilfs_mdt_destroy(sbi->s_ifile);
 	sbi->s_ifile = NULL;
 
-	down_write(&nilfs->ns_sem);
+	down_write(&nilfs->ns_super_sem);
 	list_del_init(&sbi->s_list);
-	up_write(&nilfs->ns_sem);
+	up_write(&nilfs->ns_super_sem);
 
 	return err;
 }
@@ -450,9 +462,9 @@ void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi)
 	nilfs_mdt_clear(sbi->s_ifile);
 	nilfs_mdt_destroy(sbi->s_ifile);
 	sbi->s_ifile = NULL;
-	down_write(&nilfs->ns_sem);
+	down_write(&nilfs->ns_super_sem);
 	list_del_init(&sbi->s_list);
-	up_write(&nilfs->ns_sem);
+	up_write(&nilfs->ns_super_sem);
 }
 
 static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi)
@@ -752,7 +764,7 @@ int nilfs_store_magic_and_option(struct super_block *sb,
  * @silent: silent mode flag
  * @nilfs: the_nilfs struct
  *
- * This function is called exclusively by bd_mount_mutex.
+ * This function is called exclusively by nilfs->ns_mount_mutex.
  * So, the recovery process is protected from other simultaneous mounts.
  */
 static int
@@ -773,6 +785,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
 	get_nilfs(nilfs);
 	sbi->s_nilfs = nilfs;
 	sbi->s_super = sb;
+	atomic_set(&sbi->s_count, 1);
 
 	err = init_nilfs(nilfs, sbi, (char *)data);
 	if (err)
@@ -870,6 +883,11 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
 		goto failed_root;
 	}
 
+	down_write(&nilfs->ns_super_sem);
+	if (!nilfs_test_opt(sbi, SNAPSHOT))
+		nilfs->ns_current = sbi;
+	up_write(&nilfs->ns_super_sem);
+
 	return 0;
 
  failed_root:
@@ -885,7 +903,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
  failed_sbi:
 	put_nilfs(nilfs);
 	sb->s_fs_info = NULL;
-	kfree(sbi);
+	nilfs_put_sbinfo(sbi);
 	return err;
 }
 
@@ -898,6 +916,9 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 	struct nilfs_mount_options old_opts;
 	int err;
 
+	lock_kernel();
+
+	down_write(&nilfs->ns_super_sem);
 	old_sb_flags = sb->s_flags;
 	old_opts.mount_opt = sbi->s_mount_opt;
 	old_opts.snapshot_cno = sbi->s_snapshot_cno;
@@ -945,14 +966,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 		 * store the current valid flag.  (It may have been changed
 		 * by fsck since we originally mounted the partition.)
 		 */
-		down(&sb->s_bdev->bd_mount_sem);
-		/* Check existing RW-mount */
-		if (test_exclusive_mount(sb->s_type, sb->s_bdev, 0)) {
+		if (nilfs->ns_current && nilfs->ns_current != sbi) {
 			printk(KERN_WARNING "NILFS (device %s): couldn't "
-			       "remount because a RW-mount exists.\n",
+			       "remount because an RW-mount exists.\n",
 			       sb->s_id);
 			err = -EBUSY;
-			goto rw_remount_failed;
+			goto restore_opts;
 		}
 		if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) {
 			printk(KERN_WARNING "NILFS (device %s): couldn't "
@@ -960,7 +979,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 			       "the latest one.\n",
 			       sb->s_id);
 			err = -EINVAL;
-			goto rw_remount_failed;
+			goto restore_opts;
 		}
 		sb->s_flags &= ~MS_RDONLY;
 		nilfs_clear_opt(sbi, SNAPSHOT);
@@ -968,28 +987,31 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 
 		err = nilfs_attach_segment_constructor(sbi);
 		if (err)
-			goto rw_remount_failed;
+			goto restore_opts;
 
 		down_write(&nilfs->ns_sem);
 		nilfs_setup_super(sbi);
 		up_write(&nilfs->ns_sem);
 
-		up(&sb->s_bdev->bd_mount_sem);
+		nilfs->ns_current = sbi;
 	}
  out:
+	up_write(&nilfs->ns_super_sem);
+	unlock_kernel();
 	return 0;
 
- rw_remount_failed:
-	up(&sb->s_bdev->bd_mount_sem);
  restore_opts:
 	sb->s_flags = old_sb_flags;
 	sbi->s_mount_opt = old_opts.mount_opt;
 	sbi->s_snapshot_cno = old_opts.snapshot_cno;
+	up_write(&nilfs->ns_super_sem);
+	unlock_kernel();
 	return err;
 }
 
 struct nilfs_super_data {
 	struct block_device *bdev;
+	struct nilfs_sb_info *sbi;
 	__u64 cno;
 	int flags;
 };
@@ -1048,33 +1070,7 @@ static int nilfs_test_bdev_super(struct super_block *s, void *data)
 {
 	struct nilfs_super_data *sd = data;
 
-	return s->s_bdev == sd->bdev;
-}
-
-static int nilfs_test_bdev_super2(struct super_block *s, void *data)
-{
-	struct nilfs_super_data *sd = data;
-	int ret;
-
-	if (s->s_bdev != sd->bdev)
-		return 0;
-
-	if (!((s->s_flags | sd->flags) & MS_RDONLY))
-		return 1; /* Reuse an old R/W-mode super_block */
-
-	if (s->s_flags & sd->flags & MS_RDONLY) {
-		if (down_read_trylock(&s->s_umount)) {
-			ret = s->s_root &&
-				(sd->cno == NILFS_SB(s)->s_snapshot_cno);
-			up_read(&s->s_umount);
-			/*
-			 * This path is locked with sb_lock by sget().
-			 * So, drop_super() causes deadlock.
-			 */
-			return ret;
-		}
-	}
-	return 0;
+	return sd->sbi && s->s_fs_info == (void *)sd->sbi;
 }
 
 static int
@@ -1082,8 +1078,8 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 	     const char *dev_name, void *data, struct vfsmount *mnt)
 {
 	struct nilfs_super_data sd;
-	struct super_block *s, *s2;
-	struct the_nilfs *nilfs = NULL;
+	struct super_block *s;
+	struct the_nilfs *nilfs;
 	int err, need_to_close = 1;
 
 	sd.bdev = open_bdev_exclusive(dev_name, flags, fs_type);
@@ -1095,7 +1091,6 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 	 * much more information than normal filesystems to identify mount
 	 * instance.  For snapshot mounts, not only a mount type (ro-mount
 	 * or rw-mount) but also a checkpoint number is required.
-	 * The results are passed in sget() using nilfs_super_data.
 	 */
 	sd.cno = 0;
 	sd.flags = flags;
@@ -1104,64 +1099,59 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 		goto failed;
 	}
 
-	/*
-	 * once the super is inserted into the list by sget, s_umount
-	 * will protect the lockfs code from trying to start a snapshot
-	 * while we are mounting
-	 */
-	down(&sd.bdev->bd_mount_sem);
-	if (!sd.cno &&
-	    (err = test_exclusive_mount(fs_type, sd.bdev, flags ^ MS_RDONLY))) {
-		err = (err < 0) ? : -EBUSY;
-		goto failed_unlock;
+	nilfs = find_or_create_nilfs(sd.bdev);
+	if (!nilfs) {
+		err = -ENOMEM;
+		goto failed;
 	}
 
-	/*
-	 * Phase-1: search any existent instance and get the_nilfs
-	 */
-	s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd);
-	if (IS_ERR(s))
-		goto error_s;
-
-	if (!s->s_root) {
-		err = -ENOMEM;
-		nilfs = alloc_nilfs(sd.bdev);
-		if (!nilfs)
-			goto cancel_new;
-	} else {
-		struct nilfs_sb_info *sbi = NILFS_SB(s);
+	mutex_lock(&nilfs->ns_mount_mutex);
 
+	if (!sd.cno) {
 		/*
-		 * s_umount protects super_block from unmount process;
-		 * It covers pointers of nilfs_sb_info and the_nilfs.
+		 * Check if an exclusive mount exists or not.
+		 * Snapshot mounts coexist with a current mount
+		 * (i.e. rw-mount or ro-mount), whereas rw-mount and
+		 * ro-mount are mutually exclusive.
 		 */
-		nilfs = sbi->s_nilfs;
-		get_nilfs(nilfs);
-		up_write(&s->s_umount);
+		down_read(&nilfs->ns_super_sem);
+		if (nilfs->ns_current &&
+		    ((nilfs->ns_current->s_super->s_flags ^ flags)
+		     & MS_RDONLY)) {
+			up_read(&nilfs->ns_super_sem);
+			err = -EBUSY;
+			goto failed_unlock;
+		}
+		up_read(&nilfs->ns_super_sem);
+	}
 
-		/*
-		 * Phase-2: search specified snapshot or R/W mode super_block
-		 */
-		if (!sd.cno)
-			/* trying to get the latest checkpoint.  */
-			sd.cno = nilfs_last_cno(nilfs);
+	/*
+	 * Find existing nilfs_sb_info struct
+	 */
+	sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno);
 
-		s2 = sget(fs_type, nilfs_test_bdev_super2,
-			  nilfs_set_bdev_super, &sd);
-		deactivate_super(s);
-		/*
-		 * Although deactivate_super() invokes close_bdev_exclusive() at
-		 * kill_block_super().  Here, s is an existent mount; we need
-		 * one more close_bdev_exclusive() call.
-		 */
-		s = s2;
-		if (IS_ERR(s))
-			goto error_s;
+	if (!sd.cno)
+		/* trying to get the latest checkpoint.  */
+		sd.cno = nilfs_last_cno(nilfs);
+
+	/*
+	 * Get super block instance holding the nilfs_sb_info struct.
+	 * A new instance is allocated if no existing mount is present or
+	 * existing instance has been unmounted.
+	 */
+	s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd);
+	if (sd.sbi)
+		nilfs_put_sbinfo(sd.sbi);
+
+	if (IS_ERR(s)) {
+		err = PTR_ERR(s);
+		goto failed_unlock;
 	}
 
 	if (!s->s_root) {
 		char b[BDEVNAME_SIZE];
 
+		/* New superblock instance created */
 		s->s_flags = flags;
 		strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id));
 		sb_set_blocksize(s, block_size(sd.bdev));
@@ -1172,26 +1162,18 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 
 		s->s_flags |= MS_ACTIVE;
 		need_to_close = 0;
-	} else if (!(s->s_flags & MS_RDONLY)) {
-		err = -EBUSY;
 	}
 
-	up(&sd.bdev->bd_mount_sem);
+	mutex_unlock(&nilfs->ns_mount_mutex);
 	put_nilfs(nilfs);
 	if (need_to_close)
 		close_bdev_exclusive(sd.bdev, flags);
 	simple_set_mnt(mnt, s);
 	return 0;
 
- error_s:
-	up(&sd.bdev->bd_mount_sem);
-	if (nilfs)
-		put_nilfs(nilfs);
-	close_bdev_exclusive(sd.bdev, flags);
-	return PTR_ERR(s);
-
  failed_unlock:
-	up(&sd.bdev->bd_mount_sem);
+	mutex_unlock(&nilfs->ns_mount_mutex);
+	put_nilfs(nilfs);
  failed:
 	close_bdev_exclusive(sd.bdev, flags);
 
@@ -1199,70 +1181,18 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 
  cancel_new:
 	/* Abandoning the newly allocated superblock */
-	up(&sd.bdev->bd_mount_sem);
-	if (nilfs)
-		put_nilfs(nilfs);
+	mutex_unlock(&nilfs->ns_mount_mutex);
+	put_nilfs(nilfs);
 	up_write(&s->s_umount);
 	deactivate_super(s);
 	/*
 	 * deactivate_super() invokes close_bdev_exclusive().
 	 * We must finish all post-cleaning before this call;
-	 * put_nilfs() and unlocking bd_mount_sem need the block device.
+	 * put_nilfs() needs the block device.
 	 */
 	return err;
 }
 
-static int nilfs_test_bdev_super3(struct super_block *s, void *data)
-{
-	struct nilfs_super_data *sd = data;
-	int ret;
-
-	if (s->s_bdev != sd->bdev)
-		return 0;
-	if (down_read_trylock(&s->s_umount)) {
-		ret = (s->s_flags & MS_RDONLY) && s->s_root &&
-			nilfs_test_opt(NILFS_SB(s), SNAPSHOT);
-		up_read(&s->s_umount);
-		if (ret)
-			return 0; /* ignore snapshot mounts */
-	}
-	return !((sd->flags ^ s->s_flags) & MS_RDONLY);
-}
-
-static int __false_bdev_super(struct super_block *s, void *data)
-{
-#if 0 /* XXX: workaround for lock debug. This is not good idea */
-	up_write(&s->s_umount);
-#endif
-	return -EFAULT;
-}
-
-/**
- * test_exclusive_mount - check whether an exclusive RW/RO mount exists or not.
- * fs_type: filesystem type
- * bdev: block device
- * flag: 0 (check rw-mount) or MS_RDONLY (check ro-mount)
- * res: pointer to an integer to store result
- *
- * This function must be called within a section protected by bd_mount_mutex.
- */
-static int test_exclusive_mount(struct file_system_type *fs_type,
-				struct block_device *bdev, int flags)
-{
-	struct super_block *s;
-	struct nilfs_super_data sd = { .flags = flags, .bdev = bdev };
-
-	s = sget(fs_type, nilfs_test_bdev_super3, __false_bdev_super, &sd);
-	if (IS_ERR(s)) {
-		if (PTR_ERR(s) != -EFAULT)
-			return PTR_ERR(s);
-		return 0;  /* Not found */
-	}
-	up_write(&s->s_umount);
-	deactivate_super(s);
-	return 1;  /* Found */
-}
-
 struct file_system_type nilfs_fs_type = {
 	.owner    = THIS_MODULE,
 	.name     = "nilfs2",

+ 108 - 5
fs/nilfs2/the_nilfs.c

@@ -35,6 +35,10 @@
 #include "seglist.h"
 #include "segbuf.h"
 
+
+static LIST_HEAD(nilfs_objects);
+static DEFINE_SPINLOCK(nilfs_lock);
+
 void nilfs_set_last_segment(struct the_nilfs *nilfs,
 			    sector_t start_blocknr, u64 seq, __u64 cno)
 {
@@ -55,7 +59,7 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs,
  * Return Value: On success, pointer to the_nilfs is returned.
  * On error, NULL is returned.
  */
-struct the_nilfs *alloc_nilfs(struct block_device *bdev)
+static struct the_nilfs *alloc_nilfs(struct block_device *bdev)
 {
 	struct the_nilfs *nilfs;
 
@@ -68,7 +72,10 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
 	atomic_set(&nilfs->ns_writer_refcount, -1);
 	atomic_set(&nilfs->ns_ndirtyblks, 0);
 	init_rwsem(&nilfs->ns_sem);
+	init_rwsem(&nilfs->ns_super_sem);
+	mutex_init(&nilfs->ns_mount_mutex);
 	mutex_init(&nilfs->ns_writer_mutex);
+	INIT_LIST_HEAD(&nilfs->ns_list);
 	INIT_LIST_HEAD(&nilfs->ns_supers);
 	spin_lock_init(&nilfs->ns_last_segment_lock);
 	nilfs->ns_gc_inodes_h = NULL;
@@ -77,6 +84,45 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
 	return nilfs;
 }
 
+/**
+ * find_or_create_nilfs - find or create nilfs object
+ * @bdev: block device to which the_nilfs is related
+ *
+ * find_nilfs() looks up an existent nilfs object created on the
+ * device and gets the reference count of the object.  If no nilfs object
+ * is found on the device, a new nilfs object is allocated.
+ *
+ * Return Value: On success, pointer to the nilfs object is returned.
+ * On error, NULL is returned.
+ */
+struct the_nilfs *find_or_create_nilfs(struct block_device *bdev)
+{
+	struct the_nilfs *nilfs, *new = NULL;
+
+ retry:
+	spin_lock(&nilfs_lock);
+	list_for_each_entry(nilfs, &nilfs_objects, ns_list) {
+		if (nilfs->ns_bdev == bdev) {
+			get_nilfs(nilfs);
+			spin_unlock(&nilfs_lock);
+			if (new)
+				put_nilfs(new);
+			return nilfs; /* existing object */
+		}
+	}
+	if (new) {
+		list_add_tail(&new->ns_list, &nilfs_objects);
+		spin_unlock(&nilfs_lock);
+		return new; /* new object */
+	}
+	spin_unlock(&nilfs_lock);
+
+	new = alloc_nilfs(bdev);
+	if (new)
+		goto retry;
+	return NULL; /* insufficient memory */
+}
+
 /**
  * put_nilfs - release a reference to the_nilfs
  * @nilfs: the_nilfs structure to be released
@@ -86,13 +132,20 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
  */
 void put_nilfs(struct the_nilfs *nilfs)
 {
-	if (!atomic_dec_and_test(&nilfs->ns_count))
+	spin_lock(&nilfs_lock);
+	if (!atomic_dec_and_test(&nilfs->ns_count)) {
+		spin_unlock(&nilfs_lock);
 		return;
+	}
+	list_del_init(&nilfs->ns_list);
+	spin_unlock(&nilfs_lock);
+
 	/*
-	 * Increment of ns_count never occur below because the caller
+	 * Increment of ns_count never occurs below because the caller
 	 * of get_nilfs() holds at least one reference to the_nilfs.
 	 * Thus its exclusion control is not required here.
 	 */
+
 	might_sleep();
 	if (nilfs_loaded(nilfs)) {
 		nilfs_mdt_clear(nilfs->ns_sufile);
@@ -613,13 +666,63 @@ int nilfs_near_disk_full(struct the_nilfs *nilfs)
 	return ret;
 }
 
+/**
+ * nilfs_find_sbinfo - find existing nilfs_sb_info structure
+ * @nilfs: nilfs object
+ * @rw_mount: mount type (non-zero value for read/write mount)
+ * @cno: checkpoint number (zero for read-only mount)
+ *
+ * nilfs_find_sbinfo() returns the nilfs_sb_info structure which
+ * @rw_mount and @cno (in case of snapshots) matched.  If no instance
+ * was found, NULL is returned.  Although the super block instance can
+ * be unmounted after this function returns, the nilfs_sb_info struct
+ * is kept on memory until nilfs_put_sbinfo() is called.
+ */
+struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *nilfs,
+					int rw_mount, __u64 cno)
+{
+	struct nilfs_sb_info *sbi;
+
+	down_read(&nilfs->ns_super_sem);
+	/*
+	 * The SNAPSHOT flag and sb->s_flags are supposed to be
+	 * protected with nilfs->ns_super_sem.
+	 */
+	sbi = nilfs->ns_current;
+	if (rw_mount) {
+		if (sbi && !(sbi->s_super->s_flags & MS_RDONLY))
+			goto found; /* read/write mount */
+		else
+			goto out;
+	} else if (cno == 0) {
+		if (sbi && (sbi->s_super->s_flags & MS_RDONLY))
+			goto found; /* read-only mount */
+		else
+			goto out;
+	}
+
+	list_for_each_entry(sbi, &nilfs->ns_supers, s_list) {
+		if (nilfs_test_opt(sbi, SNAPSHOT) &&
+		    sbi->s_snapshot_cno == cno)
+			goto found; /* snapshot mount */
+	}
+ out:
+	up_read(&nilfs->ns_super_sem);
+	return NULL;
+
+ found:
+	atomic_inc(&sbi->s_count);
+	up_read(&nilfs->ns_super_sem);
+	return sbi;
+}
+
 int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno,
 				int snapshot_mount)
 {
 	struct nilfs_sb_info *sbi;
 	int ret = 0;
 
-	down_read(&nilfs->ns_sem);
+	down_read(&nilfs->ns_super_sem);
 	if (cno == 0 || cno > nilfs->ns_cno)
 		goto out_unlock;
 
@@ -636,6 +739,6 @@ int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno,
 		ret++;
 
  out_unlock:
-	up_read(&nilfs->ns_sem);
+	up_read(&nilfs->ns_super_sem);
 	return ret;
 }

+ 21 - 2
fs/nilfs2/the_nilfs.h

@@ -43,12 +43,16 @@ enum {
  * struct the_nilfs - struct to supervise multiple nilfs mount points
  * @ns_flags: flags
  * @ns_count: reference count
+ * @ns_list: list head for nilfs_list
  * @ns_bdev: block device
  * @ns_bdi: backing dev info
  * @ns_writer: back pointer to writable nilfs_sb_info
  * @ns_sem: semaphore for shared states
+ * @ns_super_sem: semaphore for global operations across super block instances
+ * @ns_mount_mutex: mutex protecting mount process of nilfs
  * @ns_writer_mutex: mutex protecting ns_writer attach/detach
  * @ns_writer_refcount: number of referrers on ns_writer
+ * @ns_current: back pointer to current mount
  * @ns_sbh: buffer heads of on-disk super blocks
  * @ns_sbp: pointers to super block data
  * @ns_sbwtime: previous write time of super blocks
@@ -88,14 +92,23 @@ enum {
 struct the_nilfs {
 	unsigned long		ns_flags;
 	atomic_t		ns_count;
+	struct list_head	ns_list;
 
 	struct block_device    *ns_bdev;
 	struct backing_dev_info *ns_bdi;
 	struct nilfs_sb_info   *ns_writer;
 	struct rw_semaphore	ns_sem;
+	struct rw_semaphore	ns_super_sem;
+	struct mutex		ns_mount_mutex;
 	struct mutex		ns_writer_mutex;
 	atomic_t		ns_writer_refcount;
 
+	/*
+	 * components protected by ns_super_sem
+	 */
+	struct nilfs_sb_info   *ns_current;
+	struct list_head	ns_supers;
+
 	/*
 	 * used for
 	 * - loading the latest checkpoint exclusively.
@@ -108,7 +121,6 @@ struct the_nilfs {
 	time_t			ns_sbwtime[2];
 	unsigned		ns_sbsize;
 	unsigned		ns_mount_state;
-	struct list_head	ns_supers;
 
 	/*
 	 * Following fields are dedicated to a writable FS-instance.
@@ -191,11 +203,12 @@ THE_NILFS_FNS(DISCONTINUED, discontinued)
 #define NILFS_ALTSB_FREQ	60  /* spare superblock */
 
 void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
-struct the_nilfs *alloc_nilfs(struct block_device *);
+struct the_nilfs *find_or_create_nilfs(struct block_device *);
 void put_nilfs(struct the_nilfs *);
 int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *);
 int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *);
 int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
+struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64);
 int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int);
 int nilfs_near_disk_full(struct the_nilfs *);
 void nilfs_fall_back_super_block(struct the_nilfs *);
@@ -238,6 +251,12 @@ nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
 	mutex_unlock(&nilfs->ns_writer_mutex);
 }
 
+static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi)
+{
+	if (!atomic_dec_and_test(&sbi->s_count))
+		kfree(sbi);
+}
+
 static inline void
 nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum,
 			sector_t *seg_start, sector_t *seg_end)

+ 22 - 32
fs/ntfs/super.c

@@ -443,6 +443,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
 	ntfs_volume *vol = NTFS_SB(sb);
 
 	ntfs_debug("Entering with remount options string: %s", opt);
+
+	lock_kernel();
 #ifndef NTFS_RW
 	/* For read-only compiled driver, enforce read-only flag. */
 	*flags |= MS_RDONLY;
@@ -466,15 +468,18 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
 		if (NVolErrors(vol)) {
 			ntfs_error(sb, "Volume has errors and is read-only%s",
 					es);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (vol->vol_flags & VOLUME_IS_DIRTY) {
 			ntfs_error(sb, "Volume is dirty and read-only%s", es);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) {
 			ntfs_error(sb, "Volume has been modified by chkdsk "
 					"and is read-only%s", es);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
@@ -482,11 +487,13 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
 					"(0x%x) and is read-only%s",
 					(unsigned)le16_to_cpu(vol->vol_flags),
 					es);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) {
 			ntfs_error(sb, "Failed to set dirty bit in volume "
 					"information flags%s", es);
+			unlock_kernel();
 			return -EROFS;
 		}
 #if 0
@@ -506,18 +513,21 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
 			ntfs_error(sb, "Failed to empty journal $LogFile%s",
 					es);
 			NVolSetErrors(vol);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (!ntfs_mark_quotas_out_of_date(vol)) {
 			ntfs_error(sb, "Failed to mark quotas out of date%s",
 					es);
 			NVolSetErrors(vol);
+			unlock_kernel();
 			return -EROFS;
 		}
 		if (!ntfs_stamp_usnjrnl(vol)) {
 			ntfs_error(sb, "Failed to stamp transation log "
 					"($UsnJrnl)%s", es);
 			NVolSetErrors(vol);
+			unlock_kernel();
 			return -EROFS;
 		}
 	} else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
@@ -533,8 +543,11 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
 
 	// TODO: Deal with *flags.
 
-	if (!parse_options(vol, opt))
+	if (!parse_options(vol, opt)) {
+		unlock_kernel();
 		return -EINVAL;
+	}
+	unlock_kernel();
 	ntfs_debug("Done.");
 	return 0;
 }
@@ -2246,6 +2259,9 @@ static void ntfs_put_super(struct super_block *sb)
 	ntfs_volume *vol = NTFS_SB(sb);
 
 	ntfs_debug("Entering.");
+
+	lock_kernel();
+
 #ifdef NTFS_RW
 	/*
 	 * Commit all inodes while they are still open in case some of them
@@ -2373,39 +2389,12 @@ static void ntfs_put_super(struct super_block *sb)
 		vol->mftmirr_ino = NULL;
 	}
 	/*
-	 * If any dirty inodes are left, throw away all mft data page cache
-	 * pages to allow a clean umount.  This should never happen any more
-	 * due to mft.c::ntfs_mft_writepage() cleaning all the dirty pages as
-	 * the underlying mft records are written out and cleaned.  If it does,
-	 * happen anyway, we want to know...
+	 * We should have no dirty inodes left, due to
+	 * mft.c::ntfs_mft_writepage() cleaning all the dirty pages as
+	 * the underlying mft records are written out and cleaned.
 	 */
 	ntfs_commit_inode(vol->mft_ino);
 	write_inode_now(vol->mft_ino, 1);
-	if (sb_has_dirty_inodes(sb)) {
-		const char *s1, *s2;
-
-		mutex_lock(&vol->mft_ino->i_mutex);
-		truncate_inode_pages(vol->mft_ino->i_mapping, 0);
-		mutex_unlock(&vol->mft_ino->i_mutex);
-		write_inode_now(vol->mft_ino, 1);
-		if (sb_has_dirty_inodes(sb)) {
-			static const char *_s1 = "inodes";
-			static const char *_s2 = "";
-			s1 = _s1;
-			s2 = _s2;
-		} else {
-			static const char *_s1 = "mft pages";
-			static const char *_s2 = "They have been thrown "
-					"away.  ";
-			s1 = _s1;
-			s2 = _s2;
-		}
-		ntfs_error(sb, "Dirty %s found at umount time.  %sYou should "
-				"run chkdsk.  Please email "
-				"linux-ntfs-dev@lists.sourceforge.net and say "
-				"that you saw this message.  Thank you.", s1,
-				s2);
-	}
 #endif /* NTFS_RW */
 
 	iput(vol->mft_ino);
@@ -2444,7 +2433,8 @@ static void ntfs_put_super(struct super_block *sb)
 	}
 	sb->s_fs_info = NULL;
 	kfree(vol);
-	return;
+
+	unlock_kernel();
 }
 
 /**

+ 8 - 14
fs/ocfs2/super.c

@@ -42,6 +42,7 @@
 #include <linux/mount.h>
 #include <linux/seq_file.h>
 #include <linux/quotaops.h>
+#include <linux/smp_lock.h>
 
 #define MLOG_MASK_PREFIX ML_SUPER
 #include <cluster/masklog.h>
@@ -126,7 +127,6 @@ static int ocfs2_get_sector(struct super_block *sb,
 			    struct buffer_head **bh,
 			    int block,
 			    int sect_size);
-static void ocfs2_write_super(struct super_block *sb);
 static struct inode *ocfs2_alloc_inode(struct super_block *sb);
 static void ocfs2_destroy_inode(struct inode *inode);
 static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend);
@@ -141,7 +141,6 @@ static const struct super_operations ocfs2_sops = {
 	.clear_inode	= ocfs2_clear_inode,
 	.delete_inode	= ocfs2_delete_inode,
 	.sync_fs	= ocfs2_sync_fs,
-	.write_super	= ocfs2_write_super,
 	.put_super	= ocfs2_put_super,
 	.remount_fs	= ocfs2_remount,
 	.show_options   = ocfs2_show_options,
@@ -365,24 +364,12 @@ static struct file_operations ocfs2_osb_debug_fops = {
 	.llseek =	generic_file_llseek,
 };
 
-/*
- * write_super and sync_fs ripped right out of ext3.
- */
-static void ocfs2_write_super(struct super_block *sb)
-{
-	if (mutex_trylock(&sb->s_lock) != 0)
-		BUG();
-	sb->s_dirt = 0;
-}
-
 static int ocfs2_sync_fs(struct super_block *sb, int wait)
 {
 	int status;
 	tid_t target;
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 
-	sb->s_dirt = 0;
-
 	if (ocfs2_is_hard_readonly(osb))
 		return -EROFS;
 
@@ -595,6 +582,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
 	struct mount_options parsed_options;
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 
+	lock_kernel();
+
 	if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) {
 		ret = -EINVAL;
 		goto out;
@@ -698,6 +687,7 @@ unlock_osb:
 			ocfs2_set_journal_params(osb);
 	}
 out:
+	unlock_kernel();
 	return ret;
 }
 
@@ -1550,9 +1540,13 @@ static void ocfs2_put_super(struct super_block *sb)
 {
 	mlog_entry("(0x%p)\n", sb);
 
+	lock_kernel();
+
 	ocfs2_sync_blockdev(sb);
 	ocfs2_dismount_volume(sb, 0);
 
+	unlock_kernel();
+
 	mlog_exit_void();
 }
 

+ 1 - 16
fs/omfs/file.c

@@ -11,21 +11,6 @@
 #include <linux/mpage.h>
 #include "omfs.h"
 
-static int omfs_sync_file(struct file *file, struct dentry *dentry,
-		int datasync)
-{
-	struct inode *inode = dentry->d_inode;
-	int err;
-
-	err = sync_mapping_buffers(inode->i_mapping);
-	if (!(inode->i_state & I_DIRTY))
-		return err;
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
-		return err;
-	err |= omfs_sync_inode(inode);
-	return err ? -EIO : 0;
-}
-
 static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset)
 {
 	return (sbi->s_sys_blocksize - offset -
@@ -344,7 +329,7 @@ struct file_operations omfs_file_operations = {
 	.aio_read = generic_file_aio_read,
 	.aio_write = generic_file_aio_write,
 	.mmap = generic_file_mmap,
-	.fsync = omfs_sync_file,
+	.fsync = simple_fsync,
 	.splice_read = generic_file_splice_read,
 };
 

+ 2 - 2
fs/open.c

@@ -612,7 +612,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
 
 	audit_inode(NULL, dentry);
 
-	err = mnt_want_write(file->f_path.mnt);
+	err = mnt_want_write_file(file);
 	if (err)
 		goto out_putf;
 	mutex_lock(&inode->i_mutex);
@@ -761,7 +761,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
 	if (!file)
 		goto out;
 
-	error = mnt_want_write(file->f_path.mnt);
+	error = mnt_want_write_file(file);
 	if (error)
 		goto out_fput;
 	dentry = file->f_path.dentry;

+ 25 - 0
fs/proc/internal.h

@@ -92,3 +92,28 @@ struct pde_opener {
 	struct list_head lh;
 };
 void pde_users_dec(struct proc_dir_entry *pde);
+
+extern spinlock_t proc_subdir_lock;
+
+struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
+int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
+unsigned long task_vsize(struct mm_struct *);
+int task_statm(struct mm_struct *, int *, int *, int *, int *);
+void task_mem(struct seq_file *, struct mm_struct *);
+
+struct proc_dir_entry *de_get(struct proc_dir_entry *de);
+void de_put(struct proc_dir_entry *de);
+
+extern struct vfsmount *proc_mnt;
+int proc_fill_super(struct super_block *);
+struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
+
+/*
+ * These are generic /proc routines that use the internal
+ * "struct proc_dir_entry" tree to traverse the filesystem.
+ *
+ * The /proc root directory has extended versions to take care
+ * of the /proc/<pid> subdirectories.
+ */
+int proc_readdir(struct file *, void *, filldir_t);
+struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);

+ 1 - 0
fs/proc/proc_devtree.c

@@ -11,6 +11,7 @@
 #include <linux/string.h>
 #include <asm/prom.h>
 #include <asm/uaccess.h>
+#include "internal.h"
 
 #ifndef HAVE_ARCH_DEVTREE_FIXUPS
 static inline void set_node_proc_entry(struct device_node *np,

+ 1 - 1
fs/qnx4/Makefile

@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_QNX4FS_FS) += qnx4.o
 
-qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o fsync.o
+qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o

+ 1 - 6
fs/qnx4/bitmap.c

@@ -13,14 +13,9 @@
  * 28-06-1998 by Frank Denis : qnx4_free_inode (to be fixed) .
  */
 
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-#include <linux/stat.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
 #include <linux/buffer_head.h>
 #include <linux/bitops.h>
+#include "qnx4.h"
 
 #if 0
 int qnx4_new_block(struct super_block *sb)

+ 2 - 7
fs/qnx4/dir.c

@@ -11,14 +11,9 @@
  * 20-06-1998 by Frank Denis : Linux 2.1.99+ & dcache support.
  */
 
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-#include <linux/stat.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
-
+#include "qnx4.h"
 
 static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
@@ -84,7 +79,7 @@ const struct file_operations qnx4_dir_operations =
 {
 	.read		= generic_read_dir,
 	.readdir	= qnx4_readdir,
-	.fsync		= file_fsync,
+	.fsync		= simple_fsync,
 };
 
 const struct inode_operations qnx4_dir_inode_operations =

+ 2 - 3
fs/qnx4/file.c

@@ -12,8 +12,7 @@
  * 27-06-1998 by Frank Denis : file overwriting.
  */
 
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
+#include "qnx4.h"
 
 /*
  * We have mostly NULL's here: the current defaults are ok for
@@ -29,7 +28,7 @@ const struct file_operations qnx4_file_operations =
 #ifdef CONFIG_QNX4FS_RW
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
-	.fsync		= qnx4_sync_file,
+	.fsync		= simple_fsync,
 #endif
 };
 

Энэ ялгаанд хэт олон файл өөрчлөгдсөн тул зарим файлыг харуулаагүй болно