浏览代码

Merge tag 'writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux

Pull writeback tree from Wu Fengguang:
 "Mainly from Jan Kara to avoid iput() in the flusher threads."

* tag 'writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux:
  writeback: Avoid iput() from flusher thread
  vfs: Rename end_writeback() to clear_inode()
  vfs: Move waiting for inode writeback from end_writeback() to evict_inode()
  writeback: Refactor writeback_single_inode()
  writeback: Remove wb->list_lock from writeback_single_inode()
  writeback: Separate inode requeueing after writeback
  writeback: Move I_DIRTY_PAGES handling
  writeback: Move requeueing when I_SYNC set to writeback_sb_inodes()
  writeback: Move clearing of I_SYNC into inode_sync_complete()
  writeback: initialize global_dirty_limit
  fs: remove 8 bytes of padding from struct writeback_control on 64 bit builds
  mm: page-writeback.c: local functions should not be exposed globally
Linus Torvalds 13 年之前
父节点
当前提交
90324cc1b1
共有 56 个文件被更改,包括 319 次插入和 220 次删除
  1. 7 9
      Documentation/filesystems/porting
  2. 1 1
      arch/powerpc/platforms/cell/spufs/inode.c
  3. 1 1
      arch/s390/hypfs/inode.c
  4. 1 1
      fs/9p/vfs_inode.c
  5. 1 1
      fs/affs/inode.c
  6. 1 1
      fs/afs/inode.c
  7. 1 1
      fs/autofs4/inode.c
  8. 1 1
      fs/bfs/inode.c
  9. 1 1
      fs/binfmt_misc.c
  10. 1 1
      fs/block_dev.c
  11. 1 1
      fs/btrfs/inode.c
  12. 1 1
      fs/cifs/cifsfs.c
  13. 1 1
      fs/coda/inode.c
  14. 1 1
      fs/ecryptfs/super.c
  15. 2 2
      fs/exofs/inode.c
  16. 1 1
      fs/ext2/inode.c
  17. 3 3
      fs/ext3/inode.c
  18. 1 1
      fs/ext4/super.c
  19. 1 1
      fs/fat/inode.c
  20. 1 1
      fs/freevxfs/vxfs_inode.c
  21. 205 131
      fs/fs-writeback.c
  22. 1 1
      fs/fuse/inode.c
  23. 1 1
      fs/gfs2/super.c
  24. 1 1
      fs/hfs/inode.c
  25. 1 1
      fs/hfsplus/super.c
  26. 1 1
      fs/hostfs/hostfs_kern.c
  27. 1 1
      fs/hpfs/inode.c
  28. 1 1
      fs/hppfs/hppfs.c
  29. 1 1
      fs/hugetlbfs/inode.c
  30. 11 4
      fs/inode.c
  31. 1 1
      fs/jffs2/fs.c
  32. 1 1
      fs/jfs/inode.c
  33. 1 1
      fs/logfs/readwrite.c
  34. 1 1
      fs/minix/inode.c
  35. 1 1
      fs/ncpfs/inode.c
  36. 2 2
      fs/nfs/inode.c
  37. 2 2
      fs/nilfs2/inode.c
  38. 1 1
      fs/ntfs/inode.c
  39. 1 1
      fs/ocfs2/dlmfs/dlmfs.c
  40. 1 1
      fs/ocfs2/inode.c
  41. 1 1
      fs/omfs/inode.c
  42. 1 1
      fs/proc/inode.c
  43. 1 1
      fs/pstore/inode.c
  44. 2 2
      fs/reiserfs/inode.c
  45. 1 1
      fs/sysfs/inode.c
  46. 1 1
      fs/sysv/inode.c
  47. 1 1
      fs/ubifs/super.c
  48. 1 1
      fs/udf/inode.c
  49. 1 1
      fs/ufs/inode.c
  50. 1 1
      fs/xfs/xfs_super.c
  51. 7 6
      include/linux/fs.h
  52. 3 7
      include/linux/writeback.h
  53. 29 7
      include/trace/events/writeback.h
  54. 1 1
      ipc/mqueue.c
  55. 2 1
      mm/page-writeback.c
  56. 1 1
      mm/shmem.c

+ 7 - 9
Documentation/filesystems/porting

@@ -297,7 +297,8 @@ in the beginning of ->setattr unconditionally.
 be used instead.  It gets called whenever the inode is evicted, whether it has
 be used instead.  It gets called whenever the inode is evicted, whether it has
 remaining links or not.  Caller does *not* evict the pagecache or inode-associated
 remaining links or not.  Caller does *not* evict the pagecache or inode-associated
 metadata buffers; getting rid of those is responsibility of method, as it had
 metadata buffers; getting rid of those is responsibility of method, as it had
-been for ->delete_inode().
+been for ->delete_inode(). Caller makes sure async writeback cannot be running
+for the inode while (or after) ->evict_inode() is called.
 
 
 	->drop_inode() returns int now; it's called on final iput() with
 	->drop_inode() returns int now; it's called on final iput() with
 inode->i_lock held and it returns true if filesystems wants the inode to be
 inode->i_lock held and it returns true if filesystems wants the inode to be
@@ -306,14 +307,11 @@ updated appropriately.  generic_delete_inode() is also alive and it consists
 simply of return 1.  Note that all actual eviction work is done by caller after
 simply of return 1.  Note that all actual eviction work is done by caller after
 ->drop_inode() returns.
 ->drop_inode() returns.
 
 
-	clear_inode() is gone; use end_writeback() instead.  As before, it must
-be called exactly once on each call of ->evict_inode() (as it used to be for
-each call of ->delete_inode()).  Unlike before, if you are using inode-associated
-metadata buffers (i.e. mark_buffer_dirty_inode()), it's your responsibility to
-call invalidate_inode_buffers() before end_writeback().
-	No async writeback (and thus no calls of ->write_inode()) will happen
-after end_writeback() returns, so actions that should not overlap with ->write_inode()
-(e.g. freeing on-disk inode if i_nlink is 0) ought to be done after that call.
+	As before, clear_inode() must be called exactly once on each call of
+->evict_inode() (as it used to be for each call of ->delete_inode()).  Unlike
+before, if you are using inode-associated metadata buffers (i.e.
+mark_buffer_dirty_inode()), it's your responsibility to call
+invalidate_inode_buffers() before clear_inode().
 
 
 	NOTE: checking i_nlink in the beginning of ->write_inode() and bailing out
 	NOTE: checking i_nlink in the beginning of ->write_inode() and bailing out
 if it's zero is not *and* *never* *had* *been* enough.  Final unlink() and iput()
 if it's zero is not *and* *never* *had* *been* enough.  Final unlink() and iput()

+ 1 - 1
arch/powerpc/platforms/cell/spufs/inode.c

@@ -151,7 +151,7 @@ static void
 spufs_evict_inode(struct inode *inode)
 spufs_evict_inode(struct inode *inode)
 {
 {
 	struct spufs_inode_info *ei = SPUFS_I(inode);
 	struct spufs_inode_info *ei = SPUFS_I(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (ei->i_ctx)
 	if (ei->i_ctx)
 		put_spu_context(ei->i_ctx);
 		put_spu_context(ei->i_ctx);
 	if (ei->i_gang)
 	if (ei->i_gang)

+ 1 - 1
arch/s390/hypfs/inode.c

@@ -115,7 +115,7 @@ static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode)
 
 
 static void hypfs_evict_inode(struct inode *inode)
 static void hypfs_evict_inode(struct inode *inode)
 {
 {
-	end_writeback(inode);
+	clear_inode(inode);
 	kfree(inode->i_private);
 	kfree(inode->i_private);
 }
 }
 
 

+ 1 - 1
fs/9p/vfs_inode.c

@@ -448,7 +448,7 @@ void v9fs_evict_inode(struct inode *inode)
 	struct v9fs_inode *v9inode = V9FS_I(inode);
 	struct v9fs_inode *v9inode = V9FS_I(inode);
 
 
 	truncate_inode_pages(inode->i_mapping, 0);
 	truncate_inode_pages(inode->i_mapping, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	filemap_fdatawrite(inode->i_mapping);
 	filemap_fdatawrite(inode->i_mapping);
 
 
 #ifdef CONFIG_9P_FSCACHE
 #ifdef CONFIG_9P_FSCACHE

+ 1 - 1
fs/affs/inode.c

@@ -264,7 +264,7 @@ affs_evict_inode(struct inode *inode)
 	}
 	}
 
 
 	invalidate_inode_buffers(inode);
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 	affs_free_prealloc(inode);
 	affs_free_prealloc(inode);
 	cache_page = (unsigned long)AFFS_I(inode)->i_lc;
 	cache_page = (unsigned long)AFFS_I(inode)->i_lc;
 	if (cache_page) {
 	if (cache_page) {

+ 1 - 1
fs/afs/inode.c

@@ -423,7 +423,7 @@ void afs_evict_inode(struct inode *inode)
 	ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
 	ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
 
 
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 
 
 	afs_give_up_callback(vnode);
 	afs_give_up_callback(vnode);
 
 

+ 1 - 1
fs/autofs4/inode.c

@@ -100,7 +100,7 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root)
 
 
 static void autofs4_evict_inode(struct inode *inode)
 static void autofs4_evict_inode(struct inode *inode)
 {
 {
-	end_writeback(inode);
+	clear_inode(inode);
 	kfree(inode->i_private);
 	kfree(inode->i_private);
 }
 }
 
 

+ 1 - 1
fs/bfs/inode.c

@@ -174,7 +174,7 @@ static void bfs_evict_inode(struct inode *inode)
 
 
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
 	invalidate_inode_buffers(inode);
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 
 
 	if (inode->i_nlink)
 	if (inode->i_nlink)
 		return;
 		return;

+ 1 - 1
fs/binfmt_misc.c

@@ -505,7 +505,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
 
 
 static void bm_evict_inode(struct inode *inode)
 static void bm_evict_inode(struct inode *inode)
 {
 {
-	end_writeback(inode);
+	clear_inode(inode);
 	kfree(inode->i_private);
 	kfree(inode->i_private);
 }
 }
 
 

+ 1 - 1
fs/block_dev.c

@@ -487,7 +487,7 @@ static void bdev_evict_inode(struct inode *inode)
 	struct list_head *p;
 	struct list_head *p;
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
 	invalidate_inode_buffers(inode); /* is it needed here? */
 	invalidate_inode_buffers(inode); /* is it needed here? */
-	end_writeback(inode);
+	clear_inode(inode);
 	spin_lock(&bdev_lock);
 	spin_lock(&bdev_lock);
 	while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
 	while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
 		__bd_forget(list_entry(p, struct inode, i_devices));
 		__bd_forget(list_entry(p, struct inode, i_devices));

+ 1 - 1
fs/btrfs/inode.c

@@ -3756,7 +3756,7 @@ void btrfs_evict_inode(struct inode *inode)
 	btrfs_end_transaction(trans, root);
 	btrfs_end_transaction(trans, root);
 	btrfs_btree_balance_dirty(root, nr);
 	btrfs_btree_balance_dirty(root, nr);
 no_delete:
 no_delete:
-	end_writeback(inode);
+	clear_inode(inode);
 	return;
 	return;
 }
 }
 
 

+ 1 - 1
fs/cifs/cifsfs.c

@@ -272,7 +272,7 @@ static void
 cifs_evict_inode(struct inode *inode)
 cifs_evict_inode(struct inode *inode)
 {
 {
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	cifs_fscache_release_inode_cookie(inode);
 	cifs_fscache_release_inode_cookie(inode);
 }
 }
 
 

+ 1 - 1
fs/coda/inode.c

@@ -244,7 +244,7 @@ static void coda_put_super(struct super_block *sb)
 static void coda_evict_inode(struct inode *inode)
 static void coda_evict_inode(struct inode *inode)
 {
 {
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	coda_cache_clear_inode(inode);
 	coda_cache_clear_inode(inode);
 }
 }
 
 

+ 1 - 1
fs/ecryptfs/super.c

@@ -133,7 +133,7 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 static void ecryptfs_evict_inode(struct inode *inode)
 static void ecryptfs_evict_inode(struct inode *inode)
 {
 {
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	iput(ecryptfs_inode_to_lower(inode));
 	iput(ecryptfs_inode_to_lower(inode));
 }
 }
 
 

+ 2 - 2
fs/exofs/inode.c

@@ -1473,7 +1473,7 @@ void exofs_evict_inode(struct inode *inode)
 		goto no_delete;
 		goto no_delete;
 
 
 	inode->i_size = 0;
 	inode->i_size = 0;
-	end_writeback(inode);
+	clear_inode(inode);
 
 
 	/* if we are deleting an obj that hasn't been created yet, wait.
 	/* if we are deleting an obj that hasn't been created yet, wait.
 	 * This also makes sure that create_done cannot be called with an
 	 * This also makes sure that create_done cannot be called with an
@@ -1503,5 +1503,5 @@ void exofs_evict_inode(struct inode *inode)
 	return;
 	return;
 
 
 no_delete:
 no_delete:
-	end_writeback(inode);
+	clear_inode(inode);
 }
 }

+ 1 - 1
fs/ext2/inode.c

@@ -90,7 +90,7 @@ void ext2_evict_inode(struct inode * inode)
 	}
 	}
 
 
 	invalidate_inode_buffers(inode);
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 
 
 	ext2_discard_reservation(inode);
 	ext2_discard_reservation(inode);
 	rsv = EXT2_I(inode)->i_block_alloc_info;
 	rsv = EXT2_I(inode)->i_block_alloc_info;

+ 3 - 3
fs/ext3/inode.c

@@ -272,18 +272,18 @@ void ext3_evict_inode (struct inode *inode)
 	if (ext3_mark_inode_dirty(handle, inode)) {
 	if (ext3_mark_inode_dirty(handle, inode)) {
 		/* If that failed, just dquot_drop() and be done with that */
 		/* If that failed, just dquot_drop() and be done with that */
 		dquot_drop(inode);
 		dquot_drop(inode);
-		end_writeback(inode);
+		clear_inode(inode);
 	} else {
 	} else {
 		ext3_xattr_delete_inode(handle, inode);
 		ext3_xattr_delete_inode(handle, inode);
 		dquot_free_inode(inode);
 		dquot_free_inode(inode);
 		dquot_drop(inode);
 		dquot_drop(inode);
-		end_writeback(inode);
+		clear_inode(inode);
 		ext3_free_inode(handle, inode);
 		ext3_free_inode(handle, inode);
 	}
 	}
 	ext3_journal_stop(handle);
 	ext3_journal_stop(handle);
 	return;
 	return;
 no_delete:
 no_delete:
-	end_writeback(inode);
+	clear_inode(inode);
 	dquot_drop(inode);
 	dquot_drop(inode);
 }
 }
 
 

+ 1 - 1
fs/ext4/super.c

@@ -1007,7 +1007,7 @@ static void destroy_inodecache(void)
 void ext4_clear_inode(struct inode *inode)
 void ext4_clear_inode(struct inode *inode)
 {
 {
 	invalidate_inode_buffers(inode);
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 	dquot_drop(inode);
 	dquot_drop(inode);
 	ext4_discard_preallocations(inode);
 	ext4_discard_preallocations(inode);
 	if (EXT4_I(inode)->jinode) {
 	if (EXT4_I(inode)->jinode) {

+ 1 - 1
fs/fat/inode.c

@@ -454,7 +454,7 @@ static void fat_evict_inode(struct inode *inode)
 		fat_truncate_blocks(inode, 0);
 		fat_truncate_blocks(inode, 0);
 	}
 	}
 	invalidate_inode_buffers(inode);
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 	fat_cache_inval_inode(inode);
 	fat_cache_inval_inode(inode);
 	fat_detach(inode);
 	fat_detach(inode);
 }
 }

+ 1 - 1
fs/freevxfs/vxfs_inode.c

@@ -355,6 +355,6 @@ void
 vxfs_evict_inode(struct inode *ip)
 vxfs_evict_inode(struct inode *ip)
 {
 {
 	truncate_inode_pages(&ip->i_data, 0);
 	truncate_inode_pages(&ip->i_data, 0);
-	end_writeback(ip);
+	clear_inode(ip);
 	call_rcu(&ip->i_rcu, vxfs_i_callback);
 	call_rcu(&ip->i_rcu, vxfs_i_callback);
 }
 }

+ 205 - 131
fs/fs-writeback.c

@@ -231,11 +231,8 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
 
 
 static void inode_sync_complete(struct inode *inode)
 static void inode_sync_complete(struct inode *inode)
 {
 {
-	/*
-	 * Prevent speculative execution through
-	 * spin_unlock(&wb->list_lock);
-	 */
-
+	inode->i_state &= ~I_SYNC;
+	/* Waiters must see I_SYNC cleared before being woken up */
 	smp_mb();
 	smp_mb();
 	wake_up_bit(&inode->i_state, __I_SYNC);
 	wake_up_bit(&inode->i_state, __I_SYNC);
 }
 }
@@ -329,10 +326,12 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc)
 }
 }
 
 
 /*
 /*
- * Wait for writeback on an inode to complete.
+ * Wait for writeback on an inode to complete. Called with i_lock held.
+ * Caller must make sure inode cannot go away when we drop i_lock.
  */
  */
-static void inode_wait_for_writeback(struct inode *inode,
-				     struct bdi_writeback *wb)
+static void __inode_wait_for_writeback(struct inode *inode)
+	__releases(inode->i_lock)
+	__acquires(inode->i_lock)
 {
 {
 	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
 	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
 	wait_queue_head_t *wqh;
 	wait_queue_head_t *wqh;
@@ -340,70 +339,119 @@ static void inode_wait_for_writeback(struct inode *inode,
 	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
 	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
 	while (inode->i_state & I_SYNC) {
 	while (inode->i_state & I_SYNC) {
 		spin_unlock(&inode->i_lock);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&wb->list_lock);
 		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
 		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
-		spin_lock(&wb->list_lock);
 		spin_lock(&inode->i_lock);
 		spin_lock(&inode->i_lock);
 	}
 	}
 }
 }
 
 
 /*
 /*
- * Write out an inode's dirty pages.  Called under wb->list_lock and
- * inode->i_lock.  Either the caller has an active reference on the inode or
- * the inode has I_WILL_FREE set.
- *
- * If `wait' is set, wait on the writeout.
- *
- * The whole writeout design is quite complex and fragile.  We want to avoid
- * starvation of particular inodes when others are being redirtied, prevent
- * livelocks, etc.
+ * Wait for writeback on an inode to complete. Caller must have inode pinned.
  */
  */
-static int
-writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
-		       struct writeback_control *wbc)
+void inode_wait_for_writeback(struct inode *inode)
 {
 {
-	struct address_space *mapping = inode->i_mapping;
-	long nr_to_write = wbc->nr_to_write;
-	unsigned dirty;
-	int ret;
+	spin_lock(&inode->i_lock);
+	__inode_wait_for_writeback(inode);
+	spin_unlock(&inode->i_lock);
+}
 
 
-	assert_spin_locked(&wb->list_lock);
-	assert_spin_locked(&inode->i_lock);
+/*
+ * Sleep until I_SYNC is cleared. This function must be called with i_lock
+ * held and drops it. It is aimed for callers not holding any inode reference
+ * so once i_lock is dropped, inode can go away.
+ */
+static void inode_sleep_on_writeback(struct inode *inode)
+	__releases(inode->i_lock)
+{
+	DEFINE_WAIT(wait);
+	wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
+	int sleep;
 
 
-	if (!atomic_read(&inode->i_count))
-		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
-	else
-		WARN_ON(inode->i_state & I_WILL_FREE);
+	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
+	sleep = inode->i_state & I_SYNC;
+	spin_unlock(&inode->i_lock);
+	if (sleep)
+		schedule();
+	finish_wait(wqh, &wait);
+}
 
 
-	if (inode->i_state & I_SYNC) {
+/*
+ * Find proper writeback list for the inode depending on its current state and
+ * possibly also change of its state while we were doing writeback.  Here we
+ * handle things such as livelock prevention or fairness of writeback among
+ * inodes. This function can be called only by flusher thread - no one else
+ * processes all inodes in writeback lists and requeueing inodes behind flusher
+ * thread's back can have unexpected consequences.
+ */
+static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
+			  struct writeback_control *wbc)
+{
+	if (inode->i_state & I_FREEING)
+		return;
+
+	/*
+	 * Sync livelock prevention. Each inode is tagged and synced in one
+	 * shot. If still dirty, it will be redirty_tail()'ed below.  Update
+	 * the dirty time to prevent enqueue and sync it again.
+	 */
+	if ((inode->i_state & I_DIRTY) &&
+	    (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
+		inode->dirtied_when = jiffies;
+
+	if (wbc->pages_skipped) {
 		/*
 		/*
-		 * If this inode is locked for writeback and we are not doing
-		 * writeback-for-data-integrity, move it to b_more_io so that
-		 * writeback can proceed with the other inodes on s_io.
-		 *
-		 * We'll have another go at writing back this inode when we
-		 * completed a full scan of b_io.
+		 * writeback is not making progress due to locked
+		 * buffers. Skip this inode for now.
 		 */
 		 */
-		if (wbc->sync_mode != WB_SYNC_ALL) {
+		redirty_tail(inode, wb);
+		return;
+	}
+
+	if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
+		/*
+		 * We didn't write back all the pages.  nfs_writepages()
+		 * sometimes bails out without doing anything.
+		 */
+		if (wbc->nr_to_write <= 0) {
+			/* Slice used up. Queue for next turn. */
 			requeue_io(inode, wb);
 			requeue_io(inode, wb);
-			trace_writeback_single_inode_requeue(inode, wbc,
-							     nr_to_write);
-			return 0;
+		} else {
+			/*
+			 * Writeback blocked by something other than
+			 * congestion. Delay the inode for some time to
+			 * avoid spinning on the CPU (100% iowait)
+			 * retrying writeback of the dirty page/inode
+			 * that cannot be performed immediately.
+			 */
+			redirty_tail(inode, wb);
 		}
 		}
-
+	} else if (inode->i_state & I_DIRTY) {
 		/*
 		/*
-		 * It's a data-integrity sync.  We must wait.
+		 * Filesystems can dirty the inode during writeback operations,
+		 * such as delayed allocation during submission or metadata
+		 * updates after data IO completion.
 		 */
 		 */
-		inode_wait_for_writeback(inode, wb);
+		redirty_tail(inode, wb);
+	} else {
+		/* The inode is clean. Remove from writeback lists. */
+		list_del_init(&inode->i_wb_list);
 	}
 	}
+}
 
 
-	BUG_ON(inode->i_state & I_SYNC);
+/*
+ * Write out an inode and its dirty pages. Do not update the writeback list
+ * linkage. That is left to the caller. The caller is also responsible for
+ * setting I_SYNC flag and calling inode_sync_complete() to clear it.
+ */
+static int
+__writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
+			 struct writeback_control *wbc)
+{
+	struct address_space *mapping = inode->i_mapping;
+	long nr_to_write = wbc->nr_to_write;
+	unsigned dirty;
+	int ret;
 
 
-	/* Set I_SYNC, reset I_DIRTY_PAGES */
-	inode->i_state |= I_SYNC;
-	inode->i_state &= ~I_DIRTY_PAGES;
-	spin_unlock(&inode->i_lock);
-	spin_unlock(&wb->list_lock);
+	WARN_ON(!(inode->i_state & I_SYNC));
 
 
 	ret = do_writepages(mapping, wbc);
 	ret = do_writepages(mapping, wbc);
 
 
@@ -424,6 +472,9 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 	 * write_inode()
 	 * write_inode()
 	 */
 	 */
 	spin_lock(&inode->i_lock);
 	spin_lock(&inode->i_lock);
+	/* Clear I_DIRTY_PAGES if we've written out all dirty pages */
+	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+		inode->i_state &= ~I_DIRTY_PAGES;
 	dirty = inode->i_state & I_DIRTY;
 	dirty = inode->i_state & I_DIRTY;
 	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
 	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode->i_lock);
@@ -433,60 +484,67 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 		if (ret == 0)
 		if (ret == 0)
 			ret = err;
 			ret = err;
 	}
 	}
+	trace_writeback_single_inode(inode, wbc, nr_to_write);
+	return ret;
+}
+
+/*
+ * Write out an inode's dirty pages. Either the caller has an active reference
+ * on the inode or the inode has I_WILL_FREE set.
+ *
+ * This function is designed to be called for writing back one inode which
+ * we got e.g. from filesystem. Flusher thread uses __writeback_single_inode()
+ * and does more profound writeback list handling in writeback_sb_inodes().
+ */
+static int
+writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
+		       struct writeback_control *wbc)
+{
+	int ret = 0;
 
 
-	spin_lock(&wb->list_lock);
 	spin_lock(&inode->i_lock);
 	spin_lock(&inode->i_lock);
-	inode->i_state &= ~I_SYNC;
-	if (!(inode->i_state & I_FREEING)) {
+	if (!atomic_read(&inode->i_count))
+		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
+	else
+		WARN_ON(inode->i_state & I_WILL_FREE);
+
+	if (inode->i_state & I_SYNC) {
+		if (wbc->sync_mode != WB_SYNC_ALL)
+			goto out;
 		/*
 		/*
-		 * Sync livelock prevention. Each inode is tagged and synced in
-		 * one shot. If still dirty, it will be redirty_tail()'ed below.
-		 * Update the dirty time to prevent enqueue and sync it again.
+		 * It's a data-integrity sync. We must wait. Since callers hold
+		 * inode reference or inode has I_WILL_FREE set, it cannot go
+		 * away under us.
 		 */
 		 */
-		if ((inode->i_state & I_DIRTY) &&
-		    (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
-			inode->dirtied_when = jiffies;
-
-		if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-			/*
-			 * We didn't write back all the pages.  nfs_writepages()
-			 * sometimes bales out without doing anything.
-			 */
-			inode->i_state |= I_DIRTY_PAGES;
-			if (wbc->nr_to_write <= 0) {
-				/*
-				 * slice used up: queue for next turn
-				 */
-				requeue_io(inode, wb);
-			} else {
-				/*
-				 * Writeback blocked by something other than
-				 * congestion. Delay the inode for some time to
-				 * avoid spinning on the CPU (100% iowait)
-				 * retrying writeback of the dirty page/inode
-				 * that cannot be performed immediately.
-				 */
-				redirty_tail(inode, wb);
-			}
-		} else if (inode->i_state & I_DIRTY) {
-			/*
-			 * Filesystems can dirty the inode during writeback
-			 * operations, such as delayed allocation during
-			 * submission or metadata updates after data IO
-			 * completion.
-			 */
-			redirty_tail(inode, wb);
-		} else {
-			/*
-			 * The inode is clean.  At this point we either have
-			 * a reference to the inode or it's on it's way out.
-			 * No need to add it back to the LRU.
-			 */
-			list_del_init(&inode->i_wb_list);
-		}
+		__inode_wait_for_writeback(inode);
 	}
 	}
+	WARN_ON(inode->i_state & I_SYNC);
+	/*
+	 * Skip inode if it is clean. We don't want to mess with writeback
+	 * lists in this function since flusher thread may be doing for example
+	 * sync in parallel and if we move the inode, it could get skipped. So
+	 * here we make sure inode is on some writeback list and leave it there
+	 * unless we have completely cleaned the inode.
+	 */
+	if (!(inode->i_state & I_DIRTY))
+		goto out;
+	inode->i_state |= I_SYNC;
+	spin_unlock(&inode->i_lock);
+
+	ret = __writeback_single_inode(inode, wb, wbc);
+
+	spin_lock(&wb->list_lock);
+	spin_lock(&inode->i_lock);
+	/*
+	 * If inode is clean, remove it from writeback lists. Otherwise don't
+	 * touch it. See comment above for explanation.
+	 */
+	if (!(inode->i_state & I_DIRTY))
+		list_del_init(&inode->i_wb_list);
+	spin_unlock(&wb->list_lock);
 	inode_sync_complete(inode);
 	inode_sync_complete(inode);
-	trace_writeback_single_inode(inode, wbc, nr_to_write);
+out:
+	spin_unlock(&inode->i_lock);
 	return ret;
 	return ret;
 }
 }
 
 
@@ -580,29 +638,57 @@ static long writeback_sb_inodes(struct super_block *sb,
 			redirty_tail(inode, wb);
 			redirty_tail(inode, wb);
 			continue;
 			continue;
 		}
 		}
-		__iget(inode);
+		if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
+			/*
+			 * If this inode is locked for writeback and we are not
+			 * doing writeback-for-data-integrity, move it to
+			 * b_more_io so that writeback can proceed with the
+			 * other inodes on s_io.
+			 *
+			 * We'll have another go at writing back this inode
+			 * when we completed a full scan of b_io.
+			 */
+			spin_unlock(&inode->i_lock);
+			requeue_io(inode, wb);
+			trace_writeback_sb_inodes_requeue(inode);
+			continue;
+		}
+		spin_unlock(&wb->list_lock);
+
+		/*
+		 * We already requeued the inode if it had I_SYNC set and we
+		 * are doing WB_SYNC_NONE writeback. So this catches only the
+		 * WB_SYNC_ALL case.
+		 */
+		if (inode->i_state & I_SYNC) {
+			/* Wait for I_SYNC. This function drops i_lock... */
+			inode_sleep_on_writeback(inode);
+			/* Inode may be gone, start again */
+			continue;
+		}
+		inode->i_state |= I_SYNC;
+		spin_unlock(&inode->i_lock);
+
 		write_chunk = writeback_chunk_size(wb->bdi, work);
 		write_chunk = writeback_chunk_size(wb->bdi, work);
 		wbc.nr_to_write = write_chunk;
 		wbc.nr_to_write = write_chunk;
 		wbc.pages_skipped = 0;
 		wbc.pages_skipped = 0;
 
 
-		writeback_single_inode(inode, wb, &wbc);
+		/*
+		 * We use I_SYNC to pin the inode in memory. While it is set
+		 * evict_inode() will wait so the inode cannot be freed.
+		 */
+		__writeback_single_inode(inode, wb, &wbc);
 
 
 		work->nr_pages -= write_chunk - wbc.nr_to_write;
 		work->nr_pages -= write_chunk - wbc.nr_to_write;
 		wrote += write_chunk - wbc.nr_to_write;
 		wrote += write_chunk - wbc.nr_to_write;
+		spin_lock(&wb->list_lock);
+		spin_lock(&inode->i_lock);
 		if (!(inode->i_state & I_DIRTY))
 		if (!(inode->i_state & I_DIRTY))
 			wrote++;
 			wrote++;
-		if (wbc.pages_skipped) {
-			/*
-			 * writeback is not making progress due to locked
-			 * buffers.  Skip this inode for now.
-			 */
-			redirty_tail(inode, wb);
-		}
+		requeue_inode(inode, wb, &wbc);
+		inode_sync_complete(inode);
 		spin_unlock(&inode->i_lock);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&wb->list_lock);
-		iput(inode);
-		cond_resched();
-		spin_lock(&wb->list_lock);
+		cond_resched_lock(&wb->list_lock);
 		/*
 		/*
 		 * bail out to wb_writeback() often enough to check
 		 * bail out to wb_writeback() often enough to check
 		 * background threshold and other termination conditions.
 		 * background threshold and other termination conditions.
@@ -796,8 +882,10 @@ static long wb_writeback(struct bdi_writeback *wb,
 			trace_writeback_wait(wb->bdi, work);
 			trace_writeback_wait(wb->bdi, work);
 			inode = wb_inode(wb->b_more_io.prev);
 			inode = wb_inode(wb->b_more_io.prev);
 			spin_lock(&inode->i_lock);
 			spin_lock(&inode->i_lock);
-			inode_wait_for_writeback(inode, wb);
-			spin_unlock(&inode->i_lock);
+			spin_unlock(&wb->list_lock);
+			/* This function drops i_lock... */
+			inode_sleep_on_writeback(inode);
+			spin_lock(&wb->list_lock);
 		}
 		}
 	}
 	}
 	spin_unlock(&wb->list_lock);
 	spin_unlock(&wb->list_lock);
@@ -1331,7 +1419,6 @@ EXPORT_SYMBOL(sync_inodes_sb);
 int write_inode_now(struct inode *inode, int sync)
 int write_inode_now(struct inode *inode, int sync)
 {
 {
 	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-	int ret;
 	struct writeback_control wbc = {
 	struct writeback_control wbc = {
 		.nr_to_write = LONG_MAX,
 		.nr_to_write = LONG_MAX,
 		.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
 		.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
@@ -1343,12 +1430,7 @@ int write_inode_now(struct inode *inode, int sync)
 		wbc.nr_to_write = 0;
 		wbc.nr_to_write = 0;
 
 
 	might_sleep();
 	might_sleep();
-	spin_lock(&wb->list_lock);
-	spin_lock(&inode->i_lock);
-	ret = writeback_single_inode(inode, wb, &wbc);
-	spin_unlock(&inode->i_lock);
-	spin_unlock(&wb->list_lock);
-	return ret;
+	return writeback_single_inode(inode, wb, &wbc);
 }
 }
 EXPORT_SYMBOL(write_inode_now);
 EXPORT_SYMBOL(write_inode_now);
 
 
@@ -1365,15 +1447,7 @@ EXPORT_SYMBOL(write_inode_now);
  */
  */
 int sync_inode(struct inode *inode, struct writeback_control *wbc)
 int sync_inode(struct inode *inode, struct writeback_control *wbc)
 {
 {
-	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-	int ret;
-
-	spin_lock(&wb->list_lock);
-	spin_lock(&inode->i_lock);
-	ret = writeback_single_inode(inode, wb, wbc);
-	spin_unlock(&inode->i_lock);
-	spin_unlock(&wb->list_lock);
-	return ret;
+	return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc);
 }
 }
 EXPORT_SYMBOL(sync_inode);
 EXPORT_SYMBOL(sync_inode);
 
 

+ 1 - 1
fs/fuse/inode.c

@@ -122,7 +122,7 @@ static void fuse_destroy_inode(struct inode *inode)
 static void fuse_evict_inode(struct inode *inode)
 static void fuse_evict_inode(struct inode *inode)
 {
 {
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (inode->i_sb->s_flags & MS_ACTIVE) {
 	if (inode->i_sb->s_flags & MS_ACTIVE) {
 		struct fuse_conn *fc = get_fuse_conn(inode);
 		struct fuse_conn *fc = get_fuse_conn(inode);
 		struct fuse_inode *fi = get_fuse_inode(inode);
 		struct fuse_inode *fi = get_fuse_inode(inode);

+ 1 - 1
fs/gfs2/super.c

@@ -1554,7 +1554,7 @@ out_unlock:
 out:
 out:
 	/* Case 3 starts here */
 	/* Case 3 starts here */
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	gfs2_dir_hash_inval(ip);
 	gfs2_dir_hash_inval(ip);
 	ip->i_gl->gl_object = NULL;
 	ip->i_gl->gl_object = NULL;
 	flush_delayed_work_sync(&ip->i_gl->gl_work);
 	flush_delayed_work_sync(&ip->i_gl->gl_work);

+ 1 - 1
fs/hfs/inode.c

@@ -532,7 +532,7 @@ out:
 void hfs_evict_inode(struct inode *inode)
 void hfs_evict_inode(struct inode *inode)
 {
 {
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) {
 	if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) {
 		HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
 		HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
 		iput(HFS_I(inode)->rsrc_inode);
 		iput(HFS_I(inode)->rsrc_inode);

+ 1 - 1
fs/hfsplus/super.c

@@ -154,7 +154,7 @@ static void hfsplus_evict_inode(struct inode *inode)
 {
 {
 	dprint(DBG_INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino);
 	dprint(DBG_INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino);
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (HFSPLUS_IS_RSRC(inode)) {
 	if (HFSPLUS_IS_RSRC(inode)) {
 		HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
 		HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
 		iput(HFSPLUS_I(inode)->rsrc_inode);
 		iput(HFSPLUS_I(inode)->rsrc_inode);

+ 1 - 1
fs/hostfs/hostfs_kern.c

@@ -240,7 +240,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
 static void hostfs_evict_inode(struct inode *inode)
 static void hostfs_evict_inode(struct inode *inode)
 {
 {
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (HOSTFS_I(inode)->fd != -1) {
 	if (HOSTFS_I(inode)->fd != -1) {
 		close_file(&HOSTFS_I(inode)->fd);
 		close_file(&HOSTFS_I(inode)->fd);
 		HOSTFS_I(inode)->fd = -1;
 		HOSTFS_I(inode)->fd = -1;

+ 1 - 1
fs/hpfs/inode.c

@@ -299,7 +299,7 @@ void hpfs_write_if_changed(struct inode *inode)
 void hpfs_evict_inode(struct inode *inode)
 void hpfs_evict_inode(struct inode *inode)
 {
 {
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (!inode->i_nlink) {
 	if (!inode->i_nlink) {
 		hpfs_lock(inode->i_sb);
 		hpfs_lock(inode->i_sb);
 		hpfs_remove_fnode(inode->i_sb, inode->i_ino);
 		hpfs_remove_fnode(inode->i_sb, inode->i_ino);

+ 1 - 1
fs/hppfs/hppfs.c

@@ -614,7 +614,7 @@ static struct inode *hppfs_alloc_inode(struct super_block *sb)
 
 
 void hppfs_evict_inode(struct inode *ino)
 void hppfs_evict_inode(struct inode *ino)
 {
 {
-	end_writeback(ino);
+	clear_inode(ino);
 	dput(HPPFS_I(ino)->proc_dentry);
 	dput(HPPFS_I(ino)->proc_dentry);
 	mntput(ino->i_sb->s_fs_info);
 	mntput(ino->i_sb->s_fs_info);
 }
 }

+ 1 - 1
fs/hugetlbfs/inode.c

@@ -393,7 +393,7 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart)
 static void hugetlbfs_evict_inode(struct inode *inode)
 static void hugetlbfs_evict_inode(struct inode *inode)
 {
 {
 	truncate_hugepages(inode, 0);
 	truncate_hugepages(inode, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 }
 }
 
 
 static inline void
 static inline void

+ 11 - 4
fs/inode.c

@@ -486,7 +486,7 @@ void __remove_inode_hash(struct inode *inode)
 }
 }
 EXPORT_SYMBOL(__remove_inode_hash);
 EXPORT_SYMBOL(__remove_inode_hash);
 
 
-void end_writeback(struct inode *inode)
+void clear_inode(struct inode *inode)
 {
 {
 	might_sleep();
 	might_sleep();
 	/*
 	/*
@@ -500,11 +500,10 @@ void end_writeback(struct inode *inode)
 	BUG_ON(!list_empty(&inode->i_data.private_list));
 	BUG_ON(!list_empty(&inode->i_data.private_list));
 	BUG_ON(!(inode->i_state & I_FREEING));
 	BUG_ON(!(inode->i_state & I_FREEING));
 	BUG_ON(inode->i_state & I_CLEAR);
 	BUG_ON(inode->i_state & I_CLEAR);
-	inode_sync_wait(inode);
 	/* don't need i_lock here, no concurrent mods to i_state */
 	/* don't need i_lock here, no concurrent mods to i_state */
 	inode->i_state = I_FREEING | I_CLEAR;
 	inode->i_state = I_FREEING | I_CLEAR;
 }
 }
-EXPORT_SYMBOL(end_writeback);
+EXPORT_SYMBOL(clear_inode);
 
 
 /*
 /*
  * Free the inode passed in, removing it from the lists it is still connected
  * Free the inode passed in, removing it from the lists it is still connected
@@ -531,12 +530,20 @@ static void evict(struct inode *inode)
 
 
 	inode_sb_list_del(inode);
 	inode_sb_list_del(inode);
 
 
+	/*
+	 * Wait for flusher thread to be done with the inode so that filesystem
+	 * does not start destroying it while writeback is still running. Since
+	 * the inode has I_FREEING set, flusher thread won't start new work on
+	 * the inode.  We just have to wait for running writeback to finish.
+	 */
+	inode_wait_for_writeback(inode);
+
 	if (op->evict_inode) {
 	if (op->evict_inode) {
 		op->evict_inode(inode);
 		op->evict_inode(inode);
 	} else {
 	} else {
 		if (inode->i_data.nrpages)
 		if (inode->i_data.nrpages)
 			truncate_inode_pages(&inode->i_data, 0);
 			truncate_inode_pages(&inode->i_data, 0);
-		end_writeback(inode);
+		clear_inode(inode);
 	}
 	}
 	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
 	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
 		bd_forget(inode);
 		bd_forget(inode);

+ 1 - 1
fs/jffs2/fs.c

@@ -240,7 +240,7 @@ void jffs2_evict_inode (struct inode *inode)
 	jffs2_dbg(1, "%s(): ino #%lu mode %o\n",
 	jffs2_dbg(1, "%s(): ino #%lu mode %o\n",
 		  __func__, inode->i_ino, inode->i_mode);
 		  __func__, inode->i_ino, inode->i_mode);
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	jffs2_do_clear_inode(c, f);
 	jffs2_do_clear_inode(c, f);
 }
 }
 
 

+ 1 - 1
fs/jfs/inode.c

@@ -169,7 +169,7 @@ void jfs_evict_inode(struct inode *inode)
 	} else {
 	} else {
 		truncate_inode_pages(&inode->i_data, 0);
 		truncate_inode_pages(&inode->i_data, 0);
 	}
 	}
-	end_writeback(inode);
+	clear_inode(inode);
 	dquot_drop(inode);
 	dquot_drop(inode);
 }
 }
 
 

+ 1 - 1
fs/logfs/readwrite.c

@@ -2175,7 +2175,7 @@ void logfs_evict_inode(struct inode *inode)
 		}
 		}
 	}
 	}
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 
 
 	/* Cheaper version of write_inode.  All changes are concealed in
 	/* Cheaper version of write_inode.  All changes are concealed in
 	 * aliases, which are moved back.  No write to the medium happens.
 	 * aliases, which are moved back.  No write to the medium happens.

+ 1 - 1
fs/minix/inode.c

@@ -32,7 +32,7 @@ static void minix_evict_inode(struct inode *inode)
 		minix_truncate(inode);
 		minix_truncate(inode);
 	}
 	}
 	invalidate_inode_buffers(inode);
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (!inode->i_nlink)
 	if (!inode->i_nlink)
 		minix_free_inode(inode);
 		minix_free_inode(inode);
 }
 }

+ 1 - 1
fs/ncpfs/inode.c

@@ -292,7 +292,7 @@ static void
 ncp_evict_inode(struct inode *inode)
 ncp_evict_inode(struct inode *inode)
 {
 {
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 
 
 	if (S_ISDIR(inode->i_mode)) {
 	if (S_ISDIR(inode->i_mode)) {
 		DDPRINTK("ncp_evict_inode: put directory %ld\n", inode->i_ino);
 		DDPRINTK("ncp_evict_inode: put directory %ld\n", inode->i_ino);

+ 2 - 2
fs/nfs/inode.c

@@ -121,7 +121,7 @@ static void nfs_clear_inode(struct inode *inode)
 void nfs_evict_inode(struct inode *inode)
 void nfs_evict_inode(struct inode *inode)
 {
 {
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	nfs_clear_inode(inode);
 	nfs_clear_inode(inode);
 }
 }
 
 
@@ -1500,7 +1500,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 void nfs4_evict_inode(struct inode *inode)
 void nfs4_evict_inode(struct inode *inode)
 {
 {
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	pnfs_return_layout(inode);
 	pnfs_return_layout(inode);
 	pnfs_destroy_layout(NFS_I(inode));
 	pnfs_destroy_layout(NFS_I(inode));
 	/* If we are holding a delegation, return it! */
 	/* If we are holding a delegation, return it! */

+ 2 - 2
fs/nilfs2/inode.c

@@ -734,7 +734,7 @@ void nilfs_evict_inode(struct inode *inode)
 	if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
 	if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
 		if (inode->i_data.nrpages)
 		if (inode->i_data.nrpages)
 			truncate_inode_pages(&inode->i_data, 0);
 			truncate_inode_pages(&inode->i_data, 0);
-		end_writeback(inode);
+		clear_inode(inode);
 		nilfs_clear_inode(inode);
 		nilfs_clear_inode(inode);
 		return;
 		return;
 	}
 	}
@@ -746,7 +746,7 @@ void nilfs_evict_inode(struct inode *inode)
 	/* TODO: some of the following operations may fail.  */
 	/* TODO: some of the following operations may fail.  */
 	nilfs_truncate_bmap(ii, 0);
 	nilfs_truncate_bmap(ii, 0);
 	nilfs_mark_inode_dirty(inode);
 	nilfs_mark_inode_dirty(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 
 
 	ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
 	ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
 	if (!ret)
 	if (!ret)

+ 1 - 1
fs/ntfs/inode.c

@@ -2258,7 +2258,7 @@ void ntfs_evict_big_inode(struct inode *vi)
 	ntfs_inode *ni = NTFS_I(vi);
 	ntfs_inode *ni = NTFS_I(vi);
 
 
 	truncate_inode_pages(&vi->i_data, 0);
 	truncate_inode_pages(&vi->i_data, 0);
-	end_writeback(vi);
+	clear_inode(vi);
 
 
 #ifdef NTFS_RW
 #ifdef NTFS_RW
 	if (NInoDirty(ni)) {
 	if (NInoDirty(ni)) {

+ 1 - 1
fs/ocfs2/dlmfs/dlmfs.c

@@ -367,7 +367,7 @@ static void dlmfs_evict_inode(struct inode *inode)
 	int status;
 	int status;
 	struct dlmfs_inode_private *ip;
 	struct dlmfs_inode_private *ip;
 
 
-	end_writeback(inode);
+	clear_inode(inode);
 
 
 	mlog(0, "inode %lu\n", inode->i_ino);
 	mlog(0, "inode %lu\n", inode->i_ino);
 
 

+ 1 - 1
fs/ocfs2/inode.c

@@ -1069,7 +1069,7 @@ static void ocfs2_clear_inode(struct inode *inode)
 	int status;
 	int status;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
 
-	end_writeback(inode);
+	clear_inode(inode);
 	trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno,
 	trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno,
 				inode->i_nlink);
 				inode->i_nlink);
 
 

+ 1 - 1
fs/omfs/inode.c

@@ -184,7 +184,7 @@ int omfs_sync_inode(struct inode *inode)
 static void omfs_evict_inode(struct inode *inode)
 static void omfs_evict_inode(struct inode *inode)
 {
 {
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 
 
 	if (inode->i_nlink)
 	if (inode->i_nlink)
 		return;
 		return;

+ 1 - 1
fs/proc/inode.c

@@ -33,7 +33,7 @@ static void proc_evict_inode(struct inode *inode)
 	const struct proc_ns_operations *ns_ops;
 	const struct proc_ns_operations *ns_ops;
 
 
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 
 
 	/* Stop tracking associated processes */
 	/* Stop tracking associated processes */
 	put_pid(PROC_I(inode)->pid);
 	put_pid(PROC_I(inode)->pid);

+ 1 - 1
fs/pstore/inode.c

@@ -85,7 +85,7 @@ static void pstore_evict_inode(struct inode *inode)
 	struct pstore_private	*p = inode->i_private;
 	struct pstore_private	*p = inode->i_private;
 	unsigned long		flags;
 	unsigned long		flags;
 
 
-	end_writeback(inode);
+	clear_inode(inode);
 	if (p) {
 	if (p) {
 		spin_lock_irqsave(&allpstore_lock, flags);
 		spin_lock_irqsave(&allpstore_lock, flags);
 		list_del(&p->list);
 		list_del(&p->list);

+ 2 - 2
fs/reiserfs/inode.c

@@ -76,14 +76,14 @@ void reiserfs_evict_inode(struct inode *inode)
 		;
 		;
 	}
 	}
       out:
       out:
-	end_writeback(inode);	/* note this must go after the journal_end to prevent deadlock */
+	clear_inode(inode);	/* note this must go after the journal_end to prevent deadlock */
 	dquot_drop(inode);
 	dquot_drop(inode);
 	inode->i_blocks = 0;
 	inode->i_blocks = 0;
 	reiserfs_write_unlock_once(inode->i_sb, depth);
 	reiserfs_write_unlock_once(inode->i_sb, depth);
 	return;
 	return;
 
 
 no_delete:
 no_delete:
-	end_writeback(inode);
+	clear_inode(inode);
 	dquot_drop(inode);
 	dquot_drop(inode);
 }
 }
 
 

+ 1 - 1
fs/sysfs/inode.c

@@ -310,7 +310,7 @@ void sysfs_evict_inode(struct inode *inode)
 	struct sysfs_dirent *sd  = inode->i_private;
 	struct sysfs_dirent *sd  = inode->i_private;
 
 
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	sysfs_put(sd);
 	sysfs_put(sd);
 }
 }
 
 

+ 1 - 1
fs/sysv/inode.c

@@ -316,7 +316,7 @@ static void sysv_evict_inode(struct inode *inode)
 		sysv_truncate(inode);
 		sysv_truncate(inode);
 	}
 	}
 	invalidate_inode_buffers(inode);
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (!inode->i_nlink)
 	if (!inode->i_nlink)
 		sysv_free_inode(inode);
 		sysv_free_inode(inode);
 }
 }

+ 1 - 1
fs/ubifs/super.c

@@ -378,7 +378,7 @@ out:
 		smp_wmb();
 		smp_wmb();
 	}
 	}
 done:
 done:
-	end_writeback(inode);
+	clear_inode(inode);
 }
 }
 
 
 static void ubifs_dirty_inode(struct inode *inode, int flags)
 static void ubifs_dirty_inode(struct inode *inode, int flags)

+ 1 - 1
fs/udf/inode.c

@@ -80,7 +80,7 @@ void udf_evict_inode(struct inode *inode)
 	} else
 	} else
 		truncate_inode_pages(&inode->i_data, 0);
 		truncate_inode_pages(&inode->i_data, 0);
 	invalidate_inode_buffers(inode);
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 	if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
 	if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
 	    inode->i_size != iinfo->i_lenExtents) {
 	    inode->i_size != iinfo->i_lenExtents) {
 		udf_warn(inode->i_sb, "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",
 		udf_warn(inode->i_sb, "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",

+ 1 - 1
fs/ufs/inode.c

@@ -895,7 +895,7 @@ void ufs_evict_inode(struct inode * inode)
 	}
 	}
 
 
 	invalidate_inode_buffers(inode);
 	invalidate_inode_buffers(inode);
-	end_writeback(inode);
+	clear_inode(inode);
 
 
 	if (want_delete) {
 	if (want_delete) {
 		lock_ufs(inode->i_sb);
 		lock_ufs(inode->i_sb);

+ 1 - 1
fs/xfs/xfs_super.c

@@ -932,7 +932,7 @@ xfs_fs_evict_inode(
 	trace_xfs_evict_inode(ip);
 	trace_xfs_evict_inode(ip);
 
 
 	truncate_inode_pages(&inode->i_data, 0);
 	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
+	clear_inode(inode);
 	XFS_STATS_INC(vn_rele);
 	XFS_STATS_INC(vn_rele);
 	XFS_STATS_INC(vn_remove);
 	XFS_STATS_INC(vn_remove);
 	XFS_STATS_DEC(vn_active);
 	XFS_STATS_DEC(vn_active);

+ 7 - 6
include/linux/fs.h

@@ -1764,8 +1764,8 @@ struct super_operations {
  * I_FREEING		Set when inode is about to be freed but still has dirty
  * I_FREEING		Set when inode is about to be freed but still has dirty
  *			pages or buffers attached or the inode itself is still
  *			pages or buffers attached or the inode itself is still
  *			dirty.
  *			dirty.
- * I_CLEAR		Added by end_writeback().  In this state the inode is clean
- *			and can be destroyed.  Inode keeps I_FREEING.
+ * I_CLEAR		Added by clear_inode().  In this state the inode is
+ *			clean and can be destroyed.  Inode keeps I_FREEING.
  *
  *
  *			Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
  *			Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
  *			prohibited for many purposes.  iget() must wait for
  *			prohibited for many purposes.  iget() must wait for
@@ -1773,9 +1773,10 @@ struct super_operations {
  *			anew.  Other functions will just ignore such inodes,
  *			anew.  Other functions will just ignore such inodes,
  *			if appropriate.  I_NEW is used for waiting.
  *			if appropriate.  I_NEW is used for waiting.
  *
  *
- * I_SYNC		Synchonized write of dirty inode data.  The bits is
- *			set during data writeback, and cleared with a wakeup
- *			on the bit address once it is done.
+ * I_SYNC		Writeback of inode is running. The bit is set during
+ *			data writeback, and cleared with a wakeup on the bit
+ *			address once it is done. The bit is also used to pin
+ *			the inode in memory for flusher thread.
  *
  *
  * I_REFERENCED		Marks the inode as recently references on the LRU list.
  * I_REFERENCED		Marks the inode as recently references on the LRU list.
  *
  *
@@ -2349,7 +2350,7 @@ extern unsigned int get_next_ino(void);
 
 
 extern void __iget(struct inode * inode);
 extern void __iget(struct inode * inode);
 extern void iget_failed(struct inode *);
 extern void iget_failed(struct inode *);
-extern void end_writeback(struct inode *);
+extern void clear_inode(struct inode *);
 extern void __destroy_inode(struct inode *);
 extern void __destroy_inode(struct inode *);
 extern struct inode *new_inode_pseudo(struct super_block *sb);
 extern struct inode *new_inode_pseudo(struct super_block *sb);
 extern struct inode *new_inode(struct super_block *sb);
 extern struct inode *new_inode(struct super_block *sb);

+ 3 - 7
include/linux/writeback.h

@@ -58,7 +58,6 @@ extern const char *wb_reason_name[];
  * in a manner such that unspecified fields are set to zero.
  * in a manner such that unspecified fields are set to zero.
  */
  */
 struct writeback_control {
 struct writeback_control {
-	enum writeback_sync_modes sync_mode;
 	long nr_to_write;		/* Write this many pages, and decrement
 	long nr_to_write;		/* Write this many pages, and decrement
 					   this for each page written */
 					   this for each page written */
 	long pages_skipped;		/* Pages which were not written */
 	long pages_skipped;		/* Pages which were not written */
@@ -71,6 +70,8 @@ struct writeback_control {
 	loff_t range_start;
 	loff_t range_start;
 	loff_t range_end;
 	loff_t range_end;
 
 
+	enum writeback_sync_modes sync_mode;
+
 	unsigned for_kupdate:1;		/* A kupdate writeback */
 	unsigned for_kupdate:1;		/* A kupdate writeback */
 	unsigned for_background:1;	/* A background writeback */
 	unsigned for_background:1;	/* A background writeback */
 	unsigned tagged_writepages:1;	/* tag-and-write to avoid livelock */
 	unsigned tagged_writepages:1;	/* tag-and-write to avoid livelock */
@@ -94,6 +95,7 @@ long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
 				enum wb_reason reason);
 				enum wb_reason reason);
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
 void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
 void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
+void inode_wait_for_writeback(struct inode *inode);
 
 
 /* writeback.h requires fs.h; it, too, is not included from here. */
 /* writeback.h requires fs.h; it, too, is not included from here. */
 static inline void wait_on_inode(struct inode *inode)
 static inline void wait_on_inode(struct inode *inode)
@@ -101,12 +103,6 @@ static inline void wait_on_inode(struct inode *inode)
 	might_sleep();
 	might_sleep();
 	wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE);
 	wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE);
 }
 }
-static inline void inode_sync_wait(struct inode *inode)
-{
-	might_sleep();
-	wait_on_bit(&inode->i_state, __I_SYNC, inode_wait,
-							TASK_UNINTERRUPTIBLE);
-}
 
 
 
 
 /*
 /*

+ 29 - 7
include/trace/events/writeback.h

@@ -372,6 +372,35 @@ TRACE_EVENT(balance_dirty_pages,
 	  )
 	  )
 );
 );
 
 
+TRACE_EVENT(writeback_sb_inodes_requeue,
+
+	TP_PROTO(struct inode *inode),
+	TP_ARGS(inode),
+
+	TP_STRUCT__entry(
+		__array(char, name, 32)
+		__field(unsigned long, ino)
+		__field(unsigned long, state)
+		__field(unsigned long, dirtied_when)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->name,
+		        dev_name(inode_to_bdi(inode)->dev), 32);
+		__entry->ino		= inode->i_ino;
+		__entry->state		= inode->i_state;
+		__entry->dirtied_when	= inode->dirtied_when;
+	),
+
+	TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu",
+		  __entry->name,
+		  __entry->ino,
+		  show_inode_state(__entry->state),
+		  __entry->dirtied_when,
+		  (jiffies - __entry->dirtied_when) / HZ
+	)
+);
+
 DECLARE_EVENT_CLASS(writeback_congest_waited_template,
 DECLARE_EVENT_CLASS(writeback_congest_waited_template,
 
 
 	TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed),
 	TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed),
@@ -450,13 +479,6 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
 	)
 	)
 );
 );
 
 
-DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode_requeue,
-	TP_PROTO(struct inode *inode,
-		 struct writeback_control *wbc,
-		 unsigned long nr_to_write),
-	TP_ARGS(inode, wbc, nr_to_write)
-);
-
 DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode,
 DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode,
 	TP_PROTO(struct inode *inode,
 	TP_PROTO(struct inode *inode,
 		 struct writeback_control *wbc,
 		 struct writeback_control *wbc,

+ 1 - 1
ipc/mqueue.c

@@ -251,7 +251,7 @@ static void mqueue_evict_inode(struct inode *inode)
 	int i;
 	int i;
 	struct ipc_namespace *ipc_ns;
 	struct ipc_namespace *ipc_ns;
 
 
-	end_writeback(inode);
+	clear_inode(inode);
 
 
 	if (S_ISDIR(inode->i_mode))
 	if (S_ISDIR(inode->i_mode))
 		return;
 		return;

+ 2 - 1
mm/page-writeback.c

@@ -204,7 +204,7 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
  * Returns the global number of pages potentially available for dirty
  * Returns the global number of pages potentially available for dirty
  * page cache.  This is the base value for the global dirty limits.
  * page cache.  This is the base value for the global dirty limits.
  */
  */
-unsigned long global_dirtyable_memory(void)
+static unsigned long global_dirtyable_memory(void)
 {
 {
 	unsigned long x;
 	unsigned long x;
 
 
@@ -1568,6 +1568,7 @@ void writeback_set_ratelimit(void)
 	unsigned long background_thresh;
 	unsigned long background_thresh;
 	unsigned long dirty_thresh;
 	unsigned long dirty_thresh;
 	global_dirty_limits(&background_thresh, &dirty_thresh);
 	global_dirty_limits(&background_thresh, &dirty_thresh);
+	global_dirty_limit = dirty_thresh;
 	ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
 	ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
 	if (ratelimit_pages < 16)
 	if (ratelimit_pages < 16)
 		ratelimit_pages = 16;
 		ratelimit_pages = 16;

+ 1 - 1
mm/shmem.c

@@ -597,7 +597,7 @@ static void shmem_evict_inode(struct inode *inode)
 	}
 	}
 	BUG_ON(inode->i_blocks);
 	BUG_ON(inode->i_blocks);
 	shmem_free_inode(inode->i_sb);
 	shmem_free_inode(inode->i_sb);
-	end_writeback(inode);
+	clear_inode(inode);
 }
 }
 
 
 /*
 /*