@@ -35,7 +35,9 @@
 struct wb_writeback_work {
        long nr_pages;
        struct super_block *sb;
+       unsigned long *older_than_this;
        enum writeback_sync_modes sync_mode;
+       unsigned int tagged_writepages:1;
        unsigned int for_kupdate:1;
        unsigned int range_cyclic:1;
        unsigned int for_background:1;
@@ -180,12 +182,13 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
  */
 void inode_wb_list_del(struct inode *inode)
 {
-       spin_lock(&inode_wb_list_lock);
+       struct backing_dev_info *bdi = inode_to_bdi(inode);
+
+       spin_lock(&bdi->wb.list_lock);
        list_del_init(&inode->i_wb_list);
-       spin_unlock(&inode_wb_list_lock);
+       spin_unlock(&bdi->wb.list_lock);
 }
 
-
 /*
  * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
  * furthest end of its superblock's dirty-inode list.
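The hunks above replace the single global inode_wb_list_lock with a per-bdi wb->list_lock, so writeback list manipulation on one backing device no longer serializes against every other device. As a rough userspace sketch of that idea (pthread mutexes stand in for spinlocks; struct wb_list and wb_list_del are invented names, not the kernel's):

/* Illustrative only: a per-device list lock instead of one global lock. */
#include <pthread.h>

struct wb_list {
        pthread_mutex_t list_lock;      /* one lock per backing device */
        int nr_items;
};

static void wb_list_del(struct wb_list *wb)
{
        pthread_mutex_lock(&wb->list_lock);     /* contends only within this device */
        wb->nr_items--;
        pthread_mutex_unlock(&wb->list_lock);
}

int main(void)
{
        struct wb_list sda = { PTHREAD_MUTEX_INITIALIZER, 1 };
        struct wb_list sdb = { PTHREAD_MUTEX_INITIALIZER, 1 };

        wb_list_del(&sda);      /* these two calls no longer share a lock */
        wb_list_del(&sdb);
        return 0;
}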
@@ -195,11 +198,9 @@ void inode_wb_list_del(struct inode *inode)
  * the case then the inode must have been redirtied while it was being written
  * out and we don't reset its dirtied_when.
  */
-static void redirty_tail(struct inode *inode)
+static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
 {
-       struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-
-       assert_spin_locked(&inode_wb_list_lock);
+       assert_spin_locked(&wb->list_lock);
        if (!list_empty(&wb->b_dirty)) {
                struct inode *tail;
 
@@ -213,11 +214,9 @@ static void redirty_tail(struct inode *inode)
 /*
  * requeue inode for re-scanning after bdi->b_io list is exhausted.
  */
-static void requeue_io(struct inode *inode)
+static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
 {
-       struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-
-       assert_spin_locked(&inode_wb_list_lock);
+       assert_spin_locked(&wb->list_lock);
        list_move(&inode->i_wb_list, &wb->b_more_io);
 }
 
@@ -225,7 +224,7 @@ static void inode_sync_complete(struct inode *inode)
 {
        /*
         * Prevent speculative execution through
-        * spin_unlock(&inode_wb_list_lock);
+        * spin_unlock(&wb->list_lock);
         */
 
        smp_mb();
@@ -250,15 +249,16 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
 /*
  * Move expired dirty inodes from @delaying_queue to @dispatch_queue.
  */
-static void move_expired_inodes(struct list_head *delaying_queue,
+static int move_expired_inodes(struct list_head *delaying_queue,
                                struct list_head *dispatch_queue,
-                               unsigned long *older_than_this)
+                              unsigned long *older_than_this)
 {
        LIST_HEAD(tmp);
        struct list_head *pos, *node;
        struct super_block *sb = NULL;
        struct inode *inode;
        int do_sb_sort = 0;
+       int moved = 0;
 
        while (!list_empty(delaying_queue)) {
                inode = wb_inode(delaying_queue->prev);
@@ -269,12 +269,13 @@ static void move_expired_inodes(struct list_head *delaying_queue,
                        do_sb_sort = 1;
                sb = inode->i_sb;
                list_move(&inode->i_wb_list, &tmp);
+               moved++;
        }
 
        /* just one sb in list, splice to dispatch_queue and we're done */
        if (!do_sb_sort) {
                list_splice(&tmp, dispatch_queue);
-               return;
+               goto out;
        }
 
        /* Move inodes from one superblock together */
@@ -286,6 +287,8 @@ static void move_expired_inodes(struct list_head *delaying_queue,
                        list_move(&inode->i_wb_list, dispatch_queue);
                }
        }
+out:
+       return moved;
 }
 
 /*
@@ -301,9 +304,11 @@ static void move_expired_inodes(struct list_head *delaying_queue,
  */
 static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
 {
-       assert_spin_locked(&inode_wb_list_lock);
+       int moved;
+       assert_spin_locked(&wb->list_lock);
        list_splice_init(&wb->b_more_io, &wb->b_io);
-       move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
+       moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
+       trace_writeback_queue_io(wb, older_than_this, moved);
 }
 
 static int write_inode(struct inode *inode, struct writeback_control *wbc)
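move_expired_inodes() above now reports how many inodes it moved so queue_io() can pass the count to the new trace point. A standalone sketch of the expiry idea, using a simplified singly linked list ordered oldest-first (fake_inode and expire_old are made-up names; the kernel walks a doubly linked list from its tail instead):

/* Illustrative only: move entries no newer than a cutoff and count them. */
#include <stdio.h>

struct fake_inode {
        unsigned long dirtied_when;     /* pseudo-jiffies timestamp */
        struct fake_inode *next;
};

/* Move expired entries from *delaying to *dispatch, return how many moved. */
static int expire_old(struct fake_inode **delaying, struct fake_inode **dispatch,
                      unsigned long older_than_this)
{
        int moved = 0;

        while (*delaying && (*delaying)->dirtied_when <= older_than_this) {
                struct fake_inode *inode = *delaying;

                *delaying = inode->next;
                inode->next = *dispatch;        /* push onto the dispatch list */
                *dispatch = inode;
                moved++;
        }
        return moved;
}

int main(void)
{
        struct fake_inode c = { 30, NULL }, b = { 20, &c }, a = { 10, &b };
        struct fake_inode *delaying = &a, *dispatch = NULL;

        printf("moved %d\n", expire_old(&delaying, &dispatch, 20));
        return 0;
}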
@@ -316,7 +321,8 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc)
 /*
  * Wait for writeback on an inode to complete.
  */
-static void inode_wait_for_writeback(struct inode *inode)
+static void inode_wait_for_writeback(struct inode *inode,
+                                     struct bdi_writeback *wb)
 {
        DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
        wait_queue_head_t *wqh;
@@ -324,15 +330,15 @@ static void inode_wait_for_writeback(struct inode *inode)
        wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
        while (inode->i_state & I_SYNC) {
                spin_unlock(&inode->i_lock);
-               spin_unlock(&inode_wb_list_lock);
+               spin_unlock(&wb->list_lock);
                __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
-               spin_lock(&inode_wb_list_lock);
+               spin_lock(&wb->list_lock);
                spin_lock(&inode->i_lock);
        }
 }
 
 /*
- * Write out an inode's dirty pages. Called under inode_wb_list_lock and
+ * Write out an inode's dirty pages. Called under wb->list_lock and
  * inode->i_lock. Either the caller has an active reference on the inode or
  * the inode has I_WILL_FREE set.
  *
@@ -343,13 +349,15 @@ static void inode_wait_for_writeback(struct inode *inode)
  * livelocks, etc.
  */
 static int
-writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
+writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
+                       struct writeback_control *wbc)
 {
        struct address_space *mapping = inode->i_mapping;
+       long nr_to_write = wbc->nr_to_write;
        unsigned dirty;
        int ret;
 
-       assert_spin_locked(&inode_wb_list_lock);
+       assert_spin_locked(&wb->list_lock);
        assert_spin_locked(&inode->i_lock);
 
        if (!atomic_read(&inode->i_count))
@@ -367,14 +375,16 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                 * completed a full scan of b_io.
                 */
                if (wbc->sync_mode != WB_SYNC_ALL) {
-                       requeue_io(inode);
+                       requeue_io(inode, wb);
+                       trace_writeback_single_inode_requeue(inode, wbc,
+                                                            nr_to_write);
                        return 0;
                }
 
                /*
                 * It's a data-integrity sync. We must wait.
                 */
-               inode_wait_for_writeback(inode);
+               inode_wait_for_writeback(inode, wb);
        }
 
        BUG_ON(inode->i_state & I_SYNC);
@@ -383,7 +393,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
        inode->i_state |= I_SYNC;
        inode->i_state &= ~I_DIRTY_PAGES;
        spin_unlock(&inode->i_lock);
-       spin_unlock(&inode_wb_list_lock);
+       spin_unlock(&wb->list_lock);
 
        ret = do_writepages(mapping, wbc);
 
@@ -414,10 +424,19 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                        ret = err;
        }
 
-       spin_lock(&inode_wb_list_lock);
+       spin_lock(&wb->list_lock);
        spin_lock(&inode->i_lock);
        inode->i_state &= ~I_SYNC;
        if (!(inode->i_state & I_FREEING)) {
+               /*
+                * Sync livelock prevention. Each inode is tagged and synced in
+                * one shot. If still dirty, it will be redirty_tail()'ed below.
+                * Update the dirty time to prevent enqueue and sync it again.
+                */
+               if ((inode->i_state & I_DIRTY) &&
+                   (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
+                       inode->dirtied_when = jiffies;
+
                if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
                        /*
                         * We didn't write back all the pages. nfs_writepages()
@@ -428,7 +447,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                                /*
                                 * slice used up: queue for next turn
                                 */
-                               requeue_io(inode);
+                               requeue_io(inode, wb);
                        } else {
                                /*
                                 * Writeback blocked by something other than
@@ -437,7 +456,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                                 * retrying writeback of the dirty page/inode
                                 * that cannot be performed immediately.
                                 */
-                               redirty_tail(inode);
+                               redirty_tail(inode, wb);
                        }
                } else if (inode->i_state & I_DIRTY) {
                        /*
@@ -446,7 +465,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                         * submission or metadata updates after data IO
                         * completion.
                         */
-                       redirty_tail(inode);
+                       redirty_tail(inode, wb);
                } else {
                        /*
                         * The inode is clean. At this point we either have
@@ -457,9 +476,41 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                }
        }
        inode_sync_complete(inode);
+       trace_writeback_single_inode(inode, wbc, nr_to_write);
        return ret;
 }
 
+static long writeback_chunk_size(struct backing_dev_info *bdi,
+                                 struct wb_writeback_work *work)
+{
+       long pages;
+
+       /*
+        * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
+        * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
+        * here avoids calling into writeback_inodes_wb() more than once.
+        *
+        * The intended call sequence for WB_SYNC_ALL writeback is:
+        *
+        *      wb_writeback()
+        *          writeback_sb_inodes()       <== called only once
+        *              write_cache_pages()     <== called once for each inode
+        *                  (quickly) tag currently dirty pages
+        *                  (maybe slowly) sync all tagged pages
+        */
+       if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
+               pages = LONG_MAX;
+       else {
+               pages = min(bdi->avg_write_bandwidth / 2,
+                           global_dirty_limit / DIRTY_SCOPE);
+               pages = min(pages, work->nr_pages);
+               pages = round_down(pages + MIN_WRITEBACK_PAGES,
+                                  MIN_WRITEBACK_PAGES);
+       }
+
+       return pages;
+}
+
 /*
  * Write a portion of b_io inodes which belong to @sb.
  *
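writeback_chunk_size() above caps each WB_SYNC_NONE chunk at half the device's measured write bandwidth, bounded by a share of the global dirty limit and rounded to a writeback granule. A standalone sketch of that arithmetic; the constants and sample numbers below are assumptions for illustration, not the kernel's definitions:

/* Illustrative chunk-size arithmetic; the constants are assumed values. */
#include <stdio.h>

#define MIN_WRITEBACK_PAGES     1024L   /* assumed rounding granule, in pages */
#define DIRTY_SCOPE             8L      /* assumed share of the global dirty limit */
#define min(a, b)               ((a) < (b) ? (a) : (b))
#define round_down(x, m)        (((x) / (m)) * (m))

static long chunk_size(long avg_write_bandwidth, long global_dirty_limit,
                       long nr_pages)
{
        /* half the measured bandwidth, bounded by the dirty-limit share */
        long pages = min(avg_write_bandwidth / 2,
                         global_dirty_limit / DIRTY_SCOPE);

        pages = min(pages, nr_pages);
        /* adding one granule before rounding down keeps small requests non-zero */
        return round_down(pages + MIN_WRITEBACK_PAGES, MIN_WRITEBACK_PAGES);
}

int main(void)
{
        /* e.g. ~25600 pages of bandwidth, a 1M-page dirty limit, 300000 pages asked for */
        printf("chunk = %ld pages\n", chunk_size(25600, 1048576, 300000));
        return 0;
}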
@@ -467,24 +518,36 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
  * inodes. Otherwise write only ones which go sequentially
  * in reverse order.
  *
- * Return 1, if the caller writeback routine should be
- * interrupted. Otherwise return 0.
+ * Return the number of pages and/or inodes written.
  */
-static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
-               struct writeback_control *wbc, bool only_this_sb)
+static long writeback_sb_inodes(struct super_block *sb,
+                               struct bdi_writeback *wb,
+                               struct wb_writeback_work *work)
 {
+       struct writeback_control wbc = {
+               .sync_mode = work->sync_mode,
+               .tagged_writepages = work->tagged_writepages,
+               .for_kupdate = work->for_kupdate,
+               .for_background = work->for_background,
+               .range_cyclic = work->range_cyclic,
+               .range_start = 0,
+               .range_end = LLONG_MAX,
+       };
+       unsigned long start_time = jiffies;
+       long write_chunk;
+       long wrote = 0;  /* count both pages and inodes */
+
        while (!list_empty(&wb->b_io)) {
-               long pages_skipped;
                struct inode *inode = wb_inode(wb->b_io.prev);
 
                if (inode->i_sb != sb) {
-                       if (only_this_sb) {
+                       if (work->sb) {
                                /*
                                 * We only want to write back data for this
                                 * superblock, move all inodes not belonging
                                 * to it back onto the dirty list.
                                 */
-                               redirty_tail(inode);
+                               redirty_tail(inode, wb);
                                continue;
                        }
 
@@ -493,7 +556,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
                         * Bounce back to the caller to unpin this and
                         * pin the next superblock.
                         */
-                       return 0;
+                       break;
                }
 
                /*
@@ -504,95 +567,91 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
                spin_lock(&inode->i_lock);
                if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
                        spin_unlock(&inode->i_lock);
-                       requeue_io(inode);
+                       redirty_tail(inode, wb);
                        continue;
                }
-
-               /*
-                * Was this inode dirtied after sync_sb_inodes was called?
-                * This keeps sync from extra jobs and livelock.
-                */
-               if (inode_dirtied_after(inode, wbc->wb_start)) {
-                       spin_unlock(&inode->i_lock);
-                       return 1;
-               }
-
                __iget(inode);
+               write_chunk = writeback_chunk_size(wb->bdi, work);
+               wbc.nr_to_write = write_chunk;
+               wbc.pages_skipped = 0;
 
-               pages_skipped = wbc->pages_skipped;
-               writeback_single_inode(inode, wbc);
-               if (wbc->pages_skipped != pages_skipped) {
+               writeback_single_inode(inode, wb, &wbc);
+
+               work->nr_pages -= write_chunk - wbc.nr_to_write;
+               wrote += write_chunk - wbc.nr_to_write;
+               if (!(inode->i_state & I_DIRTY))
+                       wrote++;
+               if (wbc.pages_skipped) {
                        /*
                         * writeback is not making progress due to locked
                         * buffers. Skip this inode for now.
                         */
-                       redirty_tail(inode);
+                       redirty_tail(inode, wb);
                }
                spin_unlock(&inode->i_lock);
-               spin_unlock(&inode_wb_list_lock);
+               spin_unlock(&wb->list_lock);
                iput(inode);
                cond_resched();
-               spin_lock(&inode_wb_list_lock);
-               if (wbc->nr_to_write <= 0) {
-                       wbc->more_io = 1;
-                       return 1;
+               spin_lock(&wb->list_lock);
+               /*
+                * bail out to wb_writeback() often enough to check
+                * background threshold and other termination conditions.
+                */
+               if (wrote) {
+                       if (time_is_before_jiffies(start_time + HZ / 10UL))
+                               break;
+                       if (work->nr_pages <= 0)
+                               break;
                }
-               if (!list_empty(&wb->b_more_io))
-                       wbc->more_io = 1;
        }
-       /* b_io is empty */
-       return 1;
+       return wrote;
 }
 
-void writeback_inodes_wb(struct bdi_writeback *wb,
-               struct writeback_control *wbc)
+static long __writeback_inodes_wb(struct bdi_writeback *wb,
+                                 struct wb_writeback_work *work)
 {
-       int ret = 0;
-
-       if (!wbc->wb_start)
-               wbc->wb_start = jiffies; /* livelock avoidance */
-       spin_lock(&inode_wb_list_lock);
-       if (!wbc->for_kupdate || list_empty(&wb->b_io))
-               queue_io(wb, wbc->older_than_this);
+       unsigned long start_time = jiffies;
+       long wrote = 0;
 
        while (!list_empty(&wb->b_io)) {
                struct inode *inode = wb_inode(wb->b_io.prev);
                struct super_block *sb = inode->i_sb;
 
                if (!grab_super_passive(sb)) {
-                       requeue_io(inode);
+                       requeue_io(inode, wb);
                        continue;
                }
-               ret = writeback_sb_inodes(sb, wb, wbc, false);
+               wrote += writeback_sb_inodes(sb, wb, work);
                drop_super(sb);
 
-               if (ret)
-                       break;
+               /* refer to the same tests at the end of writeback_sb_inodes */
+               if (wrote) {
+                       if (time_is_before_jiffies(start_time + HZ / 10UL))
+                               break;
+                       if (work->nr_pages <= 0)
+                               break;
+               }
        }
-       spin_unlock(&inode_wb_list_lock);
        /* Leave any unwritten inodes on b_io */
+       return wrote;
 }
 
-static void __writeback_inodes_sb(struct super_block *sb,
-               struct bdi_writeback *wb, struct writeback_control *wbc)
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
 {
-       WARN_ON(!rwsem_is_locked(&sb->s_umount));
+       struct wb_writeback_work work = {
+               .nr_pages       = nr_pages,
+               .sync_mode      = WB_SYNC_NONE,
+               .range_cyclic   = 1,
+       };
 
-       spin_lock(&inode_wb_list_lock);
-       if (!wbc->for_kupdate || list_empty(&wb->b_io))
-               queue_io(wb, wbc->older_than_this);
-       writeback_sb_inodes(sb, wb, wbc, true);
-       spin_unlock(&inode_wb_list_lock);
-}
+       spin_lock(&wb->list_lock);
+       if (list_empty(&wb->b_io))
+               queue_io(wb, NULL);
+       __writeback_inodes_wb(wb, &work);
+       spin_unlock(&wb->list_lock);
 
-/*
- * The maximum number of pages to writeout in a single bdi flush/kupdate
- * operation.  We do this so we don't hold I_SYNC against an inode for
- * enormous amounts of time, which would block a userspace task which has
- * been forced to throttle against that inode. Also, the code reevaluates
- * the dirty each time it has written this many pages.
- */
-#define MAX_WRITEBACK_PAGES     1024
+       return nr_pages - work.nr_pages;
+}
 
 static inline bool over_bground_thresh(void)
 {
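writeback_sb_inodes() and __writeback_inodes_wb() above now return to wb_writeback() after roughly a tenth of a second of work (HZ / 10 jiffies) once any progress has been made, so the caller can re-check its termination conditions. A rough userspace sketch of that batching pattern (process_batch and now_ms are invented names; CLOCK_MONOTONIC milliseconds stand in for jiffies):

/* Illustrative only: do work in bounded-time batches, then return to the caller. */
#include <stdio.h>
#include <time.h>

static long now_ms(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000L + ts.tv_nsec / 1000000L;
}

/* Process up to *nr_items, but stop after ~100ms once progress was made. */
static long process_batch(long *nr_items)
{
        long start = now_ms();
        long done = 0;

        while (*nr_items > 0) {
                (*nr_items)--;                  /* stand-in for writing one page */
                done++;
                if (done && now_ms() - start > 100)
                        break;                  /* let the caller re-evaluate */
        }
        return done;
}

int main(void)
{
        long todo = 1000000;

        while (todo > 0)
                printf("batch wrote %ld\n", process_batch(&todo));
        return 0;
}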
@@ -604,6 +663,16 @@ static inline bool over_bground_thresh(void)
                global_page_state(NR_UNSTABLE_NFS) > background_thresh);
 }
 
+/*
+ * Called under wb->list_lock. If there are multiple wb per bdi,
+ * only the flusher working on the first wb should do it.
+ */
+static void wb_update_bandwidth(struct bdi_writeback *wb,
+                               unsigned long start_time)
+{
+       __bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, start_time);
+}
+
 /*
  * Explicit flushing or periodic writeback of "old" data.
  *
@@ -622,47 +691,16 @@ static inline bool over_bground_thresh(void)
 static long wb_writeback(struct bdi_writeback *wb,
                         struct wb_writeback_work *work)
 {
-       struct writeback_control wbc = {
-               .sync_mode = work->sync_mode,
-               .older_than_this = NULL,
-               .for_kupdate = work->for_kupdate,
-               .for_background = work->for_background,
-               .range_cyclic = work->range_cyclic,
-       };
+       unsigned long wb_start = jiffies;
+       long nr_pages = work->nr_pages;
        unsigned long oldest_jif;
-       long wrote = 0;
-       long write_chunk;
        struct inode *inode;
+       long progress;
 
-       if (wbc.for_kupdate) {
-               wbc.older_than_this = &oldest_jif;
-               oldest_jif = jiffies -
-                               msecs_to_jiffies(dirty_expire_interval * 10);
-       }
-       if (!wbc.range_cyclic) {
-               wbc.range_start = 0;
-               wbc.range_end = LLONG_MAX;
-       }
+       oldest_jif = jiffies;
+       work->older_than_this = &oldest_jif;
 
-       /*
-        * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
-        * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
-        * here avoids calling into writeback_inodes_wb() more than once.
-        *
-        * The intended call sequence for WB_SYNC_ALL writeback is:
-        *
-        *      wb_writeback()
-        *          __writeback_inodes_sb()     <== called only once
-        *              write_cache_pages()     <== called once for each inode
-        *                  (quickly) tag currently dirty pages
-        *                  (maybe slowly) sync all tagged pages
-        */
-       if (wbc.sync_mode == WB_SYNC_NONE)
-               write_chunk = MAX_WRITEBACK_PAGES;
-       else
-               write_chunk = LONG_MAX;
-
-       wbc.wb_start = jiffies; /* livelock avoidance */
+       spin_lock(&wb->list_lock);
        for (;;) {
                /*
                 * Stop writeback when nr_pages has been consumed
@@ -687,52 +725,54 @@ static long wb_writeback(struct bdi_writeback *wb,
                if (work->for_background && !over_bground_thresh())
                        break;
 
-               wbc.more_io = 0;
-               wbc.nr_to_write = write_chunk;
-               wbc.pages_skipped = 0;
+               if (work->for_kupdate) {
+                       oldest_jif = jiffies -
+                               msecs_to_jiffies(dirty_expire_interval * 10);
+                       work->older_than_this = &oldest_jif;
+               }
 
-               trace_wbc_writeback_start(&wbc, wb->bdi);
+               trace_writeback_start(wb->bdi, work);
+               if (list_empty(&wb->b_io))
+                       queue_io(wb, work->older_than_this);
                if (work->sb)
-                       __writeback_inodes_sb(work->sb, wb, &wbc);
+                       progress = writeback_sb_inodes(work->sb, wb, work);
                else
-                       writeback_inodes_wb(wb, &wbc);
-               trace_wbc_writeback_written(&wbc, wb->bdi);
+                       progress = __writeback_inodes_wb(wb, work);
+               trace_writeback_written(wb->bdi, work);
 
-               work->nr_pages -= write_chunk - wbc.nr_to_write;
-               wrote += write_chunk - wbc.nr_to_write;
+               wb_update_bandwidth(wb, wb_start);
 
                /*
-                * If we consumed everything, see if we have more
+                * Did we write something? Try for more
+                *
+                * Dirty inodes are moved to b_io for writeback in batches.
+                * The completion of the current batch does not necessarily
+                * mean the overall work is done. So we keep looping as long
+                * as made some progress on cleaning pages or inodes.
                 */
-               if (wbc.nr_to_write <= 0)
+               if (progress)
                        continue;
                /*
-                * Didn't write everything and we don't have more IO, bail
+                * No more inodes for IO, bail
                 */
-               if (!wbc.more_io)
+               if (list_empty(&wb->b_more_io))
                        break;
-               /*
-                * Did we write something? Try for more
-                */
-               if (wbc.nr_to_write < write_chunk)
-                       continue;
                /*
                 * Nothing written. Wait for some inode to
                 * become available for writeback. Otherwise
                 * we'll just busyloop.
                 */
-               spin_lock(&inode_wb_list_lock);
                if (!list_empty(&wb->b_more_io))  {
+                       trace_writeback_wait(wb->bdi, work);
                        inode = wb_inode(wb->b_more_io.prev);
-                       trace_wbc_writeback_wait(&wbc, wb->bdi);
                        spin_lock(&inode->i_lock);
-                       inode_wait_for_writeback(inode);
+                       inode_wait_for_writeback(inode, wb);
                        spin_unlock(&inode->i_lock);
                }
-               spin_unlock(&inode_wb_list_lock);
        }
+       spin_unlock(&wb->list_lock);
 
-       return wrote;
+       return nr_pages - work->nr_pages;
 }
 
 /*
@@ -1063,10 +1103,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                        }
 
                        spin_unlock(&inode->i_lock);
-                       spin_lock(&inode_wb_list_lock);
+                       spin_lock(&bdi->wb.list_lock);
                        inode->dirtied_when = jiffies;
                        list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
-                       spin_unlock(&inode_wb_list_lock);
+                       spin_unlock(&bdi->wb.list_lock);
 
                        if (wakeup_bdi)
                                bdi_wakeup_thread_delayed(bdi);
@@ -1162,10 +1202,11 @@ void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
 {
        DECLARE_COMPLETION_ONSTACK(done);
        struct wb_writeback_work work = {
-               .sb             = sb,
-               .sync_mode      = WB_SYNC_NONE,
-               .done           = &done,
-               .nr_pages       = nr,
+               .sb                     = sb,
+               .sync_mode              = WB_SYNC_NONE,
+               .tagged_writepages      = 1,
+               .done                   = &done,
+               .nr_pages               = nr,
        };
 
        WARN_ON(!rwsem_is_locked(&sb->s_umount));
@@ -1267,6 +1308,7 @@ EXPORT_SYMBOL(sync_inodes_sb);
  */
 int write_inode_now(struct inode *inode, int sync)
 {
+       struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
        int ret;
        struct writeback_control wbc = {
                .nr_to_write = LONG_MAX,
@@ -1279,11 +1321,11 @@ int write_inode_now(struct inode *inode, int sync)
                wbc.nr_to_write = 0;
 
        might_sleep();
-       spin_lock(&inode_wb_list_lock);
+       spin_lock(&wb->list_lock);
        spin_lock(&inode->i_lock);
-       ret = writeback_single_inode(inode, &wbc);
+       ret = writeback_single_inode(inode, wb, &wbc);
        spin_unlock(&inode->i_lock);
-       spin_unlock(&inode_wb_list_lock);
+       spin_unlock(&wb->list_lock);
        if (sync)
                inode_sync_wait(inode);
        return ret;
@@ -1303,13 +1345,14 @@ EXPORT_SYMBOL(write_inode_now);
  */
 int sync_inode(struct inode *inode, struct writeback_control *wbc)
 {
+       struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
        int ret;
 
-       spin_lock(&inode_wb_list_lock);
+       spin_lock(&wb->list_lock);
        spin_lock(&inode->i_lock);
-       ret = writeback_single_inode(inode, wbc);
+       ret = writeback_single_inode(inode, wb, wbc);
        spin_unlock(&inode->i_lock);
-       spin_unlock(&inode_wb_list_lock);
+       spin_unlock(&wb->list_lock);
        return ret;
 }
 EXPORT_SYMBOL(sync_inode);