|
@@ -63,24 +63,16 @@ struct bdi_work {
|
|
|
};
|
|
|
|
|
|
enum {
|
|
|
- WS_USED_B = 0,
|
|
|
- WS_ONSTACK_B,
|
|
|
+ WS_INPROGRESS = 0,
|
|
|
+ WS_ONSTACK,
|
|
|
};
|
|
|
|
|
|
-#define WS_USED (1 << WS_USED_B)
|
|
|
-#define WS_ONSTACK (1 << WS_ONSTACK_B)
|
|
|
-
|
|
|
-static inline bool bdi_work_on_stack(struct bdi_work *work)
|
|
|
-{
|
|
|
- return test_bit(WS_ONSTACK_B, &work->state);
|
|
|
-}
|
|
|
-
|
|
|
static inline void bdi_work_init(struct bdi_work *work,
|
|
|
struct wb_writeback_args *args)
|
|
|
{
|
|
|
INIT_RCU_HEAD(&work->rcu_head);
|
|
|
work->args = *args;
|
|
|
- work->state = WS_USED;
|
|
|
+ __set_bit(WS_INPROGRESS, &work->state);
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -95,43 +87,16 @@ int writeback_in_progress(struct backing_dev_info *bdi)
|
|
|
return !list_empty(&bdi->work_list);
|
|
|
}
|
|
|
|
|
|
-static void bdi_work_clear(struct bdi_work *work)
|
|
|
-{
|
|
|
- clear_bit(WS_USED_B, &work->state);
|
|
|
- smp_mb__after_clear_bit();
|
|
|
- /*
|
|
|
- * work can have disappeared at this point. bit waitq functions
|
|
|
- * should be able to tolerate this, provided bdi_sched_wait does
|
|
|
- * not dereference it's pointer argument.
|
|
|
- */
|
|
|
- wake_up_bit(&work->state, WS_USED_B);
|
|
|
-}
|
|
|
-
|
|
|
static void bdi_work_free(struct rcu_head *head)
|
|
|
{
|
|
|
struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
|
|
|
|
|
|
- if (!bdi_work_on_stack(work))
|
|
|
- kfree(work);
|
|
|
- else
|
|
|
- bdi_work_clear(work);
|
|
|
-}
|
|
|
-
|
|
|
-static void wb_work_complete(struct bdi_work *work)
|
|
|
-{
|
|
|
- const enum writeback_sync_modes sync_mode = work->args.sync_mode;
|
|
|
- int onstack = bdi_work_on_stack(work);
|
|
|
+ clear_bit(WS_INPROGRESS, &work->state);
|
|
|
+ smp_mb__after_clear_bit();
|
|
|
+ wake_up_bit(&work->state, WS_INPROGRESS);
|
|
|
|
|
|
- /*
|
|
|
- * For allocated work, we can clear the done/seen bit right here.
|
|
|
- * For on-stack work, we need to postpone both the clear and free
|
|
|
- * to after the RCU grace period, since the stack could be invalidated
|
|
|
- * as soon as bdi_work_clear() has done the wakeup.
|
|
|
- */
|
|
|
- if (!onstack)
|
|
|
- bdi_work_clear(work);
|
|
|
- if (sync_mode == WB_SYNC_NONE || onstack)
|
|
|
- call_rcu(&work->rcu_head, bdi_work_free);
|
|
|
+ if (!test_bit(WS_ONSTACK, &work->state))
|
|
|
+ kfree(work);
|
|
|
}
|
|
|
|
|
|
static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
|
|
@@ -147,7 +112,7 @@ static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
|
|
|
list_del_rcu(&work->list);
|
|
|
spin_unlock(&bdi->wb_lock);
|
|
|
|
|
|
- wb_work_complete(work);
|
|
|
+ call_rcu(&work->rcu_head, bdi_work_free);
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -185,9 +150,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
|
|
|
* Used for on-stack allocated work items. The caller needs to wait until
|
|
|
* the wb threads have acked the work before it's safe to continue.
|
|
|
*/
|
|
|
-static void bdi_wait_on_work_clear(struct bdi_work *work)
|
|
|
+static void bdi_wait_on_work_done(struct bdi_work *work)
|
|
|
{
|
|
|
- wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
|
|
|
+ wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait,
|
|
|
TASK_UNINTERRUPTIBLE);
|
|
|
}
|
|
|
|
|
@@ -213,37 +178,28 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * bdi_sync_writeback - start and wait for writeback
|
|
|
- * @bdi: the backing device to write from
|
|
|
+ * bdi_queue_work_onstack - start and wait for writeback
|
|
|
* @sb: write inodes from this super_block
|
|
|
*
|
|
|
* Description:
|
|
|
- * This does WB_SYNC_ALL data integrity writeback and waits for the
|
|
|
- * IO to complete. Callers must hold the sb s_umount semaphore for
|
|
|
+ * This function initiates writeback and waits for the operation to
|
|
|
+ * complete. Callers must hold the sb s_umount semaphore for
|
|
|
* reading, to avoid having the super disappear before we are done.
|
|
|
*/
|
|
|
-static void bdi_sync_writeback(struct backing_dev_info *bdi,
|
|
|
- struct super_block *sb)
|
|
|
+static void bdi_queue_work_onstack(struct wb_writeback_args *args)
|
|
|
{
|
|
|
- struct wb_writeback_args args = {
|
|
|
- .sb = sb,
|
|
|
- .sync_mode = WB_SYNC_ALL,
|
|
|
- .nr_pages = LONG_MAX,
|
|
|
- .range_cyclic = 0,
|
|
|
- };
|
|
|
struct bdi_work work;
|
|
|
|
|
|
- bdi_work_init(&work, &args);
|
|
|
- work.state |= WS_ONSTACK;
|
|
|
+ bdi_work_init(&work, args);
|
|
|
+ __set_bit(WS_ONSTACK, &work.state);
|
|
|
|
|
|
- bdi_queue_work(bdi, &work);
|
|
|
- bdi_wait_on_work_clear(&work);
|
|
|
+ bdi_queue_work(args->sb->s_bdi, &work);
|
|
|
+ bdi_wait_on_work_done(&work);
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* bdi_start_writeback - start writeback
|
|
|
* @bdi: the backing device to write from
|
|
|
- * @sb: write inodes from this super_block
|
|
|
* @nr_pages: the number of pages to write
|
|
|
*
|
|
|
* Description:
|
|
@@ -252,25 +208,34 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
|
|
|
* completion. Caller need not hold sb s_umount semaphore.
|
|
|
*
|
|
|
*/
|
|
|
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
|
|
|
- long nr_pages)
|
|
|
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
|
|
|
{
|
|
|
struct wb_writeback_args args = {
|
|
|
- .sb = sb,
|
|
|
.sync_mode = WB_SYNC_NONE,
|
|
|
.nr_pages = nr_pages,
|
|
|
.range_cyclic = 1,
|
|
|
};
|
|
|
|
|
|
- /*
|
|
|
- * We treat @nr_pages=0 as the special case to do background writeback,
|
|
|
- * ie. to sync pages until the background dirty threshold is reached.
|
|
|
- */
|
|
|
- if (!nr_pages) {
|
|
|
- args.nr_pages = LONG_MAX;
|
|
|
- args.for_background = 1;
|
|
|
- }
|
|
|
+ bdi_alloc_queue_work(bdi, &args);
|
|
|
+}
|
|
|
|
|
|
+/**
|
|
|
+ * bdi_start_background_writeback - start background writeback
|
|
|
+ * @bdi: the backing device to write from
|
|
|
+ *
|
|
|
+ * Description:
|
|
|
+ * This does WB_SYNC_NONE background writeback. The IO is only
|
|
|
+ * started when this function returns, we make no guarentees on
|
|
|
+ * completion. Caller need not hold sb s_umount semaphore.
|
|
|
+ */
|
|
|
+void bdi_start_background_writeback(struct backing_dev_info *bdi)
|
|
|
+{
|
|
|
+ struct wb_writeback_args args = {
|
|
|
+ .sync_mode = WB_SYNC_NONE,
|
|
|
+ .nr_pages = LONG_MAX,
|
|
|
+ .for_background = 1,
|
|
|
+ .range_cyclic = 1,
|
|
|
+ };
|
|
|
bdi_alloc_queue_work(bdi, &args);
|
|
|
}
|
|
|
|
|
@@ -561,48 +526,30 @@ select_queue:
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
-static void unpin_sb_for_writeback(struct super_block *sb)
|
|
|
-{
|
|
|
- up_read(&sb->s_umount);
|
|
|
- put_super(sb);
|
|
|
-}
|
|
|
-
|
|
|
-enum sb_pin_state {
|
|
|
- SB_PINNED,
|
|
|
- SB_NOT_PINNED,
|
|
|
- SB_PIN_FAILED
|
|
|
-};
|
|
|
-
|
|
|
/*
|
|
|
- * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
|
|
|
+ * For background writeback the caller does not have the sb pinned
|
|
|
* before calling writeback. So make sure that we do pin it, so it doesn't
|
|
|
* go away while we are writing inodes from it.
|
|
|
*/
|
|
|
-static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
|
|
|
- struct super_block *sb)
|
|
|
+static bool pin_sb_for_writeback(struct super_block *sb)
|
|
|
{
|
|
|
- /*
|
|
|
- * Caller must already hold the ref for this
|
|
|
- */
|
|
|
- if (wbc->sync_mode == WB_SYNC_ALL) {
|
|
|
- WARN_ON(!rwsem_is_locked(&sb->s_umount));
|
|
|
- return SB_NOT_PINNED;
|
|
|
- }
|
|
|
spin_lock(&sb_lock);
|
|
|
+ if (list_empty(&sb->s_instances)) {
|
|
|
+ spin_unlock(&sb_lock);
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
sb->s_count++;
|
|
|
+ spin_unlock(&sb_lock);
|
|
|
+
|
|
|
if (down_read_trylock(&sb->s_umount)) {
|
|
|
- if (sb->s_root) {
|
|
|
- spin_unlock(&sb_lock);
|
|
|
- return SB_PINNED;
|
|
|
- }
|
|
|
- /*
|
|
|
- * umounted, drop rwsem again and fall through to failure
|
|
|
- */
|
|
|
+ if (sb->s_root)
|
|
|
+ return true;
|
|
|
up_read(&sb->s_umount);
|
|
|
}
|
|
|
- sb->s_count--;
|
|
|
- spin_unlock(&sb_lock);
|
|
|
- return SB_PIN_FAILED;
|
|
|
+
|
|
|
+ put_super(sb);
|
|
|
+ return false;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -681,24 +628,31 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
|
|
|
struct inode *inode = list_entry(wb->b_io.prev,
|
|
|
struct inode, i_list);
|
|
|
struct super_block *sb = inode->i_sb;
|
|
|
- enum sb_pin_state state;
|
|
|
|
|
|
- if (wbc->sb && sb != wbc->sb) {
|
|
|
- /* super block given and doesn't
|
|
|
- match, skip this inode */
|
|
|
- redirty_tail(inode);
|
|
|
- continue;
|
|
|
- }
|
|
|
- state = pin_sb_for_writeback(wbc, sb);
|
|
|
+ if (wbc->sb) {
|
|
|
+ /*
|
|
|
+ * We are requested to write out inodes for a specific
|
|
|
+ * superblock. This means we already have s_umount
|
|
|
+ * taken by the caller which also waits for us to
|
|
|
+ * complete the writeout.
|
|
|
+ */
|
|
|
+ if (sb != wbc->sb) {
|
|
|
+ redirty_tail(inode);
|
|
|
+ continue;
|
|
|
+ }
|
|
|
|
|
|
- if (state == SB_PIN_FAILED) {
|
|
|
- requeue_io(inode);
|
|
|
- continue;
|
|
|
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
|
|
|
+
|
|
|
+ ret = writeback_sb_inodes(sb, wb, wbc);
|
|
|
+ } else {
|
|
|
+ if (!pin_sb_for_writeback(sb)) {
|
|
|
+ requeue_io(inode);
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ ret = writeback_sb_inodes(sb, wb, wbc);
|
|
|
+ drop_super(sb);
|
|
|
}
|
|
|
- ret = writeback_sb_inodes(sb, wb, wbc);
|
|
|
|
|
|
- if (state == SB_PINNED)
|
|
|
- unpin_sb_for_writeback(sb);
|
|
|
if (ret)
|
|
|
break;
|
|
|
}
|
|
@@ -911,7 +865,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
|
|
|
* If this isn't a data integrity operation, just notify
|
|
|
* that we have seen this work and we are now starting it.
|
|
|
*/
|
|
|
- if (args.sync_mode == WB_SYNC_NONE)
|
|
|
+ if (!test_bit(WS_ONSTACK, &work->state))
|
|
|
wb_clear_pending(wb, work);
|
|
|
|
|
|
wrote += wb_writeback(wb, &args);
|
|
@@ -920,7 +874,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
|
|
|
* This is a data integrity writeback, so only do the
|
|
|
* notification when we have completed the work.
|
|
|
*/
|
|
|
- if (args.sync_mode == WB_SYNC_ALL)
|
|
|
+ if (test_bit(WS_ONSTACK, &work->state))
|
|
|
wb_clear_pending(wb, work);
|
|
|
}
|
|
|
|
|
@@ -978,42 +932,32 @@ int bdi_writeback_task(struct bdi_writeback *wb)
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
- * Schedule writeback for all backing devices. This does WB_SYNC_NONE
|
|
|
- * writeback, for integrity writeback see bdi_sync_writeback().
|
|
|
+ * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
|
|
|
+ * the whole world.
|
|
|
*/
|
|
|
-static void bdi_writeback_all(struct super_block *sb, long nr_pages)
|
|
|
+void wakeup_flusher_threads(long nr_pages)
|
|
|
{
|
|
|
+ struct backing_dev_info *bdi;
|
|
|
struct wb_writeback_args args = {
|
|
|
- .sb = sb,
|
|
|
- .nr_pages = nr_pages,
|
|
|
.sync_mode = WB_SYNC_NONE,
|
|
|
};
|
|
|
- struct backing_dev_info *bdi;
|
|
|
|
|
|
- rcu_read_lock();
|
|
|
+ if (nr_pages) {
|
|
|
+ args.nr_pages = nr_pages;
|
|
|
+ } else {
|
|
|
+ args.nr_pages = global_page_state(NR_FILE_DIRTY) +
|
|
|
+ global_page_state(NR_UNSTABLE_NFS);
|
|
|
+ }
|
|
|
|
|
|
+ rcu_read_lock();
|
|
|
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
|
|
|
if (!bdi_has_dirty_io(bdi))
|
|
|
continue;
|
|
|
-
|
|
|
bdi_alloc_queue_work(bdi, &args);
|
|
|
}
|
|
|
-
|
|
|
rcu_read_unlock();
|
|
|
}
|
|
|
|
|
|
-/*
|
|
|
- * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
|
|
|
- * the whole world.
|
|
|
- */
|
|
|
-void wakeup_flusher_threads(long nr_pages)
|
|
|
-{
|
|
|
- if (nr_pages == 0)
|
|
|
- nr_pages = global_page_state(NR_FILE_DIRTY) +
|
|
|
- global_page_state(NR_UNSTABLE_NFS);
|
|
|
- bdi_writeback_all(NULL, nr_pages);
|
|
|
-}
|
|
|
-
|
|
|
static noinline void block_dump___mark_inode_dirty(struct inode *inode)
|
|
|
{
|
|
|
if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
|
|
@@ -1218,12 +1162,17 @@ void writeback_inodes_sb(struct super_block *sb)
|
|
|
{
|
|
|
unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
|
|
|
unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
|
|
|
- long nr_to_write;
|
|
|
+ struct wb_writeback_args args = {
|
|
|
+ .sb = sb,
|
|
|
+ .sync_mode = WB_SYNC_NONE,
|
|
|
+ };
|
|
|
+
|
|
|
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
|
|
|
|
|
|
- nr_to_write = nr_dirty + nr_unstable +
|
|
|
+ args.nr_pages = nr_dirty + nr_unstable +
|
|
|
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
|
|
|
|
|
|
- bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
|
|
|
+ bdi_queue_work_onstack(&args);
|
|
|
}
|
|
|
EXPORT_SYMBOL(writeback_inodes_sb);
|
|
|
|
|
@@ -1237,7 +1186,9 @@ EXPORT_SYMBOL(writeback_inodes_sb);
|
|
|
int writeback_inodes_sb_if_idle(struct super_block *sb)
|
|
|
{
|
|
|
if (!writeback_in_progress(sb->s_bdi)) {
|
|
|
+ down_read(&sb->s_umount);
|
|
|
writeback_inodes_sb(sb);
|
|
|
+ up_read(&sb->s_umount);
|
|
|
return 1;
|
|
|
} else
|
|
|
return 0;
|
|
@@ -1253,7 +1204,16 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
|
|
|
*/
|
|
|
void sync_inodes_sb(struct super_block *sb)
|
|
|
{
|
|
|
- bdi_sync_writeback(sb->s_bdi, sb);
|
|
|
+ struct wb_writeback_args args = {
|
|
|
+ .sb = sb,
|
|
|
+ .sync_mode = WB_SYNC_ALL,
|
|
|
+ .nr_pages = LONG_MAX,
|
|
|
+ .range_cyclic = 0,
|
|
|
+ };
|
|
|
+
|
|
|
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
|
|
|
+
|
|
|
+ bdi_queue_work_onstack(&args);
|
|
|
wait_sb_inodes(sb);
|
|
|
}
|
|
|
EXPORT_SYMBOL(sync_inodes_sb);
|