@@ -37,6 +37,9 @@ static struct class *bdi_class;
 DEFINE_SPINLOCK(bdi_lock);
 LIST_HEAD(bdi_list);
 
+/* bdi_wq serves all asynchronous writeback tasks */
+struct workqueue_struct *bdi_wq;
+
 void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
 {
 	if (wb1 < wb2) {
@@ -255,6 +258,11 @@ static int __init default_bdi_init(void)
 {
 	int err;
 
+	bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
+					      WQ_UNBOUND, 0);
+	if (!bdi_wq)
+		return -ENOMEM;
+
 	err = bdi_init(&default_backing_dev_info);
 	if (!err)
 		bdi_register(&default_backing_dev_info, NULL, "default");
@@ -269,26 +277,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
 	return wb_has_dirty_io(&bdi->wb);
 }
 
-static void wakeup_timer_fn(unsigned long data)
-{
-	struct backing_dev_info *bdi = (struct backing_dev_info *)data;
-
-	spin_lock_bh(&bdi->wb_lock);
-	if (bdi->wb.task) {
-		trace_writeback_wake_thread(bdi);
-		wake_up_process(bdi->wb.task);
-	} else if (bdi->dev) {
-		/*
-		 * When bdi tasks are inactive for long time, they are killed.
-		 * In this case we have to wake-up the forker thread which
-		 * should create and run the bdi thread.
-		 */
-		trace_writeback_wake_forker_thread(bdi);
-		wake_up_process(default_backing_dev_info.wb.task);
-	}
-	spin_unlock_bh(&bdi->wb_lock);
-}
-
 /*
  * This function is used when the first inode for this bdi is marked dirty. It
  * wakes-up the corresponding bdi thread which should then take care of the
@@ -305,176 +293,7 @@ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
 	unsigned long timeout;
 
 	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
-	mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout);
-}
-
-/*
- * Calculate the longest interval (jiffies) bdi threads are allowed to be
- * inactive.
- */
-static unsigned long bdi_longest_inactive(void)
-{
-	unsigned long interval;
-
-	interval = msecs_to_jiffies(dirty_writeback_interval * 10);
-	return max(5UL * 60 * HZ, interval);
-}
-
-/*
- * Clear pending bit and wakeup anybody waiting for flusher thread creation or
- * shutdown
- */
-static void bdi_clear_pending(struct backing_dev_info *bdi)
-{
-	clear_bit(BDI_pending, &bdi->state);
-	smp_mb__after_clear_bit();
-	wake_up_bit(&bdi->state, BDI_pending);
-}
-
-static int bdi_forker_thread(void *ptr)
-{
-	struct bdi_writeback *me = ptr;
-
-	current->flags |= PF_SWAPWRITE;
-	set_freezable();
-
-	/*
-	 * Our parent may run at a different priority, just set us to normal
-	 */
-	set_user_nice(current, 0);
-
-	for (;;) {
-		struct task_struct *task = NULL;
-		struct backing_dev_info *bdi;
-		enum {
-			NO_ACTION,   /* Nothing to do */
-			FORK_THREAD, /* Fork bdi thread */
-			KILL_THREAD, /* Kill inactive bdi thread */
-		} action = NO_ACTION;
-
-		/*
-		 * Temporary measure, we want to make sure we don't see
-		 * dirty data on the default backing_dev_info
-		 */
-		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) {
-			del_timer(&me->wakeup_timer);
-			wb_do_writeback(me, 0);
-		}
-
-		spin_lock_bh(&bdi_lock);
-		/*
-		 * In the following loop we are going to check whether we have
-		 * some work to do without any synchronization with tasks
-		 * waking us up to do work for them. Set the task state here
-		 * so that we don't miss wakeups after verifying conditions.
-		 */
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		list_for_each_entry(bdi, &bdi_list, bdi_list) {
-			bool have_dirty_io;
-
-			if (!bdi_cap_writeback_dirty(bdi) ||
-			     bdi_cap_flush_forker(bdi))
-				continue;
-
-			WARN(!test_bit(BDI_registered, &bdi->state),
-			     "bdi %p/%s is not registered!\n", bdi, bdi->name);
-
-			have_dirty_io = !list_empty(&bdi->work_list) ||
-					wb_has_dirty_io(&bdi->wb);
-
-			/*
-			 * If the bdi has work to do, but the thread does not
-			 * exist - create it.
-			 */
-			if (!bdi->wb.task && have_dirty_io) {
-				/*
-				 * Set the pending bit - if someone will try to
-				 * unregister this bdi - it'll wait on this bit.
-				 */
-				set_bit(BDI_pending, &bdi->state);
-				action = FORK_THREAD;
-				break;
-			}
-
-			spin_lock(&bdi->wb_lock);
-
-			/*
-			 * If there is no work to do and the bdi thread was
-			 * inactive long enough - kill it. The wb_lock is taken
-			 * to make sure no-one adds more work to this bdi and
-			 * wakes the bdi thread up.
-			 */
-			if (bdi->wb.task && !have_dirty_io &&
-			    time_after(jiffies, bdi->wb.last_active +
-						bdi_longest_inactive())) {
-				task = bdi->wb.task;
-				bdi->wb.task = NULL;
-				spin_unlock(&bdi->wb_lock);
-				set_bit(BDI_pending, &bdi->state);
-				action = KILL_THREAD;
-				break;
-			}
-			spin_unlock(&bdi->wb_lock);
-		}
-		spin_unlock_bh(&bdi_lock);
-
-		/* Keep working if default bdi still has things to do */
-		if (!list_empty(&me->bdi->work_list))
-			__set_current_state(TASK_RUNNING);
-
-		switch (action) {
-		case FORK_THREAD:
-			__set_current_state(TASK_RUNNING);
-			task = kthread_create(bdi_writeback_thread, &bdi->wb,
-					      "flush-%s", dev_name(bdi->dev));
-			if (IS_ERR(task)) {
-				/*
-				 * If thread creation fails, force writeout of
-				 * the bdi from the thread. Hopefully 1024 is
-				 * large enough for efficient IO.
-				 */
-				writeback_inodes_wb(&bdi->wb, 1024,
-						    WB_REASON_FORKER_THREAD);
-			} else {
-				/*
-				 * The spinlock makes sure we do not lose
-				 * wake-ups when racing with 'bdi_queue_work()'.
-				 * And as soon as the bdi thread is visible, we
-				 * can start it.
-				 */
-				spin_lock_bh(&bdi->wb_lock);
-				bdi->wb.task = task;
-				spin_unlock_bh(&bdi->wb_lock);
-				wake_up_process(task);
-			}
-			bdi_clear_pending(bdi);
-			break;
-
-		case KILL_THREAD:
-			__set_current_state(TASK_RUNNING);
-			kthread_stop(task);
-			bdi_clear_pending(bdi);
-			break;
-
-		case NO_ACTION:
-			if (!wb_has_dirty_io(me) || !dirty_writeback_interval)
-				/*
-				 * There are no dirty data. The only thing we
-				 * should now care about is checking for
-				 * inactive bdi threads and killing them. Thus,
-				 * let's sleep for longer time, save energy and
-				 * be friendly for battery-driven devices.
-				 */
-				schedule_timeout(bdi_longest_inactive());
-			else
-				schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
-			try_to_freeze();
-			break;
-		}
-	}
-
-	return 0;
+	mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);
 }
 
 /*
@@ -487,6 +306,9 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
 	spin_unlock_bh(&bdi_lock);
 
 	synchronize_rcu_expedited();
+
+	/* bdi_list is now unused, clear it to mark @bdi dying */
+	INIT_LIST_HEAD(&bdi->bdi_list);
 }
 
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
@@ -506,20 +328,6 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 
 	bdi->dev = dev;
 
-	/*
-	 * Just start the forker thread for our default backing_dev_info,
-	 * and add other bdi's to the list. They will get a thread created
-	 * on-demand when they need it.
-	 */
-	if (bdi_cap_flush_forker(bdi)) {
-		struct bdi_writeback *wb = &bdi->wb;
-
-		wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s",
-				       dev_name(dev));
-		if (IS_ERR(wb->task))
-			return PTR_ERR(wb->task);
-	}
-
 	bdi_debug_register(bdi, dev_name(dev));
 	set_bit(BDI_registered, &bdi->state);
 
@@ -543,8 +351,6 @@ EXPORT_SYMBOL(bdi_register_dev);
  */
 static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 {
-	struct task_struct *task;
-
 	if (!bdi_cap_writeback_dirty(bdi))
 		return;
 
@@ -554,22 +360,20 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 	bdi_remove_from_list(bdi);
 
 	/*
-	 * If setup is pending, wait for that to complete first
+	 * Drain work list and shutdown the delayed_work. At this point,
+	 * @bdi->bdi_list is empty telling bdi_writeback_workfn() that @bdi
+	 * is dying and its work_list needs to be drained no matter what.
 	 */
-	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
-			TASK_UNINTERRUPTIBLE);
+	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
+	flush_delayed_work(&bdi->wb.dwork);
+	WARN_ON(!list_empty(&bdi->work_list));
 
 	/*
-	 * Finally, kill the kernel thread. We don't need to be RCU
-	 * safe anymore, since the bdi is gone from visibility.
+	 * This shouldn't be necessary unless @bdi for some reason has
+	 * unflushed dirty IO after work_list is drained. Do it anyway
+	 * just in case.
 	 */
-	spin_lock_bh(&bdi->wb_lock);
-	task = bdi->wb.task;
-	bdi->wb.task = NULL;
-	spin_unlock_bh(&bdi->wb_lock);
-
-	if (task)
-		kthread_stop(task);
+	cancel_delayed_work_sync(&bdi->wb.dwork);
 }
 
 /*
@@ -595,10 +399,8 @@ void bdi_unregister(struct backing_dev_info *bdi)
 		bdi_set_min_ratio(bdi, 0);
 		trace_writeback_bdi_unregister(bdi);
 		bdi_prune_sb(bdi);
-		del_timer_sync(&bdi->wb.wakeup_timer);
 
-		if (!bdi_cap_flush_forker(bdi))
-			bdi_wb_shutdown(bdi);
+		bdi_wb_shutdown(bdi);
 		bdi_debug_unregister(bdi);
 
 		spin_lock_bh(&bdi->wb_lock);
@@ -620,7 +422,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
 	INIT_LIST_HEAD(&wb->b_io);
 	INIT_LIST_HEAD(&wb->b_more_io);
 	spin_lock_init(&wb->list_lock);
-	setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
+	INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
 }
 
 /*
@@ -693,12 +495,11 @@ void bdi_destroy(struct backing_dev_info *bdi)
 	bdi_unregister(bdi);
 
 	/*
-	 * If bdi_unregister() had already been called earlier, the
-	 * wakeup_timer could still be armed because bdi_prune_sb()
-	 * can race with the bdi_wakeup_thread_delayed() calls from
-	 * __mark_inode_dirty().
+	 * If bdi_unregister() had already been called earlier, the dwork
+	 * could still be pending because bdi_prune_sb() can race with the
+	 * bdi_wakeup_thread_delayed() calls from __mark_inode_dirty().
 	 */
-	del_timer_sync(&bdi->wb.wakeup_timer);
+	cancel_delayed_work_sync(&bdi->wb.dwork);
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
 		percpu_counter_destroy(&bdi->bdi_stat[i]);