@@ -54,20 +54,6 @@
  */
 static long ratelimit_pages = 32;
 
-/*
- * When balance_dirty_pages decides that the caller needs to perform some
- * non-background writeback, this is how many pages it will attempt to write.
- * It should be somewhat larger than dirtied pages to ensure that reasonably
- * large amounts of I/O are submitted.
- */
-static inline long sync_writeback_pages(unsigned long dirtied)
-{
-        if (dirtied < ratelimit_pages)
-                dirtied = ratelimit_pages;
-
-        return dirtied + dirtied / 2;
-}
-
 /* The following parameters are exported via /proc/sys/vm */
 
 /*
@@ -169,6 +155,8 @@ static void update_completion_period(void)
         int shift = calc_period_shift();
         prop_change_shift(&vm_completions, shift);
         prop_change_shift(&vm_dirties, shift);
+
+        writeback_set_ratelimit();
 }
 
 int dirty_background_ratio_handler(struct ctl_table *table, int write,
@@ -978,6 +966,23 @@ static void bdi_update_bandwidth(struct backing_dev_info *bdi,
         spin_unlock(&bdi->wb.list_lock);
 }
 
+/*
+ * After a task dirtied this many pages, balance_dirty_pages_ratelimited_nr()
+ * will look to see if it needs to start dirty throttling.
+ *
+ * If dirty_poll_interval is too low, big NUMA machines will call the expensive
+ * global_page_state() too often. So scale it near-sqrt to the safety margin
+ * (the number of pages we may dirty without exceeding the dirty limits).
+ */
+static unsigned long dirty_poll_interval(unsigned long dirty,
+                                         unsigned long thresh)
+{
+        if (thresh > dirty)
+                return 1UL << (ilog2(thresh - dirty) >> 1);
+
+        return 1;
+}
+
 /*
  * balance_dirty_pages() must be called by processes which are generating dirty
  * data.  It looks at the number of dirty pages in the machine and will force
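To make the near-sqrt scaling of dirty_poll_interval() above concrete, here is a
stand-alone user-space sketch (not part of the patch; ilog2() is open-coded as
my_ilog2() because the kernel helper is unavailable outside the kernel, and all
other names are local stand-ins):

#include <stdio.h>

/* integer log2 of a non-zero value, equivalent to the kernel's ilog2() */
static unsigned long my_ilog2(unsigned long n)
{
        unsigned long log = 0;

        while (n >>= 1)
                log++;
        return log;
}

/* mirrors dirty_poll_interval(): near-sqrt of the safety margin */
static unsigned long poll_interval(unsigned long dirty, unsigned long thresh)
{
        if (thresh > dirty)
                return 1UL << (my_ilog2(thresh - dirty) >> 1);
        return 1;
}

int main(void)
{
        unsigned long margin[] = { 16, 256, 4096, 65536, 1048576 };
        unsigned long i;

        for (i = 0; i < sizeof(margin) / sizeof(margin[0]); i++)
                printf("margin %8lu pages -> poll every %4lu dirtied pages\n",
                       margin[i], poll_interval(0, margin[i]));
        return 0;
}

With a 65536-page safety margin a task rechecks the global counters only once
per 256 dirtied pages, while a task within 16 pages of the limit rechecks every
4 pages, which is the trade-off the comment above describes.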
@@ -1112,6 +1117,9 @@ static void balance_dirty_pages(struct address_space *mapping,
         if (clear_dirty_exceeded && bdi->dirty_exceeded)
                 bdi->dirty_exceeded = 0;
 
+        current->nr_dirtied = 0;
+        current->nr_dirtied_pause = dirty_poll_interval(nr_dirty, dirty_thresh);
+
         if (writeback_in_progress(bdi))
                 return;
 
@@ -1138,7 +1146,7 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
         }
 }
 
-static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0;
+static DEFINE_PER_CPU(int, bdp_ratelimits);
 
 /**
  * balance_dirty_pages_ratelimited_nr - balance dirty memory state
@@ -1158,31 +1166,39 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
                                         unsigned long nr_pages_dirtied)
 {
         struct backing_dev_info *bdi = mapping->backing_dev_info;
-        unsigned long ratelimit;
-        unsigned long *p;
+        int ratelimit;
+        int *p;
 
         if (!bdi_cap_account_dirty(bdi))
                 return;
 
-        ratelimit = ratelimit_pages;
-        if (mapping->backing_dev_info->dirty_exceeded)
-                ratelimit = 8;
+        ratelimit = current->nr_dirtied_pause;
+        if (bdi->dirty_exceeded)
+                ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
+
+        current->nr_dirtied += nr_pages_dirtied;
 
+        preempt_disable();
         /*
-         * Check the rate limiting. Also, we do not want to throttle real-time
-         * tasks in balance_dirty_pages(). Period.
+         * This prevents one CPU from accumulating too many dirtied pages
+         * without calling into balance_dirty_pages(), which can happen when
+         * 1000+ tasks all start dirtying pages at exactly the same time and
+         * hence all honour a too-large initial task->nr_dirtied_pause.
          */
-        preempt_disable();
         p = &__get_cpu_var(bdp_ratelimits);
-        *p += nr_pages_dirtied;
-        if (unlikely(*p >= ratelimit)) {
-                ratelimit = sync_writeback_pages(*p);
+        if (unlikely(current->nr_dirtied >= ratelimit))
                 *p = 0;
-                preempt_enable();
-                balance_dirty_pages(mapping, ratelimit);
-                return;
+        else {
+                *p += nr_pages_dirtied;
+                if (unlikely(*p >= ratelimit_pages)) {
+                        *p = 0;
+                        ratelimit = 0;
+                }
         }
         preempt_enable();
+
+        if (unlikely(current->nr_dirtied >= ratelimit))
+                balance_dirty_pages(mapping, current->nr_dirtied);
 }
 EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
 
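For reference, the new fast-path decision can be modelled in user space. The
sketch below is a single-threaded approximation only: plain variables stand in
for current->nr_dirtied, current->nr_dirtied_pause, this CPU's bdp_ratelimits
counter and the real balance_dirty_pages(); none of these names are kernel
symbols, and the dirty_exceeded clamp and preemption handling are omitted.

#include <stdio.h>

static long nr_dirtied;                      /* stands in for current->nr_dirtied */
static long nr_dirtied_pause = 1024;         /* stands in for current->nr_dirtied_pause */
static long cpu_count;                       /* stands in for this CPU's bdp_ratelimits */
static const long global_ratelimit = 4096;   /* stands in for ratelimit_pages */

static void fake_balance_dirty_pages(long pages)
{
        printf("would throttle after %ld dirtied pages\n", pages);
        nr_dirtied = 0;         /* the real function resets current->nr_dirtied */
}

static void dirtied(long nr)
{
        long ratelimit = nr_dirtied_pause;

        nr_dirtied += nr;
        if (nr_dirtied >= ratelimit)
                cpu_count = 0;
        else {
                /* per-CPU backstop: many tasks on one CPU, none of which has
                 * reached its own pause threshold yet */
                cpu_count += nr;
                if (cpu_count >= global_ratelimit) {
                        cpu_count = 0;
                        ratelimit = 0;
                }
        }
        if (nr_dirtied >= ratelimit)
                fake_balance_dirty_pages(nr_dirtied);
}

int main(void)
{
        int i;

        for (i = 0; i < 64; i++)
                dirtied(64);    /* dirty 64 pages per call */
        return 0;
}

Note that with 4 KB pages, 32 >> (PAGE_SHIFT - 10) evaluates to 8, so once a
bdi has exceeded its dirty limit the per-task interval is clamped to at most 8
pages regardless of nr_dirtied_pause.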
@@ -1277,22 +1293,17 @@ void laptop_sync_completion(void)
  *
  * Here we set ratelimit_pages to a level which ensures that when all CPUs are
  * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
- * thresholds before writeback cuts in.
- *
- * But the limit should not be set too high. Because it also controls the
- * amount of memory which the balance_dirty_pages() caller has to write back.
- * If this is too large then the caller will block on the IO queue all the
- * time. So limit it to four megabytes - the balance_dirty_pages() caller
- * will write six megabyte chunks, max.
+ * thresholds.
  */
 
 void writeback_set_ratelimit(void)
 {
-        ratelimit_pages = vm_total_pages / (num_online_cpus() * 32);
+        unsigned long background_thresh;
+        unsigned long dirty_thresh;
+        global_dirty_limits(&background_thresh, &dirty_thresh);
+        ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
         if (ratelimit_pages < 16)
                 ratelimit_pages = 16;
-        if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024)
-                ratelimit_pages = (4096 * 1024) / PAGE_CACHE_SIZE;
 }
 
 static int __cpuinit
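As a worked example of the new sizing (illustrative numbers, not taken from the
patch): on an 8-CPU machine with a dirty_thresh of 1048576 pages (4 GB with
4 KB pages), ratelimit_pages = 1048576 / (8 * 32) = 4096. In the worst case
each CPU's bdp_ratelimits counter sits just below ratelimit_pages when its
tasks finally drop into balance_dirty_pages(), so the combined overshoot is
bounded by roughly 8 * 4096 = 32768 pages, i.e. 1/32 (about 3%) of
dirty_thresh, matching the comment above. The 16-page floor only matters on
machines whose dirty threshold is tiny relative to the CPU count.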