@@ -54,20 +54,6 @@
  */
 static long ratelimit_pages = 32;
 
-/*
- * When balance_dirty_pages decides that the caller needs to perform some
- * non-background writeback, this is how many pages it will attempt to write.
- * It should be somewhat larger than dirtied pages to ensure that reasonably
- * large amounts of I/O are submitted.
- */
-static inline long sync_writeback_pages(unsigned long dirtied)
-{
-        if (dirtied < ratelimit_pages)
-                dirtied = ratelimit_pages;
-
-        return dirtied + dirtied / 2;
-}
-
 /* The following parameters are exported via /proc/sys/vm */
 
 /*
@@ -169,6 +155,8 @@ static void update_completion_period(void)
         int shift = calc_period_shift();
         prop_change_shift(&vm_completions, shift);
         prop_change_shift(&vm_dirties, shift);
+
+        writeback_set_ratelimit();
 }
 
 int dirty_background_ratio_handler(struct ctl_table *table, int write,
@@ -978,6 +966,23 @@ static void bdi_update_bandwidth(struct backing_dev_info *bdi,
         spin_unlock(&bdi->wb.list_lock);
 }
 
+/*
+ * After a task dirtied this many pages, balance_dirty_pages_ratelimited_nr()
+ * will look to see if it needs to start dirty throttling.
+ *
+ * If dirty_poll_interval is too low, big NUMA machines will call the expensive
+ * global_page_state() too often. So scale it near-sqrt to the safety margin
+ * (the number of pages we may dirty without exceeding the dirty limits).
+ */
+static unsigned long dirty_poll_interval(unsigned long dirty,
+                                         unsigned long thresh)
+{
+        if (thresh > dirty)
+                return 1UL << (ilog2(thresh - dirty) >> 1);
+
+        return 1;
+}
+
 /*
  * balance_dirty_pages() must be called by processes which are generating dirty
  * data.  It looks at the number of dirty pages in the machine and will force
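To make the near-sqrt scaling of dirty_poll_interval() above concrete, here is a
stand-alone user-space sketch (not part of the patch; ilog2() is open-coded as
my_ilog2() because the kernel helper is unavailable outside the kernel, and all
other names are local stand-ins):

#include <stdio.h>

/* integer log2 of a non-zero value, equivalent to the kernel's ilog2() */
static unsigned long my_ilog2(unsigned long n)
{
        unsigned long log = 0;

        while (n >>= 1)
                log++;
        return log;
}

/* mirrors dirty_poll_interval(): near-sqrt of the safety margin */
static unsigned long poll_interval(unsigned long dirty, unsigned long thresh)
{
        if (thresh > dirty)
                return 1UL << (my_ilog2(thresh - dirty) >> 1);
        return 1;
}

int main(void)
{
        unsigned long margin[] = { 16, 256, 4096, 65536, 1048576 };
        unsigned long i;

        for (i = 0; i < sizeof(margin) / sizeof(margin[0]); i++)
                printf("margin %8lu pages -> poll every %4lu dirtied pages\n",
                       margin[i], poll_interval(0, margin[i]));
        return 0;
}

With a 65536-page safety margin a task rechecks the global counters only once
per 256 dirtied pages, while a task within 16 pages of the limit rechecks every
4 pages, which is the trade-off the comment above describes.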
@@ -1112,6 +1117,9 @@ static void balance_dirty_pages(struct address_space *mapping,
         if (clear_dirty_exceeded && bdi->dirty_exceeded)
                 bdi->dirty_exceeded = 0;
 
+        current->nr_dirtied = 0;
+        current->nr_dirtied_pause = dirty_poll_interval(nr_dirty, dirty_thresh);
+
         if (writeback_in_progress(bdi))
                 return;
 
@@ -1138,7 +1146,7 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
         }
 }
 
-static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0;
+static DEFINE_PER_CPU(int, bdp_ratelimits);
 
 /**
  * balance_dirty_pages_ratelimited_nr - balance dirty memory state
@@ -1158,31 +1166,39 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
                                         unsigned long nr_pages_dirtied)
 {
         struct backing_dev_info *bdi = mapping->backing_dev_info;
-        unsigned long ratelimit;
-        unsigned long *p;
+        int ratelimit;
+        int *p;
 
         if (!bdi_cap_account_dirty(bdi))
                 return;
 
-        ratelimit = ratelimit_pages;
-        if (mapping->backing_dev_info->dirty_exceeded)
-                ratelimit = 8;
+        ratelimit = current->nr_dirtied_pause;
+        if (bdi->dirty_exceeded)
+                ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
+
+        current->nr_dirtied += nr_pages_dirtied;
 
+        preempt_disable();
         /*
-         * Check the rate limiting. Also, we do not want to throttle real-time
-         * tasks in balance_dirty_pages(). Period.
+         * This prevents one CPU from accumulating too many dirtied pages
+         * without calling into balance_dirty_pages(), which can happen when
+         * 1000+ tasks all start dirtying pages at exactly the same time and
+         * hence all honour a too-large initial task->nr_dirtied_pause.
          */
-        preempt_disable();
         p = &__get_cpu_var(bdp_ratelimits);
-        *p += nr_pages_dirtied;
-        if (unlikely(*p >= ratelimit)) {
-                ratelimit = sync_writeback_pages(*p);
+        if (unlikely(current->nr_dirtied >= ratelimit))
                 *p = 0;
-                preempt_enable();
-                balance_dirty_pages(mapping, ratelimit);
-                return;
+        else {
+                *p += nr_pages_dirtied;
+                if (unlikely(*p >= ratelimit_pages)) {
+                        *p = 0;
+                        ratelimit = 0;
+                }
         }
         preempt_enable();
+
+        if (unlikely(current->nr_dirtied >= ratelimit))
+                balance_dirty_pages(mapping, current->nr_dirtied);
 }
 EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
 
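For reference, the new fast-path decision can be modelled in user space. The
sketch below is a single-threaded approximation only: plain variables stand in
for current->nr_dirtied, current->nr_dirtied_pause, this CPU's bdp_ratelimits
counter and the real balance_dirty_pages(); none of these names are kernel
symbols, and the dirty_exceeded clamp and preemption handling are omitted.

#include <stdio.h>

static long nr_dirtied;                      /* stands in for current->nr_dirtied */
static long nr_dirtied_pause = 1024;         /* stands in for current->nr_dirtied_pause */
static long cpu_count;                       /* stands in for this CPU's bdp_ratelimits */
static const long global_ratelimit = 4096;   /* stands in for ratelimit_pages */

static void fake_balance_dirty_pages(long pages)
{
        printf("would throttle after %ld dirtied pages\n", pages);
        nr_dirtied = 0;         /* the real function resets current->nr_dirtied */
}

static void dirtied(long nr)
{
        long ratelimit = nr_dirtied_pause;

        nr_dirtied += nr;
        if (nr_dirtied >= ratelimit)
                cpu_count = 0;
        else {
                /* per-CPU backstop: many tasks on one CPU, none of which has
                 * reached its own pause threshold yet */
                cpu_count += nr;
                if (cpu_count >= global_ratelimit) {
                        cpu_count = 0;
                        ratelimit = 0;
                }
        }
        if (nr_dirtied >= ratelimit)
                fake_balance_dirty_pages(nr_dirtied);
}

int main(void)
{
        int i;

        for (i = 0; i < 64; i++)
                dirtied(64);    /* dirty 64 pages per call */
        return 0;
}

Note that with 4 KB pages, 32 >> (PAGE_SHIFT - 10) evaluates to 8, so once a
bdi has exceeded its dirty limit the per-task interval is clamped to at most 8
pages regardless of nr_dirtied_pause.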
@@ -1277,22 +1293,17 @@ void laptop_sync_completion(void)
  *
  * Here we set ratelimit_pages to a level which ensures that when all CPUs are
  * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
- * thresholds before writeback cuts in.
- *
- * But the limit should not be set too high. Because it also controls the
- * amount of memory which the balance_dirty_pages() caller has to write back.
- * If this is too large then the caller will block on the IO queue all the
- * time. So limit it to four megabytes - the balance_dirty_pages() caller
- * will write six megabyte chunks, max.
+ * thresholds.
  */
 
 void writeback_set_ratelimit(void)
 {
-        ratelimit_pages = vm_total_pages / (num_online_cpus() * 32);
+        unsigned long background_thresh;
+        unsigned long dirty_thresh;
+        global_dirty_limits(&background_thresh, &dirty_thresh);
+        ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
         if (ratelimit_pages < 16)
                 ratelimit_pages = 16;
-        if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024)
-                ratelimit_pages = (4096 * 1024) / PAGE_CACHE_SIZE;
 }
 
 static int __cpuinit
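As a worked example of the new sizing (illustrative numbers, not taken from the
patch): on an 8-CPU machine with a dirty_thresh of 1048576 pages (4 GB with
4 KB pages), ratelimit_pages = 1048576 / (8 * 32) = 4096. In the worst case
each CPU's bdp_ratelimits counter sits just below ratelimit_pages when its
tasks finally drop into balance_dirty_pages(), so the combined overshoot is
bounded by roughly 8 * 4096 = 32768 pages, i.e. 1/32 (about 3%) of
dirty_thresh, matching the comment above. The 16-page floor only matters on
machines whose dirty threshold is tiny relative to the CPU count.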