Browse code

sched: Provide iowait counters

For counting how long an application has been waiting for
(disk) IO, there currently is only the HZ sample driven
information available, while for all other counters in this
class, a high resolution version is available via
CONFIG_SCHEDSTATS.

In order to make an improved bootchart tool possible, we also
need a higher resolution version of the iowait time.

This patch below adds this scheduler statistic to the kernel.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4A64B813.1080506@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Arjan van de Ven, 16 years ago
parent
commit 8f0dfc34e9
4 changed files with 17 additions and 0 deletions
  1. include/linux/sched.h (+4 -0)
  2. kernel/sched.c (+4 -0)
  3. kernel/sched_debug.c (+4 -0)
  4. kernel/sched_fair.c (+5 -0)

+ 4 - 0
include/linux/sched.h

@@ -1111,6 +1111,8 @@ struct sched_entity {
 	u64			wait_max;
 	u64			wait_count;
 	u64			wait_sum;
+	u64			iowait_count;
+	u64			iowait_sum;
 
 	u64			sleep_start;
 	u64			sleep_max;
@@ -1231,6 +1233,8 @@ struct task_struct {
 	unsigned did_exec:1;
 	unsigned in_execve:1;	/* Tell the LSMs that the process is doing an
 				 * execve */
+	unsigned in_iowait:1;
+
 
 	/* Revert to default priority/policy when forking */
 	unsigned sched_reset_on_fork:1;

+ 4 - 0
kernel/sched.c

@@ -6754,7 +6754,9 @@ void __sched io_schedule(void)
 
 	delayacct_blkio_start();
 	atomic_inc(&rq->nr_iowait);
+	current->in_iowait = 1;
 	schedule();
+	current->in_iowait = 0;
 	atomic_dec(&rq->nr_iowait);
 	delayacct_blkio_end();
 }
@@ -6767,7 +6769,9 @@ long __sched io_schedule_timeout(long timeout)
 
 	delayacct_blkio_start();
 	atomic_inc(&rq->nr_iowait);
+	current->in_iowait = 1;
 	ret = schedule_timeout(timeout);
+	current->in_iowait = 0;
 	atomic_dec(&rq->nr_iowait);
 	delayacct_blkio_end();
 	return ret;

+ 4 - 0
kernel/sched_debug.c

@@ -409,6 +409,8 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	PN(se.wait_max);
 	PN(se.wait_sum);
 	P(se.wait_count);
+	PN(se.iowait_sum);
+	P(se.iowait_count);
 	P(sched_info.bkl_count);
 	P(se.nr_migrations);
 	P(se.nr_migrations_cold);
@@ -479,6 +481,8 @@ void proc_sched_set_task(struct task_struct *p)
 	p->se.wait_max				= 0;
 	p->se.wait_sum				= 0;
 	p->se.wait_count			= 0;
+	p->se.iowait_sum			= 0;
+	p->se.iowait_count			= 0;
 	p->se.sleep_max				= 0;
 	p->se.sum_sleep_runtime			= 0;
 	p->se.block_max				= 0;

+ 5 - 0
kernel/sched_fair.c

@@ -652,6 +652,11 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		se->sum_sleep_runtime += delta;
 
 		if (tsk) {
+			if (tsk->in_iowait) {
+				se->iowait_sum += delta;
+				se->iowait_count++;
+			}
+
 			/*
 			 * Blocking time is in units of nanosecs, so shift by
 			 * 20 to get a milliseconds-range estimation of the