
Merge branch 'sched/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into cputime-tip

Conflicts:
	drivers/cpufreq/cpufreq_conservative.c
	drivers/cpufreq/cpufreq_ondemand.c
	drivers/macintosh/rack-meter.c
	fs/proc/stat.c
	fs/proc/uptime.c
	kernel/sched/core.c
Martin Schwidefsky, 13 years ago
commit 612ef28a04

+ 8 - 8
arch/s390/appldata/appldata_os.c

@@ -115,21 +115,21 @@ static void appldata_get_os_data(void *data)
 	j = 0;
 	for_each_online_cpu(i) {
 		os_data->os_cpu[j].per_cpu_user =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.user);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
 		os_data->os_cpu[j].per_cpu_nice =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.nice);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
 		os_data->os_cpu[j].per_cpu_system =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.system);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
 		os_data->os_cpu[j].per_cpu_idle =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.idle);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
 		os_data->os_cpu[j].per_cpu_irq =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.irq);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
 		os_data->os_cpu[j].per_cpu_softirq =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.softirq);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
 		os_data->os_cpu[j].per_cpu_iowait =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.iowait);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
 		os_data->os_cpu[j].per_cpu_steal =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.steal);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
 		os_data->os_cpu[j].cpu_id = i;
 		j++;
 	}
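
The conversion here is the pattern repeated throughout the commit: each named cputime field of the old per-CPU kstat structure becomes an index into the new cpustat[] array reached through kcpustat_cpu(). A minimal before/after sketch (kernel-style C, illustrative only, not code from this commit):

#include <linux/kernel_stat.h>	/* kcpustat_cpu(), CPUTIME_* indices */

/* Old accessor: one struct member per statistic.		*/
/*	user = kstat_cpu(cpu).cpustat.user;			*/

/* New accessor: u64 array indexed by enum cpu_usage_stat.	*/
static u64 example_user_jiffies(int cpu)
{
	return cputime_to_jiffies(kcpustat_cpu(cpu).cpustat[CPUTIME_USER]);
}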

+ 1 - 1
arch/x86/include/asm/i387.h

@@ -218,7 +218,7 @@ static inline void fpu_fxsave(struct fpu *fpu)
 #ifdef CONFIG_SMP
 #define safe_address (__per_cpu_offset[0])
 #else
-#define safe_address (kstat_cpu(0).cpustat.user)
+#define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER])
 #endif
 
 /*

+ 20 - 21
drivers/cpufreq/cpufreq_conservative.c

@@ -95,26 +95,26 @@ static struct dbs_tuners {
 	.freq_step = 5,
 };
 
-static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
-							cputime64_t *wall)
+static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 {
-	cputime64_t idle_time;
-	cputime64_t cur_wall_time;
-	cputime64_t busy_time;
+	u64 idle_time;
+	u64 cur_wall_time;
+	u64 busy_time;
 
 	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
-	busy_time  = kstat_cpu(cpu).cpustat.user;
-	busy_time += kstat_cpu(cpu).cpustat.system;
-	busy_time += kstat_cpu(cpu).cpustat.irq;
-	busy_time += kstat_cpu(cpu).cpustat.softirq;
-	busy_time += kstat_cpu(cpu).cpustat.steal;
-	busy_time += kstat_cpu(cpu).cpustat.nice;
+
+	busy_time  = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
 
 	idle_time = cur_wall_time - busy_time;
 	if (wall)
-		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
+		*wall = jiffies_to_usecs(cur_wall_time);
 
-	return (cputime64_t)jiffies_to_usecs(idle_time);
+	return jiffies_to_usecs(idle_time);
 }
 
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
@@ -271,7 +271,7 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
 		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
 						&dbs_info->prev_cpu_wall);
 		if (dbs_tuners_ins.ignore_nice)
-			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+			dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 	}
 	return count;
 }
@@ -361,11 +361,11 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 		j_dbs_info->prev_cpu_idle = cur_idle_time;
 
 		if (dbs_tuners_ins.ignore_nice) {
-			cputime64_t cur_nice;
+			u64 cur_nice;
 			unsigned long cur_nice_jiffies;
 
-			cur_nice = kstat_cpu(j).cpustat.nice -
-					j_dbs_info->prev_cpu_nice;
+			cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
+					 j_dbs_info->prev_cpu_nice;
 			/*
 			 * Assumption: nice time between sampling periods will
 			 * be less than 2^32 jiffies for 32 bit sys
@@ -373,7 +373,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 			cur_nice_jiffies = (unsigned long)
 					cputime64_to_jiffies64(cur_nice);
 
-			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+			j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 			idle_time += jiffies_to_usecs(cur_nice_jiffies);
 		}
 
@@ -500,10 +500,9 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 
 			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
 						&j_dbs_info->prev_cpu_wall);
-			if (dbs_tuners_ins.ignore_nice) {
+			if (dbs_tuners_ins.ignore_nice)
 				j_dbs_info->prev_cpu_nice =
-						kstat_cpu(j).cpustat.nice;
-			}
+						kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 		}
 		this_dbs_info->down_skip = 0;
 		this_dbs_info->requested_freq = policy->cur;
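
Because cpustat is now a plain u64 array, the six busy-time additions above could equally be written as a loop over the relevant indices, which the old one-member-per-field layout could not express. A hedged illustration (the helper name and the loop form are mine; the driver keeps the explicit additions):

#include <linux/kernel.h>	/* ARRAY_SIZE() */
#include <linux/kernel_stat.h>	/* kcpustat_cpu(), CPUTIME_* */

static u64 example_busy_time(unsigned int cpu)
{
	static const enum cpu_usage_stat busy[] = {
		CPUTIME_USER, CPUTIME_SYSTEM, CPUTIME_IRQ,
		CPUTIME_SOFTIRQ, CPUTIME_STEAL, CPUTIME_NICE,
	};
	u64 sum = 0;
	int i;

	for (i = 0; i < ARRAY_SIZE(busy); i++)
		sum += kcpustat_cpu(cpu).cpustat[busy[i]];

	return sum;
}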

+ 20 - 21
drivers/cpufreq/cpufreq_ondemand.c

@@ -119,26 +119,26 @@ static struct dbs_tuners {
 	.powersave_bias = 0,
 };
 
-static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
-							cputime64_t *wall)
+static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 {
-	cputime64_t idle_time;
-	cputime64_t cur_wall_time;
-	cputime64_t busy_time;
+	u64 idle_time;
+	u64 cur_wall_time;
+	u64 busy_time;
 
 	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
-	busy_time  = kstat_cpu(cpu).cpustat.user;
-	busy_time += kstat_cpu(cpu).cpustat.system;
-	busy_time += kstat_cpu(cpu).cpustat.irq;
-	busy_time += kstat_cpu(cpu).cpustat.softirq;
-	busy_time += kstat_cpu(cpu).cpustat.steal;
-	busy_time += kstat_cpu(cpu).cpustat.nice;
+
+	busy_time  = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
 
 	idle_time = cur_wall_time - busy_time;
 	if (wall)
-		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
+		*wall = jiffies_to_usecs(cur_wall_time);
 
-	return (cputime64_t)jiffies_to_usecs(idle_time);
+	return jiffies_to_usecs(idle_time);
 }
 
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
@@ -344,7 +344,7 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
 		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
 						&dbs_info->prev_cpu_wall);
 		if (dbs_tuners_ins.ignore_nice)
-			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+			dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 
 	}
 	return count;
@@ -454,11 +454,11 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 		j_dbs_info->prev_cpu_iowait = cur_iowait_time;
 
 		if (dbs_tuners_ins.ignore_nice) {
-			cputime64_t cur_nice;
+			u64 cur_nice;
 			unsigned long cur_nice_jiffies;
 
-			cur_nice = kstat_cpu(j).cpustat.nice -
-					j_dbs_info->prev_cpu_nice;
+			cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
+					 j_dbs_info->prev_cpu_nice;
 			/*
 			 * Assumption: nice time between sampling periods will
 			 * be less than 2^32 jiffies for 32 bit sys
@@ -466,7 +466,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 			cur_nice_jiffies = (unsigned long)
 					cputime64_to_jiffies64(cur_nice);
 
-			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+			j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 			idle_time += jiffies_to_usecs(cur_nice_jiffies);
 		}
 
@@ -645,10 +645,9 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 
 			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
 						&j_dbs_info->prev_cpu_wall);
-			if (dbs_tuners_ins.ignore_nice) {
+			if (dbs_tuners_ins.ignore_nice)
 				j_dbs_info->prev_cpu_nice =
-						kstat_cpu(j).cpustat.nice;
-			}
+						kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 		}
 		this_dbs_info->cpu = cpu;
 		this_dbs_info->rate_mult = 1;

+ 4 - 3
drivers/macintosh/rack-meter.c

@@ -81,12 +81,13 @@ static int rackmeter_ignore_nice;
  */
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
 {
-	cputime64_t retval;
+	u64 retval;
 
-	retval = kstat_cpu(cpu).cpustat.idle + kstat_cpu(cpu).cpustat.iowait;
+	retval = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE] +
+		 kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
 
 	if (rackmeter_ignore_nice)
-		retval += kstat_cpu(cpu).cpustat.nice;
+		retval += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
 
 	return retval;
 }

+ 24 - 28
fs/proc/stat.c

@@ -22,14 +22,13 @@
 #define arch_idle_time(cpu) 0
 #endif
 
-static cputime64_t get_idle_time(int cpu)
+static u64 get_idle_time(int cpu)
 {
-	u64 idle_time = get_cpu_idle_time_us(cpu, NULL);
-	cputime64_t idle;
+	u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL);
 
 	if (idle_time == -1ULL) {
 		/* !NO_HZ so we can rely on cpustat.idle */
-		idle = kstat_cpu(cpu).cpustat.idle;
+		idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
 		idle += arch_idle_time(cpu);
 	} else
 		idle = nsecs_to_jiffies64(1000 * idle_time);
@@ -37,14 +36,13 @@ static cputime64_t get_idle_time(int cpu)
 	return idle;
 }
 
-static cputime64_t get_iowait_time(int cpu)
+static u64 get_iowait_time(int cpu)
 {
-	u64 iowait_time = get_cpu_iowait_time_us(cpu, NULL);
-	cputime64_t iowait;
+	u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL);
 
 	if (iowait_time == -1ULL)
 		/* !NO_HZ so we can rely on cpustat.iowait */
-		iowait = kstat_cpu(cpu).cpustat.iowait;
+		iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
 	else
 		iowait = nsecs_to_jiffies64(1000 * iowait_time);
 
@@ -55,8 +53,8 @@ static int show_stat(struct seq_file *p, void *v)
 {
 	int i, j;
 	unsigned long jif;
-	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
-	cputime64_t guest, guest_nice;
+	u64 user, nice, system, idle, iowait, irq, softirq, steal;
+	u64 guest, guest_nice;
 	u64 sum = 0;
 	u64 sum_softirq = 0;
 	unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
@@ -69,18 +67,16 @@ static int show_stat(struct seq_file *p, void *v)
 	jif = boottime.tv_sec;
 
 	for_each_possible_cpu(i) {
-		user += kstat_cpu(i).cpustat.user;
-		nice += kstat_cpu(i).cpustat.nice;
-		system += kstat_cpu(i).cpustat.system;
+		user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
+		nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
+		system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
 		idle += get_idle_time(i);
 		iowait += get_iowait_time(i);
-		irq += kstat_cpu(i).cpustat.irq;
-		softirq += kstat_cpu(i).cpustat.softirq;
-		steal += kstat_cpu(i).cpustat.steal;
-		guest += kstat_cpu(i).cpustat.guest;
-		guest_nice += kstat_cpu(i).cpustat.guest_nice;
-		sum += kstat_cpu_irqs_sum(i);
-		sum += arch_irq_stat_cpu(i);
+		irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
+		softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
+		steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
+		guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
+		guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
 
 		for (j = 0; j < NR_SOFTIRQS; j++) {
 			unsigned int softirq_stat = kstat_softirqs_cpu(j, i);
@@ -105,16 +101,16 @@ static int show_stat(struct seq_file *p, void *v)
 		(unsigned long long)cputime64_to_clock_t(guest_nice));
 	for_each_online_cpu(i) {
 		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
-		user = kstat_cpu(i).cpustat.user;
-		nice = kstat_cpu(i).cpustat.nice;
-		system = kstat_cpu(i).cpustat.system;
+		user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
+		nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
+		system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
 		idle = get_idle_time(i);
 		iowait = get_iowait_time(i);
-		irq = kstat_cpu(i).cpustat.irq;
-		softirq = kstat_cpu(i).cpustat.softirq;
-		steal = kstat_cpu(i).cpustat.steal;
-		guest = kstat_cpu(i).cpustat.guest;
-		guest_nice = kstat_cpu(i).cpustat.guest_nice;
+		irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
+		softirq = kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
+		steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
+		guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
+		guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
 		seq_printf(p,
 			"cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
 			"%llu\n",

+ 2 - 2
fs/proc/uptime.c

@@ -11,14 +11,14 @@ static int uptime_proc_show(struct seq_file *m, void *v)
 {
 	struct timespec uptime;
 	struct timespec idle;
-	cputime64_t idletime;
+	u64 idletime;
 	u64 nsec;
 	u32 rem;
 	int i;
 
 	idletime = 0;
 	for_each_possible_cpu(i)
-		idletime += kstat_cpu(i).cpustat.idle;
+		idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
 
 	do_posix_clock_monotonic_gettime(&uptime);
 	monotonic_to_bootbased(&uptime);

+ 22 - 14
include/linux/kernel_stat.h

@@ -6,6 +6,7 @@
 #include <linux/percpu.h>
 #include <linux/cpumask.h>
 #include <linux/interrupt.h>
+#include <linux/sched.h>
 #include <asm/irq.h>
 #include <asm/cputime.h>
 
@@ -15,21 +16,25 @@
  * used by rstatd/perfmeter
  */
 
-struct cpu_usage_stat {
-	cputime64_t user;
-	cputime64_t nice;
-	cputime64_t system;
-	cputime64_t softirq;
-	cputime64_t irq;
-	cputime64_t idle;
-	cputime64_t iowait;
-	cputime64_t steal;
-	cputime64_t guest;
-	cputime64_t guest_nice;
+enum cpu_usage_stat {
+	CPUTIME_USER,
+	CPUTIME_NICE,
+	CPUTIME_SYSTEM,
+	CPUTIME_SOFTIRQ,
+	CPUTIME_IRQ,
+	CPUTIME_IDLE,
+	CPUTIME_IOWAIT,
+	CPUTIME_STEAL,
+	CPUTIME_GUEST,
+	CPUTIME_GUEST_NICE,
+	NR_STATS,
+};
+
+struct kernel_cpustat {
+	u64 cpustat[NR_STATS];
 };
 
 struct kernel_stat {
-	struct cpu_usage_stat	cpustat;
 #ifndef CONFIG_GENERIC_HARDIRQS
        unsigned int irqs[NR_IRQS];
 #endif
@@ -38,10 +43,13 @@ struct kernel_stat {
 };
 
 DECLARE_PER_CPU(struct kernel_stat, kstat);
+DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
 
-#define kstat_cpu(cpu)	per_cpu(kstat, cpu)
 /* Must have preemption disabled for this to be meaningful. */
-#define kstat_this_cpu	__get_cpu_var(kstat)
+#define kstat_this_cpu (&__get_cpu_var(kstat))
+#define kcpustat_this_cpu (&__get_cpu_var(kernel_cpustat))
+#define kstat_cpu(cpu) per_cpu(kstat, cpu)
+#define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu)
 
 extern unsigned long long nr_context_switches(void);
 
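
This header hunk is the heart of the series: the ten cputime64_t members of struct cpu_usage_stat become a u64 cpustat[NR_STATS] array in a new struct kernel_cpustat, which leaves struct kernel_stat and gets its own per-CPU variable with kcpustat_cpu()/kcpustat_this_cpu accessors mirroring kstat_cpu()/kstat_this_cpu. A hedged usage sketch of the two new accessors (function names are illustrative):

#include <linux/kernel_stat.h>

/* Any CPU: read a counter through kcpustat_cpu(cpu). */
static u64 example_idle_plus_iowait(int cpu)
{
	return kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE] +
	       kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
}

/* Local CPU: kcpustat_this_cpu yields a pointer; as with kstat_this_cpu,
 * preemption must be disabled for the result to be meaningful. */
static void example_account_user(u64 delta)
{
	kcpustat_this_cpu->cpustat[CPUTIME_USER] += delta;
}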

+ 2 - 1
include/linux/latencytop.h

@@ -10,6 +10,8 @@
 #define _INCLUDE_GUARD_LATENCYTOP_H_
 
 #include <linux/compiler.h>
+struct task_struct;
+
 #ifdef CONFIG_LATENCYTOP
 
 #define LT_SAVECOUNT		32
@@ -23,7 +25,6 @@ struct latency_record {
 };
 
 
-struct task_struct;
 
 extern int latencytop_enabled;
 void __account_scheduler_latency(struct task_struct *task, int usecs, int inter);

+ 17 - 2
include/linux/sched.h

@@ -273,9 +273,11 @@ extern int runqueue_is_locked(int cpu);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
 extern void select_nohz_load_balancer(int stop_tick);
+extern void set_cpu_sd_state_idle(void);
 extern int get_nohz_timer_target(void);
 #else
 static inline void select_nohz_load_balancer(int stop_tick) { }
+static inline void set_cpu_sd_state_idle(void) { }
 #endif
 
 /*
@@ -901,6 +903,10 @@ struct sched_group_power {
 	 * single CPU.
 	 */
 	unsigned int power, power_orig;
+	/*
+	 * Number of busy cpus in this group.
+	 */
+	atomic_t nr_busy_cpus;
 };
 
 struct sched_group {
@@ -925,6 +931,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
 	return to_cpumask(sg->cpumask);
 }
 
+/**
+ * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
+ * @group: The group whose first cpu is to be returned.
+ */
+static inline unsigned int group_first_cpu(struct sched_group *group)
+{
+	return cpumask_first(sched_group_cpus(group));
+}
+
 struct sched_domain_attr {
 	int relax_domain_level;
 };
@@ -1315,8 +1330,8 @@ struct task_struct {
 	 * older sibling, respectively.  (p->father can be replaced with 
 	 * p->real_parent->pid)
 	 */
-	struct task_struct *real_parent; /* real parent process */
-	struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */
+	struct task_struct __rcu *real_parent; /* real parent process */
+	struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
 	/*
 	 * children/sibling forms the list of my natural children
 	 */

+ 7 - 0
include/trace/events/sched.h

@@ -330,6 +330,13 @@ DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
 	     TP_ARGS(tsk, delay));
 
+/*
+ * Tracepoint for accounting blocked time (time the task is in uninterruptible).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
+	     TP_PROTO(struct task_struct *tsk, u64 delay),
+	     TP_ARGS(tsk, delay));
+
 /*
  * Tracepoint for accounting runtime (time the task is executing
  * on a CPU).

+ 5 - 15
kernel/Makefile

@@ -2,16 +2,15 @@
 # Makefile for the linux kernel.
 #
 
-obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
+obj-y     = fork.o exec_domain.o panic.o printk.o \
 	    cpu.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
-	    notifier.o ksysfs.o sched_clock.o cred.o \
-	    async.o range.o
-obj-y += groups.o
+	    notifier.o ksysfs.o cred.o \
+	    async.o range.o groups.o
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not trace debug files and internal ftrace files
@@ -20,10 +19,11 @@ CFLAGS_REMOVE_lockdep_proc.o = -pg
 CFLAGS_REMOVE_mutex-debug.o = -pg
 CFLAGS_REMOVE_rtmutex-debug.o = -pg
 CFLAGS_REMOVE_cgroup-debug.o = -pg
-CFLAGS_REMOVE_sched_clock.o = -pg
 CFLAGS_REMOVE_irq_work.o = -pg
 endif
 
+obj-y += sched/
+
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
@@ -99,7 +99,6 @@ obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_TRACEPOINTS) += trace/
-obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-$(CONFIG_CPU_PM) += cpu_pm.o
 
@@ -110,15 +109,6 @@ obj-$(CONFIG_PADATA) += padata.o
 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 
-ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
-# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
-# needed for x86 only.  Why this used to be enabled for all architectures is beyond
-# me.  I suspect most platforms don't need this, but until we know that for sure
-# I turn this off for IA-64 only.  Andreas Schwab says it's also needed on m68k
-# to get a correct value for the wait-channel (WCHAN in ps). --davidm
-CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer
-endif
-
 $(obj)/configs.o: $(obj)/config_data.h
 
 # config_data.h contains the same information as ikconfig.h but gzipped.

+ 20 - 0
kernel/sched/Makefile

@@ -0,0 +1,20 @@
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_clock.o = -pg
+endif
+
+ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
+# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
+# needed for x86 only.  Why this used to be enabled for all architectures is beyond
+# me.  I suspect most platforms don't need this, but until we know that for sure
+# I turn this off for IA-64 only.  Andreas Schwab says it's also needed on m68k
+# to get a correct value for the wait-channel (WCHAN in ps). --davidm
+CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
+endif
+
+obj-y += core.o clock.o idle_task.o fair.o rt.o stop_task.o
+obj-$(CONFIG_SMP) += cpupri.o
+obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
+obj-$(CONFIG_SCHEDSTATS) += stats.o
+obj-$(CONFIG_SCHED_DEBUG) += debug.o
+
+

+ 8 - 25
kernel/sched_autogroup.c → kernel/sched/auto_group.c

@@ -1,15 +1,19 @@
 #ifdef CONFIG_SCHED_AUTOGROUP
 
+#include "sched.h"
+
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/kallsyms.h>
 #include <linux/utsname.h>
+#include <linux/security.h>
+#include <linux/export.h>
 
 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
 static struct autogroup autogroup_default;
 static atomic_t autogroup_seq_nr;
 
-static void __init autogroup_init(struct task_struct *init_task)
+void __init autogroup_init(struct task_struct *init_task)
 {
 	autogroup_default.tg = &root_task_group;
 	kref_init(&autogroup_default.kref);
@@ -17,7 +21,7 @@ static void __init autogroup_init(struct task_struct *init_task)
 	init_task->signal->autogroup = &autogroup_default;
 }
 
-static inline void autogroup_free(struct task_group *tg)
+void autogroup_free(struct task_group *tg)
 {
 	kfree(tg->autogroup);
 }
@@ -59,10 +63,6 @@ static inline struct autogroup *autogroup_task_get(struct task_struct *p)
 	return ag;
 }
 
-#ifdef CONFIG_RT_GROUP_SCHED
-static void free_rt_sched_group(struct task_group *tg);
-#endif
-
 static inline struct autogroup *autogroup_create(void)
 {
 	struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
@@ -108,8 +108,7 @@ out_fail:
 	return autogroup_kref_get(&autogroup_default);
 }
 
-static inline bool
-task_wants_autogroup(struct task_struct *p, struct task_group *tg)
+bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 {
 	if (tg != &root_task_group)
 		return false;
@@ -127,22 +126,6 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 	return true;
 }
 
-static inline bool task_group_is_autogroup(struct task_group *tg)
-{
-	return !!tg->autogroup;
-}
-
-static inline struct task_group *
-autogroup_task_group(struct task_struct *p, struct task_group *tg)
-{
-	int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
-
-	if (enabled && task_wants_autogroup(p, tg))
-		return p->signal->autogroup->tg;
-
-	return tg;
-}
-
 static void
 autogroup_move_group(struct task_struct *p, struct autogroup *ag)
 {
@@ -263,7 +246,7 @@ out:
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_SCHED_DEBUG
-static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
+int autogroup_path(struct task_group *tg, char *buf, int buflen)
 {
 	if (!task_group_is_autogroup(tg))
 		return 0;

+ 24 - 2
kernel/sched_autogroup.h → kernel/sched/auto_group.h

@@ -1,5 +1,8 @@
 #ifdef CONFIG_SCHED_AUTOGROUP
 
+#include <linux/kref.h>
+#include <linux/rwsem.h>
+
 struct autogroup {
 	/*
 	 * reference doesn't mean how many thread attach to this
@@ -13,9 +16,28 @@ struct autogroup {
 	int			nice;
 };
 
-static inline bool task_group_is_autogroup(struct task_group *tg);
+extern void autogroup_init(struct task_struct *init_task);
+extern void autogroup_free(struct task_group *tg);
+
+static inline bool task_group_is_autogroup(struct task_group *tg)
+{
+	return !!tg->autogroup;
+}
+
+extern bool task_wants_autogroup(struct task_struct *p, struct task_group *tg);
+
 static inline struct task_group *
-autogroup_task_group(struct task_struct *p, struct task_group *tg);
+autogroup_task_group(struct task_struct *p, struct task_group *tg)
+{
+	int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
+
+	if (enabled && task_wants_autogroup(p, tg))
+		return p->signal->autogroup->tg;
+
+	return tg;
+}
+
+extern int autogroup_path(struct task_group *tg, char *buf, int buflen);
 
 #else /* !CONFIG_SCHED_AUTOGROUP */
 

+ 0 - 0
kernel/sched_clock.c → kernel/sched/clock.c


File diff suppressed because it is too large
+ 49 - 849
kernel/sched/core.c


+ 2 - 2
kernel/sched_cpupri.c → kernel/sched/cpupri.c

@@ -1,5 +1,5 @@
 /*
- *  kernel/sched_cpupri.c
+ *  kernel/sched/cpupri.c
  *
  *  CPU priority management
  *
@@ -28,7 +28,7 @@
  */
 
 #include <linux/gfp.h>
-#include "sched_cpupri.h"
+#include "cpupri.h"
 
 /* Convert between a 140 based task->prio, and our 102 based cpupri */
 static int convert_prio(int prio)

+ 0 - 0
kernel/sched_cpupri.h → kernel/sched/cpupri.h


+ 4 - 2
kernel/sched_debug.c → kernel/sched/debug.c

@@ -1,5 +1,5 @@
 /*
- * kernel/time/sched_debug.c
+ * kernel/sched/debug.c
  *
  * Print the CFS rbtree
  *
@@ -16,6 +16,8 @@
 #include <linux/kallsyms.h>
 #include <linux/utsname.h>
 
+#include "sched.h"
+
 static DEFINE_SPINLOCK(sched_debug_lock);
 
 /*
@@ -373,7 +375,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static void sysrq_sched_debug_show(void)
+void sysrq_sched_debug_show(void)
 {
 	sched_debug_show(NULL, NULL);
 }

File diff suppressed because it is too large
+ 529 - 207
kernel/sched/fair.c


+ 15 - 15
kernel/sched_features.h → kernel/sched/features.h

@@ -3,13 +3,13 @@
  * them to run sooner, but does not allow tons of sleepers to
  * rip the spread apart.
  */
-SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1)
+SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
 
 /*
  * Place new tasks ahead so that they do not starve already running
  * tasks
  */
-SCHED_FEAT(START_DEBIT, 1)
+SCHED_FEAT(START_DEBIT, true)
 
 /*
  * Based on load and program behaviour, see if it makes sense to place
@@ -17,54 +17,54 @@ SCHED_FEAT(START_DEBIT, 1)
  * improve cache locality. Typically used with SYNC wakeups as
  * generated by pipes and the like, see also SYNC_WAKEUPS.
  */
-SCHED_FEAT(AFFINE_WAKEUPS, 1)
+SCHED_FEAT(AFFINE_WAKEUPS, true)
 
 /*
  * Prefer to schedule the task we woke last (assuming it failed
  * wakeup-preemption), since its likely going to consume data we
  * touched, increases cache locality.
  */
-SCHED_FEAT(NEXT_BUDDY, 0)
+SCHED_FEAT(NEXT_BUDDY, false)
 
 /*
  * Prefer to schedule the task that ran last (when we did
  * wake-preempt) as that likely will touch the same data, increases
  * cache locality.
  */
-SCHED_FEAT(LAST_BUDDY, 1)
+SCHED_FEAT(LAST_BUDDY, true)
 
 /*
  * Consider buddies to be cache hot, decreases the likelyness of a
  * cache buddy being migrated away, increases cache locality.
  */
-SCHED_FEAT(CACHE_HOT_BUDDY, 1)
+SCHED_FEAT(CACHE_HOT_BUDDY, true)
 
 /*
  * Use arch dependent cpu power functions
  */
-SCHED_FEAT(ARCH_POWER, 0)
+SCHED_FEAT(ARCH_POWER, false)
 
-SCHED_FEAT(HRTICK, 0)
-SCHED_FEAT(DOUBLE_TICK, 0)
-SCHED_FEAT(LB_BIAS, 1)
+SCHED_FEAT(HRTICK, false)
+SCHED_FEAT(DOUBLE_TICK, false)
+SCHED_FEAT(LB_BIAS, true)
 
 /*
  * Spin-wait on mutex acquisition when the mutex owner is running on
  * another cpu -- assumes that when the owner is running, it will soon
  * release the lock. Decreases scheduling overhead.
  */
-SCHED_FEAT(OWNER_SPIN, 1)
+SCHED_FEAT(OWNER_SPIN, true)
 
 /*
  * Decrement CPU power based on time not spent running tasks
  */
-SCHED_FEAT(NONTASK_POWER, 1)
+SCHED_FEAT(NONTASK_POWER, true)
 
 /*
  * Queue remote wakeups on the target CPU and process them
  * using the scheduler IPI. Reduces rq->lock contention/bounces.
  */
-SCHED_FEAT(TTWU_QUEUE, 1)
+SCHED_FEAT(TTWU_QUEUE, true)
 
-SCHED_FEAT(FORCE_SD_OVERLAP, 0)
-SCHED_FEAT(RT_RUNTIME_SHARE, 1)
+SCHED_FEAT(FORCE_SD_OVERLAP, false)
+SCHED_FEAT(RT_RUNTIME_SHARE, true)
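
The switch from 1/0 to true/false is not cosmetic: the new kernel/sched/sched.h (below) pastes the second argument into helper names. Each SCHED_FEAT(name, enabled) entry produces an __SCHED_FEAT_name enum value, and under SCHED_DEBUG with jump labels it also produces a static_branch_name() wrapper that resolves to static_branch__true() or static_branch__false(). A sketch of how one disabled feature expands under those macros (illustrative, following the definitions in sched.h later in this diff):

/* SCHED_FEAT(HRTICK, false) yields, roughly: */

enum { __SCHED_FEAT_HRTICK /* , ... */ };	/* bit/key index */

static __always_inline bool static_branch_HRTICK(struct jump_label_key *key)
{
	return static_branch__false(key);	/* out-of-line branch */
}

/* so sched_feat(HRTICK) ==
 *	static_branch_HRTICK(&sched_feat_keys[__SCHED_FEAT_HRTICK])	*/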

+ 3 - 1
kernel/sched_idletask.c → kernel/sched/idle_task.c

@@ -1,3 +1,5 @@
+#include "sched.h"
+
 /*
  * idle-task scheduling class.
  *
@@ -71,7 +73,7 @@ static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task
 /*
  * Simple, special scheduling class for the per-CPU idle tasks:
  */
-static const struct sched_class idle_sched_class = {
+const struct sched_class idle_sched_class = {
 	/* .next is NULL */
 	/* no enqueue/yield_task for idle tasks */
 

+ 209 - 9
kernel/sched_rt.c → kernel/sched/rt.c

@@ -3,7 +3,92 @@
  * policies)
  */
 
+#include "sched.h"
+
+#include <linux/slab.h>
+
+static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
+
+struct rt_bandwidth def_rt_bandwidth;
+
+static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
+{
+	struct rt_bandwidth *rt_b =
+		container_of(timer, struct rt_bandwidth, rt_period_timer);
+	ktime_t now;
+	int overrun;
+	int idle = 0;
+
+	for (;;) {
+		now = hrtimer_cb_get_time(timer);
+		overrun = hrtimer_forward(timer, now, rt_b->rt_period);
+
+		if (!overrun)
+			break;
+
+		idle = do_sched_rt_period_timer(rt_b, overrun);
+	}
+
+	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
+}
+
+void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
+{
+	rt_b->rt_period = ns_to_ktime(period);
+	rt_b->rt_runtime = runtime;
+
+	raw_spin_lock_init(&rt_b->rt_runtime_lock);
+
+	hrtimer_init(&rt_b->rt_period_timer,
+			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rt_b->rt_period_timer.function = sched_rt_period_timer;
+}
+
+static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
+{
+	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
+		return;
+
+	if (hrtimer_active(&rt_b->rt_period_timer))
+		return;
+
+	raw_spin_lock(&rt_b->rt_runtime_lock);
+	start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
+	raw_spin_unlock(&rt_b->rt_runtime_lock);
+}
+
+void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
+{
+	struct rt_prio_array *array;
+	int i;
+
+	array = &rt_rq->active;
+	for (i = 0; i < MAX_RT_PRIO; i++) {
+		INIT_LIST_HEAD(array->queue + i);
+		__clear_bit(i, array->bitmap);
+	}
+	/* delimiter for bitsearch: */
+	__set_bit(MAX_RT_PRIO, array->bitmap);
+
+#if defined CONFIG_SMP
+	rt_rq->highest_prio.curr = MAX_RT_PRIO;
+	rt_rq->highest_prio.next = MAX_RT_PRIO;
+	rt_rq->rt_nr_migratory = 0;
+	rt_rq->overloaded = 0;
+	plist_head_init(&rt_rq->pushable_tasks);
+#endif
+
+	rt_rq->rt_time = 0;
+	rt_rq->rt_throttled = 0;
+	rt_rq->rt_runtime = 0;
+	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
+}
+
 #ifdef CONFIG_RT_GROUP_SCHED
+static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
+{
+	hrtimer_cancel(&rt_b->rt_period_timer);
+}
 
 #define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
 
@@ -25,6 +110,91 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
 	return rt_se->rt_rq;
 }
 
+void free_rt_sched_group(struct task_group *tg)
+{
+	int i;
+
+	if (tg->rt_se)
+		destroy_rt_bandwidth(&tg->rt_bandwidth);
+
+	for_each_possible_cpu(i) {
+		if (tg->rt_rq)
+			kfree(tg->rt_rq[i]);
+		if (tg->rt_se)
+			kfree(tg->rt_se[i]);
+	}
+
+	kfree(tg->rt_rq);
+	kfree(tg->rt_se);
+}
+
+void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
+		struct sched_rt_entity *rt_se, int cpu,
+		struct sched_rt_entity *parent)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	rt_rq->highest_prio.curr = MAX_RT_PRIO;
+	rt_rq->rt_nr_boosted = 0;
+	rt_rq->rq = rq;
+	rt_rq->tg = tg;
+
+	tg->rt_rq[cpu] = rt_rq;
+	tg->rt_se[cpu] = rt_se;
+
+	if (!rt_se)
+		return;
+
+	if (!parent)
+		rt_se->rt_rq = &rq->rt;
+	else
+		rt_se->rt_rq = parent->my_q;
+
+	rt_se->my_q = rt_rq;
+	rt_se->parent = parent;
+	INIT_LIST_HEAD(&rt_se->run_list);
+}
+
+int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
+{
+	struct rt_rq *rt_rq;
+	struct sched_rt_entity *rt_se;
+	int i;
+
+	tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
+	if (!tg->rt_rq)
+		goto err;
+	tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL);
+	if (!tg->rt_se)
+		goto err;
+
+	init_rt_bandwidth(&tg->rt_bandwidth,
+			ktime_to_ns(def_rt_bandwidth.rt_period), 0);
+
+	for_each_possible_cpu(i) {
+		rt_rq = kzalloc_node(sizeof(struct rt_rq),
+				     GFP_KERNEL, cpu_to_node(i));
+		if (!rt_rq)
+			goto err;
+
+		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
+				     GFP_KERNEL, cpu_to_node(i));
+		if (!rt_se)
+			goto err_free_rq;
+
+		init_rt_rq(rt_rq, cpu_rq(i));
+		rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
+		init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
+	}
+
+	return 1;
+
+err_free_rq:
+	kfree(rt_rq);
+err:
+	return 0;
+}
+
 #else /* CONFIG_RT_GROUP_SCHED */
 
 #define rt_entity_is_task(rt_se) (1)
@@ -47,6 +217,12 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
 	return &rq->rt;
 }
 
+void free_rt_sched_group(struct task_group *tg) { }
+
+int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
+{
+	return 1;
+}
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 #ifdef CONFIG_SMP
@@ -556,6 +732,28 @@ static void enable_runtime(struct rq *rq)
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
+int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	int cpu = (int)(long)hcpu;
+
+	switch (action) {
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		disable_runtime(cpu_rq(cpu));
+		return NOTIFY_OK;
+
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		enable_runtime(cpu_rq(cpu));
+		return NOTIFY_OK;
+
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
 static int balance_runtime(struct rt_rq *rt_rq)
 {
 	int more = 0;
@@ -648,7 +846,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 	if (rt_rq->rt_throttled)
 		return rt_rq_throttled(rt_rq);
 
-	if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
+	if (runtime >= sched_rt_period(rt_rq))
 		return 0;
 
 	balance_runtime(rt_rq);
@@ -957,8 +1155,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 }
 
 /*
- * Put task to the end of the run list without the overhead of dequeue
- * followed by enqueue.
+ * Put task to the head or the end of the run list without the overhead of
+ * dequeue followed by enqueue.
  */
 static void
 requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
@@ -1002,6 +1200,9 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 
 	cpu = task_cpu(p);
 
+	if (p->rt.nr_cpus_allowed == 1)
+		goto out;
+
 	/* For anything but wake ups, just return the task_cpu */
 	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
 		goto out;
@@ -1178,8 +1379,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 /* Only try algorithms three times */
 #define RT_MAX_TRIES 3
 
-static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
-
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
@@ -1653,13 +1852,14 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
 		pull_rt_task(rq);
 }
 
-static inline void init_sched_rt_class(void)
+void init_sched_rt_class(void)
 {
 	unsigned int i;
 
-	for_each_possible_cpu(i)
+	for_each_possible_cpu(i) {
 		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
 					GFP_KERNEL, cpu_to_node(i));
+	}
 }
 #endif /* CONFIG_SMP */
 
@@ -1800,7 +2000,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 		return 0;
 }
 
-static const struct sched_class rt_sched_class = {
+const struct sched_class rt_sched_class = {
 	.next			= &fair_sched_class,
 	.enqueue_task		= enqueue_task_rt,
 	.dequeue_task		= dequeue_task_rt,
@@ -1835,7 +2035,7 @@ static const struct sched_class rt_sched_class = {
 #ifdef CONFIG_SCHED_DEBUG
 extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
 
-static void print_rt_stats(struct seq_file *m, int cpu)
+void print_rt_stats(struct seq_file *m, int cpu)
 {
 	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
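
Most of the additions in this file are code moving out of the old monolithic sched.c and losing their static qualifiers so that the (suppressed) kernel/sched/core.c can keep calling them. A minimal sketch of how the newly exported RT-bandwidth pieces are meant to be wired together, based on the declarations in kernel/sched/sched.h rather than on the suppressed core.c diff:

#include "sched.h"	/* def_rt_bandwidth, init_rt_bandwidth(), global_rt_*() */

/* Illustrative only: fill the default bandwidth object from the
 * sysctl-derived period and runtime (RUNTIME_INF means unlimited). */
static void example_setup_default_rt_bandwidth(void)
{
	init_rt_bandwidth(&def_rt_bandwidth,
			  global_rt_period(), global_rt_runtime());
}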

+ 1136 - 0
kernel/sched/sched.h

@@ -0,0 +1,1136 @@
+
+#include <linux/sched.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/stop_machine.h>
+
+#include "cpupri.h"
+
+extern __read_mostly int scheduler_running;
+
+/*
+ * Convert user-nice values [ -20 ... 0 ... 19 ]
+ * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
+ * and back.
+ */
+#define NICE_TO_PRIO(nice)	(MAX_RT_PRIO + (nice) + 20)
+#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)
+#define TASK_NICE(p)		PRIO_TO_NICE((p)->static_prio)
+
+/*
+ * 'User priority' is the nice value converted to something we
+ * can work with better when scaling various scheduler parameters,
+ * it's a [ 0 ... 39 ] range.
+ */
+#define USER_PRIO(p)		((p)-MAX_RT_PRIO)
+#define TASK_USER_PRIO(p)	USER_PRIO((p)->static_prio)
+#define MAX_USER_PRIO		(USER_PRIO(MAX_PRIO))
+
+/*
+ * Helpers for converting nanosecond timing to jiffy resolution
+ */
+#define NS_TO_JIFFIES(TIME)	((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
+
+#define NICE_0_LOAD		SCHED_LOAD_SCALE
+#define NICE_0_SHIFT		SCHED_LOAD_SHIFT
+
+/*
+ * These are the 'tuning knobs' of the scheduler:
+ *
+ * default timeslice is 100 msecs (used only for SCHED_RR tasks).
+ * Timeslices get refilled after they expire.
+ */
+#define DEF_TIMESLICE		(100 * HZ / 1000)
+
+/*
+ * single value that denotes runtime == period, ie unlimited time.
+ */
+#define RUNTIME_INF	((u64)~0ULL)
+
+static inline int rt_policy(int policy)
+{
+	if (policy == SCHED_FIFO || policy == SCHED_RR)
+		return 1;
+	return 0;
+}
+
+static inline int task_has_rt_policy(struct task_struct *p)
+{
+	return rt_policy(p->policy);
+}
+
+/*
+ * This is the priority-queue data structure of the RT scheduling class:
+ */
+struct rt_prio_array {
+	DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
+	struct list_head queue[MAX_RT_PRIO];
+};
+
+struct rt_bandwidth {
+	/* nests inside the rq lock: */
+	raw_spinlock_t		rt_runtime_lock;
+	ktime_t			rt_period;
+	u64			rt_runtime;
+	struct hrtimer		rt_period_timer;
+};
+
+extern struct mutex sched_domains_mutex;
+
+#ifdef CONFIG_CGROUP_SCHED
+
+#include <linux/cgroup.h>
+
+struct cfs_rq;
+struct rt_rq;
+
+static LIST_HEAD(task_groups);
+
+struct cfs_bandwidth {
+#ifdef CONFIG_CFS_BANDWIDTH
+	raw_spinlock_t lock;
+	ktime_t period;
+	u64 quota, runtime;
+	s64 hierarchal_quota;
+	u64 runtime_expires;
+
+	int idle, timer_active;
+	struct hrtimer period_timer, slack_timer;
+	struct list_head throttled_cfs_rq;
+
+	/* statistics */
+	int nr_periods, nr_throttled;
+	u64 throttled_time;
+#endif
+};
+
+/* task group related information */
+struct task_group {
+	struct cgroup_subsys_state css;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	/* schedulable entities of this group on each cpu */
+	struct sched_entity **se;
+	/* runqueue "owned" by this group on each cpu */
+	struct cfs_rq **cfs_rq;
+	unsigned long shares;
+
+	atomic_t load_weight;
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	struct sched_rt_entity **rt_se;
+	struct rt_rq **rt_rq;
+
+	struct rt_bandwidth rt_bandwidth;
+#endif
+
+	struct rcu_head rcu;
+	struct list_head list;
+
+	struct task_group *parent;
+	struct list_head siblings;
+	struct list_head children;
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+	struct autogroup *autogroup;
+#endif
+
+	struct cfs_bandwidth cfs_bandwidth;
+};
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+#define ROOT_TASK_GROUP_LOAD	NICE_0_LOAD
+
+/*
+ * A weight of 0 or 1 can cause arithmetics problems.
+ * A weight of a cfs_rq is the sum of weights of which entities
+ * are queued on this cfs_rq, so a weight of a entity should not be
+ * too large, so as the shares value of a task group.
+ * (The default weight is 1024 - so there's no practical
+ *  limitation from this.)
+ */
+#define MIN_SHARES	(1UL <<  1)
+#define MAX_SHARES	(1UL << 18)
+#endif
+
+/* Default task group.
+ *	Every task in system belong to this group at bootup.
+ */
+extern struct task_group root_task_group;
+
+typedef int (*tg_visitor)(struct task_group *, void *);
+
+extern int walk_tg_tree_from(struct task_group *from,
+			     tg_visitor down, tg_visitor up, void *data);
+
+/*
+ * Iterate the full tree, calling @down when first entering a node and @up when
+ * leaving it for the final time.
+ *
+ * Caller must hold rcu_lock or sufficient equivalent.
+ */
+static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
+{
+	return walk_tg_tree_from(&root_task_group, down, up, data);
+}
+
+extern int tg_nop(struct task_group *tg, void *data);
+
+extern void free_fair_sched_group(struct task_group *tg);
+extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
+extern void unregister_fair_sched_group(struct task_group *tg, int cpu);
+extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
+			struct sched_entity *se, int cpu,
+			struct sched_entity *parent);
+extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
+extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
+
+extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
+extern void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
+extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
+
+extern void free_rt_sched_group(struct task_group *tg);
+extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
+extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
+		struct sched_rt_entity *rt_se, int cpu,
+		struct sched_rt_entity *parent);
+
+#else /* CONFIG_CGROUP_SCHED */
+
+struct cfs_bandwidth { };
+
+#endif	/* CONFIG_CGROUP_SCHED */
+
+/* CFS-related fields in a runqueue */
+struct cfs_rq {
+	struct load_weight load;
+	unsigned long nr_running, h_nr_running;
+
+	u64 exec_clock;
+	u64 min_vruntime;
+#ifndef CONFIG_64BIT
+	u64 min_vruntime_copy;
+#endif
+
+	struct rb_root tasks_timeline;
+	struct rb_node *rb_leftmost;
+
+	struct list_head tasks;
+	struct list_head *balance_iterator;
+
+	/*
+	 * 'curr' points to currently running entity on this cfs_rq.
+	 * It is set to NULL otherwise (i.e when none are currently running).
+	 */
+	struct sched_entity *curr, *next, *last, *skip;
+
+#ifdef	CONFIG_SCHED_DEBUG
+	unsigned int nr_spread_over;
+#endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */
+
+	/*
+	 * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
+	 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
+	 * (like users, containers etc.)
+	 *
+	 * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
+	 * list is used during load balance.
+	 */
+	int on_list;
+	struct list_head leaf_cfs_rq_list;
+	struct task_group *tg;	/* group that "owns" this runqueue */
+
+#ifdef CONFIG_SMP
+	/*
+	 * the part of load.weight contributed by tasks
+	 */
+	unsigned long task_weight;
+
+	/*
+	 *   h_load = weight * f(tg)
+	 *
+	 * Where f(tg) is the recursive weight fraction assigned to
+	 * this group.
+	 */
+	unsigned long h_load;
+
+	/*
+	 * Maintaining per-cpu shares distribution for group scheduling
+	 *
+	 * load_stamp is the last time we updated the load average
+	 * load_last is the last time we updated the load average and saw load
+	 * load_unacc_exec_time is currently unaccounted execution time
+	 */
+	u64 load_avg;
+	u64 load_period;
+	u64 load_stamp, load_last, load_unacc_exec_time;
+
+	unsigned long load_contribution;
+#endif /* CONFIG_SMP */
+#ifdef CONFIG_CFS_BANDWIDTH
+	int runtime_enabled;
+	u64 runtime_expires;
+	s64 runtime_remaining;
+
+	u64 throttled_timestamp;
+	int throttled, throttle_count;
+	struct list_head throttled_list;
+#endif /* CONFIG_CFS_BANDWIDTH */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+};
+
+static inline int rt_bandwidth_enabled(void)
+{
+	return sysctl_sched_rt_runtime >= 0;
+}
+
+/* Real-Time classes' related field in a runqueue: */
+struct rt_rq {
+	struct rt_prio_array active;
+	unsigned long rt_nr_running;
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
+	struct {
+		int curr; /* highest queued rt task prio */
+#ifdef CONFIG_SMP
+		int next; /* next highest */
+#endif
+	} highest_prio;
+#endif
+#ifdef CONFIG_SMP
+	unsigned long rt_nr_migratory;
+	unsigned long rt_nr_total;
+	int overloaded;
+	struct plist_head pushable_tasks;
+#endif
+	int rt_throttled;
+	u64 rt_time;
+	u64 rt_runtime;
+	/* Nests inside the rq lock: */
+	raw_spinlock_t rt_runtime_lock;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	unsigned long rt_nr_boosted;
+
+	struct rq *rq;
+	struct list_head leaf_rt_rq_list;
+	struct task_group *tg;
+#endif
+};
+
+#ifdef CONFIG_SMP
+
+/*
+ * We add the notion of a root-domain which will be used to define per-domain
+ * variables. Each exclusive cpuset essentially defines an island domain by
+ * fully partitioning the member cpus from any other cpuset. Whenever a new
+ * exclusive cpuset is created, we also create and attach a new root-domain
+ * object.
+ *
+ */
+struct root_domain {
+	atomic_t refcount;
+	atomic_t rto_count;
+	struct rcu_head rcu;
+	cpumask_var_t span;
+	cpumask_var_t online;
+
+	/*
+	 * The "RT overload" flag: it gets set if a CPU has more than
+	 * one runnable RT task.
+	 */
+	cpumask_var_t rto_mask;
+	struct cpupri cpupri;
+};
+
+extern struct root_domain def_root_domain;
+
+#endif /* CONFIG_SMP */
+
+/*
+ * This is the main, per-CPU runqueue data structure.
+ *
+ * Locking rule: those places that want to lock multiple runqueues
+ * (such as the load balancing or the thread migration code), lock
+ * acquire operations must be ordered by ascending &runqueue.
+ */
+struct rq {
+	/* runqueue lock: */
+	raw_spinlock_t lock;
+
+	/*
+	 * nr_running and cpu_load should be in the same cacheline because
+	 * remote CPUs use both these fields when doing load calculation.
+	 */
+	unsigned long nr_running;
+	#define CPU_LOAD_IDX_MAX 5
+	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
+	unsigned long last_load_update_tick;
+#ifdef CONFIG_NO_HZ
+	u64 nohz_stamp;
+	unsigned long nohz_flags;
+#endif
+	int skip_clock_update;
+
+	/* capture load from *all* tasks on this cpu: */
+	struct load_weight load;
+	unsigned long nr_load_updates;
+	u64 nr_switches;
+
+	struct cfs_rq cfs;
+	struct rt_rq rt;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	/* list of leaf cfs_rq on this cpu: */
+	struct list_head leaf_cfs_rq_list;
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+	struct list_head leaf_rt_rq_list;
+#endif
+
+	/*
+	 * This is part of a global counter where only the total sum
+	 * over all CPUs matters. A task can increase this counter on
+	 * one CPU and if it got migrated afterwards it may decrease
+	 * it on another CPU. Always updated under the runqueue lock:
+	 */
+	unsigned long nr_uninterruptible;
+
+	struct task_struct *curr, *idle, *stop;
+	unsigned long next_balance;
+	struct mm_struct *prev_mm;
+
+	u64 clock;
+	u64 clock_task;
+
+	atomic_t nr_iowait;
+
+#ifdef CONFIG_SMP
+	struct root_domain *rd;
+	struct sched_domain *sd;
+
+	unsigned long cpu_power;
+
+	unsigned char idle_balance;
+	/* For active balancing */
+	int post_schedule;
+	int active_balance;
+	int push_cpu;
+	struct cpu_stop_work active_balance_work;
+	/* cpu of this runqueue: */
+	int cpu;
+	int online;
+
+	u64 rt_avg;
+	u64 age_stamp;
+	u64 idle_stamp;
+	u64 avg_idle;
+#endif
+
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+	u64 prev_irq_time;
+#endif
+#ifdef CONFIG_PARAVIRT
+	u64 prev_steal_time;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+	u64 prev_steal_time_rq;
+#endif
+
+	/* calc_load related fields */
+	unsigned long calc_load_update;
+	long calc_load_active;
+
+#ifdef CONFIG_SCHED_HRTICK
+#ifdef CONFIG_SMP
+	int hrtick_csd_pending;
+	struct call_single_data hrtick_csd;
+#endif
+	struct hrtimer hrtick_timer;
+#endif
+
+#ifdef CONFIG_SCHEDSTATS
+	/* latency stats */
+	struct sched_info rq_sched_info;
+	unsigned long long rq_cpu_time;
+	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
+
+	/* sys_sched_yield() stats */
+	unsigned int yld_count;
+
+	/* schedule() stats */
+	unsigned int sched_switch;
+	unsigned int sched_count;
+	unsigned int sched_goidle;
+
+	/* try_to_wake_up() stats */
+	unsigned int ttwu_count;
+	unsigned int ttwu_local;
+#endif
+
+#ifdef CONFIG_SMP
+	struct llist_head wake_list;
+#endif
+};
+
+static inline int cpu_of(struct rq *rq)
+{
+#ifdef CONFIG_SMP
+	return rq->cpu;
+#else
+	return 0;
+#endif
+}
+
+DECLARE_PER_CPU(struct rq, runqueues);
+
+#define rcu_dereference_check_sched_domain(p) \
+	rcu_dereference_check((p), \
+			      lockdep_is_held(&sched_domains_mutex))
+
+/*
+ * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
+ * See detach_destroy_domains: synchronize_sched for details.
+ *
+ * The domain tree of any CPU may only be accessed from within
+ * preempt-disabled sections.
+ */
+#define for_each_domain(cpu, __sd) \
+	for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
+
+#define for_each_lower_domain(sd) for (; sd; sd = sd->child)
+
+#define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
+#define this_rq()		(&__get_cpu_var(runqueues))
+#define task_rq(p)		cpu_rq(task_cpu(p))
+#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
+#define raw_rq()		(&__raw_get_cpu_var(runqueues))
+
+#include "stats.h"
+#include "auto_group.h"
+
+#ifdef CONFIG_CGROUP_SCHED
+
+/*
+ * Return the group to which this tasks belongs.
+ *
+ * We use task_subsys_state_check() and extend the RCU verification with
+ * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each
+ * task it moves into the cgroup. Therefore by holding either of those locks,
+ * we pin the task to the current cgroup.
+ */
+static inline struct task_group *task_group(struct task_struct *p)
+{
+	struct task_group *tg;
+	struct cgroup_subsys_state *css;
+
+	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
+			lockdep_is_held(&p->pi_lock) ||
+			lockdep_is_held(&task_rq(p)->lock));
+	tg = container_of(css, struct task_group, css);
+
+	return autogroup_task_group(p, tg);
+}
+
+/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
+{
+#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
+	struct task_group *tg = task_group(p);
+#endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	p->se.cfs_rq = tg->cfs_rq[cpu];
+	p->se.parent = tg->se[cpu];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	p->rt.rt_rq  = tg->rt_rq[cpu];
+	p->rt.parent = tg->rt_se[cpu];
+#endif
+}
+
+#else /* CONFIG_CGROUP_SCHED */
+
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline struct task_group *task_group(struct task_struct *p)
+{
+	return NULL;
+}
+
+#endif /* CONFIG_CGROUP_SCHED */
+
+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+	set_task_rq(p, cpu);
+#ifdef CONFIG_SMP
+	/*
+	 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
+	 * successfuly executed on another CPU. We must ensure that updates of
+	 * per-task data have been completed by this moment.
+	 */
+	smp_wmb();
+	task_thread_info(p)->cpu = cpu;
+#endif
+}
+
+/*
+ * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
+ */
+#ifdef CONFIG_SCHED_DEBUG
+# include <linux/jump_label.h>
+# define const_debug __read_mostly
+#else
+# define const_debug const
+#endif
+
+extern const_debug unsigned int sysctl_sched_features;
+
+#define SCHED_FEAT(name, enabled)	\
+	__SCHED_FEAT_##name ,
+
+enum {
+#include "features.h"
+	__SCHED_FEAT_NR,
+};
+
+#undef SCHED_FEAT
+
+#if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL)
+static __always_inline bool static_branch__true(struct jump_label_key *key)
+{
+	return likely(static_branch(key)); /* Not out of line branch. */
+}
+
+static __always_inline bool static_branch__false(struct jump_label_key *key)
+{
+	return unlikely(static_branch(key)); /* Out of line branch. */
+}
+
+#define SCHED_FEAT(name, enabled)					\
+static __always_inline bool static_branch_##name(struct jump_label_key *key) \
+{									\
+	return static_branch__##enabled(key);				\
+}
+
+#include "features.h"
+
+#undef SCHED_FEAT
+
+extern struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR];
+#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
+#else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */
+#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
+#endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
+
+static inline u64 global_rt_period(void)
+{
+	return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
+}
+
+static inline u64 global_rt_runtime(void)
+{
+	if (sysctl_sched_rt_runtime < 0)
+		return RUNTIME_INF;
+
+	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
+}
+
+
+
+static inline int task_current(struct rq *rq, struct task_struct *p)
+{
+	return rq->curr == p;
+}
+
+static inline int task_running(struct rq *rq, struct task_struct *p)
+{
+#ifdef CONFIG_SMP
+	return p->on_cpu;
+#else
+	return task_current(rq, p);
+#endif
+}
+
+
+#ifndef prepare_arch_switch
+# define prepare_arch_switch(next)	do { } while (0)
+#endif
+#ifndef finish_arch_switch
+# define finish_arch_switch(prev)	do { } while (0)
+#endif
+
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
+static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * We can optimise this out completely for !SMP, because the
+	 * SMP rebalancing from interrupt is the only thing that cares
+	 * here.
+	 */
+	next->on_cpu = 1;
+#endif
+}
+
+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * After ->on_cpu is cleared, the task can be moved to a different CPU.
+	 * We must ensure this doesn't happen until the switch is completely
+	 * finished.
+	 */
+	smp_wmb();
+	prev->on_cpu = 0;
+#endif
+#ifdef CONFIG_DEBUG_SPINLOCK
+	/* this is a valid case when another task releases the spinlock */
+	rq->lock.owner = current;
+#endif
+	/*
+	 * If we are tracking spinlock dependencies then we have to
+	 * fix up the runqueue lock - which gets 'carried over' from
+	 * prev into current:
+	 */
+	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
+
+	raw_spin_unlock_irq(&rq->lock);
+}
+
+#else /* __ARCH_WANT_UNLOCKED_CTXSW */
+static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * We can optimise this out completely for !SMP, because the
+	 * SMP rebalancing from interrupt is the only thing that cares
+	 * here.
+	 */
+	next->on_cpu = 1;
+#endif
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+	raw_spin_unlock_irq(&rq->lock);
+#else
+	raw_spin_unlock(&rq->lock);
+#endif
+}
+
+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * After ->on_cpu is cleared, the task can be moved to a different CPU.
+	 * We must ensure this doesn't happen until the switch is completely
+	 * finished.
+	 */
+	smp_wmb();
+	prev->on_cpu = 0;
+#endif
+#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+	local_irq_enable();
+#endif
+}
+#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
+
+
+static inline void update_load_add(struct load_weight *lw, unsigned long inc)
+{
+	lw->weight += inc;
+	lw->inv_weight = 0;
+}
+
+static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
+{
+	lw->weight -= dec;
+	lw->inv_weight = 0;
+}
+
+static inline void update_load_set(struct load_weight *lw, unsigned long w)
+{
+	lw->weight = w;
+	lw->inv_weight = 0;
+}
+
+/*
+ * To aid in avoiding the subversion of "niceness" due to uneven distribution
+ * of tasks with abnormal "nice" values across CPUs the contribution that
+ * each task makes to its run queue's load is weighted according to its
+ * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
+ * scaled version of the new time slice allocation that they receive on time
+ * slice expiry etc.
+ */
+
+#define WEIGHT_IDLEPRIO                3
+#define WMULT_IDLEPRIO         1431655765
+
+/*
+ * Nice levels are multiplicative, with a gentle 10% change for every
+ * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
+ * nice 1, it will get ~10% less CPU time than another CPU-bound task
+ * that remained on nice 0.
+ *
+ * The "10% effect" is relative and cumulative: from _any_ nice level,
+ * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
+ * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
+ * If a task goes up by ~10% and another task goes down by ~10% then
+ * the relative distance between them is ~25%.)
+ */
+static const int prio_to_weight[40] = {
+ /* -20 */     88761,     71755,     56483,     46273,     36291,
+ /* -15 */     29154,     23254,     18705,     14949,     11916,
+ /* -10 */      9548,      7620,      6100,      4904,      3906,
+ /*  -5 */      3121,      2501,      1991,      1586,      1277,
+ /*   0 */      1024,       820,       655,       526,       423,
+ /*   5 */       335,       272,       215,       172,       137,
+ /*  10 */       110,        87,        70,        56,        45,
+ /*  15 */        36,        29,        23,        18,        15,
+};
+
+/*
+ * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated.
+ *
+ * In cases where the weight does not change often, we can use the
+ * precalculated inverse to speed up arithmetics by turning divisions
+ * into multiplications:
+ */
+static const u32 prio_to_wmult[40] = {
+ /* -20 */     48388,     59856,     76040,     92818,    118348,
+ /* -15 */    147320,    184698,    229616,    287308,    360437,
+ /* -10 */    449829,    563644,    704093,    875809,   1099582,
+ /*  -5 */   1376151,   1717300,   2157191,   2708050,   3363326,
+ /*   0 */   4194304,   5237765,   6557202,   8165337,  10153587,
+ /*   5 */  12820798,  15790321,  19976592,  24970740,  31350126,
+ /*  10 */  39045157,  49367440,  61356676,  76695844,  95443717,
+ /*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
+};
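
The two tables above encode the ~1.25 step described in the comment (for example, 1024/820 is ~1.25 between nice 0 and nice 1) together with the 2^32/weight inverses. As a standalone illustration of how such an inverse turns a division into a multiply-and-shift, here is a sketch built from the nice +5 entries of the tables; it is illustrative only, not the scheduler's actual delta-scaling code:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t delta_exec = 3000000;	/* runtime to scale, in ns */
	uint64_t weight     = 335;	/* prio_to_weight[25], nice +5 */
	uint64_t inv_weight = 12820798;	/* prio_to_wmult[25] == 2^32/335, truncated */

	/* exact form: scale by the nice-0 weight, divide by the task's weight */
	uint64_t exact  = delta_exec * 1024 / weight;

	/* table-driven form: multiply by the precalculated inverse, shift by 32 */
	uint64_t approx = (delta_exec * 1024 * inv_weight) >> 32;

	/* the two results differ only by the truncation in 2^32/weight */
	printf("exact=%llu approx=%llu\n",
	       (unsigned long long)exact, (unsigned long long)approx);
	return 0;
}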
+
+/* Time spent by the tasks of the cpu accounting group executing in ... */
+enum cpuacct_stat_index {
+	CPUACCT_STAT_USER,	/* ... user mode */
+	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */
+
+	CPUACCT_STAT_NSTATS,
+};
+
+
+#define sched_class_highest (&stop_sched_class)
+#define for_each_class(class) \
+   for (class = sched_class_highest; class; class = class->next)
+
+extern const struct sched_class stop_sched_class;
+extern const struct sched_class rt_sched_class;
+extern const struct sched_class fair_sched_class;
+extern const struct sched_class idle_sched_class;
+
+
+#ifdef CONFIG_SMP
+
+extern void trigger_load_balance(struct rq *rq, int cpu);
+extern void idle_balance(int this_cpu, struct rq *this_rq);
+
+#else	/* CONFIG_SMP */
+
+static inline void idle_balance(int cpu, struct rq *rq)
+{
+}
+
+#endif
+
+extern void sysrq_sched_debug_show(void);
+extern void sched_init_granularity(void);
+extern void update_max_interval(void);
+extern void update_group_power(struct sched_domain *sd, int cpu);
+extern int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu);
+extern void init_sched_rt_class(void);
+extern void init_sched_fair_class(void);
+
+extern void resched_task(struct task_struct *p);
+extern void resched_cpu(int cpu);
+
+extern struct rt_bandwidth def_rt_bandwidth;
+extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
+
+extern void update_cpu_load(struct rq *this_rq);
+
+#ifdef CONFIG_CGROUP_CPUACCT
+#include <linux/cgroup.h>
+/* track cpu usage of a group of tasks and its child groups */
+struct cpuacct {
+	struct cgroup_subsys_state css;
+	/* cpuusage holds pointer to a u64-type object on every cpu */
+	u64 __percpu *cpuusage;
+	struct kernel_cpustat __percpu *cpustat;
+};
+
+/* return cpu accounting group corresponding to this container */
+static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
+{
+	return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id),
+			    struct cpuacct, css);
+}
+
+/* return cpu accounting group to which this task belongs */
+static inline struct cpuacct *task_ca(struct task_struct *tsk)
+{
+	return container_of(task_subsys_state(tsk, cpuacct_subsys_id),
+			    struct cpuacct, css);
+}
+
+static inline struct cpuacct *parent_ca(struct cpuacct *ca)
+{
+	if (!ca || !ca->css.cgroup->parent)
+		return NULL;
+	return cgroup_ca(ca->css.cgroup->parent);
+}
+
+extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+#else
+static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
+#endif
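
cpuacct_charge() is only declared here. As a hedged sketch of how the accessors above compose (not necessarily the exact body that lives in kernel/sched/core.c), a charge routine walks from the task's group up through parent_ca() under RCU, adding the cputime to each level's per-cpu counter:

void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
	struct cpuacct *ca;
	int cpu = task_cpu(tsk);

	rcu_read_lock();

	/* charge the task's own group and every ancestor group */
	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
		*per_cpu_ptr(ca->cpuusage, cpu) += cputime;

	rcu_read_unlock();
}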
+
+static inline void inc_nr_running(struct rq *rq)
+{
+	rq->nr_running++;
+}
+
+static inline void dec_nr_running(struct rq *rq)
+{
+	rq->nr_running--;
+}
+
+extern void update_rq_clock(struct rq *rq);
+
+extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
+extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
+
+extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
+
+extern const_debug unsigned int sysctl_sched_time_avg;
+extern const_debug unsigned int sysctl_sched_nr_migrate;
+extern const_debug unsigned int sysctl_sched_migration_cost;
+
+static inline u64 sched_avg_period(void)
+{
+	return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
+}
+
+void calc_load_account_idle(struct rq *this_rq);
+
+#ifdef CONFIG_SCHED_HRTICK
+
+/*
+ * Use hrtick when:
+ *  - enabled by features
+ *  - hrtimer is actually high res
+ */
+static inline int hrtick_enabled(struct rq *rq)
+{
+	if (!sched_feat(HRTICK))
+		return 0;
+	if (!cpu_active(cpu_of(rq)))
+		return 0;
+	return hrtimer_is_hres_active(&rq->hrtick_timer);
+}
+
+void hrtick_start(struct rq *rq, u64 delay);
+
+#else
+
+static inline int hrtick_enabled(struct rq *rq)
+{
+	return 0;
+}
+
+#endif /* CONFIG_SCHED_HRTICK */
+
+#ifdef CONFIG_SMP
+extern void sched_avg_update(struct rq *rq);
+static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
+{
+	rq->rt_avg += rt_delta;
+	sched_avg_update(rq);
+}
+#else
+static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
+static inline void sched_avg_update(struct rq *rq) { }
+#endif
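
sched_avg_update() itself is defined elsewhere in core.c; a hedged sketch of the aging step it is expected to perform (assuming the rq->clock and rq->age_stamp fields of this kernel generation) halves rt_avg once for every sched_avg_period() that has elapsed since the runqueue was last aged:

void sched_avg_update(struct rq *rq)
{
	s64 period = sched_avg_period();

	while ((s64)(rq->clock - rq->age_stamp) > period) {
		/* decay one full period's worth of accumulated RT time */
		rq->age_stamp += period;
		rq->rt_avg /= 2;
	}
}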
+
+extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
+
+#ifdef CONFIG_SMP
+#ifdef CONFIG_PREEMPT
+
+static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
+
+/*
+ * fair double_lock_balance: Safely acquires both rq->locks in a fair
+ * way at the expense of forcing extra atomic operations in all
+ * invocations.  This assures that the double_lock is acquired using the
+ * same underlying policy as the spinlock_t on this architecture, which
+ * reduces latency compared to the unfair variant below.  However, it
+ * also adds more overhead and therefore may reduce throughput.
+ */
+static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	raw_spin_unlock(&this_rq->lock);
+	double_rq_lock(this_rq, busiest);
+
+	return 1;
+}
+
+#else
+/*
+ * Unfair double_lock_balance: Optimizes throughput at the expense of
+ * latency by eliminating extra atomic operations when the locks are
+ * already in proper order on entry.  This favors lower cpu-ids and will
+ * grant the double lock to lower cpus over higher ids under contention,
+ * regardless of entry order into the function.
+ */
+static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	int ret = 0;
+
+	if (unlikely(!raw_spin_trylock(&busiest->lock))) {
+		if (busiest < this_rq) {
+			raw_spin_unlock(&this_rq->lock);
+			raw_spin_lock(&busiest->lock);
+			raw_spin_lock_nested(&this_rq->lock,
+					      SINGLE_DEPTH_NESTING);
+			ret = 1;
+		} else
+			raw_spin_lock_nested(&busiest->lock,
+					      SINGLE_DEPTH_NESTING);
+	}
+	return ret;
+}
+
+#endif /* CONFIG_PREEMPT */
+
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+{
+	if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work well under rq->lock */
+		raw_spin_unlock(&this_rq->lock);
+		BUG_ON(1);
+	}
+
+	return _double_lock_balance(this_rq, busiest);
+}
+
+static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(busiest->lock)
+{
+	raw_spin_unlock(&busiest->lock);
+	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+}
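
For illustration, a hypothetical pull helper (not part of this patch) shows the contract of the two functions above: a nonzero return from double_lock_balance() means this_rq->lock was dropped and retaken, so any state cached before the call has to be revalidated:

static int pull_one_task(struct rq *this_rq, struct rq *busiest)
{
	int dropped;

	lockdep_assert_held(&this_rq->lock);

	dropped = double_lock_balance(this_rq, busiest);
	if (dropped && !busiest->nr_running) {
		/* busiest may have drained while this_rq->lock was dropped */
		double_unlock_balance(this_rq, busiest);
		return 0;
	}

	/* ... detach a task from busiest and attach it to this_rq ... */

	double_unlock_balance(this_rq, busiest);
	return 1;
}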
+
+/*
+ * double_rq_lock - safely lock two runqueues
+ *
+ * Note this does not disable interrupts like task_rq_lock,
+ * you need to do so manually before calling.
+ */
+static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
+	__acquires(rq1->lock)
+	__acquires(rq2->lock)
+{
+	BUG_ON(!irqs_disabled());
+	if (rq1 == rq2) {
+		raw_spin_lock(&rq1->lock);
+		__acquire(rq2->lock);	/* Fake it out ;) */
+	} else {
+		if (rq1 < rq2) {
+			raw_spin_lock(&rq1->lock);
+			raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
+		} else {
+			raw_spin_lock(&rq2->lock);
+			raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
+		}
+	}
+}
+
+/*
+ * double_rq_unlock - safely unlock two runqueues
+ *
+ * Note this does not restore interrupts like task_rq_unlock,
+ * you need to do so manually after calling.
+ */
+static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+	__releases(rq1->lock)
+	__releases(rq2->lock)
+{
+	raw_spin_unlock(&rq1->lock);
+	if (rq1 != rq2)
+		raw_spin_unlock(&rq2->lock);
+	else
+		__release(rq2->lock);
+}
+
+#else /* CONFIG_SMP */
+
+/*
+ * double_rq_lock - safely lock two runqueues
+ *
+ * Note this does not disable interrupts like task_rq_lock,
+ * you need to do so manually before calling.
+ */
+static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
+	__acquires(rq1->lock)
+	__acquires(rq2->lock)
+{
+	BUG_ON(!irqs_disabled());
+	BUG_ON(rq1 != rq2);
+	raw_spin_lock(&rq1->lock);
+	__acquire(rq2->lock);	/* Fake it out ;) */
+}
+
+/*
+ * double_rq_unlock - safely unlock two runqueues
+ *
+ * Note this does not restore interrupts like task_rq_unlock,
+ * you need to do so manually after calling.
+ */
+static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+	__releases(rq1->lock)
+	__releases(rq2->lock)
+{
+	BUG_ON(rq1 != rq2);
+	raw_spin_unlock(&rq1->lock);
+	__release(rq2->lock);
+}
+
+#endif
+
+extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
+extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
+extern void print_cfs_stats(struct seq_file *m, int cpu);
+extern void print_rt_stats(struct seq_file *m, int cpu);
+
+extern void init_cfs_rq(struct cfs_rq *cfs_rq);
+extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);
+extern void unthrottle_offline_cfs_rqs(struct rq *rq);
+
+extern void account_cfs_bandwidth_used(int enabled, int was_enabled);
+
+#ifdef CONFIG_NO_HZ
+enum rq_nohz_flag_bits {
+	NOHZ_TICK_STOPPED,
+	NOHZ_BALANCE_KICK,
+	NOHZ_IDLE,
+};
+
+#define nohz_flags(cpu)	(&cpu_rq(cpu)->nohz_flags)
+#endif
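
As a usage note (the helper name here is made up, not from this patch), the nohz balancing code manipulates these bits with the standard atomic bitops on the word that nohz_flags() points at, for example:

static inline bool cpu_tick_stopped(int cpu)
{
	/* NOHZ_TICK_STOPPED is set when the cpu enters tickless idle */
	return test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
}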

+ 111 - 0
kernel/sched/stats.c

@@ -0,0 +1,111 @@
+
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+
+#include "sched.h"
+
+/*
+ * bump this up when changing the output format or the meaning of an existing
+ * format, so that tools can adapt (or abort)
+ */
+#define SCHEDSTAT_VERSION 15
+
+static int show_schedstat(struct seq_file *seq, void *v)
+{
+	int cpu;
+	int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
+	char *mask_str = kmalloc(mask_len, GFP_KERNEL);
+
+	if (mask_str == NULL)
+		return -ENOMEM;
+
+	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
+	seq_printf(seq, "timestamp %lu\n", jiffies);
+	for_each_online_cpu(cpu) {
+		struct rq *rq = cpu_rq(cpu);
+#ifdef CONFIG_SMP
+		struct sched_domain *sd;
+		int dcount = 0;
+#endif
+
+		/* runqueue-specific stats */
+		seq_printf(seq,
+		    "cpu%d %u %u %u %u %u %u %llu %llu %lu",
+		    cpu, rq->yld_count,
+		    rq->sched_switch, rq->sched_count, rq->sched_goidle,
+		    rq->ttwu_count, rq->ttwu_local,
+		    rq->rq_cpu_time,
+		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
+
+		seq_printf(seq, "\n");
+
+#ifdef CONFIG_SMP
+		/* domain-specific stats */
+		rcu_read_lock();
+		for_each_domain(cpu, sd) {
+			enum cpu_idle_type itype;
+
+			cpumask_scnprintf(mask_str, mask_len,
+					  sched_domain_span(sd));
+			seq_printf(seq, "domain%d %s", dcount++, mask_str);
+			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
+					itype++) {
+				seq_printf(seq, " %u %u %u %u %u %u %u %u",
+				    sd->lb_count[itype],
+				    sd->lb_balanced[itype],
+				    sd->lb_failed[itype],
+				    sd->lb_imbalance[itype],
+				    sd->lb_gained[itype],
+				    sd->lb_hot_gained[itype],
+				    sd->lb_nobusyq[itype],
+				    sd->lb_nobusyg[itype]);
+			}
+			seq_printf(seq,
+				   " %u %u %u %u %u %u %u %u %u %u %u %u\n",
+			    sd->alb_count, sd->alb_failed, sd->alb_pushed,
+			    sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
+			    sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
+			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
+			    sd->ttwu_move_balance);
+		}
+		rcu_read_unlock();
+#endif
+	}
+	kfree(mask_str);
+	return 0;
+}
+
+static int schedstat_open(struct inode *inode, struct file *file)
+{
+	unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
+	char *buf = kmalloc(size, GFP_KERNEL);
+	struct seq_file *m;
+	int res;
+
+	if (!buf)
+		return -ENOMEM;
+	res = single_open(file, show_schedstat, NULL);
+	if (!res) {
+		m = file->private_data;
+		m->buf = buf;
+		m->size = size;
+	} else
+		kfree(buf);
+	return res;
+}
+
+static const struct file_operations proc_schedstat_operations = {
+	.open    = schedstat_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+};
+
+static int __init proc_schedstat_init(void)
+{
+	proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
+	return 0;
+}
+module_init(proc_schedstat_init);
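
From userspace, the resulting /proc/schedstat file starts with the version and timestamp lines emitted above. A minimal consumer (illustrative only, not part of the patch) should check the version before parsing further, since the format is allowed to change across SCHEDSTAT_VERSION bumps:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/schedstat", "r");
	unsigned int version;
	unsigned long timestamp;

	if (!f)
		return 1;
	if (fscanf(f, "version %u timestamp %lu", &version, &timestamp) != 2 ||
	    version != 15) {
		fprintf(stderr, "unexpected schedstat format\n");
		fclose(f);
		return 1;
	}
	printf("schedstat version %u, timestamp %lu\n", version, timestamp);
	fclose(f);
	return 0;
}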

+ 0 - 103
kernel/sched_stats.h → kernel/sched/stats.h

@@ -1,108 +1,5 @@
 
 #ifdef CONFIG_SCHEDSTATS
-/*
- * bump this up when changing the output format or the meaning of an existing
- * format, so that tools can adapt (or abort)
- */
-#define SCHEDSTAT_VERSION 15
-
-static int show_schedstat(struct seq_file *seq, void *v)
-{
-	int cpu;
-	int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
-	char *mask_str = kmalloc(mask_len, GFP_KERNEL);
-
-	if (mask_str == NULL)
-		return -ENOMEM;
-
-	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
-	seq_printf(seq, "timestamp %lu\n", jiffies);
-	for_each_online_cpu(cpu) {
-		struct rq *rq = cpu_rq(cpu);
-#ifdef CONFIG_SMP
-		struct sched_domain *sd;
-		int dcount = 0;
-#endif
-
-		/* runqueue-specific stats */
-		seq_printf(seq,
-		    "cpu%d %u %u %u %u %u %u %llu %llu %lu",
-		    cpu, rq->yld_count,
-		    rq->sched_switch, rq->sched_count, rq->sched_goidle,
-		    rq->ttwu_count, rq->ttwu_local,
-		    rq->rq_cpu_time,
-		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
-
-		seq_printf(seq, "\n");
-
-#ifdef CONFIG_SMP
-		/* domain-specific stats */
-		rcu_read_lock();
-		for_each_domain(cpu, sd) {
-			enum cpu_idle_type itype;
-
-			cpumask_scnprintf(mask_str, mask_len,
-					  sched_domain_span(sd));
-			seq_printf(seq, "domain%d %s", dcount++, mask_str);
-			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
-					itype++) {
-				seq_printf(seq, " %u %u %u %u %u %u %u %u",
-				    sd->lb_count[itype],
-				    sd->lb_balanced[itype],
-				    sd->lb_failed[itype],
-				    sd->lb_imbalance[itype],
-				    sd->lb_gained[itype],
-				    sd->lb_hot_gained[itype],
-				    sd->lb_nobusyq[itype],
-				    sd->lb_nobusyg[itype]);
-			}
-			seq_printf(seq,
-				   " %u %u %u %u %u %u %u %u %u %u %u %u\n",
-			    sd->alb_count, sd->alb_failed, sd->alb_pushed,
-			    sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
-			    sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
-			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
-			    sd->ttwu_move_balance);
-		}
-		rcu_read_unlock();
-#endif
-	}
-	kfree(mask_str);
-	return 0;
-}
-
-static int schedstat_open(struct inode *inode, struct file *file)
-{
-	unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
-	char *buf = kmalloc(size, GFP_KERNEL);
-	struct seq_file *m;
-	int res;
-
-	if (!buf)
-		return -ENOMEM;
-	res = single_open(file, show_schedstat, NULL);
-	if (!res) {
-		m = file->private_data;
-		m->buf = buf;
-		m->size = size;
-	} else
-		kfree(buf);
-	return res;
-}
-
-static const struct file_operations proc_schedstat_operations = {
-	.open    = schedstat_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = single_release,
-};
-
-static int __init proc_schedstat_init(void)
-{
-	proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
-	return 0;
-}
-module_init(proc_schedstat_init);
 
 /*
  * Expects runqueue lock to be held for atomicity of update

+ 3 - 1
kernel/sched_stoptask.c → kernel/sched/stop_task.c

@@ -1,3 +1,5 @@
+#include "sched.h"
+
 /*
  * stop-task scheduling class.
  *
@@ -80,7 +82,7 @@ get_rr_interval_stop(struct rq *rq, struct task_struct *task)
 /*
  * Simple, special scheduling class for the per-CPU stop tasks:
  */
-static const struct sched_class stop_sched_class = {
+const struct sched_class stop_sched_class = {
 	.next			= &rt_sched_class,
 
 	.enqueue_task		= enqueue_task_stop,

+ 9 - 0
kernel/time/tick-sched.c

@@ -296,6 +296,15 @@ void tick_nohz_stop_sched_tick(int inidle)
 	cpu = smp_processor_id();
 	ts = &per_cpu(tick_cpu_sched, cpu);
 
+	/*
+	 * Update the idle state in the scheduler domain hierarchy
+	 * when tick_nohz_stop_sched_tick() is called from the idle loop.
+	 * State will be updated to busy during the first busy tick after
+	 * exiting idle.
+	 */
+	if (inidle)
+		set_cpu_sd_state_idle();
+
 	/*
 	 * Call to tick_nohz_start_idle stops the last_update_time from being
 	 * updated. Thus, it must not be called in the event we are called from

Too many files were changed in this diff, so some files are not shown.