
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (40 commits)
  sched/tracing: Add a new tracepoint for sleeptime
  sched: Disable scheduler warnings during oopses
  sched: Fix cgroup movement of waking process
  sched: Fix cgroup movement of newly created process
  sched: Fix cgroup movement of forking process
  sched: Remove cfs bandwidth period check in tg_set_cfs_period()
  sched: Fix load-balance lock-breaking
  sched: Replace all_pinned with a generic flags field
  sched: Only queue remote wakeups when crossing cache boundaries
  sched: Add missing rcu_dereference() around ->real_parent usage
  [S390] fix cputime overflow in uptime_proc_show
  [S390] cputime: add sparse checking and cleanup
  sched: Mark parent and real_parent as __rcu
  sched, nohz: Fix missing RCU read lock
  sched, nohz: Set the NOHZ_BALANCE_KICK flag for idle load balancer
  sched, nohz: Fix the idle cpu check in nohz_idle_balance
  sched: Use jump_labels for sched_feat
  sched/accounting: Fix parameter passing in task_group_account_field
  sched/accounting: Fix user/system tick double accounting
  sched/accounting: Re-use scheduler statistics for the root cgroup
  ...

Fix up conflicts in
 - arch/ia64/include/asm/cputime.h, include/asm-generic/cputime.h
	usecs_to_cputime64() vs the sparse cleanups
 - kernel/sched/fair.c, kernel/time/tick-sched.c
	scheduler changes in multiple branches
Linus Torvalds, commit 0db49b72bc
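Most of the per-file hunks below are mechanical fallout of the "[S390] cputime: add sparse checking and cleanup" change listed above: cputime_t/cputime64_t become __nocast types, the cputime_add()/cputime_sub() helper macros disappear in favour of plain arithmetic, and every conversion between cputime and ordinary integers is spelled with an explicit __force cast so sparse can flag accidental mixing. A minimal standalone sketch of that pattern (not kernel code; the __nocast/__force stubs are simplified from the kernel's compiler headers, and the nanosecond granularity is only an example):

#include <stdio.h>

#ifdef __CHECKER__			/* defined when the file is run through sparse */
# define __nocast	__attribute__((nocast))
# define __force	__attribute__((force))
#else
# define __nocast
# define __force
#endif

#define NSEC_PER_USEC	1000ULL

/* Opaque to sparse: implicit conversions and raw arithmetic get flagged. */
typedef unsigned long long __nocast cputime_t;

/* Conversions spell out the cast with __force, as in the headers below. */
static inline unsigned long long cputime_to_usecs(cputime_t ct)
{
	return (__force unsigned long long) ct / NSEC_PER_USEC;
}

static inline cputime_t usecs_to_cputime(unsigned long long usecs)
{
	return (__force cputime_t)(usecs * NSEC_PER_USEC);
}

int main(void)
{
	cputime_t ct = usecs_to_cputime(1500);

	printf("%llu usecs\n", cputime_to_usecs(ct));
	return 0;
}

Compiled with a normal compiler the annotations expand to nothing; only sparse (make C=1 in the kernel) sees them.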

arch/ia64/include/asm/cputime.h (+34 -38)

@@ -26,60 +26,53 @@
 #include <linux/jiffies.h>
 #include <asm/processor.h>
 
-typedef u64 cputime_t;
-typedef u64 cputime64_t;
+typedef u64 __nocast cputime_t;
+typedef u64 __nocast cputime64_t;
 
-#define cputime_zero			((cputime_t)0)
 #define cputime_one_jiffy		jiffies_to_cputime(1)
-#define cputime_max			((~((cputime_t)0) >> 1) - 1)
-#define cputime_add(__a, __b)		((__a) +  (__b))
-#define cputime_sub(__a, __b)		((__a) -  (__b))
-#define cputime_div(__a, __n)		((__a) /  (__n))
-#define cputime_halve(__a)		((__a) >> 1)
-#define cputime_eq(__a, __b)		((__a) == (__b))
-#define cputime_gt(__a, __b)		((__a) >  (__b))
-#define cputime_ge(__a, __b)		((__a) >= (__b))
-#define cputime_lt(__a, __b)		((__a) <  (__b))
-#define cputime_le(__a, __b)		((__a) <= (__b))
-
-#define cputime64_zero			((cputime64_t)0)
-#define cputime64_add(__a, __b)		((__a) + (__b))
-#define cputime64_sub(__a, __b)		((__a) - (__b))
-#define cputime_to_cputime64(__ct)	(__ct)
 
 /*
  * Convert cputime <-> jiffies (HZ)
  */
-#define cputime_to_jiffies(__ct)	((__ct) / (NSEC_PER_SEC / HZ))
-#define jiffies_to_cputime(__jif)	((__jif) * (NSEC_PER_SEC / HZ))
-#define cputime64_to_jiffies64(__ct)	((__ct) / (NSEC_PER_SEC / HZ))
-#define jiffies64_to_cputime64(__jif)	((__jif) * (NSEC_PER_SEC / HZ))
+#define cputime_to_jiffies(__ct)	\
+	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies_to_cputime(__jif)	\
+	(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
+#define cputime64_to_jiffies64(__ct)	\
+	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies64_to_cputime64(__jif)	\
+	(__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
 
 /*
  * Convert cputime <-> microseconds
  */
-#define cputime_to_usecs(__ct)		((__ct) / NSEC_PER_USEC)
-#define usecs_to_cputime(__usecs)	((__usecs) * NSEC_PER_USEC)
-#define usecs_to_cputime64(__usecs)	usecs_to_cputime(__usecs)
+#define cputime_to_usecs(__ct)		\
+	((__force u64)(__ct) / NSEC_PER_USEC)
+#define usecs_to_cputime(__usecs)	\
+	(__force cputime_t)((__usecs) * NSEC_PER_USEC)
+#define usecs_to_cputime64(__usecs)	\
+	(__force cputime64_t)((__usecs) * NSEC_PER_USEC)
 
 /*
  * Convert cputime <-> seconds
  */
-#define cputime_to_secs(__ct)		((__ct) / NSEC_PER_SEC)
-#define secs_to_cputime(__secs)		((__secs) * NSEC_PER_SEC)
+#define cputime_to_secs(__ct)		\
+	((__force u64)(__ct) / NSEC_PER_SEC)
+#define secs_to_cputime(__secs)		\
+	(__force cputime_t)((__secs) * NSEC_PER_SEC)
 
 /*
  * Convert cputime <-> timespec (nsec)
  */
 static inline cputime_t timespec_to_cputime(const struct timespec *val)
 {
-	cputime_t ret = val->tv_sec * NSEC_PER_SEC;
-	return (ret + val->tv_nsec);
+	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
+	return (__force cputime_t) ret;
 }
 static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
 {
-	val->tv_sec  = ct / NSEC_PER_SEC;
-	val->tv_nsec = ct % NSEC_PER_SEC;
+	val->tv_sec  = (__force u64) ct / NSEC_PER_SEC;
+	val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
 }
 
 /*
@@ -87,25 +80,28 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
  */
 static inline cputime_t timeval_to_cputime(struct timeval *val)
 {
-	cputime_t ret = val->tv_sec * NSEC_PER_SEC;
-	return (ret + val->tv_usec * NSEC_PER_USEC);
+	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
+	return (__force cputime_t) ret;
}
 static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
 {
-	val->tv_sec = ct / NSEC_PER_SEC;
-	val->tv_usec = (ct % NSEC_PER_SEC) / NSEC_PER_USEC;
+	val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
+	val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
 }
 
 /*
  * Convert cputime <-> clock (USER_HZ)
  */
-#define cputime_to_clock_t(__ct)	((__ct) / (NSEC_PER_SEC / USER_HZ))
-#define clock_t_to_cputime(__x)		((__x) * (NSEC_PER_SEC / USER_HZ))
+#define cputime_to_clock_t(__ct)	\
+	((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
+#define clock_t_to_cputime(__x)		\
+	(__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
 
 /*
  * Convert cputime64 to clock.
  */
-#define cputime64_to_clock_t(__ct)      cputime_to_clock_t((cputime_t)__ct)
+#define cputime64_to_clock_t(__ct)	\
+	cputime_to_clock_t((__force cputime_t)__ct)
 
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 #endif /* __IA64_CPUTIME_H */

arch/powerpc/include/asm/cputime.h (+27 -43)

@@ -29,25 +29,8 @@ static inline void setup_cputime_one_jiffy(void) { }
 #include <asm/time.h>
 #include <asm/param.h>
 
-typedef u64 cputime_t;
-typedef u64 cputime64_t;
-
-#define cputime_zero			((cputime_t)0)
-#define cputime_max			((~((cputime_t)0) >> 1) - 1)
-#define cputime_add(__a, __b)		((__a) +  (__b))
-#define cputime_sub(__a, __b)		((__a) -  (__b))
-#define cputime_div(__a, __n)		((__a) /  (__n))
-#define cputime_halve(__a)		((__a) >> 1)
-#define cputime_eq(__a, __b)		((__a) == (__b))
-#define cputime_gt(__a, __b)		((__a) >  (__b))
-#define cputime_ge(__a, __b)		((__a) >= (__b))
-#define cputime_lt(__a, __b)		((__a) <  (__b))
-#define cputime_le(__a, __b)		((__a) <= (__b))
-
-#define cputime64_zero			((cputime64_t)0)
-#define cputime64_add(__a, __b)		((__a) + (__b))
-#define cputime64_sub(__a, __b)		((__a) - (__b))
-#define cputime_to_cputime64(__ct)	(__ct)
+typedef u64 __nocast cputime_t;
+typedef u64 __nocast cputime64_t;
 
 #ifdef __KERNEL__
 
@@ -65,7 +48,7 @@ DECLARE_PER_CPU(unsigned long, cputime_scaled_last_delta);
 
 static inline unsigned long cputime_to_jiffies(const cputime_t ct)
 {
-	return mulhdu(ct, __cputime_jiffies_factor);
+	return mulhdu((__force u64) ct, __cputime_jiffies_factor);
 }
 
 /* Estimate the scaled cputime by scaling the real cputime based on
@@ -74,14 +57,15 @@ static inline cputime_t cputime_to_scaled(const cputime_t ct)
 {
 	if (cpu_has_feature(CPU_FTR_SPURR) &&
 	    __get_cpu_var(cputime_last_delta))
-		return ct * __get_cpu_var(cputime_scaled_last_delta) /
-			    __get_cpu_var(cputime_last_delta);
+		return (__force u64) ct *
+			__get_cpu_var(cputime_scaled_last_delta) /
+			__get_cpu_var(cputime_last_delta);
 	return ct;
 }
 
 static inline cputime_t jiffies_to_cputime(const unsigned long jif)
 {
-	cputime_t ct;
+	u64 ct;
 	unsigned long sec;
 
 	/* have to be a little careful about overflow */
@@ -93,7 +77,7 @@ static inline cputime_t jiffies_to_cputime(const unsigned long jif)
 	}
 	if (sec)
 		ct += (cputime_t) sec * tb_ticks_per_sec;
-	return ct;
+	return (__force cputime_t) ct;
 }
 
 static inline void setup_cputime_one_jiffy(void)
@@ -103,7 +87,7 @@ static inline void setup_cputime_one_jiffy(void)
 
 static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
 {
-	cputime_t ct;
+	u64 ct;
 	u64 sec;
 
 	/* have to be a little careful about overflow */
@@ -114,13 +98,13 @@ static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
 		do_div(ct, HZ);
 	}
 	if (sec)
-		ct += (cputime_t) sec * tb_ticks_per_sec;
-	return ct;
+		ct += (u64) sec * tb_ticks_per_sec;
+	return (__force cputime64_t) ct;
 }
 
 static inline u64 cputime64_to_jiffies64(const cputime_t ct)
 {
-	return mulhdu(ct, __cputime_jiffies_factor);
+	return mulhdu((__force u64) ct, __cputime_jiffies_factor);
 }
 
 /*
@@ -130,12 +114,12 @@ extern u64 __cputime_msec_factor;
 
 static inline unsigned long cputime_to_usecs(const cputime_t ct)
 {
-	return mulhdu(ct, __cputime_msec_factor) * USEC_PER_MSEC;
+	return mulhdu((__force u64) ct, __cputime_msec_factor) * USEC_PER_MSEC;
 }
 
 static inline cputime_t usecs_to_cputime(const unsigned long us)
 {
-	cputime_t ct;
+	u64 ct;
 	unsigned long sec;
 
 	/* have to be a little careful about overflow */
@@ -147,7 +131,7 @@ static inline cputime_t usecs_to_cputime(const unsigned long us)
 	}
 	if (sec)
 		ct += (cputime_t) sec * tb_ticks_per_sec;
-	return ct;
+	return (__force cputime_t) ct;
 }
 
 #define usecs_to_cputime64(us)		usecs_to_cputime(us)
@@ -159,12 +143,12 @@ extern u64 __cputime_sec_factor;
 
 static inline unsigned long cputime_to_secs(const cputime_t ct)
 {
-	return mulhdu(ct, __cputime_sec_factor);
+	return mulhdu((__force u64) ct, __cputime_sec_factor);
 }
 
 static inline cputime_t secs_to_cputime(const unsigned long sec)
 {
-	return (cputime_t) sec * tb_ticks_per_sec;
+	return (__force cputime_t)((u64) sec * tb_ticks_per_sec);
 }
 
 /*
@@ -172,7 +156,7 @@ static inline cputime_t secs_to_cputime(const unsigned long sec)
 */
 static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p)
 {
-	u64 x = ct;
+	u64 x = (__force u64) ct;
 	unsigned int frac;
 
 	frac = do_div(x, tb_ticks_per_sec);
@@ -184,11 +168,11 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p)
 
 static inline cputime_t timespec_to_cputime(const struct timespec *p)
 {
-	cputime_t ct;
+	u64 ct;
 
 	ct = (u64) p->tv_nsec * tb_ticks_per_sec;
 	do_div(ct, 1000000000);
-	return ct + (u64) p->tv_sec * tb_ticks_per_sec;
+	return (__force cputime_t)(ct + (u64) p->tv_sec * tb_ticks_per_sec);
 }
 
 /*
@@ -196,7 +180,7 @@ static inline cputime_t timespec_to_cputime(const struct timespec *p)
 */
 static inline void cputime_to_timeval(const cputime_t ct, struct timeval *p)
 {
-	u64 x = ct;
+	u64 x = (__force u64) ct;
 	unsigned int frac;
 
 	frac = do_div(x, tb_ticks_per_sec);
@@ -208,11 +192,11 @@ static inline void cputime_to_timeval(const cputime_t ct, struct timeval *p)
 
 static inline cputime_t timeval_to_cputime(const struct timeval *p)
 {
-	cputime_t ct;
+	u64 ct;
 
 	ct = (u64) p->tv_usec * tb_ticks_per_sec;
 	do_div(ct, 1000000);
-	return ct + (u64) p->tv_sec * tb_ticks_per_sec;
+	return (__force cputime_t)(ct + (u64) p->tv_sec * tb_ticks_per_sec);
 }
 
 /*
@@ -222,12 +206,12 @@ extern u64 __cputime_clockt_factor;
 
 static inline unsigned long cputime_to_clock_t(const cputime_t ct)
 {
-	return mulhdu(ct, __cputime_clockt_factor);
+	return mulhdu((__force u64) ct, __cputime_clockt_factor);
 }
 
 static inline cputime_t clock_t_to_cputime(const unsigned long clk)
 {
-	cputime_t ct;
+	u64 ct;
 	unsigned long sec;
 
 	/* have to be a little careful about overflow */
@@ -238,8 +222,8 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
 		do_div(ct, USER_HZ);
 	}
 	if (sec)
-		ct += (cputime_t) sec * tb_ticks_per_sec;
-	return ct;
+		ct += (u64) sec * tb_ticks_per_sec;
+	return (__force cputime_t) ct;
 }
 
 #define cputime64_to_clock_t(ct)	cputime_to_clock_t((cputime_t)(ct))

arch/s390/appldata/appldata_os.c (+8 -8)

@@ -115,21 +115,21 @@ static void appldata_get_os_data(void *data)
 	j = 0;
 	for_each_online_cpu(i) {
 		os_data->os_cpu[j].per_cpu_user =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.user);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
 		os_data->os_cpu[j].per_cpu_nice =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.nice);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
 		os_data->os_cpu[j].per_cpu_system =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.system);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
 		os_data->os_cpu[j].per_cpu_idle =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.idle);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
 		os_data->os_cpu[j].per_cpu_irq =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.irq);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
 		os_data->os_cpu[j].per_cpu_softirq =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.softirq);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
 		os_data->os_cpu[j].per_cpu_iowait =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.iowait);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
 		os_data->os_cpu[j].per_cpu_steal =
-			cputime_to_jiffies(kstat_cpu(i).cpustat.steal);
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
 		os_data->os_cpu[j].cpu_id = i;
 		j++;
 	}

arch/s390/include/asm/cputime.h (+63 -77)

@@ -16,75 +16,60 @@
 
 /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
 
-typedef unsigned long long cputime_t;
-typedef unsigned long long cputime64_t;
+typedef unsigned long long __nocast cputime_t;
+typedef unsigned long long __nocast cputime64_t;
 
-#ifndef __s390x__
-
-static inline unsigned int
-__div(unsigned long long n, unsigned int base)
+static inline unsigned long __div(unsigned long long n, unsigned long base)
 {
+#ifndef __s390x__
 	register_pair rp;
 
 	rp.pair = n >> 1;
 	asm ("dr %0,%1" : "+d" (rp) : "d" (base >> 1));
 	return rp.subreg.odd;
+#else /* __s390x__ */
+	return n / base;
+#endif /* __s390x__ */
 }
 
-#else /* __s390x__ */
+#define cputime_one_jiffy		jiffies_to_cputime(1)
 
-static inline unsigned int
-__div(unsigned long long n, unsigned int base)
+/*
+ * Convert cputime to jiffies and back.
+ */
+static inline unsigned long cputime_to_jiffies(const cputime_t cputime)
 {
-	return n / base;
+	return __div((__force unsigned long long) cputime, 4096000000ULL / HZ);
 }
 
-#endif /* __s390x__ */
+static inline cputime_t jiffies_to_cputime(const unsigned int jif)
+{
+	return (__force cputime_t)(jif * (4096000000ULL / HZ));
+}
 
-#define cputime_zero			(0ULL)
-#define cputime_one_jiffy		jiffies_to_cputime(1)
-#define cputime_max			((~0UL >> 1) - 1)
-#define cputime_add(__a, __b)		((__a) +  (__b))
-#define cputime_sub(__a, __b)		((__a) -  (__b))
-#define cputime_div(__a, __n) ({		\
-	unsigned long long __div = (__a);	\
-	do_div(__div,__n);			\
-	__div;					\
-})
-#define cputime_halve(__a)		((__a) >> 1)
-#define cputime_eq(__a, __b)		((__a) == (__b))
-#define cputime_gt(__a, __b)		((__a) >  (__b))
-#define cputime_ge(__a, __b)		((__a) >= (__b))
-#define cputime_lt(__a, __b)		((__a) <  (__b))
-#define cputime_le(__a, __b)		((__a) <= (__b))
-#define cputime_to_jiffies(__ct)	(__div((__ct), 4096000000ULL / HZ))
-#define cputime_to_scaled(__ct)		(__ct)
-#define jiffies_to_cputime(__hz)	((cputime_t)(__hz) * (4096000000ULL / HZ))
-
-#define cputime64_zero			(0ULL)
-#define cputime64_add(__a, __b)		((__a) + (__b))
-#define cputime_to_cputime64(__ct)	(__ct)
-
-static inline u64
-cputime64_to_jiffies64(cputime64_t cputime)
-{
-	do_div(cputime, 4096000000ULL / HZ);
-	return cputime;
+static inline u64 cputime64_to_jiffies64(cputime64_t cputime)
+{
+	unsigned long long jif = (__force unsigned long long) cputime;
+	do_div(jif, 4096000000ULL / HZ);
+	return jif;
+}
+
+static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
+{
+	return (__force cputime64_t)(jif * (4096000000ULL / HZ));
 }
 
 /*
  * Convert cputime to microseconds and back.
  */
-static inline unsigned int
-cputime_to_usecs(const cputime_t cputime)
+static inline unsigned int cputime_to_usecs(const cputime_t cputime)
 {
-	return cputime_div(cputime, 4096);
+	return (__force unsigned long long) cputime >> 12;
 }
 
-static inline cputime_t
-usecs_to_cputime(const unsigned int m)
+static inline cputime_t usecs_to_cputime(const unsigned int m)
 {
-	return (cputime_t) m * 4096;
+	return (__force cputime_t)(m * 4096ULL);
 }
 
 #define usecs_to_cputime64(m)		usecs_to_cputime(m)
@@ -92,40 +77,39 @@ usecs_to_cputime(const unsigned int m)
 /*
  * Convert cputime to milliseconds and back.
  */
-static inline unsigned int
-cputime_to_secs(const cputime_t cputime)
+static inline unsigned int cputime_to_secs(const cputime_t cputime)
 {
-	return __div(cputime, 2048000000) >> 1;
+	return __div((__force unsigned long long) cputime, 2048000000) >> 1;
 }
 
-static inline cputime_t
-secs_to_cputime(const unsigned int s)
+static inline cputime_t secs_to_cputime(const unsigned int s)
 {
-	return (cputime_t) s * 4096000000ULL;
+	return (__force cputime_t)(s * 4096000000ULL);
 }
 
 /*
  * Convert cputime to timespec and back.
  */
-static inline cputime_t
-timespec_to_cputime(const struct timespec *value)
+static inline cputime_t timespec_to_cputime(const struct timespec *value)
 {
-	return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL;
+	unsigned long long ret = value->tv_sec * 4096000000ULL;
+	return (__force cputime_t)(ret + value->tv_nsec * 4096 / 1000);
 }
 
-static inline void
-cputime_to_timespec(const cputime_t cputime, struct timespec *value)
+static inline void cputime_to_timespec(const cputime_t cputime,
+				       struct timespec *value)
 {
+	unsigned long long __cputime = (__force unsigned long long) cputime;
 #ifndef __s390x__
 	register_pair rp;
 
-	rp.pair = cputime >> 1;
+	rp.pair = __cputime >> 1;
 	asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
 	value->tv_nsec = rp.subreg.even * 1000 / 4096;
 	value->tv_sec = rp.subreg.odd;
 #else
-	value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096;
-	value->tv_sec = cputime / 4096000000ULL;
+	value->tv_nsec = (__cputime % 4096000000ULL) * 1000 / 4096;
+	value->tv_sec = __cputime / 4096000000ULL;
 #endif
 }
 
@@ -134,50 +118,52 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
  * Since cputime and timeval have the same resolution (microseconds)
  * this is easy.
  */
-static inline cputime_t
-timeval_to_cputime(const struct timeval *value)
+static inline cputime_t timeval_to_cputime(const struct timeval *value)
 {
-	return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL;
+	unsigned long long ret = value->tv_sec * 4096000000ULL;
+	return (__force cputime_t)(ret + value->tv_usec * 4096ULL);
 }
 
-static inline void
-cputime_to_timeval(const cputime_t cputime, struct timeval *value)
+static inline void cputime_to_timeval(const cputime_t cputime,
+				      struct timeval *value)
 {
+	unsigned long long __cputime = (__force unsigned long long) cputime;
 #ifndef __s390x__
 	register_pair rp;
 
-	rp.pair = cputime >> 1;
+	rp.pair = __cputime >> 1;
 	asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
 	value->tv_usec = rp.subreg.even / 4096;
 	value->tv_sec = rp.subreg.odd;
 #else
-	value->tv_usec = (cputime % 4096000000ULL) / 4096;
-	value->tv_sec = cputime / 4096000000ULL;
+	value->tv_usec = (__cputime % 4096000000ULL) / 4096;
+	value->tv_sec = __cputime / 4096000000ULL;
 #endif
 }
 
 /*
  * Convert cputime to clock and back.
  */
-static inline clock_t
-cputime_to_clock_t(cputime_t cputime)
+static inline clock_t cputime_to_clock_t(cputime_t cputime)
 {
-	return cputime_div(cputime, 4096000000ULL / USER_HZ);
+	unsigned long long clock = (__force unsigned long long) cputime;
+	do_div(clock, 4096000000ULL / USER_HZ);
+	return clock;
 }
 
-static inline cputime_t
-clock_t_to_cputime(unsigned long x)
+static inline cputime_t clock_t_to_cputime(unsigned long x)
 {
-	return (cputime_t) x * (4096000000ULL / USER_HZ);
+	return (__force cputime_t)(x * (4096000000ULL / USER_HZ));
 }
 
 /*
  * Convert cputime64 to clock.
  */
-static inline clock_t
-cputime64_to_clock_t(cputime64_t cputime)
+static inline clock_t cputime64_to_clock_t(cputime64_t cputime)
 {
-       return cputime_div(cputime, 4096000000ULL / USER_HZ);
+	unsigned long long clock = (__force unsigned long long) cputime;
	do_div(clock, 4096000000ULL / USER_HZ);
+	return clock;
 }
 
 struct s390_idle_data {

arch/x86/include/asm/i387.h (+1 -1)

@@ -218,7 +218,7 @@ static inline void fpu_fxsave(struct fpu *fpu)
 #ifdef CONFIG_SMP
 #define safe_address (__per_cpu_offset[0])
 #else
-#define safe_address (kstat_cpu(0).cpustat.user)
+#define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER])
 #endif
 
 /*

drivers/cpufreq/cpufreq_conservative.c (+24 -26)

@@ -95,27 +95,26 @@ static struct dbs_tuners {
 	.freq_step = 5,
 };
 
-static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
-							cputime64_t *wall)
+static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 {
-	cputime64_t idle_time;
-	cputime64_t cur_wall_time;
-	cputime64_t busy_time;
+	u64 idle_time;
+	u64 cur_wall_time;
+	u64 busy_time;
 
 	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
-	busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
-			kstat_cpu(cpu).cpustat.system);
 
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);
+	busy_time  = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
 
-	idle_time = cputime64_sub(cur_wall_time, busy_time);
+	idle_time = cur_wall_time - busy_time;
 	if (wall)
-		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
+		*wall = jiffies_to_usecs(cur_wall_time);
 
-	return (cputime64_t)jiffies_to_usecs(idle_time);
+	return jiffies_to_usecs(idle_time);
 }
 
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
@@ -272,7 +271,7 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
 		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
 						&dbs_info->prev_cpu_wall);
 		if (dbs_tuners_ins.ignore_nice)
-			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+			dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 	}
 	return count;
 }
@@ -353,20 +352,20 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 
 		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
 
-		wall_time = (unsigned int) cputime64_sub(cur_wall_time,
-				j_dbs_info->prev_cpu_wall);
+		wall_time = (unsigned int)
+			(cur_wall_time - j_dbs_info->prev_cpu_wall);
 		j_dbs_info->prev_cpu_wall = cur_wall_time;
 
-		idle_time = (unsigned int) cputime64_sub(cur_idle_time,
-				j_dbs_info->prev_cpu_idle);
+		idle_time = (unsigned int)
+			(cur_idle_time - j_dbs_info->prev_cpu_idle);
 		j_dbs_info->prev_cpu_idle = cur_idle_time;
 
 		if (dbs_tuners_ins.ignore_nice) {
-			cputime64_t cur_nice;
+			u64 cur_nice;
 			unsigned long cur_nice_jiffies;
 
-			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
-					 j_dbs_info->prev_cpu_nice);
+			cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
+					 j_dbs_info->prev_cpu_nice;
 			/*
 			 * Assumption: nice time between sampling periods will
 			 * be less than 2^32 jiffies for 32 bit sys
@@ -374,7 +373,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 			cur_nice_jiffies = (unsigned long)
 					cputime64_to_jiffies64(cur_nice);
 
-			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+			j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 			idle_time += jiffies_to_usecs(cur_nice_jiffies);
 		}
 
@@ -501,10 +500,9 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 
 			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
 						&j_dbs_info->prev_cpu_wall);
-			if (dbs_tuners_ins.ignore_nice) {
+			if (dbs_tuners_ins.ignore_nice)
 				j_dbs_info->prev_cpu_nice =
-						kstat_cpu(j).cpustat.nice;
-			}
+						kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 		}
 		this_dbs_info->down_skip = 0;
 		this_dbs_info->requested_freq = policy->cur;

drivers/cpufreq/cpufreq_ondemand.c (+26 -28)

@@ -119,27 +119,26 @@ static struct dbs_tuners {
 	.powersave_bias = 0,
 };
 
-static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
-							cputime64_t *wall)
+static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 {
-	cputime64_t idle_time;
-	cputime64_t cur_wall_time;
-	cputime64_t busy_time;
+	u64 idle_time;
+	u64 cur_wall_time;
+	u64 busy_time;
 
 	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
-	busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
-			kstat_cpu(cpu).cpustat.system);
 
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
-	busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);
+	busy_time  = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
 
-	idle_time = cputime64_sub(cur_wall_time, busy_time);
+	idle_time = cur_wall_time - busy_time;
 	if (wall)
-		*wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);
+		*wall = jiffies_to_usecs(cur_wall_time);
 
-	return (cputime64_t)jiffies_to_usecs(idle_time);
+	return jiffies_to_usecs(idle_time);
 }
 
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
@@ -345,7 +344,7 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
 		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
 						&dbs_info->prev_cpu_wall);
 		if (dbs_tuners_ins.ignore_nice)
-			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+			dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 
 	}
 	return count;
@@ -442,24 +441,24 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
 		cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);
 
-		wall_time = (unsigned int) cputime64_sub(cur_wall_time,
-				j_dbs_info->prev_cpu_wall);
+		wall_time = (unsigned int)
+			(cur_wall_time - j_dbs_info->prev_cpu_wall);
 		j_dbs_info->prev_cpu_wall = cur_wall_time;
 
-		idle_time = (unsigned int) cputime64_sub(cur_idle_time,
-				j_dbs_info->prev_cpu_idle);
+		idle_time = (unsigned int)
+			(cur_idle_time - j_dbs_info->prev_cpu_idle);
 		j_dbs_info->prev_cpu_idle = cur_idle_time;
 
-		iowait_time = (unsigned int) cputime64_sub(cur_iowait_time,
-				j_dbs_info->prev_cpu_iowait);
+		iowait_time = (unsigned int)
+			(cur_iowait_time - j_dbs_info->prev_cpu_iowait);
 		j_dbs_info->prev_cpu_iowait = cur_iowait_time;
 
 		if (dbs_tuners_ins.ignore_nice) {
-			cputime64_t cur_nice;
+			u64 cur_nice;
 			unsigned long cur_nice_jiffies;
 
-			cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
-					 j_dbs_info->prev_cpu_nice);
+			cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] -
+					 j_dbs_info->prev_cpu_nice;
 			/*
 			 * Assumption: nice time between sampling periods will
 			 * be less than 2^32 jiffies for 32 bit sys
@@ -467,7 +466,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 			cur_nice_jiffies = (unsigned long)
 					cputime64_to_jiffies64(cur_nice);
 
-			j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
+			j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 			idle_time += jiffies_to_usecs(cur_nice_jiffies);
 		}
 
@@ -646,10 +645,9 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 
 			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
 						&j_dbs_info->prev_cpu_wall);
-			if (dbs_tuners_ins.ignore_nice) {
+			if (dbs_tuners_ins.ignore_nice)
 				j_dbs_info->prev_cpu_nice =
-						kstat_cpu(j).cpustat.nice;
-			}
+						kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 		}
 		this_dbs_info->cpu = cpu;
 		this_dbs_info->rate_mult = 1;

drivers/cpufreq/cpufreq_stats.c (+2 -3)

@@ -61,9 +61,8 @@ static int cpufreq_stats_update(unsigned int cpu)
 	spin_lock(&cpufreq_stats_lock);
 	stat = per_cpu(cpufreq_stats_table, cpu);
 	if (stat->time_in_state)
-		stat->time_in_state[stat->last_index] =
-			cputime64_add(stat->time_in_state[stat->last_index],
-				      cputime_sub(cur_time, stat->last_time));
+		stat->time_in_state[stat->last_index] +=
+			cur_time - stat->last_time;
 	stat->last_time = cur_time;
 	spin_unlock(&cpufreq_stats_lock);
 	return 0;

drivers/macintosh/rack-meter.c (+6 -8)

@@ -81,13 +81,13 @@ static int rackmeter_ignore_nice;
 */
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
 {
-	cputime64_t retval;
+	u64 retval;
 
-	retval = cputime64_add(kstat_cpu(cpu).cpustat.idle,
-			kstat_cpu(cpu).cpustat.iowait);
+	retval = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE] +
+		 kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
 
 	if (rackmeter_ignore_nice)
-		retval = cputime64_add(retval, kstat_cpu(cpu).cpustat.nice);
+		retval += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
 
 	return retval;
 }
@@ -220,13 +220,11 @@ static void rackmeter_do_timer(struct work_struct *work)
 	int i, offset, load, cumm, pause;
 
 	cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
-	total_ticks = (unsigned int)cputime64_sub(cur_jiffies,
-						  rcpu->prev_wall);
+	total_ticks = (unsigned int) (cur_jiffies - rcpu->prev_wall);
 	rcpu->prev_wall = cur_jiffies;
 
 	total_idle_ticks = get_cpu_idle_time(cpu);
-	idle_ticks = (unsigned int) cputime64_sub(total_idle_ticks,
-				rcpu->prev_idle);
+	idle_ticks = (unsigned int) (total_idle_ticks - rcpu->prev_idle);
 	rcpu->prev_idle = total_idle_ticks;
 
 	/* We do a very dumb calculation to update the LEDs for now,

fs/proc/array.c (+4 -4)

@@ -394,8 +394,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 	sigemptyset(&sigign);
 	sigemptyset(&sigcatch);
-	cutime = cstime = utime = stime = cputime_zero;
-	cgtime = gtime = cputime_zero;
+	cutime = cstime = utime = stime = 0;
+	cgtime = gtime = 0;
 
 	if (lock_task_sighand(task, &flags)) {
 		struct signal_struct *sig = task->signal;
@@ -423,14 +423,14 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 			do {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
-				gtime = cputime_add(gtime, t->gtime);
+				gtime += t->gtime;
 				t = next_thread(t);
 			} while (t != task);
 
 			min_flt += sig->min_flt;
 			maj_flt += sig->maj_flt;
 			thread_group_times(task, &utime, &stime);
-			gtime = cputime_add(gtime, sig->gtime);
+			gtime += sig->gtime;
 		}
 
 		sid = task_session_nr_ns(task, ns);

fs/proc/stat.c (+29 -34)

@@ -22,29 +22,27 @@
 #define arch_idle_time(cpu) 0
 #endif
 
-static cputime64_t get_idle_time(int cpu)
+static u64 get_idle_time(int cpu)
 {
-	u64 idle_time = get_cpu_idle_time_us(cpu, NULL);
-	cputime64_t idle;
+	u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL);
 
 	if (idle_time == -1ULL) {
 		/* !NO_HZ so we can rely on cpustat.idle */
-		idle = kstat_cpu(cpu).cpustat.idle;
-		idle = cputime64_add(idle, arch_idle_time(cpu));
+		idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
+		idle += arch_idle_time(cpu);
 	} else
 		idle = usecs_to_cputime64(idle_time);
 
 	return idle;
 }
 
-static cputime64_t get_iowait_time(int cpu)
+static u64 get_iowait_time(int cpu)
 {
-	u64 iowait_time = get_cpu_iowait_time_us(cpu, NULL);
-	cputime64_t iowait;
+	u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL);
 
 	if (iowait_time == -1ULL)
 		/* !NO_HZ so we can rely on cpustat.iowait */
-		iowait = kstat_cpu(cpu).cpustat.iowait;
+		iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
 	else
 		iowait = usecs_to_cputime64(iowait_time);
 
@@ -55,33 +53,30 @@ static int show_stat(struct seq_file *p, void *v)
 {
 	int i, j;
 	unsigned long jif;
-	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
-	cputime64_t guest, guest_nice;
+	u64 user, nice, system, idle, iowait, irq, softirq, steal;
+	u64 guest, guest_nice;
 	u64 sum = 0;
 	u64 sum_softirq = 0;
 	unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
 	struct timespec boottime;
 
 	user = nice = system = idle = iowait =
-		irq = softirq = steal = cputime64_zero;
-	guest = guest_nice = cputime64_zero;
+		irq = softirq = steal = 0;
+	guest = guest_nice = 0;
 	getboottime(&boottime);
 	jif = boottime.tv_sec;
 
 	for_each_possible_cpu(i) {
-		user = cputime64_add(user, kstat_cpu(i).cpustat.user);
-		nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
-		system = cputime64_add(system, kstat_cpu(i).cpustat.system);
-		idle = cputime64_add(idle, get_idle_time(i));
-		iowait = cputime64_add(iowait, get_iowait_time(i));
-		irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
-		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
-		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
-		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
-		guest_nice = cputime64_add(guest_nice,
-			kstat_cpu(i).cpustat.guest_nice);
-		sum += kstat_cpu_irqs_sum(i);
-		sum += arch_irq_stat_cpu(i);
+		user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
+		nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
+		system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
+		idle += get_idle_time(i);
+		iowait += get_iowait_time(i);
+		irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
+		softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
+		steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
+		guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
+		guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
 
 		for (j = 0; j < NR_SOFTIRQS; j++) {
 			unsigned int softirq_stat = kstat_softirqs_cpu(j, i);
@@ -106,16 +101,16 @@ static int show_stat(struct seq_file *p, void *v)
 		(unsigned long long)cputime64_to_clock_t(guest_nice));
 	for_each_online_cpu(i) {
 		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
-		user = kstat_cpu(i).cpustat.user;
-		nice = kstat_cpu(i).cpustat.nice;
-		system = kstat_cpu(i).cpustat.system;
+		user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
+		nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
+		system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
 		idle = get_idle_time(i);
 		iowait = get_iowait_time(i);
-		irq = kstat_cpu(i).cpustat.irq;
-		softirq = kstat_cpu(i).cpustat.softirq;
-		steal = kstat_cpu(i).cpustat.steal;
-		guest = kstat_cpu(i).cpustat.guest;
-		guest_nice = kstat_cpu(i).cpustat.guest_nice;
+		irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
+		softirq = kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
+		steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
+		guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
+		guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
 		seq_printf(p,
 			"cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
 			"%llu\n",

fs/proc/uptime.c (+8 -3)

@@ -11,15 +11,20 @@ static int uptime_proc_show(struct seq_file *m, void *v)
 {
 	struct timespec uptime;
 	struct timespec idle;
+	u64 idletime;
+	u64 nsec;
+	u32 rem;
 	int i;
-	cputime_t idletime = cputime_zero;
 
+	idletime = 0;
 	for_each_possible_cpu(i)
-		idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle);
+		idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
 
 	do_posix_clock_monotonic_gettime(&uptime);
 	monotonic_to_bootbased(&uptime);
-	cputime_to_timespec(idletime, &idle);
+	nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
+	idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
+	idle.tv_nsec = rem;
 	seq_printf(m, "%lu.%02lu %lu.%02lu\n",
 			(unsigned long) uptime.tv_sec,
 			(uptime.tv_nsec / (NSEC_PER_SEC / 100)),
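As the "[S390] fix cputime overflow in uptime_proc_show" commit in the list above describes, the hunk just shown stops feeding the summed per-cpu idle time through cputime_to_timespec(), which could overflow, and instead converts it to nanoseconds and splits it with div_u64_rem(). A standalone sketch of that arithmetic (userspace C with made-up numbers; div_u64_rem() is replaced by plain 64-bit division here):

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC	1000000000ULL
#define TICK_NSEC	10000000ULL	/* assumes HZ=100 */

int main(void)
{
	uint64_t idle_jiffies = 5000000123ULL;	/* hypothetical sum over all CPUs */
	uint64_t nsec = idle_jiffies * TICK_NSEC;

	/* equivalent of div_u64_rem(nsec, NSEC_PER_SEC, &rem) */
	uint64_t sec = nsec / NSEC_PER_SEC;
	uint32_t rem = (uint32_t)(nsec % NSEC_PER_SEC);

	/* same "%lu.%02lu" shape as uptime_proc_show() */
	printf("%llu.%02lu\n", (unsigned long long)sec,
	       (unsigned long)(rem / (NSEC_PER_SEC / 100)));
	return 0;
}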

include/asm-generic/cputime.h (+30 -35)

@@ -4,71 +4,66 @@
 #include <linux/time.h>
 #include <linux/jiffies.h>
 
-typedef unsigned long cputime_t;
+typedef unsigned long __nocast cputime_t;
 
-#define cputime_zero			(0UL)
 #define cputime_one_jiffy		jiffies_to_cputime(1)
-#define cputime_max			((~0UL >> 1) - 1)
-#define cputime_add(__a, __b)		((__a) +  (__b))
-#define cputime_sub(__a, __b)		((__a) -  (__b))
-#define cputime_div(__a, __n)		((__a) /  (__n))
-#define cputime_halve(__a)		((__a) >> 1)
-#define cputime_eq(__a, __b)		((__a) == (__b))
-#define cputime_gt(__a, __b)		((__a) >  (__b))
-#define cputime_ge(__a, __b)		((__a) >= (__b))
-#define cputime_lt(__a, __b)		((__a) <  (__b))
-#define cputime_le(__a, __b)		((__a) <= (__b))
-#define cputime_to_jiffies(__ct)	(__ct)
+#define cputime_to_jiffies(__ct)	(__force unsigned long)(__ct)
 #define cputime_to_scaled(__ct)		(__ct)
-#define jiffies_to_cputime(__hz)	(__hz)
+#define jiffies_to_cputime(__hz)	(__force cputime_t)(__hz)
 
-typedef u64 cputime64_t;
+typedef u64 __nocast cputime64_t;
 
-#define cputime64_zero (0ULL)
-#define cputime64_add(__a, __b)		((__a) + (__b))
-#define cputime64_sub(__a, __b)		((__a) - (__b))
-#define cputime64_to_jiffies64(__ct)	(__ct)
-#define jiffies64_to_cputime64(__jif)	(__jif)
-#define cputime_to_cputime64(__ct)	((u64) __ct)
-#define cputime64_gt(__a, __b)		((__a) >  (__b))
+#define cputime64_to_jiffies64(__ct)	(__force u64)(__ct)
+#define jiffies64_to_cputime64(__jif)	(__force cputime64_t)(__jif)
 
-#define nsecs_to_cputime64(__ct)	nsecs_to_jiffies64(__ct)
+#define nsecs_to_cputime64(__ct)	\
+	jiffies64_to_cputime64(nsecs_to_jiffies64(__ct))
 
 
 /*
  * Convert cputime to microseconds and back.
  */
-#define cputime_to_usecs(__ct)		jiffies_to_usecs(__ct)
-#define usecs_to_cputime(__msecs)	usecs_to_jiffies(__msecs)
-#define usecs_to_cputime64(__msecs)	nsecs_to_jiffies64((__msecs) * 1000)
+#define cputime_to_usecs(__ct)		\
+	jiffies_to_usecs(cputime_to_jiffies(__ct))
+#define usecs_to_cputime(__usec)	\
+	jiffies_to_cputime(usecs_to_jiffies(__usec))
+#define usecs_to_cputime64(__usec)	\
+	jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
 
 /*
  * Convert cputime to seconds and back.
  */
-#define cputime_to_secs(jif)		((jif) / HZ)
-#define secs_to_cputime(sec)		((sec) * HZ)
+#define cputime_to_secs(jif)		(cputime_to_jiffies(jif) / HZ)
+#define secs_to_cputime(sec)		jiffies_to_cputime((sec) * HZ)
 
 /*
  * Convert cputime to timespec and back.
  */
-#define timespec_to_cputime(__val)	timespec_to_jiffies(__val)
-#define cputime_to_timespec(__ct,__val)	jiffies_to_timespec(__ct,__val)
+#define timespec_to_cputime(__val)	\
+	jiffies_to_cputime(timespec_to_jiffies(__val))
+#define cputime_to_timespec(__ct,__val)	\
+	jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
 
 /*
  * Convert cputime to timeval and back.
  */
-#define timeval_to_cputime(__val)	timeval_to_jiffies(__val)
-#define cputime_to_timeval(__ct,__val)	jiffies_to_timeval(__ct,__val)
+#define timeval_to_cputime(__val)	\
+	jiffies_to_cputime(timeval_to_jiffies(__val))
+#define cputime_to_timeval(__ct,__val)	\
+	jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
 
 /*
  * Convert cputime to clock and back.
  */
-#define cputime_to_clock_t(__ct)	jiffies_to_clock_t(__ct)
-#define clock_t_to_cputime(__x)		clock_t_to_jiffies(__x)
+#define cputime_to_clock_t(__ct)	\
+	jiffies_to_clock_t(cputime_to_jiffies(__ct))
+#define clock_t_to_cputime(__x)		\
+	jiffies_to_cputime(clock_t_to_jiffies(__x))
 
 /*
  * Convert cputime64 to clock.
 */
-#define cputime64_to_clock_t(__ct)	jiffies_64_to_clock_t(__ct)
+#define cputime64_to_clock_t(__ct)	\
+	jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
 
 #endif

include/linux/kernel_stat.h (+22 -14)

@@ -6,6 +6,7 @@
 #include <linux/percpu.h>
 #include <linux/cpumask.h>
 #include <linux/interrupt.h>
+#include <linux/sched.h>
 #include <asm/irq.h>
 #include <asm/cputime.h>
 
@@ -15,21 +16,25 @@
  * used by rstatd/perfmeter
 */
 
-struct cpu_usage_stat {
-	cputime64_t user;
-	cputime64_t nice;
-	cputime64_t system;
-	cputime64_t softirq;
-	cputime64_t irq;
-	cputime64_t idle;
-	cputime64_t iowait;
-	cputime64_t steal;
-	cputime64_t guest;
-	cputime64_t guest_nice;
+enum cpu_usage_stat {
+	CPUTIME_USER,
+	CPUTIME_NICE,
+	CPUTIME_SYSTEM,
+	CPUTIME_SOFTIRQ,
+	CPUTIME_IRQ,
+	CPUTIME_IDLE,
+	CPUTIME_IOWAIT,
+	CPUTIME_STEAL,
+	CPUTIME_GUEST,
+	CPUTIME_GUEST_NICE,
+	NR_STATS,
+};
+
+struct kernel_cpustat {
+	u64 cpustat[NR_STATS];
 };
 
 struct kernel_stat {
-	struct cpu_usage_stat	cpustat;
 #ifndef CONFIG_GENERIC_HARDIRQS
        unsigned int irqs[NR_IRQS];
 #endif
@@ -38,10 +43,13 @@ struct kernel_stat {
 };
 
 DECLARE_PER_CPU(struct kernel_stat, kstat);
+DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
 
-#define kstat_cpu(cpu)	per_cpu(kstat, cpu)
 /* Must have preemption disabled for this to be meaningful. */
-#define kstat_this_cpu	__get_cpu_var(kstat)
+#define kstat_this_cpu (&__get_cpu_var(kstat))
+#define kcpustat_this_cpu (&__get_cpu_var(kernel_cpustat))
+#define kstat_cpu(cpu) per_cpu(kstat, cpu)
+#define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu)
 
 extern unsigned long long nr_context_switches(void);
 

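The kernel_stat.h hunk above is what the cpufreq, rack-meter and /proc diffs in this merge are adapting to: the per-cpu cpu_usage_stat struct becomes a u64 array indexed by an enum, reached through kcpustat_cpu()/kcpustat_this_cpu. A small userspace sketch of the new layout (enum values copied from the diff; the per-cpu accessors are not modeled):

#include <stdio.h>
#include <stdint.h>

enum cpu_usage_stat {
	CPUTIME_USER,
	CPUTIME_NICE,
	CPUTIME_SYSTEM,
	CPUTIME_SOFTIRQ,
	CPUTIME_IRQ,
	CPUTIME_IDLE,
	CPUTIME_IOWAIT,
	CPUTIME_STEAL,
	CPUTIME_GUEST,
	CPUTIME_GUEST_NICE,
	NR_STATS,
};

struct kernel_cpustat {
	uint64_t cpustat[NR_STATS];
};

int main(void)
{
	struct kernel_cpustat kcs = {
		.cpustat = { [CPUTIME_USER] = 100, [CPUTIME_IDLE] = 900 },
	};
	uint64_t total = 0;

	/* summing the fields is now a loop instead of ten named additions */
	for (int i = 0; i < NR_STATS; i++)
		total += kcs.cpustat[i];

	printf("total = %llu\n", (unsigned long long)total);
	return 0;
}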
include/linux/latencytop.h (+2 -1)

@@ -10,6 +10,8 @@
 #define _INCLUDE_GUARD_LATENCYTOP_H_
 
 #include <linux/compiler.h>
+struct task_struct;
+
 #ifdef CONFIG_LATENCYTOP
 
 #define LT_SAVECOUNT		32
@@ -23,7 +25,6 @@ struct latency_record {
 };
 
 
-struct task_struct;
 
 extern int latencytop_enabled;
 void __account_scheduler_latency(struct task_struct *task, int usecs, int inter);

include/linux/sched.h (+19 -4)

@@ -273,9 +273,11 @@ extern int runqueue_is_locked(int cpu);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
 extern void select_nohz_load_balancer(int stop_tick);
+extern void set_cpu_sd_state_idle(void);
 extern int get_nohz_timer_target(void);
 #else
 static inline void select_nohz_load_balancer(int stop_tick) { }
+static inline void set_cpu_sd_state_idle(void) { }
 #endif
 
 /*
@@ -483,8 +485,8 @@ struct task_cputime {
 
 #define INIT_CPUTIME	\
 	(struct task_cputime) {					\
-		.utime = cputime_zero,				\
-		.stime = cputime_zero,				\
+		.utime = 0,					\
+		.stime = 0,					\
 		.sum_exec_runtime = 0,				\
 	}
 
@@ -901,6 +903,10 @@ struct sched_group_power {
 	 * single CPU.
 	 */
 	unsigned int power, power_orig;
+	/*
+	 * Number of busy cpus in this group.
+	 */
+	atomic_t nr_busy_cpus;
 };
 
 struct sched_group {
@@ -925,6 +931,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
 	return to_cpumask(sg->cpumask);
 }
 
+/**
+ * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
+ * @group: The group whose first cpu is to be returned.
+ */
+static inline unsigned int group_first_cpu(struct sched_group *group)
+{
+	return cpumask_first(sched_group_cpus(group));
+}
+
 struct sched_domain_attr {
 	int relax_domain_level;
 };
@@ -1315,8 +1330,8 @@ struct task_struct {
 	 * older sibling, respectively.  (p->father can be replaced with 
 	 * p->real_parent->pid)
 	 */
-	struct task_struct *real_parent; /* real parent process */
-	struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */
+	struct task_struct __rcu *real_parent; /* real parent process */
+	struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
 	/*
 	 * children/sibling forms the list of my natural children
 	 */

include/trace/events/sched.h (+57 -0)

@@ -330,6 +330,13 @@ DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
 	     TP_PROTO(struct task_struct *tsk, u64 delay),
 	     TP_ARGS(tsk, delay));
 
+/*
+ * Tracepoint for accounting blocked time (time the task is in uninterruptible).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
+	     TP_PROTO(struct task_struct *tsk, u64 delay),
+	     TP_ARGS(tsk, delay));
+
 /*
  * Tracepoint for accounting runtime (time the task is executing
  * on a CPU).
@@ -363,6 +370,56 @@ TRACE_EVENT(sched_stat_runtime,
 			(unsigned long long)__entry->vruntime)
 );
 
+#ifdef CREATE_TRACE_POINTS
+static inline u64 trace_get_sleeptime(struct task_struct *tsk)
+{
+#ifdef CONFIG_SCHEDSTATS
+	u64 block, sleep;
+
+	block = tsk->se.statistics.block_start;
+	sleep = tsk->se.statistics.sleep_start;
+	tsk->se.statistics.block_start = 0;
+	tsk->se.statistics.sleep_start = 0;
+
+	return block ? block : sleep ? sleep : 0;
+#else
+	return 0;
+#endif
+}
+#endif
+
+/*
+ * Tracepoint for accounting sleeptime (time the task is sleeping
+ * or waiting for I/O).
+ */
+TRACE_EVENT(sched_stat_sleeptime,
+
+	TP_PROTO(struct task_struct *tsk, u64 now),
+
+	TP_ARGS(tsk, now),
+
+	TP_STRUCT__entry(
+		__array( char,	comm,	TASK_COMM_LEN	)
+		__field( pid_t,	pid			)
+		__field( u64,	sleeptime		)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid		= tsk->pid;
+		__entry->sleeptime = trace_get_sleeptime(tsk);
+		__entry->sleeptime = __entry->sleeptime ?
+				now - __entry->sleeptime : 0;
+	)
+	TP_perf_assign(
+		__perf_count(__entry->sleeptime);
+	),
+
+	TP_printk("comm=%s pid=%d sleeptime=%Lu [ns]",
+			__entry->comm, __entry->pid,
+			(unsigned long long)__entry->sleeptime)
+);
+
 /*
  * Tracepoint for showing priority inheritance modifying a tasks
  * priority.

kernel/Makefile (+5 -15)

@@ -2,16 +2,15 @@
 # Makefile for the linux kernel.
 #
 
-obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
+obj-y     = fork.o exec_domain.o panic.o printk.o \
 	    cpu.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
-	    notifier.o ksysfs.o sched_clock.o cred.o \
-	    async.o range.o
-obj-y += groups.o
+	    notifier.o ksysfs.o cred.o \
+	    async.o range.o groups.o
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not trace debug files and internal ftrace files
@@ -20,10 +19,11 @@ CFLAGS_REMOVE_lockdep_proc.o = -pg
 CFLAGS_REMOVE_mutex-debug.o = -pg
 CFLAGS_REMOVE_rtmutex-debug.o = -pg
 CFLAGS_REMOVE_cgroup-debug.o = -pg
-CFLAGS_REMOVE_sched_clock.o = -pg
 CFLAGS_REMOVE_irq_work.o = -pg
 endif
 
+obj-y += sched/
+
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
@@ -99,7 +99,6 @@ obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_TRACEPOINTS) += trace/
-obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-$(CONFIG_CPU_PM) += cpu_pm.o
 
@@ -110,15 +109,6 @@ obj-$(CONFIG_PADATA) += padata.o
 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 
-ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
-# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
-# needed for x86 only.  Why this used to be enabled for all architectures is beyond
-# me.  I suspect most platforms don't need this, but until we know that for sure
-# I turn this off for IA-64 only.  Andreas Schwab says it's also needed on m68k
-# to get a correct value for the wait-channel (WCHAN in ps). --davidm
-CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer
-endif
-
 $(obj)/configs.o: $(obj)/config_data.h
 
 # config_data.h contains the same information as ikconfig.h but gzipped.

+ 2 - 2
kernel/acct.c

@@ -613,8 +613,8 @@ void acct_collect(long exitcode, int group_dead)
 		pacct->ac_flag |= ACORE;
 		pacct->ac_flag |= ACORE;
 	if (current->flags & PF_SIGNALED)
 	if (current->flags & PF_SIGNALED)
 		pacct->ac_flag |= AXSIG;
 		pacct->ac_flag |= AXSIG;
-	pacct->ac_utime = cputime_add(pacct->ac_utime, current->utime);
-	pacct->ac_stime = cputime_add(pacct->ac_stime, current->stime);
+	pacct->ac_utime += current->utime;
+	pacct->ac_stime += current->stime;
 	pacct->ac_minflt += current->min_flt;
 	pacct->ac_minflt += current->min_flt;
 	pacct->ac_majflt += current->maj_flt;
 	pacct->ac_majflt += current->maj_flt;
 	spin_unlock_irq(&current->sighand->siglock);
 	spin_unlock_irq(&current->sighand->siglock);

+ 1 - 2
kernel/cpu.c

@@ -178,8 +178,7 @@ static inline void check_for_tasks(int cpu)
 	write_lock_irq(&tasklist_lock);
 	write_lock_irq(&tasklist_lock);
 	for_each_process(p) {
 	for_each_process(p) {
 		if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
 		if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
-		    (!cputime_eq(p->utime, cputime_zero) ||
-		     !cputime_eq(p->stime, cputime_zero)))
+		    (p->utime || p->stime))
 			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
 			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
 				"(state = %ld, flags = %x)\n",
 				"(state = %ld, flags = %x)\n",
 				p->comm, task_pid_nr(p), cpu,
 				p->comm, task_pid_nr(p), cpu,

+ 6 - 16
kernel/exit.c

@@ -121,9 +121,9 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 * will have been the last reference on the signal_struct.
 		 */
 		 */
-		sig->utime = cputime_add(sig->utime, tsk->utime);
-		sig->stime = cputime_add(sig->stime, tsk->stime);
-		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
+		sig->utime += tsk->utime;
+		sig->stime += tsk->stime;
+		sig->gtime += tsk->gtime;
 		sig->min_flt += tsk->min_flt;
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
 		sig->nvcsw += tsk->nvcsw;
@@ -1255,19 +1255,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		spin_lock_irq(&p->real_parent->sighand->siglock);
 		spin_lock_irq(&p->real_parent->sighand->siglock);
 		psig = p->real_parent->signal;
 		psig = p->real_parent->signal;
 		sig = p->signal;
 		sig = p->signal;
-		psig->cutime =
-			cputime_add(psig->cutime,
-			cputime_add(tgutime,
-				    sig->cutime));
-		psig->cstime =
-			cputime_add(psig->cstime,
-			cputime_add(tgstime,
-				    sig->cstime));
-		psig->cgtime =
-			cputime_add(psig->cgtime,
-			cputime_add(p->gtime,
-			cputime_add(sig->gtime,
-				    sig->cgtime)));
+		psig->cutime += tgutime + sig->cutime;
+		psig->cstime += tgstime + sig->cstime;
+		psig->cgtime += p->gtime + sig->gtime + sig->cgtime;
 		psig->cmin_flt +=
 		psig->cmin_flt +=
 			p->min_flt + sig->min_flt + sig->cmin_flt;
 			p->min_flt + sig->min_flt + sig->cmin_flt;
 		psig->cmaj_flt +=
 		psig->cmaj_flt +=

+ 5 - 9
kernel/fork.c

@@ -1023,8 +1023,8 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
  */
  */
 static void posix_cpu_timers_init(struct task_struct *tsk)
 static void posix_cpu_timers_init(struct task_struct *tsk)
 {
 {
-	tsk->cputime_expires.prof_exp = cputime_zero;
-	tsk->cputime_expires.virt_exp = cputime_zero;
+	tsk->cputime_expires.prof_exp = 0;
+	tsk->cputime_expires.virt_exp = 0;
 	tsk->cputime_expires.sched_exp = 0;
 	tsk->cputime_expires.sched_exp = 0;
 	INIT_LIST_HEAD(&tsk->cpu_timers[0]);
 	INIT_LIST_HEAD(&tsk->cpu_timers[0]);
 	INIT_LIST_HEAD(&tsk->cpu_timers[1]);
 	INIT_LIST_HEAD(&tsk->cpu_timers[1]);
@@ -1132,14 +1132,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 
 	init_sigpending(&p->pending);
 	init_sigpending(&p->pending);
 
 
-	p->utime = cputime_zero;
-	p->stime = cputime_zero;
-	p->gtime = cputime_zero;
-	p->utimescaled = cputime_zero;
-	p->stimescaled = cputime_zero;
+	p->utime = p->stime = p->gtime = 0;
+	p->utimescaled = p->stimescaled = 0;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
-	p->prev_utime = cputime_zero;
-	p->prev_stime = cputime_zero;
+	p->prev_utime = p->prev_stime = 0;
 #endif
 #endif
 #if defined(SPLIT_RSS_COUNTING)
 #if defined(SPLIT_RSS_COUNTING)
 	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
 	memset(&p->rss_stat, 0, sizeof(p->rss_stat));

+ 7 - 8
kernel/itimer.c

@@ -52,22 +52,22 @@ static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
 
 
 	cval = it->expires;
 	cval = it->expires;
 	cinterval = it->incr;
 	cinterval = it->incr;
-	if (!cputime_eq(cval, cputime_zero)) {
+	if (cval) {
 		struct task_cputime cputime;
 		struct task_cputime cputime;
 		cputime_t t;
 		cputime_t t;
 
 
 		thread_group_cputimer(tsk, &cputime);
 		thread_group_cputimer(tsk, &cputime);
 		if (clock_id == CPUCLOCK_PROF)
 		if (clock_id == CPUCLOCK_PROF)
-			t = cputime_add(cputime.utime, cputime.stime);
+			t = cputime.utime + cputime.stime;
 		else
 		else
 			/* CPUCLOCK_VIRT */
 			/* CPUCLOCK_VIRT */
 			t = cputime.utime;
 			t = cputime.utime;
 
 
-		if (cputime_le(cval, t))
+		if (cval < t)
 			/* about to fire */
 			/* about to fire */
 			cval = cputime_one_jiffy;
 			cval = cputime_one_jiffy;
 		else
 		else
-			cval = cputime_sub(cval, t);
+			cval = cval - t;
 	}
 	}
 
 
 	spin_unlock_irq(&tsk->sighand->siglock);
 	spin_unlock_irq(&tsk->sighand->siglock);
@@ -161,10 +161,9 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
 
 
 	cval = it->expires;
 	cval = it->expires;
 	cinterval = it->incr;
 	cinterval = it->incr;
-	if (!cputime_eq(cval, cputime_zero) ||
-	    !cputime_eq(nval, cputime_zero)) {
-		if (cputime_gt(nval, cputime_zero))
-			nval = cputime_add(nval, cputime_one_jiffy);
+	if (cval || nval) {
+		if (nval > 0)
+			nval += cputime_one_jiffy;
 		set_process_cpu_timer(tsk, clock_id, &nval, &cval);
 		set_process_cpu_timer(tsk, clock_id, &nval, &cval);
 	}
 	}
 	it->expires = nval;
 	it->expires = nval;

+ 52 - 80
kernel/posix-cpu-timers.c

@@ -78,7 +78,7 @@ static inline int cpu_time_before(const clockid_t which_clock,
 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
 		return now.sched < then.sched;
 		return now.sched < then.sched;
 	}  else {
 	}  else {
-		return cputime_lt(now.cpu, then.cpu);
+		return now.cpu < then.cpu;
 	}
 	}
 }
 }
 static inline void cpu_time_add(const clockid_t which_clock,
 static inline void cpu_time_add(const clockid_t which_clock,
@@ -88,7 +88,7 @@ static inline void cpu_time_add(const clockid_t which_clock,
 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
 		acc->sched += val.sched;
 		acc->sched += val.sched;
 	}  else {
 	}  else {
-		acc->cpu = cputime_add(acc->cpu, val.cpu);
+		acc->cpu += val.cpu;
 	}
 	}
 }
 }
 static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
 static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
@@ -98,24 +98,11 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
 		a.sched -= b.sched;
 		a.sched -= b.sched;
 	}  else {
 	}  else {
-		a.cpu = cputime_sub(a.cpu, b.cpu);
+		a.cpu -= b.cpu;
 	}
 	}
 	return a;
 	return a;
 }
 }
 
 
-/*
- * Divide and limit the result to res >= 1
- *
- * This is necessary to prevent signal delivery starvation, when the result of
- * the division would be rounded down to 0.
- */
-static inline cputime_t cputime_div_non_zero(cputime_t time, unsigned long div)
-{
-	cputime_t res = cputime_div(time, div);
-
-	return max_t(cputime_t, res, 1);
-}
-
 /*
 /*
  * Update expiry time from increment, and increase overrun count,
  * Update expiry time from increment, and increase overrun count,
  * given the current clock sample.
  * given the current clock sample.
@@ -148,28 +135,26 @@ static void bump_cpu_timer(struct k_itimer *timer,
 	} else {
 	} else {
 		cputime_t delta, incr;
 		cputime_t delta, incr;
 
 
-		if (cputime_lt(now.cpu, timer->it.cpu.expires.cpu))
+		if (now.cpu < timer->it.cpu.expires.cpu)
 			return;
 			return;
 		incr = timer->it.cpu.incr.cpu;
 		incr = timer->it.cpu.incr.cpu;
-		delta = cputime_sub(cputime_add(now.cpu, incr),
-				    timer->it.cpu.expires.cpu);
+		delta = now.cpu + incr - timer->it.cpu.expires.cpu;
 		/* Don't use (incr*2 < delta), incr*2 might overflow. */
 		/* Don't use (incr*2 < delta), incr*2 might overflow. */
-		for (i = 0; cputime_lt(incr, cputime_sub(delta, incr)); i++)
-			     incr = cputime_add(incr, incr);
-		for (; i >= 0; incr = cputime_halve(incr), i--) {
-			if (cputime_lt(delta, incr))
+		for (i = 0; incr < delta - incr; i++)
+			     incr += incr;
+		for (; i >= 0; incr = incr >> 1, i--) {
+			if (delta < incr)
 				continue;
 				continue;
-			timer->it.cpu.expires.cpu =
-				cputime_add(timer->it.cpu.expires.cpu, incr);
+			timer->it.cpu.expires.cpu += incr;
 			timer->it_overrun += 1 << i;
 			timer->it_overrun += 1 << i;
-			delta = cputime_sub(delta, incr);
+			delta -= incr;
 		}
 		}
 	}
 	}
 }
 }
 
 
 static inline cputime_t prof_ticks(struct task_struct *p)
 static inline cputime_t prof_ticks(struct task_struct *p)
 {
 {
-	return cputime_add(p->utime, p->stime);
+	return p->utime + p->stime;
 }
 }
 static inline cputime_t virt_ticks(struct task_struct *p)
 static inline cputime_t virt_ticks(struct task_struct *p)
 {
 {
@@ -248,8 +233,8 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 
 
 	t = tsk;
 	t = tsk;
 	do {
 	do {
-		times->utime = cputime_add(times->utime, t->utime);
-		times->stime = cputime_add(times->stime, t->stime);
+		times->utime += t->utime;
+		times->stime += t->stime;
 		times->sum_exec_runtime += task_sched_runtime(t);
 		times->sum_exec_runtime += task_sched_runtime(t);
 	} while_each_thread(tsk, t);
 	} while_each_thread(tsk, t);
 out:
 out:
@@ -258,10 +243,10 @@ out:
 
 
 static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
 static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
 {
 {
-	if (cputime_gt(b->utime, a->utime))
+	if (b->utime > a->utime)
 		a->utime = b->utime;
 		a->utime = b->utime;
 
 
-	if (cputime_gt(b->stime, a->stime))
+	if (b->stime > a->stime)
 		a->stime = b->stime;
 		a->stime = b->stime;
 
 
 	if (b->sum_exec_runtime > a->sum_exec_runtime)
 	if (b->sum_exec_runtime > a->sum_exec_runtime)
@@ -306,7 +291,7 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
 		return -EINVAL;
 		return -EINVAL;
 	case CPUCLOCK_PROF:
 	case CPUCLOCK_PROF:
 		thread_group_cputime(p, &cputime);
 		thread_group_cputime(p, &cputime);
-		cpu->cpu = cputime_add(cputime.utime, cputime.stime);
+		cpu->cpu = cputime.utime + cputime.stime;
 		break;
 		break;
 	case CPUCLOCK_VIRT:
 	case CPUCLOCK_VIRT:
 		thread_group_cputime(p, &cputime);
 		thread_group_cputime(p, &cputime);
@@ -470,26 +455,24 @@ static void cleanup_timers(struct list_head *head,
 			   unsigned long long sum_exec_runtime)
 			   unsigned long long sum_exec_runtime)
 {
 {
 	struct cpu_timer_list *timer, *next;
 	struct cpu_timer_list *timer, *next;
-	cputime_t ptime = cputime_add(utime, stime);
+	cputime_t ptime = utime + stime;
 
 
 	list_for_each_entry_safe(timer, next, head, entry) {
 	list_for_each_entry_safe(timer, next, head, entry) {
 		list_del_init(&timer->entry);
 		list_del_init(&timer->entry);
-		if (cputime_lt(timer->expires.cpu, ptime)) {
-			timer->expires.cpu = cputime_zero;
+		if (timer->expires.cpu < ptime) {
+			timer->expires.cpu = 0;
 		} else {
 		} else {
-			timer->expires.cpu = cputime_sub(timer->expires.cpu,
-							 ptime);
+			timer->expires.cpu -= ptime;
 		}
 		}
 	}
 	}
 
 
 	++head;
 	++head;
 	list_for_each_entry_safe(timer, next, head, entry) {
 	list_for_each_entry_safe(timer, next, head, entry) {
 		list_del_init(&timer->entry);
 		list_del_init(&timer->entry);
-		if (cputime_lt(timer->expires.cpu, utime)) {
-			timer->expires.cpu = cputime_zero;
+		if (timer->expires.cpu < utime) {
+			timer->expires.cpu = 0;
 		} else {
 		} else {
-			timer->expires.cpu = cputime_sub(timer->expires.cpu,
-							 utime);
+			timer->expires.cpu -= utime;
 		}
 		}
 	}
 	}
 
 
@@ -520,8 +503,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
 	struct signal_struct *const sig = tsk->signal;
 	struct signal_struct *const sig = tsk->signal;
 
 
 	cleanup_timers(tsk->signal->cpu_timers,
 	cleanup_timers(tsk->signal->cpu_timers,
-		       cputime_add(tsk->utime, sig->utime),
-		       cputime_add(tsk->stime, sig->stime),
+		       tsk->utime + sig->utime, tsk->stime + sig->stime,
 		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
 		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
 }
 }
 
 
@@ -540,8 +522,7 @@ static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
 
 
 static inline int expires_gt(cputime_t expires, cputime_t new_exp)
 static inline int expires_gt(cputime_t expires, cputime_t new_exp)
 {
 {
-	return cputime_eq(expires, cputime_zero) ||
-	       cputime_gt(expires, new_exp);
+	return expires == 0 || expires > new_exp;
 }
 }
 
 
 /*
 /*
@@ -651,7 +632,7 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
 	default:
 	default:
 		return -EINVAL;
 		return -EINVAL;
 	case CPUCLOCK_PROF:
 	case CPUCLOCK_PROF:
-		cpu->cpu = cputime_add(cputime.utime, cputime.stime);
+		cpu->cpu = cputime.utime + cputime.stime;
 		break;
 		break;
 	case CPUCLOCK_VIRT:
 	case CPUCLOCK_VIRT:
 		cpu->cpu = cputime.utime;
 		cpu->cpu = cputime.utime;
@@ -918,12 +899,12 @@ static void check_thread_timers(struct task_struct *tsk,
 	unsigned long soft;
 	unsigned long soft;
 
 
 	maxfire = 20;
 	maxfire = 20;
-	tsk->cputime_expires.prof_exp = cputime_zero;
+	tsk->cputime_expires.prof_exp = 0;
 	while (!list_empty(timers)) {
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *t = list_first_entry(timers,
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      struct cpu_timer_list,
 						      entry);
 						      entry);
-		if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) {
+		if (!--maxfire || prof_ticks(tsk) < t->expires.cpu) {
 			tsk->cputime_expires.prof_exp = t->expires.cpu;
 			tsk->cputime_expires.prof_exp = t->expires.cpu;
 			break;
 			break;
 		}
 		}
@@ -933,12 +914,12 @@ static void check_thread_timers(struct task_struct *tsk,
 
 
 	++timers;
 	++timers;
 	maxfire = 20;
 	maxfire = 20;
-	tsk->cputime_expires.virt_exp = cputime_zero;
+	tsk->cputime_expires.virt_exp = 0;
 	while (!list_empty(timers)) {
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *t = list_first_entry(timers,
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      struct cpu_timer_list,
 						      entry);
 						      entry);
-		if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) {
+		if (!--maxfire || virt_ticks(tsk) < t->expires.cpu) {
 			tsk->cputime_expires.virt_exp = t->expires.cpu;
 			tsk->cputime_expires.virt_exp = t->expires.cpu;
 			break;
 			break;
 		}
 		}
@@ -1009,20 +990,19 @@ static u32 onecputick;
 static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 			     cputime_t *expires, cputime_t cur_time, int signo)
 			     cputime_t *expires, cputime_t cur_time, int signo)
 {
 {
-	if (cputime_eq(it->expires, cputime_zero))
+	if (!it->expires)
 		return;
 		return;
 
 
-	if (cputime_ge(cur_time, it->expires)) {
-		if (!cputime_eq(it->incr, cputime_zero)) {
-			it->expires = cputime_add(it->expires, it->incr);
+	if (cur_time >= it->expires) {
+		if (it->incr) {
+			it->expires += it->incr;
 			it->error += it->incr_error;
 			it->error += it->incr_error;
 			if (it->error >= onecputick) {
 			if (it->error >= onecputick) {
-				it->expires = cputime_sub(it->expires,
-							  cputime_one_jiffy);
+				it->expires -= cputime_one_jiffy;
 				it->error -= onecputick;
 				it->error -= onecputick;
 			}
 			}
 		} else {
 		} else {
-			it->expires = cputime_zero;
+			it->expires = 0;
 		}
 		}
 
 
 		trace_itimer_expire(signo == SIGPROF ?
 		trace_itimer_expire(signo == SIGPROF ?
@@ -1031,9 +1011,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 		__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
 		__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
 	}
 	}
 
 
-	if (!cputime_eq(it->expires, cputime_zero) &&
-	    (cputime_eq(*expires, cputime_zero) ||
-	     cputime_lt(it->expires, *expires))) {
+	if (it->expires && (!*expires || it->expires < *expires)) {
 		*expires = it->expires;
 		*expires = it->expires;
 	}
 	}
 }
 }
@@ -1048,9 +1026,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
  */
  */
 static inline int task_cputime_zero(const struct task_cputime *cputime)
 static inline int task_cputime_zero(const struct task_cputime *cputime)
 {
 {
-	if (cputime_eq(cputime->utime, cputime_zero) &&
-	    cputime_eq(cputime->stime, cputime_zero) &&
-	    cputime->sum_exec_runtime == 0)
+	if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
 		return 1;
 		return 1;
 	return 0;
 	return 0;
 }
 }
@@ -1076,15 +1052,15 @@ static void check_process_timers(struct task_struct *tsk,
 	 */
 	 */
 	thread_group_cputimer(tsk, &cputime);
 	thread_group_cputimer(tsk, &cputime);
 	utime = cputime.utime;
 	utime = cputime.utime;
-	ptime = cputime_add(utime, cputime.stime);
+	ptime = utime + cputime.stime;
 	sum_sched_runtime = cputime.sum_exec_runtime;
 	sum_sched_runtime = cputime.sum_exec_runtime;
 	maxfire = 20;
 	maxfire = 20;
-	prof_expires = cputime_zero;
+	prof_expires = 0;
 	while (!list_empty(timers)) {
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *tl = list_first_entry(timers,
 		struct cpu_timer_list *tl = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      struct cpu_timer_list,
 						      entry);
 						      entry);
-		if (!--maxfire || cputime_lt(ptime, tl->expires.cpu)) {
+		if (!--maxfire || ptime < tl->expires.cpu) {
 			prof_expires = tl->expires.cpu;
 			prof_expires = tl->expires.cpu;
 			break;
 			break;
 		}
 		}
@@ -1094,12 +1070,12 @@ static void check_process_timers(struct task_struct *tsk,
 
 
 	++timers;
 	++timers;
 	maxfire = 20;
 	maxfire = 20;
-	virt_expires = cputime_zero;
+	virt_expires = 0;
 	while (!list_empty(timers)) {
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *tl = list_first_entry(timers,
 		struct cpu_timer_list *tl = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      struct cpu_timer_list,
 						      entry);
 						      entry);
-		if (!--maxfire || cputime_lt(utime, tl->expires.cpu)) {
+		if (!--maxfire || utime < tl->expires.cpu) {
 			virt_expires = tl->expires.cpu;
 			virt_expires = tl->expires.cpu;
 			break;
 			break;
 		}
 		}
@@ -1154,8 +1130,7 @@ static void check_process_timers(struct task_struct *tsk,
 			}
 			}
 		}
 		}
 		x = secs_to_cputime(soft);
 		x = secs_to_cputime(soft);
-		if (cputime_eq(prof_expires, cputime_zero) ||
-		    cputime_lt(x, prof_expires)) {
+		if (!prof_expires || x < prof_expires) {
 			prof_expires = x;
 			prof_expires = x;
 		}
 		}
 	}
 	}
@@ -1249,12 +1224,9 @@ out:
 static inline int task_cputime_expired(const struct task_cputime *sample,
 static inline int task_cputime_expired(const struct task_cputime *sample,
 					const struct task_cputime *expires)
 					const struct task_cputime *expires)
 {
 {
-	if (!cputime_eq(expires->utime, cputime_zero) &&
-	    cputime_ge(sample->utime, expires->utime))
+	if (expires->utime && sample->utime >= expires->utime)
 		return 1;
 		return 1;
-	if (!cputime_eq(expires->stime, cputime_zero) &&
-	    cputime_ge(cputime_add(sample->utime, sample->stime),
-		       expires->stime))
+	if (expires->stime && sample->utime + sample->stime >= expires->stime)
 		return 1;
 		return 1;
 	if (expires->sum_exec_runtime != 0 &&
 	if (expires->sum_exec_runtime != 0 &&
 	    sample->sum_exec_runtime >= expires->sum_exec_runtime)
 	    sample->sum_exec_runtime >= expires->sum_exec_runtime)
@@ -1389,18 +1361,18 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 		 * it to be relative, *newval argument is relative and we update
 		 * it to be relative, *newval argument is relative and we update
 		 * it to be absolute.
 		 * it to be absolute.
 		 */
 		 */
-		if (!cputime_eq(*oldval, cputime_zero)) {
-			if (cputime_le(*oldval, now.cpu)) {
+		if (*oldval) {
+			if (*oldval <= now.cpu) {
 				/* Just about to fire. */
 				/* Just about to fire. */
 				*oldval = cputime_one_jiffy;
 				*oldval = cputime_one_jiffy;
 			} else {
 			} else {
-				*oldval = cputime_sub(*oldval, now.cpu);
+				*oldval -= now.cpu;
 			}
 			}
 		}
 		}
 
 
-		if (cputime_eq(*newval, cputime_zero))
+		if (!*newval)
 			return;
 			return;
-		*newval = cputime_add(*newval, now.cpu);
+		*newval += now.cpu;
 	}
 	}
 
 
 	/*
 	/*

+ 20 - 0
kernel/sched/Makefile

@@ -0,0 +1,20 @@
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_clock.o = -pg
+endif
+
+ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
+# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
+# needed for x86 only.  Why this used to be enabled for all architectures is beyond
+# me.  I suspect most platforms don't need this, but until we know that for sure
+# I turn this off for IA-64 only.  Andreas Schwab says it's also needed on m68k
+# to get a correct value for the wait-channel (WCHAN in ps). --davidm
+CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
+endif
+
+obj-y += core.o clock.o idle_task.o fair.o rt.o stop_task.o
+obj-$(CONFIG_SMP) += cpupri.o
+obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
+obj-$(CONFIG_SCHEDSTATS) += stats.o
+obj-$(CONFIG_SCHED_DEBUG) += debug.o
+
+

+ 8 - 25
kernel/sched_autogroup.c → kernel/sched/auto_group.c

@@ -1,15 +1,19 @@
 #ifdef CONFIG_SCHED_AUTOGROUP
 #ifdef CONFIG_SCHED_AUTOGROUP
 
 
+#include "sched.h"
+
 #include <linux/proc_fs.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/seq_file.h>
 #include <linux/kallsyms.h>
 #include <linux/kallsyms.h>
 #include <linux/utsname.h>
 #include <linux/utsname.h>
+#include <linux/security.h>
+#include <linux/export.h>
 
 
 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
 unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
 static struct autogroup autogroup_default;
 static struct autogroup autogroup_default;
 static atomic_t autogroup_seq_nr;
 static atomic_t autogroup_seq_nr;
 
 
-static void __init autogroup_init(struct task_struct *init_task)
+void __init autogroup_init(struct task_struct *init_task)
 {
 {
 	autogroup_default.tg = &root_task_group;
 	autogroup_default.tg = &root_task_group;
 	kref_init(&autogroup_default.kref);
 	kref_init(&autogroup_default.kref);
@@ -17,7 +21,7 @@ static void __init autogroup_init(struct task_struct *init_task)
 	init_task->signal->autogroup = &autogroup_default;
 	init_task->signal->autogroup = &autogroup_default;
 }
 }
 
 
-static inline void autogroup_free(struct task_group *tg)
+void autogroup_free(struct task_group *tg)
 {
 {
 	kfree(tg->autogroup);
 	kfree(tg->autogroup);
 }
 }
@@ -59,10 +63,6 @@ static inline struct autogroup *autogroup_task_get(struct task_struct *p)
 	return ag;
 	return ag;
 }
 }
 
 
-#ifdef CONFIG_RT_GROUP_SCHED
-static void free_rt_sched_group(struct task_group *tg);
-#endif
-
 static inline struct autogroup *autogroup_create(void)
 static inline struct autogroup *autogroup_create(void)
 {
 {
 	struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
 	struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
@@ -108,8 +108,7 @@ out_fail:
 	return autogroup_kref_get(&autogroup_default);
 	return autogroup_kref_get(&autogroup_default);
 }
 }
 
 
-static inline bool
-task_wants_autogroup(struct task_struct *p, struct task_group *tg)
+bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 {
 {
 	if (tg != &root_task_group)
 	if (tg != &root_task_group)
 		return false;
 		return false;
@@ -127,22 +126,6 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 	return true;
 	return true;
 }
 }
 
 
-static inline bool task_group_is_autogroup(struct task_group *tg)
-{
-	return !!tg->autogroup;
-}
-
-static inline struct task_group *
-autogroup_task_group(struct task_struct *p, struct task_group *tg)
-{
-	int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
-
-	if (enabled && task_wants_autogroup(p, tg))
-		return p->signal->autogroup->tg;
-
-	return tg;
-}
-
 static void
 static void
 autogroup_move_group(struct task_struct *p, struct autogroup *ag)
 autogroup_move_group(struct task_struct *p, struct autogroup *ag)
 {
 {
@@ -263,7 +246,7 @@ out:
 #endif /* CONFIG_PROC_FS */
 #endif /* CONFIG_PROC_FS */
 
 
 #ifdef CONFIG_SCHED_DEBUG
 #ifdef CONFIG_SCHED_DEBUG
-static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
+int autogroup_path(struct task_group *tg, char *buf, int buflen)
 {
 {
 	if (!task_group_is_autogroup(tg))
 	if (!task_group_is_autogroup(tg))
 		return 0;
 		return 0;

+ 24 - 2
kernel/sched_autogroup.h → kernel/sched/auto_group.h

@@ -1,5 +1,8 @@
 #ifdef CONFIG_SCHED_AUTOGROUP
 #ifdef CONFIG_SCHED_AUTOGROUP
 
 
+#include <linux/kref.h>
+#include <linux/rwsem.h>
+
 struct autogroup {
 struct autogroup {
 	/*
 	/*
 	 * reference doesn't mean how many thread attach to this
 	 * reference doesn't mean how many thread attach to this
@@ -13,9 +16,28 @@ struct autogroup {
 	int			nice;
 	int			nice;
 };
 };
 
 
-static inline bool task_group_is_autogroup(struct task_group *tg);
+extern void autogroup_init(struct task_struct *init_task);
+extern void autogroup_free(struct task_group *tg);
+
+static inline bool task_group_is_autogroup(struct task_group *tg)
+{
+	return !!tg->autogroup;
+}
+
+extern bool task_wants_autogroup(struct task_struct *p, struct task_group *tg);
+
 static inline struct task_group *
 static inline struct task_group *
-autogroup_task_group(struct task_struct *p, struct task_group *tg);
+autogroup_task_group(struct task_struct *p, struct task_group *tg)
+{
+	int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
+
+	if (enabled && task_wants_autogroup(p, tg))
+		return p->signal->autogroup->tg;
+
+	return tg;
+}
+
+extern int autogroup_path(struct task_group *tg, char *buf, int buflen);
 
 
 #else /* !CONFIG_SCHED_AUTOGROUP */
 #else /* !CONFIG_SCHED_AUTOGROUP */
 
 

+ 0 - 0
kernel/sched_clock.c → kernel/sched/clock.c


File diff suppressed because it is too large
+ 49 - 849
kernel/sched/core.c


+ 2 - 2
kernel/sched_cpupri.c → kernel/sched/cpupri.c

@@ -1,5 +1,5 @@
 /*
 /*
- *  kernel/sched_cpupri.c
+ *  kernel/sched/cpupri.c
  *
  *
  *  CPU priority management
  *  CPU priority management
  *
  *
@@ -28,7 +28,7 @@
  */
  */
 
 
 #include <linux/gfp.h>
 #include <linux/gfp.h>
-#include "sched_cpupri.h"
+#include "cpupri.h"
 
 
 /* Convert between a 140 based task->prio, and our 102 based cpupri */
 /* Convert between a 140 based task->prio, and our 102 based cpupri */
 static int convert_prio(int prio)
 static int convert_prio(int prio)

+ 0 - 0
kernel/sched_cpupri.h → kernel/sched/cpupri.h


+ 4 - 2
kernel/sched_debug.c → kernel/sched/debug.c

@@ -1,5 +1,5 @@
 /*
 /*
- * kernel/time/sched_debug.c
+ * kernel/sched/debug.c
  *
  *
  * Print the CFS rbtree
  * Print the CFS rbtree
  *
  *
@@ -16,6 +16,8 @@
 #include <linux/kallsyms.h>
 #include <linux/kallsyms.h>
 #include <linux/utsname.h>
 #include <linux/utsname.h>
 
 
+#include "sched.h"
+
 static DEFINE_SPINLOCK(sched_debug_lock);
 static DEFINE_SPINLOCK(sched_debug_lock);
 
 
 /*
 /*
@@ -373,7 +375,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
 	return 0;
 	return 0;
 }
 }
 
 
-static void sysrq_sched_debug_show(void)
+void sysrq_sched_debug_show(void)
 {
 {
 	sched_debug_show(NULL, NULL);
 	sched_debug_show(NULL, NULL);
 }
 }

File diff suppressed because it is too large
+ 492 - 196
kernel/sched/fair.c


+ 15 - 15
kernel/sched_features.h → kernel/sched/features.h

@@ -3,13 +3,13 @@
  * them to run sooner, but does not allow tons of sleepers to
  * them to run sooner, but does not allow tons of sleepers to
  * rip the spread apart.
  * rip the spread apart.
  */
  */
-SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1)
+SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
 
 
 /*
 /*
  * Place new tasks ahead so that they do not starve already running
  * Place new tasks ahead so that they do not starve already running
  * tasks
  * tasks
  */
  */
-SCHED_FEAT(START_DEBIT, 1)
+SCHED_FEAT(START_DEBIT, true)
 
 
 /*
 /*
  * Based on load and program behaviour, see if it makes sense to place
  * Based on load and program behaviour, see if it makes sense to place
@@ -17,54 +17,54 @@ SCHED_FEAT(START_DEBIT, 1)
  * improve cache locality. Typically used with SYNC wakeups as
  * improve cache locality. Typically used with SYNC wakeups as
  * generated by pipes and the like, see also SYNC_WAKEUPS.
  * generated by pipes and the like, see also SYNC_WAKEUPS.
  */
  */
-SCHED_FEAT(AFFINE_WAKEUPS, 1)
+SCHED_FEAT(AFFINE_WAKEUPS, true)
 
 
 /*
 /*
  * Prefer to schedule the task we woke last (assuming it failed
  * Prefer to schedule the task we woke last (assuming it failed
  * wakeup-preemption), since its likely going to consume data we
  * wakeup-preemption), since its likely going to consume data we
  * touched, increases cache locality.
  * touched, increases cache locality.
  */
  */
-SCHED_FEAT(NEXT_BUDDY, 0)
+SCHED_FEAT(NEXT_BUDDY, false)
 
 
 /*
 /*
  * Prefer to schedule the task that ran last (when we did
  * Prefer to schedule the task that ran last (when we did
  * wake-preempt) as that likely will touch the same data, increases
  * wake-preempt) as that likely will touch the same data, increases
  * cache locality.
  * cache locality.
  */
  */
-SCHED_FEAT(LAST_BUDDY, 1)
+SCHED_FEAT(LAST_BUDDY, true)
 
 
 /*
 /*
  * Consider buddies to be cache hot, decreases the likelyness of a
  * Consider buddies to be cache hot, decreases the likelyness of a
  * cache buddy being migrated away, increases cache locality.
  * cache buddy being migrated away, increases cache locality.
  */
  */
-SCHED_FEAT(CACHE_HOT_BUDDY, 1)
+SCHED_FEAT(CACHE_HOT_BUDDY, true)
 
 
 /*
 /*
  * Use arch dependent cpu power functions
  * Use arch dependent cpu power functions
  */
  */
-SCHED_FEAT(ARCH_POWER, 0)
+SCHED_FEAT(ARCH_POWER, false)
 
 
-SCHED_FEAT(HRTICK, 0)
-SCHED_FEAT(DOUBLE_TICK, 0)
-SCHED_FEAT(LB_BIAS, 1)
+SCHED_FEAT(HRTICK, false)
+SCHED_FEAT(DOUBLE_TICK, false)
+SCHED_FEAT(LB_BIAS, true)
 
 
 /*
 /*
  * Spin-wait on mutex acquisition when the mutex owner is running on
  * Spin-wait on mutex acquisition when the mutex owner is running on
  * another cpu -- assumes that when the owner is running, it will soon
  * another cpu -- assumes that when the owner is running, it will soon
  * release the lock. Decreases scheduling overhead.
  * release the lock. Decreases scheduling overhead.
  */
  */
-SCHED_FEAT(OWNER_SPIN, 1)
+SCHED_FEAT(OWNER_SPIN, true)
 
 
 /*
 /*
  * Decrement CPU power based on time not spent running tasks
  * Decrement CPU power based on time not spent running tasks
  */
  */
-SCHED_FEAT(NONTASK_POWER, 1)
+SCHED_FEAT(NONTASK_POWER, true)
 
 
 /*
 /*
  * Queue remote wakeups on the target CPU and process them
  * Queue remote wakeups on the target CPU and process them
  * using the scheduler IPI. Reduces rq->lock contention/bounces.
  * using the scheduler IPI. Reduces rq->lock contention/bounces.
  */
  */
-SCHED_FEAT(TTWU_QUEUE, 1)
+SCHED_FEAT(TTWU_QUEUE, true)
 
 
-SCHED_FEAT(FORCE_SD_OVERLAP, 0)
-SCHED_FEAT(RT_RUNTIME_SHARE, 1)
+SCHED_FEAT(FORCE_SD_OVERLAP, false)
+SCHED_FEAT(RT_RUNTIME_SHARE, true)

+ 3 - 1
kernel/sched_idletask.c → kernel/sched/idle_task.c

@@ -1,3 +1,5 @@
+#include "sched.h"
+
 /*
 /*
  * idle-task scheduling class.
  * idle-task scheduling class.
  *
  *
@@ -71,7 +73,7 @@ static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task
 /*
 /*
  * Simple, special scheduling class for the per-CPU idle tasks:
  * Simple, special scheduling class for the per-CPU idle tasks:
  */
  */
-static const struct sched_class idle_sched_class = {
+const struct sched_class idle_sched_class = {
 	/* .next is NULL */
 	/* .next is NULL */
 	/* no enqueue/yield_task for idle tasks */
 	/* no enqueue/yield_task for idle tasks */
 
 

+ 209 - 9
kernel/sched_rt.c → kernel/sched/rt.c

@@ -3,7 +3,92 @@
  * policies)
  * policies)
  */
  */
 
 
+#include "sched.h"
+
+#include <linux/slab.h>
+
+static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
+
+struct rt_bandwidth def_rt_bandwidth;
+
+static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
+{
+	struct rt_bandwidth *rt_b =
+		container_of(timer, struct rt_bandwidth, rt_period_timer);
+	ktime_t now;
+	int overrun;
+	int idle = 0;
+
+	for (;;) {
+		now = hrtimer_cb_get_time(timer);
+		overrun = hrtimer_forward(timer, now, rt_b->rt_period);
+
+		if (!overrun)
+			break;
+
+		idle = do_sched_rt_period_timer(rt_b, overrun);
+	}
+
+	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
+}
+
+void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
+{
+	rt_b->rt_period = ns_to_ktime(period);
+	rt_b->rt_runtime = runtime;
+
+	raw_spin_lock_init(&rt_b->rt_runtime_lock);
+
+	hrtimer_init(&rt_b->rt_period_timer,
+			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rt_b->rt_period_timer.function = sched_rt_period_timer;
+}
+
+static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
+{
+	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
+		return;
+
+	if (hrtimer_active(&rt_b->rt_period_timer))
+		return;
+
+	raw_spin_lock(&rt_b->rt_runtime_lock);
+	start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
+	raw_spin_unlock(&rt_b->rt_runtime_lock);
+}
+
+void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
+{
+	struct rt_prio_array *array;
+	int i;
+
+	array = &rt_rq->active;
+	for (i = 0; i < MAX_RT_PRIO; i++) {
+		INIT_LIST_HEAD(array->queue + i);
+		__clear_bit(i, array->bitmap);
+	}
+	/* delimiter for bitsearch: */
+	__set_bit(MAX_RT_PRIO, array->bitmap);
+
+#if defined CONFIG_SMP
+	rt_rq->highest_prio.curr = MAX_RT_PRIO;
+	rt_rq->highest_prio.next = MAX_RT_PRIO;
+	rt_rq->rt_nr_migratory = 0;
+	rt_rq->overloaded = 0;
+	plist_head_init(&rt_rq->pushable_tasks);
+#endif
+
+	rt_rq->rt_time = 0;
+	rt_rq->rt_throttled = 0;
+	rt_rq->rt_runtime = 0;
+	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
+}
+
 #ifdef CONFIG_RT_GROUP_SCHED
 #ifdef CONFIG_RT_GROUP_SCHED
+static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
+{
+	hrtimer_cancel(&rt_b->rt_period_timer);
+}
 
 
 #define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
 #define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
 
 
@@ -25,6 +110,91 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
 	return rt_se->rt_rq;
 	return rt_se->rt_rq;
 }
 }
 
 
+void free_rt_sched_group(struct task_group *tg)
+{
+	int i;
+
+	if (tg->rt_se)
+		destroy_rt_bandwidth(&tg->rt_bandwidth);
+
+	for_each_possible_cpu(i) {
+		if (tg->rt_rq)
+			kfree(tg->rt_rq[i]);
+		if (tg->rt_se)
+			kfree(tg->rt_se[i]);
+	}
+
+	kfree(tg->rt_rq);
+	kfree(tg->rt_se);
+}
+
+void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
+		struct sched_rt_entity *rt_se, int cpu,
+		struct sched_rt_entity *parent)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	rt_rq->highest_prio.curr = MAX_RT_PRIO;
+	rt_rq->rt_nr_boosted = 0;
+	rt_rq->rq = rq;
+	rt_rq->tg = tg;
+
+	tg->rt_rq[cpu] = rt_rq;
+	tg->rt_se[cpu] = rt_se;
+
+	if (!rt_se)
+		return;
+
+	if (!parent)
+		rt_se->rt_rq = &rq->rt;
+	else
+		rt_se->rt_rq = parent->my_q;
+
+	rt_se->my_q = rt_rq;
+	rt_se->parent = parent;
+	INIT_LIST_HEAD(&rt_se->run_list);
+}
+
+int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
+{
+	struct rt_rq *rt_rq;
+	struct sched_rt_entity *rt_se;
+	int i;
+
+	tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
+	if (!tg->rt_rq)
+		goto err;
+	tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL);
+	if (!tg->rt_se)
+		goto err;
+
+	init_rt_bandwidth(&tg->rt_bandwidth,
+			ktime_to_ns(def_rt_bandwidth.rt_period), 0);
+
+	for_each_possible_cpu(i) {
+		rt_rq = kzalloc_node(sizeof(struct rt_rq),
+				     GFP_KERNEL, cpu_to_node(i));
+		if (!rt_rq)
+			goto err;
+
+		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
+				     GFP_KERNEL, cpu_to_node(i));
+		if (!rt_se)
+			goto err_free_rq;
+
+		init_rt_rq(rt_rq, cpu_rq(i));
+		rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
+		init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
+	}
+
+	return 1;
+
+err_free_rq:
+	kfree(rt_rq);
+err:
+	return 0;
+}
+
 #else /* CONFIG_RT_GROUP_SCHED */
 #else /* CONFIG_RT_GROUP_SCHED */
 
 
 #define rt_entity_is_task(rt_se) (1)
 #define rt_entity_is_task(rt_se) (1)
@@ -47,6 +217,12 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
 	return &rq->rt;
 	return &rq->rt;
 }
 }
 
 
+void free_rt_sched_group(struct task_group *tg) { }
+
+int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
+{
+	return 1;
+}
 #endif /* CONFIG_RT_GROUP_SCHED */
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
@@ -556,6 +732,28 @@ static void enable_runtime(struct rq *rq)
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 }
 
 
+int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	int cpu = (int)(long)hcpu;
+
+	switch (action) {
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		disable_runtime(cpu_rq(cpu));
+		return NOTIFY_OK;
+
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		enable_runtime(cpu_rq(cpu));
+		return NOTIFY_OK;
+
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
 static int balance_runtime(struct rt_rq *rt_rq)
 static int balance_runtime(struct rt_rq *rt_rq)
 {
 {
 	int more = 0;
 	int more = 0;
@@ -648,7 +846,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 	if (rt_rq->rt_throttled)
 	if (rt_rq->rt_throttled)
 		return rt_rq_throttled(rt_rq);
 		return rt_rq_throttled(rt_rq);
 
 
-	if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
+	if (runtime >= sched_rt_period(rt_rq))
 		return 0;
 		return 0;
 
 
 	balance_runtime(rt_rq);
 	balance_runtime(rt_rq);
@@ -957,8 +1155,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 }
 }
 
 
 /*
 /*
- * Put task to the end of the run list without the overhead of dequeue
- * followed by enqueue.
+ * Put task to the head or the end of the run list without the overhead of
+ * dequeue followed by enqueue.
  */
  */
 static void
 static void
 requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
 requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
@@ -1002,6 +1200,9 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 
 
 	cpu = task_cpu(p);
 	cpu = task_cpu(p);
 
 
+	if (p->rt.nr_cpus_allowed == 1)
+		goto out;
+
 	/* For anything but wake ups, just return the task_cpu */
 	/* For anything but wake ups, just return the task_cpu */
 	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
 	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
 		goto out;
 		goto out;
@@ -1178,8 +1379,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 /* Only try algorithms three times */
 /* Only try algorithms three times */
 #define RT_MAX_TRIES 3
 #define RT_MAX_TRIES 3
 
 
-static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
-
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 {
 	if (!task_running(rq, p) &&
 	if (!task_running(rq, p) &&
@@ -1653,13 +1852,14 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
 		pull_rt_task(rq);
 		pull_rt_task(rq);
 }
 }
 
 
-static inline void init_sched_rt_class(void)
+void init_sched_rt_class(void)
 {
 {
 	unsigned int i;
 	unsigned int i;
 
 
-	for_each_possible_cpu(i)
+	for_each_possible_cpu(i) {
 		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
 		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
 					GFP_KERNEL, cpu_to_node(i));
 					GFP_KERNEL, cpu_to_node(i));
+	}
 }
 }
 #endif /* CONFIG_SMP */
 #endif /* CONFIG_SMP */
 
 
@@ -1800,7 +2000,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 		return 0;
 		return 0;
 }
 }
 
 
-static const struct sched_class rt_sched_class = {
+const struct sched_class rt_sched_class = {
 	.next			= &fair_sched_class,
 	.next			= &fair_sched_class,
 	.enqueue_task		= enqueue_task_rt,
 	.enqueue_task		= enqueue_task_rt,
 	.dequeue_task		= dequeue_task_rt,
 	.dequeue_task		= dequeue_task_rt,
@@ -1835,7 +2035,7 @@ static const struct sched_class rt_sched_class = {
 #ifdef CONFIG_SCHED_DEBUG
 #ifdef CONFIG_SCHED_DEBUG
 extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
 extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
 
 
-static void print_rt_stats(struct seq_file *m, int cpu)
+void print_rt_stats(struct seq_file *m, int cpu)
 {
 {
 	rt_rq_iter_t iter;
 	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 	struct rt_rq *rt_rq;

+ 1166 - 0
kernel/sched/sched.h

@@ -0,0 +1,1166 @@
+
+#include <linux/sched.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/stop_machine.h>
+
+#include "cpupri.h"
+
+extern __read_mostly int scheduler_running;
+
+/*
+ * Convert user-nice values [ -20 ... 0 ... 19 ]
+ * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
+ * and back.
+ */
+#define NICE_TO_PRIO(nice)	(MAX_RT_PRIO + (nice) + 20)
+#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)
+#define TASK_NICE(p)		PRIO_TO_NICE((p)->static_prio)
+
+/*
+ * 'User priority' is the nice value converted to something we
+ * can work with better when scaling various scheduler parameters,
+ * it's a [ 0 ... 39 ] range.
+ */
+#define USER_PRIO(p)		((p)-MAX_RT_PRIO)
+#define TASK_USER_PRIO(p)	USER_PRIO((p)->static_prio)
+#define MAX_USER_PRIO		(USER_PRIO(MAX_PRIO))
+
+/*
+ * Helpers for converting nanosecond timing to jiffy resolution
+ */
+#define NS_TO_JIFFIES(TIME)	((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
+
+#define NICE_0_LOAD		SCHED_LOAD_SCALE
+#define NICE_0_SHIFT		SCHED_LOAD_SHIFT
+
+/*
+ * These are the 'tuning knobs' of the scheduler:
+ *
+ * default timeslice is 100 msecs (used only for SCHED_RR tasks).
+ * Timeslices get refilled after they expire.
+ */
+#define DEF_TIMESLICE		(100 * HZ / 1000)
+
+/*
+ * single value that denotes runtime == period, ie unlimited time.
+ */
+#define RUNTIME_INF	((u64)~0ULL)
+
+static inline int rt_policy(int policy)
+{
+	if (policy == SCHED_FIFO || policy == SCHED_RR)
+		return 1;
+	return 0;
+}
+
+static inline int task_has_rt_policy(struct task_struct *p)
+{
+	return rt_policy(p->policy);
+}
+
+/*
+ * This is the priority-queue data structure of the RT scheduling class:
+ */
+struct rt_prio_array {
+	DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
+	struct list_head queue[MAX_RT_PRIO];
+};
+
+struct rt_bandwidth {
+	/* nests inside the rq lock: */
+	raw_spinlock_t		rt_runtime_lock;
+	ktime_t			rt_period;
+	u64			rt_runtime;
+	struct hrtimer		rt_period_timer;
+};
+
+extern struct mutex sched_domains_mutex;
+
+#ifdef CONFIG_CGROUP_SCHED
+
+#include <linux/cgroup.h>
+
+struct cfs_rq;
+struct rt_rq;
+
+static LIST_HEAD(task_groups);
+
+struct cfs_bandwidth {
+#ifdef CONFIG_CFS_BANDWIDTH
+	raw_spinlock_t lock;
+	ktime_t period;
+	u64 quota, runtime;
+	s64 hierarchal_quota;
+	u64 runtime_expires;
+
+	int idle, timer_active;
+	struct hrtimer period_timer, slack_timer;
+	struct list_head throttled_cfs_rq;
+
+	/* statistics */
+	int nr_periods, nr_throttled;
+	u64 throttled_time;
+#endif
+};
+
+/* task group related information */
+struct task_group {
+	struct cgroup_subsys_state css;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	/* schedulable entities of this group on each cpu */
+	struct sched_entity **se;
+	/* runqueue "owned" by this group on each cpu */
+	struct cfs_rq **cfs_rq;
+	unsigned long shares;
+
+	atomic_t load_weight;
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	struct sched_rt_entity **rt_se;
+	struct rt_rq **rt_rq;
+
+	struct rt_bandwidth rt_bandwidth;
+#endif
+
+	struct rcu_head rcu;
+	struct list_head list;
+
+	struct task_group *parent;
+	struct list_head siblings;
+	struct list_head children;
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+	struct autogroup *autogroup;
+#endif
+
+	struct cfs_bandwidth cfs_bandwidth;
+};
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+#define ROOT_TASK_GROUP_LOAD	NICE_0_LOAD
+
+/*
+ * A weight of 0 or 1 can cause arithmetics problems.
+ * A weight of a cfs_rq is the sum of weights of which entities
+ * are queued on this cfs_rq, so a weight of a entity should not be
+ * too large, so as the shares value of a task group.
+ * (The default weight is 1024 - so there's no practical
+ *  limitation from this.)
+ */
+#define MIN_SHARES	(1UL <<  1)
+#define MAX_SHARES	(1UL << 18)
+#endif
+
+/* Default task group.
+ *	Every task in system belong to this group at bootup.
+ */
+extern struct task_group root_task_group;
+
+typedef int (*tg_visitor)(struct task_group *, void *);
+
+extern int walk_tg_tree_from(struct task_group *from,
+			     tg_visitor down, tg_visitor up, void *data);
+
+/*
+ * Iterate the full tree, calling @down when first entering a node and @up when
+ * leaving it for the final time.
+ *
+ * Caller must hold rcu_lock or sufficient equivalent.
+ */
+static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
+{
+	return walk_tg_tree_from(&root_task_group, down, up, data);
+}
+
+extern int tg_nop(struct task_group *tg, void *data);
+
+extern void free_fair_sched_group(struct task_group *tg);
+extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
+extern void unregister_fair_sched_group(struct task_group *tg, int cpu);
+extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
+			struct sched_entity *se, int cpu,
+			struct sched_entity *parent);
+extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
+extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
+
+extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
+extern void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
+extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
+
+extern void free_rt_sched_group(struct task_group *tg);
+extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
+extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
+		struct sched_rt_entity *rt_se, int cpu,
+		struct sched_rt_entity *parent);
+
+#else /* CONFIG_CGROUP_SCHED */
+
+struct cfs_bandwidth { };
+
+#endif	/* CONFIG_CGROUP_SCHED */
+
+/* CFS-related fields in a runqueue */
+struct cfs_rq {
+	struct load_weight load;
+	unsigned long nr_running, h_nr_running;
+
+	u64 exec_clock;
+	u64 min_vruntime;
+#ifndef CONFIG_64BIT
+	u64 min_vruntime_copy;
+#endif
+
+	struct rb_root tasks_timeline;
+	struct rb_node *rb_leftmost;
+
+	struct list_head tasks;
+	struct list_head *balance_iterator;
+
+	/*
+	 * 'curr' points to currently running entity on this cfs_rq.
+	 * It is set to NULL otherwise (i.e when none are currently running).
+	 */
+	struct sched_entity *curr, *next, *last, *skip;
+
+#ifdef	CONFIG_SCHED_DEBUG
+	unsigned int nr_spread_over;
+#endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */
+
+	/*
+	 * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
+	 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
+	 * (like users, containers etc.)
+	 *
+	 * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
+	 * list is used during load balance.
+	 */
+	int on_list;
+	struct list_head leaf_cfs_rq_list;
+	struct task_group *tg;	/* group that "owns" this runqueue */
+
+#ifdef CONFIG_SMP
+	/*
+	 * the part of load.weight contributed by tasks
+	 */
+	unsigned long task_weight;
+
+	/*
+	 *   h_load = weight * f(tg)
+	 *
+	 * Where f(tg) is the recursive weight fraction assigned to
+	 * this group.
+	 */
+	unsigned long h_load;
+
+	/*
+	 * Maintaining per-cpu shares distribution for group scheduling
+	 *
+	 * load_stamp is the last time we updated the load average
+	 * load_last is the last time we updated the load average and saw load
+	 * load_unacc_exec_time is currently unaccounted execution time
+	 */
+	u64 load_avg;
+	u64 load_period;
+	u64 load_stamp, load_last, load_unacc_exec_time;
+
+	unsigned long load_contribution;
+#endif /* CONFIG_SMP */
+#ifdef CONFIG_CFS_BANDWIDTH
+	int runtime_enabled;
+	u64 runtime_expires;
+	s64 runtime_remaining;
+
+	u64 throttled_timestamp;
+	int throttled, throttle_count;
+	struct list_head throttled_list;
+#endif /* CONFIG_CFS_BANDWIDTH */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+};
+
+static inline int rt_bandwidth_enabled(void)
+{
+	return sysctl_sched_rt_runtime >= 0;
+}
+
+/* Real-Time classes' related field in a runqueue: */
+struct rt_rq {
+	struct rt_prio_array active;
+	unsigned long rt_nr_running;
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
+	struct {
+		int curr; /* highest queued rt task prio */
+#ifdef CONFIG_SMP
+		int next; /* next highest */
+#endif
+	} highest_prio;
+#endif
+#ifdef CONFIG_SMP
+	unsigned long rt_nr_migratory;
+	unsigned long rt_nr_total;
+	int overloaded;
+	struct plist_head pushable_tasks;
+#endif
+	int rt_throttled;
+	u64 rt_time;
+	u64 rt_runtime;
+	/* Nests inside the rq lock: */
+	raw_spinlock_t rt_runtime_lock;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	unsigned long rt_nr_boosted;
+
+	struct rq *rq;
+	struct list_head leaf_rt_rq_list;
+	struct task_group *tg;
+#endif
+};
+
+#ifdef CONFIG_SMP
+
+/*
+ * We add the notion of a root-domain which will be used to define per-domain
+ * variables. Each exclusive cpuset essentially defines an island domain by
+ * fully partitioning the member cpus from any other cpuset. Whenever a new
+ * exclusive cpuset is created, we also create and attach a new root-domain
+ * object.
+ *
+ */
+struct root_domain {
+	atomic_t refcount;
+	atomic_t rto_count;
+	struct rcu_head rcu;
+	cpumask_var_t span;
+	cpumask_var_t online;
+
+	/*
+	 * The "RT overload" flag: it gets set if a CPU has more than
+	 * one runnable RT task.
+	 */
+	cpumask_var_t rto_mask;
+	struct cpupri cpupri;
+};
+
+extern struct root_domain def_root_domain;
+
+#endif /* CONFIG_SMP */
+
+/*
+ * This is the main, per-CPU runqueue data structure.
+ *
+ * Locking rule: those places that want to lock multiple runqueues
+ * (such as the load balancing or the thread migration code), lock
+ * acquire operations must be ordered by ascending &runqueue.
+ */
+struct rq {
+	/* runqueue lock: */
+	raw_spinlock_t lock;
+
+	/*
+	 * nr_running and cpu_load should be in the same cacheline because
+	 * remote CPUs use both these fields when doing load calculation.
+	 */
+	unsigned long nr_running;
+	#define CPU_LOAD_IDX_MAX 5
+	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
+	unsigned long last_load_update_tick;
+#ifdef CONFIG_NO_HZ
+	u64 nohz_stamp;
+	unsigned long nohz_flags;
+#endif
+	int skip_clock_update;
+
+	/* capture load from *all* tasks on this cpu: */
+	struct load_weight load;
+	unsigned long nr_load_updates;
+	u64 nr_switches;
+
+	struct cfs_rq cfs;
+	struct rt_rq rt;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	/* list of leaf cfs_rq on this cpu: */
+	struct list_head leaf_cfs_rq_list;
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+	struct list_head leaf_rt_rq_list;
+#endif
+
+	/*
+	 * This is part of a global counter where only the total sum
+	 * over all CPUs matters. A task can increase this counter on
+	 * one CPU and if it got migrated afterwards it may decrease
+	 * it on another CPU. Always updated under the runqueue lock:
+	 */
+	unsigned long nr_uninterruptible;
+
+	struct task_struct *curr, *idle, *stop;
+	unsigned long next_balance;
+	struct mm_struct *prev_mm;
+
+	u64 clock;
+	u64 clock_task;
+
+	atomic_t nr_iowait;
+
+#ifdef CONFIG_SMP
+	struct root_domain *rd;
+	struct sched_domain *sd;
+
+	unsigned long cpu_power;
+
+	unsigned char idle_balance;
+	/* For active balancing */
+	int post_schedule;
+	int active_balance;
+	int push_cpu;
+	struct cpu_stop_work active_balance_work;
+	/* cpu of this runqueue: */
+	int cpu;
+	int online;
+
+	u64 rt_avg;
+	u64 age_stamp;
+	u64 idle_stamp;
+	u64 avg_idle;
+#endif
+
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+	u64 prev_irq_time;
+#endif
+#ifdef CONFIG_PARAVIRT
+	u64 prev_steal_time;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+	u64 prev_steal_time_rq;
+#endif
+
+	/* calc_load related fields */
+	unsigned long calc_load_update;
+	long calc_load_active;
+
+#ifdef CONFIG_SCHED_HRTICK
+#ifdef CONFIG_SMP
+	int hrtick_csd_pending;
+	struct call_single_data hrtick_csd;
+#endif
+	struct hrtimer hrtick_timer;
+#endif
+
+#ifdef CONFIG_SCHEDSTATS
+	/* latency stats */
+	struct sched_info rq_sched_info;
+	unsigned long long rq_cpu_time;
+	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
+
+	/* sys_sched_yield() stats */
+	unsigned int yld_count;
+
+	/* schedule() stats */
+	unsigned int sched_switch;
+	unsigned int sched_count;
+	unsigned int sched_goidle;
+
+	/* try_to_wake_up() stats */
+	unsigned int ttwu_count;
+	unsigned int ttwu_local;
+#endif
+
+#ifdef CONFIG_SMP
+	struct llist_head wake_list;
+#endif
+};
+
+static inline int cpu_of(struct rq *rq)
+{
+#ifdef CONFIG_SMP
+	return rq->cpu;
+#else
+	return 0;
+#endif
+}
+
+DECLARE_PER_CPU(struct rq, runqueues);
+
+#define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
+#define this_rq()		(&__get_cpu_var(runqueues))
+#define task_rq(p)		cpu_rq(task_cpu(p))
+#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
+#define raw_rq()		(&__raw_get_cpu_var(runqueues))
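A hedged usage sketch of the accessors above (the helper below is hypothetical, not taken from this patch): readers of runqueue fields such as nr_running typically pin the runqueue with its raw lock and interrupts disabled.

/* Hypothetical example built only on cpu_rq() and struct rq above. */
static inline unsigned long read_cpu_nr_running(int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	unsigned long flags, nr;

	raw_spin_lock_irqsave(&rq->lock, flags);
	nr = rq->nr_running;
	raw_spin_unlock_irqrestore(&rq->lock, flags);

	return nr;
}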
+
+#ifdef CONFIG_SMP
+
+#define rcu_dereference_check_sched_domain(p) \
+	rcu_dereference_check((p), \
+			      lockdep_is_held(&sched_domains_mutex))
+
+/*
+ * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
+ * See detach_destroy_domains: synchronize_sched for details.
+ *
+ * The domain tree of any CPU may only be accessed from within
+ * preempt-disabled sections.
+ */
+#define for_each_domain(cpu, __sd) \
+	for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
+			__sd; __sd = __sd->parent)
+
+#define for_each_lower_domain(sd) for (; sd; sd = sd->child)
+
+/**
+ * highest_flag_domain - Return highest sched_domain containing flag.
+ * @cpu:	The cpu whose highest level of sched domain is to
+ *		be returned.
+ * @flag:	The flag to check for the highest sched_domain
+ *		for the given cpu.
+ *
+ * Returns the highest sched_domain of a cpu which contains the given flag.
+ */
+static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
+{
+	struct sched_domain *sd, *hsd = NULL;
+
+	for_each_domain(cpu, sd) {
+		if (!(sd->flags & flag))
+			break;
+		hsd = sd;
+	}
+
+	return hsd;
+}
+
+DECLARE_PER_CPU(struct sched_domain *, sd_llc);
+DECLARE_PER_CPU(int, sd_llc_id);
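The per-cpu sd_llc/sd_llc_id declarations cache the highest domain that shares a last-level cache. A hedged illustration of what the cached id makes cheap to ask (the helper is an assumption for illustration, not code from this patch):

/* Hypothetical helper: do two CPUs hang off the same last-level cache? */
static inline bool cpus_share_llc(int this_cpu, int that_cpu)
{
	return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
}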
+
+#endif /* CONFIG_SMP */
+
+#include "stats.h"
+#include "auto_group.h"
+
+#ifdef CONFIG_CGROUP_SCHED
+
+/*
+ * Return the group to which this task belongs.
+ *
+ * We use task_subsys_state_check() and extend the RCU verification with
+ * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each
+ * task it moves into the cgroup. Therefore by holding either of those locks,
+ * we pin the task to the current cgroup.
+ */
+static inline struct task_group *task_group(struct task_struct *p)
+{
+	struct task_group *tg;
+	struct cgroup_subsys_state *css;
+
+	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
+			lockdep_is_held(&p->pi_lock) ||
+			lockdep_is_held(&task_rq(p)->lock));
+	tg = container_of(css, struct task_group, css);
+
+	return autogroup_task_group(p, tg);
+}
+
+/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
+{
+#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
+	struct task_group *tg = task_group(p);
+#endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	p->se.cfs_rq = tg->cfs_rq[cpu];
+	p->se.parent = tg->se[cpu];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	p->rt.rt_rq  = tg->rt_rq[cpu];
+	p->rt.parent = tg->rt_se[cpu];
+#endif
+}
+
+#else /* CONFIG_CGROUP_SCHED */
+
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline struct task_group *task_group(struct task_struct *p)
+{
+	return NULL;
+}
+
+#endif /* CONFIG_CGROUP_SCHED */
+
+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+	set_task_rq(p, cpu);
+#ifdef CONFIG_SMP
+	/*
+	 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
+	 * successfully executed on another CPU. We must ensure that updates of
+	 * per-task data have been completed by this moment.
+	 */
+	smp_wmb();
+	task_thread_info(p)->cpu = cpu;
+#endif
+}
+
+/*
+ * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
+ */
+#ifdef CONFIG_SCHED_DEBUG
+# include <linux/jump_label.h>
+# define const_debug __read_mostly
+#else
+# define const_debug const
+#endif
+
+extern const_debug unsigned int sysctl_sched_features;
+
+#define SCHED_FEAT(name, enabled)	\
+	__SCHED_FEAT_##name ,
+
+enum {
+#include "features.h"
+	__SCHED_FEAT_NR,
+};
+
+#undef SCHED_FEAT
+
+#if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL)
+static __always_inline bool static_branch__true(struct jump_label_key *key)
+{
+	return likely(static_branch(key)); /* Not out of line branch. */
+}
+
+static __always_inline bool static_branch__false(struct jump_label_key *key)
+{
+	return unlikely(static_branch(key)); /* Out of line branch. */
+}
+
+#define SCHED_FEAT(name, enabled)					\
+static __always_inline bool static_branch_##name(struct jump_label_key *key) \
+{									\
+	return static_branch__##enabled(key);				\
+}
+
+#include "features.h"
+
+#undef SCHED_FEAT
+
+extern struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR];
+#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
+#else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */
+#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
+#endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
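The SCHED_FEAT() machinery above is an X-macro: features.h is pulled in several times with different definitions of SCHED_FEAT, so one feature list yields enum constants, per-feature jump-label helpers, and the sysctl_sched_features bitmask fallback. The standalone program below is a simplified sketch of that technique under stated assumptions: the feature names and the MY_FEATURES list are made up, and it models only the bitmask fallback, not the jump-label path.

#include <stdio.h>

/* Hypothetical feature list standing in for kernel/sched/features.h. */
#define MY_FEATURES(F)		\
	F(GENTLE_SLEEPERS, 1)	\
	F(HRTICK, 0)

/* Pass 1: generate enum constants, like the SCHED_FEAT enum above. */
#define F_ENUM(name, enabled)	FEAT_##name,
enum { MY_FEATURES(F_ENUM) FEAT_NR };
#undef F_ENUM

/* Pass 2: fold the default-enabled bits into a mask, akin to the
 * default value of sysctl_sched_features. */
#define F_MASK(name, enabled)	((enabled) ? 1u << FEAT_##name : 0u) |
static const unsigned int default_features = MY_FEATURES(F_MASK) 0u;
#undef F_MASK

/* The non-jump-label fallback form of sched_feat(). */
#define my_feat(x)	(default_features & (1u << FEAT_##x))

int main(void)
{
	printf("GENTLE_SLEEPERS=%d HRTICK=%d nr_features=%d\n",
	       my_feat(GENTLE_SLEEPERS) ? 1 : 0,
	       my_feat(HRTICK) ? 1 : 0,
	       (int)FEAT_NR);
	return 0;
}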
+
+static inline u64 global_rt_period(void)
+{
+	return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
+}
+
+static inline u64 global_rt_runtime(void)
+{
+	if (sysctl_sched_rt_runtime < 0)
+		return RUNTIME_INF;
+
+	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
+}
+
+
+
+static inline int task_current(struct rq *rq, struct task_struct *p)
+{
+	return rq->curr == p;
+}
+
+static inline int task_running(struct rq *rq, struct task_struct *p)
+{
+#ifdef CONFIG_SMP
+	return p->on_cpu;
+#else
+	return task_current(rq, p);
+#endif
+}
+
+
+#ifndef prepare_arch_switch
+# define prepare_arch_switch(next)	do { } while (0)
+#endif
+#ifndef finish_arch_switch
+# define finish_arch_switch(prev)	do { } while (0)
+#endif
+
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
+static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * We can optimise this out completely for !SMP, because the
+	 * SMP rebalancing from interrupt is the only thing that cares
+	 * here.
+	 */
+	next->on_cpu = 1;
+#endif
+}
+
+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * After ->on_cpu is cleared, the task can be moved to a different CPU.
+	 * We must ensure this doesn't happen until the switch is completely
+	 * finished.
+	 */
+	smp_wmb();
+	prev->on_cpu = 0;
+#endif
+#ifdef CONFIG_DEBUG_SPINLOCK
+	/* this is a valid case when another task releases the spinlock */
+	rq->lock.owner = current;
+#endif
+	/*
+	 * If we are tracking spinlock dependencies then we have to
+	 * fix up the runqueue lock - which gets 'carried over' from
+	 * prev into current:
+	 */
+	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
+
+	raw_spin_unlock_irq(&rq->lock);
+}
+
+#else /* __ARCH_WANT_UNLOCKED_CTXSW */
+static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * We can optimise this out completely for !SMP, because the
+	 * SMP rebalancing from interrupt is the only thing that cares
+	 * here.
+	 */
+	next->on_cpu = 1;
+#endif
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+	raw_spin_unlock_irq(&rq->lock);
+#else
+	raw_spin_unlock(&rq->lock);
+#endif
+}
+
+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * After ->on_cpu is cleared, the task can be moved to a different CPU.
+	 * We must ensure this doesn't happen until the switch is completely
+	 * finished.
+	 */
+	smp_wmb();
+	prev->on_cpu = 0;
+#endif
+#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+	local_irq_enable();
+#endif
+}
+#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
+
+
+static inline void update_load_add(struct load_weight *lw, unsigned long inc)
+{
+	lw->weight += inc;
+	lw->inv_weight = 0;
+}
+
+static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
+{
+	lw->weight -= dec;
+	lw->inv_weight = 0;
+}
+
+static inline void update_load_set(struct load_weight *lw, unsigned long w)
+{
+	lw->weight = w;
+	lw->inv_weight = 0;
+}
+
+/*
+ * To avoid subverting "niceness" when tasks with unusual "nice" values are
+ * distributed unevenly across CPUs, the contribution each task makes to its
+ * run queue's load is weighted according to its scheduling class and "nice"
+ * value. For SCHED_NORMAL tasks this is just a scaled version of the new
+ * time slice allocation that they receive on time slice expiry etc.
+ */
+
+#define WEIGHT_IDLEPRIO                3
+#define WMULT_IDLEPRIO         1431655765
+
+/*
+ * Nice levels are multiplicative, with a gentle 10% change for every
+ * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
+ * nice 1, it will get ~10% less CPU time than another CPU-bound task
+ * that remained on nice 0.
+ *
+ * The "10% effect" is relative and cumulative: from _any_ nice level,
+ * going up 1 level costs ~10% CPU, and going down 1 level gains ~10%
+ * CPU. (To achieve that we use a multiplier of 1.25: if a task goes
+ * up by ~10% and another task goes down by ~10%, the relative
+ * distance between them is ~25%.)
+ */
+static const int prio_to_weight[40] = {
+ /* -20 */     88761,     71755,     56483,     46273,     36291,
+ /* -15 */     29154,     23254,     18705,     14949,     11916,
+ /* -10 */      9548,      7620,      6100,      4904,      3906,
+ /*  -5 */      3121,      2501,      1991,      1586,      1277,
+ /*   0 */      1024,       820,       655,       526,       423,
+ /*   5 */       335,       272,       215,       172,       137,
+ /*  10 */       110,        87,        70,        56,        45,
+ /*  15 */        36,        29,        23,        18,        15,
+};
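Worked example from the table above (arithmetic only, no new claims): a CPU-bound nice-0 task (weight 1024) running against a CPU-bound nice-1 task (weight 820) gets 1024/1844 ≈ 55.5% of the CPU while the nice-1 task gets 820/1844 ≈ 44.5%, roughly the 10% gap described in the comment; the step between adjacent entries, 1024/820 ≈ 1.25, is the multiplier mentioned there.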
+
+/*
+ * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated.
+ *
+ * In cases where the weight does not change often, we can use the
+ * precalculated inverse to speed up arithmetic by turning divisions
+ * into multiplications:
+ */
+static const u32 prio_to_wmult[40] = {
+ /* -20 */     48388,     59856,     76040,     92818,    118348,
+ /* -15 */    147320,    184698,    229616,    287308,    360437,
+ /* -10 */    449829,    563644,    704093,    875809,   1099582,
+ /*  -5 */   1376151,   1717300,   2157191,   2708050,   3363326,
+ /*   0 */   4194304,   5237765,   6557202,   8165337,  10153587,
+ /*   5 */  12820798,  15790321,  19976592,  24970740,  31350126,
+ /*  10 */  39045157,  49367440,  61356676,  76695844,  95443717,
+ /*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
+};
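A small standalone sketch of the divide-by-weight versus multiply-by-inverse trick the comment above describes. The 6 ms delta and the nice-0 scale of 1024 are illustrative assumptions; 5237765 is the table entry for nice +1, i.e. 2^32/820 rounded.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t delta = 6000000;	/* e.g. 6ms of runtime, in ns */
	uint64_t nice0 = 1024;		/* nice-0 weight from the table above */
	uint64_t weight = 820;		/* nice +1 weight */
	uint64_t inv_weight = 5237765;	/* prio_to_wmult entry: 2^32 / 820 */

	uint64_t by_division = delta * nice0 / weight;
	uint64_t by_multiply = (delta * nice0 * inv_weight) >> 32;

	/* Both forms scale the delta by 1024/820; the second avoids a divide
	 * at the cost of at most an off-by-one from rounding the inverse. */
	printf("division: %llu  multiply+shift: %llu\n",
	       (unsigned long long)by_division,
	       (unsigned long long)by_multiply);
	return 0;
}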
+
+/* Time spent by the tasks of the cpu accounting group executing in ... */
+enum cpuacct_stat_index {
+	CPUACCT_STAT_USER,	/* ... user mode */
+	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */
+
+	CPUACCT_STAT_NSTATS,
+};
+
+
+#define sched_class_highest (&stop_sched_class)
+#define for_each_class(class) \
+   for (class = sched_class_highest; class; class = class->next)
+
+extern const struct sched_class stop_sched_class;
+extern const struct sched_class rt_sched_class;
+extern const struct sched_class fair_sched_class;
+extern const struct sched_class idle_sched_class;
+
+
+#ifdef CONFIG_SMP
+
+extern void trigger_load_balance(struct rq *rq, int cpu);
+extern void idle_balance(int this_cpu, struct rq *this_rq);
+
+#else	/* CONFIG_SMP */
+
+static inline void idle_balance(int cpu, struct rq *rq)
+{
+}
+
+#endif
+
+extern void sysrq_sched_debug_show(void);
+extern void sched_init_granularity(void);
+extern void update_max_interval(void);
+extern void update_group_power(struct sched_domain *sd, int cpu);
+extern int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu);
+extern void init_sched_rt_class(void);
+extern void init_sched_fair_class(void);
+
+extern void resched_task(struct task_struct *p);
+extern void resched_cpu(int cpu);
+
+extern struct rt_bandwidth def_rt_bandwidth;
+extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
+
+extern void update_cpu_load(struct rq *this_rq);
+
+#ifdef CONFIG_CGROUP_CPUACCT
+#include <linux/cgroup.h>
+/* track cpu usage of a group of tasks and its child groups */
+struct cpuacct {
+	struct cgroup_subsys_state css;
+	/* cpuusage holds pointer to a u64-type object on every cpu */
+	u64 __percpu *cpuusage;
+	struct kernel_cpustat __percpu *cpustat;
+};
+
+/* return cpu accounting group corresponding to this container */
+static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
+{
+	return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id),
+			    struct cpuacct, css);
+}
+
+/* return cpu accounting group to which this task belongs */
+static inline struct cpuacct *task_ca(struct task_struct *tsk)
+{
+	return container_of(task_subsys_state(tsk, cpuacct_subsys_id),
+			    struct cpuacct, css);
+}
+
+static inline struct cpuacct *parent_ca(struct cpuacct *ca)
+{
+	if (!ca || !ca->css.cgroup->parent)
+		return NULL;
+	return cgroup_ca(ca->css.cgroup->parent);
+}
+
+extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+#else
+static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
+#endif
+
+static inline void inc_nr_running(struct rq *rq)
+{
+	rq->nr_running++;
+}
+
+static inline void dec_nr_running(struct rq *rq)
+{
+	rq->nr_running--;
+}
+
+extern void update_rq_clock(struct rq *rq);
+
+extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
+extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
+
+extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
+
+extern const_debug unsigned int sysctl_sched_time_avg;
+extern const_debug unsigned int sysctl_sched_nr_migrate;
+extern const_debug unsigned int sysctl_sched_migration_cost;
+
+static inline u64 sched_avg_period(void)
+{
+	return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
+}
+
+void calc_load_account_idle(struct rq *this_rq);
+
+#ifdef CONFIG_SCHED_HRTICK
+
+/*
+ * Use hrtick when:
+ *  - enabled by features
+ *  - hrtimer is actually high res
+ */
+static inline int hrtick_enabled(struct rq *rq)
+{
+	if (!sched_feat(HRTICK))
+		return 0;
+	if (!cpu_active(cpu_of(rq)))
+		return 0;
+	return hrtimer_is_hres_active(&rq->hrtick_timer);
+}
+
+void hrtick_start(struct rq *rq, u64 delay);
+
+#else
+
+static inline int hrtick_enabled(struct rq *rq)
+{
+	return 0;
+}
+
+#endif /* CONFIG_SCHED_HRTICK */
+
+#ifdef CONFIG_SMP
+extern void sched_avg_update(struct rq *rq);
+static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
+{
+	rq->rt_avg += rt_delta;
+	sched_avg_update(rq);
+}
+#else
+static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
+static inline void sched_avg_update(struct rq *rq) { }
+#endif
+
+extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
+
+#ifdef CONFIG_SMP
+#ifdef CONFIG_PREEMPT
+
+static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
+
+/*
+ * fair double_lock_balance: Safely acquires both rq->locks in a fair
+ * way at the expense of forcing extra atomic operations in all
+ * invocations.  This assures that the double_lock is acquired using the
+ * same underlying policy as the spinlock_t on this architecture, which
+ * reduces latency compared to the unfair variant below.  However, it
+ * also adds more overhead and therefore may reduce throughput.
+ */
+static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	raw_spin_unlock(&this_rq->lock);
+	double_rq_lock(this_rq, busiest);
+
+	return 1;
+}
+
+#else
+/*
+ * Unfair double_lock_balance: Optimizes throughput at the expense of
+ * latency by eliminating extra atomic operations when the locks are
+ * already in proper order on entry.  This favors lower cpu-ids and will
+ * grant the double lock to lower cpus over higher ids under contention,
+ * regardless of entry order into the function.
+ */
+static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	int ret = 0;
+
+	if (unlikely(!raw_spin_trylock(&busiest->lock))) {
+		if (busiest < this_rq) {
+			raw_spin_unlock(&this_rq->lock);
+			raw_spin_lock(&busiest->lock);
+			raw_spin_lock_nested(&this_rq->lock,
+					      SINGLE_DEPTH_NESTING);
+			ret = 1;
+		} else
+			raw_spin_lock_nested(&busiest->lock,
+					      SINGLE_DEPTH_NESTING);
+	}
+	return ret;
+}
+
+#endif /* CONFIG_PREEMPT */
+
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+{
+	if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work well under rq->lock */
+		raw_spin_unlock(&this_rq->lock);
+		BUG_ON(1);
+	}
+
+	return _double_lock_balance(this_rq, busiest);
+}
+
+static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(busiest->lock)
+{
+	raw_spin_unlock(&busiest->lock);
+	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+}
+
+/*
+ * double_rq_lock - safely lock two runqueues
+ *
+ * Note this does not disable interrupts like task_rq_lock,
+ * you need to do so manually before calling.
+ */
+static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
+	__acquires(rq1->lock)
+	__acquires(rq2->lock)
+{
+	BUG_ON(!irqs_disabled());
+	if (rq1 == rq2) {
+		raw_spin_lock(&rq1->lock);
+		__acquire(rq2->lock);	/* Fake it out ;) */
+	} else {
+		if (rq1 < rq2) {
+			raw_spin_lock(&rq1->lock);
+			raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
+		} else {
+			raw_spin_lock(&rq2->lock);
+			raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
+		}
+	}
+}
+
+/*
+ * double_rq_unlock - safely unlock two runqueues
+ *
+ * Note this does not restore interrupts like task_rq_unlock,
+ * you need to do so manually after calling.
+ */
+static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+	__releases(rq1->lock)
+	__releases(rq2->lock)
+{
+	raw_spin_unlock(&rq1->lock);
+	if (rq1 != rq2)
+		raw_spin_unlock(&rq2->lock);
+	else
+		__release(rq2->lock);
+}
+
+#else /* CONFIG_SMP */
+
+/*
+ * double_rq_lock - safely lock two runqueues
+ *
+ * Note this does not disable interrupts like task_rq_lock,
+ * you need to do so manually before calling.
+ */
+static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
+	__acquires(rq1->lock)
+	__acquires(rq2->lock)
+{
+	BUG_ON(!irqs_disabled());
+	BUG_ON(rq1 != rq2);
+	raw_spin_lock(&rq1->lock);
+	__acquire(rq2->lock);	/* Fake it out ;) */
+}
+
+/*
+ * double_rq_unlock - safely unlock two runqueues
+ *
+ * Note this does not restore interrupts like task_rq_unlock,
+ * you need to do so manually after calling.
+ */
+static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+	__releases(rq1->lock)
+	__releases(rq2->lock)
+{
+	BUG_ON(rq1 != rq2);
+	raw_spin_unlock(&rq1->lock);
+	__release(rq2->lock);
+}
+
+#endif
+
+extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
+extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
+extern void print_cfs_stats(struct seq_file *m, int cpu);
+extern void print_rt_stats(struct seq_file *m, int cpu);
+
+extern void init_cfs_rq(struct cfs_rq *cfs_rq);
+extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);
+extern void unthrottle_offline_cfs_rqs(struct rq *rq);
+
+extern void account_cfs_bandwidth_used(int enabled, int was_enabled);
+
+#ifdef CONFIG_NO_HZ
+enum rq_nohz_flag_bits {
+	NOHZ_TICK_STOPPED,
+	NOHZ_BALANCE_KICK,
+	NOHZ_IDLE,
+};
+
+#define nohz_flags(cpu)	(&cpu_rq(cpu)->nohz_flags)
+#endif
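As a hedged sketch (an assumption for illustration, not code from this patch), the nohz_flags() word above is sized for the generic bitops, e.g.:

/* Hypothetical helper: has the tick been stopped on this CPU? */
static inline bool cpu_tick_stopped(int cpu)
{
	return test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
}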

+ 111 - 0
kernel/sched/stats.c

@@ -0,0 +1,111 @@
+
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+
+#include "sched.h"
+
+/*
+ * bump this up when changing the output format or the meaning of an existing
+ * format, so that tools can adapt (or abort)
+ */
+#define SCHEDSTAT_VERSION 15
+
+static int show_schedstat(struct seq_file *seq, void *v)
+{
+	int cpu;
+	int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
+	char *mask_str = kmalloc(mask_len, GFP_KERNEL);
+
+	if (mask_str == NULL)
+		return -ENOMEM;
+
+	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
+	seq_printf(seq, "timestamp %lu\n", jiffies);
+	for_each_online_cpu(cpu) {
+		struct rq *rq = cpu_rq(cpu);
+#ifdef CONFIG_SMP
+		struct sched_domain *sd;
+		int dcount = 0;
+#endif
+
+		/* runqueue-specific stats */
+		seq_printf(seq,
+		    "cpu%d %u %u %u %u %u %u %llu %llu %lu",
+		    cpu, rq->yld_count,
+		    rq->sched_switch, rq->sched_count, rq->sched_goidle,
+		    rq->ttwu_count, rq->ttwu_local,
+		    rq->rq_cpu_time,
+		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
+
+		seq_printf(seq, "\n");
+
+#ifdef CONFIG_SMP
+		/* domain-specific stats */
+		rcu_read_lock();
+		for_each_domain(cpu, sd) {
+			enum cpu_idle_type itype;
+
+			cpumask_scnprintf(mask_str, mask_len,
+					  sched_domain_span(sd));
+			seq_printf(seq, "domain%d %s", dcount++, mask_str);
+			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
+					itype++) {
+				seq_printf(seq, " %u %u %u %u %u %u %u %u",
+				    sd->lb_count[itype],
+				    sd->lb_balanced[itype],
+				    sd->lb_failed[itype],
+				    sd->lb_imbalance[itype],
+				    sd->lb_gained[itype],
+				    sd->lb_hot_gained[itype],
+				    sd->lb_nobusyq[itype],
+				    sd->lb_nobusyg[itype]);
+			}
+			seq_printf(seq,
+				   " %u %u %u %u %u %u %u %u %u %u %u %u\n",
+			    sd->alb_count, sd->alb_failed, sd->alb_pushed,
+			    sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
+			    sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
+			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
+			    sd->ttwu_move_balance);
+		}
+		rcu_read_unlock();
+#endif
+	}
+	kfree(mask_str);
+	return 0;
+}
+
+static int schedstat_open(struct inode *inode, struct file *file)
+{
+	unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
+	char *buf = kmalloc(size, GFP_KERNEL);
+	struct seq_file *m;
+	int res;
+
+	if (!buf)
+		return -ENOMEM;
+	res = single_open(file, show_schedstat, NULL);
+	if (!res) {
+		m = file->private_data;
+		m->buf = buf;
+		m->size = size;
+	} else
+		kfree(buf);
+	return res;
+}
+
+static const struct file_operations proc_schedstat_operations = {
+	.open    = schedstat_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+};
+
+static int __init proc_schedstat_init(void)
+{
+	proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
+	return 0;
+}
+module_init(proc_schedstat_init);
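Since show_schedstat() above is wired to /proc/schedstat via proc_create(), a user-space consumer can simply read that file. The short program below is a hedged illustration (not part of this patch) that prints the version, timestamp and per-cpu lines whose format the code above emits:

#include <stdio.h>

int main(void)
{
	char line[4096];
	FILE *f = fopen("/proc/schedstat", "r");

	if (!f) {
		perror("/proc/schedstat");
		return 1;
	}
	/* Keep "version", "timestamp" and "cpuN" lines; skip domain stats. */
	while (fgets(line, sizeof(line), f)) {
		if (line[0] != 'd')	/* "domainN ..." lines start with 'd' */
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}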

+ 2 - 107
kernel/sched_stats.h → kernel/sched/stats.h

@@ -1,108 +1,5 @@
 
 
 #ifdef CONFIG_SCHEDSTATS
-/*
- * bump this up when changing the output format or the meaning of an existing
- * format, so that tools can adapt (or abort)
- */
-#define SCHEDSTAT_VERSION 15
-
-static int show_schedstat(struct seq_file *seq, void *v)
-{
-	int cpu;
-	int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
-	char *mask_str = kmalloc(mask_len, GFP_KERNEL);
-
-	if (mask_str == NULL)
-		return -ENOMEM;
-
-	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
-	seq_printf(seq, "timestamp %lu\n", jiffies);
-	for_each_online_cpu(cpu) {
-		struct rq *rq = cpu_rq(cpu);
-#ifdef CONFIG_SMP
-		struct sched_domain *sd;
-		int dcount = 0;
-#endif
-
-		/* runqueue-specific stats */
-		seq_printf(seq,
-		    "cpu%d %u %u %u %u %u %u %llu %llu %lu",
-		    cpu, rq->yld_count,
-		    rq->sched_switch, rq->sched_count, rq->sched_goidle,
-		    rq->ttwu_count, rq->ttwu_local,
-		    rq->rq_cpu_time,
-		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
-
-		seq_printf(seq, "\n");
-
-#ifdef CONFIG_SMP
-		/* domain-specific stats */
-		rcu_read_lock();
-		for_each_domain(cpu, sd) {
-			enum cpu_idle_type itype;
-
-			cpumask_scnprintf(mask_str, mask_len,
-					  sched_domain_span(sd));
-			seq_printf(seq, "domain%d %s", dcount++, mask_str);
-			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
-					itype++) {
-				seq_printf(seq, " %u %u %u %u %u %u %u %u",
-				    sd->lb_count[itype],
-				    sd->lb_balanced[itype],
-				    sd->lb_failed[itype],
-				    sd->lb_imbalance[itype],
-				    sd->lb_gained[itype],
-				    sd->lb_hot_gained[itype],
-				    sd->lb_nobusyq[itype],
-				    sd->lb_nobusyg[itype]);
-			}
-			seq_printf(seq,
-				   " %u %u %u %u %u %u %u %u %u %u %u %u\n",
-			    sd->alb_count, sd->alb_failed, sd->alb_pushed,
-			    sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
-			    sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
-			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
-			    sd->ttwu_move_balance);
-		}
-		rcu_read_unlock();
-#endif
-	}
-	kfree(mask_str);
-	return 0;
-}
-
-static int schedstat_open(struct inode *inode, struct file *file)
-{
-	unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
-	char *buf = kmalloc(size, GFP_KERNEL);
-	struct seq_file *m;
-	int res;
-
-	if (!buf)
-		return -ENOMEM;
-	res = single_open(file, show_schedstat, NULL);
-	if (!res) {
-		m = file->private_data;
-		m->buf = buf;
-		m->size = size;
-	} else
-		kfree(buf);
-	return res;
-}
-
-static const struct file_operations proc_schedstat_operations = {
-	.open    = schedstat_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = single_release,
-};
-
-static int __init proc_schedstat_init(void)
-{
-	proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
-	return 0;
-}
-module_init(proc_schedstat_init);
 
 /*
  * Expects runqueue lock to be held for atomicity of update
@@ -283,8 +180,7 @@ static inline void account_group_user_time(struct task_struct *tsk,
 		return;
 
 	raw_spin_lock(&cputimer->lock);
-	cputimer->cputime.utime =
-		cputime_add(cputimer->cputime.utime, cputime);
+	cputimer->cputime.utime += cputime;
 	raw_spin_unlock(&cputimer->lock);
 }
 
@@ -307,8 +203,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
 		return;
 
 	raw_spin_lock(&cputimer->lock);
-	cputimer->cputime.stime =
-		cputime_add(cputimer->cputime.stime, cputime);
+	cputimer->cputime.stime += cputime;
 	raw_spin_unlock(&cputimer->lock);
 }
 

+ 3 - 1
kernel/sched_stoptask.c → kernel/sched/stop_task.c

@@ -1,3 +1,5 @@
+#include "sched.h"
+
 /*
  * stop-task scheduling class.
  *
@@ -80,7 +82,7 @@ get_rr_interval_stop(struct rq *rq, struct task_struct *task)
 /*
  * Simple, special scheduling class for the per-CPU stop tasks:
  */
-static const struct sched_class stop_sched_class = {
+const struct sched_class stop_sched_class = {
 	.next			= &rt_sched_class,
 
 	.enqueue_task		= enqueue_task_stop,

+ 2 - 4
kernel/signal.c

@@ -1629,10 +1629,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 	info.si_uid = __task_cred(tsk)->uid;
 	rcu_read_unlock();
 
-	info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
-				tsk->signal->utime));
-	info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
-				tsk->signal->stime));
+	info.si_utime = cputime_to_clock_t(tsk->utime + tsk->signal->utime);
+	info.si_stime = cputime_to_clock_t(tsk->stime + tsk->signal->stime);
 
 	info.si_status = tsk->exit_code & 0x7f;
 	if (tsk->exit_code & 0x80)

+ 3 - 3
kernel/sys.c

@@ -1605,7 +1605,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 	unsigned long maxrss = 0;
 
 	memset((char *) r, 0, sizeof *r);
-	utime = stime = cputime_zero;
+	utime = stime = 0;
 
 	if (who == RUSAGE_THREAD) {
 		task_times(current, &utime, &stime);
@@ -1635,8 +1635,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 
 
 		case RUSAGE_SELF:
 			thread_group_times(p, &tgutime, &tgstime);
-			utime = cputime_add(utime, tgutime);
-			stime = cputime_add(stime, tgstime);
+			utime += tgutime;
+			stime += tgstime;
 			r->ru_nvcsw += p->signal->nvcsw;
 			r->ru_nivcsw += p->signal->nivcsw;
 			r->ru_minflt += p->signal->min_flt;

+ 8 - 0
kernel/time/tick-sched.c

@@ -466,6 +466,14 @@ void tick_nohz_idle_enter(void)
 
 
 	WARN_ON_ONCE(irqs_disabled());
 
+	/*
+	 * Update the idle state in the scheduler domain hierarchy
+	 * when tick_nohz_stop_sched_tick() is called from the idle loop.
+	 * State will be updated to busy during the first busy tick after
+	 * exiting idle.
+	 */
+	set_cpu_sd_state_idle();
+
 	local_irq_disable();
 
 	ts = &__get_cpu_var(tick_cpu_sched);

+ 1 - 1
kernel/tsacct.c

@@ -127,7 +127,7 @@ void acct_update_integrals(struct task_struct *tsk)
 
 
 		local_irq_save(flags);
 		time = tsk->stime + tsk->utime;
-		dtime = cputime_sub(time, tsk->acct_timexpd);
+		dtime = time - tsk->acct_timexpd;
 		jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
 		delta = value.tv_sec;
 		delta = delta * USEC_PER_SEC + value.tv_usec;

Some files were not shown because too many files changed in this diff