@@ -155,7 +155,7 @@ struct rt_prio_array {
         struct list_head queue[MAX_RT_PRIO];
 };

-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED

 #include <linux/cgroup.h>

@@ -165,19 +165,16 @@ static LIST_HEAD(task_groups);

 /* task group related information */
 struct task_group {
-#ifdef CONFIG_FAIR_CGROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
         struct cgroup_subsys_state css;
 #endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
         /* schedulable entities of this group on each cpu */
         struct sched_entity **se;
         /* runqueue "owned" by this group on each cpu */
         struct cfs_rq **cfs_rq;

-        struct sched_rt_entity **rt_se;
-        struct rt_rq **rt_rq;
-
-        unsigned int rt_ratio;
-
         /*
          * shares assigned to a task group governs how much of cpu bandwidth
          * is allocated to the group. The more shares a group has, the more is
@@ -213,33 +210,46 @@ struct task_group {
          *
          */
         unsigned long shares;
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+        struct sched_rt_entity **rt_se;
+        struct rt_rq **rt_rq;
+
+        u64 rt_runtime;
+#endif

         struct rcu_head rcu;
         struct list_head list;
 };

+#ifdef CONFIG_FAIR_GROUP_SCHED
 /* Default task group's sched entity on each cpu */
 static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
 /* Default task group's cfs_rq on each cpu */
 static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;

-static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
-static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
-
 static struct sched_entity *init_sched_entity_p[NR_CPUS];
 static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
+static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;

 static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS];
 static struct rt_rq *init_rt_rq_p[NR_CPUS];
+#endif

-/* task_group_mutex serializes add/remove of task groups and also changes to
+/* task_group_lock serializes add/remove of task groups and also changes to
  * a task group's cpu shares.
  */
-static DEFINE_MUTEX(task_group_mutex);
+static DEFINE_SPINLOCK(task_group_lock);

 /* doms_cur_mutex serializes access to doms_cur[] array */
 static DEFINE_MUTEX(doms_cur_mutex);

+#ifdef CONFIG_FAIR_GROUP_SCHED
 #ifdef CONFIG_SMP
 /* kernel thread that runs rebalance_shares() periodically */
 static struct task_struct *lb_monitor_task;
@@ -248,35 +258,40 @@ static int load_balance_monitor(void *unused);

 static void set_se_shares(struct sched_entity *se, unsigned long shares);

+#ifdef CONFIG_USER_SCHED
+# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
+#else
+# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
+#endif
+
+#define MIN_GROUP_SHARES 2
+
+static int init_task_group_load = INIT_TASK_GROUP_LOAD;
+#endif
+
 /* Default task group.
  * Every task in system belong to this group at bootup.
  */
 struct task_group init_task_group = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
         .se = init_sched_entity_p,
         .cfs_rq = init_cfs_rq_p,
+#endif

+#ifdef CONFIG_RT_GROUP_SCHED
         .rt_se = init_sched_rt_entity_p,
         .rt_rq = init_rt_rq_p,
-};
-
-#ifdef CONFIG_FAIR_USER_SCHED
-# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
-#else
-# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
 #endif
-
-#define MIN_GROUP_SHARES 2
-
-static int init_task_group_load = INIT_TASK_GROUP_LOAD;
+};

 /* return group to which a task belongs */
 static inline struct task_group *task_group(struct task_struct *p)
 {
         struct task_group *tg;

-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
         tg = p->user->tg;
-#elif defined(CONFIG_FAIR_CGROUP_SCHED)
+#elif defined(CONFIG_CGROUP_SCHED)
         tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
                           struct task_group, css);
 #else
@@ -288,21 +303,15 @@ static inline struct task_group *task_group(struct task_struct *p)
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
 static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
 {
+#ifdef CONFIG_FAIR_GROUP_SCHED
         p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
         p->se.parent = task_group(p)->se[cpu];
+#endif

+#ifdef CONFIG_RT_GROUP_SCHED
         p->rt.rt_rq = task_group(p)->rt_rq[cpu];
         p->rt.parent = task_group(p)->rt_se[cpu];
-}
-
-static inline void lock_task_group_list(void)
-{
-        mutex_lock(&task_group_mutex);
-}
-
-static inline void unlock_task_group_list(void)
-{
-        mutex_unlock(&task_group_mutex);
+#endif
 }

 static inline void lock_doms_cur(void)
@@ -318,12 +327,10 @@ static inline void unlock_doms_cur(void)
 #else

 static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline void lock_task_group_list(void) { }
-static inline void unlock_task_group_list(void) { }
 static inline void lock_doms_cur(void) { }
 static inline void unlock_doms_cur(void) { }

-#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif /* CONFIG_GROUP_SCHED */

 /* CFS-related fields in a runqueue */
 struct cfs_rq {
@@ -363,7 +370,7 @@ struct cfs_rq {
 struct rt_rq {
         struct rt_prio_array active;
         unsigned long rt_nr_running;
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
         int highest_prio; /* highest queued rt task prio */
 #endif
 #ifdef CONFIG_SMP
@@ -373,7 +380,9 @@ struct rt_rq {
         int rt_throttled;
         u64 rt_time;

-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
+        unsigned long rt_nr_boosted;
+
         struct rq *rq;
         struct list_head leaf_rt_rq_list;
         struct task_group *tg;
@@ -447,6 +456,8 @@ struct rq {
 #ifdef CONFIG_FAIR_GROUP_SCHED
         /* list of leaf cfs_rq on this cpu: */
         struct list_head leaf_cfs_rq_list;
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
         struct list_head leaf_rt_rq_list;
 #endif

@@ -652,19 +663,21 @@ const_debug unsigned int sysctl_sched_features =
 const_debug unsigned int sysctl_sched_nr_migrate = 32;

 /*
- * period over which we measure -rt task cpu usage in ms.
+ * period over which we measure -rt task cpu usage in us.
  * default: 1s
  */
-const_debug unsigned int sysctl_sched_rt_period = 1000;
+unsigned int sysctl_sched_rt_period = 1000000;

-#define SCHED_RT_FRAC_SHIFT     16
-#define SCHED_RT_FRAC           (1UL << SCHED_RT_FRAC_SHIFT)
+/*
+ * part of the period that we allow rt tasks to run in us.
+ * default: 0.95s
+ */
+int sysctl_sched_rt_runtime = 950000;

 /*
- * ratio of time -rt tasks may consume.
- * default: 95%
+ * single value that denotes runtime == period, ie unlimited time.
  */
-const_debug unsigned int sysctl_sched_rt_ratio = 62259;
+#define RUNTIME_INF     ((u64)~0ULL)

 /*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
@@ -4571,6 +4584,15 @@ recheck:
                         return -EPERM;
         }

+#ifdef CONFIG_RT_GROUP_SCHED
+        /*
+         * Do not allow realtime tasks into groups that have no runtime
+         * assigned.
+         */
+        if (rt_policy(policy) && task_group(p)->rt_runtime == 0)
+                return -EPERM;
+#endif
+
         retval = security_task_setscheduler(p, policy, param);
         if (retval)
                 return retval;
@@ -7112,7 +7134,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
         /* delimiter for bitsearch: */
         __set_bit(MAX_RT_PRIO, array->bitmap);

-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
         rt_rq->highest_prio = MAX_RT_PRIO;
 #endif
 #ifdef CONFIG_SMP
@@ -7123,7 +7145,8 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
         rt_rq->rt_time = 0;
         rt_rq->rt_throttled = 0;

-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
+        rt_rq->rt_nr_boosted = 0;
         rt_rq->rq = rq;
 #endif
 }
@@ -7146,7 +7169,9 @@ static void init_tg_cfs_entry(struct rq *rq, struct task_group *tg,
         se->load.inv_weight = div64_64(1ULL<<32, se->load.weight);
         se->parent = NULL;
 }
+#endif

+#ifdef CONFIG_RT_GROUP_SCHED
 static void init_tg_rt_entry(struct rq *rq, struct task_group *tg,
                 struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
                 int cpu, int add)
@@ -7175,7 +7200,7 @@ void __init sched_init(void)
         init_defrootdomain();
 #endif

-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
         list_add(&init_task_group.list, &task_groups);
 #endif

@@ -7196,7 +7221,10 @@ void __init sched_init(void)
                                 &per_cpu(init_cfs_rq, i),
                                 &per_cpu(init_sched_entity, i), i, 1);

-                init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+                init_task_group.rt_runtime =
+                                sysctl_sched_rt_runtime * NSEC_PER_USEC;
                 INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
                 init_tg_rt_entry(rq, &init_task_group,
                                 &per_cpu(init_rt_rq, i),
@@ -7303,7 +7331,7 @@ void normalize_rt_tasks(void)
         unsigned long flags;
         struct rq *rq;

-        read_lock_irq(&tasklist_lock);
+        read_lock_irqsave(&tasklist_lock, flags);
         do_each_thread(g, p) {
                 /*
                  * Only normalize user tasks:
@@ -7329,16 +7357,16 @@ void normalize_rt_tasks(void)
                         continue;
                 }

-                spin_lock_irqsave(&p->pi_lock, flags);
+                spin_lock(&p->pi_lock);
                 rq = __task_rq_lock(p);

                 normalize_task(rq, p);

                 __task_rq_unlock(rq);
-                spin_unlock_irqrestore(&p->pi_lock, flags);
+                spin_unlock(&p->pi_lock);
         } while_each_thread(g, p);

-        read_unlock_irq(&tasklist_lock);
+        read_unlock_irqrestore(&tasklist_lock, flags);
 }

 #endif /* CONFIG_MAGIC_SYSRQ */
@@ -7387,9 +7415,9 @@ void set_curr_task(int cpu, struct task_struct *p)

 #endif

-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED

-#ifdef CONFIG_SMP
+#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
 /*
  * distribute shares of all task groups among their schedulable entities,
  * to reflect load distribution across cpus.
@@ -7540,7 +7568,8 @@ static int load_balance_monitor(void *unused)
 }
 #endif /* CONFIG_SMP */

-static void free_sched_group(struct task_group *tg)
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void free_fair_sched_group(struct task_group *tg)
 {
         int i;

@@ -7549,49 +7578,27 @@ static void free_sched_group(struct task_group *tg)
                         kfree(tg->cfs_rq[i]);
                 if (tg->se)
                         kfree(tg->se[i]);
-                if (tg->rt_rq)
-                        kfree(tg->rt_rq[i]);
-                if (tg->rt_se)
-                        kfree(tg->rt_se[i]);
         }

         kfree(tg->cfs_rq);
         kfree(tg->se);
-        kfree(tg->rt_rq);
-        kfree(tg->rt_se);
-        kfree(tg);
 }

-/* allocate runqueue etc for a new task group */
-struct task_group *sched_create_group(void)
+static int alloc_fair_sched_group(struct task_group *tg)
 {
-        struct task_group *tg;
         struct cfs_rq *cfs_rq;
         struct sched_entity *se;
-        struct rt_rq *rt_rq;
-        struct sched_rt_entity *rt_se;
         struct rq *rq;
         int i;

-        tg = kzalloc(sizeof(*tg), GFP_KERNEL);
-        if (!tg)
-                return ERR_PTR(-ENOMEM);
-
         tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
         if (!tg->cfs_rq)
                 goto err;
         tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
         if (!tg->se)
                 goto err;

         tg->shares = NICE_0_LOAD;
-        tg->rt_ratio = 0; /* XXX */

         for_each_possible_cpu(i) {
                 rq = cpu_rq(i);
@@ -7606,6 +7613,79 @@ struct task_group *sched_create_group(void)
                 if (!se)
                         goto err;

+                init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
+        }
+
+        return 1;
+
+ err:
+        return 0;
+}
+
+static inline void register_fair_sched_group(struct task_group *tg, int cpu)
+{
+        list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list,
+                        &cpu_rq(cpu)->leaf_cfs_rq_list);
+}
+
+static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
+{
+        list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list);
+}
+#else
+static inline void free_fair_sched_group(struct task_group *tg)
+{
+}
+
+static inline int alloc_fair_sched_group(struct task_group *tg)
+{
+        return 1;
+}
+
+static inline void register_fair_sched_group(struct task_group *tg, int cpu)
+{
+}
+
+static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
+{
+}
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+static void free_rt_sched_group(struct task_group *tg)
+{
+        int i;
+
+        for_each_possible_cpu(i) {
+                if (tg->rt_rq)
+                        kfree(tg->rt_rq[i]);
+                if (tg->rt_se)
+                        kfree(tg->rt_se[i]);
+        }
+
+        kfree(tg->rt_rq);
+        kfree(tg->rt_se);
+}
+
+static int alloc_rt_sched_group(struct task_group *tg)
+{
+        struct rt_rq *rt_rq;
+        struct sched_rt_entity *rt_se;
+        struct rq *rq;
+        int i;
+
+        tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
+        if (!tg->rt_rq)
+                goto err;
+        tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
+        if (!tg->rt_se)
+                goto err;
+
+        tg->rt_runtime = 0;
+
+        for_each_possible_cpu(i) {
+                rq = cpu_rq(i);
+
                 rt_rq = kmalloc_node(sizeof(struct rt_rq),
                                 GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
                 if (!rt_rq)
@@ -7616,20 +7696,75 @@ struct task_group *sched_create_group(void)
                 if (!rt_se)
                         goto err;

-                init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
                 init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0);
         }

-        lock_task_group_list();
+        return 1;
+
+ err:
+        return 0;
+}
+
+static inline void register_rt_sched_group(struct task_group *tg, int cpu)
+{
+        list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list,
+                        &cpu_rq(cpu)->leaf_rt_rq_list);
+}
+
+static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
+{
+        list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list);
+}
+#else
+static inline void free_rt_sched_group(struct task_group *tg)
+{
+}
+
+static inline int alloc_rt_sched_group(struct task_group *tg)
+{
+        return 1;
+}
+
+static inline void register_rt_sched_group(struct task_group *tg, int cpu)
+{
+}
+
+static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
+{
+}
+#endif
+
+static void free_sched_group(struct task_group *tg)
+{
+        free_fair_sched_group(tg);
+        free_rt_sched_group(tg);
+        kfree(tg);
+}
+
+/* allocate runqueue etc for a new task group */
+struct task_group *sched_create_group(void)
+{
+        struct task_group *tg;
+        unsigned long flags;
+        int i;
+
+        tg = kzalloc(sizeof(*tg), GFP_KERNEL);
+        if (!tg)
+                return ERR_PTR(-ENOMEM);
+
+        if (!alloc_fair_sched_group(tg))
+                goto err;
+
+        if (!alloc_rt_sched_group(tg))
+                goto err;
+
+        spin_lock_irqsave(&task_group_lock, flags);
         for_each_possible_cpu(i) {
-                rq = cpu_rq(i);
-                cfs_rq = tg->cfs_rq[i];
-                list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
-                rt_rq = tg->rt_rq[i];
-                list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
+                register_fair_sched_group(tg, i);
+                register_rt_sched_group(tg, i);
         }
         list_add_rcu(&tg->list, &task_groups);
-        unlock_task_group_list();
+        spin_unlock_irqrestore(&task_group_lock, flags);

         return tg;

@@ -7648,21 +7783,16 @@ static void free_sched_group_rcu(struct rcu_head *rhp)
 /* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
 {
-        struct cfs_rq *cfs_rq = NULL;
-        struct rt_rq *rt_rq = NULL;
+        unsigned long flags;
         int i;

-        lock_task_group_list();
+        spin_lock_irqsave(&task_group_lock, flags);
         for_each_possible_cpu(i) {
-                cfs_rq = tg->cfs_rq[i];
-                list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-                rt_rq = tg->rt_rq[i];
-                list_del_rcu(&rt_rq->leaf_rt_rq_list);
+                unregister_fair_sched_group(tg, i);
+                unregister_rt_sched_group(tg, i);
         }
         list_del_rcu(&tg->list);
-        unlock_task_group_list();
-
-        BUG_ON(!cfs_rq);
+        spin_unlock_irqrestore(&task_group_lock, flags);

         /* wait for possible concurrent references to cfs_rqs complete */
         call_rcu(&tg->rcu, free_sched_group_rcu);
@@ -7703,6 +7833,7 @@ void sched_move_task(struct task_struct *tsk)
         task_rq_unlock(rq, &flags);
 }

+#ifdef CONFIG_FAIR_GROUP_SCHED
 /* rq->lock to be locked by caller */
 static void set_se_shares(struct sched_entity *se, unsigned long shares)
 {
@@ -7728,13 +7859,14 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
         }
 }

+static DEFINE_MUTEX(shares_mutex);
+
 int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 {
         int i;
-        struct cfs_rq *cfs_rq;
-        struct rq *rq;
+        unsigned long flags;

-        lock_task_group_list();
+        mutex_lock(&shares_mutex);
         if (tg->shares == shares)
                 goto done;

@@ -7746,10 +7878,10 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
          * load_balance_fair) from referring to this group first,
          * by taking it off the rq->leaf_cfs_rq_list on each cpu.
          */
-        for_each_possible_cpu(i) {
-                cfs_rq = tg->cfs_rq[i];
-                list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-        }
+        spin_lock_irqsave(&task_group_lock, flags);
+        for_each_possible_cpu(i)
+                unregister_fair_sched_group(tg, i);
+        spin_unlock_irqrestore(&task_group_lock, flags);

         /* wait for any ongoing reference to this group to finish */
         synchronize_sched();
@@ -7769,13 +7901,12 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
          * Enable load balance activity on this group, by inserting it back on
          * each cpu's rq->leaf_cfs_rq_list.
          */
-        for_each_possible_cpu(i) {
-                rq = cpu_rq(i);
-                cfs_rq = tg->cfs_rq[i];
-                list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
-        }
+        spin_lock_irqsave(&task_group_lock, flags);
+        for_each_possible_cpu(i)
+                register_fair_sched_group(tg, i);
+        spin_unlock_irqrestore(&task_group_lock, flags);
 done:
-        unlock_task_group_list();
+        mutex_unlock(&shares_mutex);
         return 0;
 }

@@ -7783,35 +7914,84 @@ unsigned long sched_group_shares(struct task_group *tg)
 {
         return tg->shares;
 }
+#endif

+#ifdef CONFIG_RT_GROUP_SCHED
 /*
- * Ensure the total rt_ratio <= sysctl_sched_rt_ratio
+ * Ensure that the real time constraints are schedulable.
  */
-int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio)
+static DEFINE_MUTEX(rt_constraints_mutex);
+
+static unsigned long to_ratio(u64 period, u64 runtime)
+{
+        if (runtime == RUNTIME_INF)
+                return 1ULL << 16;
+
+        runtime *= (1ULL << 16);
+        runtime = div64_64(runtime, period);
+        return runtime;
+}
+
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 {
         struct task_group *tgi;
         unsigned long total = 0;
+        unsigned long global_ratio =
+                to_ratio(sysctl_sched_rt_period,
+                         sysctl_sched_rt_runtime < 0 ?
+                                RUNTIME_INF : sysctl_sched_rt_runtime);

         rcu_read_lock();
-        list_for_each_entry_rcu(tgi, &task_groups, list)
-                total += tgi->rt_ratio;
-        rcu_read_unlock();
+        list_for_each_entry_rcu(tgi, &task_groups, list) {
+                if (tgi == tg)
+                        continue;

-        if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio)
-                return -EINVAL;
+                total += to_ratio(period, tgi->rt_runtime);
+        }
+        rcu_read_unlock();

-        tg->rt_ratio = rt_ratio;
-        return 0;
+        return total + to_ratio(period, runtime) < global_ratio;
 }

-unsigned long sched_group_rt_ratio(struct task_group *tg)
+int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
 {
-        return tg->rt_ratio;
+        u64 rt_runtime, rt_period;
+        int err = 0;
+
+        rt_period = sysctl_sched_rt_period * NSEC_PER_USEC;
+        rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
+        if (rt_runtime_us == -1)
+                rt_runtime = rt_period;
+
+        mutex_lock(&rt_constraints_mutex);
+        if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
+                err = -EINVAL;
+                goto unlock;
+        }
+        if (rt_runtime_us == -1)
+                rt_runtime = RUNTIME_INF;
+        tg->rt_runtime = rt_runtime;
+ unlock:
+        mutex_unlock(&rt_constraints_mutex);
+
+        return err;
 }

-#endif /* CONFIG_FAIR_GROUP_SCHED */
+long sched_group_rt_runtime(struct task_group *tg)
+{
+        u64 rt_runtime_us;
+
+        if (tg->rt_runtime == RUNTIME_INF)
+                return -1;
+
+        rt_runtime_us = tg->rt_runtime;
+        do_div(rt_runtime_us, NSEC_PER_USEC);
+        return rt_runtime_us;
+}
+#endif
+#endif /* CONFIG_GROUP_SCHED */

-#ifdef CONFIG_FAIR_CGROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED

 /* return corresponding task_group object of a cgroup */
 static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
@@ -7857,9 +8037,15 @@ static int
 cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                       struct task_struct *tsk)
 {
+#ifdef CONFIG_RT_GROUP_SCHED
+        /* Don't accept realtime tasks when there is no way for them to run */
+        if (rt_task(tsk) && cgroup_tg(cgrp)->rt_runtime == 0)
+                return -EINVAL;
+#else
         /* We don't support RT-tasks being in separate groups */
         if (tsk->sched_class != &fair_sched_class)
                 return -EINVAL;
+#endif

         return 0;
 }
@@ -7871,6 +8057,7 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
         sched_move_task(tsk);
 }

+#ifdef CONFIG_FAIR_GROUP_SCHED
 static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype,
                                 u64 shareval)
 {
@@ -7883,31 +8070,70 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft)

         return (u64) tg->shares;
 }
+#endif

-static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype,
-                                u64 rt_ratio_val)
+#ifdef CONFIG_RT_GROUP_SCHED
+static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
+                                struct file *file,
+                                const char __user *userbuf,
+                                size_t nbytes, loff_t *unused_ppos)
 {
-        return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val);
+        char buffer[64];
+        int retval = 0;
+        s64 val;
+        char *end;
+
+        if (!nbytes)
+                return -EINVAL;
+        if (nbytes >= sizeof(buffer))
+                return -E2BIG;
+        if (copy_from_user(buffer, userbuf, nbytes))
+                return -EFAULT;
+
+        buffer[nbytes] = 0;     /* nul-terminate */
+
+        /* strip newline if necessary */
+        if (nbytes && (buffer[nbytes-1] == '\n'))
+                buffer[nbytes-1] = 0;
+        val = simple_strtoll(buffer, &end, 0);
+        if (*end)
+                return -EINVAL;
+
+        /* Pass to subsystem */
+        retval = sched_group_set_rt_runtime(cgroup_tg(cgrp), val);
+        if (!retval)
+                retval = nbytes;
+        return retval;
 }

-static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft)
+static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft,
+                                   struct file *file,
+                                   char __user *buf, size_t nbytes,
+                                   loff_t *ppos)
 {
-        struct task_group *tg = cgroup_tg(cgrp);
+        char tmp[64];
+        long val = sched_group_rt_runtime(cgroup_tg(cgrp));
+        int len = sprintf(tmp, "%ld\n", val);

-        return (u64) tg->rt_ratio;
+        return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
 }
+#endif

 static struct cftype cpu_files[] = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
         {
                 .name = "shares",
                 .read_uint = cpu_shares_read_uint,
                 .write_uint = cpu_shares_write_uint,
         },
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
         {
-                .name = "rt_ratio",
-                .read_uint = cpu_rt_ratio_read_uint,
-                .write_uint = cpu_rt_ratio_write_uint,
+                .name = "rt_runtime_us",
+                .read = cpu_rt_runtime_read,
+                .write = cpu_rt_runtime_write,
         },
+#endif
 };

 static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
|
|
@@ -7926,7 +8152,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
|
|
|
.early_init = 1,
|
|
|
};
|
|
|
|
|
|
-#endif /* CONFIG_FAIR_CGROUP_SCHED */
|
|
|
+#endif /* CONFIG_CGROUP_SCHED */
|
|
|
|
|
|
#ifdef CONFIG_CGROUP_CPUACCT
|
|
|
|