@@ -155,7 +155,7 @@ struct rt_prio_array {
         struct list_head queue[MAX_RT_PRIO];
 };

-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED

 #include <linux/cgroup.h>

@@ -165,19 +165,16 @@ static LIST_HEAD(task_groups);

 /* task group related information */
 struct task_group {
-#ifdef CONFIG_FAIR_CGROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
         struct cgroup_subsys_state css;
 #endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
         /* schedulable entities of this group on each cpu */
         struct sched_entity **se;
         /* runqueue "owned" by this group on each cpu */
         struct cfs_rq **cfs_rq;

-        struct sched_rt_entity **rt_se;
-        struct rt_rq **rt_rq;
-
-        unsigned int rt_ratio;
-
         /*
          * shares assigned to a task group governs how much of cpu bandwidth
          * is allocated to the group. The more shares a group has, the more is
@@ -213,33 +210,46 @@ struct task_group {
          *
          */
         unsigned long shares;
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+        struct sched_rt_entity **rt_se;
+        struct rt_rq **rt_rq;
+
+        u64 rt_runtime;
+#endif

         struct rcu_head rcu;
         struct list_head list;
 };

+#ifdef CONFIG_FAIR_GROUP_SCHED
 /* Default task group's sched entity on each cpu */
 static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
 /* Default task group's cfs_rq on each cpu */
 static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;

-static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
-static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
-
 static struct sched_entity *init_sched_entity_p[NR_CPUS];
 static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
+static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;

 static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS];
 static struct rt_rq *init_rt_rq_p[NR_CPUS];
+#endif

-/* task_group_mutex serializes add/remove of task groups and also changes to
+/* task_group_lock serializes add/remove of task groups and also changes to
  * a task group's cpu shares.
  */
-static DEFINE_MUTEX(task_group_mutex);
+static DEFINE_SPINLOCK(task_group_lock);

 /* doms_cur_mutex serializes access to doms_cur[] array */
 static DEFINE_MUTEX(doms_cur_mutex);

+#ifdef CONFIG_FAIR_GROUP_SCHED
 #ifdef CONFIG_SMP
 /* kernel thread that runs rebalance_shares() periodically */
 static struct task_struct *lb_monitor_task;
@@ -248,35 +258,40 @@ static int load_balance_monitor(void *unused);

 static void set_se_shares(struct sched_entity *se, unsigned long shares);

+#ifdef CONFIG_USER_SCHED
+# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
+#else
+# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
+#endif
+
+#define MIN_GROUP_SHARES 2
+
+static int init_task_group_load = INIT_TASK_GROUP_LOAD;
+#endif
+
 /* Default task group.
  * Every task in system belong to this group at bootup.
  */
 struct task_group init_task_group = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
         .se = init_sched_entity_p,
         .cfs_rq = init_cfs_rq_p,
+#endif

+#ifdef CONFIG_RT_GROUP_SCHED
         .rt_se = init_sched_rt_entity_p,
         .rt_rq = init_rt_rq_p,
-};
-
-#ifdef CONFIG_FAIR_USER_SCHED
-# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
-#else
-# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
 #endif
-
-#define MIN_GROUP_SHARES 2
-
-static int init_task_group_load = INIT_TASK_GROUP_LOAD;
+};

 /* return group to which a task belongs */
 static inline struct task_group *task_group(struct task_struct *p)
 {
         struct task_group *tg;

-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
         tg = p->user->tg;
-#elif defined(CONFIG_FAIR_CGROUP_SCHED)
+#elif defined(CONFIG_CGROUP_SCHED)
         tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
                           struct task_group, css);
 #else
@@ -288,21 +303,15 @@ static inline struct task_group *task_group(struct task_struct *p)
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
 static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
 {
+#ifdef CONFIG_FAIR_GROUP_SCHED
         p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
         p->se.parent = task_group(p)->se[cpu];
+#endif

+#ifdef CONFIG_RT_GROUP_SCHED
         p->rt.rt_rq = task_group(p)->rt_rq[cpu];
         p->rt.parent = task_group(p)->rt_se[cpu];
-}
-
-static inline void lock_task_group_list(void)
-{
-        mutex_lock(&task_group_mutex);
-}
-
-static inline void unlock_task_group_list(void)
-{
-        mutex_unlock(&task_group_mutex);
+#endif
 }

 static inline void lock_doms_cur(void)
@@ -318,12 +327,10 @@ static inline void unlock_doms_cur(void)
 #else

 static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
-static inline void lock_task_group_list(void) { }
-static inline void unlock_task_group_list(void) { }
 static inline void lock_doms_cur(void) { }
 static inline void unlock_doms_cur(void) { }

-#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif /* CONFIG_GROUP_SCHED */

 /* CFS-related fields in a runqueue */
 struct cfs_rq {
@@ -363,7 +370,7 @@ struct cfs_rq {
 struct rt_rq {
         struct rt_prio_array active;
         unsigned long rt_nr_running;
-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
         int highest_prio; /* highest queued rt task prio */
 #endif
 #ifdef CONFIG_SMP
@@ -373,7 +380,9 @@ struct rt_rq {
         int rt_throttled;
         u64 rt_time;

-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
+        unsigned long rt_nr_boosted;
+
         struct rq *rq;
         struct list_head leaf_rt_rq_list;
         struct task_group *tg;
@@ -447,6 +456,8 @@ struct rq {
 #ifdef CONFIG_FAIR_GROUP_SCHED
         /* list of leaf cfs_rq on this cpu: */
         struct list_head leaf_cfs_rq_list;
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
         struct list_head leaf_rt_rq_list;
 #endif

@@ -652,19 +663,21 @@ const_debug unsigned int sysctl_sched_features =
 const_debug unsigned int sysctl_sched_nr_migrate = 32;

 /*
- * period over which we measure -rt task cpu usage in ms.
+ * period over which we measure -rt task cpu usage in us.
  * default: 1s
  */
-const_debug unsigned int sysctl_sched_rt_period = 1000;
+unsigned int sysctl_sched_rt_period = 1000000;

-#define SCHED_RT_FRAC_SHIFT     16
-#define SCHED_RT_FRAC           (1UL << SCHED_RT_FRAC_SHIFT)
+/*
+ * part of the period that we allow rt tasks to run in us.
+ * default: 0.95s
+ */
+int sysctl_sched_rt_runtime = 950000;

 /*
- * ratio of time -rt tasks may consume.
- * default: 95%
+ * single value that denotes runtime == period, ie unlimited time.
  */
-const_debug unsigned int sysctl_sched_rt_ratio = 62259;
+#define RUNTIME_INF     ((u64)~0ULL)

 /*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
@@ -4571,6 +4584,15 @@ recheck:
                         return -EPERM;
         }

+#ifdef CONFIG_RT_GROUP_SCHED
+        /*
+         * Do not allow realtime tasks into groups that have no runtime
+         * assigned.
+         */
+        if (rt_policy(policy) && task_group(p)->rt_runtime == 0)
+                return -EPERM;
+#endif
+
         retval = security_task_setscheduler(p, policy, param);
         if (retval)
                 return retval;
@@ -7112,7 +7134,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
         /* delimiter for bitsearch: */
         __set_bit(MAX_RT_PRIO, array->bitmap);

-#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
         rt_rq->highest_prio = MAX_RT_PRIO;
 #endif
 #ifdef CONFIG_SMP
@@ -7123,7 +7145,8 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
         rt_rq->rt_time = 0;
         rt_rq->rt_throttled = 0;

-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_RT_GROUP_SCHED
+        rt_rq->rt_nr_boosted = 0;
         rt_rq->rq = rq;
 #endif
 }
@@ -7146,7 +7169,9 @@ static void init_tg_cfs_entry(struct rq *rq, struct task_group *tg,
         se->load.inv_weight = div64_64(1ULL<<32, se->load.weight);
         se->parent = NULL;
 }
+#endif

+#ifdef CONFIG_RT_GROUP_SCHED
 static void init_tg_rt_entry(struct rq *rq, struct task_group *tg,
                 struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
                 int cpu, int add)
@@ -7175,7 +7200,7 @@ void __init sched_init(void)
         init_defrootdomain();
 #endif

-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
         list_add(&init_task_group.list, &task_groups);
 #endif

@@ -7196,7 +7221,10 @@ void __init sched_init(void)
                                 &per_cpu(init_cfs_rq, i),
                                 &per_cpu(init_sched_entity, i), i, 1);

-                init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+                init_task_group.rt_runtime =
+                                sysctl_sched_rt_runtime * NSEC_PER_USEC;
                 INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
                 init_tg_rt_entry(rq, &init_task_group,
                                 &per_cpu(init_rt_rq, i),
@@ -7303,7 +7331,7 @@ void normalize_rt_tasks(void)
         unsigned long flags;
         struct rq *rq;

-        read_lock_irq(&tasklist_lock);
+        read_lock_irqsave(&tasklist_lock, flags);
         do_each_thread(g, p) {
                 /*
                  * Only normalize user tasks:
@@ -7329,16 +7357,16 @@ void normalize_rt_tasks(void)
                         continue;
                 }

-                spin_lock_irqsave(&p->pi_lock, flags);
+                spin_lock(&p->pi_lock);
                 rq = __task_rq_lock(p);

                 normalize_task(rq, p);

                 __task_rq_unlock(rq);
-                spin_unlock_irqrestore(&p->pi_lock, flags);
+                spin_unlock(&p->pi_lock);
         } while_each_thread(g, p);

-        read_unlock_irq(&tasklist_lock);
+        read_unlock_irqrestore(&tasklist_lock, flags);
 }

 #endif /* CONFIG_MAGIC_SYSRQ */
@@ -7387,9 +7415,9 @@ void set_curr_task(int cpu, struct task_struct *p)

 #endif

-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED

-#ifdef CONFIG_SMP
+#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
 /*
  * distribute shares of all task groups among their schedulable entities,
  * to reflect load distribution across cpus.
@@ -7540,7 +7568,8 @@ static int load_balance_monitor(void *unused)
 }
 #endif /* CONFIG_SMP */

-static void free_sched_group(struct task_group *tg)
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void free_fair_sched_group(struct task_group *tg)
 {
         int i;

@@ -7549,49 +7578,27 @@ static void free_sched_group(struct task_group *tg)
                         kfree(tg->cfs_rq[i]);
                 if (tg->se)
                         kfree(tg->se[i]);
-                if (tg->rt_rq)
-                        kfree(tg->rt_rq[i]);
-                if (tg->rt_se)
-                        kfree(tg->rt_se[i]);
         }

         kfree(tg->cfs_rq);
         kfree(tg->se);
-        kfree(tg->rt_rq);
-        kfree(tg->rt_se);
-        kfree(tg);
 }

-/* allocate runqueue etc for a new task group */
-struct task_group *sched_create_group(void)
+static int alloc_fair_sched_group(struct task_group *tg)
 {
-        struct task_group *tg;
         struct cfs_rq *cfs_rq;
         struct sched_entity *se;
-        struct rt_rq *rt_rq;
-        struct sched_rt_entity *rt_se;
         struct rq *rq;
         int i;

-        tg = kzalloc(sizeof(*tg), GFP_KERNEL);
-        if (!tg)
-                return ERR_PTR(-ENOMEM);
-
         tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
         if (!tg->cfs_rq)
                 goto err;
         tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
         if (!tg->se)
                 goto err;

         tg->shares = NICE_0_LOAD;
-        tg->rt_ratio = 0; /* XXX */

         for_each_possible_cpu(i) {
                 rq = cpu_rq(i);
@@ -7606,6 +7613,79 @@ struct task_group *sched_create_group(void)
                 if (!se)
                         goto err;

+                init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
+        }
+
+        return 1;
+
+ err:
+        return 0;
+}
+
+static inline void register_fair_sched_group(struct task_group *tg, int cpu)
+{
+        list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list,
+                        &cpu_rq(cpu)->leaf_cfs_rq_list);
+}
+
+static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
+{
+        list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list);
+}
+#else
+static inline void free_fair_sched_group(struct task_group *tg)
+{
+}
+
+static inline int alloc_fair_sched_group(struct task_group *tg)
+{
+        return 1;
+}
+
+static inline void register_fair_sched_group(struct task_group *tg, int cpu)
+{
+}
+
+static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
+{
+}
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+static void free_rt_sched_group(struct task_group *tg)
+{
+        int i;
+
+        for_each_possible_cpu(i) {
+                if (tg->rt_rq)
+                        kfree(tg->rt_rq[i]);
+                if (tg->rt_se)
+                        kfree(tg->rt_se[i]);
+        }
+
+        kfree(tg->rt_rq);
+        kfree(tg->rt_se);
+}
+
+static int alloc_rt_sched_group(struct task_group *tg)
+{
+        struct rt_rq *rt_rq;
+        struct sched_rt_entity *rt_se;
+        struct rq *rq;
+        int i;
+
+        tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
+        if (!tg->rt_rq)
+                goto err;
+        tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
+        if (!tg->rt_se)
+                goto err;
+
+        tg->rt_runtime = 0;
+
+        for_each_possible_cpu(i) {
+                rq = cpu_rq(i);
+
                 rt_rq = kmalloc_node(sizeof(struct rt_rq),
                                 GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
                 if (!rt_rq)
@@ -7616,20 +7696,75 @@ struct task_group *sched_create_group(void)
                 if (!rt_se)
                         goto err;

-                init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
                 init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0);
         }

-        lock_task_group_list();
+        return 1;
+
+ err:
+        return 0;
+}
+
+static inline void register_rt_sched_group(struct task_group *tg, int cpu)
+{
+        list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list,
+                        &cpu_rq(cpu)->leaf_rt_rq_list);
+}
+
+static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
+{
+        list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list);
+}
+#else
+static inline void free_rt_sched_group(struct task_group *tg)
+{
+}
+
+static inline int alloc_rt_sched_group(struct task_group *tg)
+{
+        return 1;
+}
+
+static inline void register_rt_sched_group(struct task_group *tg, int cpu)
+{
+}
+
+static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
+{
+}
+#endif
+
+static void free_sched_group(struct task_group *tg)
+{
+        free_fair_sched_group(tg);
+        free_rt_sched_group(tg);
+        kfree(tg);
+}
+
+/* allocate runqueue etc for a new task group */
+struct task_group *sched_create_group(void)
+{
+        struct task_group *tg;
+        unsigned long flags;
+        int i;
+
+        tg = kzalloc(sizeof(*tg), GFP_KERNEL);
+        if (!tg)
+                return ERR_PTR(-ENOMEM);
+
+        if (!alloc_fair_sched_group(tg))
+                goto err;
+
+        if (!alloc_rt_sched_group(tg))
+                goto err;
+
+        spin_lock_irqsave(&task_group_lock, flags);
         for_each_possible_cpu(i) {
-                rq = cpu_rq(i);
-                cfs_rq = tg->cfs_rq[i];
-                list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
-                rt_rq = tg->rt_rq[i];
-                list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
+                register_fair_sched_group(tg, i);
+                register_rt_sched_group(tg, i);
         }
         list_add_rcu(&tg->list, &task_groups);
-        unlock_task_group_list();
+        spin_unlock_irqrestore(&task_group_lock, flags);

         return tg;

@@ -7648,21 +7783,16 @@ static void free_sched_group_rcu(struct rcu_head *rhp)
 /* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
 {
-        struct cfs_rq *cfs_rq = NULL;
-        struct rt_rq *rt_rq = NULL;
+        unsigned long flags;
         int i;

-        lock_task_group_list();
+        spin_lock_irqsave(&task_group_lock, flags);
         for_each_possible_cpu(i) {
-                cfs_rq = tg->cfs_rq[i];
-                list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-                rt_rq = tg->rt_rq[i];
-                list_del_rcu(&rt_rq->leaf_rt_rq_list);
+                unregister_fair_sched_group(tg, i);
+                unregister_rt_sched_group(tg, i);
         }
         list_del_rcu(&tg->list);
-        unlock_task_group_list();
-
-        BUG_ON(!cfs_rq);
+        spin_unlock_irqrestore(&task_group_lock, flags);

         /* wait for possible concurrent references to cfs_rqs complete */
         call_rcu(&tg->rcu, free_sched_group_rcu);
@@ -7703,6 +7833,7 @@ void sched_move_task(struct task_struct *tsk)
         task_rq_unlock(rq, &flags);
 }

+#ifdef CONFIG_FAIR_GROUP_SCHED
 /* rq->lock to be locked by caller */
 static void set_se_shares(struct sched_entity *se, unsigned long shares)
 {
@@ -7728,13 +7859,14 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
         }
 }

+static DEFINE_MUTEX(shares_mutex);
+
 int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 {
         int i;
-        struct cfs_rq *cfs_rq;
-        struct rq *rq;
+        unsigned long flags;

-        lock_task_group_list();
+        mutex_lock(&shares_mutex);
         if (tg->shares == shares)
                 goto done;

@@ -7746,10 +7878,10 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
          * load_balance_fair) from referring to this group first,
          * by taking it off the rq->leaf_cfs_rq_list on each cpu.
          */
-        for_each_possible_cpu(i) {
-                cfs_rq = tg->cfs_rq[i];
-                list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-        }
+        spin_lock_irqsave(&task_group_lock, flags);
+        for_each_possible_cpu(i)
+                unregister_fair_sched_group(tg, i);
+        spin_unlock_irqrestore(&task_group_lock, flags);

         /* wait for any ongoing reference to this group to finish */
         synchronize_sched();
@@ -7769,13 +7901,12 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
          * Enable load balance activity on this group, by inserting it back on
          * each cpu's rq->leaf_cfs_rq_list.
          */
-        for_each_possible_cpu(i) {
-                rq = cpu_rq(i);
-                cfs_rq = tg->cfs_rq[i];
-                list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
-        }
+        spin_lock_irqsave(&task_group_lock, flags);
+        for_each_possible_cpu(i)
+                register_fair_sched_group(tg, i);
+        spin_unlock_irqrestore(&task_group_lock, flags);
 done:
-        unlock_task_group_list();
+        mutex_unlock(&shares_mutex);
         return 0;
 }

@@ -7783,35 +7914,84 @@ unsigned long sched_group_shares(struct task_group *tg)
 {
         return tg->shares;
 }
+#endif

+#ifdef CONFIG_RT_GROUP_SCHED
 /*
- * Ensure the total rt_ratio <= sysctl_sched_rt_ratio
+ * Ensure that the real time constraints are schedulable.
  */
-int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio)
+static DEFINE_MUTEX(rt_constraints_mutex);
+
+static unsigned long to_ratio(u64 period, u64 runtime)
+{
+        if (runtime == RUNTIME_INF)
+                return 1ULL << 16;
+
+        runtime *= (1ULL << 16);
+        runtime = div64_64(runtime, period);
+        return runtime;
+}
+
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 {
         struct task_group *tgi;
         unsigned long total = 0;
+        unsigned long global_ratio =
+                to_ratio(sysctl_sched_rt_period,
+                         sysctl_sched_rt_runtime < 0 ?
+                                RUNTIME_INF : sysctl_sched_rt_runtime);

         rcu_read_lock();
-        list_for_each_entry_rcu(tgi, &task_groups, list)
-                total += tgi->rt_ratio;
-        rcu_read_unlock();
+        list_for_each_entry_rcu(tgi, &task_groups, list) {
+                if (tgi == tg)
+                        continue;

-        if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio)
-                return -EINVAL;
+                total += to_ratio(period, tgi->rt_runtime);
+        }
+        rcu_read_unlock();

-        tg->rt_ratio = rt_ratio;
-        return 0;
+        return total + to_ratio(period, runtime) < global_ratio;
 }

-unsigned long sched_group_rt_ratio(struct task_group *tg)
+int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
 {
-        return tg->rt_ratio;
+        u64 rt_runtime, rt_period;
+        int err = 0;
+
+        rt_period = sysctl_sched_rt_period * NSEC_PER_USEC;
+        rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
+        if (rt_runtime_us == -1)
+                rt_runtime = rt_period;
+
+        mutex_lock(&rt_constraints_mutex);
+        if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
+                err = -EINVAL;
+                goto unlock;
+        }
+        if (rt_runtime_us == -1)
+                rt_runtime = RUNTIME_INF;
+        tg->rt_runtime = rt_runtime;
+ unlock:
+        mutex_unlock(&rt_constraints_mutex);
+
+        return err;
 }

-#endif /* CONFIG_FAIR_GROUP_SCHED */
+long sched_group_rt_runtime(struct task_group *tg)
+{
+        u64 rt_runtime_us;
+
+        if (tg->rt_runtime == RUNTIME_INF)
+                return -1;
+
+        rt_runtime_us = tg->rt_runtime;
+        do_div(rt_runtime_us, NSEC_PER_USEC);
+        return rt_runtime_us;
+}
+#endif
+#endif /* CONFIG_GROUP_SCHED */

-#ifdef CONFIG_FAIR_CGROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED

 /* return corresponding task_group object of a cgroup */
 static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
@@ -7857,9 +8037,15 @@ static int
 cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                       struct task_struct *tsk)
 {
+#ifdef CONFIG_RT_GROUP_SCHED
+        /* Don't accept realtime tasks when there is no way for them to run */
+        if (rt_task(tsk) && cgroup_tg(cgrp)->rt_runtime == 0)
+                return -EINVAL;
+#else
         /* We don't support RT-tasks being in separate groups */
         if (tsk->sched_class != &fair_sched_class)
                 return -EINVAL;
+#endif

         return 0;
 }
@@ -7871,6 +8057,7 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
         sched_move_task(tsk);
 }

+#ifdef CONFIG_FAIR_GROUP_SCHED
 static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype,
                                 u64 shareval)
 {
@@ -7883,31 +8070,70 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft)

         return (u64) tg->shares;
 }
+#endif

-static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype,
-                                u64 rt_ratio_val)
+#ifdef CONFIG_RT_GROUP_SCHED
+static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
+                                struct file *file,
+                                const char __user *userbuf,
+                                size_t nbytes, loff_t *unused_ppos)
 {
-        return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val);
+        char buffer[64];
+        int retval = 0;
+        s64 val;
+        char *end;
+
+        if (!nbytes)
+                return -EINVAL;
+        if (nbytes >= sizeof(buffer))
+                return -E2BIG;
+        if (copy_from_user(buffer, userbuf, nbytes))
+                return -EFAULT;
+
+        buffer[nbytes] = 0;     /* nul-terminate */
+
+        /* strip newline if necessary */
+        if (nbytes && (buffer[nbytes-1] == '\n'))
+                buffer[nbytes-1] = 0;
+        val = simple_strtoll(buffer, &end, 0);
+        if (*end)
+                return -EINVAL;
+
+        /* Pass to subsystem */
+        retval = sched_group_set_rt_runtime(cgroup_tg(cgrp), val);
+        if (!retval)
+                retval = nbytes;
+        return retval;
 }

-static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft)
+static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft,
+                                   struct file *file,
+                                   char __user *buf, size_t nbytes,
+                                   loff_t *ppos)
 {
-        struct task_group *tg = cgroup_tg(cgrp);
+        char tmp[64];
+        long val = sched_group_rt_runtime(cgroup_tg(cgrp));
+        int len = sprintf(tmp, "%ld\n", val);

-        return (u64) tg->rt_ratio;
+        return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
 }
+#endif

 static struct cftype cpu_files[] = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
         {
                 .name = "shares",
                 .read_uint = cpu_shares_read_uint,
                 .write_uint = cpu_shares_write_uint,
         },
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
         {
-                .name = "rt_ratio",
-                .read_uint = cpu_rt_ratio_read_uint,
-                .write_uint = cpu_rt_ratio_write_uint,
+                .name = "rt_runtime_us",
+                .read = cpu_rt_runtime_read,
+                .write = cpu_rt_runtime_write,
         },
+#endif
 };

 static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
|
|
@@ -7926,7 +8152,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
|
|
|
.early_init = 1,
|
|
|
};
|
|
|
|
|
|
-#endif /* CONFIG_FAIR_CGROUP_SCHED */
|
|
|
+#endif /* CONFIG_CGROUP_SCHED */
|
|
|
|
|
|
#ifdef CONFIG_CGROUP_CPUACCT
|
|
|
|