@@ -267,6 +267,10 @@ struct task_group {
 	struct cgroup_subsys_state css;
 #endif
 
+#ifdef CONFIG_USER_SCHED
+	uid_t uid;
+#endif
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* schedulable entities of this group on each cpu */
 	struct sched_entity **se;
@@ -292,6 +296,12 @@ struct task_group {
 
 #ifdef CONFIG_USER_SCHED
 
+/* Helper function to pass uid information to create_sched_user() */
+void set_tg_uid(struct user_struct *user)
+{
+	user->tg->uid = user->uid;
+}
+
 /*
  * Root task group.
  * Every UID task group (including init_task_group aka UID-0) will
@@ -594,6 +604,8 @@ struct rq {
 #ifdef CONFIG_SCHEDSTATS
 	/* latency stats */
 	struct sched_info rq_sched_info;
+	unsigned long long rq_cpu_time;
+	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
 
 	/* sys_sched_yield() stats */
 	unsigned int yld_exp_empty;
@@ -711,45 +723,18 @@ static __read_mostly char *sched_feat_names[] = {
 
 #undef SCHED_FEAT
 
-static int sched_feat_open(struct inode *inode, struct file *filp)
-{
-	filp->private_data = inode->i_private;
-	return 0;
-}
-
-static ssize_t
-sched_feat_read(struct file *filp, char __user *ubuf,
-		size_t cnt, loff_t *ppos)
+static int sched_feat_show(struct seq_file *m, void *v)
 {
-	char *buf;
-	int r = 0;
-	int len = 0;
 	int i;
 
 	for (i = 0; sched_feat_names[i]; i++) {
-		len += strlen(sched_feat_names[i]);
-		len += 4;
-	}
-
-	buf = kmalloc(len + 2, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	for (i = 0; sched_feat_names[i]; i++) {
-		if (sysctl_sched_features & (1UL << i))
-			r += sprintf(buf + r, "%s ", sched_feat_names[i]);
-		else
-			r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]);
+		if (!(sysctl_sched_features & (1UL << i)))
+			seq_puts(m, "NO_");
+		seq_printf(m, "%s ", sched_feat_names[i]);
 	}
+	seq_puts(m, "\n");
 
-	r += sprintf(buf + r, "\n");
-	WARN_ON(r >= len + 2);
-
-	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-
-	kfree(buf);
-
-	return r;
+	return 0;
 }
 
 static ssize_t
@@ -794,10 +779,17 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
+static int sched_feat_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, sched_feat_show, NULL);
+}
+
 static struct file_operations sched_feat_fops = {
-	.open	= sched_feat_open,
-	.read	= sched_feat_read,
-	.write	= sched_feat_write,
+	.open		= sched_feat_open,
+	.write		= sched_feat_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
 };
 
 static __init int sched_init_debug(void)
@@ -1482,27 +1474,13 @@ static void
 update_group_shares_cpu(struct task_group *tg, int cpu,
 			unsigned long sd_shares, unsigned long sd_rq_weight)
 {
-	int boost = 0;
 	unsigned long shares;
 	unsigned long rq_weight;
 
 	if (!tg->se[cpu])
 		return;
 
-	rq_weight = tg->cfs_rq[cpu]->load.weight;
-
-	/*
-	 * If there are currently no tasks on the cpu pretend there is one of
-	 * average load so that when a new task gets to run here it will not
-	 * get delayed by group starvation.
-	 */
-	if (!rq_weight) {
-		boost = 1;
-		rq_weight = NICE_0_LOAD;
-	}
-
-	if (unlikely(rq_weight > sd_rq_weight))
-		rq_weight = sd_rq_weight;
+	rq_weight = tg->cfs_rq[cpu]->rq_weight;
 
 	/*
 	 *		\Sum shares * rq_weight
@@ -1510,7 +1488,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
 	 *		   \Sum rq_weight
 	 *
 	 */
-	shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+	shares = (sd_shares * rq_weight) / sd_rq_weight;
 	shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
 
 	if (abs(shares - tg->se[cpu]->load.weight) >
@@ -1519,11 +1497,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
 		unsigned long flags;
 
 		spin_lock_irqsave(&rq->lock, flags);
-		/*
-		 * record the actual number of shares, not the boosted amount.
-		 */
-		tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-		tg->cfs_rq[cpu]->rq_weight = rq_weight;
+		tg->cfs_rq[cpu]->shares = shares;
 
 		__set_se_shares(tg->se[cpu], shares);
 		spin_unlock_irqrestore(&rq->lock, flags);
@@ -1537,13 +1511,23 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
  */
 static int tg_shares_up(struct task_group *tg, void *data)
 {
-	unsigned long rq_weight = 0;
+	unsigned long weight, rq_weight = 0;
 	unsigned long shares = 0;
 	struct sched_domain *sd = data;
 	int i;
 
 	for_each_cpu_mask(i, sd->span) {
-		rq_weight += tg->cfs_rq[i]->load.weight;
+		/*
+		 * If there are currently no tasks on the cpu pretend there
+		 * is one of average load so that when a new task gets to
+		 * run here it will not get delayed by group starvation.
+		 */
+		weight = tg->cfs_rq[i]->load.weight;
+		if (!weight)
+			weight = NICE_0_LOAD;
+
+		tg->cfs_rq[i]->rq_weight = weight;
+		rq_weight += weight;
 		shares += tg->cfs_rq[i]->shares;
 	}
 
@@ -1553,9 +1537,6 @@ static int tg_shares_up(struct task_group *tg, void *data)
 	if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
 		shares = tg->shares;
 
-	if (!rq_weight)
-		rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
-
 	for_each_cpu_mask(i, sd->span)
 		update_group_shares_cpu(tg, i, shares, rq_weight);
 
@@ -1620,6 +1601,39 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
 
 #endif
 
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	int ret = 0;
+
+	if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work good under rq->lock */
+		spin_unlock(&this_rq->lock);
+		BUG_ON(1);
+	}
+	if (unlikely(!spin_trylock(&busiest->lock))) {
+		if (busiest < this_rq) {
+			spin_unlock(&this_rq->lock);
+			spin_lock(&busiest->lock);
+			spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
+			ret = 1;
+		} else
+			spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
+	}
+	return ret;
+}
+
+static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(busiest->lock)
+{
+	spin_unlock(&busiest->lock);
+	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+}
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -2264,6 +2278,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 
 	smp_wmb();
 	rq = task_rq_lock(p, &flags);
+	update_rq_clock(rq);
 	old_state = p->state;
 	if (!(old_state & state))
 		goto out;
@@ -2321,7 +2336,6 @@ out_activate:
 		schedstat_inc(p, se.nr_wakeups_local);
 	else
 		schedstat_inc(p, se.nr_wakeups_remote);
-	update_rq_clock(rq);
 	activate_task(rq, p, 1);
 	success = 1;
 
@@ -2821,40 +2835,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 	__release(rq2->lock);
 }
 
-/*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
- */
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
-	__releases(this_rq->lock)
-	__acquires(busiest->lock)
-	__acquires(this_rq->lock)
-{
-	int ret = 0;
-
-	if (unlikely(!irqs_disabled())) {
-		/* printk() doesn't work good under rq->lock */
-		spin_unlock(&this_rq->lock);
-		BUG_ON(1);
-	}
-	if (unlikely(!spin_trylock(&busiest->lock))) {
-		if (busiest < this_rq) {
-			spin_unlock(&this_rq->lock);
-			spin_lock(&busiest->lock);
-			spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
-			ret = 1;
-		} else
-			spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
-	}
-	return ret;
-}
-
-static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
-	__releases(busiest->lock)
-{
-	spin_unlock(&busiest->lock);
-	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
-}
-
 /*
  * If dest_cpu is allowed for this process, migrate the task to it.
  * This is accomplished by forcing the cpu_allowed mask to only
@@ -3716,7 +3696,7 @@ out_balanced:
 static void idle_balance(int this_cpu, struct rq *this_rq)
 {
 	struct sched_domain *sd;
-	int pulled_task = -1;
+	int pulled_task = 0;
 	unsigned long next_balance = jiffies + HZ;
 	cpumask_t tmpmask;
 
@@ -6150,7 +6130,6 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
 
 /*
  * Figure out where task on dead CPU should go, use force if necessary.
- * NOTE: interrupts should be disabled by the caller
  */
 static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
@@ -6662,28 +6641,6 @@ early_initcall(migration_init);
 
 #ifdef CONFIG_SCHED_DEBUG
 
-static inline const char *sd_level_to_string(enum sched_domain_level lvl)
-{
-	switch (lvl) {
-	case SD_LV_NONE:
-			return "NONE";
-	case SD_LV_SIBLING:
-			return "SIBLING";
-	case SD_LV_MC:
-			return "MC";
-	case SD_LV_CPU:
-			return "CPU";
-	case SD_LV_NODE:
-			return "NODE";
-	case SD_LV_ALLNODES:
-			return "ALLNODES";
-	case SD_LV_MAX:
-			return "MAX";
-
-	}
-	return "MAX";
-}
-
 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 				  cpumask_t *groupmask)
 {
@@ -6703,8 +6660,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		return -1;
 	}
 
-	printk(KERN_CONT "span %s level %s\n",
-		str, sd_level_to_string(sd->level));
+	printk(KERN_CONT "span %s level %s\n", str, sd->name);
 
 	if (!cpu_isset(cpu, sd->span)) {
 		printk(KERN_ERR "ERROR: domain->span does not contain "
@@ -6840,6 +6796,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 				SD_BALANCE_EXEC |
 				SD_SHARE_CPUPOWER |
 				SD_SHARE_PKG_RESOURCES);
+		if (nr_node_ids == 1)
+			pflags &= ~SD_SERIALIZE;
 	}
 	if (~cflags & pflags)
 		return 0;
@@ -7360,13 +7318,21 @@ struct allmasks {
 };
 
 #if NR_CPUS > 128
-#define	SCHED_CPUMASK_ALLOC		1
-#define	SCHED_CPUMASK_FREE(v)		kfree(v)
-#define	SCHED_CPUMASK_DECLARE(v)	struct allmasks *v
+#define	SCHED_CPUMASK_DECLARE(v)	struct allmasks *v
+static inline void sched_cpumask_alloc(struct allmasks **masks)
+{
+	*masks = kmalloc(sizeof(**masks), GFP_KERNEL);
+}
+static inline void sched_cpumask_free(struct allmasks *masks)
+{
+	kfree(masks);
+}
 #else
-#define	SCHED_CPUMASK_ALLOC		0
-#define	SCHED_CPUMASK_FREE(v)
-#define	SCHED_CPUMASK_DECLARE(v)	struct allmasks _v, *v = &_v
+#define	SCHED_CPUMASK_DECLARE(v)	struct allmasks _v, *v = &_v
+static inline void sched_cpumask_alloc(struct allmasks **masks)
+{ }
+static inline void sched_cpumask_free(struct allmasks *masks)
+{ }
 #endif
 
 #define	SCHED_CPUMASK_VAR(v, a)		cpumask_t *v = (cpumask_t *) \
@@ -7442,9 +7408,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		return -ENOMEM;
 	}
 
-#if SCHED_CPUMASK_ALLOC
 	/* get space for all scratch cpumask variables */
-	allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
+	sched_cpumask_alloc(&allmasks);
 	if (!allmasks) {
 		printk(KERN_WARNING "Cannot alloc cpumask array\n");
 		kfree(rd);
@@ -7453,7 +7418,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #endif
 		return -ENOMEM;
 	}
-#endif
+
 	tmpmask = (cpumask_t *)allmasks;
 
 
@@ -7707,13 +7672,13 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		cpu_attach_domain(sd, rd, i);
 	}
 
-	SCHED_CPUMASK_FREE((void *)allmasks);
+	sched_cpumask_free(allmasks);
 	return 0;
 
 #ifdef CONFIG_NUMA
 error:
 	free_sched_groups(cpu_map, tmpmask);
-	SCHED_CPUMASK_FREE((void *)allmasks);
+	sched_cpumask_free(allmasks);
 	kfree(rd);
 	return -ENOMEM;
 #endif
@@ -7736,8 +7701,14 @@ static struct sched_domain_attr *dattr_cur;
  */
 static cpumask_t fallback_doms;
 
-void __attribute__((weak)) arch_update_cpu_topology(void)
+/*
+ * arch_update_cpu_topology lets virtualized architectures update the
+ * cpu core maps. It is supposed to return 1 if the topology changed
+ * or 0 if it stayed the same.
+ */
+int __attribute__((weak)) arch_update_cpu_topology(void)
 {
+	return 0;
 }
 
 /*
@@ -7777,8 +7748,6 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
 	cpumask_t tmpmask;
 	int i;
 
-	unregister_sched_domain_sysctl();
-
 	for_each_cpu_mask_nr(i, *cpu_map)
 		cpu_attach_domain(NULL, &def_root_domain, i);
 	synchronize_sched();
@@ -7831,17 +7800,21 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
 			     struct sched_domain_attr *dattr_new)
 {
 	int i, j, n;
+	int new_topology;
 
 	mutex_lock(&sched_domains_mutex);
 
 	/* always unregister in case we don't destroy any domains */
 	unregister_sched_domain_sysctl();
 
+	/* Let architecture update cpu core mappings. */
+	new_topology = arch_update_cpu_topology();
+
 	n = doms_new ? ndoms_new : 0;
 
 	/* Destroy deleted domains */
 	for (i = 0; i < ndoms_cur; i++) {
-		for (j = 0; j < n; j++) {
+		for (j = 0; j < n && !new_topology; j++) {
 			if (cpus_equal(doms_cur[i], doms_new[j])
 			    && dattrs_equal(dattr_cur, i, dattr_new, j))
 				goto match1;
@@ -7856,12 +7829,12 @@ match1:
 		ndoms_cur = 0;
 		doms_new = &fallback_doms;
 		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
-		dattr_new = NULL;
+		WARN_ON_ONCE(dattr_new);
 	}
 
 	/* Build new domains */
 	for (i = 0; i < ndoms_new; i++) {
-		for (j = 0; j < ndoms_cur; j++) {
+		for (j = 0; j < ndoms_cur && !new_topology; j++) {
 			if (cpus_equal(doms_new[i], doms_cur[j])
 			    && dattrs_equal(dattr_new, i, dattr_cur, j))
 				goto match2;
@@ -8516,7 +8489,7 @@ static
 int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	struct cfs_rq *cfs_rq;
-	struct sched_entity *se, *parent_se;
+	struct sched_entity *se;
 	struct rq *rq;
 	int i;
 
@@ -8532,18 +8505,17 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
 
-		cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
-				      GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
+				      GFP_KERNEL, cpu_to_node(i));
 		if (!cfs_rq)
 			goto err;
 
-		se = kmalloc_node(sizeof(struct sched_entity),
-				  GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		se = kzalloc_node(sizeof(struct sched_entity),
+				  GFP_KERNEL, cpu_to_node(i));
 		if (!se)
 			goto err;
 
-		parent_se = parent ? parent->se[i] : NULL;
-		init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se);
+		init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
 	}
 
 	return 1;
@@ -8604,7 +8576,7 @@ static
 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	struct rt_rq *rt_rq;
-	struct sched_rt_entity *rt_se, *parent_se;
+	struct sched_rt_entity *rt_se;
 	struct rq *rq;
 	int i;
 
@@ -8621,18 +8593,17 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
 
-		rt_rq = kmalloc_node(sizeof(struct rt_rq),
-				GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		rt_rq = kzalloc_node(sizeof(struct rt_rq),
+				GFP_KERNEL, cpu_to_node(i));
 		if (!rt_rq)
 			goto err;
 
-		rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
-				GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
+				GFP_KERNEL, cpu_to_node(i));
 		if (!rt_se)
 			goto err;
 
-		parent_se = parent ? parent->rt_se[i] : NULL;
-		init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se);
+		init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
 	}
 
 	return 1;
@@ -9275,11 +9246,12 @@ struct cgroup_subsys cpu_cgroup_subsys = {
  * (balbir@in.ibm.com).
  */
 
-/* track cpu usage of a group of tasks */
+/* track cpu usage of a group of tasks and its child groups */
 struct cpuacct {
 	struct cgroup_subsys_state css;
 	/* cpuusage holds pointer to a u64-type object on every cpu */
 	u64 *cpuusage;
+	struct cpuacct *parent;
 };
 
 struct cgroup_subsys cpuacct_subsys;
@@ -9313,6 +9285,9 @@ static struct cgroup_subsys_state *cpuacct_create(
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (cgrp->parent)
+		ca->parent = cgroup_ca(cgrp->parent);
+
 	return &ca->css;
 }
 
@@ -9326,6 +9301,41 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 	kfree(ca);
 }
 
+static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
+{
+	u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+	u64 data;
+
+#ifndef CONFIG_64BIT
+	/*
+	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
+	 */
+	spin_lock_irq(&cpu_rq(cpu)->lock);
+	data = *cpuusage;
+	spin_unlock_irq(&cpu_rq(cpu)->lock);
+#else
+	data = *cpuusage;
+#endif
+
+	return data;
+}
+
+static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
+{
+	u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+
+#ifndef CONFIG_64BIT
+	/*
+	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
+	 */
+	spin_lock_irq(&cpu_rq(cpu)->lock);
+	*cpuusage = val;
+	spin_unlock_irq(&cpu_rq(cpu)->lock);
+#else
+	*cpuusage = val;
+#endif
+}
+
 /* return total cpu usage (in nanoseconds) of a group */
 static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
 {
@@ -9333,17 +9343,8 @@ static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
 	u64 totalcpuusage = 0;
 	int i;
 
-	for_each_possible_cpu(i) {
-		u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
-
-		/*
-		 * Take rq->lock to make 64-bit addition safe on 32-bit
-		 * platforms.
-		 */
-		spin_lock_irq(&cpu_rq(i)->lock);
-		totalcpuusage += *cpuusage;
-		spin_unlock_irq(&cpu_rq(i)->lock);
-	}
+	for_each_present_cpu(i)
+		totalcpuusage += cpuacct_cpuusage_read(ca, i);
 
 	return totalcpuusage;
 }
@@ -9360,23 +9361,39 @@ static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
 		goto out;
 	}
 
-	for_each_possible_cpu(i) {
-		u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
+	for_each_present_cpu(i)
+		cpuacct_cpuusage_write(ca, i, 0);
 
-		spin_lock_irq(&cpu_rq(i)->lock);
-		*cpuusage = 0;
-		spin_unlock_irq(&cpu_rq(i)->lock);
-	}
 out:
 	return err;
 }
 
+static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
+				   struct seq_file *m)
+{
+	struct cpuacct *ca = cgroup_ca(cgroup);
+	u64 percpu;
+	int i;
+
+	for_each_present_cpu(i) {
+		percpu = cpuacct_cpuusage_read(ca, i);
+		seq_printf(m, "%llu ", (unsigned long long) percpu);
+	}
+	seq_printf(m, "\n");
+	return 0;
+}
+
 static struct cftype files[] = {
 	{
 		.name = "usage",
 		.read_u64 = cpuusage_read,
 		.write_u64 = cpuusage_write,
 	},
+	{
+		.name = "usage_percpu",
+		.read_seq_string = cpuacct_percpu_seq_read,
+	},
+
 };
 
 static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -9392,14 +9409,16 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
+	int cpu;
 
 	if (!cpuacct_subsys.active)
 		return;
 
+	cpu = task_cpu(tsk);
 	ca = task_ca(tsk);
-	if (ca) {
-		u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
 
+	for (; ca; ca = ca->parent) {
+		u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
 		*cpuusage += cputime;
 	}
 }