|
@@ -1418,10 +1418,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
|
|
struct rq_iterator *iterator);
|
|
|
#endif
|
|
|
|
|
|
+/* Index into the per-group cpustat[] counters: time spent by the tasks of the cpu accounting group executing in ... */
|
|
|
+enum cpuacct_stat_index {
|
|
|
+ CPUACCT_STAT_USER, /* ... user mode (charged from account_user_time()) */
|
|
|
+ CPUACCT_STAT_SYSTEM, /* ... kernel mode (charged from account_system_time()) */
|
|
|
+
|
|
|
+ CPUACCT_STAT_NSTATS, /* number of statistics; used as the cpustat[] array size and loop bound, so it must stay last */
|
|
|
+};
|
|
|
+
|
|
|
#ifdef CONFIG_CGROUP_CPUACCT /* forward declarations; the definitions appear later in this file */
|
|
|
static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
|
|
|
+static void cpuacct_update_stats(struct task_struct *tsk,
|
|
|
+ enum cpuacct_stat_index idx, cputime_t val);
|
|
|
#else /* !CONFIG_CGROUP_CPUACCT: empty inline stubs, so the accounting calls do nothing */
|
|
|
static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
|
|
|
+static inline void cpuacct_update_stats(struct task_struct *tsk,
|
|
|
+ enum cpuacct_stat_index idx, cputime_t val) {}
|
|
|
#endif /* CONFIG_CGROUP_CPUACCT */
|
|
|
|
|
|
static inline void inc_cpu_load(struct rq *rq, unsigned long load)
|
|
@@ -4511,9 +4523,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
|
|
|
EXPORT_PER_CPU_SYMBOL(kstat);
|
|
|
|
|
|
/*
|
|
|
- * Return any ns on the sched_clock that have not yet been banked in
|
|
|
+ * Return any ns on the sched_clock that have not yet been accounted in
|
|
|
* @p in case that task is currently running.
|
|
|
+ *
|
|
|
+ * Called with task_rq_lock() held on @rq.
|
|
|
*/
|
|
|
+static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) /* returns rq->clock - p->se.exec_start when @p is current on @rq, else 0 */
|
|
|
+{
|
|
|
+ u64 ns = 0; /* stays 0 unless the task_current() check below succeeds */
|
|
|
+
|
|
|
+ if (task_current(rq, p)) {
|
|
|
+ update_rq_clock(rq); /* presumably brings rq->clock up to date before it is sampled - confirm */
|
|
|
+ ns = rq->clock - p->se.exec_start; /* unsigned difference; wraps if exec_start is ahead of rq->clock */
|
|
|
+ if ((s64)ns < 0) /* reinterpret as signed to detect a wrapped (negative) delta */
|
|
|
+ ns = 0; /* clamp so a huge wrapped value is never returned */
|
|
|
+ }
|
|
|
+
|
|
|
+ return ns;
|
|
|
+}
|
|
|
+
|
|
|
unsigned long long task_delta_exec(struct task_struct *p)
|
|
|
{
|
|
|
unsigned long flags;
|
|
@@ -4521,16 +4549,49 @@ unsigned long long task_delta_exec(struct task_struct *p)
|
|
|
u64 ns = 0;
|
|
|
|
|
|
rq = task_rq_lock(p, &flags);
|
|
|
+ ns = do_task_delta_exec(p, rq);
|
|
|
+ task_rq_unlock(rq, &flags);
|
|
|
|
|
|
- if (task_current(rq, p)) {
|
|
|
- u64 delta_exec;
|
|
|
+ return ns;
|
|
|
+}
|
|
|
|
|
|
- update_rq_clock(rq);
|
|
|
- delta_exec = rq->clock - p->se.exec_start;
|
|
|
- if ((s64)delta_exec > 0)
|
|
|
- ns = delta_exec;
|
|
|
- }
|
|
|
+/*
|
|
|
+ * Return accounted runtime for the task.
|
|
|
+ * In case the task is currently running, return the runtime plus current's
|
|
|
+ * pending runtime that has not been accounted yet.
|
|
|
+ */
|
|
|
+unsigned long long task_sched_runtime(struct task_struct *p) /* returns p->se.sum_exec_runtime plus @p's not-yet-accounted delta */
|
|
|
+{
|
|
|
+ unsigned long flags; /* filled in by task_rq_lock() and handed back to task_rq_unlock() */
|
|
|
+ struct rq *rq;
|
|
|
+ u64 ns = 0; /* initial value is always overwritten below */
|
|
|
+
|
|
|
+ rq = task_rq_lock(p, &flags); /* do_task_delta_exec() must be called with this lock held */
|
|
|
+ ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); /* both terms are read under the same lock */
|
|
|
+ task_rq_unlock(rq, &flags);
|
|
|
+
|
|
|
+ return ns;
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Return sum_exec_runtime for the thread group.
|
|
|
+ * In case the task is currently running, return the sum plus current's
|
|
|
+ * pending runtime that has not been accounted yet.
|
|
|
+ *
|
|
|
+ * Note that the thread group might have other running tasks as well,
|
|
|
+ * so the return value does not include the pending runtime that other
|
|
|
+ * running tasks might have.
|
|
|
+ */
|
|
|
+unsigned long long thread_group_sched_runtime(struct task_struct *p)
|
|
|
+{
|
|
|
+ struct task_cputime totals;
|
|
|
+ unsigned long flags;
|
|
|
+ struct rq *rq;
|
|
|
+ u64 ns;
|
|
|
|
|
|
+ rq = task_rq_lock(p, &flags);
|
|
|
+ thread_group_cputime(p, &totals);
|
|
|
+ ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
|
|
|
task_rq_unlock(rq, &flags);
|
|
|
|
|
|
return ns;
|
|
@@ -4559,6 +4620,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
|
|
|
cpustat->nice = cputime64_add(cpustat->nice, tmp);
|
|
|
else
|
|
|
cpustat->user = cputime64_add(cpustat->user, tmp);
|
|
|
+
|
|
|
+ cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
|
|
|
/* Account for user time used */
|
|
|
acct_update_integrals(p);
|
|
|
}
|
|
@@ -4620,6 +4683,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
|
|
|
else
|
|
|
cpustat->system = cputime64_add(cpustat->system, tmp);
|
|
|
|
|
|
+ cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
|
|
|
+
|
|
|
/* Account for system time used */
|
|
|
acct_update_integrals(p);
|
|
|
}
|
|
@@ -7302,7 +7367,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
|
|
|
cpumask_or(groupmask, groupmask, sched_group_cpus(group));
|
|
|
|
|
|
cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
|
|
|
- printk(KERN_CONT " %s", str);
|
|
|
+ printk(KERN_CONT " %s (__cpu_power = %d)", str,
|
|
|
+ group->__cpu_power);
|
|
|
|
|
|
group = group->next;
|
|
|
} while (group != sd->groups);
|
|
@@ -9925,6 +9991,7 @@ struct cpuacct {
|
|
|
struct cgroup_subsys_state css;
|
|
|
/* cpuusage holds pointer to a u64-type object on every cpu */
|
|
|
u64 *cpuusage;
|
|
|
+ struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
|
|
|
struct cpuacct *parent;
|
|
|
};
|
|
|
|
|
@@ -9949,20 +10016,32 @@ static struct cgroup_subsys_state *cpuacct_create(
|
|
|
struct cgroup_subsys *ss, struct cgroup *cgrp)
|
|
|
{
|
|
|
struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
|
|
|
+ int i;
|
|
|
|
|
|
if (!ca)
|
|
|
- return ERR_PTR(-ENOMEM);
|
|
|
+ goto out;
|
|
|
|
|
|
ca->cpuusage = alloc_percpu(u64);
|
|
|
- if (!ca->cpuusage) {
|
|
|
- kfree(ca);
|
|
|
- return ERR_PTR(-ENOMEM);
|
|
|
- }
|
|
|
+ if (!ca->cpuusage)
|
|
|
+ goto out_free_ca;
|
|
|
+
|
|
|
+ for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
|
|
|
+ if (percpu_counter_init(&ca->cpustat[i], 0))
|
|
|
+ goto out_free_counters;
|
|
|
|
|
|
if (cgrp->parent)
|
|
|
ca->parent = cgroup_ca(cgrp->parent);
|
|
|
|
|
|
return &ca->css;
|
|
|
+
|
|
|
+out_free_counters:
|
|
|
+ while (--i >= 0)
|
|
|
+ percpu_counter_destroy(&ca->cpustat[i]);
|
|
|
+ free_percpu(ca->cpuusage);
|
|
|
+out_free_ca:
|
|
|
+ kfree(ca);
|
|
|
+out:
|
|
|
+ return ERR_PTR(-ENOMEM);
|
|
|
}
|
|
|
|
|
|
/* destroy an existing cpu accounting group */
|
|
@@ -9970,7 +10049,10 @@ static void
|
|
|
cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
|
|
|
{
|
|
|
struct cpuacct *ca = cgroup_ca(cgrp);
|
|
|
+ int i;
|
|
|
|
|
|
+ for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
|
|
|
+ percpu_counter_destroy(&ca->cpustat[i]);
|
|
|
free_percpu(ca->cpuusage);
|
|
|
kfree(ca);
|
|
|
}
|
|
@@ -10057,6 +10139,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+static const char *cpuacct_stat_desc[] = { /* key names passed to cb->fill() by cpuacct_stats_show(), indexed by enum cpuacct_stat_index */
|
|
|
+ [CPUACCT_STAT_USER] = "user",
|
|
|
+ [CPUACCT_STAT_SYSTEM] = "system",
|
|
|
+}; /* NOTE(review): has no entry for any index added later to the enum; the show loop would then pass NULL as the key */
|
|
|
+
|
|
|
+static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft, /* read_map handler of the "stat" file; @cft is unused here */
|
|
|
+ struct cgroup_map_cb *cb)
|
|
|
+{
|
|
|
+ struct cpuacct *ca = cgroup_ca(cgrp); /* accounting group that belongs to @cgrp */
|
|
|
+ int i;
|
|
|
+
|
|
|
+ for (i = 0; i < CPUACCT_STAT_NSTATS; i++) { /* one key/value pair per statistic */
|
|
|
+ s64 val = percpu_counter_read(&ca->cpustat[i]); /* NOTE(review): presumably a cheap, possibly approximate read - confirm */
|
|
|
+ val = cputime64_to_clock_t(val); /* presumably converts cputime units to clock_t ticks for user space - confirm */
|
|
|
+ cb->fill(cb, cpuacct_stat_desc[i], val); /* emit the pair as "user"/"system" plus the converted value */
|
|
|
+ }
|
|
|
+ return 0; /* always reports success */
|
|
|
+}
|
|
|
+
|
|
|
static struct cftype files[] = {
|
|
|
{
|
|
|
.name = "usage",
|
|
@@ -10067,7 +10168,10 @@ static struct cftype files[] = {
|
|
|
.name = "usage_percpu",
|
|
|
.read_seq_string = cpuacct_percpu_seq_read,
|
|
|
},
|
|
|
-
|
|
|
+ {
|
|
|
+ .name = "stat",
|
|
|
+ .read_map = cpuacct_stats_show,
|
|
|
+ },
|
|
|
};
|
|
|
|
|
|
static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
|
|
@@ -10089,12 +10193,38 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
|
|
|
return;
|
|
|
|
|
|
cpu = task_cpu(tsk);
|
|
|
+
|
|
|
+ rcu_read_lock();
|
|
|
+
|
|
|
ca = task_ca(tsk);
|
|
|
|
|
|
for (; ca; ca = ca->parent) {
|
|
|
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
|
|
|
*cpuusage += cputime;
|
|
|
}
|
|
|
+
|
|
|
+ rcu_read_unlock();
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Charge the system/user time @val to the cpustat[@idx] counter of the task's accounting group and of every ancestor group.
|
|
|
+ */
|
|
|
+static void cpuacct_update_stats(struct task_struct *tsk,
|
|
|
+ enum cpuacct_stat_index idx, cputime_t val)
|
|
|
+{
|
|
|
+ struct cpuacct *ca;
|
|
|
+
|
|
|
+ if (unlikely(!cpuacct_subsys.active)) /* nothing to charge while the subsystem is not marked active */
|
|
|
+ return;
|
|
|
+
|
|
|
+ rcu_read_lock(); /* held across the task_ca() lookup and the whole parent walk */
|
|
|
+ ca = task_ca(tsk); /* NOTE(review): dereferenced without a NULL check below - assumes task_ca() never returns NULL */
|
|
|
+
|
|
|
+ do {
|
|
|
+ percpu_counter_add(&ca->cpustat[idx], val); /* add @val to this group's counter for @idx */
|
|
|
+ ca = ca->parent; /* step up one level; the loop ends once parent is NULL */
|
|
|
+ } while (ca);
|
|
|
+ rcu_read_unlock();
|
|
|
}
|
|
|
|
|
|
struct cgroup_subsys cpuacct_subsys = {
|