@@ -2556,6 +2556,42 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 	return ns;
 }
 
+#ifdef CONFIG_CGROUP_CPUACCT
+struct cgroup_subsys cpuacct_subsys;
+struct cpuacct root_cpuacct;
+#endif
+
+static inline void task_group_account_field(struct task_struct *p,
+					    int index, u64 tmp)
+{
+#ifdef CONFIG_CGROUP_CPUACCT
+	struct kernel_cpustat *kcpustat;
+	struct cpuacct *ca;
+#endif
+	/*
+	 * Since all updates are sure to touch the root cgroup, we
+	 * get ourselves ahead and touch it first. If the root cgroup
+	 * is the only cgroup, then nothing else should be necessary.
+	 *
+	 */
+	__get_cpu_var(kernel_cpustat).cpustat[index] += tmp;
+
+#ifdef CONFIG_CGROUP_CPUACCT
+	if (unlikely(!cpuacct_subsys.active))
+		return;
+
+	rcu_read_lock();
+	ca = task_ca(p);
+	while (ca && (ca != &root_cpuacct)) {
+		kcpustat = this_cpu_ptr(ca->cpustat);
+		kcpustat->cpustat[index] += tmp;
+		ca = parent_ca(ca);
+	}
+	rcu_read_unlock();
+#endif
+}
+
+
 /*
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
@@ -2580,7 +2616,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
 	index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
 	cpustat[index] += tmp;
 
-	cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
+	task_group_account_field(p, index, cputime);
 	/* Account for user time used */
 	acct_update_integrals(p);
 }
@@ -2636,7 +2672,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
 
 	/* Add system time to cpustat. */
 	cpustat[index] += tmp;
-	cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
+	task_group_account_field(p, index, cputime);
 
 	/* Account for system time used */
 	acct_update_integrals(p);
@@ -6781,8 +6817,15 @@ void __init sched_init(void)
 	INIT_LIST_HEAD(&root_task_group.children);
 	INIT_LIST_HEAD(&root_task_group.siblings);
 	autogroup_init(&init_task);
+
 #endif /* CONFIG_CGROUP_SCHED */
 
+#ifdef CONFIG_CGROUP_CPUACCT
+	root_cpuacct.cpustat = &kernel_cpustat;
+	root_cpuacct.cpuusage = alloc_percpu(u64);
+	/* Too early, not expected to fail */
+	BUG_ON(!root_cpuacct.cpuusage);
+#endif
 	for_each_possible_cpu(i) {
 		struct rq *rq;
 
@@ -7843,44 +7886,16 @@ struct cgroup_subsys cpu_cgroup_subsys = {
  * (balbir@in.ibm.com).
  */
 
-/* track cpu usage of a group of tasks and its child groups */
-struct cpuacct {
-	struct cgroup_subsys_state css;
-	/* cpuusage holds pointer to a u64-type object on every cpu */
-	u64 __percpu *cpuusage;
-	struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
-};
-
-struct cgroup_subsys cpuacct_subsys;
-
-/* return cpu accounting group corresponding to this container */
-static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
-{
-	return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id),
-			    struct cpuacct, css);
-}
-
-/* return cpu accounting group to which this task belongs */
-static inline struct cpuacct *task_ca(struct task_struct *tsk)
-{
-	return container_of(task_subsys_state(tsk, cpuacct_subsys_id),
-			    struct cpuacct, css);
-}
-
-static inline struct cpuacct *parent_ca(struct cpuacct *ca)
-{
-	if (!ca || !ca->css.cgroup->parent)
-		return NULL;
-	return cgroup_ca(ca->css.cgroup->parent);
-}
-
 /* create a new cpu accounting group */
 static struct cgroup_subsys_state *cpuacct_create(
 	struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
-	struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
-	int i;
+	struct cpuacct *ca;
 
+	if (!cgrp->parent)
+		return &root_cpuacct.css;
+
+	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
 	if (!ca)
 		goto out;
 
@@ -7888,15 +7903,13 @@ static struct cgroup_subsys_state *cpuacct_create(
 	if (!ca->cpuusage)
 		goto out_free_ca;
 
-	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
-		if (percpu_counter_init(&ca->cpustat[i], 0))
-			goto out_free_counters;
+	ca->cpustat = alloc_percpu(struct kernel_cpustat);
+	if (!ca->cpustat)
+		goto out_free_cpuusage;
 
 	return &ca->css;
 
-out_free_counters:
-	while (--i >= 0)
-		percpu_counter_destroy(&ca->cpustat[i]);
+out_free_cpuusage:
 	free_percpu(ca->cpuusage);
 out_free_ca:
 	kfree(ca);
@@ -7909,10 +7922,8 @@ static void
 cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
 	struct cpuacct *ca = cgroup_ca(cgrp);
-	int i;
 
-	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
-		percpu_counter_destroy(&ca->cpustat[i]);
+	free_percpu(ca->cpustat);
 	free_percpu(ca->cpuusage);
 	kfree(ca);
 }
@@ -8005,16 +8016,31 @@ static const char *cpuacct_stat_desc[] = {
 };
 
 static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
-		struct cgroup_map_cb *cb)
+			      struct cgroup_map_cb *cb)
 {
 	struct cpuacct *ca = cgroup_ca(cgrp);
-	int i;
+	int cpu;
+	s64 val = 0;
 
-	for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {
-		s64 val = percpu_counter_read(&ca->cpustat[i]);
-		val = cputime64_to_clock_t(val);
-		cb->fill(cb, cpuacct_stat_desc[i], val);
+	for_each_online_cpu(cpu) {
+		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
+		val += kcpustat->cpustat[CPUTIME_USER];
+		val += kcpustat->cpustat[CPUTIME_NICE];
 	}
+	val = cputime64_to_clock_t(val);
+	cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val);
+
+	val = 0;
+	for_each_online_cpu(cpu) {
+		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
+		val += kcpustat->cpustat[CPUTIME_SYSTEM];
+		val += kcpustat->cpustat[CPUTIME_IRQ];
+		val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
+	}
+
+	val = cputime64_to_clock_t(val);
+	cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
+
 	return 0;
 }
 
@@ -8066,45 +8092,6 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 	rcu_read_unlock();
 }
 
-/*
- * When CONFIG_VIRT_CPU_ACCOUNTING is enabled one jiffy can be very large
- * in cputime_t units. As a result, cpuacct_update_stats calls
- * percpu_counter_add with values large enough to always overflow the
- * per cpu batch limit causing bad SMP scalability.
- *
- * To fix this we scale percpu_counter_batch by cputime_one_jiffy so we
- * batch the same amount of time with CONFIG_VIRT_CPU_ACCOUNTING disabled
- * and enabled. We cap it at INT_MAX which is the largest allowed batch value.
- */
-#ifdef CONFIG_SMP
-#define CPUACCT_BATCH \
-	min_t(long, percpu_counter_batch * cputime_one_jiffy, INT_MAX)
-#else
-#define CPUACCT_BATCH 0
-#endif
-
-/*
- * Charge the system/user time to the task's accounting group.
- */
-void cpuacct_update_stats(struct task_struct *tsk,
-			  enum cpuacct_stat_index idx, cputime_t val)
-{
-	struct cpuacct *ca;
-	int batch = CPUACCT_BATCH;
-
-	if (unlikely(!cpuacct_subsys.active))
-		return;
-
-	rcu_read_lock();
-	ca = task_ca(tsk);
-
-	do {
-		__percpu_counter_add(&ca->cpustat[idx], val, batch);
-		ca = parent_ca(ca);
-	} while (ca);
-	rcu_read_unlock();
-}
-
 struct cgroup_subsys cpuacct_subsys = {
 	.name = "cpuacct",
 	.create = cpuacct_create,