@@ -111,11 +111,12 @@ static void __perf_counter_remove_from_context(void *info)
 	spin_lock(&ctx->lock);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
-		counter->hw_ops->disable(counter);
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->hw_ops->disable(counter);
 		ctx->nr_active--;
 		cpuctx->active_oncpu--;
 		counter->task = NULL;
+		counter->oncpu = -1;
 	}
 	ctx->nr_counters--;
 
@@ -192,8 +193,36 @@ retry:
 	spin_unlock_irq(&ctx->lock);
 }
 
+static int
+counter_sched_in(struct perf_counter *counter,
+		 struct perf_cpu_context *cpuctx,
+		 struct perf_counter_context *ctx,
+		 int cpu)
+{
+	if (counter->state == PERF_COUNTER_STATE_OFF)
+		return 0;
+
+	counter->state = PERF_COUNTER_STATE_ACTIVE;
+	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
+	/*
+	 * The new state must be visible before we turn it on in the hardware:
+	 */
+	smp_wmb();
+
+	if (counter->hw_ops->enable(counter)) {
+		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->oncpu = -1;
+		return -EAGAIN;
+	}
+
+	cpuctx->active_oncpu++;
+	ctx->nr_active++;
+
+	return 0;
+}
+
 /*
- * Cross CPU call to install and enable a preformance counter
+ * Cross CPU call to install and enable a performance counter
  */
 static void __perf_install_in_context(void *info)
 {
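Note: counter_sched_in() now commits the software state (state, oncpu) and publishes it with smp_wmb() before asking the hardware to start counting, and it rolls that state back if ->enable() refuses. The -EAGAIN return means 'the PMU has no free slot', not a hard error. A minimal caller-side sketch of that contract, assuming the context lock is already held (illustrative only, not code from this patch):

	/* Hypothetical caller: schedule counters until the PMU is full. */
	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
		if (counter_sched_in(counter, cpuctx, ctx, cpu))
			break;	/* PMU full: remaining counters stay INACTIVE */
	}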
@@ -220,22 +249,17 @@ static void __perf_install_in_context(void *info)
 	 * counters on a global level. NOP for non NMI based counters.
 	 */
 	perf_flags = hw_perf_save_disable();
-	list_add_counter(counter, ctx);
-	hw_perf_restore(perf_flags);
 
+	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
 
-	if (cpuctx->active_oncpu < perf_max_counters) {
-		counter->state = PERF_COUNTER_STATE_ACTIVE;
-		counter->oncpu = cpu;
-		ctx->nr_active++;
-		cpuctx->active_oncpu++;
-		counter->hw_ops->enable(counter);
-	}
+	counter_sched_in(counter, cpuctx, ctx, cpu);
 
 	if (!ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
+	hw_perf_restore(perf_flags);
+
 	spin_unlock(&ctx->lock);
 	curr_rq_unlock_irq_restore(&flags);
 }
@@ -302,8 +326,8 @@ counter_sched_out(struct perf_counter *counter,
 	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 		return;
 
-	counter->hw_ops->disable(counter);
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
+	counter->hw_ops->disable(counter);
 	counter->oncpu = -1;
 
 	cpuctx->active_oncpu--;
@@ -326,6 +350,22 @@ group_sched_out(struct perf_counter *group_counter,
 		counter_sched_out(counter, cpuctx, ctx);
 }
 
+void __perf_counter_sched_out(struct perf_counter_context *ctx,
+			      struct perf_cpu_context *cpuctx)
+{
+	struct perf_counter *counter;
+
+	if (likely(!ctx->nr_counters))
+		return;
+
+	spin_lock(&ctx->lock);
+	if (ctx->nr_active) {
+		list_for_each_entry(counter, &ctx->counter_list, list_entry)
+			group_sched_out(counter, cpuctx, ctx);
+	}
+	spin_unlock(&ctx->lock);
+}
+
 /*
  * Called from scheduler to remove the counters of the current task,
  * with interrupts disabled.
@@ -341,39 +381,18 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 {
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_counter_context *ctx = &task->perf_counter_ctx;
-	struct perf_counter *counter;
 
 	if (likely(!cpuctx->task_ctx))
 		return;
 
-	spin_lock(&ctx->lock);
-	if (ctx->nr_active) {
-		list_for_each_entry(counter, &ctx->counter_list, list_entry)
-			group_sched_out(counter, cpuctx, ctx);
-	}
-	spin_unlock(&ctx->lock);
+	__perf_counter_sched_out(ctx, cpuctx);
+
 	cpuctx->task_ctx = NULL;
 }
 
-static int
-counter_sched_in(struct perf_counter *counter,
-		 struct perf_cpu_context *cpuctx,
-		 struct perf_counter_context *ctx,
-		 int cpu)
+static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
 {
-	if (counter->state == PERF_COUNTER_STATE_OFF)
-		return 0;
-
-	if (counter->hw_ops->enable(counter))
-		return -EAGAIN;
-
-	counter->state = PERF_COUNTER_STATE_ACTIVE;
-	counter->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
-
-	cpuctx->active_oncpu++;
-	ctx->nr_active++;
-
-	return 0;
+	__perf_counter_sched_out(&cpuctx->ctx, cpuctx);
 }
 
 static int
@@ -416,21 +435,10 @@ group_error:
 	return -EAGAIN;
 }
 
-/*
- * Called from scheduler to add the counters of the current task
- * with interrupts disabled.
- *
- * We restore the counter value and then enable it.
- *
- * This does not protect us against NMI, but enable()
- * sets the enabled bit in the control field of counter _before_
- * accessing the counter control register. If a NMI hits, then it will
- * keep the counter running.
- */
-void perf_counter_task_sched_in(struct task_struct *task, int cpu)
+static void
+__perf_counter_sched_in(struct perf_counter_context *ctx,
+			struct perf_cpu_context *cpuctx, int cpu)
 {
-	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
-	struct perf_counter_context *ctx = &task->perf_counter_ctx;
 	struct perf_counter *counter;
 
 	if (likely(!ctx->nr_counters))
@@ -453,10 +461,35 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
 			break;
 	}
 	spin_unlock(&ctx->lock);
+}
 
+/*
+ * Called from scheduler to add the counters of the current task
+ * with interrupts disabled.
+ *
+ * We restore the counter value and then enable it.
+ *
+ * This does not protect us against NMI, but enable()
+ * sets the enabled bit in the control field of counter _before_
+ * accessing the counter control register. If a NMI hits, then it will
+ * keep the counter running.
+ */
+void perf_counter_task_sched_in(struct task_struct *task, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = &task->perf_counter_ctx;
+
+	__perf_counter_sched_in(ctx, cpuctx, cpu);
 	cpuctx->task_ctx = ctx;
 }
 
+static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
+{
+	struct perf_counter_context *ctx = &cpuctx->ctx;
+
+	__perf_counter_sched_in(ctx, cpuctx, cpu);
+}
+
 int perf_counter_task_disable(void)
 {
 	struct task_struct *curr = current;
@@ -514,6 +547,8 @@ int perf_counter_task_enable(void)
 	/* force the update of the task clock: */
 	__task_delta_exec(curr, 1);
 
+	perf_counter_task_sched_out(curr, cpu);
+
 	spin_lock(&ctx->lock);
 
 	/*
@@ -538,19 +573,18 @@ int perf_counter_task_enable(void)
 	return 0;
 }
 
-void perf_counter_task_tick(struct task_struct *curr, int cpu)
+/*
+ * Round-robin a context's counters:
+ */
+static void rotate_ctx(struct perf_counter_context *ctx)
 {
-	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
 	u64 perf_flags;
 
-	if (likely(!ctx->nr_counters))
+	if (!ctx->nr_counters)
 		return;
 
-	perf_counter_task_sched_out(curr, cpu);
-
 	spin_lock(&ctx->lock);
-
 	/*
 	 * Rotate the first entry last (works just fine for group counters too):
 	 */
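Note: the rotation itself sits just below this hunk's context window, so it is not visible here. As a rough illustration of what "rotate the first entry last" amounts to with the kernel list API (a sketch under that assumption, not the patch's own body), it boils down to moving the head of ctx->counter_list to its tail:

	/* Illustrative sketch: the head counter (or group leader) goes to the
	 * back of the list, so successive ticks give every counter a turn. */
	if (!list_empty(&ctx->counter_list)) {
		struct perf_counter *first;

		first = list_first_entry(&ctx->counter_list,
					 struct perf_counter, list_entry);
		list_move_tail(&first->list_entry, &ctx->counter_list);
	}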
@@ -563,7 +597,24 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	hw_perf_restore(perf_flags);
 
 	spin_unlock(&ctx->lock);
+}
+
+void perf_counter_task_tick(struct task_struct *curr, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
+	const int rotate_percpu = 0;
+
+	if (rotate_percpu)
+		perf_counter_cpu_sched_out(cpuctx);
+	perf_counter_task_sched_out(curr, cpu);
 
+	if (rotate_percpu)
+		rotate_ctx(&cpuctx->ctx);
+	rotate_ctx(ctx);
+
+	if (rotate_percpu)
+		perf_counter_cpu_sched_in(cpuctx, cpu);
 	perf_counter_task_sched_in(curr, cpu);
 }
 
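Note: since rotate_percpu is a compile-time 0 here, the per-CPU branches are dead code for now and the tick effectively reduces to the task-context cycle below; flipping the constant would apply the same sched-out / rotate / sched-in sequence to cpuctx->ctx as well. An equivalent sketch of the current behaviour (not a separate function in the patch):

	/* Effective tick path while rotate_percpu == 0 */
	perf_counter_task_sched_out(curr, cpu);
	rotate_ctx(&curr->perf_counter_ctx);
	perf_counter_task_sched_in(curr, cpu);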
@@ -905,8 +956,6 @@ static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
 	struct task_struct *curr = counter->task;
 	u64 delta;
 
-	WARN_ON_ONCE(counter->task != current);
-
 	delta = __task_delta_exec(curr, update);
 
 	return curr->se.sum_exec_runtime + delta;
@@ -1160,6 +1209,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->group_leader		= group_leader;
 	counter->hw_ops			= NULL;
 
+	counter->state = PERF_COUNTER_STATE_INACTIVE;
 	if (hw_event->disabled)
 		counter->state = PERF_COUNTER_STATE_OFF;
 
@@ -1331,35 +1381,49 @@ __perf_counter_exit_task(struct task_struct *child,
 {
 	struct perf_counter *parent_counter;
 	u64 parent_val, child_val;
-	unsigned long flags;
-	u64 perf_flags;
 
 	/*
-	 * Disable and unlink this counter.
-	 *
-	 * Be careful about zapping the list - IRQ/NMI context
-	 * could still be processing it:
+	 * If we do not self-reap then we have to wait for the
+	 * child task to unschedule (it will happen for sure),
+	 * so that its counter is at its final count. (This
+	 * condition triggers rarely - child tasks usually get
+	 * off their CPU before the parent has a chance to
+	 * get this far into the reaping action)
 	 */
-	curr_rq_lock_irq_save(&flags);
-	perf_flags = hw_perf_save_disable();
-
-	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
+	if (child != current) {
+		wait_task_inactive(child, 0);
+		list_del_init(&child_counter->list_entry);
+	} else {
 		struct perf_cpu_context *cpuctx;
+		unsigned long flags;
+		u64 perf_flags;
+
+		/*
+		 * Disable and unlink this counter.
+		 *
+		 * Be careful about zapping the list - IRQ/NMI context
+		 * could still be processing it:
+		 */
+		curr_rq_lock_irq_save(&flags);
+		perf_flags = hw_perf_save_disable();
 
 		cpuctx = &__get_cpu_var(perf_cpu_context);
 
-		child_counter->hw_ops->disable(child_counter);
-		child_counter->state = PERF_COUNTER_STATE_INACTIVE;
-		child_counter->oncpu = -1;
+		if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
+			child_counter->state = PERF_COUNTER_STATE_INACTIVE;
+			child_counter->hw_ops->disable(child_counter);
+			cpuctx->active_oncpu--;
+			child_ctx->nr_active--;
+			child_counter->oncpu = -1;
+		}
 
-		cpuctx->active_oncpu--;
-		child_ctx->nr_active--;
-	}
+		list_del_init(&child_counter->list_entry);
 
-	list_del_init(&child_counter->list_entry);
+		child_ctx->nr_counters--;
 
-	hw_perf_restore(perf_flags);
-	curr_rq_unlock_irq_restore(&flags);
+		hw_perf_restore(perf_flags);
+		curr_rq_unlock_irq_restore(&flags);
+	}
 
 	parent_counter = child_counter->parent;
 	/*