@@ -80,8 +80,6 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 
 		list_del_init(&sibling->list_entry);
 		list_add_tail(&sibling->list_entry, &ctx->counter_list);
-		WARN_ON_ONCE(!sibling->group_leader);
-		WARN_ON_ONCE(sibling->group_leader == sibling);
 		sibling->group_leader = sibling;
 	}
 }
@@ -97,6 +95,7 @@ static void __perf_counter_remove_from_context(void *info)
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
+	unsigned long flags;
 	u64 perf_flags;
 
 	/*
@@ -107,7 +106,7 @@ static void __perf_counter_remove_from_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		counter->hw_ops->hw_perf_counter_disable(counter);
@@ -136,7 +135,7 @@ static void __perf_counter_remove_from_context(void *info)
 			    perf_max_counters - perf_reserved_percpu);
 	}
 
-	spin_unlock(&ctx->lock);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
 
@@ -199,6 +198,7 @@ static void __perf_install_in_context(void *info)
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
 	int cpu = smp_processor_id();
+	unsigned long flags;
 	u64 perf_flags;
 
 	/*
@@ -209,7 +209,7 @@ static void __perf_install_in_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock(&ctx->lock);
+	spin_lock_irqsave(&ctx->lock, flags);
 
 	/*
 	 * Protect the list operation against NMI by disabling the
@@ -232,7 +232,7 @@ static void __perf_install_in_context(void *info)
 	if (!ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
-	spin_unlock(&ctx->lock);
+	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
 /*
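
Note on the locking change in the hunks above: both __perf_counter_remove_from_context() and __perf_install_in_context() now take ctx->lock with the irqsave variant. The usual reason for this pattern, and presumably the one here, is that these helpers can be reached both with interrupts enabled (direct call in process context) and with interrupts already disabled (cross-CPU call path), and the critical section around the counter list must not be interruptible on the owning CPU. A minimal sketch of the canonical form both hunks switch to, not new code from this patch:

	unsigned long flags;

	spin_lock_irqsave(&ctx->lock, flags);	/* disables IRQs, remembers the prior state */
	/* ... manipulate ctx->counter_list ... */
	spin_unlock_irqrestore(&ctx->lock, flags);	/* re-enables IRQs only if they were on */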
@@ -446,10 +446,9 @@ int perf_counter_task_disable(void)
 	 */
 	perf_flags = hw_perf_save_disable();
 
-	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
-		WARN_ON_ONCE(counter->state == PERF_COUNTER_STATE_ACTIVE);
+	list_for_each_entry(counter, &ctx->counter_list, list_entry)
 		counter->state = PERF_COUNTER_STATE_OFF;
-	}
+
 	hw_perf_restore(perf_flags);
 
 	spin_unlock(&ctx->lock);
@@ -525,26 +524,6 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	perf_counter_task_sched_in(curr, cpu);
 }
 
-/*
- * Initialize the perf_counter context in a task_struct:
- */
-static void
-__perf_counter_init_context(struct perf_counter_context *ctx,
-			    struct task_struct *task)
-{
-	spin_lock_init(&ctx->lock);
-	INIT_LIST_HEAD(&ctx->counter_list);
-	ctx->nr_counters = 0;
-	ctx->task = task;
-}
-/*
- * Initialize the perf_counter context in task_struct
- */
-void perf_counter_init_task(struct task_struct *task)
-{
-	__perf_counter_init_context(&task->perf_counter_ctx, task);
-}
-
 /*
  * Cross CPU call to read the hardware counter
  */
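
The two helpers removed above are not gone for good: the same __perf_counter_init_context()/perf_counter_init_task() pair is re-added near the bottom of this patch, after sys_perf_counter_open(), where the context initializer now zeroes the whole structure first and the explicit field reset becomes unnecessary. A sketch of the re-added body, as it appears further down:

	memset(ctx, 0, sizeof(*ctx));		/* subsumes ctx->nr_counters = 0 */
	spin_lock_init(&ctx->lock);
	INIT_LIST_HEAD(&ctx->counter_list);
	ctx->task = task;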
@@ -663,7 +642,6 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 		cpuctx = &per_cpu(perf_cpu_context, cpu);
 		ctx = &cpuctx->ctx;
 
-		WARN_ON_ONCE(ctx->task);
 		return ctx;
 	}
 
@@ -915,12 +893,13 @@ sw_perf_counter_init(struct perf_counter *counter)
 static struct perf_counter *
 perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 		   int cpu,
-		   struct perf_counter *group_leader)
+		   struct perf_counter *group_leader,
+		   gfp_t gfpflags)
 {
 	const struct hw_perf_counter_ops *hw_ops;
 	struct perf_counter *counter;
 
-	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+	counter = kzalloc(sizeof(*counter), gfpflags);
 	if (!counter)
 		return NULL;
 
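
The new gfp_t parameter exists so that callers which cannot sleep can still allocate a counter. Both flavors appear later in this patch: the syscall path keeps GFP_KERNEL, while inherit_counter() is called under the parent context's lock (taken with spin_lock_irqsave) and therefore has to use GFP_ATOMIC. Roughly, condensing the two call sites found below:

	/* sys_perf_counter_open() - process context, may sleep: */
	counter = perf_counter_alloc(&hw_event, cpu, group_leader, GFP_KERNEL);

	/* inherit_counter() - runs under parent_ctx->lock, must not sleep: */
	child_counter = perf_counter_alloc(&parent_counter->hw_event,
					   parent_counter->cpu, NULL, GFP_ATOMIC);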
@@ -947,9 +926,8 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	hw_ops = NULL;
 	if (!hw_event->raw && hw_event->type < 0)
 		hw_ops = sw_perf_counter_init(counter);
-	if (!hw_ops) {
+	if (!hw_ops)
 		hw_ops = hw_perf_counter_init(counter);
-	}
 
 	if (!hw_ops) {
 		kfree(counter);
@@ -975,8 +953,10 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
 	struct perf_counter *counter, *group_leader;
 	struct perf_counter_hw_event hw_event;
 	struct perf_counter_context *ctx;
+	struct file *counter_file = NULL;
 	struct file *group_file = NULL;
 	int fput_needed = 0;
+	int fput_needed2 = 0;
 	int ret;
 
 	if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
@@ -1017,25 +997,29 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user,
 	}
 
 	ret = -EINVAL;
-	counter = perf_counter_alloc(&hw_event, cpu, group_leader);
+	counter = perf_counter_alloc(&hw_event, cpu, group_leader, GFP_KERNEL);
 	if (!counter)
 		goto err_put_context;
 
-	perf_install_in_context(ctx, counter, cpu);
-
 	ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
 	if (ret < 0)
-		goto err_remove_free_put_context;
+		goto err_free_put_context;
+
+	counter_file = fget_light(ret, &fput_needed2);
+	if (!counter_file)
+		goto err_free_put_context;
+
+	counter->filp = counter_file;
+	perf_install_in_context(ctx, counter, cpu);
+
+	fput_light(counter_file, fput_needed2);
 
 out_fput:
 	fput_light(group_file, fput_needed);
 
 	return ret;
 
-err_remove_free_put_context:
-	mutex_lock(&counter->mutex);
-	perf_counter_remove_from_context(counter);
-	mutex_unlock(&counter->mutex);
+err_free_put_context:
 	kfree(counter);
 
 err_put_context:
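
The reordering in the hunk above changes the lifecycle of a new counter: it is only installed into its context, and thus made visible to the scheduler and to the fork-time inheritance added by this patch, after the anon-inode file exists and counter->filp points at it. That is presumably why the old err_remove_free_put_context unwind can disappear: nothing is installed until every step that can fail has succeeded. A condensed sketch of the resulting success path (error handling omitted; a summary, not the literal code):

	counter = perf_counter_alloc(&hw_event, cpu, group_leader, GFP_KERNEL);
	ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
	counter_file = fget_light(ret, &fput_needed2);
	counter->filp = counter_file;		/* filp is valid before the counter becomes visible */
	perf_install_in_context(ctx, counter, cpu);
	fput_light(counter_file, fput_needed2);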
@@ -1044,6 +1028,186 @@ err_put_context:
 	goto out_fput;
 }
 
+/*
+ * Initialize the perf_counter context in a task_struct:
+ */
+static void
+__perf_counter_init_context(struct perf_counter_context *ctx,
+			    struct task_struct *task)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	spin_lock_init(&ctx->lock);
+	INIT_LIST_HEAD(&ctx->counter_list);
+	ctx->task = task;
+}
+
+/*
+ * inherit a counter from parent task to child task:
+ */
+static int
+inherit_counter(struct perf_counter *parent_counter,
+		struct task_struct *parent,
+		struct perf_counter_context *parent_ctx,
+		struct task_struct *child,
+		struct perf_counter_context *child_ctx)
+{
+	struct perf_counter *child_counter;
+
+	child_counter = perf_counter_alloc(&parent_counter->hw_event,
+					   parent_counter->cpu, NULL,
+					   GFP_ATOMIC);
+	if (!child_counter)
+		return -ENOMEM;
+
+	/*
+	 * Link it up in the child's context:
+	 */
+	child_counter->ctx = child_ctx;
+	child_counter->task = child;
+	list_add_counter(child_counter, child_ctx);
+	child_ctx->nr_counters++;
+
+	child_counter->parent = parent_counter;
+	parent_counter->nr_inherited++;
+	/*
+	 * inherit into child's child as well:
+	 */
+	child_counter->hw_event.inherit = 1;
+
+	/*
+	 * Get a reference to the parent filp - we will fput it
+	 * when the child counter exits. This is safe to do because
+	 * we are in the parent and we know that the filp still
+	 * exists and has a nonzero count:
+	 */
+	atomic_long_inc(&parent_counter->filp->f_count);
+
+	return 0;
+}
+
+static void
+__perf_counter_exit_task(struct task_struct *child,
+			 struct perf_counter *child_counter,
+			 struct perf_counter_context *child_ctx)
+{
+	struct perf_counter *parent_counter;
+	u64 parent_val, child_val;
+	u64 perf_flags;
+
+	/*
+	 * Disable and unlink this counter.
+	 *
+	 * Be careful about zapping the list - IRQ/NMI context
+	 * could still be processing it:
+	 */
+	local_irq_disable();
+	perf_flags = hw_perf_save_disable();
+
+	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE)
+		child_counter->hw_ops->hw_perf_counter_disable(child_counter);
+	list_del_init(&child_counter->list_entry);
+
+	hw_perf_restore(perf_flags);
+	local_irq_enable();
+
+	parent_counter = child_counter->parent;
+	/*
+	 * It can happen that parent exits first, and has counters
+	 * that are still around due to the child reference. These
+	 * counters need to be zapped - but otherwise linger.
+	 */
+	if (!parent_counter)
+		return;
+
+	parent_val = atomic64_read(&parent_counter->count);
+	child_val = atomic64_read(&child_counter->count);
+
+	/*
+	 * Add back the child's count to the parent's count:
+	 */
+	atomic64_add(child_val, &parent_counter->count);
+
+	fput(parent_counter->filp);
+
+	kfree(child_counter);
+}
+
+/*
+ * When a child task exits, feed back counter values to parent counters.
+ *
+ * Note: we are running in child context, but the PID is not hashed
+ * anymore so new counters will not be added.
+ */
+void perf_counter_exit_task(struct task_struct *child)
+{
+	struct perf_counter *child_counter, *tmp;
+	struct perf_counter_context *child_ctx;
+
+	child_ctx = &child->perf_counter_ctx;
+
+	if (likely(!child_ctx->nr_counters))
+		return;
+
+	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
+				 list_entry)
+		__perf_counter_exit_task(child, child_counter, child_ctx);
+}
+
+/*
+ * Initialize the perf_counter context in task_struct
+ */
+void perf_counter_init_task(struct task_struct *child)
+{
+	struct perf_counter_context *child_ctx, *parent_ctx;
+	struct perf_counter *counter, *parent_counter;
+	struct task_struct *parent = current;
+	unsigned long flags;
+
+	child_ctx = &child->perf_counter_ctx;
+	parent_ctx = &parent->perf_counter_ctx;
+
+	__perf_counter_init_context(child_ctx, child);
+
+	/*
+	 * This is executed from the parent task context, so inherit
+	 * counters that have been marked for cloning:
+	 */
+
+	if (likely(!parent_ctx->nr_counters))
+		return;
+
+	/*
+	 * Lock the parent list. No need to lock the child - not PID
+	 * hashed yet and not running, so nobody can access it.
+	 */
+	spin_lock_irqsave(&parent_ctx->lock, flags);
+
+	/*
+	 * We don't have to disable NMIs - we are only looking at
+	 * the list, not manipulating it:
+	 */
+	list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) {
+		if (!counter->hw_event.inherit || counter->group_leader != counter)
+			continue;
+
+		/*
+		 * Instead of creating recursive hierarchies of counters,
+		 * we link inherited counters back to the original parent,
+		 * which has a filp for sure, which we use as the reference
+		 * count:
+		 */
+		parent_counter = counter;
+		if (counter->parent)
+			parent_counter = counter->parent;
+
+		if (inherit_counter(parent_counter, parent,
+				    parent_ctx, child, child_ctx))
+			break;
+	}
+
+	spin_unlock_irqrestore(&parent_ctx->lock, flags);
+}
+
 static void __cpuinit perf_counter_init_cpu(int cpu)
 {
 	struct perf_cpu_context *cpuctx;