@@ -2297,11 +2297,6 @@ unlock:
 	rcu_read_unlock();
 }
 
-static unsigned long perf_data_size(struct perf_mmap_data *data)
-{
-	return data->nr_pages << (PAGE_SHIFT + data->data_order);
-}
-
 #ifndef CONFIG_PERF_USE_VMALLOC
 
 /*
@@ -2320,6 +2315,19 @@ perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
 	return virt_to_page(data->data_pages[pgoff - 1]);
 }
 
+static void *perf_mmap_alloc_page(int cpu)
+{
+	struct page *page;
+	int node;
+
+	node = (cpu == -1) ? cpu : cpu_to_node(cpu);
+	page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+	if (!page)
+		return NULL;
+
+	return page_address(page);
+}
+
 static struct perf_mmap_data *
 perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
 {
@@ -2336,17 +2344,16 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
 	if (!data)
 		goto fail;
 
-	data->user_page = (void *)get_zeroed_page(GFP_KERNEL);
+	data->user_page = perf_mmap_alloc_page(event->cpu);
 	if (!data->user_page)
 		goto fail_user_page;
 
 	for (i = 0; i < nr_pages; i++) {
-		data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL);
+		data->data_pages[i] = perf_mmap_alloc_page(event->cpu);
 		if (!data->data_pages[i])
 			goto fail_data_pages;
 	}
 
-	data->data_order = 0;
 	data->nr_pages = nr_pages;
 
 	return data;
@@ -2382,6 +2389,11 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
 	kfree(data);
 }
 
+static inline int page_order(struct perf_mmap_data *data)
+{
+	return 0;
+}
+
 #else
 
 /*
@@ -2390,10 +2402,15 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
  * Required for architectures that have d-cache aliasing issues.
  */
 
+static inline int page_order(struct perf_mmap_data *data)
+{
+	return data->page_order;
+}
+
 static struct page *
 perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
 {
-	if (pgoff > (1UL << data->data_order))
+	if (pgoff > (1UL << page_order(data)))
 		return NULL;
 
 	return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
@@ -2413,7 +2430,7 @@ static void perf_mmap_data_free_work(struct work_struct *work)
 	int i, nr;
 
 	data = container_of(work, struct perf_mmap_data, work);
-	nr = 1 << data->data_order;
+	nr = 1 << page_order(data);
 
 	base = data->user_page;
 	for (i = 0; i < nr + 1; i++)
@@ -2452,7 +2469,7 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
 
 	data->user_page = all_buf;
 	data->data_pages[0] = all_buf + PAGE_SIZE;
-	data->data_order = ilog2(nr_pages);
+	data->page_order = ilog2(nr_pages);
 	data->nr_pages = 1;
 
 	return data;
@@ -2466,6 +2483,11 @@ fail:
 
 #endif
 
+static unsigned long perf_data_size(struct perf_mmap_data *data)
+{
+	return data->nr_pages << (PAGE_SHIFT + page_order(data));
+}
+
 static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct perf_event *event = vma->vm_file->private_data;
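With page_order() defined for both backends, perf_data_size() computes the buffer size the same way in either case. A minimal arithmetic sketch of that (assuming 4 KiB pages, i.e. PAGE_SHIFT == 12, and a hypothetical eight-page data buffer; plain userspace C, not kernel code):

/* Illustrative only: mirrors perf_data_size() for both backends. */
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed 4 KiB pages */

unsigned long data_size(unsigned long nr_pages, int page_order)
{
	return nr_pages << (PAGE_SHIFT + page_order);
}

int main(void)
{
	/* page backed: eight order-0 pages */
	printf("%lu\n", data_size(8, 0));	/* 32768 */
	/* vmalloc backed: one contiguous area, page_order = ilog2(8) = 3 */
	printf("%lu\n", data_size(1, 3));	/* 32768 */
	return 0;
}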
@@ -2506,8 +2528,6 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
 {
 	long max_size = perf_data_size(data);
 
-	atomic_set(&data->lock, -1);
-
 	if (event->attr.watermark) {
 		data->watermark = min_t(long, max_size,
 					event->attr.wakeup_watermark);
@@ -2580,6 +2600,14 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	long user_extra, extra;
 	int ret = 0;
 
+	/*
+	 * Don't allow mmap() of inherited per-task counters. This would
+	 * create a performance issue due to all children writing to the
+	 * same buffer.
+	 */
+	if (event->cpu == -1 && event->attr.inherit)
+		return -EINVAL;
+
 	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
 
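The new check makes mmap() fail up front for inherited per-task counters, since every child would otherwise write into the same buffer. A hedged userspace sketch of the combination that is now rejected (the task-clock event, the eight-page buffer size and the raw syscall(2) wrapper are illustrative choices, not part of the patch):

/*
 * Illustrative only: an inherited per-task event (cpu == -1, inherit == 1)
 * can still be opened, but mmap() on it is now refused with EINVAL.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <string.h>
#include <unistd.h>
#include <stdio.h>
#include <errno.h>

int main(void)
{
	struct perf_event_attr attr;
	long psz = sysconf(_SC_PAGESIZE);
	void *buf;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_TASK_CLOCK;
	attr.inherit = 1;	/* count in children too */

	/* pid == 0 (current task), cpu == -1: per-task, inherited */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	/* 1 + 2^n pages: the user page plus the data pages */
	buf = mmap(NULL, (1 + 8) * psz, PROT_READ | PROT_WRITE,
		   MAP_SHARED, fd, 0);
	if (buf == MAP_FAILED)
		printf("mmap: %s\n", strerror(errno));	/* EINVAL after this change */

	close(fd);
	return 0;
}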
@@ -2885,120 +2913,80 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
 }
 
 /*
- * Curious locking construct.
- *
  * We need to ensure a later event_id doesn't publish a head when a former
- * event_id isn't done writing. However since we need to deal with NMIs we
+ * event isn't done writing. However since we need to deal with NMIs we
  * cannot fully serialize things.
  *
- * What we do is serialize between CPUs so we only have to deal with NMI
- * nesting on a single CPU.
- *
  * We only publish the head (and generate a wakeup) when the outer-most
- * event_id completes.
+ * event completes.
 */
-static void perf_output_lock(struct perf_output_handle *handle)
+static void perf_output_get_handle(struct perf_output_handle *handle)
 {
 	struct perf_mmap_data *data = handle->data;
-	int cur, cpu = get_cpu();
-
-	handle->locked = 0;
-
-	for (;;) {
-		cur = atomic_cmpxchg(&data->lock, -1, cpu);
-		if (cur == -1) {
-			handle->locked = 1;
-			break;
-		}
-		if (cur == cpu)
-			break;
 
-		cpu_relax();
-	}
+	preempt_disable();
+	local_inc(&data->nest);
+	handle->wakeup = local_read(&data->wakeup);
 }
 
-static void perf_output_unlock(struct perf_output_handle *handle)
+static void perf_output_put_handle(struct perf_output_handle *handle)
 {
 	struct perf_mmap_data *data = handle->data;
 	unsigned long head;
-	int cpu;
-
-	data->done_head = data->head;
-
-	if (!handle->locked)
-		goto out;
 
 again:
-	/*
-	 * The xchg implies a full barrier that ensures all writes are done
-	 * before we publish the new head, matched by a rmb() in userspace when
-	 * reading this position.
-	 */
-	while ((head = atomic_long_xchg(&data->done_head, 0)))
-		data->user_page->data_head = head;
+	head = local_read(&data->head);
 
 	/*
-	 * NMI can happen here, which means we can miss a done_head update.
+	 * IRQ/NMI can happen here, which means we can miss a head update.
 	 */
 
-	cpu = atomic_xchg(&data->lock, -1);
-	WARN_ON_ONCE(cpu != smp_processor_id());
+	if (!local_dec_and_test(&data->nest))
+		goto out;
 
 	/*
-	 * Therefore we have to validate we did not indeed do so.
+	 * Publish the known good head. Rely on the full barrier implied
+	 * by atomic_dec_and_test() order the data->head read and this
+	 * write.
 	 */
-	if (unlikely(atomic_long_read(&data->done_head))) {
-		/*
-		 * Since we had it locked, we can lock it again.
-		 */
-		while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
-			cpu_relax();
+	data->user_page->data_head = head;
 
+	/*
+	 * Now check if we missed an update, rely on the (compiler)
+	 * barrier in atomic_dec_and_test() to re-read data->head.
+	 */
+	if (unlikely(head != local_read(&data->head))) {
+		local_inc(&data->nest);
 		goto again;
 	}
 
-	if (atomic_xchg(&data->wakeup, 0))
+	if (handle->wakeup != local_read(&data->wakeup))
 		perf_output_wakeup(handle);
-out:
-	put_cpu();
+
+ out:
+	preempt_enable();
 }
 
-void perf_output_copy(struct perf_output_handle *handle,
+__always_inline void perf_output_copy(struct perf_output_handle *handle,
 		      const void *buf, unsigned int len)
 {
-	unsigned int pages_mask;
-	unsigned long offset;
-	unsigned int size;
-	void **pages;
-
-	offset = handle->offset;
-	pages_mask = handle->data->nr_pages - 1;
-	pages = handle->data->data_pages;
-
 	do {
-		unsigned long page_offset;
-		unsigned long page_size;
-		int nr;
+		unsigned long size = min_t(unsigned long, handle->size, len);
 
-		nr = (offset >> PAGE_SHIFT) & pages_mask;
-		page_size = 1UL << (handle->data->data_order + PAGE_SHIFT);
-		page_offset = offset & (page_size - 1);
-		size = min_t(unsigned int, page_size - page_offset, len);
+		memcpy(handle->addr, buf, size);
 
-		memcpy(pages[nr] + page_offset, buf, size);
+		len -= size;
+		handle->addr += size;
+		handle->size -= size;
+		if (!handle->size) {
+			struct perf_mmap_data *data = handle->data;
 
-		len -= size;
-		buf += size;
-		offset += size;
+			handle->page++;
+			handle->page &= data->nr_pages - 1;
+			handle->addr = data->data_pages[handle->page];
+			handle->size = PAGE_SIZE << page_order(data);
+		}
 	} while (len);
-
-	handle->offset = offset;
-
-	/*
-	 * Check we didn't copy past our reservation window, taking the
-	 * possible unsigned int wrap into account.
-	 */
-	WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
 }
 
 int perf_output_begin(struct perf_output_handle *handle,
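The data->nest counter replaces the old data->lock: every writer on a CPU increments it, only the writer that brings it back to zero publishes data->head to user space, and it then re-reads the head to catch a nested IRQ/NMI writer that slipped in before the store. Below is a minimal userspace analogue of that publish loop using C11 atomics; it mirrors the control flow only and is not the kernel code — the real implementation relies on local_t, per-CPU buffers and disabled preemption, which this sketch does not model, and the struct and function names are made up:

/*
 * Illustrative analogue of perf_output_{get,put}_handle() above, using C11
 * atomics in place of the kernel's local_t.
 */
#include <stdatomic.h>

struct buffer {
	atomic_long nest;	/* writers currently inside the buffer */
	atomic_long head;	/* write position, advanced by each writer */
	atomic_long user_head;	/* position published to the reader */
};

void get_handle(struct buffer *b)
{
	/* The kernel also disables preemption here. */
	atomic_fetch_add(&b->nest, 1);
}

void put_handle(struct buffer *b)
{
	long head;

again:
	head = atomic_load(&b->head);

	/* Only the outer-most writer publishes. */
	if (atomic_fetch_sub(&b->nest, 1) != 1)
		return;

	/* Publish the known-good head. */
	atomic_store(&b->user_head, head);

	/*
	 * A nested writer may have advanced head between the load and the
	 * publish above; if so, become the outer-most writer again and retry.
	 */
	if (atomic_load(&b->head) != head) {
		atomic_fetch_add(&b->nest, 1);
		goto again;
	}
}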
@@ -3036,13 +3024,13 @@ int perf_output_begin(struct perf_output_handle *handle,
 	handle->sample = sample;
 
 	if (!data->nr_pages)
-		goto fail;
+		goto out;
 
-	have_lost = atomic_read(&data->lost);
+	have_lost = local_read(&data->lost);
 	if (have_lost)
 		size += sizeof(lost_event);
 
-	perf_output_lock(handle);
+	perf_output_get_handle(handle);
 
 	do {
 		/*
@@ -3052,24 +3040,28 @@ int perf_output_begin(struct perf_output_handle *handle,
 		 */
 		tail = ACCESS_ONCE(data->user_page->data_tail);
 		smp_rmb();
-		offset = head = atomic_long_read(&data->head);
+		offset = head = local_read(&data->head);
 		head += size;
 		if (unlikely(!perf_output_space(data, tail, offset, head)))
 			goto fail;
-	} while (atomic_long_cmpxchg(&data->head, offset, head) != offset);
+	} while (local_cmpxchg(&data->head, offset, head) != offset);
 
-	handle->offset = offset;
-	handle->head = head;
+	if (head - local_read(&data->wakeup) > data->watermark)
+		local_add(data->watermark, &data->wakeup);
 
-	if (head - tail > data->watermark)
-		atomic_set(&data->wakeup, 1);
+	handle->page = offset >> (PAGE_SHIFT + page_order(data));
+	handle->page &= data->nr_pages - 1;
+	handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1);
+	handle->addr = data->data_pages[handle->page];
+	handle->addr += handle->size;
+	handle->size = (PAGE_SIZE << page_order(data)) - handle->size;
 
 	if (have_lost) {
 		lost_event.header.type = PERF_RECORD_LOST;
 		lost_event.header.misc = 0;
 		lost_event.header.size = sizeof(lost_event);
 		lost_event.id = event->id;
-		lost_event.lost = atomic_xchg(&data->lost, 0);
+		lost_event.lost = local_xchg(&data->lost, 0);
 
 		perf_output_put(handle, lost_event);
 	}
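perf_output_begin() now precomputes handle->page, handle->addr and handle->size from the reserved offset, so perf_output_copy() can simply memcpy and wrap. A worked example of that arithmetic (assuming PAGE_SHIFT == 12, page_order 0, an eight-page buffer and a hypothetical reserved offset of 13000 bytes; plain userspace C):

/* Illustrative only: the offset-to-page arithmetic from perf_output_begin(). */
#include <stdio.h>

int main(void)
{
	unsigned long offset = 13000;		/* hypothetical reserved offset */
	unsigned long page_size = 1UL << 12;	/* PAGE_SIZE << page_order */
	unsigned long nr_pages = 8;		/* always a power of two */

	unsigned long page = (offset >> 12) & (nr_pages - 1);	/* 3 */
	unsigned long used = offset & (page_size - 1);		/* 712 */
	unsigned long left = page_size - used;			/* 3384 */

	printf("write starts %lu bytes into page %lu, %lu bytes left in it\n",
	       used, page, left);
	return 0;
}

Because nr_pages is a power of two, masking with nr_pages - 1 implements the wrap-around that perf_output_copy() relies on when handle->size reaches zero.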
@@ -3077,8 +3069,8 @@ int perf_output_begin(struct perf_output_handle *handle,
 	return 0;
 
 fail:
-	atomic_inc(&data->lost);
-	perf_output_unlock(handle);
+	local_inc(&data->lost);
+	perf_output_put_handle(handle);
 out:
 	rcu_read_unlock();
 
@@ -3093,14 +3085,14 @@ void perf_output_end(struct perf_output_handle *handle)
 	int wakeup_events = event->attr.wakeup_events;
 
 	if (handle->sample && wakeup_events) {
-		int events = atomic_inc_return(&data->events);
+		int events = local_inc_return(&data->events);
 		if (events >= wakeup_events) {
-			atomic_sub(wakeup_events, &data->events);
-			atomic_set(&data->wakeup, 1);
+			local_sub(wakeup_events, &data->events);
+			local_inc(&data->wakeup);
 		}
 	}
 
-	perf_output_unlock(handle);
+	perf_output_put_handle(handle);
 	rcu_read_unlock();
 }
 
@@ -3436,22 +3428,13 @@ static void perf_event_task_output(struct perf_event *event,
 {
 	struct perf_output_handle handle;
 	struct task_struct *task = task_event->task;
-	unsigned long flags;
 	int size, ret;
 
-	/*
-	 * If this CPU attempts to acquire an rq lock held by a CPU spinning
-	 * in perf_output_lock() from interrupt context, it's game over.
-	 */
-	local_irq_save(flags);
-
 	size = task_event->event_id.header.size;
 	ret = perf_output_begin(&handle, event, size, 0, 0);
 
-	if (ret) {
-		local_irq_restore(flags);
+	if (ret)
 		return;
-	}
 
 	task_event->event_id.pid = perf_event_pid(event, task);
 	task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3462,7 +3445,6 @@ static void perf_event_task_output(struct perf_event *event,
 	perf_output_put(&handle, task_event->event_id);
 
 	perf_output_end(&handle);
-	local_irq_restore(flags);
 }
 
 static int perf_event_task_match(struct perf_event *event)
@@ -4020,9 +4002,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
 		perf_swevent_overflow(event, 0, nmi, data, regs);
 }
 
-static int perf_tp_event_match(struct perf_event *event,
-				struct perf_sample_data *data);
-
 static int perf_exclude_event(struct perf_event *event,
 			      struct pt_regs *regs)
 {
@@ -4052,10 +4031,6 @@ static int perf_swevent_match(struct perf_event *event,
 	if (perf_exclude_event(event, regs))
 		return 0;
 
-	if (event->attr.type == PERF_TYPE_TRACEPOINT &&
-	    !perf_tp_event_match(event, data))
-		return 0;
-
 	return 1;
 }
 
@@ -4066,19 +4041,46 @@ static inline u64 swevent_hash(u64 type, u32 event_id)
 	return hash_64(val, SWEVENT_HLIST_BITS);
 }
 
-static struct hlist_head *
-find_swevent_head(struct perf_cpu_context *ctx, u64 type, u32 event_id)
+static inline struct hlist_head *
+__find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id)
 {
-	u64 hash;
-	struct swevent_hlist *hlist;
+	u64 hash = swevent_hash(type, event_id);
+
+	return &hlist->heads[hash];
+}
 
-	hash = swevent_hash(type, event_id);
+/* For the read side: events when they trigger */
+static inline struct hlist_head *
+find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id)
+{
+	struct swevent_hlist *hlist;
 
 	hlist = rcu_dereference(ctx->swevent_hlist);
 	if (!hlist)
 		return NULL;
 
-	return &hlist->heads[hash];
+	return __find_swevent_head(hlist, type, event_id);
+}
+
+/* For the event head insertion and removal in the hlist */
+static inline struct hlist_head *
+find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event)
+{
+	struct swevent_hlist *hlist;
+	u32 event_id = event->attr.config;
+	u64 type = event->attr.type;
+
+	/*
+	 * Event scheduling is always serialized against hlist allocation
+	 * and release. Which makes the protected version suitable here.
+	 * The context lock guarantees that.
+	 */
+	hlist = rcu_dereference_protected(ctx->swevent_hlist,
+					  lockdep_is_held(&event->ctx->lock));
+	if (!hlist)
+		return NULL;
+
+	return __find_swevent_head(hlist, type, event_id);
 }
 
 static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
@@ -4095,7 +4097,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
 
 	rcu_read_lock();
 
-	head = find_swevent_head(cpuctx, type, event_id);
+	head = find_swevent_head_rcu(cpuctx, type, event_id);
 
 	if (!head)
 		goto end;
@@ -4110,7 +4112,7 @@ end:
 
 int perf_swevent_get_recursion_context(void)
 {
-	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	int rctx;
 
 	if (in_nmi())
@@ -4122,10 +4124,8 @@ int perf_swevent_get_recursion_context(void)
 	else
 		rctx = 0;
 
-	if (cpuctx->recursion[rctx]) {
-		put_cpu_var(perf_cpu_context);
+	if (cpuctx->recursion[rctx])
 		return -1;
-	}
 
 	cpuctx->recursion[rctx]++;
 	barrier();
@@ -4139,7 +4139,6 @@ void perf_swevent_put_recursion_context(int rctx)
 	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	barrier();
 	cpuctx->recursion[rctx]--;
-	put_cpu_var(perf_cpu_context);
 }
 EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
 
@@ -4150,6 +4149,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
 	struct perf_sample_data data;
 	int rctx;
 
+	preempt_disable_notrace();
 	rctx = perf_swevent_get_recursion_context();
 	if (rctx < 0)
 		return;
@@ -4159,6 +4159,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
 	do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
 
 	perf_swevent_put_recursion_context(rctx);
+	preempt_enable_notrace();
 }
 
 static void perf_swevent_read(struct perf_event *event)
@@ -4178,7 +4179,7 @@ static int perf_swevent_enable(struct perf_event *event)
 		perf_swevent_set_period(event);
 	}
 
-	head = find_swevent_head(cpuctx, event->attr.type, event->attr.config);
+	head = find_swevent_head(cpuctx, event);
 	if (WARN_ON_ONCE(!head))
 		return -EINVAL;
 
@@ -4366,6 +4367,14 @@ static const struct pmu perf_ops_task_clock = {
 	.read = task_clock_perf_event_read,
 };
 
+/* Deref the hlist from the update side */
+static inline struct swevent_hlist *
+swevent_hlist_deref(struct perf_cpu_context *cpuctx)
+{
+	return rcu_dereference_protected(cpuctx->swevent_hlist,
+					 lockdep_is_held(&cpuctx->hlist_mutex));
+}
+
 static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
 {
 	struct swevent_hlist *hlist;
@@ -4376,12 +4385,11 @@ static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
 
 static void swevent_hlist_release(struct perf_cpu_context *cpuctx)
 {
-	struct swevent_hlist *hlist;
+	struct swevent_hlist *hlist = swevent_hlist_deref(cpuctx);
 
-	if (!cpuctx->swevent_hlist)
+	if (!hlist)
 		return;
 
-	hlist = cpuctx->swevent_hlist;
 	rcu_assign_pointer(cpuctx->swevent_hlist, NULL);
 	call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
 }
@@ -4418,7 +4426,7 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
 
 	mutex_lock(&cpuctx->hlist_mutex);
 
-	if (!cpuctx->swevent_hlist && cpu_online(cpu)) {
+	if (!swevent_hlist_deref(cpuctx) && cpu_online(cpu)) {
 		struct swevent_hlist *hlist;
 
 		hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
@@ -4467,10 +4475,46 @@ static int swevent_hlist_get(struct perf_event *event)
 
 #ifdef CONFIG_EVENT_TRACING
 
-void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
-		   int entry_size, struct pt_regs *regs)
+static const struct pmu perf_ops_tracepoint = {
+	.enable = perf_trace_enable,
+	.disable = perf_trace_disable,
+	.read = perf_swevent_read,
+	.unthrottle = perf_swevent_unthrottle,
+};
+
+static int perf_tp_filter_match(struct perf_event *event,
+				struct perf_sample_data *data)
+{
+	void *record = data->raw->data;
+
+	if (likely(!event->filter) || filter_match_preds(event->filter, record))
+		return 1;
+	return 0;
+}
+
+static int perf_tp_event_match(struct perf_event *event,
+				struct perf_sample_data *data,
+				struct pt_regs *regs)
+{
+	/*
+	 * All tracepoints are from kernel-space.
+	 */
+	if (event->attr.exclude_kernel)
+		return 0;
+
+	if (!perf_tp_filter_match(event, data))
+		return 0;
+
+	return 1;
+}
+
+void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
+		   struct pt_regs *regs, struct hlist_head *head)
 {
 	struct perf_sample_data data;
+	struct perf_event *event;
+	struct hlist_node *node;
+
 	struct perf_raw_record raw = {
 		.size = entry_size,
 		.data = record,
@@ -4479,26 +4523,18 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
 	perf_sample_data_init(&data, addr);
 	data.raw = &raw;
 
-	/* Trace events already protected against recursion */
-	do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
-			 &data, regs);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
+		if (perf_tp_event_match(event, &data, regs))
+			perf_swevent_add(event, count, 1, &data, regs);
+	}
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
 
-static int perf_tp_event_match(struct perf_event *event,
-			       struct perf_sample_data *data)
-{
-	void *record = data->raw->data;
-
-	if (likely(!event->filter) || filter_match_preds(event->filter, record))
-		return 1;
-	return 0;
-}
-
 static void tp_perf_event_destroy(struct perf_event *event)
 {
-	perf_trace_disable(event->attr.config);
-	swevent_hlist_put(event);
+	perf_trace_destroy(event);
 }
 
 static const struct pmu *tp_perf_event_init(struct perf_event *event)
@@ -4514,17 +4550,13 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
 			!capable(CAP_SYS_ADMIN))
 		return ERR_PTR(-EPERM);
 
-	if (perf_trace_enable(event->attr.config))
+	err = perf_trace_init(event);
+	if (err)
 		return NULL;
 
 	event->destroy = tp_perf_event_destroy;
-	err = swevent_hlist_get(event);
-	if (err) {
-		perf_trace_disable(event->attr.config);
-		return ERR_PTR(err);
-	}
 
-	return &perf_ops_generic;
+	return &perf_ops_tracepoint;
 }
 
 static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4552,12 +4584,6 @@ static void perf_event_free_filter(struct perf_event *event)
 
 #else
 
-static int perf_tp_event_match(struct perf_event *event,
-			       struct perf_sample_data *data)
-{
-	return 1;
-}
-
 static const struct pmu *tp_perf_event_init(struct perf_event *event)
 {
 	return NULL;
@@ -4894,6 +4920,13 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
 	int fput_needed = 0;
 	int ret = -EINVAL;
 
+	/*
+	 * Don't allow output of inherited per-task events. This would
+	 * create performance issues due to cross cpu access.
+	 */
+	if (event->cpu == -1 && event->attr.inherit)
+		return -EINVAL;
+
 	if (!output_fd)
 		goto set;
 
@@ -4914,6 +4947,18 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
 	if (event->data)
 		goto out;
 
+	/*
+	 * Don't allow cross-cpu buffers
+	 */
+	if (output_event->cpu != event->cpu)
+		goto out;
+
+	/*
+	 * If its not a per-cpu buffer, it must be the same task.
+	 */
+	if (output_event->cpu == -1 && output_event->ctx != event->ctx)
+		goto out;
+
 	atomic_long_inc(&output_file->f_count);
 
 set:
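The extra checks in perf_event_set_output() restrict where an event may redirect its output: both events must be bound to the same CPU, and a per-task (cpu == -1) buffer may only be shared within the same context. A hedged sketch of the ioctl this affects — the CPU-clock events, the CPU numbers and the raw syscall(2) wrapper are illustrative, and error handling is omitted:

/*
 * Illustrative only: redirecting one event's output into another's buffer
 * with PERF_EVENT_IOC_SET_OUTPUT; mismatched CPUs are now refused.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <string.h>
#include <unistd.h>
#include <stdio.h>

static int open_cpu_clock(int cpu)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	/* pid == 0: count this task, but only while it runs on @cpu */
	return syscall(__NR_perf_event_open, &attr, 0, cpu, -1, 0);
}

int main(void)
{
	int a = open_cpu_clock(0);
	int b = open_cpu_clock(1);

	/* Same-CPU redirection is still allowed; this cross-CPU one is not. */
	if (ioctl(b, PERF_EVENT_IOC_SET_OUTPUT, a) < 0)
		perror("PERF_EVENT_IOC_SET_OUTPUT");	/* EINVAL after this change */

	close(a);
	close(b);
	return 0;
}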