@@ -1423,14 +1423,83 @@ void perf_event_task_sched_in(struct task_struct *task)
 
static void perf_log_throttle(struct perf_event *event, int enable);
 
-static void perf_adjust_period(struct perf_event *event, u64 events)
+static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
+{
+ u64 frequency = event->attr.sample_freq;
+ u64 sec = NSEC_PER_SEC;
+ u64 divisor, dividend;
+
+ int count_fls, nsec_fls, frequency_fls, sec_fls;
+
+ count_fls = fls64(count);
+ nsec_fls = fls64(nsec);
+ frequency_fls = fls64(frequency);
+ sec_fls = 30;
+
+ /*
+ * We got @count in @nsec, with a target of sample_freq HZ
+ * the target period becomes:
+ *
+ *             @count * 10^9
+ * period = -------------------
+ *          @nsec * sample_freq
+ *
+ */
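+
+ /*
+ * e.g. @count = 5000 events in @nsec = 10^7 ns with sample_freq = 4000
+ * gives period = 5000 * 10^9 / (10^7 * 4000) = 125 events per sample.
+ */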
+
+ /*
+ * Reduce accuracy by one bit such that @a and @b converge
+ * to a similar magnitude.
+ */
+#define REDUCE_FLS(a, b) \
+do { \
+ if (a##_fls > b##_fls) { \
+ a >>= 1; \
+ a##_fls--; \
+ } else { \
+ b >>= 1; \
+ b##_fls--; \
+ } \
+} while (0)
+
+ /*
+ * Reduce accuracy until either term fits in a u64, then proceed with
+ * the other, so that finally we can do a u64/u64 division.
+ */
+ while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
+ REDUCE_FLS(nsec, frequency);
+ REDUCE_FLS(sec, count);
+ }
+
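+ /*
+ * One product may still overflow 64 bits; compute the other product
+ * first and shift it right in step with the remaining reductions so
+ * the quotient is preserved.
+ */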
+ if (count_fls + sec_fls > 64) {
+ divisor = nsec * frequency;
+
+ while (count_fls + sec_fls > 64) {
+ REDUCE_FLS(count, sec);
+ divisor >>= 1;
+ }
+
+ dividend = count * sec;
+ } else {
+ dividend = count * sec;
+
+ while (nsec_fls + frequency_fls > 64) {
+ REDUCE_FLS(nsec, frequency);
+ dividend >>= 1;
+ }
+
+ divisor = nsec * frequency;
+ }
+
+ return div64_u64(dividend, divisor);
+}
+
+static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
{
struct hw_perf_event *hwc = &event->hw;
u64 period, sample_period;
s64 delta;
 
- events *= hwc->sample_period;
- period = div64_u64(events, event->attr.sample_freq);
+ period = perf_calculate_period(event, nsec, count);
 
delta = (s64)(period - hwc->sample_period);
delta = (delta + 7) / 8; /* low pass filter */
@@ -1441,13 +1510,22 @@ static void perf_adjust_period(struct perf_event *event, u64 events)
sample_period = 1;
 
hwc->sample_period = sample_period;
+
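+ /*
+ * If the leftover period is much larger than the new sample period,
+ * reprogram the event now rather than letting the stale (huge)
+ * remainder delay the new period from taking effect.
+ */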
+ if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+ perf_disable();
+ event->pmu->disable(event);
+ atomic64_set(&hwc->period_left, 0);
+ event->pmu->enable(event);
+ perf_enable();
+ }
}
 
static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
{
struct perf_event *event;
struct hw_perf_event *hwc;
- u64 interrupts, freq;
+ u64 interrupts, now;
+ s64 delta;
 
raw_spin_lock(&ctx->lock);
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
@@ -1468,44 +1546,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
if (interrupts == MAX_INTERRUPTS) {
perf_log_throttle(event, 1);
event->pmu->unthrottle(event);
- interrupts = 2*sysctl_perf_event_sample_rate/HZ;
}
 
if (!event->attr.freq || !event->attr.sample_freq)
continue;
 
- /*
- * if the specified freq < HZ then we need to skip ticks
- */
- if (event->attr.sample_freq < HZ) {
- freq = event->attr.sample_freq;
-
- hwc->freq_count += freq;
- hwc->freq_interrupts += interrupts;
-
- if (hwc->freq_count < HZ)
- continue;
-
- interrupts = hwc->freq_interrupts;
- hwc->freq_interrupts = 0;
- hwc->freq_count -= HZ;
- } else
- freq = HZ;
-
- perf_adjust_period(event, freq * interrupts);
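+ /*
+ * Estimate the event rate from the count delta since the last tick
+ * and let perf_adjust_period() rescale the sample period.
+ */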
+ event->pmu->read(event);
+ now = atomic64_read(&event->count);
+ delta = now - hwc->freq_count_stamp;
+ hwc->freq_count_stamp = now;
 
- /*
- * In order to avoid being stalled by an (accidental) huge
- * sample period, force reset the sample period if we didn't
- * get any events in this freq period.
- */
- if (!interrupts) {
- perf_disable();
- event->pmu->disable(event);
- atomic64_set(&hwc->period_left, 0);
- event->pmu->enable(event);
- perf_enable();
- }
+ if (delta > 0)
+ perf_adjust_period(event, TICK_NSEC, delta);
}
raw_spin_unlock(&ctx->lock);
}
@@ -3768,12 +3820,12 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 
if (event->attr.freq) {
u64 now = perf_clock();
- s64 delta = now - hwc->freq_stamp;
+ s64 delta = now - hwc->freq_time_stamp;
 
- hwc->freq_stamp = now;
+ hwc->freq_time_stamp = now;
 
- if (delta > 0 && delta < TICK_NSEC)
- perf_adjust_period(event, NSEC_PER_SEC / (int)delta);
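+ /*
+ * hwc->last_period events were counted in the delta ns since the
+ * last overflow; slower rates are left to the tick-based adjustment.
+ */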
+ if (delta > 0 && delta < 2*TICK_NSEC)
+ perf_adjust_period(event, delta, hwc->last_period);
}
 
/*