Browse Source

perf/x86: Add ability to calculate TSC from perf sample timestamps

For modern CPUs, perf clock is directly related to TSC.  TSC
can be calculated from perf clock and vice versa using a simple
calculation.  Two of the three components of that calculation
are already exported in struct perf_event_mmap_page.  This patch
exports the third.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: http://lkml.kernel.org/r/1372425741-1676-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Adrian Hunter 12 years ago
parent
commit
c73deb6aec

+ 1 - 0
arch/x86/include/asm/tsc.h

@@ -49,6 +49,7 @@ extern void tsc_init(void);
 extern void mark_tsc_unstable(char *reason);
 extern int unsynchronized_tsc(void);
 extern int check_tsc_unstable(void);
+extern int check_tsc_disabled(void);
 extern unsigned long native_calibrate_tsc(void);
 
 extern int tsc_clocksource_reliable;

+ 6 - 0
arch/x86/kernel/cpu/perf_event.c

@@ -1884,6 +1884,7 @@ static struct pmu pmu = {
 void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 {
 	userpg->cap_usr_time = 0;
+	userpg->cap_usr_time_zero = 0;
 	userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
 	userpg->pmc_width = x86_pmu.cntval_bits;
 
@@ -1897,6 +1898,11 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 	userpg->time_mult = this_cpu_read(cyc2ns);
 	userpg->time_shift = CYC2NS_SCALE_FACTOR;
 	userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+
+	if (sched_clock_stable && !check_tsc_disabled()) {
+		userpg->cap_usr_time_zero = 1;
+		userpg->time_zero = this_cpu_read(cyc2ns_offset);
+	}
 }
 
 /*

+ 6 - 0
arch/x86/kernel/tsc.c

@@ -89,6 +89,12 @@ int check_tsc_unstable(void)
 }
 EXPORT_SYMBOL_GPL(check_tsc_unstable);
 
+int check_tsc_disabled(void)
+{
+	return tsc_disabled;
+}
+EXPORT_SYMBOL_GPL(check_tsc_disabled);
+
 #ifdef CONFIG_X86_TSC
 int __init notsc_setup(char *str)
 {

+ 20 - 2
include/uapi/linux/perf_event.h

@@ -378,7 +378,8 @@ struct perf_event_mmap_page {
 		struct {
 			__u64	cap_usr_time		: 1,
 				cap_usr_rdpmc		: 1,
-				cap_____res		: 62;
+				cap_usr_time_zero	: 1,
+				cap_____res		: 61;
 		};
 	};
 
@@ -420,12 +421,29 @@ struct perf_event_mmap_page {
 	__u16	time_shift;
 	__u32	time_mult;
 	__u64	time_offset;
+	/*
+	 * If cap_usr_time_zero, the hardware clock (e.g. TSC) can be calculated
+	 * from sample timestamps.
+	 *
+	 *   time = timestamp - time_zero;
+	 *   quot = time / time_mult;
+	 *   rem  = time % time_mult;
+	 *   cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
+	 *
+	 * And vice versa:
+	 *
+	 *   quot = cyc >> time_shift;
+	 *   rem  = cyc & (((u64)1 << time_shift) - 1);
+	 *   timestamp = time_zero + quot * time_mult +
+	 *               ((rem * time_mult) >> time_shift);
+	 */
+	__u64	time_zero;
 
 		/*
 		 * Hole for extension of the self monitor capabilities
 		 */
 
-	__u64	__reserved[120];	/* align to 1k */
+	__u64	__reserved[119];	/* align to 1k */
 
 	/*
 	 * Control data for the mmap() data buffer.