Browse code

Merge branch 'auto' of git://git.kernel.org/pub/scm/linux/kernel/git/rric/oprofile into oprofile

Ingo Molnar, 16 years ago
parent
commit
4f6e1fe1d8

+ 51 - 50
arch/x86/oprofile/nmi_int.c

@@ -31,6 +31,26 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
 /* 0 == registered but off, 1 == registered and on */
 static int nmi_enabled = 0;
 
+/* common functions */
+
+u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
+		    struct op_counter_config *counter_config)
+{
+	u64 val = 0;
+	u16 event = (u16)counter_config->event;
+
+	val |= ARCH_PERFMON_EVENTSEL_INT;
+	val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
+	val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
+	val |= (counter_config->unit_mask & 0xFF) << 8;
+	event &= model->event_mask ? model->event_mask : 0xFF;
+	val |= event & 0xFF;
+	val |= (event & 0x0F00) << 24;
+
+	return val;
+}
+
+
 static int profile_exceptions_notify(struct notifier_block *self,
 				     unsigned long val, void *data)
 {
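The new helper packs the architectural event-select fields into one 64-bit value. The packing can be reproduced in stand-alone user-space C; the macro names and the 0x76 event number below are illustrative, not taken from the commit:

    /* sketch of the bit layout op_x86_get_ctrl() assembles */
    #include <stdio.h>
    #include <stdint.h>

    #define EVENTSEL_USR (1ULL << 16)	/* count in user mode */
    #define EVENTSEL_OS  (1ULL << 17)	/* count in kernel mode */
    #define EVENTSEL_INT (1ULL << 20)	/* interrupt on overflow */

    static uint64_t get_ctrl(uint16_t event, uint8_t unit_mask,
                             int user, int kernel)
    {
            uint64_t val = EVENTSEL_INT;

            val |= user ? EVENTSEL_USR : 0;
            val |= kernel ? EVENTSEL_OS : 0;
            val |= (uint64_t)unit_mask << 8;
            val |= event & 0xFF;                      /* event select [7:0] */
            val |= ((uint64_t)event & 0x0F00) << 24;  /* extended bits [35:32] */
            return val;
    }

    int main(void)
    {
            printf("ctrl = %#llx\n",
                   (unsigned long long)get_ctrl(0x76, 0x00, 1, 1));
            return 0;
    }

Note the explicit 64-bit cast before the `<< 24` shift in the sketch; it avoids the signed-int overflow that the uncast expression would risk.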
@@ -52,26 +72,18 @@ static int profile_exceptions_notify(struct notifier_block *self,
 
 static void nmi_cpu_save_registers(struct op_msrs *msrs)
 {
-	unsigned int const nr_ctrs = model->num_counters;
-	unsigned int const nr_ctrls = model->num_controls;
 	struct op_msr *counters = msrs->counters;
 	struct op_msr *controls = msrs->controls;
 	unsigned int i;
 
-	for (i = 0; i < nr_ctrs; ++i) {
-		if (counters[i].addr) {
-			rdmsr(counters[i].addr,
-				counters[i].saved.low,
-				counters[i].saved.high);
-		}
+	for (i = 0; i < model->num_counters; ++i) {
+		if (counters[i].addr)
+			rdmsrl(counters[i].addr, counters[i].saved);
 	}
 
-	for (i = 0; i < nr_ctrls; ++i) {
-		if (controls[i].addr) {
-			rdmsr(controls[i].addr,
-				controls[i].saved.low,
-				controls[i].saved.high);
-		}
+	for (i = 0; i < model->num_controls; ++i) {
+		if (controls[i].addr)
+			rdmsrl(controls[i].addr, controls[i].saved);
 	}
 }
 
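The switch from rdmsr()/wrmsr() to rdmsrl()/wrmsrl() replaces split low/high accesses with a single u64. The relation between the two forms is just a 32-bit split, as this minimal user-space sketch (no real MSR access) shows:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t saved = 0x123456789ABCDEF0ULL;
            uint32_t low  = (uint32_t)saved;          /* what rdmsr puts in EAX */
            uint32_t high = (uint32_t)(saved >> 32);  /* ... and in EDX */

            /* recombining yields the rdmsrl() result */
            uint64_t joined = ((uint64_t)high << 32) | low;
            printf("%d\n", saved == joined);          /* prints 1 */
            return 0;
    }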
@@ -126,7 +138,7 @@ static void nmi_cpu_setup(void *dummy)
 	int cpu = smp_processor_id();
 	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
 	spin_lock(&oprofilefs_lock);
-	model->setup_ctrs(msrs);
+	model->setup_ctrs(model, msrs);
 	spin_unlock(&oprofilefs_lock);
 	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
@@ -178,26 +190,18 @@ static int nmi_setup(void)
 
 static void nmi_restore_registers(struct op_msrs *msrs)
 {
-	unsigned int const nr_ctrs = model->num_counters;
-	unsigned int const nr_ctrls = model->num_controls;
 	struct op_msr *counters = msrs->counters;
 	struct op_msr *controls = msrs->controls;
 	unsigned int i;
 
-	for (i = 0; i < nr_ctrls; ++i) {
-		if (controls[i].addr) {
-			wrmsr(controls[i].addr,
-				controls[i].saved.low,
-				controls[i].saved.high);
-		}
+	for (i = 0; i < model->num_controls; ++i) {
+		if (controls[i].addr)
+			wrmsrl(controls[i].addr, controls[i].saved);
 	}
 
-	for (i = 0; i < nr_ctrs; ++i) {
-		if (counters[i].addr) {
-			wrmsr(counters[i].addr,
-				counters[i].saved.low,
-				counters[i].saved.high);
-		}
+	for (i = 0; i < model->num_counters; ++i) {
+		if (counters[i].addr)
+			wrmsrl(counters[i].addr, counters[i].saved);
 	}
 }
 
@@ -402,6 +406,7 @@ module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
 static int __init ppro_init(char **cpu_type)
 {
 	__u8 cpu_model = boot_cpu_data.x86_model;
+	struct op_x86_model_spec const *spec = &op_ppro_spec;	/* default */
 
 	if (force_arch_perfmon && cpu_has_arch_perfmon)
 		return 0;
@@ -428,7 +433,7 @@ static int __init ppro_init(char **cpu_type)
 		*cpu_type = "i386/core_2";
 		*cpu_type = "i386/core_2";
 		break;
 		break;
 	case 26:
 	case 26:
-		arch_perfmon_setup_counters();
+		spec = &op_arch_perfmon_spec;
 		*cpu_type = "i386/core_i7";
 		*cpu_type = "i386/core_i7";
 		break;
 		break;
 	case 28:
 	case 28:
@@ -439,17 +444,7 @@ static int __init ppro_init(char **cpu_type)
 		return 0;
 	}
 
-	model = &op_ppro_spec;
-	return 1;
-}
-
-static int __init arch_perfmon_init(char **cpu_type)
-{
-	if (!cpu_has_arch_perfmon)
-		return 0;
-	*cpu_type = "i386/arch_perfmon";
-	model = &op_arch_perfmon_spec;
-	arch_perfmon_setup_counters();
+	model = spec;
 	return 1;
 }
 
@@ -471,27 +466,26 @@ int __init op_nmi_init(struct oprofile_operations *ops)
 		/* Needs to be at least an Athlon (or hammer in 32bit mode) */
 
 		switch (family) {
-		default:
-			return -ENODEV;
 		case 6:
-			model = &op_amd_spec;
 			cpu_type = "i386/athlon";
 			break;
 		case 0xf:
-			model = &op_amd_spec;
-			/* Actually it could be i386/hammer too, but give
-			 user space an consistent name. */
+			/*
+			 * Actually it could be i386/hammer too, but
+			 * give user space a consistent name.
+			 */
 			cpu_type = "x86-64/hammer";
 			cpu_type = "x86-64/hammer";
 			break;
 			break;
 		case 0x10:
 		case 0x10:
-			model = &op_amd_spec;
 			cpu_type = "x86-64/family10";
 			cpu_type = "x86-64/family10";
 			break;
 			break;
 		case 0x11:
 		case 0x11:
-			model = &op_amd_spec;
 			cpu_type = "x86-64/family11h";
 			cpu_type = "x86-64/family11h";
 			break;
 			break;
+		default:
+			return -ENODEV;
 		}
+		model = &op_amd_spec;
 		break;
 
 	case X86_VENDOR_INTEL:
@@ -510,8 +504,15 @@ int __init op_nmi_init(struct oprofile_operations *ops)
 			break;
 		}
 
-		if (!cpu_type && !arch_perfmon_init(&cpu_type))
+		if (cpu_type)
+			break;
+
+		if (!cpu_has_arch_perfmon)
 			return -ENODEV;
+
+		/* use arch perfmon as fallback */
+		cpu_type = "i386/arch_perfmon";
+		model = &op_arch_perfmon_spec;
 		break;
 
 	default:

+ 118 - 149
arch/x86/oprofile/op_model_amd.c

@@ -25,43 +25,28 @@
 
 #define NUM_COUNTERS 4
 #define NUM_CONTROLS 4
+#define OP_EVENT_MASK			0x0FFF
+#define OP_CTR_OVERFLOW			(1ULL<<31)
 
-#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
-#define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0)
-#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
-
-#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
-#define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
-#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
-#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
-#define CTRL_CLEAR_LO(x) (x &= (1<<21))
-#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0)
-#define CTRL_SET_ENABLE(val) (val |= 1<<20)
-#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
-#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
-#define CTRL_SET_UM(val, m) (val |= (m << 8))
-#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff))
-#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf))
-#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9))
-#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8))
+#define MSR_AMD_EVENTSEL_RESERVED	((0xFFFFFCF0ULL<<32)|(1ULL<<21))
 
 static unsigned long reset_value[NUM_COUNTERS];
 
 #ifdef CONFIG_OPROFILE_IBS
 
 /* IbsFetchCtl bits/masks */
-#define IBS_FETCH_HIGH_VALID_BIT	(1UL << 17)	/* bit 49 */
-#define IBS_FETCH_HIGH_ENABLE		(1UL << 16)	/* bit 48 */
-#define IBS_FETCH_LOW_MAX_CNT_MASK	0x0000FFFFUL	/* MaxCnt mask */
+#define IBS_FETCH_RAND_EN		(1ULL<<57)
+#define IBS_FETCH_VAL			(1ULL<<49)
+#define IBS_FETCH_ENABLE		(1ULL<<48)
+#define IBS_FETCH_CNT_MASK		0xFFFF0000ULL
 
 /*IbsOpCtl bits */
-#define IBS_OP_LOW_VALID_BIT		(1ULL<<18)	/* bit 18 */
-#define IBS_OP_LOW_ENABLE		(1ULL<<17)	/* bit 17 */
+#define IBS_OP_CNT_CTL			(1ULL<<19)
+#define IBS_OP_VAL			(1ULL<<18)
+#define IBS_OP_ENABLE			(1ULL<<17)
 
-#define IBS_FETCH_SIZE	6
-#define IBS_OP_SIZE	12
+#define IBS_FETCH_SIZE			6
+#define IBS_OP_SIZE			12
 
 static int has_ibs;	/* AMD Family10h and later */
 
@@ -99,49 +84,38 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
 	}
 }
 
-
-static void op_amd_setup_ctrs(struct op_msrs const * const msrs)
+static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
+			      struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 
 	/* clear all counters */
 	for (i = 0 ; i < NUM_CONTROLS; ++i) {
-		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+		if (unlikely(!msrs->controls[i].addr))
 			continue;
-		CTRL_READ(low, high, msrs, i);
-		CTRL_CLEAR_LO(low);
-		CTRL_CLEAR_HI(high);
-		CTRL_WRITE(low, high, msrs, i);
+		rdmsrl(msrs->controls[i].addr, val);
+		val &= model->reserved;
+		wrmsrl(msrs->controls[i].addr, val);
 	}
 
 	/* avoid a false detection of ctr overflows in NMI handler */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
-		if (unlikely(!CTR_IS_RESERVED(msrs, i)))
+		if (unlikely(!msrs->counters[i].addr))
 			continue;
-		CTR_WRITE(1, msrs, i);
+		wrmsrl(msrs->counters[i].addr, -1LL);
 	}
 
 	/* enable active counters */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
-		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
+		if (counter_config[i].enabled && msrs->counters[i].addr) {
 			reset_value[i] = counter_config[i].count;
-
-			CTR_WRITE(counter_config[i].count, msrs, i);
-
-			CTRL_READ(low, high, msrs, i);
-			CTRL_CLEAR_LO(low);
-			CTRL_CLEAR_HI(high);
-			CTRL_SET_ENABLE(low);
-			CTRL_SET_USR(low, counter_config[i].user);
-			CTRL_SET_KERN(low, counter_config[i].kernel);
-			CTRL_SET_UM(low, counter_config[i].unit_mask);
-			CTRL_SET_EVENT_LOW(low, counter_config[i].event);
-			CTRL_SET_EVENT_HIGH(high, counter_config[i].event);
-			CTRL_SET_HOST_ONLY(high, 0);
-			CTRL_SET_GUEST_ONLY(high, 0);
-
-			CTRL_WRITE(low, high, msrs, i);
+			wrmsrl(msrs->counters[i].addr,
+			       -(s64)counter_config[i].count);
+			rdmsrl(msrs->controls[i].addr, val);
+			val &= model->reserved;
+			val |= op_x86_get_ctrl(model, &counter_config[i]);
+			wrmsrl(msrs->controls[i].addr, val);
 		} else {
 			reset_value[i] = 0;
 		}
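Counters are programmed with the negated period (-1LL, -(s64)count) because they count upward and fire on overflow. A small user-space sketch of that convention, assuming 64-bit wraparound:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t count = 100000;                  /* desired sample period */
            uint64_t ctr = (uint64_t)-(int64_t)count; /* what wrmsrl() writes */

            ctr += count;                             /* simulate N events */
            printf("wrapped to %llu\n", (unsigned long long)ctr); /* 0 */
            return 0;
    }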
@@ -154,93 +128,116 @@ static inline int
 op_amd_handle_ibs(struct pt_regs * const regs,
 		  struct op_msrs const * const msrs)
 {
-	u32 low, high;
-	u64 msr;
+	u64 val, ctl;
 	struct op_entry entry;
 
 	if (!has_ibs)
 		return 1;
 
 	if (ibs_config.fetch_enabled) {
-		rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
-		if (high & IBS_FETCH_HIGH_VALID_BIT) {
-			rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr);
-			oprofile_write_reserve(&entry, regs, msr,
+		rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
+		if (ctl & IBS_FETCH_VAL) {
+			rdmsrl(MSR_AMD64_IBSFETCHLINAD, val);
+			oprofile_write_reserve(&entry, regs, val,
 					       IBS_FETCH_CODE, IBS_FETCH_SIZE);
-			oprofile_add_data(&entry, (u32)msr);
-			oprofile_add_data(&entry, (u32)(msr >> 32));
-			oprofile_add_data(&entry, low);
-			oprofile_add_data(&entry, high);
-			rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr);
-			oprofile_add_data(&entry, (u32)msr);
-			oprofile_add_data(&entry, (u32)(msr >> 32));
+			oprofile_add_data64(&entry, val);
+			oprofile_add_data64(&entry, ctl);
+			rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val);
+			oprofile_add_data64(&entry, val);
 			oprofile_write_commit(&entry);
 
 			/* reenable the IRQ */
-			high &= ~IBS_FETCH_HIGH_VALID_BIT;
-			high |= IBS_FETCH_HIGH_ENABLE;
-			low &= IBS_FETCH_LOW_MAX_CNT_MASK;
-			wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+			ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK);
+			ctl |= IBS_FETCH_ENABLE;
+			wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
 		}
 	}
 
 	if (ibs_config.op_enabled) {
-		rdmsr(MSR_AMD64_IBSOPCTL, low, high);
-		if (low & IBS_OP_LOW_VALID_BIT) {
-			rdmsrl(MSR_AMD64_IBSOPRIP, msr);
-			oprofile_write_reserve(&entry, regs, msr,
+		rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
+		if (ctl & IBS_OP_VAL) {
+			rdmsrl(MSR_AMD64_IBSOPRIP, val);
+			oprofile_write_reserve(&entry, regs, val,
 					       IBS_OP_CODE, IBS_OP_SIZE);
-			oprofile_add_data(&entry, (u32)msr);
-			oprofile_add_data(&entry, (u32)(msr >> 32));
-			rdmsrl(MSR_AMD64_IBSOPDATA, msr);
-			oprofile_add_data(&entry, (u32)msr);
-			oprofile_add_data(&entry, (u32)(msr >> 32));
-			rdmsrl(MSR_AMD64_IBSOPDATA2, msr);
-			oprofile_add_data(&entry, (u32)msr);
-			oprofile_add_data(&entry, (u32)(msr >> 32));
-			rdmsrl(MSR_AMD64_IBSOPDATA3, msr);
-			oprofile_add_data(&entry, (u32)msr);
-			oprofile_add_data(&entry, (u32)(msr >> 32));
-			rdmsrl(MSR_AMD64_IBSDCLINAD, msr);
-			oprofile_add_data(&entry, (u32)msr);
-			oprofile_add_data(&entry, (u32)(msr >> 32));
-			rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr);
-			oprofile_add_data(&entry, (u32)msr);
-			oprofile_add_data(&entry, (u32)(msr >> 32));
+			oprofile_add_data64(&entry, val);
+			rdmsrl(MSR_AMD64_IBSOPDATA, val);
+			oprofile_add_data64(&entry, val);
+			rdmsrl(MSR_AMD64_IBSOPDATA2, val);
+			oprofile_add_data64(&entry, val);
+			rdmsrl(MSR_AMD64_IBSOPDATA3, val);
+			oprofile_add_data64(&entry, val);
+			rdmsrl(MSR_AMD64_IBSDCLINAD, val);
+			oprofile_add_data64(&entry, val);
+			rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
+			oprofile_add_data64(&entry, val);
 			oprofile_write_commit(&entry);
 
 			/* reenable the IRQ */
-			high = 0;
-			low &= ~IBS_OP_LOW_VALID_BIT;
-			low |= IBS_OP_LOW_ENABLE;
-			wrmsr(MSR_AMD64_IBSOPCTL, low, high);
+			ctl &= ~IBS_OP_VAL & 0xFFFFFFFF;
+			ctl |= IBS_OP_ENABLE;
+			wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
 		}
 	}
 
 	return 1;
 }
 
+static inline void op_amd_start_ibs(void)
+{
+	u64 val;
+	if (has_ibs && ibs_config.fetch_enabled) {
+		val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
+		val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
+		val |= IBS_FETCH_ENABLE;
+		wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
+	}
+
+	if (has_ibs && ibs_config.op_enabled) {
+		val = (ibs_config.max_cnt_op >> 4) & 0xFFFF;
+		val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
+		val |= IBS_OP_ENABLE;
+		wrmsrl(MSR_AMD64_IBSOPCTL, val);
+	}
+}
+
+static void op_amd_stop_ibs(void)
+{
+	if (has_ibs && ibs_config.fetch_enabled)
+		/* clear max count and enable */
+		wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);
+
+	if (has_ibs && ibs_config.op_enabled)
+		/* clear max count and enable */
+		wrmsrl(MSR_AMD64_IBSOPCTL, 0);
+}
+
+#else
+
+static inline int op_amd_handle_ibs(struct pt_regs * const regs,
+				    struct op_msrs const * const msrs) { return 0; }
+static inline void op_amd_start_ibs(void) { }
+static inline void op_amd_stop_ibs(void) { }
+
 #endif
 
 static int op_amd_check_ctrs(struct pt_regs * const regs,
 			     struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 
 	for (i = 0 ; i < NUM_COUNTERS; ++i) {
 		if (!reset_value[i])
 			continue;
-		CTR_READ(low, high, msrs, i);
-		if (CTR_OVERFLOWED(low)) {
-			oprofile_add_sample(regs, i);
-			CTR_WRITE(reset_value[i], msrs, i);
-		}
+		rdmsrl(msrs->counters[i].addr, val);
+		/* bit is clear if overflowed: */
+		if (val & OP_CTR_OVERFLOW)
+			continue;
+		oprofile_add_sample(regs, i);
+		wrmsrl(msrs->counters[i].addr, -(s64)reset_value[i]);
 	}
 
-#ifdef CONFIG_OPROFILE_IBS
 	op_amd_handle_ibs(regs, msrs);
-#endif
 
 	/* See op_model_ppro.c */
 	return 1;
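Re-arming after an IBS fetch sample clears IBS_FETCH_VAL plus the current count and sets IBS_FETCH_ENABLE in one 64-bit control word. The masking can be tried in user space, using the same bit positions as the constants added above:

    #include <stdio.h>
    #include <stdint.h>

    #define IBS_FETCH_VAL      (1ULL << 49)
    #define IBS_FETCH_ENABLE   (1ULL << 48)
    #define IBS_FETCH_CNT_MASK 0xFFFF0000ULL

    int main(void)
    {
            /* pretend hardware latched a sample with a partial count */
            uint64_t ctl = IBS_FETCH_VAL | 0x12340000ULL | 0xFFFF;

            ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK); /* drop valid + count */
            ctl |= IBS_FETCH_ENABLE;                      /* start a new sample */
            printf("ctl = %#llx\n", (unsigned long long)ctl);
            return 0;
    }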
@@ -248,38 +245,22 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
 
 static void op_amd_start(struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
 		if (reset_value[i]) {
-			CTRL_READ(low, high, msrs, i);
-			CTRL_SET_ACTIVE(low);
-			CTRL_WRITE(low, high, msrs, i);
+			rdmsrl(msrs->controls[i].addr, val);
+			val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+			wrmsrl(msrs->controls[i].addr, val);
 		}
 	}
 
-#ifdef CONFIG_OPROFILE_IBS
-	if (has_ibs && ibs_config.fetch_enabled) {
-		low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
-		high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */
-			+ IBS_FETCH_HIGH_ENABLE;
-		wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
-	}
-
-	if (has_ibs && ibs_config.op_enabled) {
-		low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF)
-			+ ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */
-			+ IBS_OP_LOW_ENABLE;
-		high = 0;
-		wrmsr(MSR_AMD64_IBSOPCTL, low, high);
-	}
-#endif
+	op_amd_start_ibs();
 }
 
-
 static void op_amd_stop(struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 
 	/*
@@ -289,26 +270,12 @@ static void op_amd_stop(struct op_msrs const * const msrs)
 	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
 		if (!reset_value[i])
 			continue;
-		CTRL_READ(low, high, msrs, i);
-		CTRL_SET_INACTIVE(low);
-		CTRL_WRITE(low, high, msrs, i);
-	}
-
-#ifdef CONFIG_OPROFILE_IBS
-	if (has_ibs && ibs_config.fetch_enabled) {
-		/* clear max count and enable */
-		low = 0;
-		high = 0;
-		wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+		rdmsrl(msrs->controls[i].addr, val);
+		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+		wrmsrl(msrs->controls[i].addr, val);
 	}
 
-	if (has_ibs && ibs_config.op_enabled) {
-		/* clear max count and enable */
-		low = 0;
-		high = 0;
-		wrmsr(MSR_AMD64_IBSOPCTL, low, high);
-	}
-#endif
+	op_amd_stop_ibs();
 }
 
 static void op_amd_shutdown(struct op_msrs const * const msrs)
@@ -316,11 +283,11 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
 	int i;
 
 	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
-		if (CTR_IS_RESERVED(msrs, i))
+		if (msrs->counters[i].addr)
 			release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
 	}
 	for (i = 0 ; i < NUM_CONTROLS ; ++i) {
-		if (CTRL_IS_RESERVED(msrs, i))
+		if (msrs->controls[i].addr)
 			release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
 	}
 }
@@ -491,14 +458,16 @@ static void op_amd_exit(void) {}
 #endif /* CONFIG_OPROFILE_IBS */
 
 struct op_x86_model_spec const op_amd_spec = {
-	.init			= op_amd_init,
-	.exit			= op_amd_exit,
 	.num_counters		= NUM_COUNTERS,
 	.num_controls		= NUM_CONTROLS,
+	.reserved		= MSR_AMD_EVENTSEL_RESERVED,
+	.event_mask		= OP_EVENT_MASK,
+	.init			= op_amd_init,
+	.exit			= op_amd_exit,
 	.fill_in_addresses	= &op_amd_fill_in_addresses,
 	.setup_ctrs		= &op_amd_setup_ctrs,
 	.check_ctrs		= &op_amd_check_ctrs,
 	.start			= &op_amd_start,
 	.stop			= &op_amd_stop,
-	.shutdown		= &op_amd_shutdown
+	.shutdown		= &op_amd_shutdown,
 };
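Because op_amd_spec uses designated initializers, reordering .init/.exit below num_counters is purely cosmetic; the fields are named, not positional. A minimal user-space analogue:

    #include <stdio.h>

    struct spec { int num_counters; int (*init)(void); };
    static int my_init(void) { return 0; }

    static const struct spec a = { .num_counters = 4, .init = my_init };
    static const struct spec b = { .init = my_init, .num_counters = 4 };

    int main(void)
    {
            printf("%d\n", a.num_counters == b.num_counters && a.init == b.init);
            return 0; /* prints 1: identical objects */
    }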

+ 29 - 31
arch/x86/oprofile/op_model_p4.c

@@ -32,6 +32,8 @@
 #define NUM_CCCRS_HT2 9
 #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
 
+#define OP_CTR_OVERFLOW			(1ULL<<31)
+
 static unsigned int num_counters = NUM_COUNTERS_NON_HT;
 static unsigned int num_controls = NUM_CONTROLS_NON_HT;
 
@@ -350,8 +352,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
 #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
 #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
 #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
-#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
-#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
 
 #define CCCR_RESERVED_BITS 0x38030FFF
 #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
@@ -361,17 +361,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
 #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
 #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
 #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
-#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
-#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
 #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
 #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
 
-#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
-#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
-#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
-
 
 /* this assigns a "stagger" to the current CPU, which is used throughout
    the code in this module as an extra array offset, to select the "even"
@@ -515,7 +507,7 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
 		if (ev->bindings[i].virt_counter & counter_bit) {
 
 			/* modify ESCR */
-			ESCR_READ(escr, high, ev, i);
+			rdmsr(ev->bindings[i].escr_address, escr, high);
 			ESCR_CLEAR(escr);
 			if (stag == 0) {
 				ESCR_SET_USR_0(escr, counter_config[ctr].user);
@@ -526,10 +518,11 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
 			}
 			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
 			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
-			ESCR_WRITE(escr, high, ev, i);
+			wrmsr(ev->bindings[i].escr_address, escr, high);
 
 			/* modify CCCR */
-			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
+			rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
+			      cccr, high);
 			CCCR_CLEAR(cccr);
 			CCCR_SET_REQUIRED_BITS(cccr);
 			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
@@ -537,7 +530,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
 				CCCR_SET_PMI_OVF_0(cccr);
 			else
 				CCCR_SET_PMI_OVF_1(cccr);
-			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
+			wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
+			      cccr, high);
 			return;
 		}
 	}
@@ -548,7 +542,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
 }
 
 
-static void p4_setup_ctrs(struct op_msrs const * const msrs)
+static void p4_setup_ctrs(struct op_x86_model_spec const *model,
+			  struct op_msrs const * const msrs)
 {
 	unsigned int i;
 	unsigned int low, high;
@@ -564,7 +559,7 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
 
 	/* clear the cccrs we will use */
 	for (i = 0 ; i < num_counters ; i++) {
-		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+		if (unlikely(!msrs->controls[i].addr))
 			continue;
 		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 		CCCR_CLEAR(low);
@@ -574,17 +569,18 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
 
 	/* clear all escrs (including those outside our concern) */
 	for (i = num_counters; i < num_controls; i++) {
-		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+		if (unlikely(!msrs->controls[i].addr))
 			continue;
 		wrmsr(msrs->controls[i].addr, 0, 0);
 	}
 
 	/* setup all counters */
 	for (i = 0 ; i < num_counters ; ++i) {
-		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
+		if (counter_config[i].enabled && msrs->controls[i].addr) {
 			reset_value[i] = counter_config[i].count;
 			pmc_setup_one_p4_counter(i);
-			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
+			wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address,
+			       -(s64)counter_config[i].count);
 		} else {
 			reset_value[i] = 0;
 		}
@@ -624,14 +620,16 @@ static int p4_check_ctrs(struct pt_regs * const regs,
 
 		real = VIRT_CTR(stag, i);
 
-		CCCR_READ(low, high, real);
-		CTR_READ(ctr, high, real);
-		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
+		rdmsr(p4_counters[real].cccr_address, low, high);
+		rdmsr(p4_counters[real].counter_address, ctr, high);
+		if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
 			oprofile_add_sample(regs, i);
-			CTR_WRITE(reset_value[i], real);
+			wrmsrl(p4_counters[real].counter_address,
+			       -(s64)reset_value[i]);
 			CCCR_CLEAR_OVF(low);
-			CCCR_WRITE(low, high, real);
-			CTR_WRITE(reset_value[i], real);
+			wrmsr(p4_counters[real].cccr_address, low, high);
+			wrmsrl(p4_counters[real].counter_address,
+			       -(s64)reset_value[i]);
 		}
 	}
 
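The removed CTR_WRITE macro wrote -(u32)l with a high word of -1; wrmsrl(..., -(s64)l) produces the same 64-bit two's-complement pattern for any non-zero l below 2^32. A user-space check of that equivalence:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t l = 100000;
            uint64_t old_form = ((uint64_t)0xFFFFFFFFu << 32) | (uint32_t)-l;
            uint64_t new_form = (uint64_t)-(int64_t)l;

            printf("%d\n", old_form == new_form); /* prints 1 */
            return 0;
    }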
@@ -653,9 +651,9 @@ static void p4_start(struct op_msrs const * const msrs)
 	for (i = 0; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
-		CCCR_READ(low, high, VIRT_CTR(stag, i));
+		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 		CCCR_SET_ENABLE(low);
-		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 	}
 }
 
@@ -670,9 +668,9 @@ static void p4_stop(struct op_msrs const * const msrs)
 	for (i = 0; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
-		CCCR_READ(low, high, VIRT_CTR(stag, i));
+		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 		CCCR_SET_DISABLE(low);
-		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 	}
 }
 
@@ -681,7 +679,7 @@ static void p4_shutdown(struct op_msrs const * const msrs)
 	int i;
 
 	for (i = 0 ; i < num_counters ; ++i) {
-		if (CTR_IS_RESERVED(msrs, i))
+		if (msrs->counters[i].addr)
 			release_perfctr_nmi(msrs->counters[i].addr);
 	}
 	/*
@@ -690,7 +688,7 @@ static void p4_shutdown(struct op_msrs const * const msrs)
 	 * This saves a few bits.
 	 */
 	for (i = num_counters ; i < num_controls ; ++i) {
-		if (CTRL_IS_RESERVED(msrs, i))
+		if (msrs->controls[i].addr)
 			release_evntsel_nmi(msrs->controls[i].addr);
 	}
 }

+ 42 - 53
arch/x86/oprofile/op_model_ppro.c

@@ -10,6 +10,7 @@
  * @author Philippe Elie
  * @author Graydon Hoare
  * @author Andi Kleen
+ * @author Robert Richter <robert.richter@amd.com>
  */
 
 #include <linux/oprofile.h>
@@ -18,7 +19,6 @@
 #include <asm/msr.h>
 #include <asm/apic.h>
 #include <asm/nmi.h>
-#include <asm/perf_counter.h>
 
 #include "op_x86_model.h"
 #include "op_counter.h"
@@ -26,20 +26,7 @@
 static int num_counters = 2;
 static int counter_width = 32;
 
-#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))
-
-#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
-#define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
-#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
-#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
-#define CTRL_CLEAR(x) (x &= (1<<21))
-#define CTRL_SET_ENABLE(val) (val |= 1<<20)
-#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
-#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
-#define CTRL_SET_UM(val, m) (val |= (m << 8))
-#define CTRL_SET_EVENT(val, e) (val |= e)
+#define MSR_PPRO_EVENTSEL_RESERVED	((0xFFFFFFFFULL<<32)|(1ULL<<21))
 
 static u64 *reset_value;
 
@@ -63,9 +50,10 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 }
 
 
-static void ppro_setup_ctrs(struct op_msrs const * const msrs)
+static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
+			    struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 
 	if (!reset_value) {
@@ -94,35 +82,29 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 
 	/* clear all counters */
 	for (i = 0 ; i < num_counters; ++i) {
-		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+		if (unlikely(!msrs->controls[i].addr))
 			continue;
-		CTRL_READ(low, high, msrs, i);
-		CTRL_CLEAR(low);
-		CTRL_WRITE(low, high, msrs, i);
+		rdmsrl(msrs->controls[i].addr, val);
+		val &= model->reserved;
+		wrmsrl(msrs->controls[i].addr, val);
 	}
 
 	/* avoid a false detection of ctr overflows in NMI handler */
 	for (i = 0; i < num_counters; ++i) {
-		if (unlikely(!CTR_IS_RESERVED(msrs, i)))
+		if (unlikely(!msrs->counters[i].addr))
 			continue;
 		wrmsrl(msrs->counters[i].addr, -1LL);
 	}
 
 	/* enable active counters */
 	for (i = 0; i < num_counters; ++i) {
-		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
+		if (counter_config[i].enabled && msrs->counters[i].addr) {
 			reset_value[i] = counter_config[i].count;
-
 			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
-
-			CTRL_READ(low, high, msrs, i);
-			CTRL_CLEAR(low);
-			CTRL_SET_ENABLE(low);
-			CTRL_SET_USR(low, counter_config[i].user);
-			CTRL_SET_KERN(low, counter_config[i].kernel);
-			CTRL_SET_UM(low, counter_config[i].unit_mask);
-			CTRL_SET_EVENT(low, counter_config[i].event);
-			CTRL_WRITE(low, high, msrs, i);
+			rdmsrl(msrs->controls[i].addr, val);
+			val &= model->reserved;
+			val |= op_x86_get_ctrl(model, &counter_config[i]);
+			wrmsrl(msrs->controls[i].addr, val);
 		} else {
 			reset_value[i] = 0;
 		}
@@ -147,10 +129,10 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
 		if (!reset_value[i])
 			continue;
 		rdmsrl(msrs->counters[i].addr, val);
-		if (CTR_OVERFLOWED(val)) {
-			oprofile_add_sample(regs, i);
-			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
-		}
+		if (val & (1ULL << (counter_width - 1)))
+			continue;
+		oprofile_add_sample(regs, i);
+		wrmsrl(msrs->counters[i].addr, -reset_value[i]);
 	}
 
 out:
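ppro_check_ctrs now tests bit (counter_width - 1) directly: the bit stays set while the negated period counts up and is clear once the counter has wrapped. Illustrative user-space sketch with a 32-bit counter width:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            int counter_width = 32;
            uint64_t val = (uint64_t)-(int64_t)5000 & 0xFFFFFFFFULL; /* armed */

            printf("pending:    %d\n", !!(val & (1ULL << (counter_width - 1))));
            val = (val + 5000) & 0xFFFFFFFFULL;       /* 5000 events later */
            printf("overflowed: %d\n", !(val & (1ULL << (counter_width - 1))));
            return 0;
    }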
@@ -171,16 +153,16 @@ out:
 
 static void ppro_start(struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 
 	if (!reset_value)
 		return;
 	for (i = 0; i < num_counters; ++i) {
 		if (reset_value[i]) {
-			CTRL_READ(low, high, msrs, i);
-			CTRL_SET_ACTIVE(low);
-			CTRL_WRITE(low, high, msrs, i);
+			rdmsrl(msrs->controls[i].addr, val);
+			val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+			wrmsrl(msrs->controls[i].addr, val);
 		}
 	}
 }
@@ -188,7 +170,7 @@ static void ppro_start(struct op_msrs const * const msrs)
 
 static void ppro_stop(struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 
 	if (!reset_value)
@@ -196,9 +178,9 @@ static void ppro_stop(struct op_msrs const * const msrs)
 	for (i = 0; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
-		CTRL_READ(low, high, msrs, i);
-		CTRL_SET_INACTIVE(low);
-		CTRL_WRITE(low, high, msrs, i);
+		rdmsrl(msrs->controls[i].addr, val);
+		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+		wrmsrl(msrs->controls[i].addr, val);
 	}
 }
 
@@ -207,11 +189,11 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
 	int i;
 
 	for (i = 0 ; i < num_counters ; ++i) {
-		if (CTR_IS_RESERVED(msrs, i))
+		if (msrs->counters[i].addr)
 			release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
 	}
 	for (i = 0 ; i < num_counters ; ++i) {
-		if (CTRL_IS_RESERVED(msrs, i))
+		if (msrs->controls[i].addr)
 			release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
 	}
 	if (reset_value) {
@@ -221,9 +203,10 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
 }
 
 
-struct op_x86_model_spec op_ppro_spec = {
-	.num_counters		= 2,	/* can be overriden */
-	.num_controls		= 2,	/* dito */
+struct op_x86_model_spec const op_ppro_spec = {
+	.num_counters		= 2,
+	.num_controls		= 2,
+	.reserved		= MSR_PPRO_EVENTSEL_RESERVED,
 	.fill_in_addresses	= &ppro_fill_in_addresses,
 	.setup_ctrs		= &ppro_setup_ctrs,
 	.check_ctrs		= &ppro_check_ctrs,
@@ -241,7 +224,7 @@ struct op_x86_model_spec op_ppro_spec = {
  * the specific CPU.
  */
 
-void arch_perfmon_setup_counters(void)
+static void arch_perfmon_setup_counters(void)
 {
 	union cpuid10_eax eax;
 
@@ -259,11 +242,17 @@ void arch_perfmon_setup_counters(void)
 
 	op_arch_perfmon_spec.num_counters = num_counters;
 	op_arch_perfmon_spec.num_controls = num_counters;
-	op_ppro_spec.num_counters = num_counters;
-	op_ppro_spec.num_controls = num_counters;
+}
+
+static int arch_perfmon_init(struct oprofile_operations *ignore)
+{
+	arch_perfmon_setup_counters();
+	return 0;
 }
 
 struct op_x86_model_spec op_arch_perfmon_spec = {
+	.reserved		= MSR_PPRO_EVENTSEL_RESERVED,
+	.init			= &arch_perfmon_init,
 	/* num_counters/num_controls filled in at runtime */
 	.fill_in_addresses	= &ppro_fill_in_addresses,
 	/* user space does the cpuid check for available events */
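arch_perfmon_setup_counters() sizes the spec from CPUID leaf 0xA (union cpuid10_eax), whose EAX reports the counter count in bits 15:8. A rough user-space equivalent using GCC's <cpuid.h>, assuming an Intel CPU with architectural perfmon:

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(0xA, &eax, &ebx, &ecx, &edx))
                    return 1; /* leaf not supported */
            printf("version %u, %u counters, %u bits wide\n",
                   eax & 0xFF, (eax >> 8) & 0xFF, (eax >> 16) & 0xFF);
            return 0;
    }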

+ 27 - 20
arch/x86/oprofile/op_x86_model.h

@@ -6,19 +6,18 @@
  * @remark Read the file COPYING
  *
  * @author Graydon Hoare
+ * @author Robert Richter <robert.richter@amd.com>
  */
 
 #ifndef OP_X86_MODEL_H
 #define OP_X86_MODEL_H
 
-struct op_saved_msr {
-	unsigned int high;
-	unsigned int low;
-};
+#include <asm/types.h>
+#include <asm/perf_counter.h>
 
 struct op_msr {
-	unsigned long addr;
-	struct op_saved_msr saved;
+	unsigned long	addr;
+	u64		saved;
 };
 
 struct op_msrs {
@@ -28,29 +27,37 @@ struct op_msrs {
 
 struct pt_regs;
 
+struct oprofile_operations;
+
 /* The model vtable abstracts the differences between
  * various x86 CPU models' perfctr support.
  */
 struct op_x86_model_spec {
-	int (*init)(struct oprofile_operations *ops);
-	void (*exit)(void);
-	unsigned int num_counters;
-	unsigned int num_controls;
-	void (*fill_in_addresses)(struct op_msrs * const msrs);
-	void (*setup_ctrs)(struct op_msrs const * const msrs);
-	int (*check_ctrs)(struct pt_regs * const regs,
-		struct op_msrs const * const msrs);
-	void (*start)(struct op_msrs const * const msrs);
-	void (*stop)(struct op_msrs const * const msrs);
-	void (*shutdown)(struct op_msrs const * const msrs);
+	unsigned int	num_counters;
+	unsigned int	num_controls;
+	u64		reserved;
+	u16		event_mask;
+	int		(*init)(struct oprofile_operations *ops);
+	void		(*exit)(void);
+	void		(*fill_in_addresses)(struct op_msrs * const msrs);
+	void		(*setup_ctrs)(struct op_x86_model_spec const *model,
+				      struct op_msrs const * const msrs);
+	int		(*check_ctrs)(struct pt_regs * const regs,
+				      struct op_msrs const * const msrs);
+	void		(*start)(struct op_msrs const * const msrs);
+	void		(*stop)(struct op_msrs const * const msrs);
+	void		(*shutdown)(struct op_msrs const * const msrs);
 };
 
-extern struct op_x86_model_spec op_ppro_spec;
+struct op_counter_config;
+
+extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
+			   struct op_counter_config *counter_config);
+
+extern struct op_x86_model_spec const op_ppro_spec;
 extern struct op_x86_model_spec const op_p4_spec;
 extern struct op_x86_model_spec const op_p4_ht2_spec;
 extern struct op_x86_model_spec const op_amd_spec;
 extern struct op_x86_model_spec op_arch_perfmon_spec;
 
-extern void arch_perfmon_setup_counters(void);
-
 #endif /* OP_X86_MODEL_H */
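The reworked op_x86_model_spec is a classic C vtable: nmi_int.c drives every CPU model through one const pointer, and setup_ctrs now receives that pointer so implementations can read model->reserved. A stripped-down user-space analogue of the pattern:

    #include <stdio.h>

    struct model_spec {
            unsigned int num_counters;
            void (*setup_ctrs)(const struct model_spec *model);
    };

    static void demo_setup(const struct model_spec *model)
    {
            printf("setting up %u counters\n", model->num_counters);
    }

    static const struct model_spec demo_spec = {
            .num_counters = 4,
            .setup_ctrs   = demo_setup,
    };

    int main(void)
    {
            const struct model_spec *model = &demo_spec; /* picked at probe time */

            /* mirrors model->setup_ctrs(model, msrs) in nmi_cpu_setup() */
            model->setup_ctrs(model);
            return 0;
    }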

+ 15 - 1
drivers/oprofile/cpu_buffer.c

@@ -21,7 +21,6 @@
 
 #include <linux/sched.h>
 #include <linux/oprofile.h>
-#include <linux/vmalloc.h>
 #include <linux/errno.h>
 
 #include "event_buffer.h"
@@ -407,6 +406,21 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val)
 	return op_cpu_buffer_add_data(entry, val);
 }
 
+int oprofile_add_data64(struct op_entry *entry, u64 val)
+{
+	if (!entry->event)
+		return 0;
+	if (op_cpu_buffer_get_size(entry) < 2)
+		/*
+		 * the function returns 0 to indicate a too small
+		 * buffer, even if there is some space left
+		 */
+		return 0;
+	if (!op_cpu_buffer_add_data(entry, (u32)val))
+		return 0;
+	return op_cpu_buffer_add_data(entry, (u32)(val >> 32));
+}
+
 int oprofile_write_commit(struct op_entry *entry)
 {
 	if (!entry->event)
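oprofile_add_data64() stores a u64 as two consecutive buffer slots and refuses to start unless both fit, so a reader never sees half a value. A toy user-space version of that contract (fixed-size buffer and names are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    #define SLOTS 8
    static unsigned long buf[SLOTS];
    static unsigned int used;

    static int add_data(unsigned long val)
    {
            if (used >= SLOTS)
                    return 0;
            buf[used++] = val;
            return 1;
    }

    static int add_data64(uint64_t val)
    {
            if (SLOTS - used < 2)
                    return 0; /* never write half a value */
            return add_data((uint32_t)val) && add_data((uint32_t)(val >> 32));
    }

    int main(void)
    {
            printf("%d\n", add_data64(0x1122334455667788ULL)); /* 1 */
            printf("low %#lx high %#lx\n", buf[0], buf[1]);
            return 0;
    }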

+ 1 - 0
drivers/oprofile/oprofile_stats.c

@@ -33,6 +33,7 @@ void oprofile_reset_stats(void)
 	atomic_set(&oprofile_stats.sample_lost_no_mm, 0);
 	atomic_set(&oprofile_stats.sample_lost_no_mapping, 0);
 	atomic_set(&oprofile_stats.event_lost_overflow, 0);
+	atomic_set(&oprofile_stats.bt_lost_no_mapping, 0);
 }
 

+ 1 - 1
include/linux/oprofile.h

@@ -171,7 +171,6 @@ struct op_sample;
 struct op_entry {
 	struct ring_buffer_event *event;
 	struct op_sample *sample;
-	unsigned long irq_flags;
 	unsigned long size;
 	unsigned long *data;
 };
@@ -180,6 +179,7 @@ void oprofile_write_reserve(struct op_entry *entry,
 			    struct pt_regs * const regs,
 			    unsigned long pc, int code, int size);
 int oprofile_add_data(struct op_entry *entry, unsigned long val);
+int oprofile_add_data64(struct op_entry *entry, u64 val);
 int oprofile_write_commit(struct op_entry *entry);
 
 #endif /* OPROFILE_H */