Browse Source

Merge tag 'metag-for-v3.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/metag

Pull arch/metag update from James Hogan:

 - Various fixes for the interrupting perf counter handling in metag's
   perf backend.

 - Add OProfile support based on perf.

 - Set up cache partitions for SMP so the bootloader doesn't have to.

 - Patch from Paul Bolle to remove ARCH_POPULATES_NODE_MAP again
   (touches microblaze too).

 - Add TLS pointer regset to metag ptrace api.

 - Add exported metag DSP extended context handling header <asm/ech.h>.

 - Increase defconfig log buffer size to 128KiB.

 - Various fixes, typos, missing exports.

* tag 'metag-for-v3.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/metag:
  metag: defconfigs: increase log buffer 8KiB => 128KiB
  metag: avoid unnecessary builtin dtb rebuilds
  metag: add exported <asm/ech.h> for extended context handling
  metag: export _metag_da_present and cpu_2_hwthread_id
  metag: ptrace: Implement NT_METAG_TLS
  memblock: Kill ARCH_POPULATES_NODE_MAP once more
  metag: cachepart: fix get_global_dcache_size() typo
  metag: cachepart: take into account small cache bits
  metag: smp: copy cache partition and enable GCOn
  metag: OProfile support
  metag: perf: prepare for use by oprofile
  metag: perf: don't reset TXTACTCYC
  metag: perf: use hard_processor_id() to get thread
  metag: perf: fix frequency sampling (dynamic period)
  metag: perf: add missing prev_count updates
  metag: perf: fixes for interrupting perf counters
  metag: perf: fix wrap handling in delta calculation
  metag: perf: fix core internal / perf channel mux
Linus Torvalds 12 years ago
parent
commit
87c1f0f8c9

+ 4 - 0
arch/metag/Kconfig

@@ -25,6 +25,7 @@ config METAG
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_MOD_ARCH_SPECIFIC
+	select HAVE_OPROFILE
 	select HAVE_PERF_EVENTS
 	select HAVE_SYSCALL_TRACEPOINTS
 	select IRQ_DOMAIN
@@ -209,6 +210,9 @@ config METAG_PERFCOUNTER_IRQS
 	  When disabled, Performance Counters information will be collected
 	  based on Timer Interrupt.
 
+config HW_PERF_EVENTS
+	def_bool METAG_PERFCOUNTER_IRQS && PERF_EVENTS
+
 config METAG_DA
 	bool "DA support"
 	help

+ 2 - 0
arch/metag/Makefile

@@ -49,6 +49,8 @@ core-y					+= arch/metag/mm/
 libs-y					+= arch/metag/lib/
 libs-y					+= arch/metag/tbx/
 
+drivers-$(CONFIG_OPROFILE)		+= arch/metag/oprofile/
+
 boot					:= arch/metag/boot
 
 boot_targets				+= uImage

+ 7 - 3
arch/metag/boot/dts/Makefile

@@ -4,13 +4,17 @@ dtb-y	+= skeleton.dtb
 builtindtb-y				:= skeleton
 
 ifneq ($(CONFIG_METAG_BUILTIN_DTB_NAME),"")
-	builtindtb-y			:= $(CONFIG_METAG_BUILTIN_DTB_NAME)
+	builtindtb-y			:= $(patsubst "%",%,$(CONFIG_METAG_BUILTIN_DTB_NAME))
 endif
-obj-$(CONFIG_METAG_BUILTIN_DTB)	+= $(patsubst "%",%,$(builtindtb-y)).dtb.o
+
+dtb-$(CONFIG_METAG_BUILTIN_DTB)	+= $(builtindtb-y).dtb
+obj-$(CONFIG_METAG_BUILTIN_DTB)	+= $(builtindtb-y).dtb.o
 
 targets	+= dtbs
 targets	+= $(dtb-y)
 
+.SECONDARY: $(obj)/$(builtindtb-y).dtb.S
+
 dtbs: $(addprefix $(obj)/, $(dtb-y))
 
-clean-files += *.dtb
+clean-files += *.dtb *.dtb.S

+ 0 - 1
arch/metag/configs/meta1_defconfig

@@ -1,6 +1,5 @@
 # CONFIG_LOCALVERSION_AUTO is not set
 # CONFIG_SWAP is not set
-CONFIG_LOG_BUF_SHIFT=13
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_SYSFS_DEPRECATED_V2=y
 CONFIG_KALLSYMS_ALL=y

+ 0 - 1
arch/metag/configs/meta2_defconfig

@@ -1,7 +1,6 @@
 # CONFIG_LOCALVERSION_AUTO is not set
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=13
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_SYSFS_DEPRECATED_V2=y
 CONFIG_KALLSYMS_ALL=y

+ 0 - 1
arch/metag/configs/meta2_smp_defconfig

@@ -1,7 +1,6 @@
 # CONFIG_LOCALVERSION_AUTO is not set
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=13
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_SYSFS_DEPRECATED_V2=y
 CONFIG_KALLSYMS_ALL=y

+ 3 - 0
arch/metag/include/asm/metag_mem.h

@@ -700,6 +700,9 @@
 #define     SYSC_xCPARTG_AND_S    8
 #define     SYSC_xCPARTL_OR_BITS  0x000F0000 /* Ors into top 4 bits */
 #define     SYSC_xCPARTL_OR_S     16
+#ifdef METAC_2_1
+#define     SYSC_DCPART_GCON_BIT  0x00100000 /* Coherent shared local */
+#endif /* METAC_2_1 */
 #define     SYSC_xCPARTG_OR_BITS  0x0F000000 /* Ors into top 4 bits */
 #define     SYSC_xCPARTG_OR_S     24
 #define     SYSC_CWRMODE_BIT      0x80000000 /* Write cache mode bit */

+ 1 - 0
arch/metag/include/uapi/asm/Kbuild

@@ -2,6 +2,7 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 header-y += byteorder.h
+header-y += ech.h
 header-y += ptrace.h
 header-y += resource.h
 header-y += sigcontext.h

+ 15 - 0
arch/metag/include/uapi/asm/ech.h

@@ -0,0 +1,15 @@
+#ifndef _UAPI_METAG_ECH_H
+#define _UAPI_METAG_ECH_H
+
+/*
+ * These bits can be set in the top half of the D0.8 register when DSP context
+ * switching is enabled, in order to support partial DSP context save/restore.
+ */
+
+#define TBICTX_XEXT_BIT	0x1000	/* Enable extended context save */
+#define TBICTX_XTDP_BIT	0x0800	/* DSP accumulators/RAM/templates */
+#define TBICTX_XHL2_BIT	0x0400	/* Hardware loops */
+#define TBICTX_XAXX_BIT	0x0200	/* Extended AX registers (A*.4-7) */
+#define TBICTX_XDX8_BIT	0x0100	/* Extended DX registers (D*.8-15) */
+
+#endif /* _UAPI_METAG_ECH_H */

+ 11 - 5
arch/metag/kernel/cachepart.c

@@ -24,15 +24,21 @@
 unsigned int get_dcache_size(void)
 {
 	unsigned int config2 = metag_in32(METAC_CORE_CONFIG2);
-	return 0x1000 << ((config2 & METAC_CORECFG2_DCSZ_BITS)
-				>> METAC_CORECFG2_DCSZ_S);
+	unsigned int sz = 0x1000 << ((config2 & METAC_CORECFG2_DCSZ_BITS)
+				     >> METAC_CORECFG2_DCSZ_S);
+	if (config2 & METAC_CORECFG2_DCSMALL_BIT)
+		sz >>= 6;
+	return sz;
 }
 
 unsigned int get_icache_size(void)
 {
 	unsigned int config2 = metag_in32(METAC_CORE_CONFIG2);
-	return 0x1000 << ((config2 & METAC_CORE_C2ICSZ_BITS)
-				>> METAC_CORE_C2ICSZ_S);
+	unsigned int sz = 0x1000 << ((config2 & METAC_CORE_C2ICSZ_BITS)
+				     >> METAC_CORE_C2ICSZ_S);
+	if (config2 & METAC_CORECFG2_ICSMALL_BIT)
+		sz >>= 6;
+	return sz;
 }
 
 unsigned int get_global_dcache_size(void)
@@ -61,7 +67,7 @@ static unsigned int get_thread_cache_size(unsigned int cache, int thread_id)
 		return 0;
 #if PAGE_OFFSET >= LINGLOBAL_BASE
 	/* Checking for global cache */
-	cache_size = (cache == DCACHE ? get_global_dache_size() :
+	cache_size = (cache == DCACHE ? get_global_dcache_size() :
 		get_global_icache_size());
 	offset = 8;
 #else

+ 2 - 0
arch/metag/kernel/da.c

@@ -5,12 +5,14 @@
  */
 
 
+#include <linux/export.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <asm/da.h>
 #include <asm/metag_mem.h>
 
 bool _metag_da_present;
+EXPORT_SYMBOL_GPL(_metag_da_present);
 
 int __init metag_da_probe(void)
 {

+ 8 - 0
arch/metag/kernel/head.S

@@ -1,6 +1,7 @@
 	! Copyright 2005,2006,2007,2009 Imagination Technologies
 
 #include <linux/init.h>
+#include <asm/metag_mem.h>
 #include <generated/asm-offsets.h>
 #undef __exit
 
@@ -48,6 +49,13 @@ __exit:
 	.global _secondary_startup
 	.type _secondary_startup,function
 _secondary_startup:
+#if CONFIG_PAGE_OFFSET < LINGLOBAL_BASE
+	! In case GCOn has just been turned on we need to fence any writes that
+	! the boot thread might have performed prior to coherency taking effect.
+	MOVT	D0Re0,#HI(LINSYSEVENT_WR_ATOMIC_UNLOCK)
+	MOV	D1Re0,#0
+	SETD	[D0Re0], D1Re0
+#endif
 	MOVT	A0StP,#HI(_secondary_data_stack)
 	ADD	A0StP,A0StP,#LO(_secondary_data_stack)
 	GETD	A0StP,[A0StP]

+ 51 - 23
arch/metag/kernel/perf/perf_event.c

@@ -22,9 +22,9 @@
 #include <linux/slab.h>
 
 #include <asm/core_reg.h>
-#include <asm/hwthread.h>
 #include <asm/io.h>
 #include <asm/irq.h>
+#include <asm/processor.h>
 
 #include "perf_event.h"
 
@@ -40,10 +40,10 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
 /* PMU admin */
 const char *perf_pmu_name(void)
 {
-	if (metag_pmu)
-		return metag_pmu->pmu.name;
+	if (!metag_pmu)
+		return NULL;
 
-	return NULL;
+	return metag_pmu->name;
 }
 EXPORT_SYMBOL_GPL(perf_pmu_name);
 
@@ -171,6 +171,7 @@ static int metag_pmu_event_init(struct perf_event *event)
 	switch (event->attr.type) {
 	case PERF_TYPE_HARDWARE:
 	case PERF_TYPE_HW_CACHE:
+	case PERF_TYPE_RAW:
 		err = _hw_perf_event_init(event);
 		break;
 
@@ -211,9 +212,10 @@ again:
 	/*
 	 * Calculate the delta and add it to the counter.
 	 */
-	delta = new_raw_count - prev_raw_count;
+	delta = (new_raw_count - prev_raw_count) & MAX_PERIOD;
 
 	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
 }
 
 int metag_pmu_event_set_period(struct perf_event *event,
@@ -223,6 +225,10 @@ int metag_pmu_event_set_period(struct perf_event *event,
 	s64 period = hwc->sample_period;
 	int ret = 0;
 
+	/* The period may have been changed */
+	if (unlikely(period != hwc->last_period))
+		left += period - hwc->last_period;
+
 	if (unlikely(left <= -period)) {
 		left = period;
 		local64_set(&hwc->period_left, left);
@@ -240,8 +246,10 @@ int metag_pmu_event_set_period(struct perf_event *event,
 	if (left > (s64)metag_pmu->max_period)
 		left = metag_pmu->max_period;
 
-	if (metag_pmu->write)
-		metag_pmu->write(idx, (u64)(-left) & MAX_PERIOD);
+	if (metag_pmu->write) {
+		local64_set(&hwc->prev_count, -(s32)left);
+		metag_pmu->write(idx, -left & MAX_PERIOD);
+	}
 
 	perf_event_update_userpage(event);
 
@@ -549,6 +557,10 @@ static int _hw_perf_event_init(struct perf_event *event)
 		if (err)
 			return err;
 		break;
+
+	case PERF_TYPE_RAW:
+		mapping = attr->config;
+		break;
 	}
 
 	/* Return early if the event is unsupported */
@@ -610,15 +622,13 @@ static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx)
 		WARN_ONCE((config != 0x100),
 			"invalid configuration (%d) for counter (%d)\n",
 			config, idx);
-
-		/* Reset the cycle count */
-		__core_reg_set(TXTACTCYC, 0);
+		local64_set(&event->prev_count, __core_reg_get(TXTACTCYC));
 		goto unlock;
 	}
 
 	/* Check for a core internal or performance channel event. */
 	if (tmp) {
-		void *perf_addr = (void *)PERF_COUNT(idx);
+		void *perf_addr;
 
 		/*
 		 * Anything other than a cycle count will write the low-
@@ -632,9 +642,14 @@ static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx)
 		case 0xf0:
 			perf_addr = (void *)PERF_CHAN(idx);
 			break;
+
+		default:
+			perf_addr = NULL;
+			break;
 		}
 
-		metag_out32((tmp & 0x0f), perf_addr);
+		if (perf_addr)
+			metag_out32((config & 0x0f), perf_addr);
 
 		/*
 		 * Now we use the high nibble as the performance event to
@@ -643,13 +658,21 @@ static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx)
 		config = tmp >> 4;
 	}
 
-	/*
-	 * Enabled counters start from 0. Early cores clear the count on
-	 * write but newer cores don't, so we make sure that the count is
-	 * set to 0.
-	 */
 	tmp = ((config & 0xf) << 28) |
-			((1 << 24) << cpu_2_hwthread_id[get_cpu()]);
+			((1 << 24) << hard_processor_id());
+	if (metag_pmu->max_period)
+		/*
+		 * Cores supporting overflow interrupts may have had the counter
+		 * set to a specific value that needs preserving.
+		 */
+		tmp |= metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
+	else
+		/*
+		 * Older cores reset the counter on write, so prev_count needs
+		 * resetting too so we can calculate a correct delta.
+		 */
+		local64_set(&event->prev_count, 0);
+
 	metag_out32(tmp, PERF_COUNT(idx));
 unlock:
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
@@ -693,9 +716,8 @@ static u64 metag_pmu_read_counter(int idx)
 {
 	u32 tmp = 0;
 
-	/* The act of reading the cycle counter also clears it */
 	if (METAG_INST_COUNTER == idx) {
-		__core_reg_swap(TXTACTCYC, tmp);
+		tmp = __core_reg_get(TXTACTCYC);
 		goto out;
 	}
 
@@ -764,10 +786,16 @@ static irqreturn_t metag_pmu_counter_overflow(int irq, void *dev)
 
 	/*
 	 * Enable the counter again once core overflow processing has
-	 * completed.
+	 * completed. Note the counter value may have been modified while it was
+	 * inactive to set it up ready for the next interrupt.
 	 */
-	if (!perf_event_overflow(event, &sampledata, regs))
+	if (!perf_event_overflow(event, &sampledata, regs)) {
+		__global_lock2(flags);
+		counter = (counter & 0xff000000) |
+			  (metag_in32(PERF_COUNT(idx)) & 0x00ffffff);
 		metag_out32(counter, PERF_COUNT(idx));
+		__global_unlock2(flags);
+	}
 
 	return IRQ_HANDLED;
 }
@@ -830,7 +858,7 @@ static int __init init_hw_perf_events(void)
 			metag_pmu->max_period = 0;
 		}
 
-		metag_pmu->name = "Meta 2";
+		metag_pmu->name = "meta2";
 		metag_pmu->version = version;
 		metag_pmu->pmu = pmu;
 	}

+ 34 - 0
arch/metag/kernel/ptrace.c

@@ -288,10 +288,36 @@ static int metag_rp_state_set(struct task_struct *target,
 	return metag_rp_state_copyin(regs, pos, count, kbuf, ubuf);
 }
 
+/*
+ * Regset .get handler for the TLS regset: copies the target's thread.tls_ptr
+ * out through user_regset_copyout() and returns that helper's result.
+ */
+static int metag_tls_get(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			void *kbuf, void __user *ubuf)
+{
+	/* Local copy so its address can be handed to the copy helper */
+	void __user *tls = target->thread.tls_ptr;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
+}
+
+/*
+ * Regset .set handler for the TLS regset: updates the target's thread.tls_ptr
+ * from the supplied kernel or user buffer.
+ *
+ * Returns 0 on success, or the error returned by user_regset_copyin(), in
+ * which case the stored TLS pointer is left untouched.
+ */
+static int metag_tls_set(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	/*
+	 * Start from the current value: user_regset_copyin() copies nothing
+	 * and returns 0 for an empty write (count == 0), so without this an
+	 * uninitialised stack value would be stored as the TLS pointer.
+	 */
+	void __user *tls = target->thread.tls_ptr;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
+	if (ret)
+		return ret;
+
+	target->thread.tls_ptr = tls;
+	return ret;
+}
+
 enum metag_regset {
 	REGSET_GENERAL,
 	REGSET_CBUF,
 	REGSET_READPIPE,
+	REGSET_TLS,
 };
 
 static const struct user_regset metag_regsets[] = {
@@ -319,6 +345,14 @@ static const struct user_regset metag_regsets[] = {
 		.get = metag_rp_state_get,
 		.set = metag_rp_state_set,
 	},
+	[REGSET_TLS] = {
+		.core_note_type = NT_METAG_TLS,
+		.n = 1,
+		.size = sizeof(void *),
+		.align = sizeof(void *),
+		.get = metag_tls_get,
+		.set = metag_tls_set,
+	},
 };
 
 static const struct user_regset_view user_metag_view = {

+ 1 - 0
arch/metag/kernel/setup.c

@@ -124,6 +124,7 @@ struct machine_desc *machine_desc __initdata;
 u8 cpu_2_hwthread_id[NR_CPUS] __read_mostly = {
 	[0 ... NR_CPUS-1] = BAD_HWTHREAD_ID
 };
+EXPORT_SYMBOL_GPL(cpu_2_hwthread_id);
 
 /*
  * Map a hardware thread ID to a Linux CPU number

+ 115 - 0
arch/metag/kernel/smp.c

@@ -28,6 +28,8 @@
 #include <asm/cachepart.h>
 #include <asm/core_reg.h>
 #include <asm/cpu.h>
+#include <asm/global_lock.h>
+#include <asm/metag_mem.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -37,6 +39,9 @@
 #include <asm/hwthread.h>
 #include <asm/traps.h>
 
+#define SYSC_DCPART(n)	(SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n))
+#define SYSC_ICPART(n)	(SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n))
+
 DECLARE_PER_CPU(PTBI, pTBI);
 
 void *secondary_data_stack;
@@ -99,6 +104,114 @@ int __cpuinit boot_secondary(unsigned int thread, struct task_struct *idle)
 	return 0;
 }
 
+/**
+ * describe_cachepart_change: describe a change to cache partitions.
+ * @thread:	Hardware thread number.
+ * @label:	Label of cache type, e.g. "dcache" or "icache".
+ * @sz:		Total size of the cache.
+ * @old:	Old cache partition configuration (*CPART* register).
+ * @new:	New cache partition configuration (*CPART* register).
+ *
+ * If the cache partition has changed, prints a message to the log describing
+ * those changes; returns silently when @old and @new are identical.
+ *
+ * The local ("L:") and global ("G:") OR/AND fields are only printed when they
+ * differ, as old->new pairs scaled to sixteenths of @sz. Changes to the write
+ * mode and GCOn bits are shown as "+"/"-" flags.
+ */
+static __cpuinit void describe_cachepart_change(unsigned int thread,
+						const char *label,
+						unsigned int sz,
+						unsigned int old,
+						unsigned int new)
+{
+	unsigned int lor1, land1, gor1, gand1;
+	unsigned int lor2, land2, gor2, gand2;
+	unsigned int diff = old ^ new;
+
+	if (!diff)
+		return;
+
+	/* @thread is unsigned, so print it with %u */
+	pr_info("Thread %u: %s partition changed:", thread, label);
+	if (diff & (SYSC_xCPARTL_OR_BITS | SYSC_xCPARTL_AND_BITS)) {
+		lor1   = (old & SYSC_xCPARTL_OR_BITS)  >> SYSC_xCPARTL_OR_S;
+		lor2   = (new & SYSC_xCPARTL_OR_BITS)  >> SYSC_xCPARTL_OR_S;
+		land1  = (old & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S;
+		land2  = (new & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S;
+		pr_cont(" L:%#x+%#x->%#x+%#x",
+			(lor1 * sz) >> 4,
+			((land1 + 1) * sz) >> 4,
+			(lor2 * sz) >> 4,
+			((land2 + 1) * sz) >> 4);
+	}
+	if (diff & (SYSC_xCPARTG_OR_BITS | SYSC_xCPARTG_AND_BITS)) {
+		gor1   = (old & SYSC_xCPARTG_OR_BITS)  >> SYSC_xCPARTG_OR_S;
+		gor2   = (new & SYSC_xCPARTG_OR_BITS)  >> SYSC_xCPARTG_OR_S;
+		gand1  = (old & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;
+		gand2  = (new & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;
+		pr_cont(" G:%#x+%#x->%#x+%#x",
+			(gor1 * sz) >> 4,
+			((gand1 + 1) * sz) >> 4,
+			(gor2 * sz) >> 4,
+			((gand2 + 1) * sz) >> 4);
+	}
+	if (diff & SYSC_CWRMODE_BIT)
+		pr_cont(" %sWR",
+			(new & SYSC_CWRMODE_BIT) ? "+" : "-");
+	if (diff & SYSC_DCPART_GCON_BIT)
+		pr_cont(" %sGCOn",
+			(new & SYSC_DCPART_GCON_BIT) ? "+" : "-");
+	pr_cont("\n");
+}
+
+/**
+ * setup_smp_cache: ensure cache coherency for new SMP thread.
+ * @thread:	New hardware thread number.
+ *
+ * Ensures that coherency is enabled and that the threads share the same cache
+ * partitions. Any resulting change to the dcache or icache partition registers
+ * is reported through describe_cachepart_change().
+ */
+static __cpuinit void setup_smp_cache(unsigned int thread)
+{
+	unsigned int this_thread, lflags;
+	unsigned int dcsz, dcpart_this, dcpart_old, dcpart_new;
+	unsigned int icsz, icpart_old, icpart_new;
+
+	/*
+	 * Copy over the current thread's cache partition configuration to the
+	 * new thread so that they share cache partitions.
+	 */
+	__global_lock2(lflags);
+	this_thread = hard_processor_id();
+	/* Share dcache partition */
+	dcpart_this = metag_in32(SYSC_DCPART(this_thread));
+	dcpart_old = metag_in32(SYSC_DCPART(thread));
+	dcpart_new = dcpart_this;
+#if PAGE_OFFSET < LINGLOBAL_BASE
+	/*
+	 * For the local data cache to be coherent the threads must also have
+	 * GCOn enabled.
+	 */
+	dcpart_new |= SYSC_DCPART_GCON_BIT;
+	metag_out32(dcpart_new, SYSC_DCPART(this_thread));
+#endif
+	metag_out32(dcpart_new, SYSC_DCPART(thread));
+	/* Share icache partition too */
+	icpart_new = metag_in32(SYSC_ICPART(this_thread));
+	icpart_old = metag_in32(SYSC_ICPART(thread));
+	metag_out32(icpart_new, SYSC_ICPART(thread));
+	__global_unlock2(lflags);
+
+	/*
+	 * Log if the cache partitions were altered so the user is aware of any
+	 * potential unintentional cache wastage. Each cache is described using
+	 * its own total size.
+	 */
+	dcsz = get_dcache_size();
+	icsz = get_icache_size();
+	describe_cachepart_change(this_thread, "dcache", dcsz,
+				  dcpart_this, dcpart_new);
+	describe_cachepart_change(thread, "dcache", dcsz,
+				  dcpart_old, dcpart_new);
+	describe_cachepart_change(thread, "icache", icsz,
+				  icpart_old, icpart_new);
+}
+
 int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 {
 	unsigned int thread = cpu_2_hwthread_id[cpu];
@@ -108,6 +221,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 
 	flush_tlb_all();
 
+	setup_smp_cache(thread);
+
 	/*
 	 * Tell the secondary CPU where to find its idle thread's stack.
 	 */

+ 0 - 3
arch/metag/mm/Kconfig

@@ -98,9 +98,6 @@ config MAX_ACTIVE_REGIONS
 	default "2" if SPARSEMEM
 	default "1"
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 

+ 17 - 0
arch/metag/oprofile/Makefile

@@ -0,0 +1,17 @@
+obj-$(CONFIG_OPROFILE)	+= oprofile.o
+
+oprofile-core-y	+= buffer_sync.o
+oprofile-core-y	+= cpu_buffer.o
+oprofile-core-y	+= event_buffer.o
+oprofile-core-y	+= oprof.o
+oprofile-core-y	+= oprofile_files.o
+oprofile-core-y	+= oprofile_stats.o
+oprofile-core-y	+= oprofilefs.o
+oprofile-core-y	+= timer_int.o
+oprofile-core-$(CONFIG_HW_PERF_EVENTS)	+= oprofile_perf.o
+
+oprofile-y	+= backtrace.o
+oprofile-y	+= common.o
+oprofile-y	+= $(addprefix ../../../drivers/oprofile/,$(oprofile-core-y))
+
+ccflags-y	+= -Werror

+ 63 - 0
arch/metag/oprofile/backtrace.c

@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2010-2013 Imagination Technologies Ltd.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/oprofile.h>
+#include <linux/uaccess.h>
+#include <asm/processor.h>
+#include <asm/stacktrace.h>
+
+#include "backtrace.h"
+
+/*
+ * Walk a chain of user-space frame records starting at @fp, reporting at most
+ * @depth addresses through oprofile_add_trace().
+ *
+ * Each record is read as two words: fp[0] is used as the next frame pointer
+ * and fp[1] as a code address. The walk stops when @depth is exhausted, when
+ * access_ok() rejects the record, when a user read fails, or when the next
+ * frame pointer is not strictly lower than the current one.
+ */
+static void user_backtrace_fp(unsigned long __user *fp, unsigned int depth)
+{
+	while (depth-- && access_ok(VERIFY_READ, fp, 8)) {
+		unsigned long addr;
+		unsigned long __user *fpnew;
+		if (__copy_from_user_inatomic(&addr, fp + 1, sizeof(addr)))
+			break;
+		/*
+		 * NOTE(review): presumably steps back from the saved return
+		 * address to the calling instruction - confirm.
+		 */
+		addr -= 4;
+
+		oprofile_add_trace(addr);
+
+		/* stack grows up, so frame pointers must decrease */
+		if (__copy_from_user_inatomic(&fpnew, fp + 0, sizeof(fpnew)))
+			break;
+		if (fpnew >= fp)
+			break;
+		fp = fpnew;
+	}
+}
+
+/*
+ * Per-frame callback passed to walk_stackframe(): records frame->pc with
+ * oprofile_add_trace(). @data points to the remaining depth (unsigned int).
+ *
+ * The pc is recorded before the depth is tested. Returns 1 (stop) when the
+ * depth was already 0 on entry, otherwise 0 (continue); the counter is
+ * post-decremented in both cases.
+ */
+static int kernel_backtrace_frame(struct stackframe *frame, void *data)
+{
+	unsigned int *depth = data;
+
+	oprofile_add_trace(frame->pc);
+
+	/* decrement depth and stop if we reach 0 */
+	if ((*depth)-- == 0)
+		return 1;
+
+	/* otherwise onto the next frame */
+	return 0;
+}
+
+/*
+ * Backtrace entry point: reports a call chain of at most roughly @depth
+ * entries for the context described by @regs.
+ *
+ * For user-mode registers the user frame chain is followed from the frame
+ * pointer in regs->ctx.AX[1].U0. Otherwise a struct stackframe is seeded from
+ * @regs and walked with walk_stackframe(), using kernel_backtrace_frame() as
+ * the per-frame callback and @depth as its counter.
+ */
+void metag_backtrace(struct pt_regs * const regs, unsigned int depth)
+{
+	if (user_mode(regs)) {
+		unsigned long *fp = (unsigned long *)regs->ctx.AX[1].U0;
+		user_backtrace_fp((unsigned long __user __force *)fp, depth);
+	} else {
+		struct stackframe frame;
+		frame.fp = regs->ctx.AX[1].U0;		/* A0FrP */
+		frame.sp = user_stack_pointer(regs);	/* A0StP */
+		frame.lr = 0;				/* from stack */
+		frame.pc = regs->ctx.CurrPC;		/* PC */
+		walk_stackframe(&frame, &kernel_backtrace_frame, &depth);
+	}
+}

+ 6 - 0
arch/metag/oprofile/backtrace.h

@@ -0,0 +1,6 @@
+#ifndef _METAG_OPROFILE_BACKTRACE_H
+#define _METAG_OPROFILE_BACKTRACE_H
+
+void metag_backtrace(struct pt_regs * const regs, unsigned int depth);
+
+#endif

+ 66 - 0
arch/metag/oprofile/common.c

@@ -0,0 +1,66 @@
+/*
+ * arch/metag/oprofile/common.c
+ *
+ * Copyright (C) 2013 Imagination Technologies Ltd.
+ *
+ * Based on arch/sh/oprofile/common.c:
+ *
+ * Copyright (C) 2003 - 2010  Paul Mundt
+ *
+ * Based on arch/mips/oprofile/common.c:
+ *
+ *	Copyright (C) 2004, 2005 Ralf Baechle
+ *	Copyright (C) 2005 MIPS Technologies, Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/oprofile.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+
+#include "backtrace.h"
+
+#ifdef CONFIG_HW_PERF_EVENTS
+/*
+ * This will need to be reworked when multiple PMUs are supported.
+ */
+/* Heap-allocated "metag/<pmu name>" string, built in oprofile_arch_init() */
+static char *metag_pmu_op_name;
+
+/* Returns the name string built by oprofile_arch_init() (NULL before then). */
+char *op_name_from_perf_id(void)
+{
+	return metag_pmu_op_name;
+}
+
+/*
+ * Perf-backed init: installs the backtrace hook, then hands over to
+ * oprofile_perf_init().
+ *
+ * Returns -ENODEV when perf reports no counters, -ENOMEM when the name string
+ * cannot be allocated, otherwise the result of oprofile_perf_init().
+ *
+ * NOTE(review): the name string is not freed here if oprofile_perf_init()
+ * fails - confirm whether oprofile_arch_exit() still runs in that case.
+ */
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+	/* Set before any early return so the hook is installed on failure too */
+	ops->backtrace = metag_backtrace;
+
+	if (perf_num_counters() == 0)
+		return -ENODEV;
+
+	metag_pmu_op_name = kasprintf(GFP_KERNEL, "metag/%s",
+				      perf_pmu_name());
+	if (unlikely(!metag_pmu_op_name))
+		return -ENOMEM;
+
+	return oprofile_perf_init(ops);
+}
+
+/* Tears down the perf backend and releases the name string. */
+void oprofile_arch_exit(void)
+{
+	oprofile_perf_exit();
+	kfree(metag_pmu_op_name);
+}
+#else
+/*
+ * Without CONFIG_HW_PERF_EVENTS only the backtrace hook is installed; the
+ * -ENODEV return makes the caller fall back as described below.
+ */
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+	ops->backtrace = metag_backtrace;
+	/* fall back to timer interrupt PC sampling */
+	return -ENODEV;
+}
+/* Nothing was allocated by the init path above, so nothing to release. */
+void oprofile_arch_exit(void) {}
+#endif /* CONFIG_HW_PERF_EVENTS */

+ 0 - 3
arch/microblaze/Kconfig

@@ -39,9 +39,6 @@ config RWSEM_GENERIC_SPINLOCK
 config ZONE_DMA
 	def_bool y
 
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 config RWSEM_XCHGADD_ALGORITHM
 	bool
 

+ 1 - 0
include/uapi/linux/elf.h

@@ -397,6 +397,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_HW_WATCH	0x403		/* ARM hardware watchpoint registers */
 #define NT_METAG_CBUF	0x500		/* Metag catch buffer registers */
 #define NT_METAG_RPIPE	0x501		/* Metag read pipeline state */
+#define NT_METAG_TLS	0x502		/* Metag TLS pointer */
 
 
 /* Note header in a PT_NOTE section */