Эх сурвалжийг харах

[ARM] tegra: SMP support

Signed-off-by: Colin Cross <ccross@android.com>
Signed-off-by: Erik Gilling <konkers@android.com>
Colin Cross 15 жил өмнө
parent
commit
1cea7326b3

+ 6 - 4
arch/arm/Kconfig

@@ -1112,10 +1112,11 @@ config SMP
 	bool "Symmetric Multi-Processing (EXPERIMENTAL)"
 	bool "Symmetric Multi-Processing (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && (REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP ||\
 	depends on EXPERIMENTAL && (REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP ||\
 		 MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 ||\
 		 MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 ||\
-		 ARCH_U8500 || ARCH_VEXPRESS_CA9X4)
+		 ARCH_U8500 || ARCH_VEXPRESS_CA9X4 || ARCH_TEGRA)
 	depends on GENERIC_CLOCKEVENTS
 	depends on GENERIC_CLOCKEVENTS
 	select USE_GENERIC_SMP_HELPERS
 	select USE_GENERIC_SMP_HELPERS
-	select HAVE_ARM_SCU if (ARCH_REALVIEW || ARCH_OMAP4 || ARCH_U8500 || ARCH_VEXPRESS_CA9X4)
+	select HAVE_ARM_SCU if (ARCH_REALVIEW || ARCH_OMAP4 || ARCH_U8500 || \
+		 ARCH_VEXPRESS_CA9X4 || ARCH_TEGRA)
 	help
 	help
 	  This enables support for systems with more than one CPU. If you have
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, like most personal computers, say N. If
 	  a system with only one CPU, like most personal computers, say N. If
@@ -1185,9 +1186,10 @@ config LOCAL_TIMERS
 	bool "Use local timer interrupts"
 	bool "Use local timer interrupts"
 	depends on SMP && (REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || \
 	depends on SMP && (REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || \
 		REALVIEW_EB_A9MP || MACH_REALVIEW_PBX || ARCH_OMAP4 || \
 		REALVIEW_EB_A9MP || MACH_REALVIEW_PBX || ARCH_OMAP4 || \
-		ARCH_U8500 || ARCH_VEXPRESS_CA9X4)
+		ARCH_U8500 || ARCH_VEXPRESS_CA9X4 || ARCH_TEGRA)
 	default y
 	default y
-	select HAVE_ARM_TWD if (ARCH_REALVIEW || ARCH_VEXPRESS || ARCH_OMAP4 || ARCH_U8500)
+	select HAVE_ARM_TWD if (ARCH_REALVIEW || ARCH_VEXPRESS || ARCH_OMAP4 || \\
+		ARCH_U8500 || ARCH_TEGRA
 	help
 	help
 	  Enable support for local timers on SMP platforms, rather then the
 	  Enable support for local timers on SMP platforms, rather then the
 	  legacy IPI broadcast method.  Local timers allows the system
 	  legacy IPI broadcast method.  Local timers allows the system

+ 2 - 0
arch/arm/mach-tegra/Makefile

@@ -3,3 +3,5 @@ obj-y                                   += io.o
 obj-y                                   += irq.o
 obj-y                                   += irq.o
 obj-y                                   += clock.o
 obj-y                                   += clock.o
 obj-$(CONFIG_ARCH_TEGRA_2x_SOC)         += tegra2_clocks.o
 obj-$(CONFIG_ARCH_TEGRA_2x_SOC)         += tegra2_clocks.o
+obj-$(CONFIG_SMP)                       += platsmp.o localtimer.o headsmp.o
+obj-$(CONFIG_HOTPLUG_CPU)               += hotplug.o

+ 61 - 0
arch/arm/mach-tegra/headsmp.S

@@ -0,0 +1,61 @@
+#include <linux/linkage.h>
+#include <linux/init.h>
+
+        .section ".text.head", "ax"
+	__CPUINIT
+
+/*
+ * Tegra specific entry point for secondary CPUs.
+ *   The secondary kernel init calls v7_flush_dcache_all before it enables
+ *   the L1; however, the L1 comes out of reset in an undefined state, so
+ *   the clean + invalidate performed by v7_flush_dcache_all causes a bunch
+ *   of cache lines with uninitialized data and uninitialized tags to get
+ *   written out to memory, which does really unpleasant things to the main
+ *   processor.  We fix this by performing an invalidate, rather than a
+ *   clean + invalidate, before jumping into the kernel.
+ */
+ENTRY(v7_invalidate_l1)
+        mov     r0, #0
+        mcr     p15, 2, r0, c0, c0, 0
+        mrc     p15, 1, r0, c0, c0, 0
+
+        ldr     r1, =0x7fff
+        and     r2, r1, r0, lsr #13
+
+        ldr     r1, =0x3ff
+
+        and     r3, r1, r0, lsr #3  @ NumWays - 1
+        add     r2, r2, #1          @ NumSets
+
+        and     r0, r0, #0x7
+        add     r0, r0, #4          @ SetShift
+
+        clz     r1, r3              @ WayShift
+        add     r4, r3, #1          @ NumWays
+1:      sub     r2, r2, #1          @ NumSets--
+        mov     r3, r4              @ Temp = NumWays
+2:      subs    r3, r3, #1          @ Temp--
+        mov     r5, r3, lsl r1
+        mov     r6, r2, lsl r0
+        orr     r5, r5, r6          @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
+        mcr     p15, 0, r5, c7, c6, 2
+        bgt     2b
+        cmp     r2, #0
+        bgt     1b
+        dsb
+        isb
+        mov     pc, lr
+ENDPROC(v7_invalidate_l1)
+
+ENTRY(tegra_secondary_startup)
+	msr	cpsr_fsxc, #0xd3
+        bl      v7_invalidate_l1
+	mrc	p15, 0, r0, c0, c0, 5
+        and	r0, r0, #15
+        ldr     r1, =0x6000f100
+        str     r0, [r1]
+1:      ldr     r2, [r1]
+        cmp     r0, r2
+        beq     1b
+        b       secondary_startup
+ENDPROC(tegra_secondary_startup)

+ 140 - 0
arch/arm/mach-tegra/hotplug.c

@@ -0,0 +1,140 @@
+/*
+ *  linux/arch/arm/mach-realview/hotplug.c
+ *
+ *  Copyright (C) 2002 ARM Ltd.
+ *  All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/smp.h>
+#include <linux/completion.h>
+
+#include <asm/cacheflush.h>
+
+static DECLARE_COMPLETION(cpu_killed);
+
+static inline void cpu_enter_lowpower(void)
+{
+	unsigned int v;
+
+	flush_cache_all();
+	asm volatile(
+	"	mcr	p15, 0, %1, c7, c5, 0\n"
+	"	mcr	p15, 0, %1, c7, c10, 4\n"
+	/*
+	 * Turn off coherency
+	 */
+	"	mrc	p15, 0, %0, c1, c0, 1\n"
+	"	bic	%0, %0, #0x20\n"
+	"	mcr	p15, 0, %0, c1, c0, 1\n"
+	"	mrc	p15, 0, %0, c1, c0, 0\n"
+	"	bic	%0, %0, #0x04\n"
+	"	mcr	p15, 0, %0, c1, c0, 0\n"
+	  : "=&r" (v)
+	  : "r" (0)
+	  : "cc");
+}
+
+static inline void cpu_leave_lowpower(void)
+{
+	unsigned int v;
+
+	asm volatile(
+	"mrc	p15, 0, %0, c1, c0, 0\n"
+	"	orr	%0, %0, #0x04\n"
+	"	mcr	p15, 0, %0, c1, c0, 0\n"
+	"	mrc	p15, 0, %0, c1, c0, 1\n"
+	"	orr	%0, %0, #0x20\n"
+	"	mcr	p15, 0, %0, c1, c0, 1\n"
+	  : "=&r" (v)
+	  :
+	  : "cc");
+}
+
+static inline void platform_do_lowpower(unsigned int cpu)
+{
+	/*
+	 * there is no power-control hardware on this platform, so all
+	 * we can do is put the core into WFI; this is safe as the calling
+	 * code will have already disabled interrupts
+	 */
+	for (;;) {
+		/*
+		 * here's the WFI
+		 */
+		asm(".word	0xe320f003\n"
+		    :
+		    :
+		    : "memory", "cc");
+
+		/*if (pen_release == cpu) {*/
+			/*
+			 * OK, proper wakeup, we're done
+			 */
+			break;
+		/*}*/
+
+		/*
+		 * getting here, means that we have come out of WFI without
+		 * having been woken up - this shouldn't happen
+		 *
+		 * The trouble is, letting people know about this is not really
+		 * possible, since we are currently running incoherently, and
+		 * therefore cannot safely call printk() or anything else
+		 */
+#ifdef DEBUG
+		printk(KERN_WARN "CPU%u: spurious wakeup call\n", cpu);
+#endif
+	}
+}
+
+int platform_cpu_kill(unsigned int cpu)
+{
+	return wait_for_completion_timeout(&cpu_killed, 5000);
+}
+
+/*
+ * platform-specific code to shutdown a CPU
+ *
+ * Called with IRQs disabled
+ */
+void platform_cpu_die(unsigned int cpu)
+{
+#ifdef DEBUG
+	unsigned int this_cpu = hard_smp_processor_id();
+
+	if (cpu != this_cpu) {
+		printk(KERN_CRIT "Eek! platform_cpu_die running on %u, should be %u\n",
+			   this_cpu, cpu);
+		BUG();
+	}
+#endif
+
+	printk(KERN_NOTICE "CPU%u: shutdown\n", cpu);
+	complete(&cpu_killed);
+
+	/*
+	 * we're ready for shutdown now, so do it
+	 */
+	cpu_enter_lowpower();
+	platform_do_lowpower(cpu);
+
+	/*
+	 * bring this CPU back into the world of cache
+	 * coherency, and then restore interrupts
+	 */
+	cpu_leave_lowpower();
+}
+
+int platform_cpu_disable(unsigned int cpu)
+{
+	/*
+	 * we don't allow CPU 0 to be shutdown (it is still too special
+	 * e.g. clock tick interrupts)
+	 */
+	return cpu == 0 ? -EPERM : 0;
+}

+ 30 - 0
arch/arm/mach-tegra/include/mach/smp.h

@@ -0,0 +1,30 @@
+#ifndef ASMARM_ARCH_SMP_H
+#define ASMARM_ARCH_SMP_H
+
+
+#include <asm/hardware/gic.h>
+
+#define hard_smp_processor_id()			\
+	({						\
+		unsigned int cpunum;			\
+		__asm__("mrc p15, 0, %0, c0, c0, 5"	\
+			: "=r" (cpunum));		\
+		cpunum &= 0x0F;				\
+	})
+
+/*
+ * We use IRQ1 as the IPI
+ */
+static inline void smp_cross_call(const struct cpumask *mask)
+{
+	gic_raise_softirq(mask, 1);
+}
+
+/*
+ * Do nothing on MPcore.
+ */
+static inline void smp_cross_call_done(cpumask_t callmap)
+{
+}
+
+#endif

+ 25 - 0
arch/arm/mach-tegra/localtimer.c

@@ -0,0 +1,25 @@
+/*
+ *  arch/arm/mach-tegra/localtimer.c
+ *
+ *  Copyright (C) 2002 ARM Ltd.
+ *  All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/clockchips.h>
+#include <asm/irq.h>
+#include <asm/smp_twd.h>
+#include <asm/localtimer.h>
+
+/*
+ * Setup the local clock events for a CPU.
+ */
+void __cpuinit local_timer_setup(struct clock_event_device *evt)
+{
+	evt->irq = IRQ_LOCALTIMER;
+	twd_timer_setup(evt);
+}

+ 156 - 0
arch/arm/mach-tegra/platsmp.c

@@ -0,0 +1,156 @@
+/*
+ *  linux/arch/arm/mach-tegra/platsmp.c
+ *
+ *  Copyright (C) 2002 ARM Ltd.
+ *  All Rights Reserved
+ *
+ *  Copyright (C) 2009 Palm
+ *  All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/jiffies.h>
+#include <linux/smp.h>
+#include <linux/io.h>
+
+#include <asm/cacheflush.h>
+#include <mach/hardware.h>
+#include <asm/mach-types.h>
+#include <asm/localtimer.h>
+#include <asm/smp_scu.h>
+
+#include <mach/iomap.h>
+
+extern void tegra_secondary_startup(void);
+
+static DEFINE_SPINLOCK(boot_lock);
+static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE);
+
+#define EVP_CPU_RESET_VECTOR \
+	(IO_ADDRESS(TEGRA_EXCEPTION_VECTORS_BASE) + 0x100)
+#define CLK_RST_CONTROLLER_CLK_CPU_CMPLX \
+	(IO_ADDRESS(TEGRA_CLK_RESET_BASE) + 0x4c)
+#define CLK_RST_CONTROLLER_RST_CPU_CMPLX_CLR \
+	(IO_ADDRESS(TEGRA_CLK_RESET_BASE) + 0x344)
+
+void __cpuinit platform_secondary_init(unsigned int cpu)
+{
+	trace_hardirqs_off();
+
+	/*
+	 * if any interrupts are already enabled for the primary
+	 * core (e.g. timer irq), then they will not have been enabled
+	 * for us: do so
+	 */
+	gic_cpu_init(0, IO_ADDRESS(TEGRA_ARM_PERIF_BASE) + 0x100);
+
+	/*
+	 * Synchronise with the boot thread.
+	 */
+	spin_lock(&boot_lock);
+	spin_unlock(&boot_lock);
+}
+
+int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
+{
+	unsigned long old_boot_vector;
+	unsigned long boot_vector;
+	unsigned long timeout;
+	u32 reg;
+
+	/*
+	 * set synchronisation state between this boot processor
+	 * and the secondary one
+	 */
+	spin_lock(&boot_lock);
+
+
+	/* set the reset vector to point to the secondary_startup routine */
+
+	boot_vector = virt_to_phys(tegra_secondary_startup);
+	old_boot_vector = readl(EVP_CPU_RESET_VECTOR);
+	writel(boot_vector, EVP_CPU_RESET_VECTOR);
+
+	/* enable cpu clock on cpu1 */
+	reg = readl(CLK_RST_CONTROLLER_CLK_CPU_CMPLX);
+	writel(reg & ~(1<<9), CLK_RST_CONTROLLER_CLK_CPU_CMPLX);
+
+	reg = (1<<13) | (1<<9) | (1<<5) | (1<<1);
+	writel(reg, CLK_RST_CONTROLLER_RST_CPU_CMPLX_CLR);
+
+	smp_wmb();
+	flush_cache_all();
+
+	/* unhalt the cpu */
+	writel(0, IO_ADDRESS(TEGRA_FLOW_CTRL_BASE) + 0x14);
+
+	timeout = jiffies + (1 * HZ);
+	while (time_before(jiffies, timeout)) {
+		if (readl(EVP_CPU_RESET_VECTOR) != boot_vector)
+			break;
+		udelay(10);
+	}
+
+	/* put the old boot vector back */
+	writel(old_boot_vector, EVP_CPU_RESET_VECTOR);
+
+	/*
+	 * now the secondary core is starting up let it run its
+	 * calibrations, then wait for it to finish
+	 */
+	spin_unlock(&boot_lock);
+
+	return 0;
+}
+
+/*
+ * Initialise the CPU possible map early - this describes the CPUs
+ * which may be present or become present in the system.
+ */
+void __init smp_init_cpus(void)
+{
+	unsigned int i, ncores = scu_get_core_count(scu_base);
+
+	for (i = 0; i < ncores; i++)
+		cpu_set(i, cpu_possible_map);
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	unsigned int ncores = scu_get_core_count(scu_base);
+	unsigned int cpu = smp_processor_id();
+	int i;
+
+	smp_store_cpu_info(cpu);
+
+	/*
+	 * are we trying to boot more cores than exist?
+	 */
+	if (max_cpus > ncores)
+		max_cpus = ncores;
+
+	/*
+	 * Initialise the present map, which describes the set of CPUs
+	 * actually populated at the present time.
+	 */
+	for (i = 0; i < max_cpus; i++)
+		set_cpu_present(i, true);
+
+	/*
+	 * Initialise the SCU if there are more than one CPU and let
+	 * them know where to start. Note that, on modern versions of
+	 * MILO, the "poke" doesn't actually do anything until each
+	 * individual core is sent a soft interrupt to get it out of
+	 * WFI
+	 */
+	if (max_cpus > 1) {
+		percpu_timer_setup();
+		scu_enable(scu_base);
+	}
+}