
Merge tag 'arc-v3.13-rc1-part1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc

Pull ARC changes from Vineet Gupta:
 - Towards a working SMP setup (ASID allocation, TLB Flush,...)
 - Support for TRACE_IRQFLAGS, LOCKDEP
 - cacheflush backend consolidation for I/D
 - Lots of allmodconfig fixlets from Chen
 - Other improvements/fixes

* tag 'arc-v3.13-rc1-part1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc: (25 commits)
  ARC: [plat-arcfpga] defconfig update
  smp, ARC: kill SMP single function call interrupt
  ARC: [SMP] Disallow RTSC
  ARC: [SMP] Fix build failures for large NR_CPUS
  ARC: [SMP] enlarge possible NR_CPUS
  ARC: [SMP] TLB flush
  ARC: [SMP] ASID allocation
  arc: export symbol for pm_power_off in reset.c
  arc: export symbol for save_stack_trace() in stacktrace.c
  arc: remove '__init' for get_hw_config_num_irq()
  arc: remove '__init' for first_lines_of_secondary()
  arc: remove '__init' for setup_processor() and arc_init_IRQ()
  arc: kgdb: add default implementation for kgdb_roundup_cpus()
  ARC: Fix bogus gcc warning and micro-optimise TLB iteration loop
  ARC: Add support for irqflags tracing and lockdep
  ARC: Reset the value of Interrupt Priority Register
  ARC: Reduce #ifdef'ery for unaligned access emulation
  ARC: Change calling convention of do_page_fault()
  ARC: cacheflush optim - PTAG can be loop invariant if V-P is const
  ARC: cacheflush refactor #3: Unify the {d,i}cache flush leaf helpers
  ...
Linus Torvalds, 11 years ago
parent
commit edae583a6d

+ 9 - 7
arch/arc/Kconfig

@@ -35,6 +35,12 @@ config ARC
 	select PERF_USE_VMALLOC
 	select HAVE_DEBUG_STACKOVERFLOW
 
+config TRACE_IRQFLAGS_SUPPORT
+	def_bool y
+
+config LOCKDEP_SUPPORT
+	def_bool y
+
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
 
@@ -130,17 +136,14 @@ if SMP
 config ARC_HAS_COH_CACHES
 	def_bool n
 
-config ARC_HAS_COH_RTSC
-	def_bool n
-
 config ARC_HAS_REENTRANT_IRQ_LV2
 	def_bool n
 
 endif
 
 config NR_CPUS
-	int "Maximum number of CPUs (2-32)"
-	range 2 32
+	int "Maximum number of CPUs (2-4096)"
+	range 2 4096
 	depends on SMP
 	default "2"
 
@@ -326,8 +329,7 @@ config ARC_HAS_RTSC
 	bool "Insn: RTSC (64-bit r/o cycle counter)"
 	default y
 	depends on ARC_CPU_REL_4_10
-	# if SMP, enable RTSC only if counter is coherent across cores
-	depends on !SMP || ARC_HAS_COH_RTSC
+	depends on !SMP
 
 endmenu   # "ARC CPU Configuration"
 

+ 3 - 0
arch/arc/configs/fpga_defconfig

@@ -2,6 +2,8 @@ CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
@@ -62,4 +64,5 @@ CONFIG_TMPFS=y
 CONFIG_NFS_FS=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
+# CONFIG_DEBUG_PREEMPT is not set
 CONFIG_XZ_DEC=y

+ 1 - 7
arch/arc/include/asm/cache.h

@@ -17,13 +17,7 @@
 #endif
 
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
-
-/* For a rare case where customers have differently config I/D */
-#define ARC_ICACHE_LINE_LEN	L1_CACHE_BYTES
-#define ARC_DCACHE_LINE_LEN	L1_CACHE_BYTES
-
-#define ICACHE_LINE_MASK	(~(ARC_ICACHE_LINE_LEN - 1))
-#define DCACHE_LINE_MASK	(~(ARC_DCACHE_LINE_LEN - 1))
+#define CACHE_LINE_MASK		(~(L1_CACHE_BYTES - 1))
 
 /*
  * ARC700 doesn't cache any access in top 256M.

+ 2 - 2
arch/arc/include/asm/irq.h

@@ -18,8 +18,8 @@
 
 #include <asm-generic/irq.h>
 
-extern void __init arc_init_IRQ(void);
-extern int __init get_hw_config_num_irq(void);
+extern void arc_init_IRQ(void);
+extern int get_hw_config_num_irq(void);
 
 void arc_local_timer_setup(unsigned int cpu);
 

+ 22 - 0
arch/arc/include/asm/irqflags.h

@@ -151,16 +151,38 @@ static inline void arch_unmask_irq(unsigned int irq)
 
 #else
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+.macro TRACE_ASM_IRQ_DISABLE
+	bl	trace_hardirqs_off
+.endm
+
+.macro TRACE_ASM_IRQ_ENABLE
+	bl	trace_hardirqs_on
+.endm
+
+#else
+
+.macro TRACE_ASM_IRQ_DISABLE
+.endm
+
+.macro TRACE_ASM_IRQ_ENABLE
+.endm
+
+#endif
+
 .macro IRQ_DISABLE  scratch
 	lr	\scratch, [status32]
 	bic	\scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
 	flag	\scratch
+	TRACE_ASM_IRQ_DISABLE
 .endm
 
 .macro IRQ_ENABLE  scratch
 	lr	\scratch, [status32]
 	or	\scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
 	flag	\scratch
+	TRACE_ASM_IRQ_ENABLE
 .endm
 
 #endif	/* __ASSEMBLY__ */
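
For context, the C-side wrappers these assembly macros mirror pair the real interrupt toggle with the lockdep hooks the same way. A sketch of the generic arrangement (include/linux/irqflags.h of roughly this era; shown for context, not part of the patch):

#define local_irq_disable() \
	do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)

#define local_irq_enable() \
	do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)

The asymmetry keeps one invariant: the tracing hooks themselves always run with interrupts hard-disabled, so the "off" notification follows the disable while the "on" notification precedes the enable.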

+ 1 - 1
arch/arc/include/asm/mmu.h

@@ -48,7 +48,7 @@
 #ifndef __ASSEMBLY__
 
 typedef struct {
-	unsigned long asid;	/* 8 bit MMU PID + Generation cycle */
+	unsigned long asid[NR_CPUS];	/* 8 bit MMU PID + Generation cycle */
 } mm_context_t;
 
 #ifdef CONFIG_ARC_DBG_TLB_PARANOIA

+ 44 - 17
arch/arc/include/asm/mmu_context.h

@@ -30,13 +30,13 @@
  * "Fast Context Switch" i.e. no TLB flush on ctxt-switch
  *
  * Linux assigns each task a unique ASID. A simple round-robin allocation
- * of H/w ASID is done using software tracker @asid_cache.
+ * of H/w ASID is done using software tracker @asid_cpu.
  * When it reaches max 255, the allocation cycle starts afresh by flushing
  * the entire TLB and wrapping ASID back to zero.
  *
  * A new allocation cycle, post rollover, could potentially reassign an ASID
  * to a different task. Thus the rule is to refresh the ASID in a new cycle.
- * The 32 bit @asid_cache (and mm->asid) have 8 bits MMU PID and rest 24 bits
+ * The 32 bit @asid_cpu (and mm->asid) have 8 bits MMU PID and rest 24 bits
  * serve as cycle/generation indicator and natural 32 bit unsigned math
  * automagically increments the generation when lower 8 bits rollover.
  */
@@ -47,9 +47,11 @@
 #define MM_CTXT_FIRST_CYCLE	(MM_CTXT_ASID_MASK + 1)
 #define MM_CTXT_NO_ASID		0UL
 
-#define hw_pid(mm)		(mm->context.asid & MM_CTXT_ASID_MASK)
+#define asid_mm(mm, cpu)	mm->context.asid[cpu]
+#define hw_pid(mm, cpu)		(asid_mm(mm, cpu) & MM_CTXT_ASID_MASK)
 
-extern unsigned int asid_cache;
+DECLARE_PER_CPU(unsigned int, asid_cache);
+#define asid_cpu(cpu)		per_cpu(asid_cache, cpu)
 
 /*
  * Get a new ASID if task doesn't have a valid one (unalloc or from prev cycle)
@@ -57,6 +59,7 @@ extern unsigned int asid_cache;
  */
 static inline void get_new_mmu_context(struct mm_struct *mm)
 {
+	const unsigned int cpu = smp_processor_id();
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -71,28 +74,28 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
 	 * 	 first need to destroy the context, setting it to invalid
 	 * 	 value.
 	 */
-	if (!((mm->context.asid ^ asid_cache) & MM_CTXT_CYCLE_MASK))
+	if (!((asid_mm(mm, cpu) ^ asid_cpu(cpu)) & MM_CTXT_CYCLE_MASK))
 		goto set_hw;
 
 	/* move to new ASID and handle rollover */
-	if (unlikely(!(++asid_cache & MM_CTXT_ASID_MASK))) {
+	if (unlikely(!(++asid_cpu(cpu) & MM_CTXT_ASID_MASK))) {
 
-		flush_tlb_all();
+		local_flush_tlb_all();
 
 		/*
 		 * Above check is for rollover of 8 bit ASID in 32 bit container.
 		 * If the container itself wrapped around, set it to a non zero
 		 * "generation" to distinguish from no context
 		 */
-		if (!asid_cache)
-			asid_cache = MM_CTXT_FIRST_CYCLE;
+		if (!asid_cpu(cpu))
+			asid_cpu(cpu) = MM_CTXT_FIRST_CYCLE;
 	}
 
 	/* Assign new ASID to tsk */
-	mm->context.asid = asid_cache;
+	asid_mm(mm, cpu) = asid_cpu(cpu);
 
 set_hw:
-	write_aux_reg(ARC_REG_PID, hw_pid(mm) | MMU_ENABLE);
+	write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE);
 
 	local_irq_restore(flags);
 }
@@ -104,16 +107,45 @@ set_hw:
 static inline int
 init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
-	mm->context.asid = MM_CTXT_NO_ASID;
+	int i;
+
+	for_each_possible_cpu(i)
+		asid_mm(mm, i) = MM_CTXT_NO_ASID;
+
 	return 0;
 }
 
+static inline void destroy_context(struct mm_struct *mm)
+{
+	unsigned long flags;
+
+	/* Needed to elide CONFIG_DEBUG_PREEMPT warning */
+	local_irq_save(flags);
+	asid_mm(mm, smp_processor_id()) = MM_CTXT_NO_ASID;
+	local_irq_restore(flags);
+}
+
 /* Prepare the MMU for task: setup PID reg with allocated ASID
    If task doesn't have an ASID (never allocated or stolen), get a new ASID
 */
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			     struct task_struct *tsk)
 {
+	const int cpu = smp_processor_id();
+
+	/*
+	 * Note that the mm_cpumask is "aggregating" only; we don't clear it
+	 * for the switched-out task, unlike some other arches.
+	 * It is used to enlist cpus for sending TLB flush IPIs: not sending
+	 * an IPI to a CPU where the task once ran could cause stale TLB entry
+	 * re-use, especially for a multi-threaded task.
+	 * e.g. T1 runs on C1, migrates to C3. T2 running on C2 munmaps.
+	 *      For a non-aggregating mm_cpumask, the IPI is not sent to C1,
+	 *      and if T1 were to re-migrate to C1, it could access the
+	 *      unmapped region via any existing stale TLB entries.
+	 */
+	cpumask_set_cpu(cpu, mm_cpumask(next));
+
 #ifndef CONFIG_SMP
 	/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
 	write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
@@ -131,11 +163,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
  */
 #define activate_mm(prev, next)		switch_mm(prev, next, NULL)
 
-static inline void destroy_context(struct mm_struct *mm)
-{
-	mm->context.asid = MM_CTXT_NO_ASID;
-}
-
 /* it seemed that deactivate_mm( ) is a reasonable place to do book-keeping
  * for retiring-mm. However destroy_context( ) still needs to do that because
 * between mm_release() => deactivate_mm() and
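
A worked example of the generation arithmetic described above, as a standalone sketch (plain userspace C, not kernel code):

#include <stdio.h>

#define MM_CTXT_ASID_MASK	0x000000ffU
#define MM_CTXT_CYCLE_MASK	0xffffff00U

int main(void)
{
	/* cycle 0x100, holding the last hardware PID of the cycle (255) */
	unsigned int asid_cache = 0x1ffU;

	/* one plain increment rolls the 8 bit PID and bumps the cycle */
	asid_cache++;		/* now 0x200: PID 0, cycle 0x200 */

	if (!(asid_cache & MM_CTXT_ASID_MASK))
		printf("rollover: flush TLB, continue in cycle %#x\n",
		       asid_cache & MM_CTXT_CYCLE_MASK);
	return 0;
}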

+ 1 - 1
arch/arc/include/asm/setup.h

@@ -31,7 +31,7 @@ struct cpuinfo_data {
 extern int root_mountflags, end_mem;
 extern int running_on_hw;
 
-void __init setup_processor(void);
+void setup_processor(void);
 void __init setup_arch_memory(void);
 
 #endif /* __ASMARC_SETUP_H */

+ 1 - 1
arch/arc/include/asm/smp.h

@@ -30,7 +30,7 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
  * APIs provided by arch SMP code to rest of arch code
  */
 extern void __init smp_init_cpus(void);
-extern void __init first_lines_of_secondary(void);
+extern void first_lines_of_secondary(void);
 extern const char *arc_platform_smp_cpuinfo(void);
 
 /*

+ 9 - 2
arch/arc/include/asm/tlbflush.h

@@ -18,11 +18,18 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
 void local_flush_tlb_range(struct vm_area_struct *vma,
 			   unsigned long start, unsigned long end);
 
-/* XXX: Revisit for SMP */
+#ifndef CONFIG_SMP
 #define flush_tlb_range(vma, s, e)	local_flush_tlb_range(vma, s, e)
 #define flush_tlb_page(vma, page)	local_flush_tlb_page(vma, page)
 #define flush_tlb_kernel_range(s, e)	local_flush_tlb_kernel_range(s, e)
 #define flush_tlb_all()			local_flush_tlb_all()
 #define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
-
+#else
+extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+							 unsigned long end);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_tlb_all(void);
+extern void flush_tlb_mm(struct mm_struct *mm);
+#endif /* CONFIG_SMP */
 #endif

+ 2 - 1
arch/arc/include/asm/unaligned.h

@@ -22,7 +22,8 @@ static inline int
 misaligned_fixup(unsigned long address, struct pt_regs *regs,
 		 struct callee_regs *cregs)
 {
-	return 0;
+	/* Not fixed */
+	return 1;
 }
 #endif
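
The flipped return value follows the caller's convention: zero means the access was emulated, nonzero means it was not and the task gets SIGBUS. The consumer is visible in the traps.c hunk further below; condensed here for context:

int do_misaligned_access(unsigned long address, struct pt_regs *regs,
			 struct callee_regs *cregs)
{
	/* If emulation not enabled, or failed, kill the task */
	if (misaligned_fixup(address, regs, cregs) != 0)
		return do_misaligned_error(address, regs);

	return 0;
}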
 

+ 12 - 1
arch/arc/kernel/ctx_sw.c

@@ -17,6 +17,8 @@
 #include <asm/asm-offsets.h>
 #include <linux/sched.h>
 
+#define KSP_WORD_OFF 	((TASK_THREAD + THREAD_KSP) / 4)
+
 struct task_struct *__sched
 __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
 {
@@ -45,7 +47,16 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
 #endif
 
 		/* set ksp of outgoing task in tsk->thread.ksp */
+#if KSP_WORD_OFF <= 255
 		"st.as   sp, [%3, %1]    \n\t"
+#else
+		/*
+		 * Workaround for NR_CPUS=4k
+		 * %1 is bigger than 255 (S9 offset for st.as)
+		 */
+		"add2    r24, %3, %1     \n\t"
+		"st      sp, [r24]       \n\t"
+#endif
 
 		"sync   \n\t"
 
@@ -97,7 +108,7 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
 		/* FP/BLINK restore generated by gcc (standard func epilogue) */
 
 		: "=r"(tmp)
-		: "n"((TASK_THREAD + THREAD_KSP) / 4), "r"(next), "r"(prev)
+		: "n"(KSP_WORD_OFF), "r"(next), "r"(prev)
 		: "blink"
 	);
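
The s9 limit deserves a number: st.as scales a signed 9-bit immediate by the access size, so the word offset of thread.ksp must fit in -256..255. A back-of-envelope sketch of why NR_CPUS pushes it over (the cpumask sizing below is an assumption about the cause, not something the patch states):

/*
 * With NR_CPUS = 4096 each inline cpumask_t in task_struct is
 * 4096 / 8 = 512 bytes, so thread.ksp can land beyond the
 * 255-word (1020 byte) reach of an s9-scaled store.
 */
#define S9_MAX_WORD_OFF	255

static inline int ksp_needs_workaround(unsigned long ksp_word_off)
{
	/* if true, fall back to add2 + plain st, as above */
	return ksp_word_off > S9_MAX_WORD_OFF;
}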
 

+ 9 - 2
arch/arc/kernel/ctx_sw_asm.S

@@ -14,6 +14,8 @@
 #include <asm/asm-offsets.h>
 #include <asm/linkage.h>
 
+#define KSP_WORD_OFF 	((TASK_THREAD + THREAD_KSP) / 4)
+
 ;################### Low Level Context Switch ##########################
 
 	.section .sched.text,"ax",@progbits
@@ -28,8 +30,13 @@ __switch_to:
 	SAVE_CALLEE_SAVED_KERNEL
 
	/* Save the current SP in task->thread.ksp */
-	st.as  sp, [r0, (TASK_THREAD + THREAD_KSP)/4]
-
+#if KSP_WORD_OFF <= 255
+	st.as  sp, [r0, KSP_WORD_OFF]
+#else
+	/* Workaround for NR_CPUS=4k as ST.as can only take s9 offset */
+	add2	r24, r0, KSP_WORD_OFF
+	st	sp, [r24]
+#endif
 	/*
 	* Return last task in r0 (return reg)
 	* On ARC, Return reg = First Arg reg = r0.

+ 15 - 9
arch/arc/kernel/entry.S

@@ -250,6 +250,14 @@ ARC_ENTRY handle_interrupt_level1
 	lr  r0, [icause1]
 	and r0, r0, 0x1f
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	; icause1 needs to be read early, before calling tracing, which
+	; can clobber scratch regs, hence use of stack to stash it
+	push r0
+	TRACE_ASM_IRQ_DISABLE
+	pop  r0
+#endif
+
 	bl.d  @arch_do_IRQ
 	mov r1, sp
 
@@ -337,9 +345,9 @@ ARC_ENTRY EV_TLBProtV
 	;  vineetg: Mar 6th: Random Seg Fault issue #1
 	;  ecr and efa were not saved in case an Intr sneaks in
 	;  after fake rtie
-	;
+
 	lr  r2, [ecr]
-	lr  r1, [efa]	; Faulting Data address
+	lr  r0, [efa]	; Faulting Data address
 
 	; --------(4) Return from CPU Exception Mode ---------
 	;  Fake a rtie, but rtie to next label
@@ -348,6 +356,8 @@ ARC_ENTRY EV_TLBProtV
 
 	FAKE_RET_FROM_EXCPN r9
 
+	mov   r1, sp
+
 	;------ (5) Type of Protection Violation? ----------
 	;
 	; ProtV Hardware Exception is triggered for Access Faults of 2 types
@@ -358,16 +368,12 @@ ARC_ENTRY EV_TLBProtV
 	bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
 
 	;========= (6a) Access Violation Processing ========
-	mov r0, sp              ; pt_regs
 	bl  do_page_fault
 	b   ret_from_exception
 
 	;========== (6b) Non aligned access ============
 4:
-	mov r0, r1
-	mov r1, sp              ; pt_regs
 
-#ifdef  CONFIG_ARC_MISALIGN_ACCESS
 	SAVE_CALLEE_SAVED_USER
 	mov r2, sp              ; callee_regs
 
@@ -376,9 +382,6 @@ ARC_ENTRY EV_TLBProtV
 	; TBD: optimize - do this only if a callee reg was involved
 	; either a dst of emulated LD/ST or src with address-writeback
 	RESTORE_CALLEE_SAVED_USER
-#else
-	bl  do_misaligned_error
-#endif
 
 	b   ret_from_exception
 
@@ -575,6 +578,7 @@ resume_user_mode_begin:
 	; --- (Slow Path #2) pending signal  ---
 	mov r0, sp	; pt_regs for arg to do_signal()/do_notify_resume()
 
+	GET_CURR_THR_INFO_FLAGS   r9
 	bbit0  r9, TIF_SIGPENDING, .Lchk_notify_resume
 
 	; Normal Trap/IRQ entry only saves Scratch (caller-saved) regs
@@ -640,6 +644,8 @@ resume_kernel_mode:
 
 restore_regs :
 
+	TRACE_ASM_IRQ_ENABLE
+
 	lr	r10, [status32]
 
 	; Restore REG File. In case multiple Events outstanding,

+ 1 - 1
arch/arc/kernel/head.S

@@ -95,7 +95,7 @@ stext:
 ;----------------------------------------------------------------
 ;     First lines of code run by secondary before jumping to 'C'
 ;----------------------------------------------------------------
-	.section .init.text, "ax",@progbits
+	.section .text, "ax",@progbits
 	.type first_lines_of_secondary, @function
 	.globl first_lines_of_secondary
 

+ 8 - 4
arch/arc/kernel/irq.c

@@ -39,10 +39,14 @@ void arc_init_IRQ(void)
 	level_mask |= IS_ENABLED(CONFIG_ARC_IRQ5_LV2) << 5;
 	level_mask |= IS_ENABLED(CONFIG_ARC_IRQ6_LV2) << 6;
 
-	if (level_mask) {
+	/*
+	 * Write to register, even if no LV2 IRQs configured to reset it
+	 * in case bootloader had mucked with it
+	 */
+	write_aux_reg(AUX_IRQ_LEV, level_mask);
+
+	if (level_mask)
 		pr_info("Level-2 interrupts bitset %x\n", level_mask);
-		write_aux_reg(AUX_IRQ_LEV, level_mask);
-	}
 }
 
 /*
@@ -146,7 +150,7 @@ void arch_do_IRQ(unsigned int irq, struct pt_regs *regs)
 	set_irq_regs(old_regs);
 }
 
-int __init get_hw_config_num_irq(void)
+int get_hw_config_num_irq(void)
 {
 	uint32_t val = read_aux_reg(ARC_REG_VECBASE_BCR);
 

+ 12 - 0
arch/arc/kernel/kgdb.c

@@ -196,6 +196,18 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
 	instruction_pointer(regs) = ip;
 }
 
+static void kgdb_call_nmi_hook(void *ignored)
+{
+	kgdb_nmicallback(raw_smp_processor_id(), NULL);
+}
+
+void kgdb_roundup_cpus(unsigned long flags)
+{
+	local_irq_enable();
+	smp_call_function(kgdb_call_nmi_hook, NULL, 0);
+	local_irq_disable();
+}
+
 struct kgdb_arch arch_kgdb_ops = {
 	/* breakpoint instruction: TRAP_S 0x3 */
 #ifdef CONFIG_CPU_BIG_ENDIAN

+ 3 - 3
arch/arc/kernel/kprobes.c

@@ -87,13 +87,13 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
 
 static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
-	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
 	kcb->kprobe_status = kcb->prev_kprobe.status;
 }
 
 static inline void __kprobes set_current_kprobe(struct kprobe *p)
 {
-	__get_cpu_var(current_kprobe) = p;
+	__this_cpu_write(current_kprobe, p);
 }
 
 static void __kprobes resume_execution(struct kprobe *p, unsigned long addr,
@@ -237,7 +237,7 @@ int __kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs)
 
 		return 1;
 	} else if (kprobe_running()) {
-		p = __get_cpu_var(current_kprobe);
+		p = __this_cpu_read(current_kprobe);
 		if (p->break_handler && p->break_handler(p, regs)) {
 			setup_singlestep(p, regs);
 			kcb->kprobe_status = KPROBE_HIT_SS;
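
These hunks are part of the tree-wide migration off the __get_cpu_var() lvalue idiom. An illustrative mapping, with made-up names:

DEFINE_PER_CPU(struct kprobe *, demo_kprobe);

static void demo(struct kprobe *p)
{
	/* old: __get_cpu_var(demo_kprobe) = p; */
	__this_cpu_write(demo_kprobe, p);

	/* old: p = __get_cpu_var(demo_kprobe); */
	p = __this_cpu_read(demo_kprobe);
}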

+ 1 - 0
arch/arc/kernel/reset.c

@@ -31,3 +31,4 @@ void machine_power_off(void)
 }
 
 void (*pm_power_off) (void) = NULL;
+EXPORT_SYMBOL(pm_power_off);
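
The export lets a modular power-management driver hook the power-off path. A minimal illustrative sketch (driver names are hypothetical):

static void board_power_off(void)
{
	/* poke the board-specific power controller here */
}

static int __init board_pm_init(void)
{
	pm_power_off = board_power_off;	/* needs the export when modular */
	return 0;
}
module_init(board_pm_init);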

+ 5 - 6
arch/arc/kernel/setup.c

@@ -37,8 +37,7 @@ struct task_struct *_current_task[NR_CPUS];	/* For stack switching */
 
 struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
 
-
-void read_arc_build_cfg_regs(void)
+static void read_arc_build_cfg_regs(void)
 {
 	struct bcr_perip uncached_space;
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
@@ -106,7 +105,7 @@ static const struct cpuinfo_data arc_cpu_tbl[] = {
 	{ {0x00, NULL		} }
 };
 
-char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
+static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 {
 	int n = 0;
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
@@ -171,7 +170,7 @@ static const struct id_to_str mac_mul_nm[] = {
 	{0x6, "Dual 16x16 and 32x16"}
 };
 
-char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
+static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
 {
 	int n = 0;
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
@@ -234,7 +233,7 @@ char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
 	return buf;
 }
 
-void arc_chk_ccms(void)
+static void arc_chk_ccms(void)
 {
 #if defined(CONFIG_ARC_HAS_DCCM) || defined(CONFIG_ARC_HAS_ICCM)
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
@@ -269,7 +268,7 @@ void arc_chk_ccms(void)
  * hardware has dedicated regs which need to be saved/restored on ctx-sw
  * (Single Precision uses core regs), thus kernel is kind of oblivious to it
  */
-void arc_chk_fpu(void)
+static void arc_chk_fpu(void)
 {
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
 

+ 3 - 7
arch/arc/kernel/smp.c

@@ -95,7 +95,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
  *        If it turns out to be elaborate, it's better to code it in assembly
  *
  */
-void __attribute__((weak)) arc_platform_smp_wait_to_boot(int cpu)
+void __weak arc_platform_smp_wait_to_boot(int cpu)
 {
 	/*
 	 * As a hack for debugging - since debugger will single-step over the
@@ -128,6 +128,7 @@ void start_kernel_secondary(void)
 	atomic_inc(&mm->mm_users);
 	atomic_inc(&mm->mm_count);
 	current->active_mm = mm;
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
 
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
@@ -210,7 +211,6 @@ enum ipi_msg_type {
 	IPI_NOP = 0,
 	IPI_RESCHEDULE = 1,
 	IPI_CALL_FUNC,
-	IPI_CALL_FUNC_SINGLE,
 	IPI_CPU_STOP
 };
 
@@ -254,7 +254,7 @@ void smp_send_stop(void)
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-	ipi_send_msg(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+	ipi_send_msg(cpumask_of(cpu), IPI_CALL_FUNC);
 }
 
 void arch_send_call_function_ipi_mask(const struct cpumask *mask)
@@ -286,10 +286,6 @@ static inline void __do_IPI(unsigned long *ops, struct ipi_data *ipi, int cpu)
 			generic_smp_call_function_interrupt();
 			break;
 
-		case IPI_CALL_FUNC_SINGLE:
-			generic_smp_call_function_single_interrupt();
-			break;
-
 		case IPI_CPU_STOP:
 			ipi_cpu_stop(cpu);
 			break;
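
Killing IPI_CALL_FUNC_SINGLE is safe because the generic SMP core had by this point folded the single-call queue into the same per-cpu list the multi-call path drains, so one handler serves both. The glue in include/linux/smp.h of this era reduces, as far as I can tell, to the alias below (an assumption about the generic code, not part of the patch):

/* both IPI flavours drain the same per-cpu list */
#define generic_smp_call_function_interrupt \
	generic_smp_call_function_single_interrupt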

+ 4 - 1
arch/arc/kernel/stacktrace.c

@@ -237,11 +237,14 @@ unsigned int get_wchan(struct task_struct *tsk)
  */
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
+	/* Assumes @tsk is sleeping so unwinds from __switch_to */
 	arc_unwind_core(tsk, NULL, __collect_all_but_sched, trace);
 }
 
 void save_stack_trace(struct stack_trace *trace)
 {
-	arc_unwind_core(current, NULL, __collect_all, trace);
+	/* Pass NULL for task so it unwinds the current call frame */
+	arc_unwind_core(NULL, NULL, __collect_all, trace);
 }
+EXPORT_SYMBOL_GPL(save_stack_trace);
 #endif
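
The EXPORT_SYMBOL_GPL is what makes this usable from modules. A minimal illustrative caller (hypothetical module code, not from the patch):

static void demo_capture(void)
{
	unsigned long entries[8];
	struct stack_trace trace = {
		.entries	= entries,
		.max_entries	= ARRAY_SIZE(entries),
		.skip		= 0,
	};

	save_stack_trace(&trace);	/* fills entries[0..nr_entries-1] */
}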

+ 6 - 5
arch/arc/kernel/time.c

@@ -63,9 +63,10 @@
 
 int arc_counter_setup(void)
 {
-	/* RTSC insn taps into cpu clk, needs no setup */
-
-	/* For SMP, only allowed if cross-core-sync, hence usable as cs */
+	/*
+	 * For SMP this needs to be 0. However Kconfig glue doesn't
+	 * enable this option for SMP configs
+	 */
 	return 1;
 }
 
@@ -206,7 +207,7 @@ static DEFINE_PER_CPU(struct clock_event_device, arc_clockevent_device) = {
 
 static irqreturn_t timer_irq_handler(int irq, void *dev_id)
 {
-	struct clock_event_device *clk = &__get_cpu_var(arc_clockevent_device);
+	struct clock_event_device *clk = this_cpu_ptr(&arc_clockevent_device);
 
 	arc_timer_event_ack(clk->mode == CLOCK_EVT_MODE_PERIODIC);
 	clk->event_handler(clk);
@@ -223,7 +224,7 @@ static struct irqaction arc_timer_irq = {
  * Setup the local event timer for @cpu
  * N.B. weak so that some exotic ARC SoCs can completely override it
  */
-void __attribute__((weak)) arc_local_timer_setup(unsigned int cpu)
+void __weak arc_local_timer_setup(unsigned int cpu)
 {
 	struct clock_event_device *clk = &per_cpu(arc_clockevent_device, cpu);
 

+ 1 - 2
arch/arc/kernel/traps.c

@@ -84,19 +84,18 @@ DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", do_memory_error, BUS_ADRERR)
 DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT)
 DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN)
 
-#ifdef CONFIG_ARC_MISALIGN_ACCESS
 /*
  * Entry Point for Misaligned Data access Exception, for emulating in software
  */
 int do_misaligned_access(unsigned long address, struct pt_regs *regs,
 			 struct callee_regs *cregs)
 {
+	/* If emulation not enabled, or failed, kill the task */
 	if (misaligned_fixup(address, regs, cregs) != 0)
 		return do_misaligned_error(address, regs);
 
 	return 0;
 }
-#endif
 
 /*
  * Entry point for miscll errors such as Nested Exceptions

+ 65 - 90
arch/arc/mm/cache_arc700.c

@@ -182,7 +182,7 @@ void arc_cache_init(void)
 
 #ifdef CONFIG_ARC_HAS_ICACHE
 	/* 1. Confirm some of I-cache params which Linux assumes */
-	if (ic->line_len != ARC_ICACHE_LINE_LEN)
+	if (ic->line_len != L1_CACHE_BYTES)
 		panic("Cache H/W doesn't match kernel Config");
 
 	if (ic->ver != CONFIG_ARC_MMU_VER)
@@ -205,7 +205,7 @@ chk_dc:
 		return;
 
 #ifdef CONFIG_ARC_HAS_DCACHE
-	if (dc->line_len != ARC_DCACHE_LINE_LEN)
+	if (dc->line_len != L1_CACHE_BYTES)
 		panic("Cache H/W doesn't match kernel Config");
 
 	/* check for D-Cache aliasing */
@@ -240,6 +240,67 @@ chk_dc:
 #define OP_INV		0x1
 #define OP_FLUSH	0x2
 #define OP_FLUSH_N_INV	0x3
+#define OP_INV_IC	0x4
+
+/*
+ * Common Helper for Line Operations on {I,D}-Cache
+ */
+static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
+				     unsigned long sz, const int cacheop)
+{
+	unsigned int aux_cmd, aux_tag;
+	int num_lines;
+	const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;
+
+	if (cacheop == OP_INV_IC) {
+		aux_cmd = ARC_REG_IC_IVIL;
+		aux_tag = ARC_REG_IC_PTAG;
+	}
+	else {
+		/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
+		aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
+		aux_tag = ARC_REG_DC_PTAG;
+	}
+
+	/* Ensure we properly floor/ceil the non-line aligned/sized requests
+	 * and have @paddr - aligned to cache line and integral @num_lines.
+	 * This however can be avoided for page sized since:
+	 *  -@paddr will be cache-line aligned already (being page aligned)
+	 *  -@sz will be integral multiple of line size (being page sized).
+	 */
+	if (!full_page_op) {
+		sz += paddr & ~CACHE_LINE_MASK;
+		paddr &= CACHE_LINE_MASK;
+		vaddr &= CACHE_LINE_MASK;
+	}
+
+	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
+
+#if (CONFIG_ARC_MMU_VER <= 2)
+	/* MMUv2 and before: paddr contains stuffed vaddrs bits */
+	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
+#else
+	/* if V-P const for loop, PTAG can be written once outside loop */
+	if (full_page_op)
+		write_aux_reg(aux_tag, paddr);
+#endif
+
+	while (num_lines-- > 0) {
+#if (CONFIG_ARC_MMU_VER > 2)
+		/* MMUv3, cache ops require paddr separately */
+		if (!full_page_op) {
+			write_aux_reg(aux_tag, paddr);
+			paddr += L1_CACHE_BYTES;
+		}
+
+		write_aux_reg(aux_cmd, vaddr);
+		vaddr += L1_CACHE_BYTES;
+#else
+		write_aux_reg(aux_cmd, paddr);	/* paddr has stuffed vaddr bits */
+		paddr += L1_CACHE_BYTES;
+#endif
+	}
+}
 
 #ifdef CONFIG_ARC_HAS_DCACHE
 
@@ -289,53 +350,6 @@ static inline void __dc_entire_op(const int cacheop)
 		write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH);
 }
 
-/*
- * Per Line Operation on D-Cache
- * Doesn't deal with type-of-op/IRQ-disabling/waiting-for-flush-to-complete
- * It's sole purpose is to help gcc generate ZOL
- * (aliasing VIPT dcache flushing needs both vaddr and paddr)
- */
-static inline void __dc_line_loop(unsigned long paddr, unsigned long vaddr,
-				  unsigned long sz, const int aux_reg)
-{
-	int num_lines;
-
-	/* Ensure we properly floor/ceil the non-line aligned/sized requests
-	 * and have @paddr - aligned to cache line and integral @num_lines.
-	 * This however can be avoided for page sized since:
-	 *  -@paddr will be cache-line aligned already (being page aligned)
-	 *  -@sz will be integral multiple of line size (being page sized).
-	 */
-	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
-		sz += paddr & ~DCACHE_LINE_MASK;
-		paddr &= DCACHE_LINE_MASK;
-		vaddr &= DCACHE_LINE_MASK;
-	}
-
-	num_lines = DIV_ROUND_UP(sz, ARC_DCACHE_LINE_LEN);
-
-#if (CONFIG_ARC_MMU_VER <= 2)
-	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
-#endif
-
-	while (num_lines-- > 0) {
-#if (CONFIG_ARC_MMU_VER > 2)
-		/*
-		 * Just as for I$, in MMU v3, D$ ops also require
-		 * "tag" bits in DC_PTAG, "index" bits in FLDL,IVDL ops
-		 */
-		write_aux_reg(ARC_REG_DC_PTAG, paddr);
-
-		write_aux_reg(aux_reg, vaddr);
-		vaddr += ARC_DCACHE_LINE_LEN;
-#else
-		/* paddr contains stuffed vaddrs bits */
-		write_aux_reg(aux_reg, paddr);
-#endif
-		paddr += ARC_DCACHE_LINE_LEN;
-	}
-}
-
 /* For kernel mappings cache operation: index is same as paddr */
 #define __dc_line_op_k(p, sz, op)	__dc_line_op(p, p, sz, op)
 
@@ -346,7 +360,6 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr,
 				unsigned long sz, const int cacheop)
 {
 	unsigned long flags, tmp = tmp;
-	int aux;
 
 	local_irq_save(flags);
 
@@ -361,12 +374,7 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr,
 		write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH);
 	}
 
-	if (cacheop & OP_INV)	/* Inv / flush-n-inv use same cmd reg */
-		aux = ARC_REG_DC_IVDL;
-	else
-		aux = ARC_REG_DC_FLDL;
-
-	__dc_line_loop(paddr, vaddr, sz, aux);
+	__cache_line_loop(paddr, vaddr, sz, cacheop);
 
 	if (cacheop & OP_FLUSH)	/* flush / flush-n-inv both wait */
 		wait_for_flush();
@@ -438,42 +446,9 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
 				unsigned long sz)
 {
 	unsigned long flags;
-	int num_lines;
-
-	/*
-	 * Ensure we properly floor/ceil the non-line aligned/sized requests:
-	 * However page sized flushes can be compile time optimised.
-	 *  -@paddr will be cache-line aligned already (being page aligned)
-	 *  -@sz will be integral multiple of line size (being page sized).
-	 */
-	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
-		sz += paddr & ~ICACHE_LINE_MASK;
-		paddr &= ICACHE_LINE_MASK;
-		vaddr &= ICACHE_LINE_MASK;
-	}
-
-	num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
-
-#if (CONFIG_ARC_MMU_VER <= 2)
-	/* bits 17:13 of vaddr go as bits 4:0 of paddr */
-	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
-#endif
 
 	local_irq_save(flags);
-	while (num_lines-- > 0) {
-#if (CONFIG_ARC_MMU_VER > 2)
-		/* tag comes from phy addr */
-		write_aux_reg(ARC_REG_IC_PTAG, paddr);
-
-		/* index bits come from vaddr */
-		write_aux_reg(ARC_REG_IC_IVIL, vaddr);
-		vaddr += ARC_ICACHE_LINE_LEN;
-#else
-		/* paddr contains stuffed vaddrs bits */
-		write_aux_reg(ARC_REG_IC_IVIL, paddr);
-#endif
-		paddr += ARC_ICACHE_LINE_LEN;
-	}
+	__cache_line_loop(paddr, vaddr, sz, OP_INV_IC);
 	local_irq_restore(flags);
 }
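
The "stuffed vaddr" trick the unified loop inherits is worth spelling out: on MMUv2 the cache ops take a single register, so the index bits of the virtual address ride in the low bits of the line-aligned physical address. A sketch, assuming ARC's 8 KB pages (PAGE_SHIFT == 13):

static inline unsigned long mmuv2_stuff_vaddr(unsigned long paddr,
					      unsigned long vaddr)
{
	/* paddr is cache-line aligned, so bits 4:0 are free to carry
	 * vaddr bits 17:13, which the MMUv2 cache ops use as index
	 */
	return paddr | ((vaddr >> PAGE_SHIFT) & 0x1F);
}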
 

+ 1 - 1
arch/arc/mm/fault.c

@@ -52,7 +52,7 @@ bad_area:
 	return 1;
 }
 
-void do_page_fault(struct pt_regs *regs, unsigned long address)
+void do_page_fault(unsigned long address, struct pt_regs *regs)
 {
 	struct vm_area_struct *vma = NULL;
 	struct task_struct *tsk = current;
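
The argument swap lines the C signature up with the ARC calling convention (r0 carries the first argument, r1 the second): the assembly callers in entry.S above and tlbex.S below already load the faulting address from efa into r0 and pt_regs into r1, so no register shuffling is needed at the call sites. As a declaration only:

/* r0 = faulting address (from efa), r1 = pt_regs */
void do_page_fault(unsigned long address, struct pt_regs *regs);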

+ 83 - 8
arch/arc/mm/tlb.c

@@ -100,7 +100,7 @@
 
 
 /* A copy of the ASID from the PID reg is kept in asid_cache */
-unsigned int asid_cache = MM_CTXT_FIRST_CYCLE;
+DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
 
 /*
  * Utility Routine to erase a J-TLB entry
@@ -274,6 +274,7 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm)
 void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			   unsigned long end)
 {
+	const unsigned int cpu = smp_processor_id();
 	unsigned long flags;
 
 	/* If range @start to @end is more than 32 TLB entries deep,
@@ -297,9 +298,9 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 
 	local_irq_save(flags);
 
-	if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) {
+	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) {
 		while (start < end) {
-			tlb_entry_erase(start | hw_pid(vma->vm_mm));
+			tlb_entry_erase(start | hw_pid(vma->vm_mm, cpu));
 			start += PAGE_SIZE;
 		}
 	}
@@ -346,6 +347,7 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
 
 void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 {
+	const unsigned int cpu = smp_processor_id();
 	unsigned long flags;
 
 	/* Note that it is critical that interrupts are DISABLED between
@@ -353,14 +355,87 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 	 */
 	local_irq_save(flags);
 
-	if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) {
-		tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm));
+	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) {
+		tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu));
 		utlb_invalidate();
 	}
 
 	local_irq_restore(flags);
 }
 
+#ifdef CONFIG_SMP
+
+struct tlb_args {
+	struct vm_area_struct *ta_vma;
+	unsigned long ta_start;
+	unsigned long ta_end;
+};
+
+static inline void ipi_flush_tlb_page(void *arg)
+{
+	struct tlb_args *ta = arg;
+
+	local_flush_tlb_page(ta->ta_vma, ta->ta_start);
+}
+
+static inline void ipi_flush_tlb_range(void *arg)
+{
+	struct tlb_args *ta = arg;
+
+	local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
+}
+
+static inline void ipi_flush_tlb_kernel_range(void *arg)
+{
+	struct tlb_args *ta = (struct tlb_args *)arg;
+
+	local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
+}
+
+void flush_tlb_all(void)
+{
+	on_each_cpu((smp_call_func_t)local_flush_tlb_all, NULL, 1);
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	on_each_cpu_mask(mm_cpumask(mm), (smp_call_func_t)local_flush_tlb_mm,
+			 mm, 1);
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
+{
+	struct tlb_args ta = {
+		.ta_vma = vma,
+		.ta_start = uaddr
+	};
+
+	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, &ta, 1);
+}
+
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+{
+	struct tlb_args ta = {
+		.ta_vma = vma,
+		.ta_start = start,
+		.ta_end = end
+	};
+
+	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1);
+}
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	struct tlb_args ta = {
+		.ta_start = start,
+		.ta_end = end
+	};
+
+	on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
+}
+#endif
+
 /*
  * Routine to create a TLB entry
  */
@@ -400,7 +475,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 
 	local_irq_save(flags);
 
-	tlb_paranoid_check(vma->vm_mm->context.asid, address);
+	tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), address);
 
 	address &= PAGE_MASK;
 
@@ -610,9 +685,9 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 			  struct pt_regs *regs)
 {
 	int set, way, n;
-	unsigned int pd0[4], pd1[4];	/* assume max 4 ways */
 	unsigned long flags, is_valid;
 	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+	unsigned int pd0[mmu->ways], pd1[mmu->ways];
 
 	local_irq_save(flags);
 
@@ -637,7 +712,7 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 			continue;
 
 		/* Scan the set for duplicate ways: needs a nested loop */
-		for (way = 0; way < mmu->ways; way++) {
+		for (way = 0; way < mmu->ways - 1; way++) {
 			if (!pd0[way])
 				continue;
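
Reading the new CONFIG_SMP block above as a whole: the mm-scoped helpers bundle their arguments into a stack-resident tlb_args and IPI only the CPUs recorded in the aggregating mm_cpumask (see the switch_mm() comment earlier), while the kernel-address variants go to every CPU. A hedged usage sketch (illustrative caller, not from the patch):

static void demo_unmap_flush(struct vm_area_struct *vma,
			     unsigned long start, unsigned long end)
{
	/*
	 * IPIs ipi_flush_tlb_range(), hence local_flush_tlb_range(),
	 * to every CPU in mm_cpumask(vma->vm_mm), and waits.
	 */
	flush_tlb_range(vma, start, end);
}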
 

+ 2 - 2
arch/arc/mm/tlbex.S

@@ -369,8 +369,8 @@ do_slow_path_pf:
 	EXCEPTION_PROLOGUE
 
 	; ------- setup args for Linux Page fault Handler ---------
-	mov_s r0, sp
-	lr  r1, [efa]
+	mov_s r1, sp
+	lr    r0, [efa]
 
 	; We don't want exceptions to be disabled while the fault is handled.
 	; Now that we have saved the context we return from exception hence