فهرست منبع

Merge branch 'next' of git://git.monstr.eu/linux-2.6-microblaze

* 'next' of git://git.monstr.eu/linux-2.6-microblaze:
  microblaze: Remove __ARCH_WANT_INTERRUPTS_ON_CTXSW usage
  microblaze: Use delay slot in __strnlen_user, __strncpy_user
  microblaze: Remove NET_IP_ALIGN from system.h
  microblaze: Add __ucmpdi2() helper function
  microblaze: Raise SIGFPE/FPE_INTDIV for div by zero
  microblaze: Switch ELF_ARCH code to 189
  microblaze: Added DMA sync operations
  microblaze: Moved __dma_sync() to dma-mapping.h
  microblaze: Add PVR for Microblaze v8.20.a
  microblaze: Fix access_ok macro
  microblaze: Add loop unrolling for PAGE in copy_tofrom_user
  microblaze: Simplify logic for unaligned byte copying
  microblaze: Change label names - copy_tofrom_user
  microblaze: Separate fixup section definition
  microblaze: Change label name in copy_tofrom_user
  microblaze: Clear top bit from cnt32_to_63
Linus Torvalds 13 سال پیش
والد
کامیت
b1c907f3b2

+ 18 - 2
arch/microblaze/include/asm/dma-mapping.h

@@ -28,12 +28,12 @@
 #include <linux/dma-attrs.h>
 #include <asm/io.h>
 #include <asm-generic/dma-coherent.h>
+#include <asm/cacheflush.h>
 
 #define DMA_ERROR_CODE		(~(dma_addr_t)0x0)
 
 #define __dma_alloc_coherent(dev, gfp, size, handle)	NULL
 #define __dma_free_coherent(size, addr)		((void)0)
-#define __dma_sync(addr, size, rw)		((void)0)
 
 static inline unsigned long device_to_mask(struct device *dev)
 {
@@ -95,6 +95,22 @@ static inline int dma_set_mask(struct device *dev, u64 dma_mask)
 
 #include <asm-generic/dma-mapping-common.h>
 
+static inline void __dma_sync(unsigned long paddr,
+			      size_t size, enum dma_data_direction direction)
+{
+	switch (direction) {
+	case DMA_TO_DEVICE:
+	case DMA_BIDIRECTIONAL:
+		flush_dcache_range(paddr, paddr + size);
+		break;
+	case DMA_FROM_DEVICE:
+		invalidate_dcache_range(paddr, paddr + size);
+		break;
+	default:
+		BUG();
+	}
+}
+
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
@@ -135,7 +151,7 @@ static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 		enum dma_data_direction direction)
 {
 	BUG_ON(direction == DMA_NONE);
-	__dma_sync(vaddr, size, (int)direction);
+	__dma_sync(virt_to_phys(vaddr), size, (int)direction);
 }
 
 #endif	/* _ASM_MICROBLAZE_DMA_MAPPING_H */

+ 5 - 3
arch/microblaze/include/asm/elf.h

@@ -16,13 +16,15 @@
  * I've snaffled the value from the microblaze binutils source code
  * /binutils/microblaze/include/elf/microblaze.h
  */
-#define EM_XILINX_MICROBLAZE	0xbaab
-#define ELF_ARCH		EM_XILINX_MICROBLAZE
+#define EM_MICROBLAZE		189
+#define EM_MICROBLAZE_OLD	0xbaab
+#define ELF_ARCH		EM_MICROBLAZE
 
 /*
  * This is used to ensure we don't load something for the wrong architecture.
  */
-#define elf_check_arch(x)	((x)->e_machine == EM_XILINX_MICROBLAZE)
+#define elf_check_arch(x)	((x)->e_machine == EM_MICROBLAZE \
+				 || (x)->e_machine == EM_MICROBLAZE_OLD)
 
 /*
  * These are used to set parameters in the core dumps.

+ 0 - 9
arch/microblaze/include/asm/system.h

@@ -17,8 +17,6 @@
 #include <asm-generic/cmpxchg.h>
 #include <asm-generic/cmpxchg-local.h>
 
-#define __ARCH_WANT_INTERRUPTS_ON_CTXSW
-
 struct task_struct;
 struct thread_info;
 
@@ -96,11 +94,4 @@ extern struct dentry *of_debugfs_root;
 
 #define arch_align_stack(x) (x)
 
-/*
- * MicroBlaze doesn't handle unaligned accesses in hardware.
- *
- * Based on this we force the IP header alignment in network drivers.
- */
-#define NET_IP_ALIGN	2
-
 #endif /* _ASM_MICROBLAZE_SYSTEM_H */

+ 1 - 1
arch/microblaze/include/asm/uaccess.h

@@ -95,7 +95,7 @@ static inline int ___range_ok(unsigned long addr, unsigned long size)
  *  - "addr", "addr + size" and "size" are all below the limit
  */
 #define access_ok(type, addr, size) \
-	(get_fs().seg > (((unsigned long)(addr)) | \
+	(get_fs().seg >= (((unsigned long)(addr)) | \
 		(size) | ((unsigned long)(addr) + (size))))
 
 /* || printk("access_ok failed for %s at 0x%08lx (size %d), seg 0x%08x\n",

+ 1 - 0
arch/microblaze/kernel/cpu/cpuinfo.c

@@ -34,6 +34,7 @@ const struct cpu_ver_key cpu_ver_lookup[] = {
 	{"8.00.a", 0x12},
 	{"8.00.b", 0x13},
 	{"8.10.a", 0x14},
+	{"8.20.a", 0x15},
 	{NULL, 0},
 };
 

+ 63 - 19
arch/microblaze/kernel/dma.c

@@ -11,7 +11,6 @@
 #include <linux/gfp.h>
 #include <linux/dma-debug.h>
 #include <asm/bug.h>
-#include <asm/cacheflush.h>
 
 /*
  * Generic direct DMA implementation
@@ -21,21 +20,6 @@
  * can set archdata.dma_data to an unsigned long holding the offset. By
  * default the offset is PCI_DRAM_OFFSET.
  */
-static inline void __dma_sync_page(unsigned long paddr, unsigned long offset,
-				size_t size, enum dma_data_direction direction)
-{
-	switch (direction) {
-	case DMA_TO_DEVICE:
-	case DMA_BIDIRECTIONAL:
-		flush_dcache_range(paddr + offset, paddr + offset + size);
-		break;
-	case DMA_FROM_DEVICE:
-		invalidate_dcache_range(paddr + offset, paddr + offset + size);
-		break;
-	default:
-		BUG();
-	}
-}
 
 static unsigned long get_dma_direct_offset(struct device *dev)
 {
@@ -91,7 +75,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
 	/* FIXME this part of code is untested */
 	for_each_sg(sgl, sg, nents, i) {
 		sg->dma_address = sg_phys(sg) + get_dma_direct_offset(dev);
-		__dma_sync_page(page_to_phys(sg_page(sg)), sg->offset,
+		__dma_sync(page_to_phys(sg_page(sg)) + sg->offset,
 							sg->length, direction);
 	}
 
@@ -116,7 +100,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
 					     enum dma_data_direction direction,
 					     struct dma_attrs *attrs)
 {
-	__dma_sync_page(page_to_phys(page), offset, size, direction);
+	__dma_sync(page_to_phys(page) + offset, size, direction);
 	return page_to_phys(page) + offset + get_dma_direct_offset(dev);
 }
 
@@ -131,7 +115,63 @@ static inline void dma_direct_unmap_page(struct device *dev,
  * phys_to_virt is here because in __dma_sync_page is __virt_to_phys and
  * dma_address is physical address
  */
-	__dma_sync_page(dma_address, 0 , size, direction);
+	__dma_sync(dma_address, size, direction);
+}
+
+static inline void
+dma_direct_sync_single_for_cpu(struct device *dev,
+			       dma_addr_t dma_handle, size_t size,
+			       enum dma_data_direction direction)
+{
+	/*
+	 * It's pointless to flush the cache as the memory segment
+	 * is given to the CPU
+	 */
+
+	if (direction == DMA_FROM_DEVICE)
+		__dma_sync(dma_handle, size, direction);
+}
+
+static inline void
+dma_direct_sync_single_for_device(struct device *dev,
+				  dma_addr_t dma_handle, size_t size,
+				  enum dma_data_direction direction)
+{
+	/*
+	 * It's pointless to invalidate the cache if the device isn't
+	 * supposed to write to the relevant region
+	 */
+
+	if (direction == DMA_TO_DEVICE)
+		__dma_sync(dma_handle, size, direction);
+}
+
+static inline void
+dma_direct_sync_sg_for_cpu(struct device *dev,
+			   struct scatterlist *sgl, int nents,
+			   enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	int i;
+
+	/* FIXME this part of code is untested */
+	if (direction == DMA_FROM_DEVICE)
+		for_each_sg(sgl, sg, nents, i)
+			__dma_sync(sg->dma_address, sg->length, direction);
+}
+
+static inline void
+dma_direct_sync_sg_for_device(struct device *dev,
+			      struct scatterlist *sgl, int nents,
+			      enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	int i;
+
+	/* FIXME this part of code is untested */
+	if (direction == DMA_TO_DEVICE)
+		for_each_sg(sgl, sg, nents, i)
+			__dma_sync(sg->dma_address, sg->length, direction);
 }
 
 struct dma_map_ops dma_direct_ops = {
@@ -142,6 +182,10 @@ struct dma_map_ops dma_direct_ops = {
 	.dma_supported	= dma_direct_dma_supported,
 	.map_page	= dma_direct_map_page,
 	.unmap_page	= dma_direct_unmap_page,
+	.sync_single_for_cpu		= dma_direct_sync_single_for_cpu,
+	.sync_single_for_device		= dma_direct_sync_single_for_device,
+	.sync_sg_for_cpu		= dma_direct_sync_sg_for_cpu,
+	.sync_sg_for_device		= dma_direct_sync_sg_for_device,
 };
 EXPORT_SYMBOL(dma_direct_ops);
 

+ 1 - 1
arch/microblaze/kernel/exceptions.c

@@ -119,7 +119,7 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
 	case MICROBLAZE_DIV_ZERO_EXCEPTION:
 		if (user_mode(regs)) {
 			pr_debug("Divide by zero exception in user mode\n");
-			_exception(SIGILL, regs, FPE_INTDIV, addr);
+			_exception(SIGFPE, regs, FPE_INTDIV, addr);
 			return;
 		}
 		printk(KERN_WARNING "Divide by zero exception " \

+ 1 - 0
arch/microblaze/kernel/process.c

@@ -179,6 +179,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 
 	ti->cpu_context.msr = (childregs->msr|MSR_VM);
 	ti->cpu_context.msr &= ~MSR_UMS; /* switch_to to kernel mode */
+	ti->cpu_context.msr &= ~MSR_IE;
 #endif
 	ti->cpu_context.r15 = (unsigned long)ret_from_fork - 8;
 

+ 1 - 1
arch/microblaze/kernel/ptrace.c

@@ -148,7 +148,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 		ret = -1L;
 
 	if (unlikely(current->audit_context))
-		audit_syscall_entry(EM_XILINX_MICROBLAZE, regs->r12,
+		audit_syscall_entry(EM_MICROBLAZE, regs->r12,
 				    regs->r5, regs->r6,
 				    regs->r7, regs->r8);
 

+ 2 - 1
arch/microblaze/kernel/timer.c

@@ -308,7 +308,8 @@ unsigned long long notrace sched_clock(void)
 {
 	if (timer_initialized) {
 		struct clocksource *cs = &clocksource_microblaze;
-		cycle_t cyc = cnt32_to_63(cs->read(NULL));
+
+		cycle_t cyc = cnt32_to_63(cs->read(NULL)) & LLONG_MAX;
 		return clocksource_cyc2ns(cyc, cs->mult, cs->shift);
 	}
 	return 0;

+ 1 - 0
arch/microblaze/lib/Makefile

@@ -25,5 +25,6 @@ lib-y += lshrdi3.o
 lib-y += modsi3.o
 lib-y += muldi3.o
 lib-y += mulsi3.o
+lib-y += ucmpdi2.o
 lib-y += udivsi3.o
 lib-y += umodsi3.o

+ 106 - 17
arch/microblaze/lib/uaccess_old.S

@@ -10,6 +10,7 @@
 
 #include <linux/errno.h>
 #include <linux/linkage.h>
+#include <asm/page.h>
 
 /*
  * int __strncpy_user(char *to, char *from, int len);
@@ -33,8 +34,8 @@ __strncpy_user:
 	 * r3 - temp count
 	 * r4 - temp val
 	 */
+	beqid	r7,3f
 	addik	r3,r7,0		/* temp_count = len */
-	beqi	r3,3f
 1:
 	lbu	r4,r6,r0
 	sb	r4,r5,r0
@@ -76,8 +77,8 @@ __strncpy_user:
 .type  __strnlen_user, @function
 .align 4;
 __strnlen_user:
+	beqid	r6,3f
 	addik	r3,r6,0
-	beqi	r3,3f
 1:
 	lbu	r4,r5,r0
 	beqid	r4,2f		/* break on NUL */
@@ -102,6 +103,49 @@ __strnlen_user:
 	.section	__ex_table,"a"
 	.word	1b,4b
 
+/* Loop unrolling for __copy_tofrom_user */
+#define COPY(offset)	\
+1:	lwi	r4 , r6, 0x0000 + offset;	\
+2:	lwi	r19, r6, 0x0004 + offset;	\
+3:	lwi	r20, r6, 0x0008 + offset;	\
+4:	lwi	r21, r6, 0x000C + offset;	\
+5:	lwi	r22, r6, 0x0010 + offset;	\
+6:	lwi	r23, r6, 0x0014 + offset;	\
+7:	lwi	r24, r6, 0x0018 + offset;	\
+8:	lwi	r25, r6, 0x001C + offset;	\
+9:	swi	r4 , r5, 0x0000 + offset;	\
+10:	swi	r19, r5, 0x0004 + offset;	\
+11:	swi	r20, r5, 0x0008 + offset;	\
+12:	swi	r21, r5, 0x000C + offset;	\
+13:	swi	r22, r5, 0x0010 + offset;	\
+14:	swi	r23, r5, 0x0014 + offset;	\
+15:	swi	r24, r5, 0x0018 + offset;	\
+16:	swi	r25, r5, 0x001C + offset;	\
+	.section __ex_table,"a";		\
+	.word	1b, 0f;				\
+	.word	2b, 0f;				\
+	.word	3b, 0f;				\
+	.word	4b, 0f;				\
+	.word	5b, 0f;				\
+	.word	6b, 0f;				\
+	.word	7b, 0f;				\
+	.word	8b, 0f;				\
+	.word	9b, 0f;				\
+	.word	10b, 0f;			\
+	.word	11b, 0f;			\
+	.word	12b, 0f;			\
+	.word	13b, 0f;			\
+	.word	14b, 0f;			\
+	.word	15b, 0f;			\
+	.word	16b, 0f;			\
+	.text
+
+#define COPY_80(offset)	\
+	COPY(0x00 + offset);\
+	COPY(0x20 + offset);\
+	COPY(0x40 + offset);\
+	COPY(0x60 + offset);
+
 /*
  * int __copy_tofrom_user(char *to, char *from, int len)
  * Return:
@@ -119,34 +163,79 @@ __copy_tofrom_user:
 	 * r7, r3 - count
 	 * r4 - tempval
 	 */
-	beqid	r7, 3f /* zero size is not likely */
-	andi	r3, r7, 0x3 /* filter add count */
-	bneid	r3, 4f /* if is odd value then byte copying */
+	beqid	r7, 0f /* zero size is not likely */
 	or	r3, r5, r6 /* find if is any to/from unaligned */
-	andi	r3, r3, 0x3 /* mask unaligned */
-	bneid	r3, 1f /* it is unaligned -> then jump */
+	or	r3, r3, r7 /* find if count is unaligned */
+	andi	r3, r3, 0x3 /* mask last 3 bits */
+	bneid	r3, bu1 /* if r3 is not zero then byte copying */
+	or	r3, r0, r0
+
+	rsubi	r3, r7, PAGE_SIZE /* detect PAGE_SIZE */
+	beqid	r3, page;
 	or	r3, r0, r0
 
-/* at least one 4 byte copy */
-5:	lw	r4, r6, r3
-6:	sw	r4, r5, r3
+w1:	lw	r4, r6, r3 /* at least one 4 byte copy */
+w2:	sw	r4, r5, r3
 	addik	r7, r7, -4
-	bneid	r7, 5b
+	bneid	r7, w1
 	addik	r3, r3, 4
 	addik	r3, r7, 0
 	rtsd	r15, 8
 	nop
-4:	or	r3, r0, r0
-1:	lbu	r4,r6,r3
-2:	sb	r4,r5,r3
+
+	.section	__ex_table,"a"
+	.word	w1, 0f;
+	.word	w2, 0f;
+	.text
+
+.align 4 /* Alignment is important to keep icache happy */
+page:	/* Create room on stack and save registers for storign values */
+	addik   r1, r1, -32
+	swi	r19, r1, 4
+	swi	r20, r1, 8
+	swi	r21, r1, 12
+	swi	r22, r1, 16
+	swi	r23, r1, 20
+	swi	r24, r1, 24
+	swi	r25, r1, 28
+loop:	/* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */
+	/* Loop unrolling to get performance boost */
+	COPY_80(0x000);
+	COPY_80(0x080);
+	COPY_80(0x100);
+	COPY_80(0x180);
+	/* copy loop */
+	addik	r6, r6, 0x200
+	addik	r7, r7, -0x200
+	bneid	r7, loop
+	addik	r5, r5, 0x200
+	/* Restore register content */
+	lwi	r19, r1, 4
+	lwi	r20, r1, 8
+	lwi	r21, r1, 12
+	lwi	r22, r1, 16
+	lwi	r23, r1, 20
+	lwi	r24, r1, 24
+	lwi	r25, r1, 28
+	addik   r1, r1, 32
+	/* return back */
+	addik	r3, r7, 0
+	rtsd	r15, 8
+	nop
+
+.align 4 /* Alignment is important to keep icache happy */
+bu1:	lbu	r4,r6,r3
+bu2:	sb	r4,r5,r3
 	addik	r7,r7,-1
-	bneid	r7,1b
+	bneid	r7,bu1
 	addik	r3,r3,1		/* delay slot */
-3:
+0:
 	addik	r3,r7,0
 	rtsd	r15,8
 	nop
 	.size   __copy_tofrom_user, . - __copy_tofrom_user
 
 	.section	__ex_table,"a"
-	.word	1b,3b,2b,3b,5b,3b,6b,3b
+	.word	bu1, 0b;
+	.word	bu2, 0b;
+	.text

+ 20 - 0
arch/microblaze/lib/ucmpdi2.c

@@ -0,0 +1,20 @@
+#include <linux/module.h>
+
+#include "libgcc.h"
+
+word_type __ucmpdi2(unsigned long long a, unsigned long long b)
+{
+	const DWunion au = {.ll = a};
+	const DWunion bu = {.ll = b};
+
+	if ((unsigned int) au.s.high < (unsigned int) bu.s.high)
+		return 0;
+	else if ((unsigned int) au.s.high > (unsigned int) bu.s.high)
+		return 2;
+	if ((unsigned int) au.s.low < (unsigned int) bu.s.low)
+		return 0;
+	else if ((unsigned int) au.s.low > (unsigned int) bu.s.low)
+		return 2;
+	return 1;
+}
+EXPORT_SYMBOL(__ucmpdi2);