Browse Source

microblaze: Add loop unrolling for PAGE in copy_tofrom_user

Increase performance by loop unrolling.

Signed-off-by: Michal Simek <monstr@monstr.eu>
Michal Simek 14 years ago
parent
commit
ebe211254b
1 changed files with 84 additions and 0 deletions
  1. 84 0
      arch/microblaze/lib/uaccess_old.S

+ 84 - 0
arch/microblaze/lib/uaccess_old.S

@@ -10,6 +10,7 @@
 
 #include <linux/errno.h>
 #include <linux/linkage.h>
+#include <asm/page.h>
 
 /*
  * int __strncpy_user(char *to, char *from, int len);
@@ -102,6 +103,49 @@ __strnlen_user:
 	.section	__ex_table,"a"
 	.word	1b,4b
 
+/* Loop unrolling for __copy_tofrom_user */
+#define COPY(offset)	\
+1:	lwi	r4 , r6, 0x0000 + offset;	\
+2:	lwi	r19, r6, 0x0004 + offset;	\
+3:	lwi	r20, r6, 0x0008 + offset;	\
+4:	lwi	r21, r6, 0x000C + offset;	\
+5:	lwi	r22, r6, 0x0010 + offset;	\
+6:	lwi	r23, r6, 0x0014 + offset;	\
+7:	lwi	r24, r6, 0x0018 + offset;	\
+8:	lwi	r25, r6, 0x001C + offset;	\
+9:	swi	r4 , r5, 0x0000 + offset;	\
+10:	swi	r19, r5, 0x0004 + offset;	\
+11:	swi	r20, r5, 0x0008 + offset;	\
+12:	swi	r21, r5, 0x000C + offset;	\
+13:	swi	r22, r5, 0x0010 + offset;	\
+14:	swi	r23, r5, 0x0014 + offset;	\
+15:	swi	r24, r5, 0x0018 + offset;	\
+16:	swi	r25, r5, 0x001C + offset;	\
+	.section __ex_table,"a";		\
+	.word	1b, 0f;				\
+	.word	2b, 0f;				\
+	.word	3b, 0f;				\
+	.word	4b, 0f;				\
+	.word	5b, 0f;				\
+	.word	6b, 0f;				\
+	.word	7b, 0f;				\
+	.word	8b, 0f;				\
+	.word	9b, 0f;				\
+	.word	10b, 0f;			\
+	.word	11b, 0f;			\
+	.word	12b, 0f;			\
+	.word	13b, 0f;			\
+	.word	14b, 0f;			\
+	.word	15b, 0f;			\
+	.word	16b, 0f;			\
+	.text
+
+#define COPY_80(offset)	\
+	COPY(0x00 + offset);\
+	COPY(0x20 + offset);\
+	COPY(0x40 + offset);\
+	COPY(0x60 + offset);
+
 /*
  * int __copy_tofrom_user(char *to, char *from, int len)
  * Return:
@@ -126,6 +170,10 @@ __copy_tofrom_user:
 	bneid	r3, bu1 /* if r3 is not zero then byte copying */
 	or	r3, r0, r0
 
+	rsubi	r3, r7, PAGE_SIZE /* detect PAGE_SIZE */
+	beqid	r3, page;
+	or	r3, r0, r0
+
 w1:	lw	r4, r6, r3 /* at least one 4 byte copy */
 w2:	sw	r4, r5, r3
 	addik	r7, r7, -4
@@ -140,6 +188,42 @@ w2:	sw	r4, r5, r3
 	.word	w2, 0f;
 	.text
 
+.align 4 /* Alignment is important to keep icache happy */
+page:	/* Create room on stack and save registers for storign values */
+	addik   r1, r1, -32
+	swi	r19, r1, 4
+	swi	r20, r1, 8
+	swi	r21, r1, 12
+	swi	r22, r1, 16
+	swi	r23, r1, 20
+	swi	r24, r1, 24
+	swi	r25, r1, 28
+loop:	/* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */
+	/* Loop unrolling to get performance boost */
+	COPY_80(0x000);
+	COPY_80(0x080);
+	COPY_80(0x100);
+	COPY_80(0x180);
+	/* copy loop */
+	addik	r6, r6, 0x200
+	addik	r7, r7, -0x200
+	bneid	r7, loop
+	addik	r5, r5, 0x200
+	/* Restore register content */
+	lwi	r19, r1, 4
+	lwi	r20, r1, 8
+	lwi	r21, r1, 12
+	lwi	r22, r1, 16
+	lwi	r23, r1, 20
+	lwi	r24, r1, 24
+	lwi	r25, r1, 28
+	addik   r1, r1, 32
+	/* return back */
+	addik	r3, r7, 0
+	rtsd	r15, 8
+	nop
+
+.align 4 /* Alignment is important to keep icache happy */
 bu1:	lbu	r4,r6,r3
 bu2:	sb	r4,r5,r3
 	addik	r7,r7,-1