Browse Source

Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto update from Herbert Xu:
 "Here is the crypto update for 3.9:

   - Added accelerated implementation of crc32 using pclmulqdq.

   - Added test vector for fcrypt.

   - Added support for OMAP4/AM33XX cipher and hash.

   - Fixed loose crypto_user input checks.

   - Misc fixes"

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (43 commits)
  crypto: user - ensure user supplied strings are nul-terminated
  crypto: user - fix empty string test in report API
  crypto: user - fix info leaks in report API
  crypto: caam - Added property fsl,sec-era in SEC4.0 device tree binding.
  crypto: use ERR_CAST
  crypto: atmel-aes - adjust duplicate test
  crypto: crc32-pclmul - Kill warning on x86-32
  crypto: x86/twofish - assembler clean-ups: use ENTRY/ENDPROC, localize jump labels
  crypto: x86/sha1 - assembler clean-ups: use ENTRY/ENDPROC
  crypto: x86/serpent - use ENTRY/ENDPROC for assember functions and localize jump targets
  crypto: x86/salsa20 - assembler cleanup, use ENTRY/ENDPROC for assember functions and rename ECRYPT_* to salsa20_*
  crypto: x86/ghash - assembler clean-up: use ENDPROC at end of assember functions
  crypto: x86/crc32c - assembler clean-up: use ENTRY/ENDPROC
  crypto: cast6-avx: use ENTRY()/ENDPROC() for assembler functions
  crypto: cast5-avx: use ENTRY()/ENDPROC() for assembler functions and localize jump targets
  crypto: camellia-x86_64/aes-ni: use ENTRY()/ENDPROC() for assembler functions and localize jump targets
  crypto: blowfish-x86_64: use ENTRY()/ENDPROC() for assembler functions and localize jump targets
  crypto: aesni-intel - add ENDPROC statements for assembler functions
  crypto: x86/aes - assembler clean-ups: use ENTRY/ENDPROC, localize jump targets
  crypto: testmgr - add test vector for fcrypt
  ...
Linus Torvalds 12 years ago
parent
commit
32dc43e40a
51 changed files with 2075 additions and 698 deletions
  1. 1 1
      Documentation/devicetree/bindings/crypto/fsl-sec4.txt
  2. 2 0
      arch/x86/crypto/Makefile
  3. 5 10
      arch/x86/crypto/aes-i586-asm_32.S
  4. 15 15
      arch/x86/crypto/aes-x86_64-asm_64.S
  5. 22 1
      arch/x86/crypto/aesni-intel_asm.S
  6. 14 25
      arch/x86/crypto/blowfish-x86_64-asm_64.S
  7. 14 24
      arch/x86/crypto/camellia-aesni-avx-asm_64.S
  8. 22 28
      arch/x86/crypto/camellia-x86_64-asm_64.S
  9. 18 30
      arch/x86/crypto/cast5-avx-x86_64-asm_64.S
  10. 11 24
      arch/x86/crypto/cast6-avx-x86_64-asm_64.S
  11. 246 0
      arch/x86/crypto/crc32-pclmul_asm.S
  12. 201 0
      arch/x86/crypto/crc32-pclmul_glue.c
  13. 6 2
      arch/x86/crypto/crc32c-pcl-intel-asm_64.S
  14. 4 0
      arch/x86/crypto/ghash-clmulni-intel_asm.S
  15. 14 14
      arch/x86/crypto/salsa20-i586-asm_32.S
  16. 13 15
      arch/x86/crypto/salsa20-x86_64-asm_64.S
  17. 0 5
      arch/x86/crypto/salsa20_glue.c
  18. 11 24
      arch/x86/crypto/serpent-avx-x86_64-asm_64.S
  19. 8 12
      arch/x86/crypto/serpent-sse2-i586-asm_32.S
  20. 8 12
      arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
  21. 5 5
      arch/x86/crypto/sha1_ssse3_asm.S
  22. 11 24
      arch/x86/crypto/twofish-avx-x86_64-asm_64.S
  23. 5 6
      arch/x86/crypto/twofish-i586-asm_32.S
  24. 8 12
      arch/x86/crypto/twofish-x86_64-asm_64-3way.S
  25. 5 6
      arch/x86/crypto/twofish-x86_64-asm_64.S
  26. 21 0
      crypto/Kconfig
  27. 1 0
      crypto/Makefile
  28. 6 6
      crypto/ablkcipher.c
  29. 6 9
      crypto/aead.c
  30. 1 1
      crypto/ahash.c
  31. 1 3
      crypto/algapi.c
  32. 1 2
      crypto/authenc.c
  33. 1 2
      crypto/authencesn.c
  34. 5 7
      crypto/blkcipher.c
  35. 7 16
      crypto/ccm.c
  36. 1 2
      crypto/chainiv.c
  37. 158 0
      crypto/crc32.c
  38. 26 12
      crypto/crypto_user.c
  39. 2 4
      crypto/ctr.c
  40. 1 2
      crypto/cts.c
  41. 9 20
      crypto/gcm.c
  42. 1 2
      crypto/pcompress.c
  43. 1 1
      crypto/rng.c
  44. 1 2
      crypto/seqiv.c
  45. 2 1
      crypto/shash.c
  46. 15 0
      crypto/testmgr.c
  47. 1 1
      drivers/crypto/atmel-aes.c
  48. 3 3
      drivers/crypto/bfin_crc.c
  49. 484 174
      drivers/crypto/omap-aes.c
  50. 650 130
      drivers/crypto/omap-sham.c
  51. 1 3
      drivers/crypto/s5p-sss.c

+ 1 - 1
Documentation/devicetree/bindings/crypto/fsl-sec4.txt

@@ -113,7 +113,7 @@ PROPERTIES
 EXAMPLE
 	crypto@300000 {
 		compatible = "fsl,sec-v4.0";
-		fsl,sec-era = <0x2>;
+		fsl,sec-era = <2>;
 		#address-cells = <1>;
 		#size-cells = <1>;
 		reg = <0x300000 0x10000>;

+ 2 - 0
arch/x86/crypto/Makefile

@@ -27,6 +27,7 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
 
 obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
 obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
+obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
 
 aes-i586-y := aes-i586-asm_32.o aes_glue.o
 twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
@@ -52,3 +53,4 @@ ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
 crc32c-intel-y := crc32c-intel_glue.o
 crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o
+crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o

+ 5 - 10
arch/x86/crypto/aes-i586-asm_32.S

@@ -36,6 +36,7 @@
 .file "aes-i586-asm.S"
 .text
 
+#include <linux/linkage.h>
 #include <asm/asm-offsets.h>
 
 #define tlen 1024   // length of each of 4 'xor' arrays (256 32-bit words)
@@ -219,14 +220,10 @@
 // AES (Rijndael) Encryption Subroutine
 /* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
 
-.global  aes_enc_blk
-
 .extern  crypto_ft_tab
 .extern  crypto_fl_tab
 
-.align 4
-
-aes_enc_blk:
+ENTRY(aes_enc_blk)
 	push    %ebp
 	mov     ctx(%esp),%ebp
 
@@ -290,18 +287,15 @@ aes_enc_blk:
 	mov     %r0,(%ebp)
 	pop     %ebp
 	ret
+ENDPROC(aes_enc_blk)
 
 // AES (Rijndael) Decryption Subroutine
 /* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
 
-.global  aes_dec_blk
-
 .extern  crypto_it_tab
 .extern  crypto_il_tab
 
-.align 4
-
-aes_dec_blk:
+ENTRY(aes_dec_blk)
 	push    %ebp
 	mov     ctx(%esp),%ebp
 
@@ -365,3 +359,4 @@ aes_dec_blk:
 	mov     %r0,(%ebp)
 	pop     %ebp
 	ret
+ENDPROC(aes_dec_blk)

+ 15 - 15
arch/x86/crypto/aes-x86_64-asm_64.S

@@ -15,6 +15,7 @@
 
 .text
 
+#include <linux/linkage.h>
 #include <asm/asm-offsets.h>
 
 #define R1	%rax
@@ -49,10 +50,8 @@
 #define R11	%r11
 
 #define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
-	.global	FUNC;			\
-	.type	FUNC,@function;		\
-	.align	8;			\
-FUNC:	movq	r1,r2;			\
+	ENTRY(FUNC);			\
+	movq	r1,r2;			\
 	movq	r3,r4;			\
 	leaq	KEY+48(r8),r9;		\
 	movq	r10,r11;		\
@@ -71,14 +70,15 @@ FUNC:	movq	r1,r2;			\
 	je	B192;			\
 	leaq	32(r9),r9;
 
-#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
+#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
 	movq	r1,r2;			\
 	movq	r3,r4;			\
 	movl	r5 ## E,(r9);		\
 	movl	r6 ## E,4(r9);		\
 	movl	r7 ## E,8(r9);		\
 	movl	r8 ## E,12(r9);		\
-	ret;
+	ret;				\
+	ENDPROC(FUNC);
 
 #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
 	movzbl	r2 ## H,r5 ## E;	\
@@ -133,7 +133,7 @@ FUNC:	movq	r1,r2;			\
 #define entry(FUNC,KEY,B128,B192) \
 	prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
 
-#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
+#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11)
 
 #define encrypt_round(TAB,OFFSET) \
 	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
@@ -151,12 +151,12 @@ FUNC:	movq	r1,r2;			\
 
 /* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */
 
-	entry(aes_enc_blk,0,enc128,enc192)
+	entry(aes_enc_blk,0,.Le128,.Le192)
 	encrypt_round(crypto_ft_tab,-96)
 	encrypt_round(crypto_ft_tab,-80)
-enc192:	encrypt_round(crypto_ft_tab,-64)
+.Le192:	encrypt_round(crypto_ft_tab,-64)
 	encrypt_round(crypto_ft_tab,-48)
-enc128:	encrypt_round(crypto_ft_tab,-32)
+.Le128:	encrypt_round(crypto_ft_tab,-32)
 	encrypt_round(crypto_ft_tab,-16)
 	encrypt_round(crypto_ft_tab,  0)
 	encrypt_round(crypto_ft_tab, 16)
@@ -166,16 +166,16 @@ enc128:	encrypt_round(crypto_ft_tab,-32)
 	encrypt_round(crypto_ft_tab, 80)
 	encrypt_round(crypto_ft_tab, 96)
 	encrypt_final(crypto_fl_tab,112)
-	return
+	return(aes_enc_blk)
 
 /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
 
-	entry(aes_dec_blk,240,dec128,dec192)
+	entry(aes_dec_blk,240,.Ld128,.Ld192)
 	decrypt_round(crypto_it_tab,-96)
 	decrypt_round(crypto_it_tab,-80)
-dec192:	decrypt_round(crypto_it_tab,-64)
+.Ld192:	decrypt_round(crypto_it_tab,-64)
 	decrypt_round(crypto_it_tab,-48)
-dec128:	decrypt_round(crypto_it_tab,-32)
+.Ld128:	decrypt_round(crypto_it_tab,-32)
 	decrypt_round(crypto_it_tab,-16)
 	decrypt_round(crypto_it_tab,  0)
 	decrypt_round(crypto_it_tab, 16)
@@ -185,4 +185,4 @@ dec128:	decrypt_round(crypto_it_tab,-32)
 	decrypt_round(crypto_it_tab, 80)
 	decrypt_round(crypto_it_tab, 96)
 	decrypt_final(crypto_il_tab,112)
-	return
+	return(aes_dec_blk)

+ 22 - 1
arch/x86/crypto/aesni-intel_asm.S

@@ -1262,7 +1262,6 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
 * poly = x^128 + x^127 + x^126 + x^121 + 1
 *
 *****************************************************************************/
-
 ENTRY(aesni_gcm_dec)
 	push	%r12
 	push	%r13
@@ -1437,6 +1436,7 @@ _return_T_done_decrypt:
 	pop	%r13
 	pop	%r12
 	ret
+ENDPROC(aesni_gcm_dec)
 
 
 /*****************************************************************************
@@ -1700,10 +1700,12 @@ _return_T_done_encrypt:
 	pop	%r13
 	pop	%r12
 	ret
+ENDPROC(aesni_gcm_enc)
 
 #endif
 
 
+.align 4
 _key_expansion_128:
 _key_expansion_256a:
 	pshufd $0b11111111, %xmm1, %xmm1
@@ -1715,6 +1717,8 @@ _key_expansion_256a:
 	movaps %xmm0, (TKEYP)
 	add $0x10, TKEYP
 	ret
+ENDPROC(_key_expansion_128)
+ENDPROC(_key_expansion_256a)
 
 .align 4
 _key_expansion_192a:
@@ -1739,6 +1743,7 @@ _key_expansion_192a:
 	movaps %xmm1, 0x10(TKEYP)
 	add $0x20, TKEYP
 	ret
+ENDPROC(_key_expansion_192a)
 
 .align 4
 _key_expansion_192b:
@@ -1758,6 +1763,7 @@ _key_expansion_192b:
 	movaps %xmm0, (TKEYP)
 	add $0x10, TKEYP
 	ret
+ENDPROC(_key_expansion_192b)
 
 .align 4
 _key_expansion_256b:
@@ -1770,6 +1776,7 @@ _key_expansion_256b:
 	movaps %xmm2, (TKEYP)
 	add $0x10, TKEYP
 	ret
+ENDPROC(_key_expansion_256b)
 
 /*
  * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
@@ -1882,6 +1889,7 @@ ENTRY(aesni_set_key)
 	popl KEYP
 #endif
 	ret
+ENDPROC(aesni_set_key)
 
 /*
  * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
@@ -1903,6 +1911,7 @@ ENTRY(aesni_enc)
 	popl KEYP
 #endif
 	ret
+ENDPROC(aesni_enc)
 
 /*
  * _aesni_enc1:		internal ABI
@@ -1960,6 +1969,7 @@ _aesni_enc1:
 	movaps 0x70(TKEYP), KEY
 	AESENCLAST KEY STATE
 	ret
+ENDPROC(_aesni_enc1)
 
 /*
  * _aesni_enc4:	internal ABI
@@ -2068,6 +2078,7 @@ _aesni_enc4:
 	AESENCLAST KEY STATE3
 	AESENCLAST KEY STATE4
 	ret
+ENDPROC(_aesni_enc4)
 
 /*
  * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
@@ -2090,6 +2101,7 @@ ENTRY(aesni_dec)
 	popl KEYP
 #endif
 	ret
+ENDPROC(aesni_dec)
 
 /*
  * _aesni_dec1:		internal ABI
@@ -2147,6 +2159,7 @@ _aesni_dec1:
 	movaps 0x70(TKEYP), KEY
 	AESDECLAST KEY STATE
 	ret
+ENDPROC(_aesni_dec1)
 
 /*
  * _aesni_dec4:	internal ABI
@@ -2255,6 +2268,7 @@ _aesni_dec4:
 	AESDECLAST KEY STATE3
 	AESDECLAST KEY STATE4
 	ret
+ENDPROC(_aesni_dec4)
 
 /*
  * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2312,6 +2326,7 @@ ENTRY(aesni_ecb_enc)
 	popl LEN
 #endif
 	ret
+ENDPROC(aesni_ecb_enc)
 
 /*
  * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2370,6 +2385,7 @@ ENTRY(aesni_ecb_dec)
 	popl LEN
 #endif
 	ret
+ENDPROC(aesni_ecb_dec)
 
 /*
  * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2411,6 +2427,7 @@ ENTRY(aesni_cbc_enc)
 	popl IVP
 #endif
 	ret
+ENDPROC(aesni_cbc_enc)
 
 /*
  * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2501,6 +2518,7 @@ ENTRY(aesni_cbc_dec)
 	popl IVP
 #endif
 	ret
+ENDPROC(aesni_cbc_dec)
 
 #ifdef __x86_64__
 .align 16
@@ -2527,6 +2545,7 @@ _aesni_inc_init:
 	MOVQ_R64_XMM TCTR_LOW INC
 	MOVQ_R64_XMM CTR TCTR_LOW
 	ret
+ENDPROC(_aesni_inc_init)
 
 /*
  * _aesni_inc:		internal ABI
@@ -2555,6 +2574,7 @@ _aesni_inc:
 	movaps CTR, IV
 	PSHUFB_XMM BSWAP_MASK IV
 	ret
+ENDPROC(_aesni_inc)
 
 /*
  * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2615,4 +2635,5 @@ ENTRY(aesni_ctr_enc)
 	movups IV, (IVP)
 .Lctr_enc_just_ret:
 	ret
+ENDPROC(aesni_ctr_enc)
 #endif

+ 14 - 25
arch/x86/crypto/blowfish-x86_64-asm_64.S

@@ -20,6 +20,8 @@
  *
  */
 
+#include <linux/linkage.h>
+
 .file "blowfish-x86_64-asm.S"
 .text
 
@@ -116,11 +118,7 @@
 	bswapq 			RX0; \
 	xorq RX0, 		(RIO);
 
-.align 8
-.global __blowfish_enc_blk
-.type   __blowfish_enc_blk,@function;
-
-__blowfish_enc_blk:
+ENTRY(__blowfish_enc_blk)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -148,19 +146,16 @@ __blowfish_enc_blk:
 
 	movq %r10, RIO;
 	test %cl, %cl;
-	jnz __enc_xor;
+	jnz .L__enc_xor;
 
 	write_block();
 	ret;
-__enc_xor:
+.L__enc_xor:
 	xor_block();
 	ret;
+ENDPROC(__blowfish_enc_blk)
 
-.align 8
-.global blowfish_dec_blk
-.type   blowfish_dec_blk,@function;
-
-blowfish_dec_blk:
+ENTRY(blowfish_dec_blk)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -189,6 +184,7 @@ blowfish_dec_blk:
 	movq %r11, %rbp;
 
 	ret;
+ENDPROC(blowfish_dec_blk)
 
 /**********************************************************************
   4-way blowfish, four blocks parallel
@@ -300,11 +296,7 @@ blowfish_dec_blk:
 	bswapq 			RX3; \
 	xorq RX3,		24(RIO);
 
-.align 8
-.global __blowfish_enc_blk_4way
-.type   __blowfish_enc_blk_4way,@function;
-
-__blowfish_enc_blk_4way:
+ENTRY(__blowfish_enc_blk_4way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -336,7 +328,7 @@ __blowfish_enc_blk_4way:
 	movq %r11, RIO;
 
 	test %bpl, %bpl;
-	jnz __enc_xor4;
+	jnz .L__enc_xor4;
 
 	write_block4();
 
@@ -344,18 +336,15 @@ __blowfish_enc_blk_4way:
 	popq %rbp;
 	ret;
 
-__enc_xor4:
+.L__enc_xor4:
 	xor_block4();
 
 	popq %rbx;
 	popq %rbp;
 	ret;
+ENDPROC(__blowfish_enc_blk_4way)
 
-.align 8
-.global blowfish_dec_blk_4way
-.type   blowfish_dec_blk_4way,@function;
-
-blowfish_dec_blk_4way:
+ENTRY(blowfish_dec_blk_4way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -387,4 +376,4 @@ blowfish_dec_blk_4way:
 	popq %rbp;
 
 	ret;
-
+ENDPROC(blowfish_dec_blk_4way)

+ 14 - 24
arch/x86/crypto/camellia-aesni-avx-asm_64.S

@@ -15,6 +15,8 @@
  *	http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz
  */
 
+#include <linux/linkage.h>
+
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
 /* struct camellia_ctx: */
@@ -190,6 +192,7 @@ roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
 		  %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
 		  %rcx, (%r9));
 	ret;
+ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
 
 .align 8
 roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
@@ -197,6 +200,7 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
 		  %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
 		  %rax, (%r9));
 	ret;
+ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 
 /*
  * IN/OUT:
@@ -709,8 +713,6 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
 .text
 
 .align 8
-.type   __camellia_enc_blk16,@function;
-
 __camellia_enc_blk16:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -793,10 +795,9 @@ __camellia_enc_blk16:
 		     %xmm15, %rax, %rcx, 24);
 
 	jmp .Lenc_done;
+ENDPROC(__camellia_enc_blk16)
 
 .align 8
-.type   __camellia_dec_blk16,@function;
-
 __camellia_dec_blk16:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -877,12 +878,9 @@ __camellia_dec_blk16:
 	      ((key_table + (24) * 8) + 4)(CTX));
 
 	jmp .Ldec_max24;
+ENDPROC(__camellia_dec_blk16)
 
-.align 8
-.global camellia_ecb_enc_16way
-.type   camellia_ecb_enc_16way,@function;
-
-camellia_ecb_enc_16way:
+ENTRY(camellia_ecb_enc_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (16 blocks)
@@ -903,12 +901,9 @@ camellia_ecb_enc_16way:
 		     %xmm8, %rsi);
 
 	ret;
+ENDPROC(camellia_ecb_enc_16way)
 
-.align 8
-.global camellia_ecb_dec_16way
-.type   camellia_ecb_dec_16way,@function;
-
-camellia_ecb_dec_16way:
+ENTRY(camellia_ecb_dec_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (16 blocks)
@@ -934,12 +929,9 @@ camellia_ecb_dec_16way:
 		     %xmm8, %rsi);
 
 	ret;
+ENDPROC(camellia_ecb_dec_16way)
 
-.align 8
-.global camellia_cbc_dec_16way
-.type   camellia_cbc_dec_16way,@function;
-
-camellia_cbc_dec_16way:
+ENTRY(camellia_cbc_dec_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (16 blocks)
@@ -986,6 +978,7 @@ camellia_cbc_dec_16way:
 		     %xmm8, %rsi);
 
 	ret;
+ENDPROC(camellia_cbc_dec_16way)
 
 #define inc_le128(x, minus_one, tmp) \
 	vpcmpeqq minus_one, x, tmp; \
@@ -993,11 +986,7 @@ camellia_cbc_dec_16way:
 	vpslldq $8, tmp, tmp; \
 	vpsubq tmp, x, x;
 
-.align 8
-.global camellia_ctr_16way
-.type   camellia_ctr_16way,@function;
-
-camellia_ctr_16way:
+ENTRY(camellia_ctr_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst (16 blocks)
@@ -1100,3 +1089,4 @@ camellia_ctr_16way:
 		     %xmm8, %rsi);
 
 	ret;
+ENDPROC(camellia_ctr_16way)

+ 22 - 28
arch/x86/crypto/camellia-x86_64-asm_64.S

@@ -20,6 +20,8 @@
  *
  */
 
+#include <linux/linkage.h>
+
 .file "camellia-x86_64-asm_64.S"
 .text
 
@@ -188,10 +190,7 @@
 	bswapq				RAB0; \
 	movq RAB0,			4*2(RIO);
 
-.global __camellia_enc_blk;
-.type   __camellia_enc_blk,@function;
-
-__camellia_enc_blk:
+ENTRY(__camellia_enc_blk)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -214,33 +213,31 @@ __camellia_enc_blk:
 	movl $24, RT1d; /* max */
 
 	cmpb $16, key_length(CTX);
-	je __enc_done;
+	je .L__enc_done;
 
 	enc_fls(24);
 	enc_rounds(24);
 	movl $32, RT1d; /* max */
 
-__enc_done:
+.L__enc_done:
 	testb RXORbl, RXORbl;
 	movq RDST, RIO;
 
-	jnz __enc_xor;
+	jnz .L__enc_xor;
 
 	enc_outunpack(mov, RT1);
 
 	movq RRBP, %rbp;
 	ret;
 
-__enc_xor:
+.L__enc_xor:
 	enc_outunpack(xor, RT1);
 
 	movq RRBP, %rbp;
 	ret;
+ENDPROC(__camellia_enc_blk)
 
-.global camellia_dec_blk;
-.type   camellia_dec_blk,@function;
-
-camellia_dec_blk:
+ENTRY(camellia_dec_blk)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -258,12 +255,12 @@ camellia_dec_blk:
 	dec_inpack(RT2);
 
 	cmpb $24, RT2bl;
-	je __dec_rounds16;
+	je .L__dec_rounds16;
 
 	dec_rounds(24);
 	dec_fls(24);
 
-__dec_rounds16:
+.L__dec_rounds16:
 	dec_rounds(16);
 	dec_fls(16);
 	dec_rounds(8);
@@ -276,6 +273,7 @@ __dec_rounds16:
 
 	movq RRBP, %rbp;
 	ret;
+ENDPROC(camellia_dec_blk)
 
 /**********************************************************************
   2-way camellia
@@ -426,10 +424,7 @@ __dec_rounds16:
 		bswapq				RAB1; \
 		movq RAB1,			12*2(RIO);
 
-.global __camellia_enc_blk_2way;
-.type   __camellia_enc_blk_2way,@function;
-
-__camellia_enc_blk_2way:
+ENTRY(__camellia_enc_blk_2way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -453,16 +448,16 @@ __camellia_enc_blk_2way:
 	movl $24, RT2d; /* max */
 
 	cmpb $16, key_length(CTX);
-	je __enc2_done;
+	je .L__enc2_done;
 
 	enc_fls2(24);
 	enc_rounds2(24);
 	movl $32, RT2d; /* max */
 
-__enc2_done:
+.L__enc2_done:
 	test RXORbl, RXORbl;
 	movq RDST, RIO;
-	jnz __enc2_xor;
+	jnz .L__enc2_xor;
 
 	enc_outunpack2(mov, RT2);
 
@@ -470,17 +465,15 @@ __enc2_done:
 	popq %rbx;
 	ret;
 
-__enc2_xor:
+.L__enc2_xor:
 	enc_outunpack2(xor, RT2);
 
 	movq RRBP, %rbp;
 	popq %rbx;
 	ret;
+ENDPROC(__camellia_enc_blk_2way)
 
-.global camellia_dec_blk_2way;
-.type   camellia_dec_blk_2way,@function;
-
-camellia_dec_blk_2way:
+ENTRY(camellia_dec_blk_2way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -499,12 +492,12 @@ camellia_dec_blk_2way:
 	dec_inpack2(RT2);
 
 	cmpb $24, RT2bl;
-	je __dec2_rounds16;
+	je .L__dec2_rounds16;
 
 	dec_rounds2(24);
 	dec_fls2(24);
 
-__dec2_rounds16:
+.L__dec2_rounds16:
 	dec_rounds2(16);
 	dec_fls2(16);
 	dec_rounds2(8);
@@ -518,3 +511,4 @@ __dec2_rounds16:
 	movq RRBP, %rbp;
 	movq RXOR, %rbx;
 	ret;
+ENDPROC(camellia_dec_blk_2way)

+ 18 - 30
arch/x86/crypto/cast5-avx-x86_64-asm_64.S

@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/linkage.h>
+
 .file "cast5-avx-x86_64-asm_64.S"
 
 .extern cast_s1
@@ -211,8 +213,6 @@
 .text
 
 .align 16
-.type   __cast5_enc_blk16,@function;
-
 __cast5_enc_blk16:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -263,14 +263,14 @@ __cast5_enc_blk16:
 
 	movzbl rr(CTX), %eax;
 	testl %eax, %eax;
-	jnz __skip_enc;
+	jnz .L__skip_enc;
 
 	round(RL, RR, 12, 1);
 	round(RR, RL, 13, 2);
 	round(RL, RR, 14, 3);
 	round(RR, RL, 15, 1);
 
-__skip_enc:
+.L__skip_enc:
 	popq %rbx;
 	popq %rbp;
 
@@ -282,10 +282,9 @@ __skip_enc:
 	outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
 
 	ret;
+ENDPROC(__cast5_enc_blk16)
 
 .align 16
-.type   __cast5_dec_blk16,@function;
-
 __cast5_dec_blk16:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -323,14 +322,14 @@ __cast5_dec_blk16:
 
 	movzbl rr(CTX), %eax;
 	testl %eax, %eax;
-	jnz __skip_dec;
+	jnz .L__skip_dec;
 
 	round(RL, RR, 15, 1);
 	round(RR, RL, 14, 3);
 	round(RL, RR, 13, 2);
 	round(RR, RL, 12, 1);
 
-__dec_tail:
+.L__dec_tail:
 	round(RL, RR, 11, 3);
 	round(RR, RL, 10, 2);
 	round(RL, RR, 9, 1);
@@ -355,15 +354,12 @@ __dec_tail:
 
 	ret;
 
-__skip_dec:
+.L__skip_dec:
 	vpsrldq $4, RKR, RKR;
-	jmp __dec_tail;
+	jmp .L__dec_tail;
+ENDPROC(__cast5_dec_blk16)
 
-.align 16
-.global cast5_ecb_enc_16way
-.type   cast5_ecb_enc_16way,@function;
-
-cast5_ecb_enc_16way:
+ENTRY(cast5_ecb_enc_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -393,12 +389,9 @@ cast5_ecb_enc_16way:
 	vmovdqu RL4, (7*4*4)(%r11);
 
 	ret;
+ENDPROC(cast5_ecb_enc_16way)
 
-.align 16
-.global cast5_ecb_dec_16way
-.type   cast5_ecb_dec_16way,@function;
-
-cast5_ecb_dec_16way:
+ENTRY(cast5_ecb_dec_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -428,12 +421,9 @@ cast5_ecb_dec_16way:
 	vmovdqu RL4, (7*4*4)(%r11);
 
 	ret;
+ENDPROC(cast5_ecb_dec_16way)
 
-.align 16
-.global cast5_cbc_dec_16way
-.type   cast5_cbc_dec_16way,@function;
-
-cast5_cbc_dec_16way:
+ENTRY(cast5_cbc_dec_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -480,12 +470,9 @@ cast5_cbc_dec_16way:
 	popq %r12;
 
 	ret;
+ENDPROC(cast5_cbc_dec_16way)
 
-.align 16
-.global cast5_ctr_16way
-.type   cast5_ctr_16way,@function;
-
-cast5_ctr_16way:
+ENTRY(cast5_ctr_16way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -556,3 +543,4 @@ cast5_ctr_16way:
 	popq %r12;
 
 	ret;
+ENDPROC(cast5_ctr_16way)

+ 11 - 24
arch/x86/crypto/cast6-avx-x86_64-asm_64.S

@@ -23,6 +23,7 @@
  *
  */
 
+#include <linux/linkage.h>
 #include "glue_helper-asm-avx.S"
 
 .file "cast6-avx-x86_64-asm_64.S"
@@ -250,8 +251,6 @@
 .text
 
 .align 8
-.type   __cast6_enc_blk8,@function;
-
 __cast6_enc_blk8:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -295,10 +294,9 @@ __cast6_enc_blk8:
 	outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
 	ret;
+ENDPROC(__cast6_enc_blk8)
 
 .align 8
-.type   __cast6_dec_blk8,@function;
-
 __cast6_dec_blk8:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -341,12 +339,9 @@ __cast6_dec_blk8:
 	outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
 	ret;
+ENDPROC(__cast6_dec_blk8)
 
-.align 8
-.global cast6_ecb_enc_8way
-.type   cast6_ecb_enc_8way,@function;
-
-cast6_ecb_enc_8way:
+ENTRY(cast6_ecb_enc_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -362,12 +357,9 @@ cast6_ecb_enc_8way:
 	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
 	ret;
+ENDPROC(cast6_ecb_enc_8way)
 
-.align 8
-.global cast6_ecb_dec_8way
-.type   cast6_ecb_dec_8way,@function;
-
-cast6_ecb_dec_8way:
+ENTRY(cast6_ecb_dec_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -383,12 +375,9 @@ cast6_ecb_dec_8way:
 	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
 	ret;
+ENDPROC(cast6_ecb_dec_8way)
 
-.align 8
-.global cast6_cbc_dec_8way
-.type   cast6_cbc_dec_8way,@function;
-
-cast6_cbc_dec_8way:
+ENTRY(cast6_cbc_dec_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -409,12 +398,9 @@ cast6_cbc_dec_8way:
 	popq %r12;
 
 	ret;
+ENDPROC(cast6_cbc_dec_8way)
 
-.align 8
-.global cast6_ctr_8way
-.type   cast6_ctr_8way,@function;
-
-cast6_ctr_8way:
+ENTRY(cast6_ctr_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -437,3 +423,4 @@ cast6_ctr_8way:
 	popq %r12;
 
 	ret;
+ENDPROC(cast6_ctr_8way)

+ 246 - 0
arch/x86/crypto/crc32-pclmul_asm.S

@@ -0,0 +1,246 @@
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please  visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ *
+ * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
+ * calculation.
+ * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
+ * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
+ * at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2B: Instruction Set Reference, N-Z
+ *
+ * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
+ *	      Alexander Boyko <Alexander_Boyko@xyratex.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/inst.h>
+
+
+.align 16
+/*
+ * [x4*128+32 mod P(x) << 32)]'  << 1   = 0x154442bd4
+ * #define CONSTANT_R1  0x154442bd4LL
+ *
+ * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
+ * #define CONSTANT_R2  0x1c6e41596LL
+ */
+.Lconstant_R2R1:
+	.octa 0x00000001c6e415960000000154442bd4
+/*
+ * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
+ * #define CONSTANT_R3  0x1751997d0LL
+ *
+ * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
+ * #define CONSTANT_R4  0x0ccaa009eLL
+ */
+.Lconstant_R4R3:
+	.octa 0x00000000ccaa009e00000001751997d0
+/*
+ * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
+ * #define CONSTANT_R5  0x163cd6124LL
+ */
+.Lconstant_R5:
+	.octa 0x00000000000000000000000163cd6124
+.Lconstant_mask32:
+	.octa 0x000000000000000000000000FFFFFFFF
+/*
+ * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
+ *
+ * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
+ * #define CONSTANT_RU  0x1F7011641LL
+ */
+.Lconstant_RUpoly:
+	.octa 0x00000001F701164100000001DB710641
+
+#define CONSTANT %xmm0
+
+#ifdef __x86_64__
+#define BUF     %rdi
+#define LEN     %rsi
+#define CRC     %edx
+#else
+#define BUF     %eax
+#define LEN     %edx
+#define CRC     %ecx
+#endif
+
+
+
+.text
+/**
+ *      Calculate crc32
+ *      BUF - buffer (16 bytes aligned)
+ *      LEN - sizeof buffer (16 bytes aligned), LEN should be grater than 63
+ *      CRC - initial crc32
+ *      return %eax crc32
+ *      uint crc32_pclmul_le_16(unsigned char const *buffer,
+ *	                     size_t len, uint crc32)
+ */
+.globl crc32_pclmul_le_16
+.align 4, 0x90
+crc32_pclmul_le_16:/* buffer and buffer size are 16 bytes aligned */
+	movdqa  (BUF), %xmm1
+	movdqa  0x10(BUF), %xmm2
+	movdqa  0x20(BUF), %xmm3
+	movdqa  0x30(BUF), %xmm4
+	movd    CRC, CONSTANT
+	pxor    CONSTANT, %xmm1
+	sub     $0x40, LEN
+	add     $0x40, BUF
+#ifndef __x86_64__
+	/* This is for position independent code(-fPIC) support for 32bit */
+	call    delta
+delta:
+	pop     %ecx
+#endif
+	cmp     $0x40, LEN
+	jb      less_64
+
+#ifdef __x86_64__
+	movdqa .Lconstant_R2R1(%rip), CONSTANT
+#else
+	movdqa .Lconstant_R2R1 - delta(%ecx), CONSTANT
+#endif
+
+loop_64:/*  64 bytes Full cache line folding */
+	prefetchnta    0x40(BUF)
+	movdqa  %xmm1, %xmm5
+	movdqa  %xmm2, %xmm6
+	movdqa  %xmm3, %xmm7
+#ifdef __x86_64__
+	movdqa  %xmm4, %xmm8
+#endif
+	PCLMULQDQ 00, CONSTANT, %xmm1
+	PCLMULQDQ 00, CONSTANT, %xmm2
+	PCLMULQDQ 00, CONSTANT, %xmm3
+#ifdef __x86_64__
+	PCLMULQDQ 00, CONSTANT, %xmm4
+#endif
+	PCLMULQDQ 0x11, CONSTANT, %xmm5
+	PCLMULQDQ 0x11, CONSTANT, %xmm6
+	PCLMULQDQ 0x11, CONSTANT, %xmm7
+#ifdef __x86_64__
+	PCLMULQDQ 0x11, CONSTANT, %xmm8
+#endif
+	pxor    %xmm5, %xmm1
+	pxor    %xmm6, %xmm2
+	pxor    %xmm7, %xmm3
+#ifdef __x86_64__
+	pxor    %xmm8, %xmm4
+#else
+	/* xmm8 unsupported for x32 */
+	movdqa  %xmm4, %xmm5
+	PCLMULQDQ 00, CONSTANT, %xmm4
+	PCLMULQDQ 0x11, CONSTANT, %xmm5
+	pxor    %xmm5, %xmm4
+#endif
+
+	pxor    (BUF), %xmm1
+	pxor    0x10(BUF), %xmm2
+	pxor    0x20(BUF), %xmm3
+	pxor    0x30(BUF), %xmm4
+
+	sub     $0x40, LEN
+	add     $0x40, BUF
+	cmp     $0x40, LEN
+	jge     loop_64
+less_64:/*  Folding cache line into 128bit */
+#ifdef __x86_64__
+	movdqa  .Lconstant_R4R3(%rip), CONSTANT
+#else
+	movdqa  .Lconstant_R4R3 - delta(%ecx), CONSTANT
+#endif
+	prefetchnta     (BUF)
+
+	movdqa  %xmm1, %xmm5
+	PCLMULQDQ 0x00, CONSTANT, %xmm1
+	PCLMULQDQ 0x11, CONSTANT, %xmm5
+	pxor    %xmm5, %xmm1
+	pxor    %xmm2, %xmm1
+
+	movdqa  %xmm1, %xmm5
+	PCLMULQDQ 0x00, CONSTANT, %xmm1
+	PCLMULQDQ 0x11, CONSTANT, %xmm5
+	pxor    %xmm5, %xmm1
+	pxor    %xmm3, %xmm1
+
+	movdqa  %xmm1, %xmm5
+	PCLMULQDQ 0x00, CONSTANT, %xmm1
+	PCLMULQDQ 0x11, CONSTANT, %xmm5
+	pxor    %xmm5, %xmm1
+	pxor    %xmm4, %xmm1
+
+	cmp     $0x10, LEN
+	jb      fold_64
+loop_16:/* Folding rest buffer into 128bit */
+	movdqa  %xmm1, %xmm5
+	PCLMULQDQ 0x00, CONSTANT, %xmm1
+	PCLMULQDQ 0x11, CONSTANT, %xmm5
+	pxor    %xmm5, %xmm1
+	pxor    (BUF), %xmm1
+	sub     $0x10, LEN
+	add     $0x10, BUF
+	cmp     $0x10, LEN
+	jge     loop_16
+
+fold_64:
+	/* perform the last 64 bit fold, also adds 32 zeroes
+	 * to the input stream */
+	PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
+	psrldq  $0x08, %xmm1
+	pxor    CONSTANT, %xmm1
+
+	/* final 32-bit fold */
+	movdqa  %xmm1, %xmm2
+#ifdef __x86_64__
+	movdqa  .Lconstant_R5(%rip), CONSTANT
+	movdqa  .Lconstant_mask32(%rip), %xmm3
+#else
+	movdqa  .Lconstant_R5 - delta(%ecx), CONSTANT
+	movdqa  .Lconstant_mask32 - delta(%ecx), %xmm3
+#endif
+	psrldq  $0x04, %xmm2
+	pand    %xmm3, %xmm1
+	PCLMULQDQ 0x00, CONSTANT, %xmm1
+	pxor    %xmm2, %xmm1
+
+	/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
+#ifdef __x86_64__
+	movdqa  .Lconstant_RUpoly(%rip), CONSTANT
+#else
+	movdqa  .Lconstant_RUpoly - delta(%ecx), CONSTANT
+#endif
+	movdqa  %xmm1, %xmm2
+	pand    %xmm3, %xmm1
+	PCLMULQDQ 0x10, CONSTANT, %xmm1
+	pand    %xmm3, %xmm1
+	PCLMULQDQ 0x00, CONSTANT, %xmm1
+	pxor    %xmm2, %xmm1
+	pextrd  $0x01, %xmm1, %eax
+
+	ret

+ 201 - 0
arch/x86/crypto/crc32-pclmul_glue.c

@@ -0,0 +1,201 @@
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please  visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ *
+ * Wrappers for kernel crypto shash api to pclmulqdq crc32 imlementation.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/crc32.h>
+#include <crypto/internal/hash.h>
+
+#include <asm/cpufeature.h>
+#include <asm/cpu_device_id.h>
+#include <asm/i387.h>
+
+#define CHKSUM_BLOCK_SIZE	1
+#define CHKSUM_DIGEST_SIZE	4
+
+#define PCLMUL_MIN_LEN		64L     /* minimum size of buffer
+					 * for crc32_pclmul_le_16 */
+#define SCALE_F			16L	/* size of xmm register */
+#define SCALE_F_MASK		(SCALE_F - 1)
+
+u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32);
+
+static u32 __attribute__((pure))
+	crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len)
+{
+	unsigned int iquotient;
+	unsigned int iremainder;
+	unsigned int prealign;
+
+	if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable())
+		return crc32_le(crc, p, len);
+
+	if ((long)p & SCALE_F_MASK) {
+		/* align p to 16 byte */
+		prealign = SCALE_F - ((long)p & SCALE_F_MASK);
+
+		crc = crc32_le(crc, p, prealign);
+		len -= prealign;
+		p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) &
+				     ~SCALE_F_MASK);
+	}
+	iquotient = len & (~SCALE_F_MASK);
+	iremainder = len & SCALE_F_MASK;
+
+	kernel_fpu_begin();
+	crc = crc32_pclmul_le_16(p, iquotient, crc);
+	kernel_fpu_end();
+
+	if (iremainder)
+		crc = crc32_le(crc, p + iquotient, iremainder);
+
+	return crc;
+}
+
+static int crc32_pclmul_cra_init(struct crypto_tfm *tfm)
+{
+	u32 *key = crypto_tfm_ctx(tfm);
+
+	*key = 0;
+
+	return 0;
+}
+
+static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key,
+			unsigned int keylen)
+{
+	u32 *mctx = crypto_shash_ctx(hash);
+
+	if (keylen != sizeof(u32)) {
+		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	*mctx = le32_to_cpup((__le32 *)key);
+	return 0;
+}
+
+static int crc32_pclmul_init(struct shash_desc *desc)
+{
+	u32 *mctx = crypto_shash_ctx(desc->tfm);
+	u32 *crcp = shash_desc_ctx(desc);
+
+	*crcp = *mctx;
+
+	return 0;
+}
+
+static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data,
+			       unsigned int len)
+{
+	u32 *crcp = shash_desc_ctx(desc);
+
+	*crcp = crc32_pclmul_le(*crcp, data, len);
+	return 0;
+}
+
+/* No final XOR 0xFFFFFFFF, like crc32_le */
+static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len,
+				u8 *out)
+{
+	*(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len));
+	return 0;
+}
+
+static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
+{
+	return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out);
+}
+
+static int crc32_pclmul_final(struct shash_desc *desc, u8 *out)
+{
+	u32 *crcp = shash_desc_ctx(desc);
+
+	*(__le32 *)out = cpu_to_le32p(crcp);
+	return 0;
+}
+
+static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data,
+			       unsigned int len, u8 *out)
+{
+	return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len,
+				    out);
+}
+
+static struct shash_alg alg = {
+	.setkey		= crc32_pclmul_setkey,
+	.init		= crc32_pclmul_init,
+	.update		= crc32_pclmul_update,
+	.final		= crc32_pclmul_final,
+	.finup		= crc32_pclmul_finup,
+	.digest		= crc32_pclmul_digest,
+	.descsize	= sizeof(u32),
+	.digestsize	= CHKSUM_DIGEST_SIZE,
+	.base		= {
+			.cra_name		= "crc32",
+			.cra_driver_name	= "crc32-pclmul",
+			.cra_priority		= 200,
+			.cra_blocksize		= CHKSUM_BLOCK_SIZE,
+			.cra_ctxsize		= sizeof(u32),
+			.cra_module		= THIS_MODULE,
+			.cra_init		= crc32_pclmul_cra_init,
+	}
+};
+
+static const struct x86_cpu_id crc32pclmul_cpu_id[] = {
+	X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id);
+
+
+static int __init crc32_pclmul_mod_init(void)
+{
+
+	if (!x86_match_cpu(crc32pclmul_cpu_id)) {
+		pr_info("PCLMULQDQ-NI instructions are not detected.\n");
+		return -ENODEV;
+	}
+	return crypto_register_shash(&alg);
+}
+
+static void __exit crc32_pclmul_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_init(crc32_pclmul_mod_init);
+module_exit(crc32_pclmul_mod_fini);
+
+MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>");
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS("crc32");
+MODULE_ALIAS("crc32-pclmul");

+ 6 - 2
arch/x86/crypto/crc32c-pcl-intel-asm_64.S

@@ -42,6 +42,8 @@
  * SOFTWARE.
  */
 
+#include <linux/linkage.h>
+
 ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
 
 .macro LABEL prefix n
@@ -68,8 +70,7 @@
 
 # unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
 
-.global crc_pcl
-crc_pcl:
+ENTRY(crc_pcl)
 #define    bufp		%rdi
 #define    bufp_dw	%edi
 #define    bufp_w	%di
@@ -323,6 +324,9 @@ JMPTBL_ENTRY %i
 .noaltmacro
 	i=i+1
 .endr
+
+ENDPROC(crc_pcl)
+
 	################################################################
 	## PCLMULQDQ tables
 	## Table is 128 entries x 2 quad words each

+ 4 - 0
arch/x86/crypto/ghash-clmulni-intel_asm.S

@@ -94,6 +94,7 @@ __clmul_gf128mul_ble:
 	pxor T2, T1
 	pxor T1, DATA
 	ret
+ENDPROC(__clmul_gf128mul_ble)
 
 /* void clmul_ghash_mul(char *dst, const be128 *shash) */
 ENTRY(clmul_ghash_mul)
@@ -105,6 +106,7 @@ ENTRY(clmul_ghash_mul)
 	PSHUFB_XMM BSWAP DATA
 	movups DATA, (%rdi)
 	ret
+ENDPROC(clmul_ghash_mul)
 
 /*
  * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
@@ -131,6 +133,7 @@ ENTRY(clmul_ghash_update)
 	movups DATA, (%rdi)
 .Lupdate_just_ret:
 	ret
+ENDPROC(clmul_ghash_update)
 
 /*
  * void clmul_ghash_setkey(be128 *shash, const u8 *key);
@@ -155,3 +158,4 @@ ENTRY(clmul_ghash_setkey)
 	pxor %xmm1, %xmm0
 	movups %xmm0, (%rdi)
 	ret
+ENDPROC(clmul_ghash_setkey)

+ 14 - 14
arch/x86/crypto/salsa20-i586-asm_32.S

@@ -2,11 +2,12 @@
 # D. J. Bernstein
 # Public domain.
 
-# enter ECRYPT_encrypt_bytes
+#include <linux/linkage.h>
+
 .text
-.p2align 5
-.globl ECRYPT_encrypt_bytes
-ECRYPT_encrypt_bytes:
+
+# enter salsa20_encrypt_bytes
+ENTRY(salsa20_encrypt_bytes)
 	mov	%esp,%eax
 	and	$31,%eax
 	add	$256,%eax
@@ -933,11 +934,10 @@ ECRYPT_encrypt_bytes:
 	add	$64,%esi
 	# goto bytesatleast1
 	jmp	._bytesatleast1
-# enter ECRYPT_keysetup
-.text
-.p2align 5
-.globl ECRYPT_keysetup
-ECRYPT_keysetup:
+ENDPROC(salsa20_encrypt_bytes)
+
+# enter salsa20_keysetup
+ENTRY(salsa20_keysetup)
 	mov	%esp,%eax
 	and	$31,%eax
 	add	$256,%eax
@@ -1060,11 +1060,10 @@ ECRYPT_keysetup:
 	# leave
 	add	%eax,%esp
 	ret
-# enter ECRYPT_ivsetup
-.text
-.p2align 5
-.globl ECRYPT_ivsetup
-ECRYPT_ivsetup:
+ENDPROC(salsa20_keysetup)
+
+# enter salsa20_ivsetup
+ENTRY(salsa20_ivsetup)
 	mov	%esp,%eax
 	and	$31,%eax
 	add	$256,%eax
@@ -1112,3 +1111,4 @@ ECRYPT_ivsetup:
 	# leave
 	add	%eax,%esp
 	ret
+ENDPROC(salsa20_ivsetup)

+ 13 - 15
arch/x86/crypto/salsa20-x86_64-asm_64.S

@@ -1,8 +1,7 @@
-# enter ECRYPT_encrypt_bytes
-.text
-.p2align 5
-.globl ECRYPT_encrypt_bytes
-ECRYPT_encrypt_bytes:
+#include <linux/linkage.h>
+
+# enter salsa20_encrypt_bytes
+ENTRY(salsa20_encrypt_bytes)
 	mov	%rsp,%r11
 	and	$31,%r11
 	add	$256,%r11
@@ -802,11 +801,10 @@ ECRYPT_encrypt_bytes:
 	# comment:fp stack unchanged by jump
 	# goto bytesatleast1
 	jmp	._bytesatleast1
-# enter ECRYPT_keysetup
-.text
-.p2align 5
-.globl ECRYPT_keysetup
-ECRYPT_keysetup:
+ENDPROC(salsa20_encrypt_bytes)
+
+# enter salsa20_keysetup
+ENTRY(salsa20_keysetup)
 	mov	%rsp,%r11
 	and	$31,%r11
 	add	$256,%r11
@@ -892,11 +890,10 @@ ECRYPT_keysetup:
 	mov	%rdi,%rax
 	mov	%rsi,%rdx
 	ret
-# enter ECRYPT_ivsetup
-.text
-.p2align 5
-.globl ECRYPT_ivsetup
-ECRYPT_ivsetup:
+ENDPROC(salsa20_keysetup)
+
+# enter salsa20_ivsetup
+ENTRY(salsa20_ivsetup)
 	mov	%rsp,%r11
 	and	$31,%r11
 	add	$256,%r11
@@ -918,3 +915,4 @@ ECRYPT_ivsetup:
 	mov	%rdi,%rax
 	mov	%rsi,%rdx
 	ret
+ENDPROC(salsa20_ivsetup)

+ 0 - 5
arch/x86/crypto/salsa20_glue.c

@@ -26,11 +26,6 @@
 #define SALSA20_MIN_KEY_SIZE  16U
 #define SALSA20_MAX_KEY_SIZE  32U
 
-// use the ECRYPT_* function names
-#define salsa20_keysetup        ECRYPT_keysetup
-#define salsa20_ivsetup         ECRYPT_ivsetup
-#define salsa20_encrypt_bytes   ECRYPT_encrypt_bytes
-
 struct salsa20_ctx
 {
 	u32 input[16];

+ 11 - 24
arch/x86/crypto/serpent-avx-x86_64-asm_64.S

@@ -24,6 +24,7 @@
  *
  */
 
+#include <linux/linkage.h>
 #include "glue_helper-asm-avx.S"
 
 .file "serpent-avx-x86_64-asm_64.S"
@@ -566,8 +567,6 @@
 	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
 
 .align 8
-.type   __serpent_enc_blk8_avx,@function;
-
 __serpent_enc_blk8_avx:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -619,10 +618,9 @@ __serpent_enc_blk8_avx:
 	write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
 
 	ret;
+ENDPROC(__serpent_enc_blk8_avx)
 
 .align 8
-.type   __serpent_dec_blk8_avx,@function;
-
 __serpent_dec_blk8_avx:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -674,12 +672,9 @@ __serpent_dec_blk8_avx:
 	write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
 
 	ret;
+ENDPROC(__serpent_dec_blk8_avx)
 
-.align 8
-.global serpent_ecb_enc_8way_avx
-.type   serpent_ecb_enc_8way_avx,@function;
-
-serpent_ecb_enc_8way_avx:
+ENTRY(serpent_ecb_enc_8way_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -693,12 +688,9 @@ serpent_ecb_enc_8way_avx:
 	store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
 	ret;
+ENDPROC(serpent_ecb_enc_8way_avx)
 
-.align 8
-.global serpent_ecb_dec_8way_avx
-.type   serpent_ecb_dec_8way_avx,@function;
-
-serpent_ecb_dec_8way_avx:
+ENTRY(serpent_ecb_dec_8way_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -712,12 +704,9 @@ serpent_ecb_dec_8way_avx:
 	store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
 
 	ret;
+ENDPROC(serpent_ecb_dec_8way_avx)
 
-.align 8
-.global serpent_cbc_dec_8way_avx
-.type   serpent_cbc_dec_8way_avx,@function;
-
-serpent_cbc_dec_8way_avx:
+ENTRY(serpent_cbc_dec_8way_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -731,12 +720,9 @@ serpent_cbc_dec_8way_avx:
 	store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
 
 	ret;
+ENDPROC(serpent_cbc_dec_8way_avx)
 
-.align 8
-.global serpent_ctr_8way_avx
-.type   serpent_ctr_8way_avx,@function;
-
-serpent_ctr_8way_avx:
+ENTRY(serpent_ctr_8way_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -752,3 +738,4 @@ serpent_ctr_8way_avx:
 	store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
 	ret;
+ENDPROC(serpent_ctr_8way_avx)

+ 8 - 12
arch/x86/crypto/serpent-sse2-i586-asm_32.S

@@ -24,6 +24,8 @@
  *
  */
 
+#include <linux/linkage.h>
+
 .file "serpent-sse2-i586-asm_32.S"
 .text
 
@@ -510,11 +512,7 @@
 	pxor t0,		x3; \
 	movdqu x3,		(3*4*4)(out);
 
-.align 8
-.global __serpent_enc_blk_4way
-.type   __serpent_enc_blk_4way,@function;
-
-__serpent_enc_blk_4way:
+ENTRY(__serpent_enc_blk_4way)
 	/* input:
 	 *	arg_ctx(%esp): ctx, CTX
 	 *	arg_dst(%esp): dst
@@ -566,22 +564,19 @@ __serpent_enc_blk_4way:
 	movl arg_dst(%esp), %eax;
 
 	cmpb $0, arg_xor(%esp);
-	jnz __enc_xor4;
+	jnz .L__enc_xor4;
 
 	write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
 
 	ret;
 
-__enc_xor4:
+.L__enc_xor4:
 	xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
 
 	ret;
+ENDPROC(__serpent_enc_blk_4way)
 
-.align 8
-.global serpent_dec_blk_4way
-.type   serpent_dec_blk_4way,@function;
-
-serpent_dec_blk_4way:
+ENTRY(serpent_dec_blk_4way)
 	/* input:
 	 *	arg_ctx(%esp): ctx, CTX
 	 *	arg_dst(%esp): dst
@@ -633,3 +628,4 @@ serpent_dec_blk_4way:
 	write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);
 
 	ret;
+ENDPROC(serpent_dec_blk_4way)

+ 8 - 12
arch/x86/crypto/serpent-sse2-x86_64-asm_64.S

@@ -24,6 +24,8 @@
  *
  */
 
+#include <linux/linkage.h>
+
 .file "serpent-sse2-x86_64-asm_64.S"
 .text
 
@@ -632,11 +634,7 @@
 	pxor t0,		x3; \
 	movdqu x3,		(3*4*4)(out);
 
-.align 8
-.global __serpent_enc_blk_8way
-.type   __serpent_enc_blk_8way,@function;
-
-__serpent_enc_blk_8way:
+ENTRY(__serpent_enc_blk_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -687,24 +685,21 @@ __serpent_enc_blk_8way:
 	leaq (4*4*4)(%rsi), %rax;
 
 	testb %cl, %cl;
-	jnz __enc_xor8;
+	jnz .L__enc_xor8;
 
 	write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
 	write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
 
 	ret;
 
-__enc_xor8:
+.L__enc_xor8:
 	xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
 	xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
 
 	ret;
+ENDPROC(__serpent_enc_blk_8way)
 
-.align 8
-.global serpent_dec_blk_8way
-.type   serpent_dec_blk_8way,@function;
-
-serpent_dec_blk_8way:
+ENTRY(serpent_dec_blk_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -756,3 +751,4 @@ serpent_dec_blk_8way:
 	write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
 
 	ret;
+ENDPROC(serpent_dec_blk_8way)

+ 5 - 5
arch/x86/crypto/sha1_ssse3_asm.S

@@ -28,6 +28,8 @@
  * (at your option) any later version.
  */
 
+#include <linux/linkage.h>
+
 #define CTX	%rdi	// arg1
 #define BUF	%rsi	// arg2
 #define CNT	%rdx	// arg3
@@ -69,10 +71,8 @@
  * param: function's name
  */
 .macro SHA1_VECTOR_ASM  name
-	.global	\name
-	.type	\name, @function
-	.align 32
-\name:
+	ENTRY(\name)
+
 	push	%rbx
 	push	%rbp
 	push	%r12
@@ -106,7 +106,7 @@
 	pop	%rbx
 	ret
 
-	.size	\name, .-\name
+	ENDPROC(\name)
 .endm
 
 /*

+ 11 - 24
arch/x86/crypto/twofish-avx-x86_64-asm_64.S

@@ -23,6 +23,7 @@
  *
  */
 
+#include <linux/linkage.h>
 #include "glue_helper-asm-avx.S"
 
 .file "twofish-avx-x86_64-asm_64.S"
@@ -243,8 +244,6 @@
 	vpxor		x3, wkey, x3;
 
 .align 8
-.type	__twofish_enc_blk8,@function;
-
 __twofish_enc_blk8:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -284,10 +283,9 @@ __twofish_enc_blk8:
 	outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
 
 	ret;
+ENDPROC(__twofish_enc_blk8)
 
 .align 8
-.type	__twofish_dec_blk8,@function;
-
 __twofish_dec_blk8:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -325,12 +323,9 @@ __twofish_dec_blk8:
 	outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
 
 	ret;
+ENDPROC(__twofish_dec_blk8)
 
-.align 8
-.global twofish_ecb_enc_8way
-.type   twofish_ecb_enc_8way,@function;
-
-twofish_ecb_enc_8way:
+ENTRY(twofish_ecb_enc_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -346,12 +341,9 @@ twofish_ecb_enc_8way:
 	store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
 
 	ret;
+ENDPROC(twofish_ecb_enc_8way)
 
-.align 8
-.global twofish_ecb_dec_8way
-.type   twofish_ecb_dec_8way,@function;
-
-twofish_ecb_dec_8way:
+ENTRY(twofish_ecb_dec_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -367,12 +359,9 @@ twofish_ecb_dec_8way:
 	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
 	ret;
+ENDPROC(twofish_ecb_dec_8way)
 
-.align 8
-.global twofish_cbc_dec_8way
-.type   twofish_cbc_dec_8way,@function;
-
-twofish_cbc_dec_8way:
+ENTRY(twofish_cbc_dec_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -393,12 +382,9 @@ twofish_cbc_dec_8way:
 	popq %r12;
 
 	ret;
+ENDPROC(twofish_cbc_dec_8way)
 
-.align 8
-.global twofish_ctr_8way
-.type   twofish_ctr_8way,@function;
-
-twofish_ctr_8way:
+ENTRY(twofish_ctr_8way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -421,3 +407,4 @@ twofish_ctr_8way:
 	popq %r12;
 
 	ret;
+ENDPROC(twofish_ctr_8way)

+ 5 - 6
arch/x86/crypto/twofish-i586-asm_32.S

@@ -20,6 +20,7 @@
 .file "twofish-i586-asm.S"
 .text
 
+#include <linux/linkage.h>
 #include <asm/asm-offsets.h>
 
 /* return address at 0 */
@@ -219,11 +220,7 @@
 	xor	%esi,		d ## D;\
 	ror	$1,		d ## D;
 
-.align 4
-.global twofish_enc_blk
-.global twofish_dec_blk
-
-twofish_enc_blk:
+ENTRY(twofish_enc_blk)
 	push	%ebp			/* save registers according to calling convention*/
 	push    %ebx
 	push    %esi
@@ -277,8 +274,9 @@ twofish_enc_blk:
 	pop	%ebp
 	mov	$1,	%eax
 	ret
+ENDPROC(twofish_enc_blk)
 
-twofish_dec_blk:
+ENTRY(twofish_dec_blk)
 	push	%ebp			/* save registers according to calling convention*/
 	push    %ebx
 	push    %esi
@@ -333,3 +331,4 @@ twofish_dec_blk:
 	pop	%ebp
 	mov	$1,	%eax
 	ret
+ENDPROC(twofish_dec_blk)

+ 8 - 12
arch/x86/crypto/twofish-x86_64-asm_64-3way.S

@@ -20,6 +20,8 @@
  *
  */
 
+#include <linux/linkage.h>
+
 .file "twofish-x86_64-asm-3way.S"
 .text
 
@@ -214,11 +216,7 @@
 	rorq $32,			RAB2; \
 	outunpack3(mov, RIO, 2, RAB, 2);
 
-.align 8
-.global __twofish_enc_blk_3way
-.type   __twofish_enc_blk_3way,@function;
-
-__twofish_enc_blk_3way:
+ENTRY(__twofish_enc_blk_3way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -250,7 +248,7 @@ __twofish_enc_blk_3way:
 	popq %rbp; /* bool xor */
 
 	testb %bpl, %bpl;
-	jnz __enc_xor3;
+	jnz .L__enc_xor3;
 
 	outunpack_enc3(mov);
 
@@ -262,7 +260,7 @@ __twofish_enc_blk_3way:
 	popq %r15;
 	ret;
 
-__enc_xor3:
+.L__enc_xor3:
 	outunpack_enc3(xor);
 
 	popq %rbx;
@@ -272,11 +270,9 @@ __enc_xor3:
 	popq %r14;
 	popq %r15;
 	ret;
+ENDPROC(__twofish_enc_blk_3way)
 
-.global twofish_dec_blk_3way
-.type   twofish_dec_blk_3way,@function;
-
-twofish_dec_blk_3way:
+ENTRY(twofish_dec_blk_3way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
@@ -313,4 +309,4 @@ twofish_dec_blk_3way:
 	popq %r14;
 	popq %r15;
 	ret;
-
+ENDPROC(twofish_dec_blk_3way)

+ 5 - 6
arch/x86/crypto/twofish-x86_64-asm_64.S

@@ -20,6 +20,7 @@
 .file "twofish-x86_64-asm.S"
 .text
 
+#include <linux/linkage.h>
 #include <asm/asm-offsets.h>
 
 #define a_offset	0
@@ -214,11 +215,7 @@
 	xor	%r8d,		d ## D;\
 	ror	$1,		d ## D;
 
-.align 8
-.global twofish_enc_blk
-.global twofish_dec_blk
-
-twofish_enc_blk:
+ENTRY(twofish_enc_blk)
 	pushq    R1
 
 	/* %rdi contains the ctx address */
@@ -269,8 +266,9 @@ twofish_enc_blk:
 	popq	R1
 	movq	$1,%rax
 	ret
+ENDPROC(twofish_enc_blk)
 
-twofish_dec_blk:
+ENTRY(twofish_dec_blk)
 	pushq    R1
 
 	/* %rdi contains the ctx address */
@@ -320,3 +318,4 @@ twofish_dec_blk:
 	popq	R1
 	movq	$1,%rax
 	ret
+ENDPROC(twofish_dec_blk)

+ 21 - 0
crypto/Kconfig

@@ -353,6 +353,27 @@ config CRYPTO_CRC32C_SPARC64
 	  CRC32c CRC algorithm implemented using sparc64 crypto instructions,
 	  when available.
 
+config CRYPTO_CRC32
+	tristate "CRC32 CRC algorithm"
+	select CRYPTO_HASH
+	select CRC32
+	help
+	  CRC-32-IEEE 802.3 cyclic redundancy-check algorithm.
+	  Shash crypto api wrappers to crc32_le function.
+
+config CRYPTO_CRC32_PCLMUL
+	tristate "CRC32 PCLMULQDQ hardware acceleration"
+	depends on X86
+	select CRYPTO_HASH
+	select CRC32
+	help
+	  From Intel Westmere and AMD Bulldozer processor with SSE4.2
+	  and PCLMULQDQ supported, the processor will support
+	  CRC32 PCLMULQDQ implementation using hardware accelerated PCLMULQDQ
+	  instruction. This option will create 'crc32-plcmul' module,
+	  which will enable any routine to use the CRC-32-IEEE 802.3 checksum
+	  and gain better performance as compared with the table implementation.
+
 config CRYPTO_GHASH
 	tristate "GHASH digest algorithm"
 	select CRYPTO_GF128MUL

+ 1 - 0
crypto/Makefile

@@ -81,6 +81,7 @@ obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
 obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o
 obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
 obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
+obj-$(CONFIG_CRYPTO_CRC32) += crc32.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o
 obj-$(CONFIG_CRYPTO_842) += 842.o

+ 6 - 6
crypto/ablkcipher.c

@@ -388,9 +388,9 @@ static int crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_blkcipher rblkcipher;
 
-	snprintf(rblkcipher.type, CRYPTO_MAX_ALG_NAME, "%s", "ablkcipher");
-	snprintf(rblkcipher.geniv, CRYPTO_MAX_ALG_NAME, "%s",
-		 alg->cra_ablkcipher.geniv ?: "<default>");
+	strncpy(rblkcipher.type, "ablkcipher", sizeof(rblkcipher.type));
+	strncpy(rblkcipher.geniv, alg->cra_ablkcipher.geniv ?: "<default>",
+		sizeof(rblkcipher.geniv));
 
 	rblkcipher.blocksize = alg->cra_blocksize;
 	rblkcipher.min_keysize = alg->cra_ablkcipher.min_keysize;
@@ -469,9 +469,9 @@ static int crypto_givcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_blkcipher rblkcipher;
 
-	snprintf(rblkcipher.type, CRYPTO_MAX_ALG_NAME, "%s", "givcipher");
-	snprintf(rblkcipher.geniv, CRYPTO_MAX_ALG_NAME, "%s",
-		 alg->cra_ablkcipher.geniv ?: "<built-in>");
+	strncpy(rblkcipher.type, "givcipher", sizeof(rblkcipher.type));
+	strncpy(rblkcipher.geniv, alg->cra_ablkcipher.geniv ?: "<built-in>",
+		sizeof(rblkcipher.geniv));
 
 	rblkcipher.blocksize = alg->cra_blocksize;
 	rblkcipher.min_keysize = alg->cra_ablkcipher.min_keysize;

+ 6 - 9
crypto/aead.c

@@ -117,9 +117,8 @@ static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
 	struct crypto_report_aead raead;
 	struct aead_alg *aead = &alg->cra_aead;
 
-	snprintf(raead.type, CRYPTO_MAX_ALG_NAME, "%s", "aead");
-	snprintf(raead.geniv, CRYPTO_MAX_ALG_NAME, "%s",
-		 aead->geniv ?: "<built-in>");
+	strncpy(raead.type, "aead", sizeof(raead.type));
+	strncpy(raead.geniv, aead->geniv ?: "<built-in>", sizeof(raead.geniv));
 
 	raead.blocksize = alg->cra_blocksize;
 	raead.maxauthsize = aead->maxauthsize;
@@ -203,8 +202,8 @@ static int crypto_nivaead_report(struct sk_buff *skb, struct crypto_alg *alg)
 	struct crypto_report_aead raead;
 	struct aead_alg *aead = &alg->cra_aead;
 
-	snprintf(raead.type, CRYPTO_MAX_ALG_NAME, "%s", "nivaead");
-	snprintf(raead.geniv, CRYPTO_MAX_ALG_NAME, "%s", aead->geniv);
+	strncpy(raead.type, "nivaead", sizeof(raead.type));
+	strncpy(raead.geniv, aead->geniv, sizeof(raead.geniv));
 
 	raead.blocksize = alg->cra_blocksize;
 	raead.maxauthsize = aead->maxauthsize;
@@ -282,18 +281,16 @@ struct crypto_instance *aead_geniv_alloc(struct crypto_template *tmpl,
 	int err;
 
 	algt = crypto_get_attr_type(tb);
-	err = PTR_ERR(algt);
 	if (IS_ERR(algt))
-		return ERR_PTR(err);
+		return ERR_CAST(algt);
 
 	if ((algt->type ^ (CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_GENIV)) &
 	    algt->mask)
 		return ERR_PTR(-EINVAL);
 
 	name = crypto_attr_alg_name(tb[1]);
-	err = PTR_ERR(name);
 	if (IS_ERR(name))
-		return ERR_PTR(err);
+		return ERR_CAST(name);
 
 	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
 	if (!inst)

+ 1 - 1
crypto/ahash.c

@@ -404,7 +404,7 @@ static int crypto_ahash_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_hash rhash;
 
-	snprintf(rhash.type, CRYPTO_MAX_ALG_NAME, "%s", "ahash");
+	strncpy(rhash.type, "ahash", sizeof(rhash.type));
 
 	rhash.blocksize = alg->cra_blocksize;
 	rhash.digestsize = __crypto_hash_alg_common(alg)->digestsize;

+ 1 - 3
crypto/algapi.c

@@ -749,12 +749,10 @@ struct crypto_alg *crypto_attr_alg2(struct rtattr *rta,
 				    u32 type, u32 mask)
 {
 	const char *name;
-	int err;
 
 	name = crypto_attr_alg_name(rta);
-	err = PTR_ERR(name);
 	if (IS_ERR(name))
-		return ERR_PTR(err);
+		return ERR_CAST(name);
 
 	return crypto_find_alg(name, frontend, type, mask);
 }

+ 1 - 2
crypto/authenc.c

@@ -592,9 +592,8 @@ static struct crypto_instance *crypto_authenc_alloc(struct rtattr **tb)
 	int err;
 
 	algt = crypto_get_attr_type(tb);
-	err = PTR_ERR(algt);
 	if (IS_ERR(algt))
-		return ERR_PTR(err);
+		return ERR_CAST(algt);
 
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
 		return ERR_PTR(-EINVAL);

+ 1 - 2
crypto/authencesn.c

@@ -715,9 +715,8 @@ static struct crypto_instance *crypto_authenc_esn_alloc(struct rtattr **tb)
 	int err;
 
 	algt = crypto_get_attr_type(tb);
-	err = PTR_ERR(algt);
 	if (IS_ERR(algt))
-		return ERR_PTR(err);
+		return ERR_CAST(algt);
 
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
 		return ERR_PTR(-EINVAL);

+ 5 - 7
crypto/blkcipher.c

@@ -499,9 +499,9 @@ static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_blkcipher rblkcipher;
 
-	snprintf(rblkcipher.type, CRYPTO_MAX_ALG_NAME, "%s", "blkcipher");
-	snprintf(rblkcipher.geniv, CRYPTO_MAX_ALG_NAME, "%s",
-		 alg->cra_blkcipher.geniv ?: "<default>");
+	strncpy(rblkcipher.type, "blkcipher", sizeof(rblkcipher.type));
+	strncpy(rblkcipher.geniv, alg->cra_blkcipher.geniv ?: "<default>",
+		sizeof(rblkcipher.geniv));
 
 	rblkcipher.blocksize = alg->cra_blocksize;
 	rblkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
@@ -588,18 +588,16 @@ struct crypto_instance *skcipher_geniv_alloc(struct crypto_template *tmpl,
 	int err;
 
 	algt = crypto_get_attr_type(tb);
-	err = PTR_ERR(algt);
 	if (IS_ERR(algt))
-		return ERR_PTR(err);
+		return ERR_CAST(algt);
 
 	if ((algt->type ^ (CRYPTO_ALG_TYPE_GIVCIPHER | CRYPTO_ALG_GENIV)) &
 	    algt->mask)
 		return ERR_PTR(-EINVAL);
 
 	name = crypto_attr_alg_name(tb[1]);
-	err = PTR_ERR(name);
 	if (IS_ERR(name))
-		return ERR_PTR(err);
+		return ERR_CAST(name);
 
 	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
 	if (!inst)

+ 7 - 16
crypto/ccm.c

@@ -484,18 +484,16 @@ static struct crypto_instance *crypto_ccm_alloc_common(struct rtattr **tb,
 	int err;
 
 	algt = crypto_get_attr_type(tb);
-	err = PTR_ERR(algt);
 	if (IS_ERR(algt))
-		return ERR_PTR(err);
+		return ERR_CAST(algt);
 
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
 		return ERR_PTR(-EINVAL);
 
 	cipher = crypto_alg_mod_lookup(cipher_name,  CRYPTO_ALG_TYPE_CIPHER,
 				       CRYPTO_ALG_TYPE_MASK);
-	err = PTR_ERR(cipher);
 	if (IS_ERR(cipher))
-		return ERR_PTR(err);
+		return ERR_CAST(cipher);
 
 	err = -EINVAL;
 	if (cipher->cra_blocksize != 16)
@@ -573,15 +571,13 @@ out_put_cipher:
 
 static struct crypto_instance *crypto_ccm_alloc(struct rtattr **tb)
 {
-	int err;
 	const char *cipher_name;
 	char ctr_name[CRYPTO_MAX_ALG_NAME];
 	char full_name[CRYPTO_MAX_ALG_NAME];
 
 	cipher_name = crypto_attr_alg_name(tb[1]);
-	err = PTR_ERR(cipher_name);
 	if (IS_ERR(cipher_name))
-		return ERR_PTR(err);
+		return ERR_CAST(cipher_name);
 
 	if (snprintf(ctr_name, CRYPTO_MAX_ALG_NAME, "ctr(%s)",
 		     cipher_name) >= CRYPTO_MAX_ALG_NAME)
@@ -612,20 +608,17 @@ static struct crypto_template crypto_ccm_tmpl = {
 
 static struct crypto_instance *crypto_ccm_base_alloc(struct rtattr **tb)
 {
-	int err;
 	const char *ctr_name;
 	const char *cipher_name;
 	char full_name[CRYPTO_MAX_ALG_NAME];
 
 	ctr_name = crypto_attr_alg_name(tb[1]);
-	err = PTR_ERR(ctr_name);
 	if (IS_ERR(ctr_name))
-		return ERR_PTR(err);
+		return ERR_CAST(ctr_name);
 
 	cipher_name = crypto_attr_alg_name(tb[2]);
-	err = PTR_ERR(cipher_name);
 	if (IS_ERR(cipher_name))
-		return ERR_PTR(err);
+		return ERR_CAST(cipher_name);
 
 	if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "ccm_base(%s,%s)",
 		     ctr_name, cipher_name) >= CRYPTO_MAX_ALG_NAME)
@@ -760,17 +753,15 @@ static struct crypto_instance *crypto_rfc4309_alloc(struct rtattr **tb)
 	int err;
 
 	algt = crypto_get_attr_type(tb);
-	err = PTR_ERR(algt);
 	if (IS_ERR(algt))
-		return ERR_PTR(err);
+		return ERR_CAST(algt);
 
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
 		return ERR_PTR(-EINVAL);
 
 	ccm_name = crypto_attr_alg_name(tb[1]);
-	err = PTR_ERR(ccm_name);
 	if (IS_ERR(ccm_name))
-		return ERR_PTR(err);
+		return ERR_CAST(ccm_name);
 
 	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
 	if (!inst)

+ 1 - 2
crypto/chainiv.c

@@ -291,9 +291,8 @@ static struct crypto_instance *chainiv_alloc(struct rtattr **tb)
 	int err;
 
 	algt = crypto_get_attr_type(tb);
-	err = PTR_ERR(algt);
 	if (IS_ERR(algt))
-		return ERR_PTR(err);
+		return ERR_CAST(algt);
 
 	err = crypto_get_default_rng();
 	if (err)

+ 158 - 0
crypto/crc32.c

@@ -0,0 +1,158 @@
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please  visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ */
+
+/*
+ * This is crypto api shash wrappers to crc32_le.
+ */
+
+#include <linux/crc32.h>
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+
+#define CHKSUM_BLOCK_SIZE	1
+#define CHKSUM_DIGEST_SIZE	4
+
+static u32 __crc32_le(u32 crc, unsigned char const *p, size_t len)
+{
+	return crc32_le(crc, p, len);
+}
+
+/** No default init with ~0 */
+static int crc32_cra_init(struct crypto_tfm *tfm)
+{
+	u32 *key = crypto_tfm_ctx(tfm);
+
+	*key = 0;
+
+	return 0;
+}
+
+
+/*
+ * Setting the seed allows arbitrary accumulators and flexible XOR policy
+ * If your algorithm starts with ~0, then XOR with ~0 before you set
+ * the seed.
+ */
+static int crc32_setkey(struct crypto_shash *hash, const u8 *key,
+			unsigned int keylen)
+{
+	u32 *mctx = crypto_shash_ctx(hash);
+
+	if (keylen != sizeof(u32)) {
+		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	*mctx = le32_to_cpup((__le32 *)key);
+	return 0;
+}
+
+static int crc32_init(struct shash_desc *desc)
+{
+	u32 *mctx = crypto_shash_ctx(desc->tfm);
+	u32 *crcp = shash_desc_ctx(desc);
+
+	*crcp = *mctx;
+
+	return 0;
+}
+
+static int crc32_update(struct shash_desc *desc, const u8 *data,
+			unsigned int len)
+{
+	u32 *crcp = shash_desc_ctx(desc);
+
+	*crcp = __crc32_le(*crcp, data, len);
+	return 0;
+}
+
+/* No final XOR 0xFFFFFFFF, like crc32_le */
+static int __crc32_finup(u32 *crcp, const u8 *data, unsigned int len,
+			 u8 *out)
+{
+	*(__le32 *)out = cpu_to_le32(__crc32_le(*crcp, data, len));
+	return 0;
+}
+
+static int crc32_finup(struct shash_desc *desc, const u8 *data,
+		       unsigned int len, u8 *out)
+{
+	return __crc32_finup(shash_desc_ctx(desc), data, len, out);
+}
+
+static int crc32_final(struct shash_desc *desc, u8 *out)
+{
+	u32 *crcp = shash_desc_ctx(desc);
+
+	*(__le32 *)out = cpu_to_le32p(crcp);
+	return 0;
+}
+
+static int crc32_digest(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	return __crc32_finup(crypto_shash_ctx(desc->tfm), data, len,
+			     out);
+}
+static struct shash_alg alg = {
+	.setkey		= crc32_setkey,
+	.init		= crc32_init,
+	.update		= crc32_update,
+	.final		= crc32_final,
+	.finup		= crc32_finup,
+	.digest		= crc32_digest,
+	.descsize	= sizeof(u32),
+	.digestsize	= CHKSUM_DIGEST_SIZE,
+	.base		= {
+		.cra_name		= "crc32",
+		.cra_driver_name	= "crc32-table",
+		.cra_priority		= 100,
+		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(u32),
+		.cra_module		= THIS_MODULE,
+		.cra_init		= crc32_cra_init,
+	}
+};
+
+static int __init crc32_mod_init(void)
+{
+	return crypto_register_shash(&alg);
+}
+
+static void __exit crc32_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_init(crc32_mod_init);
+module_exit(crc32_mod_fini);
+
+MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>");
+MODULE_DESCRIPTION("CRC32 calculations wrapper for lib/crc32");
+MODULE_LICENSE("GPL");

+ 26 - 12
crypto/crypto_user.c

@@ -30,6 +30,8 @@
 
 #include "internal.h"
 
+#define null_terminated(x)	(strnlen(x, sizeof(x)) < sizeof(x))
+
 static DEFINE_MUTEX(crypto_cfg_mutex);
 
 /* The crypto netlink socket */
@@ -75,7 +77,7 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_cipher rcipher;
 
-	snprintf(rcipher.type, CRYPTO_MAX_ALG_NAME, "%s", "cipher");
+	strncpy(rcipher.type, "cipher", sizeof(rcipher.type));
 
 	rcipher.blocksize = alg->cra_blocksize;
 	rcipher.min_keysize = alg->cra_cipher.cia_min_keysize;
@@ -94,8 +96,7 @@ static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_comp rcomp;
 
-	snprintf(rcomp.type, CRYPTO_MAX_ALG_NAME, "%s", "compression");
-
+	strncpy(rcomp.type, "compression", sizeof(rcomp.type));
 	if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS,
 		    sizeof(struct crypto_report_comp), &rcomp))
 		goto nla_put_failure;
@@ -108,12 +109,14 @@ nla_put_failure:
 static int crypto_report_one(struct crypto_alg *alg,
 			     struct crypto_user_alg *ualg, struct sk_buff *skb)
 {
-	memcpy(&ualg->cru_name, &alg->cra_name, sizeof(ualg->cru_name));
-	memcpy(&ualg->cru_driver_name, &alg->cra_driver_name,
-	       sizeof(ualg->cru_driver_name));
-	memcpy(&ualg->cru_module_name, module_name(alg->cra_module),
-	       CRYPTO_MAX_ALG_NAME);
-
+	strncpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
+	strncpy(ualg->cru_driver_name, alg->cra_driver_name,
+		sizeof(ualg->cru_driver_name));
+	strncpy(ualg->cru_module_name, module_name(alg->cra_module),
+		sizeof(ualg->cru_module_name));
+
+	ualg->cru_type = 0;
+	ualg->cru_mask = 0;
 	ualg->cru_flags = alg->cra_flags;
 	ualg->cru_refcnt = atomic_read(&alg->cra_refcnt);
 
@@ -122,8 +125,7 @@ static int crypto_report_one(struct crypto_alg *alg,
 	if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
 		struct crypto_report_larval rl;
 
-		snprintf(rl.type, CRYPTO_MAX_ALG_NAME, "%s", "larval");
-
+		strncpy(rl.type, "larval", sizeof(rl.type));
 		if (nla_put(skb, CRYPTOCFGA_REPORT_LARVAL,
 			    sizeof(struct crypto_report_larval), &rl))
 			goto nla_put_failure;
@@ -196,7 +198,10 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 	struct crypto_dump_info info;
 	int err;
 
-	if (!p->cru_driver_name)
+	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+		return -EINVAL;
+
+	if (!p->cru_driver_name[0])
 		return -EINVAL;
 
 	alg = crypto_alg_match(p, 1);
@@ -260,6 +265,9 @@ static int crypto_update_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct nlattr *priority = attrs[CRYPTOCFGA_PRIORITY_VAL];
 	LIST_HEAD(list);
 
+	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+		return -EINVAL;
+
 	if (priority && !strlen(p->cru_driver_name))
 		return -EINVAL;
 
@@ -287,6 +295,9 @@ static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct crypto_alg *alg;
 	struct crypto_user_alg *p = nlmsg_data(nlh);
 
+	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+		return -EINVAL;
+
 	alg = crypto_alg_match(p, 1);
 	if (!alg)
 		return -ENOENT;
@@ -368,6 +379,9 @@ static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct crypto_user_alg *p = nlmsg_data(nlh);
 	struct nlattr *priority = attrs[CRYPTOCFGA_PRIORITY_VAL];
 
+	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+		return -EINVAL;
+
 	if (strlen(p->cru_driver_name))
 		exact = 1;
 

+ 2 - 4
crypto/ctr.c

@@ -343,17 +343,15 @@ static struct crypto_instance *crypto_rfc3686_alloc(struct rtattr **tb)
 	int err;
 
 	algt = crypto_get_attr_type(tb);
-	err = PTR_ERR(algt);
 	if (IS_ERR(algt))
-		return ERR_PTR(err);
+		return ERR_CAST(algt);
 
 	if ((algt->type ^ CRYPTO_ALG_TYPE_BLKCIPHER) & algt->mask)
 		return ERR_PTR(-EINVAL);
 
 	cipher_name = crypto_attr_alg_name(tb[1]);
-	err = PTR_ERR(cipher_name);
 	if (IS_ERR(cipher_name))
-		return ERR_PTR(err);
+		return ERR_CAST(cipher_name);
 
 	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
 	if (!inst)

+ 1 - 2
crypto/cts.c

@@ -282,9 +282,8 @@ static struct crypto_instance *crypto_cts_alloc(struct rtattr **tb)
 
 	alg = crypto_attr_alg(tb[1], CRYPTO_ALG_TYPE_BLKCIPHER,
 				  CRYPTO_ALG_TYPE_MASK);
-	err = PTR_ERR(alg);
 	if (IS_ERR(alg))
-		return ERR_PTR(err);
+		return ERR_CAST(alg);
 
 	inst = ERR_PTR(-EINVAL);
 	if (!is_power_of_2(alg->cra_blocksize))

+ 9 - 20
crypto/gcm.c

@@ -701,9 +701,8 @@ static struct crypto_instance *crypto_gcm_alloc_common(struct rtattr **tb,
 	int err;
 
 	algt = crypto_get_attr_type(tb);
-	err = PTR_ERR(algt);
 	if (IS_ERR(algt))
-		return ERR_PTR(err);
+		return ERR_CAST(algt);
 
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
 		return ERR_PTR(-EINVAL);
@@ -711,9 +710,8 @@ static struct crypto_instance *crypto_gcm_alloc_common(struct rtattr **tb,
 	ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type,
 				    CRYPTO_ALG_TYPE_HASH,
 				    CRYPTO_ALG_TYPE_AHASH_MASK);
-	err = PTR_ERR(ghash_alg);
 	if (IS_ERR(ghash_alg))
-		return ERR_PTR(err);
+		return ERR_CAST(ghash_alg);
 
 	err = -ENOMEM;
 	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
@@ -787,15 +785,13 @@ out_put_ghash:
 
 static struct crypto_instance *crypto_gcm_alloc(struct rtattr **tb)
 {
-	int err;
 	const char *cipher_name;
 	char ctr_name[CRYPTO_MAX_ALG_NAME];
 	char full_name[CRYPTO_MAX_ALG_NAME];
 
 	cipher_name = crypto_attr_alg_name(tb[1]);
-	err = PTR_ERR(cipher_name);
 	if (IS_ERR(cipher_name))
-		return ERR_PTR(err);
+		return ERR_CAST(cipher_name);
 
 	if (snprintf(ctr_name, CRYPTO_MAX_ALG_NAME, "ctr(%s)", cipher_name) >=
 	    CRYPTO_MAX_ALG_NAME)
@@ -826,20 +822,17 @@ static struct crypto_template crypto_gcm_tmpl = {
 
 static struct crypto_instance *crypto_gcm_base_alloc(struct rtattr **tb)
 {
-	int err;
 	const char *ctr_name;
 	const char *ghash_name;
 	char full_name[CRYPTO_MAX_ALG_NAME];
 
 	ctr_name = crypto_attr_alg_name(tb[1]);
-	err = PTR_ERR(ctr_name);
 	if (IS_ERR(ctr_name))
-		return ERR_PTR(err);
+		return ERR_CAST(ctr_name);
 
 	ghash_name = crypto_attr_alg_name(tb[2]);
-	err = PTR_ERR(ghash_name);
 	if (IS_ERR(ghash_name))
-		return ERR_PTR(err);
+		return ERR_CAST(ghash_name);
 
 	if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "gcm_base(%s,%s)",
 		     ctr_name, ghash_name) >= CRYPTO_MAX_ALG_NAME)
@@ -971,17 +964,15 @@ static struct crypto_instance *crypto_rfc4106_alloc(struct rtattr **tb)
 	int err;
 
 	algt = crypto_get_attr_type(tb);
-	err = PTR_ERR(algt);
 	if (IS_ERR(algt))
-		return ERR_PTR(err);
+		return ERR_CAST(algt);
 
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
 		return ERR_PTR(-EINVAL);
 
 	ccm_name = crypto_attr_alg_name(tb[1]);
-	err = PTR_ERR(ccm_name);
 	if (IS_ERR(ccm_name))
-		return ERR_PTR(err);
+		return ERR_CAST(ccm_name);
 
 	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
 	if (!inst)
@@ -1222,17 +1213,15 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb)
 	int err;
 
 	algt = crypto_get_attr_type(tb);
-	err = PTR_ERR(algt);
 	if (IS_ERR(algt))
-		return ERR_PTR(err);
+		return ERR_CAST(algt);
 
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
 		return ERR_PTR(-EINVAL);
 
 	ccm_name = crypto_attr_alg_name(tb[1]);
-	err = PTR_ERR(ccm_name);
 	if (IS_ERR(ccm_name))
-		return ERR_PTR(err);
+		return ERR_CAST(ccm_name);
 
 	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
 	if (!inst)

+ 1 - 2
crypto/pcompress.c

@@ -53,8 +53,7 @@ static int crypto_pcomp_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_comp rpcomp;
 
-	snprintf(rpcomp.type, CRYPTO_MAX_ALG_NAME, "%s", "pcomp");
-
+	strncpy(rpcomp.type, "pcomp", sizeof(rpcomp.type));
 	if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS,
 		    sizeof(struct crypto_report_comp), &rpcomp))
 		goto nla_put_failure;

+ 1 - 1
crypto/rng.c

@@ -65,7 +65,7 @@ static int crypto_rng_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_rng rrng;
 
-	snprintf(rrng.type, CRYPTO_MAX_ALG_NAME, "%s", "rng");
+	strncpy(rrng.type, "rng", sizeof(rrng.type));
 
 	rrng.seedsize = alg->cra_rng.seedsize;
 

+ 1 - 2
crypto/seqiv.c

@@ -305,9 +305,8 @@ static struct crypto_instance *seqiv_alloc(struct rtattr **tb)
 	int err;
 
 	algt = crypto_get_attr_type(tb);
-	err = PTR_ERR(algt);
 	if (IS_ERR(algt))
-		return ERR_PTR(err);
+		return ERR_CAST(algt);
 
 	err = crypto_get_default_rng();
 	if (err)

+ 2 - 1
crypto/shash.c

@@ -530,7 +530,8 @@ static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg)
 	struct crypto_report_hash rhash;
 	struct shash_alg *salg = __crypto_shash_alg(alg);
 
-	snprintf(rhash.type, CRYPTO_MAX_ALG_NAME, "%s", "shash");
+	strncpy(rhash.type, "shash", sizeof(rhash.type));
+
 	rhash.blocksize = alg->cra_blocksize;
 	rhash.digestsize = salg->digestsize;
 

+ 15 - 0
crypto/testmgr.c

@@ -2268,6 +2268,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "ecb(fcrypt)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = fcrypt_pcbc_enc_tv_template,
+					.count = 1
+				},
+				.dec = {
+					.vecs = fcrypt_pcbc_dec_tv_template,
+					.count = 1
+				}
+			}
+		}
 	}, {
 		.alg = "ecb(khazad)",
 		.test = alg_test_skcipher,

+ 1 - 1
drivers/crypto/atmel-aes.c

@@ -332,7 +332,7 @@ static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev *dd)
 		return -EINVAL;
 
 	dd->nb_out_sg = atmel_aes_sg_length(dd->req, dd->out_sg);
-	if (!dd->nb_in_sg)
+	if (!dd->nb_out_sg)
 		return -EINVAL;
 
 	dd->bufcnt = sg_copy_to_buffer(dd->in_sg, dd->nb_in_sg,

+ 3 - 3
drivers/crypto/bfin_crc.c

@@ -694,7 +694,7 @@ out_error_dma:
 		dma_free_coherent(&pdev->dev, PAGE_SIZE, crc->sg_cpu, crc->sg_dma);
 	free_dma(crc->dma_ch);
 out_error_irq:
-	free_irq(crc->irq, crc->dev);
+	free_irq(crc->irq, crc);
 out_error_unmap:
 	iounmap((void *)crc->regs);
 out_error_free_mem:
@@ -720,10 +720,10 @@ static int bfin_crypto_crc_remove(struct platform_device *pdev)
 
 	crypto_unregister_ahash(&algs);
 	tasklet_kill(&crc->done_task);
-	iounmap((void *)crc->regs);
 	free_dma(crc->dma_ch);
 	if (crc->irq > 0)
-		free_irq(crc->irq, crc->dev);
+		free_irq(crc->irq, crc);
+	iounmap((void *)crc->regs);
 	kfree(crc);
 
 	return 0;

+ 484 - 174
drivers/crypto/omap-aes.c

@@ -5,6 +5,7 @@
  *
  * Copyright (c) 2010 Nokia Corporation
  * Author: Dmitry Kasatkin <dmitry.kasatkin@nokia.com>
+ * Copyright (c) 2011 Texas Instruments Incorporated
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as published
@@ -19,28 +20,39 @@
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
-#include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/omap-dma.h>
+#include <linux/pm_runtime.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
 #include <linux/io.h>
 #include <linux/crypto.h>
 #include <linux/interrupt.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/aes.h>
 
-#include <linux/omap-dma.h>
+#define DST_MAXBURST			4
+#define DMA_MIN				(DST_MAXBURST * sizeof(u32))
 
 /* OMAP TRM gives bitfields as start:end, where start is the higher bit
    number. For example 7:0 */
 #define FLD_MASK(start, end)	(((1 << ((start) - (end) + 1)) - 1) << (end))
 #define FLD_VAL(val, start, end) (((val) << (end)) & FLD_MASK(start, end))
 
-#define AES_REG_KEY(x)			(0x1C - ((x ^ 0x01) * 0x04))
-#define AES_REG_IV(x)			(0x20 + ((x) * 0x04))
+#define AES_REG_KEY(dd, x)		((dd)->pdata->key_ofs - \
+						((x ^ 0x01) * 0x04))
+#define AES_REG_IV(dd, x)		((dd)->pdata->iv_ofs + ((x) * 0x04))
 
-#define AES_REG_CTRL			0x30
-#define AES_REG_CTRL_CTR_WIDTH		(1 << 7)
+#define AES_REG_CTRL(dd)		((dd)->pdata->ctrl_ofs)
+#define AES_REG_CTRL_CTR_WIDTH_MASK	(3 << 7)
+#define AES_REG_CTRL_CTR_WIDTH_32		(0 << 7)
+#define AES_REG_CTRL_CTR_WIDTH_64		(1 << 7)
+#define AES_REG_CTRL_CTR_WIDTH_96		(2 << 7)
+#define AES_REG_CTRL_CTR_WIDTH_128		(3 << 7)
 #define AES_REG_CTRL_CTR		(1 << 6)
 #define AES_REG_CTRL_CBC		(1 << 5)
 #define AES_REG_CTRL_KEY_SIZE		(3 << 3)
@@ -48,14 +60,11 @@
 #define AES_REG_CTRL_INPUT_READY	(1 << 1)
 #define AES_REG_CTRL_OUTPUT_READY	(1 << 0)
 
-#define AES_REG_DATA			0x34
-#define AES_REG_DATA_N(x)		(0x34 + ((x) * 0x04))
+#define AES_REG_DATA_N(dd, x)		((dd)->pdata->data_ofs + ((x) * 0x04))
 
-#define AES_REG_REV			0x44
-#define AES_REG_REV_MAJOR		0xF0
-#define AES_REG_REV_MINOR		0x0F
+#define AES_REG_REV(dd)			((dd)->pdata->rev_ofs)
 
-#define AES_REG_MASK			0x48
+#define AES_REG_MASK(dd)		((dd)->pdata->mask_ofs)
 #define AES_REG_MASK_SIDLE		(1 << 6)
 #define AES_REG_MASK_START		(1 << 5)
 #define AES_REG_MASK_DMA_OUT_EN		(1 << 3)
@@ -63,8 +72,7 @@
 #define AES_REG_MASK_SOFTRESET		(1 << 1)
 #define AES_REG_AUTOIDLE		(1 << 0)
 
-#define AES_REG_SYSSTATUS		0x4C
-#define AES_REG_SYSSTATUS_RESETDONE	(1 << 0)
+#define AES_REG_LENGTH_N(x)		(0x54 + ((x) * 0x04))
 
 #define DEFAULT_TIMEOUT		(5*HZ)
 
@@ -72,6 +80,7 @@
 #define FLAGS_ENCRYPT		BIT(0)
 #define FLAGS_CBC		BIT(1)
 #define FLAGS_GIV		BIT(2)
+#define FLAGS_CTR		BIT(3)
 
 #define FLAGS_INIT		BIT(4)
 #define FLAGS_FAST		BIT(5)
@@ -92,11 +101,39 @@ struct omap_aes_reqctx {
 #define OMAP_AES_QUEUE_LENGTH	1
 #define OMAP_AES_CACHE_SIZE	0
 
+struct omap_aes_algs_info {
+	struct crypto_alg	*algs_list;
+	unsigned int		size;
+	unsigned int		registered;
+};
+
+struct omap_aes_pdata {
+	struct omap_aes_algs_info	*algs_info;
+	unsigned int	algs_info_size;
+
+	void		(*trigger)(struct omap_aes_dev *dd, int length);
+
+	u32		key_ofs;
+	u32		iv_ofs;
+	u32		ctrl_ofs;
+	u32		data_ofs;
+	u32		rev_ofs;
+	u32		mask_ofs;
+
+	u32		dma_enable_in;
+	u32		dma_enable_out;
+	u32		dma_start;
+
+	u32		major_mask;
+	u32		major_shift;
+	u32		minor_mask;
+	u32		minor_shift;
+};
+
 struct omap_aes_dev {
 	struct list_head	list;
 	unsigned long		phys_base;
 	void __iomem		*io_base;
-	struct clk		*iclk;
 	struct omap_aes_ctx	*ctx;
 	struct device		*dev;
 	unsigned long		flags;
@@ -111,20 +148,24 @@ struct omap_aes_dev {
 	struct ablkcipher_request	*req;
 	size_t				total;
 	struct scatterlist		*in_sg;
+	struct scatterlist		in_sgl;
 	size_t				in_offset;
 	struct scatterlist		*out_sg;
+	struct scatterlist		out_sgl;
 	size_t				out_offset;
 
 	size_t			buflen;
 	void			*buf_in;
 	size_t			dma_size;
 	int			dma_in;
-	int			dma_lch_in;
+	struct dma_chan		*dma_lch_in;
 	dma_addr_t		dma_addr_in;
 	void			*buf_out;
 	int			dma_out;
-	int			dma_lch_out;
+	struct dma_chan		*dma_lch_out;
 	dma_addr_t		dma_addr_out;
+
+	const struct omap_aes_pdata	*pdata;
 };
 
 /* keep registered devices data here */
@@ -160,19 +201,6 @@ static void omap_aes_write_n(struct omap_aes_dev *dd, u32 offset,
 		omap_aes_write(dd, offset, *value);
 }
 
-static int omap_aes_wait(struct omap_aes_dev *dd, u32 offset, u32 bit)
-{
-	unsigned long timeout = jiffies + DEFAULT_TIMEOUT;
-
-	while (!(omap_aes_read(dd, offset) & bit)) {
-		if (time_is_before_jiffies(timeout)) {
-			dev_err(dd->dev, "omap-aes timeout\n");
-			return -ETIMEDOUT;
-		}
-	}
-	return 0;
-}
-
 static int omap_aes_hw_init(struct omap_aes_dev *dd)
 {
 	/*
@@ -180,23 +208,9 @@ static int omap_aes_hw_init(struct omap_aes_dev *dd)
 	 * It may be long delays between requests.
 	 * Device might go to off mode to save power.
 	 */
-	clk_enable(dd->iclk);
+	pm_runtime_get_sync(dd->dev);
 
 	if (!(dd->flags & FLAGS_INIT)) {
-		/* is it necessary to reset before every operation? */
-		omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_SOFTRESET,
-					AES_REG_MASK_SOFTRESET);
-		/*
-		 * prevent OCP bus error (SRESP) in case an access to the module
-		 * is performed while the module is coming out of soft reset
-		 */
-		__asm__ __volatile__("nop");
-		__asm__ __volatile__("nop");
-
-		if (omap_aes_wait(dd, AES_REG_SYSSTATUS,
-				AES_REG_SYSSTATUS_RESETDONE))
-			return -ETIMEDOUT;
-
 		dd->flags |= FLAGS_INIT;
 		dd->err = 0;
 	}
@@ -208,59 +222,75 @@ static int omap_aes_write_ctrl(struct omap_aes_dev *dd)
 {
 	unsigned int key32;
 	int i, err;
-	u32 val, mask;
+	u32 val, mask = 0;
 
 	err = omap_aes_hw_init(dd);
 	if (err)
 		return err;
 
-	val = 0;
-	if (dd->dma_lch_out >= 0)
-		val |= AES_REG_MASK_DMA_OUT_EN;
-	if (dd->dma_lch_in >= 0)
-		val |= AES_REG_MASK_DMA_IN_EN;
-
-	mask = AES_REG_MASK_DMA_IN_EN | AES_REG_MASK_DMA_OUT_EN;
-
-	omap_aes_write_mask(dd, AES_REG_MASK, val, mask);
-
 	key32 = dd->ctx->keylen / sizeof(u32);
 
 	/* it seems a key should always be set even if it has not changed */
 	for (i = 0; i < key32; i++) {
-		omap_aes_write(dd, AES_REG_KEY(i),
+		omap_aes_write(dd, AES_REG_KEY(dd, i),
 			__le32_to_cpu(dd->ctx->key[i]));
 	}
 
-	if ((dd->flags & FLAGS_CBC) && dd->req->info)
-		omap_aes_write_n(dd, AES_REG_IV(0), dd->req->info, 4);
+	if ((dd->flags & (FLAGS_CBC | FLAGS_CTR)) && dd->req->info)
+		omap_aes_write_n(dd, AES_REG_IV(dd, 0), dd->req->info, 4);
 
 	val = FLD_VAL(((dd->ctx->keylen >> 3) - 1), 4, 3);
 	if (dd->flags & FLAGS_CBC)
 		val |= AES_REG_CTRL_CBC;
+	if (dd->flags & FLAGS_CTR) {
+		val |= AES_REG_CTRL_CTR | AES_REG_CTRL_CTR_WIDTH_32;
+		mask = AES_REG_CTRL_CTR | AES_REG_CTRL_CTR_WIDTH_MASK;
+	}
 	if (dd->flags & FLAGS_ENCRYPT)
 		val |= AES_REG_CTRL_DIRECTION;
 
-	mask = AES_REG_CTRL_CBC | AES_REG_CTRL_DIRECTION |
+	mask |= AES_REG_CTRL_CBC | AES_REG_CTRL_DIRECTION |
 			AES_REG_CTRL_KEY_SIZE;
 
-	omap_aes_write_mask(dd, AES_REG_CTRL, val, mask);
+	omap_aes_write_mask(dd, AES_REG_CTRL(dd), val, mask);
 
-	/* IN */
-	omap_set_dma_dest_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_CONSTANT,
-				 dd->phys_base + AES_REG_DATA, 0, 4);
+	return 0;
+}
 
-	omap_set_dma_dest_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4);
-	omap_set_dma_src_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4);
+static void omap_aes_dma_trigger_omap2(struct omap_aes_dev *dd, int length)
+{
+	u32 mask, val;
 
-	/* OUT */
-	omap_set_dma_src_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_CONSTANT,
-				dd->phys_base + AES_REG_DATA, 0, 4);
+	val = dd->pdata->dma_start;
 
-	omap_set_dma_src_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4);
-	omap_set_dma_dest_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4);
+	if (dd->dma_lch_out != NULL)
+		val |= dd->pdata->dma_enable_out;
+	if (dd->dma_lch_in != NULL)
+		val |= dd->pdata->dma_enable_in;
+
+	mask = dd->pdata->dma_enable_out | dd->pdata->dma_enable_in |
+	       dd->pdata->dma_start;
+
+	omap_aes_write_mask(dd, AES_REG_MASK(dd), val, mask);
 
-	return 0;
+}
+
+static void omap_aes_dma_trigger_omap4(struct omap_aes_dev *dd, int length)
+{
+	omap_aes_write(dd, AES_REG_LENGTH_N(0), length);
+	omap_aes_write(dd, AES_REG_LENGTH_N(1), 0);
+
+	omap_aes_dma_trigger_omap2(dd, length);
+}
+
+static void omap_aes_dma_stop(struct omap_aes_dev *dd)
+{
+	u32 mask;
+
+	mask = dd->pdata->dma_enable_out | dd->pdata->dma_enable_in |
+	       dd->pdata->dma_start;
+
+	omap_aes_write_mask(dd, AES_REG_MASK(dd), 0, mask);
 }
 
 static struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_ctx *ctx)
@@ -284,18 +314,10 @@ static struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_ctx *ctx)
 	return dd;
 }
 
-static void omap_aes_dma_callback(int lch, u16 ch_status, void *data)
+static void omap_aes_dma_out_callback(void *data)
 {
 	struct omap_aes_dev *dd = data;
 
-	if (ch_status != OMAP_DMA_BLOCK_IRQ) {
-		pr_err("omap-aes DMA error status: 0x%hx\n", ch_status);
-		dd->err = -EIO;
-		dd->flags &= ~FLAGS_INIT; /* request to re-initialize */
-	} else if (lch == dd->dma_lch_in) {
-		return;
-	}
-
 	/* dma_lch_out - completed */
 	tasklet_schedule(&dd->done_task);
 }
@@ -303,9 +325,10 @@ static void omap_aes_dma_callback(int lch, u16 ch_status, void *data)
 static int omap_aes_dma_init(struct omap_aes_dev *dd)
 {
 	int err = -ENOMEM;
+	dma_cap_mask_t mask;
 
-	dd->dma_lch_out = -1;
-	dd->dma_lch_in = -1;
+	dd->dma_lch_out = NULL;
+	dd->dma_lch_in = NULL;
 
 	dd->buf_in = (void *)__get_free_pages(GFP_KERNEL, OMAP_AES_CACHE_SIZE);
 	dd->buf_out = (void *)__get_free_pages(GFP_KERNEL, OMAP_AES_CACHE_SIZE);
@@ -334,23 +357,31 @@ static int omap_aes_dma_init(struct omap_aes_dev *dd)
 		goto err_map_out;
 	}
 
-	err = omap_request_dma(dd->dma_in, "omap-aes-rx",
-			       omap_aes_dma_callback, dd, &dd->dma_lch_in);
-	if (err) {
-		dev_err(dd->dev, "Unable to request DMA channel\n");
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	dd->dma_lch_in = dma_request_slave_channel_compat(mask,
+							  omap_dma_filter_fn,
+							  &dd->dma_in,
+							  dd->dev, "rx");
+	if (!dd->dma_lch_in) {
+		dev_err(dd->dev, "Unable to request in DMA channel\n");
 		goto err_dma_in;
 	}
-	err = omap_request_dma(dd->dma_out, "omap-aes-tx",
-			       omap_aes_dma_callback, dd, &dd->dma_lch_out);
-	if (err) {
-		dev_err(dd->dev, "Unable to request DMA channel\n");
+
+	dd->dma_lch_out = dma_request_slave_channel_compat(mask,
+							   omap_dma_filter_fn,
+							   &dd->dma_out,
+							   dd->dev, "tx");
+	if (!dd->dma_lch_out) {
+		dev_err(dd->dev, "Unable to request out DMA channel\n");
 		goto err_dma_out;
 	}
 
 	return 0;
 
 err_dma_out:
-	omap_free_dma(dd->dma_lch_in);
+	dma_release_channel(dd->dma_lch_in);
 err_dma_in:
 	dma_unmap_single(dd->dev, dd->dma_addr_out, dd->buflen,
 			 DMA_FROM_DEVICE);
@@ -367,8 +398,8 @@ err_alloc:
 
 static void omap_aes_dma_cleanup(struct omap_aes_dev *dd)
 {
-	omap_free_dma(dd->dma_lch_out);
-	omap_free_dma(dd->dma_lch_in);
+	dma_release_channel(dd->dma_lch_out);
+	dma_release_channel(dd->dma_lch_in);
 	dma_unmap_single(dd->dev, dd->dma_addr_out, dd->buflen,
 			 DMA_FROM_DEVICE);
 	dma_unmap_single(dd->dev, dd->dma_addr_in, dd->buflen, DMA_TO_DEVICE);
@@ -426,12 +457,15 @@ static int sg_copy(struct scatterlist **sg, size_t *offset, void *buf,
 	return off;
 }
 
-static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
-			       dma_addr_t dma_addr_out, int length)
+static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
+		struct scatterlist *in_sg, struct scatterlist *out_sg)
 {
 	struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct omap_aes_dev *dd = ctx->dd;
-	int len32;
+	struct dma_async_tx_descriptor *tx_in, *tx_out;
+	struct dma_slave_config cfg;
+	dma_addr_t dma_addr_in = sg_dma_address(in_sg);
+	int ret, length = sg_dma_len(in_sg);
 
 	pr_debug("len: %d\n", length);
 
@@ -441,30 +475,61 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
 		dma_sync_single_for_device(dd->dev, dma_addr_in, length,
 					   DMA_TO_DEVICE);
 
-	len32 = DIV_ROUND_UP(length, sizeof(u32));
+	memset(&cfg, 0, sizeof(cfg));
+
+	cfg.src_addr = dd->phys_base + AES_REG_DATA_N(dd, 0);
+	cfg.dst_addr = dd->phys_base + AES_REG_DATA_N(dd, 0);
+	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	cfg.src_maxburst = DST_MAXBURST;
+	cfg.dst_maxburst = DST_MAXBURST;
 
 	/* IN */
-	omap_set_dma_transfer_params(dd->dma_lch_in, OMAP_DMA_DATA_TYPE_S32,
-				     len32, 1, OMAP_DMA_SYNC_PACKET, dd->dma_in,
-					OMAP_DMA_DST_SYNC);
+	ret = dmaengine_slave_config(dd->dma_lch_in, &cfg);
+	if (ret) {
+		dev_err(dd->dev, "can't configure IN dmaengine slave: %d\n",
+			ret);
+		return ret;
+	}
+
+	tx_in = dmaengine_prep_slave_sg(dd->dma_lch_in, in_sg, 1,
+					DMA_MEM_TO_DEV,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!tx_in) {
+		dev_err(dd->dev, "IN prep_slave_sg() failed\n");
+		return -EINVAL;
+	}
 
-	omap_set_dma_src_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_POST_INC,
-				dma_addr_in, 0, 0);
+	/* No callback necessary */
+	tx_in->callback_param = dd;
 
 	/* OUT */
-	omap_set_dma_transfer_params(dd->dma_lch_out, OMAP_DMA_DATA_TYPE_S32,
-				     len32, 1, OMAP_DMA_SYNC_PACKET,
-					dd->dma_out, OMAP_DMA_SRC_SYNC);
+	ret = dmaengine_slave_config(dd->dma_lch_out, &cfg);
+	if (ret) {
+		dev_err(dd->dev, "can't configure OUT dmaengine slave: %d\n",
+			ret);
+		return ret;
+	}
+
+	tx_out = dmaengine_prep_slave_sg(dd->dma_lch_out, out_sg, 1,
+					DMA_DEV_TO_MEM,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!tx_out) {
+		dev_err(dd->dev, "OUT prep_slave_sg() failed\n");
+		return -EINVAL;
+	}
 
-	omap_set_dma_dest_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_POST_INC,
-				 dma_addr_out, 0, 0);
+	tx_out->callback = omap_aes_dma_out_callback;
+	tx_out->callback_param = dd;
 
-	omap_start_dma(dd->dma_lch_in);
-	omap_start_dma(dd->dma_lch_out);
+	dmaengine_submit(tx_in);
+	dmaengine_submit(tx_out);
 
-	/* start DMA or disable idle mode */
-	omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_START,
-			    AES_REG_MASK_START);
+	dma_async_issue_pending(dd->dma_lch_in);
+	dma_async_issue_pending(dd->dma_lch_out);
+
+	/* start DMA */
+	dd->pdata->trigger(dd, length);
 
 	return 0;
 }
@@ -476,6 +541,8 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
 	int err, fast = 0, in, out;
 	size_t count;
 	dma_addr_t addr_in, addr_out;
+	struct scatterlist *in_sg, *out_sg;
+	int len32;
 
 	pr_debug("total: %d\n", dd->total);
 
@@ -514,6 +581,9 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
 		addr_in = sg_dma_address(dd->in_sg);
 		addr_out = sg_dma_address(dd->out_sg);
 
+		in_sg = dd->in_sg;
+		out_sg = dd->out_sg;
+
 		dd->flags |= FLAGS_FAST;
 
 	} else {
@@ -521,6 +591,27 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
 		count = sg_copy(&dd->in_sg, &dd->in_offset, dd->buf_in,
 				 dd->buflen, dd->total, 0);
 
+		len32 = DIV_ROUND_UP(count, DMA_MIN) * DMA_MIN;
+
+		/*
+		 * The data going into the AES module has been copied
+		 * to a local buffer and the data coming out will go
+		 * into a local buffer so set up local SG entries for
+		 * both.
+		 */
+		sg_init_table(&dd->in_sgl, 1);
+		dd->in_sgl.offset = dd->in_offset;
+		sg_dma_len(&dd->in_sgl) = len32;
+		sg_dma_address(&dd->in_sgl) = dd->dma_addr_in;
+
+		sg_init_table(&dd->out_sgl, 1);
+		dd->out_sgl.offset = dd->out_offset;
+		sg_dma_len(&dd->out_sgl) = len32;
+		sg_dma_address(&dd->out_sgl) = dd->dma_addr_out;
+
+		in_sg = &dd->in_sgl;
+		out_sg = &dd->out_sgl;
+
 		addr_in = dd->dma_addr_in;
 		addr_out = dd->dma_addr_out;
 
@@ -530,7 +621,7 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
 
 	dd->total -= count;
 
-	err = omap_aes_crypt_dma(tfm, addr_in, addr_out, count);
+	err = omap_aes_crypt_dma(tfm, in_sg, out_sg);
 	if (err) {
 		dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
 		dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_TO_DEVICE);
@@ -545,7 +636,7 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
 
 	pr_debug("err: %d\n", err);
 
-	clk_disable(dd->iclk);
+	pm_runtime_put_sync(dd->dev);
 	dd->flags &= ~FLAGS_BUSY;
 
 	req->base.complete(&req->base, err);
@@ -558,10 +649,10 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
 
 	pr_debug("total: %d\n", dd->total);
 
-	omap_aes_write_mask(dd, AES_REG_MASK, 0, AES_REG_MASK_START);
+	omap_aes_dma_stop(dd);
 
-	omap_stop_dma(dd->dma_lch_in);
-	omap_stop_dma(dd->dma_lch_out);
+	dmaengine_terminate_all(dd->dma_lch_in);
+	dmaengine_terminate_all(dd->dma_lch_out);
 
 	if (dd->flags & FLAGS_FAST) {
 		dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE);
@@ -734,6 +825,16 @@ static int omap_aes_cbc_decrypt(struct ablkcipher_request *req)
 	return omap_aes_crypt(req, FLAGS_CBC);
 }
 
+static int omap_aes_ctr_encrypt(struct ablkcipher_request *req)
+{
+	return omap_aes_crypt(req, FLAGS_ENCRYPT | FLAGS_CTR);
+}
+
+static int omap_aes_ctr_decrypt(struct ablkcipher_request *req)
+{
+	return omap_aes_crypt(req, FLAGS_CTR);
+}
+
 static int omap_aes_cra_init(struct crypto_tfm *tfm)
 {
 	pr_debug("enter\n");
@@ -750,7 +851,7 @@ static void omap_aes_cra_exit(struct crypto_tfm *tfm)
 
 /* ********************** ALGS ************************************ */
 
-static struct crypto_alg algs[] = {
+static struct crypto_alg algs_ecb_cbc[] = {
 {
 	.cra_name		= "ecb(aes)",
 	.cra_driver_name	= "ecb-aes-omap",
@@ -798,11 +899,213 @@ static struct crypto_alg algs[] = {
 }
 };
 
+static struct crypto_alg algs_ctr[] = {
+{
+	.cra_name		= "ctr(aes)",
+	.cra_driver_name	= "ctr-aes-omap",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_KERN_DRIVER_ONLY |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct omap_aes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= omap_aes_cra_init,
+	.cra_exit		= omap_aes_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.geniv		= "eseqiv",
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= omap_aes_setkey,
+		.encrypt	= omap_aes_ctr_encrypt,
+		.decrypt	= omap_aes_ctr_decrypt,
+	}
+} ,
+};
+
+static struct omap_aes_algs_info omap_aes_algs_info_ecb_cbc[] = {
+	{
+		.algs_list	= algs_ecb_cbc,
+		.size		= ARRAY_SIZE(algs_ecb_cbc),
+	},
+};
+
+static const struct omap_aes_pdata omap_aes_pdata_omap2 = {
+	.algs_info	= omap_aes_algs_info_ecb_cbc,
+	.algs_info_size	= ARRAY_SIZE(omap_aes_algs_info_ecb_cbc),
+	.trigger	= omap_aes_dma_trigger_omap2,
+	.key_ofs	= 0x1c,
+	.iv_ofs		= 0x20,
+	.ctrl_ofs	= 0x30,
+	.data_ofs	= 0x34,
+	.rev_ofs	= 0x44,
+	.mask_ofs	= 0x48,
+	.dma_enable_in	= BIT(2),
+	.dma_enable_out	= BIT(3),
+	.dma_start	= BIT(5),
+	.major_mask	= 0xf0,
+	.major_shift	= 4,
+	.minor_mask	= 0x0f,
+	.minor_shift	= 0,
+};
+
+#ifdef CONFIG_OF
+static struct omap_aes_algs_info omap_aes_algs_info_ecb_cbc_ctr[] = {
+	{
+		.algs_list	= algs_ecb_cbc,
+		.size		= ARRAY_SIZE(algs_ecb_cbc),
+	},
+	{
+		.algs_list	= algs_ctr,
+		.size		= ARRAY_SIZE(algs_ctr),
+	},
+};
+
+static const struct omap_aes_pdata omap_aes_pdata_omap3 = {
+	.algs_info	= omap_aes_algs_info_ecb_cbc_ctr,
+	.algs_info_size	= ARRAY_SIZE(omap_aes_algs_info_ecb_cbc_ctr),
+	.trigger	= omap_aes_dma_trigger_omap2,
+	.key_ofs	= 0x1c,
+	.iv_ofs		= 0x20,
+	.ctrl_ofs	= 0x30,
+	.data_ofs	= 0x34,
+	.rev_ofs	= 0x44,
+	.mask_ofs	= 0x48,
+	.dma_enable_in	= BIT(2),
+	.dma_enable_out	= BIT(3),
+	.dma_start	= BIT(5),
+	.major_mask	= 0xf0,
+	.major_shift	= 4,
+	.minor_mask	= 0x0f,
+	.minor_shift	= 0,
+};
+
+static const struct omap_aes_pdata omap_aes_pdata_omap4 = {
+	.algs_info	= omap_aes_algs_info_ecb_cbc_ctr,
+	.algs_info_size	= ARRAY_SIZE(omap_aes_algs_info_ecb_cbc_ctr),
+	.trigger	= omap_aes_dma_trigger_omap4,
+	.key_ofs	= 0x3c,
+	.iv_ofs		= 0x40,
+	.ctrl_ofs	= 0x50,
+	.data_ofs	= 0x60,
+	.rev_ofs	= 0x80,
+	.mask_ofs	= 0x84,
+	.dma_enable_in	= BIT(5),
+	.dma_enable_out	= BIT(6),
+	.major_mask	= 0x0700,
+	.major_shift	= 8,
+	.minor_mask	= 0x003f,
+	.minor_shift	= 0,
+};
+
+static const struct of_device_id omap_aes_of_match[] = {
+	{
+		.compatible	= "ti,omap2-aes",
+		.data		= &omap_aes_pdata_omap2,
+	},
+	{
+		.compatible	= "ti,omap3-aes",
+		.data		= &omap_aes_pdata_omap3,
+	},
+	{
+		.compatible	= "ti,omap4-aes",
+		.data		= &omap_aes_pdata_omap4,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, omap_aes_of_match);
+
+static int omap_aes_get_res_of(struct omap_aes_dev *dd,
+		struct device *dev, struct resource *res)
+{
+	struct device_node *node = dev->of_node;
+	const struct of_device_id *match;
+	int err = 0;
+
+	match = of_match_device(of_match_ptr(omap_aes_of_match), dev);
+	if (!match) {
+		dev_err(dev, "no compatible OF match\n");
+		err = -EINVAL;
+		goto err;
+	}
+
+	err = of_address_to_resource(node, 0, res);
+	if (err < 0) {
+		dev_err(dev, "can't translate OF node address\n");
+		err = -EINVAL;
+		goto err;
+	}
+
+	dd->dma_out = -1; /* Dummy value that's unused */
+	dd->dma_in = -1; /* Dummy value that's unused */
+
+	dd->pdata = match->data;
+
+err:
+	return err;
+}
+#else
+static const struct of_device_id omap_aes_of_match[] = {
+	{},
+};
+
+static int omap_aes_get_res_of(struct omap_aes_dev *dd,
+		struct device *dev, struct resource *res)
+{
+	return -EINVAL;
+}
+#endif
+
+static int omap_aes_get_res_pdev(struct omap_aes_dev *dd,
+		struct platform_device *pdev, struct resource *res)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *r;
+	int err = 0;
+
+	/* Get the base address */
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		dev_err(dev, "no MEM resource info\n");
+		err = -ENODEV;
+		goto err;
+	}
+	memcpy(res, r, sizeof(*res));
+
+	/* Get the DMA out channel */
+	r = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+	if (!r) {
+		dev_err(dev, "no DMA out resource info\n");
+		err = -ENODEV;
+		goto err;
+	}
+	dd->dma_out = r->start;
+
+	/* Get the DMA in channel */
+	r = platform_get_resource(pdev, IORESOURCE_DMA, 1);
+	if (!r) {
+		dev_err(dev, "no DMA in resource info\n");
+		err = -ENODEV;
+		goto err;
+	}
+	dd->dma_in = r->start;
+
+	/* Only OMAP2/3 can be non-DT */
+	dd->pdata = &omap_aes_pdata_omap2;
+
+err:
+	return err;
+}
+
 static int omap_aes_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct omap_aes_dev *dd;
-	struct resource *res;
+	struct crypto_alg *algp;
+	struct resource res;
 	int err = -ENOMEM, i, j;
 	u32 reg;
 
@@ -817,49 +1120,31 @@ static int omap_aes_probe(struct platform_device *pdev)
 	spin_lock_init(&dd->lock);
 	crypto_init_queue(&dd->queue, OMAP_AES_QUEUE_LENGTH);
 
-	/* Get the base address */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(dev, "invalid resource type\n");
-		err = -ENODEV;
-		goto err_res;
-	}
-	dd->phys_base = res->start;
-
-	/* Get the DMA */
-	res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-	if (!res)
-		dev_info(dev, "no DMA info\n");
-	else
-		dd->dma_out = res->start;
-
-	/* Get the DMA */
-	res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
-	if (!res)
-		dev_info(dev, "no DMA info\n");
-	else
-		dd->dma_in = res->start;
-
-	/* Initializing the clock */
-	dd->iclk = clk_get(dev, "ick");
-	if (IS_ERR(dd->iclk)) {
-		dev_err(dev, "clock intialization failed.\n");
-		err = PTR_ERR(dd->iclk);
+	err = (dev->of_node) ? omap_aes_get_res_of(dd, dev, &res) :
+			       omap_aes_get_res_pdev(dd, pdev, &res);
+	if (err)
 		goto err_res;
-	}
 
-	dd->io_base = ioremap(dd->phys_base, SZ_4K);
+	dd->io_base = devm_request_and_ioremap(dev, &res);
 	if (!dd->io_base) {
 		dev_err(dev, "can't ioremap\n");
 		err = -ENOMEM;
-		goto err_io;
+		goto err_res;
 	}
+	dd->phys_base = res.start;
+
+	pm_runtime_enable(dev);
+	pm_runtime_get_sync(dev);
+
+	omap_aes_dma_stop(dd);
+
+	reg = omap_aes_read(dd, AES_REG_REV(dd));
+
+	pm_runtime_put_sync(dev);
 
-	clk_enable(dd->iclk);
-	reg = omap_aes_read(dd, AES_REG_REV);
 	dev_info(dev, "OMAP AES hw accel rev: %u.%u\n",
-		 (reg & AES_REG_REV_MAJOR) >> 4, reg & AES_REG_REV_MINOR);
-	clk_disable(dd->iclk);
+		 (reg & dd->pdata->major_mask) >> dd->pdata->major_shift,
+		 (reg & dd->pdata->minor_mask) >> dd->pdata->minor_shift);
 
 	tasklet_init(&dd->done_task, omap_aes_done_task, (unsigned long)dd);
 	tasklet_init(&dd->queue_task, omap_aes_queue_task, (unsigned long)dd);
@@ -873,26 +1158,32 @@ static int omap_aes_probe(struct platform_device *pdev)
 	list_add_tail(&dd->list, &dev_list);
 	spin_unlock(&list_lock);
 
-	for (i = 0; i < ARRAY_SIZE(algs); i++) {
-		pr_debug("i: %d\n", i);
-		err = crypto_register_alg(&algs[i]);
-		if (err)
-			goto err_algs;
-	}
+	for (i = 0; i < dd->pdata->algs_info_size; i++) {
+		for (j = 0; j < dd->pdata->algs_info[i].size; j++) {
+			algp = &dd->pdata->algs_info[i].algs_list[j];
+
+			pr_debug("reg alg: %s\n", algp->cra_name);
+			INIT_LIST_HEAD(&algp->cra_list);
+
+			err = crypto_register_alg(algp);
+			if (err)
+				goto err_algs;
 
-	pr_info("probe() done\n");
+			dd->pdata->algs_info[i].registered++;
+		}
+	}
 
 	return 0;
 err_algs:
-	for (j = 0; j < i; j++)
-		crypto_unregister_alg(&algs[j]);
+	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
+		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
+			crypto_unregister_alg(
+					&dd->pdata->algs_info[i].algs_list[j]);
 	omap_aes_dma_cleanup(dd);
 err_dma:
 	tasklet_kill(&dd->done_task);
 	tasklet_kill(&dd->queue_task);
-	iounmap(dd->io_base);
-err_io:
-	clk_put(dd->iclk);
+	pm_runtime_disable(dev);
 err_res:
 	kfree(dd);
 	dd = NULL;
@@ -904,7 +1195,7 @@ err_data:
 static int omap_aes_remove(struct platform_device *pdev)
 {
 	struct omap_aes_dev *dd = platform_get_drvdata(pdev);
-	int i;
+	int i, j;
 
 	if (!dd)
 		return -ENODEV;
@@ -913,33 +1204,52 @@ static int omap_aes_remove(struct platform_device *pdev)
 	list_del(&dd->list);
 	spin_unlock(&list_lock);
 
-	for (i = 0; i < ARRAY_SIZE(algs); i++)
-		crypto_unregister_alg(&algs[i]);
+	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
+		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
+			crypto_unregister_alg(
+					&dd->pdata->algs_info[i].algs_list[j]);
 
 	tasklet_kill(&dd->done_task);
 	tasklet_kill(&dd->queue_task);
 	omap_aes_dma_cleanup(dd);
-	iounmap(dd->io_base);
-	clk_put(dd->iclk);
+	pm_runtime_disable(dd->dev);
 	kfree(dd);
 	dd = NULL;
 
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int omap_aes_suspend(struct device *dev)
+{
+	pm_runtime_put_sync(dev);
+	return 0;
+}
+
+static int omap_aes_resume(struct device *dev)
+{
+	pm_runtime_get_sync(dev);
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops omap_aes_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(omap_aes_suspend, omap_aes_resume)
+};
+
 static struct platform_driver omap_aes_driver = {
 	.probe	= omap_aes_probe,
 	.remove	= omap_aes_remove,
 	.driver	= {
 		.name	= "omap-aes",
 		.owner	= THIS_MODULE,
+		.pm	= &omap_aes_pm_ops,
+		.of_match_table	= omap_aes_of_match,
 	},
 };
 
 static int __init omap_aes_mod_init(void)
 {
-	pr_info("loading %s driver\n", "omap-aes");
-
 	return  platform_driver_register(&omap_aes_driver);
 }
 

File diff suppressed because it is too large
+ 650 - 130
drivers/crypto/omap-sham.c


+ 1 - 3
drivers/crypto/s5p-sss.c

@@ -580,7 +580,7 @@ static int s5p_aes_probe(struct platform_device *pdev)
 				     resource_size(res), pdev->name))
 		return -EBUSY;
 
-	pdata->clk = clk_get(dev, "secss");
+	pdata->clk = devm_clk_get(dev, "secss");
 	if (IS_ERR(pdata->clk)) {
 		dev_err(dev, "failed to find secss clock source\n");
 		return -ENOENT;
@@ -645,7 +645,6 @@ static int s5p_aes_probe(struct platform_device *pdev)
 
  err_irq:
 	clk_disable(pdata->clk);
-	clk_put(pdata->clk);
 
 	s5p_dev = NULL;
 	platform_set_drvdata(pdev, NULL);
@@ -667,7 +666,6 @@ static int s5p_aes_remove(struct platform_device *pdev)
 	tasklet_kill(&pdata->tasklet);
 
 	clk_disable(pdata->clk);
-	clk_put(pdata->clk);
 
 	s5p_dev = NULL;
 	platform_set_drvdata(pdev, NULL);

Some files were not shown because too many files changed in this diff