Browse Source

Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto update from Herbert Xu:
 - Optimised AES/SHA1 for ARM.
 - IPsec ESN support in talitos and caam.
 - x86_64/avx implementation of cast5/cast6.
 - Add/use multi-algorithm registration helpers where possible.
 - Added IBM Power7+ in-Nest support.
 - Misc fixes.

Fix up trivial conflicts in crypto/Kconfig due to the sparc64 crypto
config options being added next to the new ARM ones.

[ Side note: cut-and-paste duplicate help texts make those conflicts
  harder to read than necessary, thanks to git being smart about
  minimizing conflicts and maximizing the common parts... ]

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (71 commits)
  crypto: x86/glue_helper - fix storing of new IV in CBC encryption
  crypto: cast5/avx - fix storing of new IV in CBC encryption
  crypto: tcrypt - add missing tests for camellia and ghash
  crypto: testmgr - make test_aead also test 'dst != src' code paths
  crypto: testmgr - make test_skcipher also test 'dst != src' code paths
  crypto: testmgr - add test vectors for CTR mode IV increasement
  crypto: testmgr - add test vectors for partial ctr(cast5) and ctr(cast6)
  crypto: testmgr - allow non-multi page and multi page skcipher tests from same test template
  crypto: caam - increase TRNG clocks per sample
  crypto, tcrypt: remove local_bh_disable/enable() around local_irq_disable/enable()
  crypto: tegra-aes - fix error return code
  crypto: crypto4xx - fix error return code
  crypto: hifn_795x - fix error return code
  crypto: ux500 - fix error return code
  crypto: caam - fix error IDs for SEC v5.x RNG4
  hwrng: mxc-rnga - Access data via structure
  hwrng: mxc-rnga - Adapt clocks to new i.mx clock framework
  crypto: caam - add IPsec ESN support
  crypto: 842 - remove .cra_list initialization
  Revert "[CRYPTO] cast6: inline bloat--"
  ...
Linus Torvalds 12 years ago
parent
commit
d66e6737d4
100 changed files with 8889 additions and 1824 deletions
  1. 12 0
      MAINTAINERS
  2. 1 0
      arch/arm/Makefile
  3. 9 0
      arch/arm/crypto/Makefile
  4. 1112 0
      arch/arm/crypto/aes-armv4.S
  5. 108 0
      arch/arm/crypto/aes_glue.c
  6. 503 0
      arch/arm/crypto/sha1-armv4-large.S
  7. 179 0
      arch/arm/crypto/sha1_glue.c
  8. 2 1
      arch/powerpc/configs/ppc64_defconfig
  9. 2 1
      arch/powerpc/configs/pseries_defconfig
  10. 2 2
      arch/powerpc/kernel/prom_init.c
  11. 0 5
      arch/s390/crypto/aes_s390.c
  12. 0 10
      arch/s390/crypto/des_s390.c
  13. 0 1
      arch/s390/crypto/ghash_s390.c
  14. 4 0
      arch/x86/crypto/Makefile
  15. 0 1
      arch/x86/crypto/aes_glue.c
  16. 219 39
      arch/x86/crypto/aesni-intel_glue.c
  17. 0 4
      arch/x86/crypto/blowfish_glue.c
  18. 688 688
      arch/x86/crypto/camellia_glue.c
  19. 376 0
      arch/x86/crypto/cast5-avx-x86_64-asm_64.S
  20. 530 0
      arch/x86/crypto/cast5_avx_glue.c
  21. 383 0
      arch/x86/crypto/cast6-avx-x86_64-asm_64.S
  22. 648 0
      arch/x86/crypto/cast6_avx_glue.c
  23. 0 2
      arch/x86/crypto/ghash-clmulni-intel_glue.c
  24. 1 1
      arch/x86/crypto/glue_helper.c
  25. 0 1
      arch/x86/crypto/salsa20_glue.c
  26. 0 10
      arch/x86/crypto/serpent_avx_glue.c
  27. 0 10
      arch/x86/crypto/serpent_sse2_glue.c
  28. 142 85
      arch/x86/crypto/twofish-avx-x86_64-asm_64.S
  29. 0 10
      arch/x86/crypto/twofish_avx_glue.c
  30. 0 1
      arch/x86/crypto/twofish_glue.c
  31. 0 5
      arch/x86/crypto/twofish_glue_3way.c
  32. 182 0
      crypto/842.c
  33. 75 0
      crypto/Kconfig
  34. 3 2
      crypto/Makefile
  35. 0 1
      crypto/aes_generic.c
  36. 23 40
      crypto/ansi_cprng.c
  37. 0 1
      crypto/anubis.c
  38. 0 1
      crypto/blowfish_generic.c
  39. 0 1
      crypto/camellia_generic.c
  40. 46 34
      crypto/cast5_generic.c
  41. 46 27
      crypto/cast6_generic.c
  42. 18 39
      crypto/crypto_null.c
  43. 1 1
      crypto/crypto_user.c
  44. 0 1
      crypto/deflate.c
  45. 5 20
      crypto/des_generic.c
  46. 0 1
      crypto/fcrypt.c
  47. 0 1
      crypto/ghash-generic.c
  48. 0 1
      crypto/khazad.c
  49. 0 1
      crypto/krng.c
  50. 0 1
      crypto/lzo.c
  51. 0 1
      crypto/salsa20_generic.c
  52. 0 1
      crypto/seed.c
  53. 19 34
      crypto/serpent_generic.c
  54. 5 20
      crypto/sha256_generic.c
  55. 5 15
      crypto/sha512_generic.c
  56. 36 0
      crypto/shash.c
  57. 89 6
      crypto/tcrypt.c
  58. 1 0
      crypto/tcrypt.h
  59. 6 35
      crypto/tea.c
  60. 392 80
      crypto/testmgr.c
  61. 973 100
      crypto/testmgr.h
  62. 6 32
      crypto/tgr192.c
  63. 0 1
      crypto/twofish_generic.c
  64. 5 5
      crypto/vmac.c
  65. 6 33
      crypto/wp512.c
  66. 51 57
      drivers/char/hw_random/mxc-rnga.c
  67. 5 12
      drivers/char/hw_random/octeon-rng.c
  68. 8 14
      drivers/crypto/Kconfig
  69. 1 0
      drivers/crypto/amcc/crypto4xx_core.c
  70. 0 7
      drivers/crypto/atmel-aes.c
  71. 0 5
      drivers/crypto/atmel-sha.c
  72. 0 6
      drivers/crypto/atmel-tdes.c
  73. 37 14
      drivers/crypto/caam/caamalg.c
  74. 16 6
      drivers/crypto/caam/caamhash.c
  75. 6 3
      drivers/crypto/caam/caamrng.c
  76. 1 0
      drivers/crypto/caam/compat.h
  77. 3 3
      drivers/crypto/caam/ctrl.c
  78. 0 2
      drivers/crypto/caam/error.c
  79. 4 0
      drivers/crypto/caam/key_gen.c
  80. 1 17
      drivers/crypto/geode-aes.c
  81. 4 1
      drivers/crypto/hifn_795x.c
  82. 26 0
      drivers/crypto/nx/Kconfig
  83. 4 1
      drivers/crypto/nx/Makefile
  84. 1617 0
      drivers/crypto/nx/nx-842.c
  85. 0 1
      drivers/crypto/nx/nx-aes-cbc.c
  86. 0 2
      drivers/crypto/nx/nx-aes-ccm.c
  87. 0 2
      drivers/crypto/nx/nx-aes-ctr.c
  88. 0 1
      drivers/crypto/nx/nx-aes-ecb.c
  89. 0 2
      drivers/crypto/nx/nx-aes-gcm.c
  90. 0 1
      drivers/crypto/omap-aes.c
  91. 0 3
      drivers/crypto/padlock-aes.c
  92. 0 1
      drivers/crypto/s5p-sss.c
  93. 172 247
      drivers/crypto/talitos.c
  94. 1 2
      drivers/crypto/tegra-aes.c
  95. 1 0
      drivers/crypto/ux500/cryp/cryp_core.c
  96. 0 1
      drivers/crypto/ux500/hash/hash_core.c
  97. 27 0
      include/crypto/cast5.h
  98. 28 0
      include/crypto/cast6.h
  99. 2 0
      include/crypto/internal/hash.h
  100. 11 0
      include/linux/nx842.h

+ 12 - 0
MAINTAINERS

@@ -3438,6 +3438,18 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux.git
 S:	Maintained
 F:	arch/ia64/
 
+IBM Power in-Nest Crypto Acceleration
+M:	Kent Yoder <key@linux.vnet.ibm.com>
+L:	linux-crypto@vger.kernel.org
+S:	Supported
+F:	drivers/crypto/nx/
+
+IBM Power 842 compression accelerator
+M:	Robert Jennings <rcj@linux.vnet.ibm.com>
+S:	Supported
+F:	drivers/crypto/nx/nx-842.c
+F:	include/linux/nx842.h
+
 IBM Power Linux RAID adapter
 M:	Brian King <brking@us.ibm.com>
 S:	Supported

+ 1 - 0
arch/arm/Makefile

@@ -254,6 +254,7 @@ core-$(CONFIG_VFP)		+= arch/arm/vfp/
 # If we have a machine-specific directory, then include it in the build.
 core-y				+= arch/arm/kernel/ arch/arm/mm/ arch/arm/common/
 core-y				+= arch/arm/net/
+core-y				+= arch/arm/crypto/
 core-y				+= $(machdirs) $(platdirs)
 
 drivers-$(CONFIG_OPROFILE)      += arch/arm/oprofile/

+ 9 - 0
arch/arm/crypto/Makefile

@@ -0,0 +1,9 @@
+#
+# Arch-specific CryptoAPI modules.
+#
+
+obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
+obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
+
+aes-arm-y  := aes-armv4.o aes_glue.o
+sha1-arm-y := sha1-armv4-large.o sha1_glue.o

+ 1112 - 0
arch/arm/crypto/aes-armv4.S

@@ -0,0 +1,1112 @@
+#define __ARM_ARCH__ __LINUX_ARM_ARCH__
+@ ====================================================================
+@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+@ project. The module is, however, dual licensed under OpenSSL and
+@ CRYPTOGAMS licenses depending on where you obtain it. For further
+@ details see http://www.openssl.org/~appro/cryptogams/.
+@ ====================================================================
+
+@ AES for ARMv4
+
+@ January 2007.
+@
+@ Code uses single 1K S-box and is >2 times faster than code generated
+@ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which
+@ allows to merge logical or arithmetic operation with shift or rotate
+@ in one instruction and emit combined result every cycle. The module
+@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit
+@ key [on single-issue Xscale PXA250 core].
+
+@ May 2007.
+@
+@ AES_set_[en|de]crypt_key is added.
+
+@ July 2010.
+@
+@ Rescheduling for dual-issue pipeline resulted in 12% improvement on
+@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
+
+@ February 2011.
+@
+@ Profiler-assisted and platform-specific optimization resulted in 16%
+@ improvement on Cortex A8 core and ~21.5 cycles per byte.
+
+@ A little glue here to select the correct code below for the ARM CPU
+@ that is being targetted.
+
+.text
+.code	32
+
+.type	AES_Te,%object
+.align	5
+AES_Te:
+.word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
+.word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
+.word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
+.word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
+.word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
+.word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
+.word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
+.word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
+.word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
+.word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
+.word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
+.word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
+.word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
+.word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
+.word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
+.word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
+.word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
+.word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
+.word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
+.word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
+.word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
+.word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
+.word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
+.word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
+.word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
+.word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
+.word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
+.word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
+.word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
+.word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
+.word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
+.word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
+.word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
+.word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
+.word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
+.word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
+.word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
+.word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
+.word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
+.word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
+.word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
+.word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
+.word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
+.word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
+.word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
+.word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
+.word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
+.word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
+.word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
+.word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
+.word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
+.word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
+.word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
+.word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
+.word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
+.word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
+.word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
+.word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
+.word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
+.word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
+.word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
+.word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
+.word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
+.word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
+@ Te4[256]
+.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
+.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+@ rcon[]
+.word	0x01000000, 0x02000000, 0x04000000, 0x08000000
+.word	0x10000000, 0x20000000, 0x40000000, 0x80000000
+.word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
+.size	AES_Te,.-AES_Te
+
+@ void AES_encrypt(const unsigned char *in, unsigned char *out,
+@ 		 const AES_KEY *key) {
+.global AES_encrypt
+.type   AES_encrypt,%function
+.align	5
+AES_encrypt:
+	sub	r3,pc,#8		@ AES_encrypt
+	stmdb   sp!,{r1,r4-r12,lr}
+	mov	r12,r0		@ inp
+	mov	r11,r2
+	sub	r10,r3,#AES_encrypt-AES_Te	@ Te
+#if __ARM_ARCH__<7
+	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
+	ldrb	r4,[r12,#2]	@ manner...
+	ldrb	r5,[r12,#1]
+	ldrb	r6,[r12,#0]
+	orr	r0,r0,r4,lsl#8
+	ldrb	r1,[r12,#7]
+	orr	r0,r0,r5,lsl#16
+	ldrb	r4,[r12,#6]
+	orr	r0,r0,r6,lsl#24
+	ldrb	r5,[r12,#5]
+	ldrb	r6,[r12,#4]
+	orr	r1,r1,r4,lsl#8
+	ldrb	r2,[r12,#11]
+	orr	r1,r1,r5,lsl#16
+	ldrb	r4,[r12,#10]
+	orr	r1,r1,r6,lsl#24
+	ldrb	r5,[r12,#9]
+	ldrb	r6,[r12,#8]
+	orr	r2,r2,r4,lsl#8
+	ldrb	r3,[r12,#15]
+	orr	r2,r2,r5,lsl#16
+	ldrb	r4,[r12,#14]
+	orr	r2,r2,r6,lsl#24
+	ldrb	r5,[r12,#13]
+	ldrb	r6,[r12,#12]
+	orr	r3,r3,r4,lsl#8
+	orr	r3,r3,r5,lsl#16
+	orr	r3,r3,r6,lsl#24
+#else
+	ldr	r0,[r12,#0]
+	ldr	r1,[r12,#4]
+	ldr	r2,[r12,#8]
+	ldr	r3,[r12,#12]
+#ifdef __ARMEL__
+	rev	r0,r0
+	rev	r1,r1
+	rev	r2,r2
+	rev	r3,r3
+#endif
+#endif
+	bl	_armv4_AES_encrypt
+
+	ldr	r12,[sp],#4		@ pop out
+#if __ARM_ARCH__>=7
+#ifdef __ARMEL__
+	rev	r0,r0
+	rev	r1,r1
+	rev	r2,r2
+	rev	r3,r3
+#endif
+	str	r0,[r12,#0]
+	str	r1,[r12,#4]
+	str	r2,[r12,#8]
+	str	r3,[r12,#12]
+#else
+	mov	r4,r0,lsr#24		@ write output in endian-neutral
+	mov	r5,r0,lsr#16		@ manner...
+	mov	r6,r0,lsr#8
+	strb	r4,[r12,#0]
+	strb	r5,[r12,#1]
+	mov	r4,r1,lsr#24
+	strb	r6,[r12,#2]
+	mov	r5,r1,lsr#16
+	strb	r0,[r12,#3]
+	mov	r6,r1,lsr#8
+	strb	r4,[r12,#4]
+	strb	r5,[r12,#5]
+	mov	r4,r2,lsr#24
+	strb	r6,[r12,#6]
+	mov	r5,r2,lsr#16
+	strb	r1,[r12,#7]
+	mov	r6,r2,lsr#8
+	strb	r4,[r12,#8]
+	strb	r5,[r12,#9]
+	mov	r4,r3,lsr#24
+	strb	r6,[r12,#10]
+	mov	r5,r3,lsr#16
+	strb	r2,[r12,#11]
+	mov	r6,r3,lsr#8
+	strb	r4,[r12,#12]
+	strb	r5,[r12,#13]
+	strb	r6,[r12,#14]
+	strb	r3,[r12,#15]
+#endif
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r12,pc}
+#else
+	ldmia   sp!,{r4-r12,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+.size	AES_encrypt,.-AES_encrypt
+
+.type   _armv4_AES_encrypt,%function
+.align	2
+_armv4_AES_encrypt:
+	str	lr,[sp,#-4]!		@ push lr
+	ldmia	r11!,{r4-r7}
+	eor	r0,r0,r4
+	ldr	r12,[r11,#240-16]
+	eor	r1,r1,r5
+	eor	r2,r2,r6
+	eor	r3,r3,r7
+	sub	r12,r12,#1
+	mov	lr,#255
+
+	and	r7,lr,r0
+	and	r8,lr,r0,lsr#8
+	and	r9,lr,r0,lsr#16
+	mov	r0,r0,lsr#24
+.Lenc_loop:
+	ldr	r4,[r10,r7,lsl#2]	@ Te3[s0>>0]
+	and	r7,lr,r1,lsr#16	@ i0
+	ldr	r5,[r10,r8,lsl#2]	@ Te2[s0>>8]
+	and	r8,lr,r1
+	ldr	r6,[r10,r9,lsl#2]	@ Te1[s0>>16]
+	and	r9,lr,r1,lsr#8
+	ldr	r0,[r10,r0,lsl#2]	@ Te0[s0>>24]
+	mov	r1,r1,lsr#24
+
+	ldr	r7,[r10,r7,lsl#2]	@ Te1[s1>>16]
+	ldr	r8,[r10,r8,lsl#2]	@ Te3[s1>>0]
+	ldr	r9,[r10,r9,lsl#2]	@ Te2[s1>>8]
+	eor	r0,r0,r7,ror#8
+	ldr	r1,[r10,r1,lsl#2]	@ Te0[s1>>24]
+	and	r7,lr,r2,lsr#8	@ i0
+	eor	r5,r5,r8,ror#8
+	and	r8,lr,r2,lsr#16	@ i1
+	eor	r6,r6,r9,ror#8
+	and	r9,lr,r2
+	ldr	r7,[r10,r7,lsl#2]	@ Te2[s2>>8]
+	eor	r1,r1,r4,ror#24
+	ldr	r8,[r10,r8,lsl#2]	@ Te1[s2>>16]
+	mov	r2,r2,lsr#24
+
+	ldr	r9,[r10,r9,lsl#2]	@ Te3[s2>>0]
+	eor	r0,r0,r7,ror#16
+	ldr	r2,[r10,r2,lsl#2]	@ Te0[s2>>24]
+	and	r7,lr,r3		@ i0
+	eor	r1,r1,r8,ror#8
+	and	r8,lr,r3,lsr#8	@ i1
+	eor	r6,r6,r9,ror#16
+	and	r9,lr,r3,lsr#16	@ i2
+	ldr	r7,[r10,r7,lsl#2]	@ Te3[s3>>0]
+	eor	r2,r2,r5,ror#16
+	ldr	r8,[r10,r8,lsl#2]	@ Te2[s3>>8]
+	mov	r3,r3,lsr#24
+
+	ldr	r9,[r10,r9,lsl#2]	@ Te1[s3>>16]
+	eor	r0,r0,r7,ror#24
+	ldr	r7,[r11],#16
+	eor	r1,r1,r8,ror#16
+	ldr	r3,[r10,r3,lsl#2]	@ Te0[s3>>24]
+	eor	r2,r2,r9,ror#8
+	ldr	r4,[r11,#-12]
+	eor	r3,r3,r6,ror#8
+
+	ldr	r5,[r11,#-8]
+	eor	r0,r0,r7
+	ldr	r6,[r11,#-4]
+	and	r7,lr,r0
+	eor	r1,r1,r4
+	and	r8,lr,r0,lsr#8
+	eor	r2,r2,r5
+	and	r9,lr,r0,lsr#16
+	eor	r3,r3,r6
+	mov	r0,r0,lsr#24
+
+	subs	r12,r12,#1
+	bne	.Lenc_loop
+
+	add	r10,r10,#2
+
+	ldrb	r4,[r10,r7,lsl#2]	@ Te4[s0>>0]
+	and	r7,lr,r1,lsr#16	@ i0
+	ldrb	r5,[r10,r8,lsl#2]	@ Te4[s0>>8]
+	and	r8,lr,r1
+	ldrb	r6,[r10,r9,lsl#2]	@ Te4[s0>>16]
+	and	r9,lr,r1,lsr#8
+	ldrb	r0,[r10,r0,lsl#2]	@ Te4[s0>>24]
+	mov	r1,r1,lsr#24
+
+	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s1>>16]
+	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s1>>0]
+	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s1>>8]
+	eor	r0,r7,r0,lsl#8
+	ldrb	r1,[r10,r1,lsl#2]	@ Te4[s1>>24]
+	and	r7,lr,r2,lsr#8	@ i0
+	eor	r5,r8,r5,lsl#8
+	and	r8,lr,r2,lsr#16	@ i1
+	eor	r6,r9,r6,lsl#8
+	and	r9,lr,r2
+	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s2>>8]
+	eor	r1,r4,r1,lsl#24
+	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s2>>16]
+	mov	r2,r2,lsr#24
+
+	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s2>>0]
+	eor	r0,r7,r0,lsl#8
+	ldrb	r2,[r10,r2,lsl#2]	@ Te4[s2>>24]
+	and	r7,lr,r3		@ i0
+	eor	r1,r1,r8,lsl#16
+	and	r8,lr,r3,lsr#8	@ i1
+	eor	r6,r9,r6,lsl#8
+	and	r9,lr,r3,lsr#16	@ i2
+	ldrb	r7,[r10,r7,lsl#2]	@ Te4[s3>>0]
+	eor	r2,r5,r2,lsl#24
+	ldrb	r8,[r10,r8,lsl#2]	@ Te4[s3>>8]
+	mov	r3,r3,lsr#24
+
+	ldrb	r9,[r10,r9,lsl#2]	@ Te4[s3>>16]
+	eor	r0,r7,r0,lsl#8
+	ldr	r7,[r11,#0]
+	ldrb	r3,[r10,r3,lsl#2]	@ Te4[s3>>24]
+	eor	r1,r1,r8,lsl#8
+	ldr	r4,[r11,#4]
+	eor	r2,r2,r9,lsl#16
+	ldr	r5,[r11,#8]
+	eor	r3,r6,r3,lsl#24
+	ldr	r6,[r11,#12]
+
+	eor	r0,r0,r7
+	eor	r1,r1,r4
+	eor	r2,r2,r5
+	eor	r3,r3,r6
+
+	sub	r10,r10,#2
+	ldr	pc,[sp],#4		@ pop and return
+.size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
+
+.global private_AES_set_encrypt_key
+.type   private_AES_set_encrypt_key,%function
+.align	5
+private_AES_set_encrypt_key:
+_armv4_AES_set_encrypt_key:
+	sub	r3,pc,#8		@ AES_set_encrypt_key
+	teq	r0,#0
+	moveq	r0,#-1
+	beq	.Labrt
+	teq	r2,#0
+	moveq	r0,#-1
+	beq	.Labrt
+
+	teq	r1,#128
+	beq	.Lok
+	teq	r1,#192
+	beq	.Lok
+	teq	r1,#256
+	movne	r0,#-1
+	bne	.Labrt
+
+.Lok:	stmdb   sp!,{r4-r12,lr}
+	sub	r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
+
+	mov	r12,r0		@ inp
+	mov	lr,r1			@ bits
+	mov	r11,r2			@ key
+
+#if __ARM_ARCH__<7
+	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
+	ldrb	r4,[r12,#2]	@ manner...
+	ldrb	r5,[r12,#1]
+	ldrb	r6,[r12,#0]
+	orr	r0,r0,r4,lsl#8
+	ldrb	r1,[r12,#7]
+	orr	r0,r0,r5,lsl#16
+	ldrb	r4,[r12,#6]
+	orr	r0,r0,r6,lsl#24
+	ldrb	r5,[r12,#5]
+	ldrb	r6,[r12,#4]
+	orr	r1,r1,r4,lsl#8
+	ldrb	r2,[r12,#11]
+	orr	r1,r1,r5,lsl#16
+	ldrb	r4,[r12,#10]
+	orr	r1,r1,r6,lsl#24
+	ldrb	r5,[r12,#9]
+	ldrb	r6,[r12,#8]
+	orr	r2,r2,r4,lsl#8
+	ldrb	r3,[r12,#15]
+	orr	r2,r2,r5,lsl#16
+	ldrb	r4,[r12,#14]
+	orr	r2,r2,r6,lsl#24
+	ldrb	r5,[r12,#13]
+	ldrb	r6,[r12,#12]
+	orr	r3,r3,r4,lsl#8
+	str	r0,[r11],#16
+	orr	r3,r3,r5,lsl#16
+	str	r1,[r11,#-12]
+	orr	r3,r3,r6,lsl#24
+	str	r2,[r11,#-8]
+	str	r3,[r11,#-4]
+#else
+	ldr	r0,[r12,#0]
+	ldr	r1,[r12,#4]
+	ldr	r2,[r12,#8]
+	ldr	r3,[r12,#12]
+#ifdef __ARMEL__
+	rev	r0,r0
+	rev	r1,r1
+	rev	r2,r2
+	rev	r3,r3
+#endif
+	str	r0,[r11],#16
+	str	r1,[r11,#-12]
+	str	r2,[r11,#-8]
+	str	r3,[r11,#-4]
+#endif
+
+	teq	lr,#128
+	bne	.Lnot128
+	mov	r12,#10
+	str	r12,[r11,#240-16]
+	add	r6,r10,#256			@ rcon
+	mov	lr,#255
+
+.L128_loop:
+	and	r5,lr,r3,lsr#24
+	and	r7,lr,r3,lsr#16
+	ldrb	r5,[r10,r5]
+	and	r8,lr,r3,lsr#8
+	ldrb	r7,[r10,r7]
+	and	r9,lr,r3
+	ldrb	r8,[r10,r8]
+	orr	r5,r5,r7,lsl#24
+	ldrb	r9,[r10,r9]
+	orr	r5,r5,r8,lsl#16
+	ldr	r4,[r6],#4			@ rcon[i++]
+	orr	r5,r5,r9,lsl#8
+	eor	r5,r5,r4
+	eor	r0,r0,r5			@ rk[4]=rk[0]^...
+	eor	r1,r1,r0			@ rk[5]=rk[1]^rk[4]
+	str	r0,[r11],#16
+	eor	r2,r2,r1			@ rk[6]=rk[2]^rk[5]
+	str	r1,[r11,#-12]
+	eor	r3,r3,r2			@ rk[7]=rk[3]^rk[6]
+	str	r2,[r11,#-8]
+	subs	r12,r12,#1
+	str	r3,[r11,#-4]
+	bne	.L128_loop
+	sub	r2,r11,#176
+	b	.Ldone
+
+.Lnot128:
+#if __ARM_ARCH__<7
+	ldrb	r8,[r12,#19]
+	ldrb	r4,[r12,#18]
+	ldrb	r5,[r12,#17]
+	ldrb	r6,[r12,#16]
+	orr	r8,r8,r4,lsl#8
+	ldrb	r9,[r12,#23]
+	orr	r8,r8,r5,lsl#16
+	ldrb	r4,[r12,#22]
+	orr	r8,r8,r6,lsl#24
+	ldrb	r5,[r12,#21]
+	ldrb	r6,[r12,#20]
+	orr	r9,r9,r4,lsl#8
+	orr	r9,r9,r5,lsl#16
+	str	r8,[r11],#8
+	orr	r9,r9,r6,lsl#24
+	str	r9,[r11,#-4]
+#else
+	ldr	r8,[r12,#16]
+	ldr	r9,[r12,#20]
+#ifdef __ARMEL__
+	rev	r8,r8
+	rev	r9,r9
+#endif
+	str	r8,[r11],#8
+	str	r9,[r11,#-4]
+#endif
+
+	teq	lr,#192
+	bne	.Lnot192
+	mov	r12,#12
+	str	r12,[r11,#240-24]
+	add	r6,r10,#256			@ rcon
+	mov	lr,#255
+	mov	r12,#8
+
+.L192_loop:
+	and	r5,lr,r9,lsr#24
+	and	r7,lr,r9,lsr#16
+	ldrb	r5,[r10,r5]
+	and	r8,lr,r9,lsr#8
+	ldrb	r7,[r10,r7]
+	and	r9,lr,r9
+	ldrb	r8,[r10,r8]
+	orr	r5,r5,r7,lsl#24
+	ldrb	r9,[r10,r9]
+	orr	r5,r5,r8,lsl#16
+	ldr	r4,[r6],#4			@ rcon[i++]
+	orr	r5,r5,r9,lsl#8
+	eor	r9,r5,r4
+	eor	r0,r0,r9			@ rk[6]=rk[0]^...
+	eor	r1,r1,r0			@ rk[7]=rk[1]^rk[6]
+	str	r0,[r11],#24
+	eor	r2,r2,r1			@ rk[8]=rk[2]^rk[7]
+	str	r1,[r11,#-20]
+	eor	r3,r3,r2			@ rk[9]=rk[3]^rk[8]
+	str	r2,[r11,#-16]
+	subs	r12,r12,#1
+	str	r3,[r11,#-12]
+	subeq	r2,r11,#216
+	beq	.Ldone
+
+	ldr	r7,[r11,#-32]
+	ldr	r8,[r11,#-28]
+	eor	r7,r7,r3			@ rk[10]=rk[4]^rk[9]
+	eor	r9,r8,r7			@ rk[11]=rk[5]^rk[10]
+	str	r7,[r11,#-8]
+	str	r9,[r11,#-4]
+	b	.L192_loop
+
+.Lnot192:
+#if __ARM_ARCH__<7
+	ldrb	r8,[r12,#27]
+	ldrb	r4,[r12,#26]
+	ldrb	r5,[r12,#25]
+	ldrb	r6,[r12,#24]
+	orr	r8,r8,r4,lsl#8
+	ldrb	r9,[r12,#31]
+	orr	r8,r8,r5,lsl#16
+	ldrb	r4,[r12,#30]
+	orr	r8,r8,r6,lsl#24
+	ldrb	r5,[r12,#29]
+	ldrb	r6,[r12,#28]
+	orr	r9,r9,r4,lsl#8
+	orr	r9,r9,r5,lsl#16
+	str	r8,[r11],#8
+	orr	r9,r9,r6,lsl#24
+	str	r9,[r11,#-4]
+#else
+	ldr	r8,[r12,#24]
+	ldr	r9,[r12,#28]
+#ifdef __ARMEL__
+	rev	r8,r8
+	rev	r9,r9
+#endif
+	str	r8,[r11],#8
+	str	r9,[r11,#-4]
+#endif
+
+	mov	r12,#14
+	str	r12,[r11,#240-32]
+	add	r6,r10,#256			@ rcon
+	mov	lr,#255
+	mov	r12,#7
+
+.L256_loop:
+	and	r5,lr,r9,lsr#24
+	and	r7,lr,r9,lsr#16
+	ldrb	r5,[r10,r5]
+	and	r8,lr,r9,lsr#8
+	ldrb	r7,[r10,r7]
+	and	r9,lr,r9
+	ldrb	r8,[r10,r8]
+	orr	r5,r5,r7,lsl#24
+	ldrb	r9,[r10,r9]
+	orr	r5,r5,r8,lsl#16
+	ldr	r4,[r6],#4			@ rcon[i++]
+	orr	r5,r5,r9,lsl#8
+	eor	r9,r5,r4
+	eor	r0,r0,r9			@ rk[8]=rk[0]^...
+	eor	r1,r1,r0			@ rk[9]=rk[1]^rk[8]
+	str	r0,[r11],#32
+	eor	r2,r2,r1			@ rk[10]=rk[2]^rk[9]
+	str	r1,[r11,#-28]
+	eor	r3,r3,r2			@ rk[11]=rk[3]^rk[10]
+	str	r2,[r11,#-24]
+	subs	r12,r12,#1
+	str	r3,[r11,#-20]
+	subeq	r2,r11,#256
+	beq	.Ldone
+
+	and	r5,lr,r3
+	and	r7,lr,r3,lsr#8
+	ldrb	r5,[r10,r5]
+	and	r8,lr,r3,lsr#16
+	ldrb	r7,[r10,r7]
+	and	r9,lr,r3,lsr#24
+	ldrb	r8,[r10,r8]
+	orr	r5,r5,r7,lsl#8
+	ldrb	r9,[r10,r9]
+	orr	r5,r5,r8,lsl#16
+	ldr	r4,[r11,#-48]
+	orr	r5,r5,r9,lsl#24
+
+	ldr	r7,[r11,#-44]
+	ldr	r8,[r11,#-40]
+	eor	r4,r4,r5			@ rk[12]=rk[4]^...
+	ldr	r9,[r11,#-36]
+	eor	r7,r7,r4			@ rk[13]=rk[5]^rk[12]
+	str	r4,[r11,#-16]
+	eor	r8,r8,r7			@ rk[14]=rk[6]^rk[13]
+	str	r7,[r11,#-12]
+	eor	r9,r9,r8			@ rk[15]=rk[7]^rk[14]
+	str	r8,[r11,#-8]
+	str	r9,[r11,#-4]
+	b	.L256_loop
+
+.Ldone:	mov	r0,#0
+	ldmia   sp!,{r4-r12,lr}
+.Labrt:	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+.size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
+
+.global private_AES_set_decrypt_key
+.type   private_AES_set_decrypt_key,%function
+.align	5
+private_AES_set_decrypt_key:
+	str	lr,[sp,#-4]!            @ push lr
+#if 0
+	@ kernel does both of these in setkey so optimise this bit out by
+	@ expecting the key to already have the enc_key work done (see aes_glue.c)
+	bl	_armv4_AES_set_encrypt_key
+#else
+	mov	r0,#0
+#endif
+	teq	r0,#0
+	ldrne	lr,[sp],#4              @ pop lr
+	bne	.Labrt
+
+	stmdb   sp!,{r4-r12}
+
+	ldr	r12,[r2,#240]	@ AES_set_encrypt_key preserves r2,
+	mov	r11,r2			@ which is AES_KEY *key
+	mov	r7,r2
+	add	r8,r2,r12,lsl#4
+
+.Linv:	ldr	r0,[r7]
+	ldr	r1,[r7,#4]
+	ldr	r2,[r7,#8]
+	ldr	r3,[r7,#12]
+	ldr	r4,[r8]
+	ldr	r5,[r8,#4]
+	ldr	r6,[r8,#8]
+	ldr	r9,[r8,#12]
+	str	r0,[r8],#-16
+	str	r1,[r8,#16+4]
+	str	r2,[r8,#16+8]
+	str	r3,[r8,#16+12]
+	str	r4,[r7],#16
+	str	r5,[r7,#-12]
+	str	r6,[r7,#-8]
+	str	r9,[r7,#-4]
+	teq	r7,r8
+	bne	.Linv
+	ldr	r0,[r11,#16]!		@ prefetch tp1
+	mov	r7,#0x80
+	mov	r8,#0x1b
+	orr	r7,r7,#0x8000
+	orr	r8,r8,#0x1b00
+	orr	r7,r7,r7,lsl#16
+	orr	r8,r8,r8,lsl#16
+	sub	r12,r12,#1
+	mvn	r9,r7
+	mov	r12,r12,lsl#2	@ (rounds-1)*4
+
+.Lmix:	and	r4,r0,r7
+	and	r1,r0,r9
+	sub	r4,r4,r4,lsr#7
+	and	r4,r4,r8
+	eor	r1,r4,r1,lsl#1	@ tp2
+
+	and	r4,r1,r7
+	and	r2,r1,r9
+	sub	r4,r4,r4,lsr#7
+	and	r4,r4,r8
+	eor	r2,r4,r2,lsl#1	@ tp4
+
+	and	r4,r2,r7
+	and	r3,r2,r9
+	sub	r4,r4,r4,lsr#7
+	and	r4,r4,r8
+	eor	r3,r4,r3,lsl#1	@ tp8
+
+	eor	r4,r1,r2
+	eor	r5,r0,r3		@ tp9
+	eor	r4,r4,r3		@ tpe
+	eor	r4,r4,r1,ror#24
+	eor	r4,r4,r5,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
+	eor	r4,r4,r2,ror#16
+	eor	r4,r4,r5,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
+	eor	r4,r4,r5,ror#8	@ ^= ROTATE(tp9,24)
+
+	ldr	r0,[r11,#4]		@ prefetch tp1
+	str	r4,[r11],#4
+	subs	r12,r12,#1
+	bne	.Lmix
+
+	mov	r0,#0
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r12,pc}
+#else
+	ldmia   sp!,{r4-r12,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
+
+.type	AES_Td,%object
+.align	5
+AES_Td:
+.word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
+.word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
+.word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
+.word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
+.word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
+.word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
+.word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
+.word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
+.word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
+.word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
+.word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
+.word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
+.word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
+.word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
+.word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
+.word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
+.word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
+.word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
+.word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
+.word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
+.word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
+.word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
+.word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
+.word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
+.word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
+.word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
+.word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
+.word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
+.word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
+.word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
+.word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
+.word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
+.word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
+.word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
+.word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
+.word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
+.word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
+.word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
+.word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
+.word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
+.word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
+.word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
+.word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
+.word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
+.word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
+.word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
+.word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
+.word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
+.word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
+.word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
+.word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
+.word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
+.word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
+.word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
+.word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
+.word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
+.word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
+.word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
+.word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
+.word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
+.word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
+.word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
+.word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
+.word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
+@ Td4[256]
+.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+.size	AES_Td,.-AES_Td
+
+@ void AES_decrypt(const unsigned char *in, unsigned char *out,
+@ 		 const AES_KEY *key) {
+.global AES_decrypt
+.type   AES_decrypt,%function
+.align	5
+AES_decrypt:
+	sub	r3,pc,#8		@ AES_decrypt
+	stmdb   sp!,{r1,r4-r12,lr}
+	mov	r12,r0		@ inp
+	mov	r11,r2
+	sub	r10,r3,#AES_decrypt-AES_Td		@ Td
+#if __ARM_ARCH__<7
+	ldrb	r0,[r12,#3]	@ load input data in endian-neutral
+	ldrb	r4,[r12,#2]	@ manner...
+	ldrb	r5,[r12,#1]
+	ldrb	r6,[r12,#0]
+	orr	r0,r0,r4,lsl#8
+	ldrb	r1,[r12,#7]
+	orr	r0,r0,r5,lsl#16
+	ldrb	r4,[r12,#6]
+	orr	r0,r0,r6,lsl#24
+	ldrb	r5,[r12,#5]
+	ldrb	r6,[r12,#4]
+	orr	r1,r1,r4,lsl#8
+	ldrb	r2,[r12,#11]
+	orr	r1,r1,r5,lsl#16
+	ldrb	r4,[r12,#10]
+	orr	r1,r1,r6,lsl#24
+	ldrb	r5,[r12,#9]
+	ldrb	r6,[r12,#8]
+	orr	r2,r2,r4,lsl#8
+	ldrb	r3,[r12,#15]
+	orr	r2,r2,r5,lsl#16
+	ldrb	r4,[r12,#14]
+	orr	r2,r2,r6,lsl#24
+	ldrb	r5,[r12,#13]
+	ldrb	r6,[r12,#12]
+	orr	r3,r3,r4,lsl#8
+	orr	r3,r3,r5,lsl#16
+	orr	r3,r3,r6,lsl#24
+#else
+	ldr	r0,[r12,#0]
+	ldr	r1,[r12,#4]
+	ldr	r2,[r12,#8]
+	ldr	r3,[r12,#12]
+#ifdef __ARMEL__
+	rev	r0,r0
+	rev	r1,r1
+	rev	r2,r2
+	rev	r3,r3
+#endif
+#endif
+	bl	_armv4_AES_decrypt
+
+	ldr	r12,[sp],#4		@ pop out
+#if __ARM_ARCH__>=7
+#ifdef __ARMEL__
+	rev	r0,r0
+	rev	r1,r1
+	rev	r2,r2
+	rev	r3,r3
+#endif
+	str	r0,[r12,#0]
+	str	r1,[r12,#4]
+	str	r2,[r12,#8]
+	str	r3,[r12,#12]
+#else
+	mov	r4,r0,lsr#24		@ write output in endian-neutral
+	mov	r5,r0,lsr#16		@ manner...
+	mov	r6,r0,lsr#8
+	strb	r4,[r12,#0]
+	strb	r5,[r12,#1]
+	mov	r4,r1,lsr#24
+	strb	r6,[r12,#2]
+	mov	r5,r1,lsr#16
+	strb	r0,[r12,#3]
+	mov	r6,r1,lsr#8
+	strb	r4,[r12,#4]
+	strb	r5,[r12,#5]
+	mov	r4,r2,lsr#24
+	strb	r6,[r12,#6]
+	mov	r5,r2,lsr#16
+	strb	r1,[r12,#7]
+	mov	r6,r2,lsr#8
+	strb	r4,[r12,#8]
+	strb	r5,[r12,#9]
+	mov	r4,r3,lsr#24
+	strb	r6,[r12,#10]
+	mov	r5,r3,lsr#16
+	strb	r2,[r12,#11]
+	mov	r6,r3,lsr#8
+	strb	r4,[r12,#12]
+	strb	r5,[r12,#13]
+	strb	r6,[r12,#14]
+	strb	r3,[r12,#15]
+#endif
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r12,pc}
+#else
+	ldmia   sp!,{r4-r12,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+.size	AES_decrypt,.-AES_decrypt
+
+.type   _armv4_AES_decrypt,%function
+.align	2
+_armv4_AES_decrypt:
+	str	lr,[sp,#-4]!		@ push lr
+	ldmia	r11!,{r4-r7}
+	eor	r0,r0,r4
+	ldr	r12,[r11,#240-16]
+	eor	r1,r1,r5
+	eor	r2,r2,r6
+	eor	r3,r3,r7
+	sub	r12,r12,#1
+	mov	lr,#255
+
+	and	r7,lr,r0,lsr#16
+	and	r8,lr,r0,lsr#8
+	and	r9,lr,r0
+	mov	r0,r0,lsr#24
+.Ldec_loop:
+	ldr	r4,[r10,r7,lsl#2]	@ Td1[s0>>16]
+	and	r7,lr,r1		@ i0
+	ldr	r5,[r10,r8,lsl#2]	@ Td2[s0>>8]
+	and	r8,lr,r1,lsr#16
+	ldr	r6,[r10,r9,lsl#2]	@ Td3[s0>>0]
+	and	r9,lr,r1,lsr#8
+	ldr	r0,[r10,r0,lsl#2]	@ Td0[s0>>24]
+	mov	r1,r1,lsr#24
+
+	ldr	r7,[r10,r7,lsl#2]	@ Td3[s1>>0]
+	ldr	r8,[r10,r8,lsl#2]	@ Td1[s1>>16]
+	ldr	r9,[r10,r9,lsl#2]	@ Td2[s1>>8]
+	eor	r0,r0,r7,ror#24
+	ldr	r1,[r10,r1,lsl#2]	@ Td0[s1>>24]
+	and	r7,lr,r2,lsr#8	@ i0
+	eor	r5,r8,r5,ror#8
+	and	r8,lr,r2		@ i1
+	eor	r6,r9,r6,ror#8
+	and	r9,lr,r2,lsr#16
+	ldr	r7,[r10,r7,lsl#2]	@ Td2[s2>>8]
+	eor	r1,r1,r4,ror#8
+	ldr	r8,[r10,r8,lsl#2]	@ Td3[s2>>0]
+	mov	r2,r2,lsr#24
+
+	ldr	r9,[r10,r9,lsl#2]	@ Td1[s2>>16]
+	eor	r0,r0,r7,ror#16
+	ldr	r2,[r10,r2,lsl#2]	@ Td0[s2>>24]
+	and	r7,lr,r3,lsr#16	@ i0
+	eor	r1,r1,r8,ror#24
+	and	r8,lr,r3,lsr#8	@ i1
+	eor	r6,r9,r6,ror#8
+	and	r9,lr,r3		@ i2
+	ldr	r7,[r10,r7,lsl#2]	@ Td1[s3>>16]
+	eor	r2,r2,r5,ror#8
+	ldr	r8,[r10,r8,lsl#2]	@ Td2[s3>>8]
+	mov	r3,r3,lsr#24
+
+	ldr	r9,[r10,r9,lsl#2]	@ Td3[s3>>0]
+	eor	r0,r0,r7,ror#8
+	ldr	r7,[r11],#16
+	eor	r1,r1,r8,ror#16
+	ldr	r3,[r10,r3,lsl#2]	@ Td0[s3>>24]
+	eor	r2,r2,r9,ror#24
+
+	ldr	r4,[r11,#-12]
+	eor	r0,r0,r7
+	ldr	r5,[r11,#-8]
+	eor	r3,r3,r6,ror#8
+	ldr	r6,[r11,#-4]
+	and	r7,lr,r0,lsr#16
+	eor	r1,r1,r4
+	and	r8,lr,r0,lsr#8
+	eor	r2,r2,r5
+	and	r9,lr,r0
+	eor	r3,r3,r6
+	mov	r0,r0,lsr#24
+
+	subs	r12,r12,#1
+	bne	.Ldec_loop
+
+	add	r10,r10,#1024
+
+	ldr	r5,[r10,#0]		@ prefetch Td4
+	ldr	r6,[r10,#32]
+	ldr	r4,[r10,#64]
+	ldr	r5,[r10,#96]
+	ldr	r6,[r10,#128]
+	ldr	r4,[r10,#160]
+	ldr	r5,[r10,#192]
+	ldr	r6,[r10,#224]
+
+	ldrb	r0,[r10,r0]		@ Td4[s0>>24]
+	ldrb	r4,[r10,r7]		@ Td4[s0>>16]
+	and	r7,lr,r1		@ i0
+	ldrb	r5,[r10,r8]		@ Td4[s0>>8]
+	and	r8,lr,r1,lsr#16
+	ldrb	r6,[r10,r9]		@ Td4[s0>>0]
+	and	r9,lr,r1,lsr#8
+
+	ldrb	r7,[r10,r7]		@ Td4[s1>>0]
+	ldrb	r1,[r10,r1,lsr#24]	@ Td4[s1>>24]
+	ldrb	r8,[r10,r8]		@ Td4[s1>>16]
+	eor	r0,r7,r0,lsl#24
+	ldrb	r9,[r10,r9]		@ Td4[s1>>8]
+	eor	r1,r4,r1,lsl#8
+	and	r7,lr,r2,lsr#8	@ i0
+	eor	r5,r5,r8,lsl#8
+	and	r8,lr,r2		@ i1
+	ldrb	r7,[r10,r7]		@ Td4[s2>>8]
+	eor	r6,r6,r9,lsl#8
+	ldrb	r8,[r10,r8]		@ Td4[s2>>0]
+	and	r9,lr,r2,lsr#16
+
+	ldrb	r2,[r10,r2,lsr#24]	@ Td4[s2>>24]
+	eor	r0,r0,r7,lsl#8
+	ldrb	r9,[r10,r9]		@ Td4[s2>>16]
+	eor	r1,r8,r1,lsl#16
+	and	r7,lr,r3,lsr#16	@ i0
+	eor	r2,r5,r2,lsl#16
+	and	r8,lr,r3,lsr#8	@ i1
+	ldrb	r7,[r10,r7]		@ Td4[s3>>16]
+	eor	r6,r6,r9,lsl#16
+	ldrb	r8,[r10,r8]		@ Td4[s3>>8]
+	and	r9,lr,r3		@ i2
+
+	ldrb	r9,[r10,r9]		@ Td4[s3>>0]
+	ldrb	r3,[r10,r3,lsr#24]	@ Td4[s3>>24]
+	eor	r0,r0,r7,lsl#16
+	ldr	r7,[r11,#0]
+	eor	r1,r1,r8,lsl#8
+	ldr	r4,[r11,#4]
+	eor	r2,r9,r2,lsl#8
+	ldr	r5,[r11,#8]
+	eor	r3,r6,r3,lsl#24
+	ldr	r6,[r11,#12]
+
+	eor	r0,r0,r7
+	eor	r1,r1,r4
+	eor	r2,r2,r5
+	eor	r3,r3,r6
+
+	sub	r10,r10,#1024
+	ldr	pc,[sp],#4		@ pop and return
+.size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
+.asciz	"AES for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
+.align	2

+ 108 - 0
arch/arm/crypto/aes_glue.c

@@ -0,0 +1,108 @@
+/*
+ * Glue Code for the asm optimized version of the AES Cipher Algorithm
+ */
+
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <crypto/aes.h>
+
+#define AES_MAXNR 14
+
+typedef struct {
+	unsigned int rd_key[4 *(AES_MAXNR + 1)];
+	int rounds;
+} AES_KEY;
+
+struct AES_CTX {
+	AES_KEY enc_key;
+	AES_KEY dec_key;
+};
+
+asmlinkage void AES_encrypt(const u8 *in, u8 *out, AES_KEY *ctx);
+asmlinkage void AES_decrypt(const u8 *in, u8 *out, AES_KEY *ctx);
+asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);
+asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
+	AES_encrypt(src, dst, &ctx->enc_key);
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
+	AES_decrypt(src, dst, &ctx->dec_key);
+}
+
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		unsigned int key_len)
+{
+	struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
+
+	switch (key_len) {
+	case AES_KEYSIZE_128:
+		key_len = 128;
+		break;
+	case AES_KEYSIZE_192:
+		key_len = 192;
+		break;
+	case AES_KEYSIZE_256:
+		key_len = 256;
+		break;
+	default:
+		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	if (private_AES_set_encrypt_key(in_key, key_len, &ctx->enc_key) == -1) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+	/* private_AES_set_decrypt_key expects an encryption key as input */
+	ctx->dec_key = ctx->enc_key;
+	if (private_AES_set_decrypt_key(in_key, key_len, &ctx->dec_key) == -1) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static struct crypto_alg aes_alg = {
+	.cra_name		= "aes",
+	.cra_driver_name	= "aes-asm",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct AES_CTX),
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(aes_alg.cra_list),
+	.cra_u	= {
+		.cipher	= {
+			.cia_min_keysize	= AES_MIN_KEY_SIZE,
+			.cia_max_keysize	= AES_MAX_KEY_SIZE,
+			.cia_setkey			= aes_set_key,
+			.cia_encrypt		= aes_encrypt,
+			.cia_decrypt		= aes_decrypt
+		}
+	}
+};
+
+static int __init aes_init(void)
+{
+	return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_fini(void)
+{
+	crypto_unregister_alg(&aes_alg);
+}
+
+module_init(aes_init);
+module_exit(aes_fini);
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("aes");
+MODULE_ALIAS("aes-asm");
+MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>");

+ 503 - 0
arch/arm/crypto/sha1-armv4-large.S

@@ -0,0 +1,503 @@
+#define __ARM_ARCH__ __LINUX_ARM_ARCH__
+@ ====================================================================
+@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+@ project. The module is, however, dual licensed under OpenSSL and
+@ CRYPTOGAMS licenses depending on where you obtain it. For further
+@ details see http://www.openssl.org/~appro/cryptogams/.
+@ ====================================================================
+
+@ sha1_block procedure for ARMv4.
+@
+@ January 2007.
+
+@ Size/performance trade-off
+@ ====================================================================
+@ impl		size in bytes	comp cycles[*]	measured performance
+@ ====================================================================
+@ thumb		304		3212		4420
+@ armv4-small	392/+29%	1958/+64%	2250/+96%
+@ armv4-compact	740/+89%	1552/+26%	1840/+22%
+@ armv4-large	1420/+92%	1307/+19%	1370/+34%[***]
+@ full unroll	~5100/+260%	~1260/+4%	~1300/+5%
+@ ====================================================================
+@ thumb		= same as 'small' but in Thumb instructions[**] and
+@		  with recurring code in two private functions;
+@ small		= detached Xload/update, loops are folded;
+@ compact	= detached Xload/update, 5x unroll;
+@ large		= interleaved Xload/update, 5x unroll;
+@ full unroll	= interleaved Xload/update, full unroll, estimated[!];
+@
+@ [*]	Manually counted instructions in "grand" loop body. Measured
+@	performance is affected by prologue and epilogue overhead,
+@	i-cache availability, branch penalties, etc.
+@ [**]	While each Thumb instruction is twice smaller, they are not as
+@	diverse as ARM ones: e.g., there are only two arithmetic
+@	instructions with 3 arguments, no [fixed] rotate, addressing
+@	modes are limited. As result it takes more instructions to do
+@	the same job in Thumb, therefore the code is never twice as
+@	small and always slower.
+@ [***]	which is also ~35% better than compiler generated code. Dual-
+@	issue Cortex A8 core was measured to process input block in
+@	~990 cycles.
+
+@ August 2010.
+@
+@ Rescheduling for dual-issue pipeline resulted in 13% improvement on
+@ Cortex A8 core and in absolute terms ~870 cycles per input block
+@ [or 13.6 cycles per byte].
+
+@ February 2011.
+@
+@ Profiler-assisted and platform-specific optimization resulted in 10%
+@ improvement on Cortex A8 core and 12.2 cycles per byte.
+
+.text
+
+.global	sha1_block_data_order
+.type	sha1_block_data_order,%function
+
+.align	2
+sha1_block_data_order:
+	stmdb	sp!,{r4-r12,lr}
+	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
+	ldmia	r0,{r3,r4,r5,r6,r7}
+.Lloop:
+	ldr	r8,.LK_00_19
+	mov	r14,sp
+	sub	sp,sp,#15*4
+	mov	r5,r5,ror#30
+	mov	r6,r6,ror#30
+	mov	r7,r7,ror#30		@ [6]
+.L_00_15:
+#if __ARM_ARCH__<7
+	ldrb	r10,[r1,#2]
+	ldrb	r9,[r1,#3]
+	ldrb	r11,[r1,#1]
+	add	r7,r8,r7,ror#2			@ E+=K_00_19
+	ldrb	r12,[r1],#4
+	orr	r9,r9,r10,lsl#8
+	eor	r10,r5,r6			@ F_xx_xx
+	orr	r9,r9,r11,lsl#16
+	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
+	orr	r9,r9,r12,lsl#24
+#else
+	ldr	r9,[r1],#4			@ handles unaligned
+	add	r7,r8,r7,ror#2			@ E+=K_00_19
+	eor	r10,r5,r6			@ F_xx_xx
+	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
+#ifdef __ARMEL__
+	rev	r9,r9				@ byte swap
+#endif
+#endif
+	and	r10,r4,r10,ror#2
+	add	r7,r7,r9			@ E+=X[i]
+	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
+	str	r9,[r14,#-4]!
+	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
+#if __ARM_ARCH__<7
+	ldrb	r10,[r1,#2]
+	ldrb	r9,[r1,#3]
+	ldrb	r11,[r1,#1]
+	add	r6,r8,r6,ror#2			@ E+=K_00_19
+	ldrb	r12,[r1],#4
+	orr	r9,r9,r10,lsl#8
+	eor	r10,r4,r5			@ F_xx_xx
+	orr	r9,r9,r11,lsl#16
+	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
+	orr	r9,r9,r12,lsl#24
+#else
+	ldr	r9,[r1],#4			@ handles unaligned
+	add	r6,r8,r6,ror#2			@ E+=K_00_19
+	eor	r10,r4,r5			@ F_xx_xx
+	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
+#ifdef __ARMEL__
+	rev	r9,r9				@ byte swap
+#endif
+#endif
+	and	r10,r3,r10,ror#2
+	add	r6,r6,r9			@ E+=X[i]
+	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
+	str	r9,[r14,#-4]!
+	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
+#if __ARM_ARCH__<7
+	ldrb	r10,[r1,#2]
+	ldrb	r9,[r1,#3]
+	ldrb	r11,[r1,#1]
+	add	r5,r8,r5,ror#2			@ E+=K_00_19
+	ldrb	r12,[r1],#4
+	orr	r9,r9,r10,lsl#8
+	eor	r10,r3,r4			@ F_xx_xx
+	orr	r9,r9,r11,lsl#16
+	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
+	orr	r9,r9,r12,lsl#24
+#else
+	ldr	r9,[r1],#4			@ handles unaligned
+	add	r5,r8,r5,ror#2			@ E+=K_00_19
+	eor	r10,r3,r4			@ F_xx_xx
+	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
+#ifdef __ARMEL__
+	rev	r9,r9				@ byte swap
+#endif
+#endif
+	and	r10,r7,r10,ror#2
+	add	r5,r5,r9			@ E+=X[i]
+	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
+	str	r9,[r14,#-4]!
+	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
+#if __ARM_ARCH__<7
+	ldrb	r10,[r1,#2]
+	ldrb	r9,[r1,#3]
+	ldrb	r11,[r1,#1]
+	add	r4,r8,r4,ror#2			@ E+=K_00_19
+	ldrb	r12,[r1],#4
+	orr	r9,r9,r10,lsl#8
+	eor	r10,r7,r3			@ F_xx_xx
+	orr	r9,r9,r11,lsl#16
+	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
+	orr	r9,r9,r12,lsl#24
+#else
+	ldr	r9,[r1],#4			@ handles unaligned
+	add	r4,r8,r4,ror#2			@ E+=K_00_19
+	eor	r10,r7,r3			@ F_xx_xx
+	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
+#ifdef __ARMEL__
+	rev	r9,r9				@ byte swap
+#endif
+#endif
+	and	r10,r6,r10,ror#2
+	add	r4,r4,r9			@ E+=X[i]
+	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
+	str	r9,[r14,#-4]!
+	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
+#if __ARM_ARCH__<7
+	ldrb	r10,[r1,#2]
+	ldrb	r9,[r1,#3]
+	ldrb	r11,[r1,#1]
+	add	r3,r8,r3,ror#2			@ E+=K_00_19
+	ldrb	r12,[r1],#4
+	orr	r9,r9,r10,lsl#8
+	eor	r10,r6,r7			@ F_xx_xx
+	orr	r9,r9,r11,lsl#16
+	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
+	orr	r9,r9,r12,lsl#24
+#else
+	ldr	r9,[r1],#4			@ handles unaligned
+	add	r3,r8,r3,ror#2			@ E+=K_00_19
+	eor	r10,r6,r7			@ F_xx_xx
+	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
+#ifdef __ARMEL__
+	rev	r9,r9				@ byte swap
+#endif
+#endif
+	and	r10,r5,r10,ror#2
+	add	r3,r3,r9			@ E+=X[i]
+	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
+	str	r9,[r14,#-4]!
+	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
+	teq	r14,sp
+	bne	.L_00_15		@ [((11+4)*5+2)*3]
+#if __ARM_ARCH__<7
+	ldrb	r10,[r1,#2]
+	ldrb	r9,[r1,#3]
+	ldrb	r11,[r1,#1]
+	add	r7,r8,r7,ror#2			@ E+=K_00_19
+	ldrb	r12,[r1],#4
+	orr	r9,r9,r10,lsl#8
+	eor	r10,r5,r6			@ F_xx_xx
+	orr	r9,r9,r11,lsl#16
+	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
+	orr	r9,r9,r12,lsl#24
+#else
+	ldr	r9,[r1],#4			@ handles unaligned
+	add	r7,r8,r7,ror#2			@ E+=K_00_19
+	eor	r10,r5,r6			@ F_xx_xx
+	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
+#ifdef __ARMEL__
+	rev	r9,r9				@ byte swap
+#endif
+#endif
+	and	r10,r4,r10,ror#2
+	add	r7,r7,r9			@ E+=X[i]
+	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
+	str	r9,[r14,#-4]!
+	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r4,r5			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r3,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r6,r6,r9			@ E+=X[i]
+	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
+	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r3,r4			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r7,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r5,r5,r9			@ E+=X[i]
+	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
+	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r7,r3			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r6,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r4,r4,r9			@ E+=X[i]
+	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
+	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r6,r7			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r5,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r3,r3,r9			@ E+=X[i]
+	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
+	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
+
+	ldr	r8,.LK_20_39		@ [+15+16*4]
+	sub	sp,sp,#25*4
+	cmn	sp,#0			@ [+3], clear carry to denote 20_39
+.L_20_39_or_60_79:
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r5,r6			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	eor r10,r4,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r7,r7,r9			@ E+=X[i]
+	add	r7,r7,r10			@ E+=F_20_39(B,C,D)
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r4,r5			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	eor r10,r3,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r6,r6,r9			@ E+=X[i]
+	add	r6,r6,r10			@ E+=F_20_39(B,C,D)
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r3,r4			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	eor r10,r7,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r5,r5,r9			@ E+=X[i]
+	add	r5,r5,r10			@ E+=F_20_39(B,C,D)
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r7,r3			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	eor r10,r6,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r4,r4,r9			@ E+=X[i]
+	add	r4,r4,r10			@ E+=F_20_39(B,C,D)
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r6,r7			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	eor r10,r5,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r3,r3,r9			@ E+=X[i]
+	add	r3,r3,r10			@ E+=F_20_39(B,C,D)
+	teq	r14,sp			@ preserve carry
+	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
+	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes
+
+	ldr	r8,.LK_40_59
+	sub	sp,sp,#20*4		@ [+2]
+.L_40_59:
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r5,r6			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r4,r10,ror#2					@ F_xx_xx
+	and r11,r5,r6					@ F_xx_xx
+	add	r7,r7,r9			@ E+=X[i]
+	add	r7,r7,r10			@ E+=F_40_59(B,C,D)
+	add	r7,r7,r11,ror#2
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r4,r5			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r3,r10,ror#2					@ F_xx_xx
+	and r11,r4,r5					@ F_xx_xx
+	add	r6,r6,r9			@ E+=X[i]
+	add	r6,r6,r10			@ E+=F_40_59(B,C,D)
+	add	r6,r6,r11,ror#2
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r3,r4			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r7,r10,ror#2					@ F_xx_xx
+	and r11,r3,r4					@ F_xx_xx
+	add	r5,r5,r9			@ E+=X[i]
+	add	r5,r5,r10			@ E+=F_40_59(B,C,D)
+	add	r5,r5,r11,ror#2
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r7,r3			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r6,r10,ror#2					@ F_xx_xx
+	and r11,r7,r3					@ F_xx_xx
+	add	r4,r4,r9			@ E+=X[i]
+	add	r4,r4,r10			@ E+=F_40_59(B,C,D)
+	add	r4,r4,r11,ror#2
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r6,r7			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r5,r10,ror#2					@ F_xx_xx
+	and r11,r6,r7					@ F_xx_xx
+	add	r3,r3,r9			@ E+=X[i]
+	add	r3,r3,r10			@ E+=F_40_59(B,C,D)
+	add	r3,r3,r11,ror#2
+	teq	r14,sp
+	bne	.L_40_59		@ [+((12+5)*5+2)*4]
+
+	ldr	r8,.LK_60_79
+	sub	sp,sp,#20*4
+	cmp	sp,#0			@ set carry to denote 60_79
+	b	.L_20_39_or_60_79	@ [+4], spare 300 bytes
+.L_done:
+	add	sp,sp,#80*4		@ "deallocate" stack frame
+	ldmia	r0,{r8,r9,r10,r11,r12}
+	add	r3,r8,r3
+	add	r4,r9,r4
+	add	r5,r10,r5,ror#2
+	add	r6,r11,r6,ror#2
+	add	r7,r12,r7,ror#2
+	stmia	r0,{r3,r4,r5,r6,r7}
+	teq	r1,r2
+	bne	.Lloop			@ [+18], total 1307
+
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r12,pc}
+#else
+	ldmia	sp!,{r4-r12,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+.align	2
+.LK_00_19:	.word	0x5a827999
+.LK_20_39:	.word	0x6ed9eba1
+.LK_40_59:	.word	0x8f1bbcdc
+.LK_60_79:	.word	0xca62c1d6
+.size	sha1_block_data_order,.-sha1_block_data_order
+.asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
+.align	2

+ 179 - 0
arch/arm/crypto/sha1_glue.c

@@ -0,0 +1,179 @@
+/*
+ * Cryptographic API.
+ * Glue code for the SHA1 Secure Hash Algorithm assembler implementation
+ *
+ * This file is based on sha1_generic.c and sha1_ssse3_glue.c
+ *
+ * Copyright (c) Alan Smithee.
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ * Copyright (c) Mathias Krause <minipli@googlemail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+
+struct SHA1_CTX {
+	uint32_t h0,h1,h2,h3,h4;
+	u64 count;
+	u8 data[SHA1_BLOCK_SIZE];
+};
+
+asmlinkage void sha1_block_data_order(struct SHA1_CTX *digest,
+		const unsigned char *data, unsigned int rounds);
+
+
+static int sha1_init(struct shash_desc *desc)
+{
+	struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+	memset(sctx, 0, sizeof(*sctx));
+	sctx->h0 = SHA1_H0;
+	sctx->h1 = SHA1_H1;
+	sctx->h2 = SHA1_H2;
+	sctx->h3 = SHA1_H3;
+	sctx->h4 = SHA1_H4;
+	return 0;
+}
+
+
+static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data,
+			       unsigned int len, unsigned int partial)
+{
+	unsigned int done = 0;
+
+	sctx->count += len;
+
+	if (partial) {
+		done = SHA1_BLOCK_SIZE - partial;
+		memcpy(sctx->data + partial, data, done);
+		sha1_block_data_order(sctx, sctx->data, 1);
+	}
+
+	if (len - done >= SHA1_BLOCK_SIZE) {
+		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
+		sha1_block_data_order(sctx, data + done, rounds);
+		done += rounds * SHA1_BLOCK_SIZE;
+	}
+
+	memcpy(sctx->data, data + done, len - done);
+	return 0;
+}
+
+
+static int sha1_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
+{
+	struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+	int res;
+
+	/* Handle the fast case right here */
+	if (partial + len < SHA1_BLOCK_SIZE) {
+		sctx->count += len;
+		memcpy(sctx->data + partial, data, len);
+		return 0;
+	}
+	res = __sha1_update(sctx, data, len, partial);
+	return res;
+}
+
+
+/* Add padding and return the message digest. */
+static int sha1_final(struct shash_desc *desc, u8 *out)
+{
+	struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+	unsigned int i, index, padlen;
+	__be32 *dst = (__be32 *)out;
+	__be64 bits;
+	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+
+	bits = cpu_to_be64(sctx->count << 3);
+
+	/* Pad out to 56 mod 64 and append length */
+	index = sctx->count % SHA1_BLOCK_SIZE;
+	padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
+	/* We need to fill a whole block for __sha1_update() */
+	if (padlen <= 56) {
+		sctx->count += padlen;
+		memcpy(sctx->data + index, padding, padlen);
+	} else {
+		__sha1_update(sctx, padding, padlen, index);
+	}
+	__sha1_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
+
+	/* Store state in digest */
+	for (i = 0; i < 5; i++)
+		dst[i] = cpu_to_be32(((u32 *)sctx)[i]);
+
+	/* Wipe context */
+	memset(sctx, 0, sizeof(*sctx));
+	return 0;
+}
+
+
+static int sha1_export(struct shash_desc *desc, void *out)
+{
+	struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+	memcpy(out, sctx, sizeof(*sctx));
+	return 0;
+}
+
+
+static int sha1_import(struct shash_desc *desc, const void *in)
+{
+	struct SHA1_CTX *sctx = shash_desc_ctx(desc);
+	memcpy(sctx, in, sizeof(*sctx));
+	return 0;
+}
+
+
+static struct shash_alg alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	sha1_init,
+	.update		=	sha1_update,
+	.final		=	sha1_final,
+	.export		=	sha1_export,
+	.import		=	sha1_import,
+	.descsize	=	sizeof(struct SHA1_CTX),
+	.statesize	=	sizeof(struct SHA1_CTX),
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name=	"sha1-asm",
+		.cra_priority	=	150,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+
+static int __init sha1_mod_init(void)
+{
+	return crypto_register_shash(&alg);
+}
+
+
+static void __exit sha1_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+
+module_init(sha1_mod_init);
+module_exit(sha1_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (ARM)");
+MODULE_ALIAS("sha1");
+MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>");

+ 2 - 1
arch/powerpc/configs/ppc64_defconfig

@@ -486,7 +486,8 @@ CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
-CONFIG_CRYPTO_DEV_NX=m
+CONFIG_CRYPTO_DEV_NX=y
+CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
 CONFIG_VIRTUALIZATION=y
 CONFIG_KVM_BOOK3S_64=m
 CONFIG_KVM_BOOK3S_64_HV=y

+ 2 - 1
arch/powerpc/configs/pseries_defconfig

@@ -369,7 +369,8 @@ CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
-CONFIG_CRYPTO_DEV_NX=m
+CONFIG_CRYPTO_DEV_NX=y
+CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
 CONFIG_VIRTUALIZATION=y
 CONFIG_KVM_BOOK3S_64=m
 CONFIG_KVM_BOOK3S_64_HV=y

+ 2 - 2
arch/powerpc/kernel/prom_init.c

@@ -705,6 +705,7 @@ static void __init early_cmdline_parse(void)
 #endif
 #define OV5_TYPE1_AFFINITY	0x80	/* Type 1 NUMA affinity */
 #define OV5_PFO_HW_RNG		0x80	/* PFO Random Number Generator */
+#define OV5_PFO_HW_842		0x40	/* PFO Compression Accelerator */
 #define OV5_PFO_HW_ENCR		0x20	/* PFO Encryption Accelerator */
 
 /* Option Vector 6: IBM PAPR hints */
@@ -774,8 +775,7 @@ static unsigned char ibm_architecture_vec[] = {
 	0,
 	0,
 	0,
-	OV5_PFO_HW_RNG | OV5_PFO_HW_ENCR,
-
+	OV5_PFO_HW_RNG | OV5_PFO_HW_ENCR | OV5_PFO_HW_842,
 	/* option vector 6: IBM PAPR hints */
 	4 - 2,				/* length */
 	0,

+ 0 - 5
arch/s390/crypto/aes_s390.c

@@ -216,7 +216,6 @@ static struct crypto_alg aes_alg = {
 	.cra_blocksize		=	AES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(aes_alg.cra_list),
 	.cra_init               =       fallback_init_cip,
 	.cra_exit               =       fallback_exit_cip,
 	.cra_u			=	{
@@ -398,7 +397,6 @@ static struct crypto_alg ecb_aes_alg = {
 	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(ecb_aes_alg.cra_list),
 	.cra_init		=	fallback_init_blk,
 	.cra_exit		=	fallback_exit_blk,
 	.cra_u			=	{
@@ -508,7 +506,6 @@ static struct crypto_alg cbc_aes_alg = {
 	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(cbc_aes_alg.cra_list),
 	.cra_init		=	fallback_init_blk,
 	.cra_exit		=	fallback_exit_blk,
 	.cra_u			=	{
@@ -710,7 +707,6 @@ static struct crypto_alg xts_aes_alg = {
 	.cra_ctxsize		=	sizeof(struct s390_xts_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(xts_aes_alg.cra_list),
 	.cra_init		=	xts_fallback_init,
 	.cra_exit		=	xts_fallback_exit,
 	.cra_u			=	{
@@ -832,7 +828,6 @@ static struct crypto_alg ctr_aes_alg = {
 	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(ctr_aes_alg.cra_list),
 	.cra_u			=	{
 		.blkcipher = {
 			.min_keysize		=	AES_MIN_KEY_SIZE,

+ 0 - 10
arch/s390/crypto/des_s390.c

@@ -70,7 +70,6 @@ static struct crypto_alg des_alg = {
 	.cra_blocksize		=	DES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(des_alg.cra_list),
 	.cra_u			=	{
 		.cipher = {
 			.cia_min_keysize	=	DES_KEY_SIZE,
@@ -163,7 +162,6 @@ static struct crypto_alg ecb_des_alg = {
 	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(ecb_des_alg.cra_list),
 	.cra_u			=	{
 		.blkcipher = {
 			.min_keysize		=	DES_KEY_SIZE,
@@ -206,7 +204,6 @@ static struct crypto_alg cbc_des_alg = {
 	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(cbc_des_alg.cra_list),
 	.cra_u			=	{
 		.blkcipher = {
 			.min_keysize		=	DES_KEY_SIZE,
@@ -271,7 +268,6 @@ static struct crypto_alg des3_alg = {
 	.cra_blocksize		=	DES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(des3_alg.cra_list),
 	.cra_u			=	{
 		.cipher = {
 			.cia_min_keysize	=	DES3_KEY_SIZE,
@@ -314,8 +310,6 @@ static struct crypto_alg ecb_des3_alg = {
 	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(
-						ecb_des3_alg.cra_list),
 	.cra_u			=	{
 		.blkcipher = {
 			.min_keysize		=	DES3_KEY_SIZE,
@@ -358,8 +352,6 @@ static struct crypto_alg cbc_des3_alg = {
 	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(
-						cbc_des3_alg.cra_list),
 	.cra_u			=	{
 		.blkcipher = {
 			.min_keysize		=	DES3_KEY_SIZE,
@@ -452,7 +444,6 @@ static struct crypto_alg ctr_des_alg = {
 	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(ctr_des_alg.cra_list),
 	.cra_u			=	{
 		.blkcipher = {
 			.min_keysize		=	DES_KEY_SIZE,
@@ -496,7 +487,6 @@ static struct crypto_alg ctr_des3_alg = {
 	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(ctr_des3_alg.cra_list),
 	.cra_u			=	{
 		.blkcipher = {
 			.min_keysize		=	DES3_KEY_SIZE,

+ 0 - 1
arch/s390/crypto/ghash_s390.c

@@ -135,7 +135,6 @@ static struct shash_alg ghash_alg = {
 		.cra_blocksize		= GHASH_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct ghash_ctx),
 		.cra_module		= THIS_MODULE,
-		.cra_list		= LIST_HEAD_INIT(ghash_alg.base.cra_list),
 	},
 };
 

+ 4 - 0
arch/x86/crypto/Makefile

@@ -12,6 +12,8 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
 obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
+obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
+obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
@@ -32,6 +34,8 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
 
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
+cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
+cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o

+ 0 - 1
arch/x86/crypto/aes_glue.c

@@ -40,7 +40,6 @@ static struct crypto_alg aes_alg = {
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(aes_alg.cra_list),
 	.cra_u	= {
 		.cipher	= {
 			.cia_min_keysize	= AES_MIN_KEY_SIZE,

+ 219 - 39
arch/x86/crypto/aesni-intel_glue.c

@@ -28,6 +28,9 @@
 #include <crypto/aes.h>
 #include <crypto/cryptd.h>
 #include <crypto/ctr.h>
+#include <crypto/b128ops.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
 #include <asm/cpu_device_id.h>
 #include <asm/i387.h>
 #include <asm/crypto/aes.h>
@@ -41,18 +44,10 @@
 #define HAS_CTR
 #endif
 
-#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE)
-#define HAS_LRW
-#endif
-
 #if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE)
 #define HAS_PCBC
 #endif
 
-#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE)
-#define HAS_XTS
-#endif
-
 /* This data is stored at the end of the crypto_tfm struct.
  * It's a type of per "session" data storage location.
  * This needs to be 16 byte aligned.
@@ -79,6 +74,16 @@ struct aesni_hash_subkey_req_data {
 #define AES_BLOCK_MASK	(~(AES_BLOCK_SIZE-1))
 #define RFC4106_HASH_SUBKEY_SIZE 16
 
+struct aesni_lrw_ctx {
+	struct lrw_table_ctx lrw_table;
+	u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
+};
+
+struct aesni_xts_ctx {
+	u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
+	u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
+};
+
 asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 			     unsigned int key_len);
 asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
@@ -398,13 +403,6 @@ static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm)
 #endif
 #endif
 
-#ifdef HAS_LRW
-static int ablk_lrw_init(struct crypto_tfm *tfm)
-{
-	return ablk_init_common(tfm, "fpu(lrw(__driver-aes-aesni))");
-}
-#endif
-
 #ifdef HAS_PCBC
 static int ablk_pcbc_init(struct crypto_tfm *tfm)
 {
@@ -412,12 +410,160 @@ static int ablk_pcbc_init(struct crypto_tfm *tfm)
 }
 #endif
 
-#ifdef HAS_XTS
-static int ablk_xts_init(struct crypto_tfm *tfm)
+static void lrw_xts_encrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
 {
-	return ablk_init_common(tfm, "fpu(xts(__driver-aes-aesni))");
+	aesni_ecb_enc(ctx, blks, blks, nbytes);
+}
+
+static void lrw_xts_decrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
+{
+	aesni_ecb_dec(ctx, blks, blks, nbytes);
+}
+
+static int lrw_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
+			    unsigned int keylen)
+{
+	struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+	int err;
+
+	err = aes_set_key_common(tfm, ctx->raw_aes_ctx, key,
+				 keylen - AES_BLOCK_SIZE);
+	if (err)
+		return err;
+
+	return lrw_init_table(&ctx->lrw_table, key + keylen - AES_BLOCK_SIZE);
+}
+
+static void lrw_aesni_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	lrw_free_table(&ctx->lrw_table);
+}
+
+static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[8];
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
+		.crypt_fn = lrw_xts_encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	kernel_fpu_begin();
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	kernel_fpu_end();
+
+	return ret;
+}
+
+static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[8];
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
+		.crypt_fn = lrw_xts_decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	kernel_fpu_begin();
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	kernel_fpu_end();
+
+	return ret;
+}
+
+static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
+			    unsigned int keylen)
+{
+	struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+	int err;
+
+	/* key consists of keys of equal size concatenated, therefore
+	 * the length must be even
+	 */
+	if (keylen % 2) {
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	/* first half of xts-key is for crypt */
+	err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return aes_set_key_common(tfm, ctx->raw_tweak_ctx, key + keylen / 2,
+				  keylen / 2);
+}
+
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[8];
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
+		.tweak_fn = XTS_TWEAK_CAST(aesni_enc),
+		.crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
+		.crypt_fn = lrw_xts_encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	kernel_fpu_begin();
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	kernel_fpu_end();
+
+	return ret;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[8];
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
+		.tweak_fn = XTS_TWEAK_CAST(aesni_enc),
+		.crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
+		.crypt_fn = lrw_xts_decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	kernel_fpu_begin();
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	kernel_fpu_end();
+
+	return ret;
 }
-#endif
 
 #ifdef CONFIG_X86_64
 static int rfc4106_init(struct crypto_tfm *tfm)
@@ -1035,10 +1181,10 @@ static struct crypto_alg aesni_algs[] = { {
 	},
 #endif
 #endif
-#ifdef HAS_LRW
+#ifdef HAS_PCBC
 }, {
-	.cra_name		= "lrw(aes)",
-	.cra_driver_name	= "lrw-aes-aesni",
+	.cra_name		= "pcbc(aes)",
+	.cra_driver_name	= "pcbc-aes-aesni",
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
@@ -1046,12 +1192,12 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_lrw_init,
+	.cra_init		= ablk_pcbc_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
 			.ivsize		= AES_BLOCK_SIZE,
 			.setkey		= ablk_set_key,
 			.encrypt	= ablk_encrypt,
@@ -1059,10 +1205,50 @@ static struct crypto_alg aesni_algs[] = { {
 		},
 	},
 #endif
-#ifdef HAS_PCBC
 }, {
-	.cra_name		= "pcbc(aes)",
-	.cra_driver_name	= "pcbc-aes-aesni",
+	.cra_name		= "__lrw-aes-aesni",
+	.cra_driver_name	= "__driver-lrw-aes-aesni",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct aesni_lrw_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_exit		= lrw_aesni_exit_tfm,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
+			.setkey		= lrw_aesni_setkey,
+			.encrypt	= lrw_encrypt,
+			.decrypt	= lrw_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__xts-aes-aesni",
+	.cra_driver_name	= "__driver-xts-aes-aesni",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct aesni_xts_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+			.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
+			.setkey		= xts_aesni_setkey,
+			.encrypt	= xts_encrypt,
+			.decrypt	= xts_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "lrw(aes)",
+	.cra_driver_name	= "lrw-aes-aesni",
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
@@ -1070,20 +1256,18 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_pcbc_init,
+	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
+			.min_keysize	= AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
 			.ivsize		= AES_BLOCK_SIZE,
 			.setkey		= ablk_set_key,
 			.encrypt	= ablk_encrypt,
 			.decrypt	= ablk_decrypt,
 		},
 	},
-#endif
-#ifdef HAS_XTS
 }, {
 	.cra_name		= "xts(aes)",
 	.cra_driver_name	= "xts-aes-aesni",
@@ -1094,7 +1278,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_xts_init,
+	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
@@ -1106,7 +1290,6 @@ static struct crypto_alg aesni_algs[] = { {
 			.decrypt	= ablk_decrypt,
 		},
 	},
-#endif
 } };
 
 
@@ -1118,7 +1301,7 @@ MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
 
 static int __init aesni_init(void)
 {
-	int err, i;
+	int err;
 
 	if (!x86_match_cpu(aesni_cpu_id))
 		return -ENODEV;
@@ -1127,9 +1310,6 @@ static int __init aesni_init(void)
 	if (err)
 		return err;
 
-	for (i = 0; i < ARRAY_SIZE(aesni_algs); i++)
-		INIT_LIST_HEAD(&aesni_algs[i].cra_list);
-
 	return crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
 }
 

+ 0 - 4
arch/x86/crypto/blowfish_glue.c

@@ -367,7 +367,6 @@ static struct crypto_alg bf_algs[4] = { {
 	.cra_ctxsize		= sizeof(struct bf_ctx),
 	.cra_alignmask		= 0,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(bf_algs[0].cra_list),
 	.cra_u = {
 		.cipher = {
 			.cia_min_keysize	= BF_MIN_KEY_SIZE,
@@ -387,7 +386,6 @@ static struct crypto_alg bf_algs[4] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(bf_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= BF_MIN_KEY_SIZE,
@@ -407,7 +405,6 @@ static struct crypto_alg bf_algs[4] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(bf_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= BF_MIN_KEY_SIZE,
@@ -428,7 +425,6 @@ static struct crypto_alg bf_algs[4] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(bf_algs[3].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= BF_MIN_KEY_SIZE,

File diff suppressed because it is too large
+ 688 - 688
arch/x86/crypto/camellia_glue.c


+ 376 - 0
arch/x86/crypto/cast5-avx-x86_64-asm_64.S

@@ -0,0 +1,376 @@
+/*
+ * Cast5 Cipher 16-way parallel algorithm (AVX/x86_64)
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+.file "cast5-avx-x86_64-asm_64.S"
+
+.extern cast5_s1
+.extern cast5_s2
+.extern cast5_s3
+.extern cast5_s4
+
+/* structure of crypto context */
+#define km	0
+#define kr	(16*4)
+#define rr	((16*4)+16)
+
+/* s-boxes */
+#define s1	cast5_s1
+#define s2	cast5_s2
+#define s3	cast5_s3
+#define s4	cast5_s4
+
+/**********************************************************************
+  16-way AVX cast5
+ **********************************************************************/
+#define CTX %rdi
+
+#define RL1 %xmm0
+#define RR1 %xmm1
+#define RL2 %xmm2
+#define RR2 %xmm3
+#define RL3 %xmm4
+#define RR3 %xmm5
+#define RL4 %xmm6
+#define RR4 %xmm7
+
+#define RX %xmm8
+
+#define RKM  %xmm9
+#define RKR  %xmm10
+#define RKRF %xmm11
+#define RKRR %xmm12
+
+#define R32  %xmm13
+#define R1ST %xmm14
+
+#define RTMP %xmm15
+
+#define RID1  %rbp
+#define RID1d %ebp
+#define RID2  %rsi
+#define RID2d %esi
+
+#define RGI1   %rdx
+#define RGI1bl %dl
+#define RGI1bh %dh
+#define RGI2   %rcx
+#define RGI2bl %cl
+#define RGI2bh %ch
+
+#define RGI3   %rax
+#define RGI3bl %al
+#define RGI3bh %ah
+#define RGI4   %rbx
+#define RGI4bl %bl
+#define RGI4bh %bh
+
+#define RFS1  %r8
+#define RFS1d %r8d
+#define RFS2  %r9
+#define RFS2d %r9d
+#define RFS3  %r10
+#define RFS3d %r10d
+
+
+#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
+	movzbl		src ## bh,     RID1d;    \
+	movzbl		src ## bl,     RID2d;    \
+	shrq $16,	src;                     \
+	movl		s1(, RID1, 4), dst ## d; \
+	op1		s2(, RID2, 4), dst ## d; \
+	movzbl		src ## bh,     RID1d;    \
+	movzbl		src ## bl,     RID2d;    \
+	interleave_op(il_reg);			 \
+	op2		s3(, RID1, 4), dst ## d; \
+	op3		s4(, RID2, 4), dst ## d;
+
+#define dummy(d) /* do nothing */
+
+#define shr_next(reg) \
+	shrq $16,	reg;
+
+#define F_head(a, x, gi1, gi2, op0) \
+	op0	a,	RKM,  x;                 \
+	vpslld	RKRF,	x,    RTMP;              \
+	vpsrld	RKRR,	x,    x;                 \
+	vpor	RTMP,	x,    x;                 \
+	\
+	vmovq		x,    gi1;               \
+	vpextrq $1,	x,    gi2;
+
+#define F_tail(a, x, gi1, gi2, op1, op2, op3) \
+	lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \
+	lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \
+	\
+	lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none);     \
+	shlq $32,	RFS2;                                      \
+	orq		RFS1, RFS2;                                \
+	lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none);     \
+	shlq $32,	RFS1;                                      \
+	orq		RFS1, RFS3;                                \
+	\
+	vmovq		RFS2, x;                                   \
+	vpinsrq $1,	RFS3, x, x;
+
+#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \
+	F_head(b1, RX, RGI1, RGI2, op0);              \
+	F_head(b2, RX, RGI3, RGI4, op0);              \
+	\
+	F_tail(b1, RX, RGI1, RGI2, op1, op2, op3);    \
+	F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3);  \
+	\
+	vpxor		a1, RX,   a1;                 \
+	vpxor		a2, RTMP, a2;
+
+#define F1_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl)
+#define F2_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl)
+#define F3_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl)
+
+#define subround(a1, b1, a2, b2, f) \
+	F ## f ## _2(a1, b1, a2, b2);
+
+#define round(l, r, n, f) \
+	vbroadcastss 	(km+(4*n))(CTX), RKM;        \
+	vpand		R1ST,            RKR,  RKRF; \
+	vpsubq		RKRF,            R32,  RKRR; \
+	vpsrldq $1,	RKR,             RKR;        \
+	subround(l ## 1, r ## 1, l ## 2, r ## 2, f); \
+	subround(l ## 3, r ## 3, l ## 4, r ## 4, f);
+
+#define enc_preload_rkr() \
+	vbroadcastss	.L16_mask,                RKR;      \
+	/* add 16-bit rotation to key rotations (mod 32) */ \
+	vpxor		kr(CTX),                  RKR, RKR;
+
+#define dec_preload_rkr() \
+	vbroadcastss	.L16_mask,                RKR;      \
+	/* add 16-bit rotation to key rotations (mod 32) */ \
+	vpxor		kr(CTX),                  RKR, RKR; \
+	vpshufb		.Lbswap128_mask,          RKR, RKR;
+
+#define transpose_2x4(x0, x1, t0, t1) \
+	vpunpckldq		x1, x0, t0; \
+	vpunpckhdq		x1, x0, t1; \
+	\
+	vpunpcklqdq		t1, t0, x0; \
+	vpunpckhqdq		t1, t0, x1;
+
+#define inpack_blocks(in, x0, x1, t0, t1, rmask) \
+	vmovdqu (0*4*4)(in),	x0; \
+	vmovdqu (1*4*4)(in),	x1; \
+	vpshufb rmask, 	x0,	x0; \
+	vpshufb rmask, 	x1,	x1; \
+	\
+	transpose_2x4(x0, x1, t0, t1)
+
+#define outunpack_blocks(out, x0, x1, t0, t1, rmask) \
+	transpose_2x4(x0, x1, t0, t1) \
+	\
+	vpshufb rmask,	x0, x0;           \
+	vpshufb rmask,	x1, x1;           \
+	vmovdqu 	x0, (0*4*4)(out); \
+	vmovdqu		x1, (1*4*4)(out);
+
+#define outunpack_xor_blocks(out, x0, x1, t0, t1, rmask) \
+	transpose_2x4(x0, x1, t0, t1) \
+	\
+	vpshufb rmask,	x0, x0;               \
+	vpshufb rmask,	x1, x1;               \
+	vpxor		(0*4*4)(out), x0, x0; \
+	vmovdqu 	x0, (0*4*4)(out);     \
+	vpxor		(1*4*4)(out), x1, x1; \
+	vmovdqu	        x1, (1*4*4)(out);
+
+.data
+
+.align 16
+.Lbswap_mask:
+	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.Lbswap128_mask:
+	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+.L16_mask:
+	.byte 16, 16, 16, 16
+.L32_mask:
+	.byte 32, 0, 0, 0
+.Lfirst_mask:
+	.byte 0x1f, 0, 0, 0
+
+.text
+
+.align 16
+.global __cast5_enc_blk_16way
+.type   __cast5_enc_blk_16way,@function;
+
+__cast5_enc_blk_16way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 *	%rcx: bool, if true: xor output
+	 */
+
+	pushq %rbp;
+	pushq %rbx;
+	pushq %rcx;
+
+	vmovdqa .Lbswap_mask, RKM;
+	vmovd .Lfirst_mask, R1ST;
+	vmovd .L32_mask, R32;
+	enc_preload_rkr();
+
+	leaq 1*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM);
+	inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM);
+
+	movq %rsi, %r11;
+
+	round(RL, RR, 0, 1);
+	round(RR, RL, 1, 2);
+	round(RL, RR, 2, 3);
+	round(RR, RL, 3, 1);
+	round(RL, RR, 4, 2);
+	round(RR, RL, 5, 3);
+	round(RL, RR, 6, 1);
+	round(RR, RL, 7, 2);
+	round(RL, RR, 8, 3);
+	round(RR, RL, 9, 1);
+	round(RL, RR, 10, 2);
+	round(RR, RL, 11, 3);
+
+	movzbl rr(CTX), %eax;
+	testl %eax, %eax;
+	jnz __skip_enc;
+
+	round(RL, RR, 12, 1);
+	round(RR, RL, 13, 2);
+	round(RL, RR, 14, 3);
+	round(RR, RL, 15, 1);
+
+__skip_enc:
+	popq %rcx;
+	popq %rbx;
+	popq %rbp;
+
+	vmovdqa .Lbswap_mask, RKM;
+	leaq 1*(2*4*4)(%r11), %rax;
+
+	testb %cl, %cl;
+	jnz __enc_xor16;
+
+	outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
+	outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
+
+	ret;
+
+__enc_xor16:
+	outunpack_xor_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
+	outunpack_xor_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%r11), %rax;
+	outunpack_xor_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%r11), %rax;
+	outunpack_xor_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
+
+	ret;
+
+.align 16
+.global cast5_dec_blk_16way
+.type   cast5_dec_blk_16way,@function;
+
+cast5_dec_blk_16way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 */
+
+	pushq %rbp;
+	pushq %rbx;
+
+	vmovdqa .Lbswap_mask, RKM;
+	vmovd .Lfirst_mask, R1ST;
+	vmovd .L32_mask, R32;
+	dec_preload_rkr();
+
+	leaq 1*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM);
+	inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM);
+
+	movq %rsi, %r11;
+
+	movzbl rr(CTX), %eax;
+	testl %eax, %eax;
+	jnz __skip_dec;
+
+	round(RL, RR, 15, 1);
+	round(RR, RL, 14, 3);
+	round(RL, RR, 13, 2);
+	round(RR, RL, 12, 1);
+
+__dec_tail:
+	round(RL, RR, 11, 3);
+	round(RR, RL, 10, 2);
+	round(RL, RR, 9, 1);
+	round(RR, RL, 8, 3);
+	round(RL, RR, 7, 2);
+	round(RR, RL, 6, 1);
+	round(RL, RR, 5, 3);
+	round(RR, RL, 4, 2);
+	round(RL, RR, 3, 1);
+	round(RR, RL, 2, 3);
+	round(RL, RR, 1, 2);
+	round(RR, RL, 0, 1);
+
+	vmovdqa .Lbswap_mask, RKM;
+	popq %rbx;
+	popq %rbp;
+
+	leaq 1*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
+	outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
+
+	ret;
+
+__skip_dec:
+	vpsrldq $4, RKR, RKR;
+	jmp __dec_tail;

+ 530 - 0
arch/x86/crypto/cast5_avx_glue.c

@@ -0,0 +1,530 @@
+/*
+ * Glue Code for the AVX assembler implemention of the Cast5 Cipher
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/algapi.h>
+#include <crypto/cast5.h>
+#include <crypto/cryptd.h>
+#include <crypto/ctr.h>
+#include <asm/xcr.h>
+#include <asm/xsave.h>
+#include <asm/crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
+
+#define CAST5_PARALLEL_BLOCKS 16
+
+asmlinkage void __cast5_enc_blk_16way(struct cast5_ctx *ctx, u8 *dst,
+				      const u8 *src, bool xor);
+asmlinkage void cast5_dec_blk_16way(struct cast5_ctx *ctx, u8 *dst,
+				    const u8 *src);
+
+static inline void cast5_enc_blk_xway(struct cast5_ctx *ctx, u8 *dst,
+				      const u8 *src)
+{
+	__cast5_enc_blk_16way(ctx, dst, src, false);
+}
+
+static inline void cast5_enc_blk_xway_xor(struct cast5_ctx *ctx, u8 *dst,
+					  const u8 *src)
+{
+	__cast5_enc_blk_16way(ctx, dst, src, true);
+}
+
+static inline void cast5_dec_blk_xway(struct cast5_ctx *ctx, u8 *dst,
+				      const u8 *src)
+{
+	cast5_dec_blk_16way(ctx, dst, src);
+}
+
+
+static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+{
+	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
+			      NULL, fpu_enabled, nbytes);
+}
+
+static inline void cast5_fpu_end(bool fpu_enabled)
+{
+	return glue_fpu_end(fpu_enabled);
+}
+
+static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
+		     bool enc)
+{
+	bool fpu_enabled = false;
+	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = CAST5_BLOCK_SIZE;
+	unsigned int nbytes;
+	int err;
+
+	err = blkcipher_walk_virt(desc, walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	while ((nbytes = walk->nbytes)) {
+		u8 *wsrc = walk->src.virt.addr;
+		u8 *wdst = walk->dst.virt.addr;
+
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+
+		/* Process multi-block batch */
+		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
+			do {
+				if (enc)
+					cast5_enc_blk_xway(ctx, wdst, wsrc);
+				else
+					cast5_dec_blk_xway(ctx, wdst, wsrc);
+
+				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
+				wdst += bsize * CAST5_PARALLEL_BLOCKS;
+				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
+			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
+
+			if (nbytes < bsize)
+				goto done;
+		}
+
+		/* Handle leftovers */
+		do {
+			if (enc)
+				__cast5_encrypt(ctx, wdst, wsrc);
+			else
+				__cast5_decrypt(ctx, wdst, wsrc);
+
+			wsrc += bsize;
+			wdst += bsize;
+			nbytes -= bsize;
+		} while (nbytes >= bsize);
+
+done:
+		err = blkcipher_walk_done(desc, walk, nbytes);
+	}
+
+	cast5_fpu_end(fpu_enabled);
+	return err;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_crypt(desc, &walk, true);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_crypt(desc, &walk, false);
+}
+
+static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
+				  struct blkcipher_walk *walk)
+{
+	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = CAST5_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u64 *src = (u64 *)walk->src.virt.addr;
+	u64 *dst = (u64 *)walk->dst.virt.addr;
+	u64 *iv = (u64 *)walk->iv;
+
+	do {
+		*dst = *src ^ *iv;
+		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
+		iv = dst;
+
+		src += 1;
+		dst += 1;
+		nbytes -= bsize;
+	} while (nbytes >= bsize);
+
+	*(u64 *)walk->iv = *iv;
+	return nbytes;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		nbytes = __cbc_encrypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
+				  struct blkcipher_walk *walk)
+{
+	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = CAST5_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u64 *src = (u64 *)walk->src.virt.addr;
+	u64 *dst = (u64 *)walk->dst.virt.addr;
+	u64 ivs[CAST5_PARALLEL_BLOCKS - 1];
+	u64 last_iv;
+	int i;
+
+	/* Start of the last block. */
+	src += nbytes / bsize - 1;
+	dst += nbytes / bsize - 1;
+
+	last_iv = *src;
+
+	/* Process multi-block batch */
+	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
+		do {
+			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
+			src -= CAST5_PARALLEL_BLOCKS - 1;
+			dst -= CAST5_PARALLEL_BLOCKS - 1;
+
+			for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
+				ivs[i] = src[i];
+
+			cast5_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
+
+			for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
+				*(dst + (i + 1)) ^= *(ivs + i);
+
+			nbytes -= bsize;
+			if (nbytes < bsize)
+				goto done;
+
+			*dst ^= *(src - 1);
+			src -= 1;
+			dst -= 1;
+		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
+
+		if (nbytes < bsize)
+			goto done;
+	}
+
+	/* Handle leftovers */
+	for (;;) {
+		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);
+
+		nbytes -= bsize;
+		if (nbytes < bsize)
+			break;
+
+		*dst ^= *(src - 1);
+		src -= 1;
+		dst -= 1;
+	}
+
+done:
+	*dst ^= *(u64 *)walk->iv;
+	*(u64 *)walk->iv = last_iv;
+
+	return nbytes;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	bool fpu_enabled = false;
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	while ((nbytes = walk.nbytes)) {
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+		nbytes = __cbc_decrypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	cast5_fpu_end(fpu_enabled);
+	return err;
+}
+
+static void ctr_crypt_final(struct blkcipher_desc *desc,
+			    struct blkcipher_walk *walk)
+{
+	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	u8 *ctrblk = walk->iv;
+	u8 keystream[CAST5_BLOCK_SIZE];
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	unsigned int nbytes = walk->nbytes;
+
+	__cast5_encrypt(ctx, keystream, ctrblk);
+	crypto_xor(keystream, src, nbytes);
+	memcpy(dst, keystream, nbytes);
+
+	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
+}
+
+static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
+				struct blkcipher_walk *walk)
+{
+	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = CAST5_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u64 *src = (u64 *)walk->src.virt.addr;
+	u64 *dst = (u64 *)walk->dst.virt.addr;
+	u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
+	__be64 ctrblocks[CAST5_PARALLEL_BLOCKS];
+	int i;
+
+	/* Process multi-block batch */
+	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
+		do {
+			/* create ctrblks for parallel encrypt */
+			for (i = 0; i < CAST5_PARALLEL_BLOCKS; i++) {
+				if (dst != src)
+					dst[i] = src[i];
+
+				ctrblocks[i] = cpu_to_be64(ctrblk++);
+			}
+
+			cast5_enc_blk_xway_xor(ctx, (u8 *)dst,
+					       (u8 *)ctrblocks);
+
+			src += CAST5_PARALLEL_BLOCKS;
+			dst += CAST5_PARALLEL_BLOCKS;
+			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
+		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
+
+		if (nbytes < bsize)
+			goto done;
+	}
+
+	/* Handle leftovers */
+	do {
+		if (dst != src)
+			*dst = *src;
+
+		ctrblocks[0] = cpu_to_be64(ctrblk++);
+
+		__cast5_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
+		*dst ^= ctrblocks[0];
+
+		src += 1;
+		dst += 1;
+		nbytes -= bsize;
+	} while (nbytes >= bsize);
+
+done:
+	*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
+	return nbytes;
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		     struct scatterlist *src, unsigned int nbytes)
+{
+	bool fpu_enabled = false;
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+		nbytes = __ctr_crypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	cast5_fpu_end(fpu_enabled);
+
+	if (walk.nbytes) {
+		ctr_crypt_final(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+
+	return err;
+}
+
+
+static struct crypto_alg cast5_algs[6] = { {
+	.cra_name		= "__ecb-cast5-avx",
+	.cra_driver_name	= "__driver-ecb-cast5-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST5_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast5_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.setkey		= cast5_setkey,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__cbc-cast5-avx",
+	.cra_driver_name	= "__driver-cbc-cast5-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST5_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast5_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.setkey		= cast5_setkey,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__ctr-cast5-avx",
+	.cra_driver_name	= "__driver-ctr-cast5-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct cast5_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.ivsize		= CAST5_BLOCK_SIZE,
+			.setkey		= cast5_setkey,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+}, {
+	.cra_name		= "ecb(cast5)",
+	.cra_driver_name	= "ecb-cast5-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST5_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "cbc(cast5)",
+	.cra_driver_name	= "cbc-cast5-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST5_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.ivsize		= CAST5_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= __ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "ctr(cast5)",
+	.cra_driver_name	= "ctr-cast5-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.ivsize		= CAST5_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_encrypt,
+			.geniv		= "chainiv",
+		},
+	},
+} };
+
+static int __init cast5_init(void)
+{
+	u64 xcr0;
+
+	if (!cpu_has_avx || !cpu_has_osxsave) {
+		pr_info("AVX instructions are not detected.\n");
+		return -ENODEV;
+	}
+
+	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
+		pr_info("AVX detected but unusable.\n");
+		return -ENODEV;
+	}
+
+	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
+}
+
+static void __exit cast5_exit(void)
+{
+	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
+}
+
+module_init(cast5_init);
+module_exit(cast5_exit);
+
+MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("cast5");

+ 383 - 0
arch/x86/crypto/cast6-avx-x86_64-asm_64.S

@@ -0,0 +1,383 @@
+/*
+ * Cast6 Cipher 8-way parallel algorithm (AVX/x86_64)
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+.file "cast6-avx-x86_64-asm_64.S"
+
+.extern cast6_s1
+.extern cast6_s2
+.extern cast6_s3
+.extern cast6_s4
+
+/* structure of crypto context */
+#define km	0
+#define kr	(12*4*4)
+
+/* s-boxes */
+#define s1	cast6_s1
+#define s2	cast6_s2
+#define s3	cast6_s3
+#define s4	cast6_s4
+
+/**********************************************************************
+  8-way AVX cast6
+ **********************************************************************/
+#define CTX %rdi
+
+#define RA1 %xmm0
+#define RB1 %xmm1
+#define RC1 %xmm2
+#define RD1 %xmm3
+
+#define RA2 %xmm4
+#define RB2 %xmm5
+#define RC2 %xmm6
+#define RD2 %xmm7
+
+#define RX  %xmm8
+
+#define RKM  %xmm9
+#define RKR  %xmm10
+#define RKRF %xmm11
+#define RKRR %xmm12
+#define R32  %xmm13
+#define R1ST %xmm14
+
+#define RTMP %xmm15
+
+#define RID1  %rbp
+#define RID1d %ebp
+#define RID2  %rsi
+#define RID2d %esi
+
+#define RGI1   %rdx
+#define RGI1bl %dl
+#define RGI1bh %dh
+#define RGI2   %rcx
+#define RGI2bl %cl
+#define RGI2bh %ch
+
+#define RGI3   %rax
+#define RGI3bl %al
+#define RGI3bh %ah
+#define RGI4   %rbx
+#define RGI4bl %bl
+#define RGI4bh %bh
+
+#define RFS1  %r8
+#define RFS1d %r8d
+#define RFS2  %r9
+#define RFS2d %r9d
+#define RFS3  %r10
+#define RFS3d %r10d
+
+
+#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
+	movzbl		src ## bh,     RID1d;    \
+	movzbl		src ## bl,     RID2d;    \
+	shrq $16,	src;                     \
+	movl		s1(, RID1, 4), dst ## d; \
+	op1		s2(, RID2, 4), dst ## d; \
+	movzbl		src ## bh,     RID1d;    \
+	movzbl		src ## bl,     RID2d;    \
+	interleave_op(il_reg);			 \
+	op2		s3(, RID1, 4), dst ## d; \
+	op3		s4(, RID2, 4), dst ## d;
+
+#define dummy(d) /* do nothing */
+
+#define shr_next(reg) \
+	shrq $16,	reg;
+
+#define F_head(a, x, gi1, gi2, op0) \
+	op0	a,	RKM,  x;                 \
+	vpslld	RKRF,	x,    RTMP;              \
+	vpsrld	RKRR,	x,    x;                 \
+	vpor	RTMP,	x,    x;                 \
+	\
+	vmovq		x,    gi1;               \
+	vpextrq $1,	x,    gi2;
+
+#define F_tail(a, x, gi1, gi2, op1, op2, op3) \
+	lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \
+	lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \
+	\
+	lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none);     \
+	shlq $32,	RFS2;                                      \
+	orq		RFS1, RFS2;                                \
+	lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none);     \
+	shlq $32,	RFS1;                                      \
+	orq		RFS1, RFS3;                                \
+	\
+	vmovq		RFS2, x;                                   \
+	vpinsrq $1,	RFS3, x, x;
+
+#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \
+	F_head(b1, RX, RGI1, RGI2, op0);              \
+	F_head(b2, RX, RGI3, RGI4, op0);              \
+	\
+	F_tail(b1, RX, RGI1, RGI2, op1, op2, op3);    \
+	F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3);  \
+	\
+	vpxor		a1, RX,   a1;                 \
+	vpxor		a2, RTMP, a2;
+
+#define F1_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl)
+#define F2_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl)
+#define F3_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl)
+
+#define qop(in, out, f) \
+	F ## f ## _2(out ## 1, in ## 1, out ## 2, in ## 2);
+
+#define get_round_keys(nn) \
+	vbroadcastss	(km+(4*(nn)))(CTX), RKM;        \
+	vpand		R1ST,               RKR,  RKRF; \
+	vpsubq		RKRF,               R32,  RKRR; \
+	vpsrldq $1,	RKR,                RKR;
+
+#define Q(n) \
+	get_round_keys(4*n+0); \
+	qop(RD, RC, 1);        \
+	\
+	get_round_keys(4*n+1); \
+	qop(RC, RB, 2);        \
+	\
+	get_round_keys(4*n+2); \
+	qop(RB, RA, 3);        \
+	\
+	get_round_keys(4*n+3); \
+	qop(RA, RD, 1);
+
+#define QBAR(n) \
+	get_round_keys(4*n+3); \
+	qop(RA, RD, 1);        \
+	\
+	get_round_keys(4*n+2); \
+	qop(RB, RA, 3);        \
+	\
+	get_round_keys(4*n+1); \
+	qop(RC, RB, 2);        \
+	\
+	get_round_keys(4*n+0); \
+	qop(RD, RC, 1);
+
+#define shuffle(mask) \
+	vpshufb		mask,            RKR, RKR;
+
+#define preload_rkr(n, do_mask, mask) \
+	vbroadcastss	.L16_mask,                RKR;      \
+	/* add 16-bit rotation to key rotations (mod 32) */ \
+	vpxor		(kr+n*16)(CTX),           RKR, RKR; \
+	do_mask(mask);
+
+#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	vpunpckldq		x1, x0, t0; \
+	vpunpckhdq		x1, x0, t2; \
+	vpunpckldq		x3, x2, t1; \
+	vpunpckhdq		x3, x2, x3; \
+	\
+	vpunpcklqdq		t1, t0, x0; \
+	vpunpckhqdq		t1, t0, x1; \
+	vpunpcklqdq		x3, t2, x2; \
+	vpunpckhqdq		x3, t2, x3;
+
+#define inpack_blocks(in, x0, x1, x2, x3, t0, t1, t2, rmask) \
+	vmovdqu (0*4*4)(in),	x0; \
+	vmovdqu (1*4*4)(in),	x1; \
+	vmovdqu (2*4*4)(in),	x2; \
+	vmovdqu (3*4*4)(in),	x3; \
+	vpshufb rmask, x0,	x0; \
+	vpshufb rmask, x1,	x1; \
+	vpshufb rmask, x2,	x2; \
+	vpshufb rmask, x3,	x3; \
+	\
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
+
+#define outunpack_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	\
+	vpshufb rmask,		x0, x0;       \
+	vpshufb rmask,		x1, x1;       \
+	vpshufb rmask,		x2, x2;       \
+	vpshufb rmask,		x3, x3;       \
+	vmovdqu x0,		(0*4*4)(out); \
+	vmovdqu	x1,		(1*4*4)(out); \
+	vmovdqu	x2,		(2*4*4)(out); \
+	vmovdqu	x3,		(3*4*4)(out);
+
+#define outunpack_xor_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	\
+	vpshufb rmask,		x0, x0;       \
+	vpshufb rmask,		x1, x1;       \
+	vpshufb rmask,		x2, x2;       \
+	vpshufb rmask,		x3, x3;       \
+	vpxor (0*4*4)(out),	x0, x0;       \
+	vmovdqu	x0,		(0*4*4)(out); \
+	vpxor (1*4*4)(out),	x1, x1;       \
+	vmovdqu	x1,		(1*4*4)(out); \
+	vpxor (2*4*4)(out),	x2, x2;       \
+	vmovdqu x2,		(2*4*4)(out); \
+	vpxor (3*4*4)(out),	x3, x3;       \
+	vmovdqu x3,		(3*4*4)(out);
+
+.data
+
+.align 16
+.Lbswap_mask:
+	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.Lrkr_enc_Q_Q_QBAR_QBAR:
+	.byte 0, 1, 2, 3, 4, 5, 6, 7, 11, 10, 9, 8, 15, 14, 13, 12
+.Lrkr_enc_QBAR_QBAR_QBAR_QBAR:
+	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.Lrkr_dec_Q_Q_Q_Q:
+	.byte 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
+.Lrkr_dec_Q_Q_QBAR_QBAR:
+	.byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0
+.Lrkr_dec_QBAR_QBAR_QBAR_QBAR:
+	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+.L16_mask:
+	.byte 16, 16, 16, 16
+.L32_mask:
+	.byte 32, 0, 0, 0
+.Lfirst_mask:
+	.byte 0x1f, 0, 0, 0
+
+.text
+
+.align 16
+.global __cast6_enc_blk_8way
+.type   __cast6_enc_blk_8way,@function;
+
+__cast6_enc_blk_8way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 *	%rcx: bool, if true: xor output
+	 */
+
+	pushq %rbp;
+	pushq %rbx;
+	pushq %rcx;
+
+	vmovdqa .Lbswap_mask, RKM;
+	vmovd .Lfirst_mask, R1ST;
+	vmovd .L32_mask, R32;
+
+	leaq (4*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+
+	movq %rsi, %r11;
+
+	preload_rkr(0, dummy, none);
+	Q(0);
+	Q(1);
+	Q(2);
+	Q(3);
+	preload_rkr(1, shuffle, .Lrkr_enc_Q_Q_QBAR_QBAR);
+	Q(4);
+	Q(5);
+	QBAR(6);
+	QBAR(7);
+	preload_rkr(2, shuffle, .Lrkr_enc_QBAR_QBAR_QBAR_QBAR);
+	QBAR(8);
+	QBAR(9);
+	QBAR(10);
+	QBAR(11);
+
+	popq %rcx;
+	popq %rbx;
+	popq %rbp;
+
+	vmovdqa .Lbswap_mask, RKM;
+	leaq (4*4*4)(%r11), %rax;
+
+	testb %cl, %cl;
+	jnz __enc_xor8;
+
+	outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+
+	ret;
+
+__enc_xor8:
+	outunpack_xor_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	outunpack_xor_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+
+	ret;
+
+.align 16
+.global cast6_dec_blk_8way
+.type   cast6_dec_blk_8way,@function;
+
+cast6_dec_blk_8way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 */
+
+	pushq %rbp;
+	pushq %rbx;
+
+	vmovdqa .Lbswap_mask, RKM;
+	vmovd .Lfirst_mask, R1ST;
+	vmovd .L32_mask, R32;
+
+	leaq (4*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+
+	movq %rsi, %r11;
+
+	preload_rkr(2, shuffle, .Lrkr_dec_Q_Q_Q_Q);
+	Q(11);
+	Q(10);
+	Q(9);
+	Q(8);
+	preload_rkr(1, shuffle, .Lrkr_dec_Q_Q_QBAR_QBAR);
+	Q(7);
+	Q(6);
+	QBAR(5);
+	QBAR(4);
+	preload_rkr(0, shuffle, .Lrkr_dec_QBAR_QBAR_QBAR_QBAR);
+	QBAR(3);
+	QBAR(2);
+	QBAR(1);
+	QBAR(0);
+
+	popq %rbx;
+	popq %rbp;
+
+	vmovdqa .Lbswap_mask, RKM;
+	leaq (4*4*4)(%r11), %rax;
+	outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+
+	ret;

+ 648 - 0
arch/x86/crypto/cast6_avx_glue.c

@@ -0,0 +1,648 @@
+/*
+ * Glue Code for the AVX assembler implemention of the Cast6 Cipher
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/algapi.h>
+#include <crypto/cast6.h>
+#include <crypto/cryptd.h>
+#include <crypto/b128ops.h>
+#include <crypto/ctr.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+#include <asm/xcr.h>
+#include <asm/xsave.h>
+#include <asm/crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
+
+#define CAST6_PARALLEL_BLOCKS 8
+
+asmlinkage void __cast6_enc_blk_8way(struct cast6_ctx *ctx, u8 *dst,
+				     const u8 *src, bool xor);
+asmlinkage void cast6_dec_blk_8way(struct cast6_ctx *ctx, u8 *dst,
+				   const u8 *src);
+
+static inline void cast6_enc_blk_xway(struct cast6_ctx *ctx, u8 *dst,
+				      const u8 *src)
+{
+	__cast6_enc_blk_8way(ctx, dst, src, false);
+}
+
+static inline void cast6_enc_blk_xway_xor(struct cast6_ctx *ctx, u8 *dst,
+					  const u8 *src)
+{
+	__cast6_enc_blk_8way(ctx, dst, src, true);
+}
+
+static inline void cast6_dec_blk_xway(struct cast6_ctx *ctx, u8 *dst,
+				      const u8 *src)
+{
+	cast6_dec_blk_8way(ctx, dst, src);
+}
+
+
+static void cast6_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
+{
+	u128 ivs[CAST6_PARALLEL_BLOCKS - 1];
+	unsigned int j;
+
+	for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++)
+		ivs[j] = src[j];
+
+	cast6_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
+
+	for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++)
+		u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
+}
+
+static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+{
+	be128 ctrblk;
+
+	u128_to_be128(&ctrblk, iv);
+	u128_inc(iv);
+
+	__cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
+	u128_xor(dst, src, (u128 *)&ctrblk);
+}
+
+static void cast6_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
+				   u128 *iv)
+{
+	be128 ctrblks[CAST6_PARALLEL_BLOCKS];
+	unsigned int i;
+
+	for (i = 0; i < CAST6_PARALLEL_BLOCKS; i++) {
+		if (dst != src)
+			dst[i] = src[i];
+
+		u128_to_be128(&ctrblks[i], iv);
+		u128_inc(iv);
+	}
+
+	cast6_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
+}
+
+static const struct common_glue_ctx cast6_enc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = CAST6_PARALLEL_BLOCKS,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_enc_blk_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) }
+	} }
+};
+
+static const struct common_glue_ctx cast6_ctr = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = CAST6_PARALLEL_BLOCKS,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) }
+	} }
+};
+
+static const struct common_glue_ctx cast6_dec = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = CAST6_PARALLEL_BLOCKS,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_dec_blk_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) }
+	} }
+};
+
+static const struct common_glue_ctx cast6_dec_cbc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = CAST6_PARALLEL_BLOCKS,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_decrypt_cbc_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) }
+	} }
+};
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes);
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc,
+				       dst, src, nbytes);
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src,
+				       nbytes);
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		     struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes);
+}
+
+static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+{
+	return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS,
+			      NULL, fpu_enabled, nbytes);
+}
+
+static inline void cast6_fpu_end(bool fpu_enabled)
+{
+	glue_fpu_end(fpu_enabled);
+}
+
+struct crypt_priv {
+	struct cast6_ctx *ctx;
+	bool fpu_enabled;
+};
+
+static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+	const unsigned int bsize = CAST6_BLOCK_SIZE;
+	struct crypt_priv *ctx = priv;
+	int i;
+
+	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
+
+	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
+		cast6_enc_blk_xway(ctx->ctx, srcdst, srcdst);
+		return;
+	}
+
+	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+		__cast6_encrypt(ctx->ctx, srcdst, srcdst);
+}
+
+static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+	const unsigned int bsize = CAST6_BLOCK_SIZE;
+	struct crypt_priv *ctx = priv;
+	int i;
+
+	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
+
+	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
+		cast6_dec_blk_xway(ctx->ctx, srcdst, srcdst);
+		return;
+	}
+
+	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+		__cast6_decrypt(ctx->ctx, srcdst, srcdst);
+}
+
+struct cast6_lrw_ctx {
+	struct lrw_table_ctx lrw_table;
+	struct cast6_ctx cast6_ctx;
+};
+
+static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+	int err;
+
+	err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE,
+			     &tfm->crt_flags);
+	if (err)
+		return err;
+
+	return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE);
+}
+
+static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[CAST6_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->cast6_ctx,
+		.fpu_enabled = false,
+	};
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	cast6_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[CAST6_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->cast6_ctx,
+		.fpu_enabled = false,
+	};
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	cast6_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static void lrw_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	lrw_free_table(&ctx->lrw_table);
+}
+
+struct cast6_xts_ctx {
+	struct cast6_ctx tweak_ctx;
+	struct cast6_ctx crypt_ctx;
+};
+
+static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+	int err;
+
+	/* key consists of keys of equal size concatenated, therefore
+	 * the length must be even
+	 */
+	if (keylen % 2) {
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	/* first half of xts-key is for crypt */
+	err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
+			      flags);
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[CAST6_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->crypt_ctx,
+		.fpu_enabled = false,
+	};
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = &ctx->tweak_ctx,
+		.tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt),
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	cast6_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[CAST6_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->crypt_ctx,
+		.fpu_enabled = false,
+	};
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = &ctx->tweak_ctx,
+		.tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt),
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	cast6_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static struct crypto_alg cast6_algs[10] = { {
+	.cra_name		= "__ecb-cast6-avx",
+	.cra_driver_name	= "__driver-ecb-cast6-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast6_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.setkey		= cast6_setkey,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__cbc-cast6-avx",
+	.cra_driver_name	= "__driver-cbc-cast6-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast6_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.setkey		= cast6_setkey,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__ctr-cast6-avx",
+	.cra_driver_name	= "__driver-ctr-cast6-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct cast6_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= cast6_setkey,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+}, {
+	.cra_name		= "__lrw-cast6-avx",
+	.cra_driver_name	= "__driver-lrw-cast6-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast6_lrw_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_exit		= lrw_exit_tfm,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE +
+					  CAST6_BLOCK_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE +
+					  CAST6_BLOCK_SIZE,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= lrw_cast6_setkey,
+			.encrypt	= lrw_encrypt,
+			.decrypt	= lrw_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__xts-cast6-avx",
+	.cra_driver_name	= "__driver-xts-cast6-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast6_xts_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE * 2,
+			.max_keysize	= CAST6_MAX_KEY_SIZE * 2,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= xts_cast6_setkey,
+			.encrypt	= xts_encrypt,
+			.decrypt	= xts_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "ecb(cast6)",
+	.cra_driver_name	= "ecb-cast6-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "cbc(cast6)",
+	.cra_driver_name	= "cbc-cast6-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= __ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "ctr(cast6)",
+	.cra_driver_name	= "ctr-cast6-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_encrypt,
+			.geniv		= "chainiv",
+		},
+	},
+}, {
+	.cra_name		= "lrw(cast6)",
+	.cra_driver_name	= "lrw-cast6-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE +
+					  CAST6_BLOCK_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE +
+					  CAST6_BLOCK_SIZE,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "xts(cast6)",
+	.cra_driver_name	= "xts-cast6-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE * 2,
+			.max_keysize	= CAST6_MAX_KEY_SIZE * 2,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+} };
+
+static int __init cast6_init(void)
+{
+	u64 xcr0;
+
+	if (!cpu_has_avx || !cpu_has_osxsave) {
+		pr_info("AVX instructions are not detected.\n");
+		return -ENODEV;
+	}
+
+	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
+		pr_info("AVX detected but unusable.\n");
+		return -ENODEV;
+	}
+
+	return crypto_register_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
+}
+
+static void __exit cast6_exit(void)
+{
+	crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
+}
+
+module_init(cast6_init);
+module_exit(cast6_exit);
+
+MODULE_DESCRIPTION("Cast6 Cipher Algorithm, AVX optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("cast6");

+ 0 - 2
arch/x86/crypto/ghash-clmulni-intel_glue.c

@@ -150,7 +150,6 @@ static struct shash_alg ghash_alg = {
 		.cra_blocksize		= GHASH_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct ghash_ctx),
 		.cra_module		= THIS_MODULE,
-		.cra_list		= LIST_HEAD_INIT(ghash_alg.base.cra_list),
 	},
 };
 
@@ -288,7 +287,6 @@ static struct ahash_alg ghash_async_alg = {
 			.cra_blocksize		= GHASH_BLOCK_SIZE,
 			.cra_type		= &crypto_ahash_type,
 			.cra_module		= THIS_MODULE,
-			.cra_list		= LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list),
 			.cra_init		= ghash_async_init_tfm,
 			.cra_exit		= ghash_async_exit_tfm,
 		},

+ 1 - 1
arch/x86/crypto/glue_helper.c

@@ -110,7 +110,7 @@ static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn,
 		nbytes -= bsize;
 	} while (nbytes >= bsize);
 
-	u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
+	*(u128 *)walk->iv = *iv;
 	return nbytes;
 }
 

+ 0 - 1
arch/x86/crypto/salsa20_glue.c

@@ -97,7 +97,6 @@ static struct crypto_alg alg = {
 	.cra_ctxsize        =   sizeof(struct salsa20_ctx),
 	.cra_alignmask      =	3,
 	.cra_module         =   THIS_MODULE,
-	.cra_list           =   LIST_HEAD_INIT(alg.cra_list),
 	.cra_u              =   {
 		.blkcipher = {
 			.setkey         =   setkey,

+ 0 - 10
arch/x86/crypto/serpent_avx_glue.c

@@ -390,7 +390,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[0].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -410,7 +409,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -430,7 +428,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -451,7 +448,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[3].cra_list),
 	.cra_exit		= lrw_exit_tfm,
 	.cra_u = {
 		.blkcipher = {
@@ -475,7 +471,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[4].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
@@ -496,7 +491,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[5].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -518,7 +512,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[6].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -541,7 +534,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[7].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -565,7 +557,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[8].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -590,7 +581,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[9].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {

+ 0 - 10
arch/x86/crypto/serpent_sse2_glue.c

@@ -393,7 +393,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[0].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -413,7 +412,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -433,7 +431,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -454,7 +451,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[3].cra_list),
 	.cra_exit		= lrw_exit_tfm,
 	.cra_u = {
 		.blkcipher = {
@@ -478,7 +474,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[4].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
@@ -499,7 +494,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[5].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -521,7 +515,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[6].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -544,7 +537,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[7].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -568,7 +560,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[8].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -593,7 +584,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[9].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {

+ 142 - 85
arch/x86/crypto/twofish-avx-x86_64-asm_64.S

@@ -4,6 +4,8 @@
  * Copyright (C) 2012 Johannes Goetzfried
  *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
  *
+ * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -47,16 +49,22 @@
 #define RC2 %xmm6
 #define RD2 %xmm7
 
-#define RX %xmm8
-#define RY %xmm9
+#define RX0 %xmm8
+#define RY0 %xmm9
+
+#define RX1 %xmm10
+#define RY1 %xmm11
 
-#define RK1 %xmm10
-#define RK2 %xmm11
+#define RK1 %xmm12
+#define RK2 %xmm13
 
-#define RID1  %rax
-#define RID1b %al
-#define RID2  %rbx
-#define RID2b %bl
+#define RT %xmm14
+#define RR %xmm15
+
+#define RID1  %rbp
+#define RID1d %ebp
+#define RID2  %rsi
+#define RID2d %esi
 
 #define RGI1   %rdx
 #define RGI1bl %dl
@@ -65,6 +73,13 @@
 #define RGI2bl %cl
 #define RGI2bh %ch
 
+#define RGI3   %rax
+#define RGI3bl %al
+#define RGI3bh %ah
+#define RGI4   %rbx
+#define RGI4bl %bl
+#define RGI4bh %bh
+
 #define RGS1  %r8
 #define RGS1d %r8d
 #define RGS2  %r9
@@ -73,89 +88,123 @@
 #define RGS3d %r10d
 
 
-#define lookup_32bit(t0, t1, t2, t3, src, dst) \
-	movb		src ## bl,        RID1b;     \
-	movb		src ## bh,        RID2b;     \
-	movl		t0(CTX, RID1, 4), dst ## d;  \
-	xorl		t1(CTX, RID2, 4), dst ## d;  \
+#define lookup_32bit(t0, t1, t2, t3, src, dst, interleave_op, il_reg) \
+	movzbl		src ## bl,        RID1d;     \
+	movzbl		src ## bh,        RID2d;     \
 	shrq $16,	src;                         \
-	movb		src ## bl,        RID1b;     \
-	movb		src ## bh,        RID2b;     \
+	movl		t0(CTX, RID1, 4), dst ## d;  \
+	movl		t1(CTX, RID2, 4), RID2d;     \
+	movzbl		src ## bl,        RID1d;     \
+	xorl		RID2d,            dst ## d;  \
+	movzbl		src ## bh,        RID2d;     \
+	interleave_op(il_reg);			     \
 	xorl		t2(CTX, RID1, 4), dst ## d;  \
 	xorl		t3(CTX, RID2, 4), dst ## d;
 
-#define G(a, x, t0, t1, t2, t3) \
-	vmovq		a,    RGI1;               \
-	vpsrldq $8,	a,    x;                  \
-	vmovq		x,    RGI2;               \
+#define dummy(d) /* do nothing */
+
+#define shr_next(reg) \
+	shrq $16,	reg;
+
+#define G(gi1, gi2, x, t0, t1, t2, t3) \
+	lookup_32bit(t0, t1, t2, t3, ##gi1, RGS1, shr_next, ##gi1);  \
+	lookup_32bit(t0, t1, t2, t3, ##gi2, RGS3, shr_next, ##gi2);  \
+	\
+	lookup_32bit(t0, t1, t2, t3, ##gi1, RGS2, dummy, none);      \
+	shlq $32,	RGS2;                                        \
+	orq		RGS1, RGS2;                                  \
+	lookup_32bit(t0, t1, t2, t3, ##gi2, RGS1, dummy, none);      \
+	shlq $32,	RGS1;                                        \
+	orq		RGS1, RGS3;
+
+#define round_head_2(a, b, x1, y1, x2, y2) \
+	vmovq		b ## 1, RGI3;           \
+	vpextrq $1,	b ## 1, RGI4;           \
 	\
-	lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \
-	shrq $16,	RGI1;                     \
-	lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \
-	shlq $32,	RGS2;                     \
-	orq		RGS1, RGS2;               \
+	G(RGI1, RGI2, x1, s0, s1, s2, s3);      \
+	vmovq		a ## 2, RGI1;           \
+	vpextrq $1,	a ## 2, RGI2;           \
+	vmovq		RGS2, x1;               \
+	vpinsrq $1,	RGS3, x1, x1;           \
 	\
-	lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \
-	shrq $16,	RGI2;                     \
-	lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \
-	shlq $32,	RGS3;                     \
-	orq		RGS1, RGS3;               \
+	G(RGI3, RGI4, y1, s1, s2, s3, s0);      \
+	vmovq		b ## 2, RGI3;           \
+	vpextrq $1,	b ## 2, RGI4;           \
+	vmovq		RGS2, y1;               \
+	vpinsrq $1,	RGS3, y1, y1;           \
 	\
-	vmovq		RGS2, x;                  \
-	vpinsrq $1,	RGS3, x, x;
+	G(RGI1, RGI2, x2, s0, s1, s2, s3);      \
+	vmovq		RGS2, x2;               \
+	vpinsrq $1,	RGS3, x2, x2;           \
+	\
+	G(RGI3, RGI4, y2, s1, s2, s3, s0);      \
+	vmovq		RGS2, y2;               \
+	vpinsrq $1,	RGS3, y2, y2;
 
-#define encround(a, b, c, d, x, y) \
-	G(a, x, s0, s1, s2, s3);           \
-	G(b, y, s1, s2, s3, s0);           \
+#define encround_tail(a, b, c, d, x, y, prerotate) \
 	vpaddd			x, y,   x; \
+	vpaddd			x, RK1, RT;\
+	prerotate(b);			   \
+	vpxor			RT, c,  c; \
 	vpaddd			y, x,   y; \
-	vpaddd			x, RK1, x; \
 	vpaddd			y, RK2, y; \
-	vpxor			x, c,   c; \
-	vpsrld $1,		c, x;      \
+	vpsrld $1,		c, RT;     \
 	vpslld $(32 - 1),	c, c;      \
-	vpor			c, x,   c; \
-	vpslld $1,		d, x;      \
-	vpsrld $(32 - 1),	d, d;      \
-	vpor			d, x,   d; \
-	vpxor			d, y,   d;
-
-#define decround(a, b, c, d, x, y) \
-	G(a, x, s0, s1, s2, s3);           \
-	G(b, y, s1, s2, s3, s0);           \
+	vpor			c, RT,  c; \
+	vpxor			d, y,   d; \
+
+#define decround_tail(a, b, c, d, x, y, prerotate) \
 	vpaddd			x, y,   x; \
+	vpaddd			x, RK1, RT;\
+	prerotate(a);			   \
+	vpxor			RT, c,  c; \
 	vpaddd			y, x,   y; \
 	vpaddd			y, RK2, y; \
 	vpxor			d, y,   d; \
 	vpsrld $1,		d, y;      \
 	vpslld $(32 - 1),	d, d;      \
 	vpor			d, y,   d; \
-	vpslld $1,		c, y;      \
-	vpsrld $(32 - 1),	c, c;      \
-	vpor			c, y,   c; \
-	vpaddd			x, RK1, x; \
-	vpxor			x, c,   c;
-
-#define encrypt_round(n, a, b, c, d) \
-	vbroadcastss (k+4*(2*(n)))(CTX),   RK1;           \
-	vbroadcastss (k+4*(2*(n)+1))(CTX), RK2;           \
-	encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \
-	encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY);
-
-#define decrypt_round(n, a, b, c, d) \
-	vbroadcastss (k+4*(2*(n)))(CTX),   RK1;           \
-	vbroadcastss (k+4*(2*(n)+1))(CTX), RK2;           \
-	decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \
-	decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY);
+
+#define rotate_1l(x) \
+	vpslld $1,		x, RR;     \
+	vpsrld $(32 - 1),	x, x;      \
+	vpor			x, RR,  x;
+
+#define preload_rgi(c) \
+	vmovq			c, RGI1; \
+	vpextrq $1,		c, RGI2;
+
+#define encrypt_round(n, a, b, c, d, preload, prerotate) \
+	vbroadcastss (k+4*(2*(n)))(CTX),   RK1;                  \
+	vbroadcastss (k+4*(2*(n)+1))(CTX), RK2;                  \
+	round_head_2(a, b, RX0, RY0, RX1, RY1);                  \
+	encround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \
+	preload(c ## 1);                                         \
+	encround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate);
+
+#define decrypt_round(n, a, b, c, d, preload, prerotate) \
+	vbroadcastss (k+4*(2*(n)))(CTX),   RK1;                  \
+	vbroadcastss (k+4*(2*(n)+1))(CTX), RK2;                  \
+	round_head_2(a, b, RX0, RY0, RX1, RY1);                  \
+	decround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \
+	preload(c ## 1);                                         \
+	decround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate);
 
 #define encrypt_cycle(n) \
-	encrypt_round((2*n), RA, RB, RC, RD);       \
-	encrypt_round(((2*n) + 1), RC, RD, RA, RB);
+	encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \
+	encrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l);
+
+#define encrypt_cycle_last(n) \
+	encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \
+	encrypt_round(((2*n) + 1), RC, RD, RA, RB, dummy, dummy);
 
 #define decrypt_cycle(n) \
-	decrypt_round(((2*n) + 1), RC, RD, RA, RB); \
-	decrypt_round((2*n), RA, RB, RC, RD);
+	decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \
+	decrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l);
 
+#define decrypt_cycle_last(n) \
+	decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \
+	decrypt_round((2*n), RA, RB, RC, RD, dummy, dummy);
 
 #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
 	vpunpckldq		x1, x0, t0; \
@@ -216,17 +265,20 @@ __twofish_enc_blk_8way:
 	 *	%rcx: bool, if true: xor output
 	 */
 
+	pushq %rbp;
 	pushq %rbx;
 	pushq %rcx;
 
 	vmovdqu w(CTX), RK1;
 
 	leaq (4*4*4)(%rdx), %rax;
-	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2);
-	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2);
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
+	preload_rgi(RA1);
+	rotate_1l(RD1);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
+	rotate_1l(RD2);
 
-	xorq RID1, RID1;
-	xorq RID2, RID2;
+	movq %rsi, %r11;
 
 	encrypt_cycle(0);
 	encrypt_cycle(1);
@@ -235,26 +287,27 @@ __twofish_enc_blk_8way:
 	encrypt_cycle(4);
 	encrypt_cycle(5);
 	encrypt_cycle(6);
-	encrypt_cycle(7);
+	encrypt_cycle_last(7);
 
 	vmovdqu (w+4*4)(CTX), RK1;
 
 	popq %rcx;
 	popq %rbx;
+	popq %rbp;
 
-	leaq (4*4*4)(%rsi), %rax;
+	leaq (4*4*4)(%r11), %rax;
 
 	testb %cl, %cl;
 	jnz __enc_xor8;
 
-	outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
-	outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
+	outunpack_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
+	outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
 
 	ret;
 
 __enc_xor8:
-	outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
-	outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
+	outunpack_xor_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
+	outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
 
 	ret;
 
@@ -269,16 +322,19 @@ twofish_dec_blk_8way:
 	 *	%rdx: src
 	 */
 
+	pushq %rbp;
 	pushq %rbx;
 
 	vmovdqu (w+4*4)(CTX), RK1;
 
 	leaq (4*4*4)(%rdx), %rax;
-	inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
-	inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
+	inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
+	preload_rgi(RC1);
+	rotate_1l(RA1);
+	inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
+	rotate_1l(RA2);
 
-	xorq RID1, RID1;
-	xorq RID2, RID2;
+	movq %rsi, %r11;
 
 	decrypt_cycle(7);
 	decrypt_cycle(6);
@@ -287,14 +343,15 @@ twofish_dec_blk_8way:
 	decrypt_cycle(3);
 	decrypt_cycle(2);
 	decrypt_cycle(1);
-	decrypt_cycle(0);
+	decrypt_cycle_last(0);
 
 	vmovdqu (w)(CTX), RK1;
 
 	popq %rbx;
+	popq %rbp;
 
-	leaq (4*4*4)(%rsi), %rax;
-	outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2);
-	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2);
+	leaq (4*4*4)(%r11), %rax;
+	outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
 
 	ret;

+ 0 - 10
arch/x86/crypto/twofish_avx_glue.c

@@ -378,7 +378,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[0].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -398,7 +397,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -418,7 +416,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -439,7 +436,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[3].cra_list),
 	.cra_exit		= lrw_twofish_exit_tfm,
 	.cra_u = {
 		.blkcipher = {
@@ -463,7 +459,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[4].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE * 2,
@@ -484,7 +479,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[5].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -506,7 +500,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[6].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -529,7 +522,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[7].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -553,7 +545,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[8].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -578,7 +569,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[9].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {

+ 0 - 1
arch/x86/crypto/twofish_glue.c

@@ -70,7 +70,6 @@ static struct crypto_alg alg = {
 	.cra_ctxsize		=	sizeof(struct twofish_ctx),
 	.cra_alignmask		=	0,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(alg.cra_list),
 	.cra_u			=	{
 		.cipher = {
 			.cia_min_keysize	=	TF_MIN_KEY_SIZE,

+ 0 - 5
arch/x86/crypto/twofish_glue_3way.c

@@ -342,7 +342,6 @@ static struct crypto_alg tf_algs[5] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(tf_algs[0].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -362,7 +361,6 @@ static struct crypto_alg tf_algs[5] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(tf_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -383,7 +381,6 @@ static struct crypto_alg tf_algs[5] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(tf_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -404,7 +401,6 @@ static struct crypto_alg tf_algs[5] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(tf_algs[3].cra_list),
 	.cra_exit		= lrw_twofish_exit_tfm,
 	.cra_u = {
 		.blkcipher = {
@@ -426,7 +422,6 @@ static struct crypto_alg tf_algs[5] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(tf_algs[4].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE * 2,

+ 182 - 0
crypto/842.c

@@ -0,0 +1,182 @@
+/*
+ * Cryptographic API for the 842 compression algorithm.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) IBM Corporation, 2011
+ *
+ * Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
+ *          Seth Jennings <sjenning@linux.vnet.ibm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/vmalloc.h>
+#include <linux/nx842.h>
+#include <linux/lzo.h>
+#include <linux/timer.h>
+
+static int nx842_uselzo;
+
+struct nx842_ctx {
+	void *nx842_wmem; /* working memory for 842/lzo */
+};
+
+enum nx842_crypto_type {
+	NX842_CRYPTO_TYPE_842,
+	NX842_CRYPTO_TYPE_LZO
+};
+
+#define NX842_SENTINEL 0xdeadbeef
+
+struct nx842_crypto_header {
+	unsigned int sentinel; /* debug */
+	enum nx842_crypto_type type;
+};
+
+static int nx842_init(struct crypto_tfm *tfm)
+{
+	struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
+	int wmemsize;
+
+	wmemsize = max_t(int, nx842_get_workmem_size(), LZO1X_MEM_COMPRESS);
+	ctx->nx842_wmem = kmalloc(wmemsize, GFP_NOFS);
+	if (!ctx->nx842_wmem)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void nx842_exit(struct crypto_tfm *tfm)
+{
+	struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	kfree(ctx->nx842_wmem);
+}
+
+static void nx842_reset_uselzo(unsigned long data)
+{
+	nx842_uselzo = 0;
+}
+
+static DEFINE_TIMER(failover_timer, nx842_reset_uselzo, 0, 0);
+
+static int nx842_crypto_compress(struct crypto_tfm *tfm, const u8 *src,
+			    unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+	struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct nx842_crypto_header *hdr;
+	unsigned int tmp_len = *dlen;
+	size_t lzodlen; /* needed for lzo */
+	int err;
+
+	*dlen = 0;
+	hdr = (struct nx842_crypto_header *)dst;
+	hdr->sentinel = NX842_SENTINEL; /* debug */
+	dst += sizeof(struct nx842_crypto_header);
+	tmp_len -= sizeof(struct nx842_crypto_header);
+	lzodlen = tmp_len;
+
+	if (likely(!nx842_uselzo)) {
+		err = nx842_compress(src, slen, dst, &tmp_len, ctx->nx842_wmem);
+
+		if (likely(!err)) {
+			hdr->type = NX842_CRYPTO_TYPE_842;
+			*dlen = tmp_len + sizeof(struct nx842_crypto_header);
+			return 0;
+		}
+
+		/* hardware failed */
+		nx842_uselzo = 1;
+
+		/* set timer to check for hardware again in 1 second */
+		mod_timer(&failover_timer, jiffies + msecs_to_jiffies(1000));
+	}
+
+	/* no hardware, use lzo */
+	err = lzo1x_1_compress(src, slen, dst, &lzodlen, ctx->nx842_wmem);
+	if (err != LZO_E_OK)
+		return -EINVAL;
+
+	hdr->type = NX842_CRYPTO_TYPE_LZO;
+	*dlen = lzodlen + sizeof(struct nx842_crypto_header);
+	return 0;
+}
+
+static int nx842_crypto_decompress(struct crypto_tfm *tfm, const u8 *src,
+			      unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+	struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct nx842_crypto_header *hdr;
+	unsigned int tmp_len = *dlen;
+	size_t lzodlen; /* needed for lzo */
+	int err;
+
+	*dlen = 0;
+	hdr = (struct nx842_crypto_header *)src;
+
+	if (unlikely(hdr->sentinel != NX842_SENTINEL))
+		return -EINVAL;
+
+	src += sizeof(struct nx842_crypto_header);
+	slen -= sizeof(struct nx842_crypto_header);
+
+	if (likely(hdr->type == NX842_CRYPTO_TYPE_842)) {
+		err = nx842_decompress(src, slen, dst, &tmp_len,
+			ctx->nx842_wmem);
+		if (err)
+			return -EINVAL;
+		*dlen = tmp_len;
+	} else if (hdr->type == NX842_CRYPTO_TYPE_LZO) {
+		lzodlen = tmp_len;
+		err = lzo1x_decompress_safe(src, slen, dst, &lzodlen);
+		if (err != LZO_E_OK)
+			return -EINVAL;
+		*dlen = lzodlen;
+	} else
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct crypto_alg alg = {
+	.cra_name		= "842",
+	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
+	.cra_ctxsize		= sizeof(struct nx842_ctx),
+	.cra_module		= THIS_MODULE,
+	.cra_init		= nx842_init,
+	.cra_exit		= nx842_exit,
+	.cra_u			= { .compress = {
+	.coa_compress		= nx842_crypto_compress,
+	.coa_decompress		= nx842_crypto_decompress } }
+};
+
+static int __init nx842_mod_init(void)
+{
+	del_timer(&failover_timer);
+	return crypto_register_alg(&alg);
+}
+
+static void __exit nx842_mod_exit(void)
+{
+	crypto_unregister_alg(&alg);
+}
+
+module_init(nx842_mod_init);
+module_exit(nx842_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("842 Compression Algorithm");

+ 75 - 0
crypto/Kconfig

@@ -460,6 +460,15 @@ config CRYPTO_SHA1_SPARC64
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
 	  using sparc64 crypto instructions, when available.
 
+config CRYPTO_SHA1_ARM
+	tristate "SHA1 digest algorithm (ARM-asm)"
+	depends on ARM
+	select CRYPTO_SHA1
+	select CRYPTO_HASH
+	help
+	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
+	  using optimized ARM assembler.
+
 config CRYPTO_SHA256
 	tristate "SHA224 and SHA256 digest algorithm"
 	select CRYPTO_HASH
@@ -609,6 +618,8 @@ config CRYPTO_AES_NI_INTEL
 	select CRYPTO_CRYPTD
 	select CRYPTO_ABLK_HELPER_X86
 	select CRYPTO_ALGAPI
+	select CRYPTO_LRW
+	select CRYPTO_XTS
 	help
 	  Use Intel AES-NI instructions for AES algorithm.
 
@@ -661,6 +672,30 @@ config CRYPTO_AES_SPARC64
 	  for some popular block cipher mode is supported too, including
 	  ECB and CBC.
 
+config CRYPTO_AES_ARM
+	tristate "AES cipher algorithms (ARM-asm)"
+	depends on ARM
+	select CRYPTO_ALGAPI
+	select CRYPTO_AES
+	help
+	  Use optimized AES assembler routines for ARM platforms.
+
+	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
+	  algorithm.
+
+	  Rijndael appears to be consistently a very good performer in
+	  both hardware and software across a wide range of computing
+	  environments regardless of its use in feedback or non-feedback
+	  modes. Its key setup time is excellent, and its key agility is
+	  good. Rijndael's very low memory requirements make it very well
+	  suited for restricted-space environments, in which it also
+	  demonstrates excellent performance. Rijndael's operations are
+	  among the easiest to defend against power and timing attacks.
+
+	  The AES specifies three key sizes: 128, 192 and 256 bits
+
+	  See <http://csrc.nist.gov/encryption/aes/> for more information.
+
 config CRYPTO_ANUBIS
 	tristate "Anubis cipher algorithm"
 	select CRYPTO_ALGAPI
@@ -781,6 +816,20 @@ config CRYPTO_CAST5
 	  The CAST5 encryption algorithm (synonymous with CAST-128) is
 	  described in RFC2144.
 
+config CRYPTO_CAST5_AVX_X86_64
+	tristate "CAST5 (CAST-128) cipher algorithm (x86_64/AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_ALGAPI
+	select CRYPTO_CRYPTD
+	select CRYPTO_ABLK_HELPER_X86
+	select CRYPTO_CAST5
+	help
+	  The CAST5 encryption algorithm (synonymous with CAST-128) is
+	  described in RFC2144.
+
+	  This module provides the Cast5 cipher algorithm that processes
+	  sixteen blocks parallel using the AVX instruction set.
+
 config CRYPTO_CAST6
 	tristate "CAST6 (CAST-256) cipher algorithm"
 	select CRYPTO_ALGAPI
@@ -788,6 +837,23 @@ config CRYPTO_CAST6
 	  The CAST6 encryption algorithm (synonymous with CAST-256) is
 	  described in RFC2612.
 
+config CRYPTO_CAST6_AVX_X86_64
+	tristate "CAST6 (CAST-256) cipher algorithm (x86_64/AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_ALGAPI
+	select CRYPTO_CRYPTD
+	select CRYPTO_ABLK_HELPER_X86
+	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_CAST6
+	select CRYPTO_LRW
+	select CRYPTO_XTS
+	help
+	  The CAST6 encryption algorithm (synonymous with CAST-256) is
+	  described in RFC2612.
+
+	  This module provides the Cast6 cipher algorithm that processes
+	  eight blocks parallel using the AVX instruction set.
+
 config CRYPTO_DES
 	tristate "DES and Triple DES EDE cipher algorithms"
 	select CRYPTO_ALGAPI
@@ -1106,6 +1172,15 @@ config CRYPTO_LZO
 	help
 	  This is the LZO algorithm.
 
+config CRYPTO_842
+	tristate "842 compression algorithm"
+	depends on CRYPTO_DEV_NX_COMPRESS
+	# 842 uses lzo if the hardware becomes unavailable
+	select LZO_COMPRESS
+	select LZO_DECOMPRESS
+	help
+	  This is the 842 algorithm.
+
 comment "Random Number Generation"
 
 config CRYPTO_ANSI_CPRNG

+ 3 - 2
crypto/Makefile

@@ -68,8 +68,8 @@ obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
 obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
 obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
-obj-$(CONFIG_CRYPTO_CAST5) += cast5.o
-obj-$(CONFIG_CRYPTO_CAST6) += cast6.o
+obj-$(CONFIG_CRYPTO_CAST5) += cast5_generic.o
+obj-$(CONFIG_CRYPTO_CAST6) += cast6_generic.o
 obj-$(CONFIG_CRYPTO_ARC4) += arc4.o
 obj-$(CONFIG_CRYPTO_TEA) += tea.o
 obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o
@@ -82,6 +82,7 @@ obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
 obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o
+obj-$(CONFIG_CRYPTO_842) += 842.o
 obj-$(CONFIG_CRYPTO_RNG2) += rng.o
 obj-$(CONFIG_CRYPTO_RNG2) += krng.o
 obj-$(CONFIG_CRYPTO_ANSI_CPRNG) += ansi_cprng.o

+ 0 - 1
crypto/aes_generic.c

@@ -1448,7 +1448,6 @@ static struct crypto_alg aes_alg = {
 	.cra_ctxsize		=	sizeof(struct crypto_aes_ctx),
 	.cra_alignmask		=	3,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(aes_alg.cra_list),
 	.cra_u			=	{
 		.cipher = {
 			.cia_min_keysize	=	AES_MIN_KEY_SIZE,

+ 23 - 40
crypto/ansi_cprng.c

@@ -382,26 +382,6 @@ static int cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
 	return 0;
 }
 
-static struct crypto_alg rng_alg = {
-	.cra_name		= "stdrng",
-	.cra_driver_name	= "ansi_cprng",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_RNG,
-	.cra_ctxsize		= sizeof(struct prng_context),
-	.cra_type		= &crypto_rng_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(rng_alg.cra_list),
-	.cra_init		= cprng_init,
-	.cra_exit		= cprng_exit,
-	.cra_u			= {
-		.rng = {
-			.rng_make_random	= cprng_get_random,
-			.rng_reset		= cprng_reset,
-			.seedsize = DEFAULT_PRNG_KSZ + 2*DEFAULT_BLK_SZ,
-		}
-	}
-};
-
 #ifdef CONFIG_CRYPTO_FIPS
 static int fips_cprng_get_random(struct crypto_rng *tfm, u8 *rdata,
 			    unsigned int dlen)
@@ -438,8 +418,27 @@ static int fips_cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
 out:
 	return rc;
 }
+#endif
 
-static struct crypto_alg fips_rng_alg = {
+static struct crypto_alg rng_algs[] = { {
+	.cra_name		= "stdrng",
+	.cra_driver_name	= "ansi_cprng",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_RNG,
+	.cra_ctxsize		= sizeof(struct prng_context),
+	.cra_type		= &crypto_rng_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= cprng_init,
+	.cra_exit		= cprng_exit,
+	.cra_u			= {
+		.rng = {
+			.rng_make_random	= cprng_get_random,
+			.rng_reset		= cprng_reset,
+			.seedsize = DEFAULT_PRNG_KSZ + 2*DEFAULT_BLK_SZ,
+		}
+	}
+#ifdef CONFIG_CRYPTO_FIPS
+}, {
 	.cra_name		= "fips(ansi_cprng)",
 	.cra_driver_name	= "fips_ansi_cprng",
 	.cra_priority		= 300,
@@ -447,7 +446,6 @@ static struct crypto_alg fips_rng_alg = {
 	.cra_ctxsize		= sizeof(struct prng_context),
 	.cra_type		= &crypto_rng_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(rng_alg.cra_list),
 	.cra_init		= cprng_init,
 	.cra_exit		= cprng_exit,
 	.cra_u			= {
@@ -457,33 +455,18 @@ static struct crypto_alg fips_rng_alg = {
 			.seedsize = DEFAULT_PRNG_KSZ + 2*DEFAULT_BLK_SZ,
 		}
 	}
-};
 #endif
+} };
 
 /* Module initalization */
 static int __init prng_mod_init(void)
 {
-	int rc = 0;
-
-	rc = crypto_register_alg(&rng_alg);
-#ifdef CONFIG_CRYPTO_FIPS
-	if (rc)
-		goto out;
-
-	rc = crypto_register_alg(&fips_rng_alg);
-
-out:
-#endif
-	return rc;
+	return crypto_register_algs(rng_algs, ARRAY_SIZE(rng_algs));
 }
 
 static void __exit prng_mod_fini(void)
 {
-	crypto_unregister_alg(&rng_alg);
-#ifdef CONFIG_CRYPTO_FIPS
-	crypto_unregister_alg(&fips_rng_alg);
-#endif
-	return;
+	crypto_unregister_algs(rng_algs, ARRAY_SIZE(rng_algs));
 }
 
 MODULE_LICENSE("GPL");

+ 0 - 1
crypto/anubis.c

@@ -678,7 +678,6 @@ static struct crypto_alg anubis_alg = {
 	.cra_ctxsize		=	sizeof (struct anubis_ctx),
 	.cra_alignmask		=	3,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(anubis_alg.cra_list),
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	ANUBIS_MIN_KEY_SIZE,
 	.cia_max_keysize	=	ANUBIS_MAX_KEY_SIZE,

+ 0 - 1
crypto/blowfish_generic.c

@@ -115,7 +115,6 @@ static struct crypto_alg alg = {
 	.cra_ctxsize		=	sizeof(struct bf_ctx),
 	.cra_alignmask		=	3,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(alg.cra_list),
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	BF_MIN_KEY_SIZE,
 	.cia_max_keysize	=	BF_MAX_KEY_SIZE,

+ 0 - 1
crypto/camellia_generic.c

@@ -1072,7 +1072,6 @@ static struct crypto_alg camellia_alg = {
 	.cra_ctxsize		=	sizeof(struct camellia_ctx),
 	.cra_alignmask		=	3,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(camellia_alg.cra_list),
 	.cra_u			=	{
 		.cipher = {
 			.cia_min_keysize	=	CAMELLIA_MIN_KEY_SIZE,

+ 46 - 34
crypto/cast5.c → crypto/cast5_generic.c

@@ -4,8 +4,8 @@
 * Derived from GnuPG implementation of cast5.
 *
 * Major Changes.
-* 	Complete conformance to rfc2144.
-* 	Supports key size from 40 to 128 bits.
+*	Complete conformance to rfc2144.
+*	Supports key size from 40 to 128 bits.
 *
 * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
 * Copyright (C) 2003 Kartikey Mahendra Bhatt <kartik_me@hotmail.com>.
@@ -28,19 +28,10 @@
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <crypto/cast5.h>
 
-#define CAST5_BLOCK_SIZE 8
-#define CAST5_MIN_KEY_SIZE 5
-#define CAST5_MAX_KEY_SIZE 16
 
-struct cast5_ctx {
-	u32 Km[16];
-	u8 Kr[16];
-	int rr;	/* rr?number of rounds = 16:number of rounds = 12; (rfc 2144) */
-};
-
-
-static const u32 s1[256] = {
+const u32 cast5_s1[256] = {
 	0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f,
 	0x9c004dd3, 0x6003e540, 0xcf9fc949,
 	0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, 0x6e63a0e0,
@@ -106,7 +97,8 @@ static const u32 s1[256] = {
 	0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, 0x427b169c,
 	0x5ac9f049, 0xdd8f0f00, 0x5c8165bf
 };
-static const u32 s2[256] = {
+EXPORT_SYMBOL_GPL(cast5_s1);
+const u32 cast5_s2[256] = {
 	0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, 0xfe61cf7a,
 	0xeec5207a, 0x55889c94, 0x72fc0651,
 	0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, 0x99c430ef,
@@ -172,7 +164,8 @@ static const u32 s2[256] = {
 	0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, 0x7160a539,
 	0x73bfbe70, 0x83877605, 0x4523ecf1
 };
-static const u32 s3[256] = {
+EXPORT_SYMBOL_GPL(cast5_s2);
+const u32 cast5_s3[256] = {
 	0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, 0x47607fff,
 	0x369fe44b, 0x8c1fc644, 0xaececa90,
 	0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, 0x920e8806,
@@ -238,7 +231,8 @@ static const u32 s3[256] = {
 	0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, 0xdfef4636,
 	0xa133c501, 0xe9d3531c, 0xee353783
 };
-static const u32 s4[256] = {
+EXPORT_SYMBOL_GPL(cast5_s3);
+const u32 cast5_s4[256] = {
 	0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb,
 	0x64ad8c57, 0x85510443, 0xfa020ed1,
 	0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, 0xfd059d43,
@@ -304,6 +298,7 @@ static const u32 s4[256] = {
 	0x7ae5290c, 0x3cb9536b, 0x851e20fe, 0x9833557e, 0x13ecf0b0,
 	0xd3ffb372, 0x3f85c5c1, 0x0aef7ed2
 };
+EXPORT_SYMBOL_GPL(cast5_s4);
 static const u32 s5[256] = {
 	0x7ec90c04, 0x2c6e74b9, 0x9b0e66df, 0xa6337911, 0xb86a7fff,
 	0x1dd358f5, 0x44dd9d44, 0x1731167f,
@@ -569,17 +564,21 @@ static const u32 sb8[256] = {
 	0xeaee6801, 0x8db2a283, 0xea8bf59e
 };
 
+#define s1 cast5_s1
+#define s2 cast5_s2
+#define s3 cast5_s3
+#define s4 cast5_s4
+
 #define F1(D, m, r)  ((I = ((m) + (D))), (I = rol32(I, (r))),   \
-    (((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff]))
+	(((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff]))
 #define F2(D, m, r)  ((I = ((m) ^ (D))), (I = rol32(I, (r))),   \
-    (((s1[I >> 24] - s2[(I>>16)&0xff]) + s3[(I>>8)&0xff]) ^ s4[I&0xff]))
+	(((s1[I >> 24] - s2[(I>>16)&0xff]) + s3[(I>>8)&0xff]) ^ s4[I&0xff]))
 #define F3(D, m, r)  ((I = ((m) - (D))), (I = rol32(I, (r))),   \
-    (((s1[I >> 24] + s2[(I>>16)&0xff]) ^ s3[(I>>8)&0xff]) - s4[I&0xff]))
+	(((s1[I >> 24] + s2[(I>>16)&0xff]) ^ s3[(I>>8)&0xff]) - s4[I&0xff]))
 
 
-static void cast5_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
+void __cast5_encrypt(struct cast5_ctx *c, u8 *outbuf, const u8 *inbuf)
 {
-	struct cast5_ctx *c = crypto_tfm_ctx(tfm);
 	const __be32 *src = (const __be32 *)inbuf;
 	__be32 *dst = (__be32 *)outbuf;
 	u32 l, r, t;
@@ -628,10 +627,15 @@ static void cast5_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
 	dst[0] = cpu_to_be32(r);
 	dst[1] = cpu_to_be32(l);
 }
+EXPORT_SYMBOL_GPL(__cast5_encrypt);
 
-static void cast5_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
+static void cast5_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
+{
+	__cast5_encrypt(crypto_tfm_ctx(tfm), outbuf, inbuf);
+}
+
+void __cast5_decrypt(struct cast5_ctx *c, u8 *outbuf, const u8 *inbuf)
 {
-	struct cast5_ctx *c = crypto_tfm_ctx(tfm);
 	const __be32 *src = (const __be32 *)inbuf;
 	__be32 *dst = (__be32 *)outbuf;
 	u32 l, r, t;
@@ -667,6 +671,12 @@ static void cast5_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
 	dst[0] = cpu_to_be32(r);
 	dst[1] = cpu_to_be32(l);
 }
+EXPORT_SYMBOL_GPL(__cast5_decrypt);
+
+static void cast5_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
+{
+	__cast5_decrypt(crypto_tfm_ctx(tfm), outbuf, inbuf);
+}
 
 static void key_schedule(u32 *x, u32 *z, u32 *k)
 {
@@ -743,7 +753,7 @@ static void key_schedule(u32 *x, u32 *z, u32 *k)
 }
 
 
-static int cast5_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned key_len)
+int cast5_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len)
 {
 	struct cast5_ctx *c = crypto_tfm_ctx(tfm);
 	int i;
@@ -771,20 +781,22 @@ static int cast5_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned key_len)
 		c->Kr[i] = k[i] & 0x1f;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(cast5_setkey);
 
 static struct crypto_alg alg = {
-	.cra_name 	= "cast5",
-	.cra_flags 	= CRYPTO_ALG_TYPE_CIPHER,
-	.cra_blocksize 	= CAST5_BLOCK_SIZE,
-	.cra_ctxsize 	= sizeof(struct cast5_ctx),
-	.cra_alignmask	= 3,
-	.cra_module 	= THIS_MODULE,
-	.cra_list 	= LIST_HEAD_INIT(alg.cra_list),
-	.cra_u 		= {
+	.cra_name		= "cast5",
+	.cra_driver_name	= "cast5-generic",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		= CAST5_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast5_ctx),
+	.cra_alignmask		= 3,
+	.cra_module		= THIS_MODULE,
+	.cra_u			= {
 		.cipher = {
 			.cia_min_keysize = CAST5_MIN_KEY_SIZE,
 			.cia_max_keysize = CAST5_MAX_KEY_SIZE,
-			.cia_setkey = cast5_setkey,
+			.cia_setkey  = cast5_setkey,
 			.cia_encrypt = cast5_encrypt,
 			.cia_decrypt = cast5_decrypt
 		}
@@ -806,4 +818,4 @@ module_exit(cast5_mod_fini);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Cast5 Cipher Algorithm");
-
+MODULE_ALIAS("cast5");

+ 46 - 27
crypto/cast6.c → crypto/cast6_generic.c

@@ -25,24 +25,21 @@
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <crypto/cast6.h>
 
-#define CAST6_BLOCK_SIZE 16
-#define CAST6_MIN_KEY_SIZE 16
-#define CAST6_MAX_KEY_SIZE 32
-
-struct cast6_ctx {
-	u32 Km[12][4];
-	u8 Kr[12][4];
-};
+#define s1 cast6_s1
+#define s2 cast6_s2
+#define s3 cast6_s3
+#define s4 cast6_s4
 
 #define F1(D, r, m)  ((I = ((m) + (D))), (I = rol32(I, (r))),   \
-    (((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff]))
+	(((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff]))
 #define F2(D, r, m)  ((I = ((m) ^ (D))), (I = rol32(I, (r))),   \
-    (((s1[I >> 24] - s2[(I>>16)&0xff]) + s3[(I>>8)&0xff]) ^ s4[I&0xff]))
+	(((s1[I >> 24] - s2[(I>>16)&0xff]) + s3[(I>>8)&0xff]) ^ s4[I&0xff]))
 #define F3(D, r, m)  ((I = ((m) - (D))), (I = rol32(I, (r))),   \
-    (((s1[I >> 24] + s2[(I>>16)&0xff]) ^ s3[(I>>8)&0xff]) - s4[I&0xff]))
+	(((s1[I >> 24] + s2[(I>>16)&0xff]) ^ s3[(I>>8)&0xff]) - s4[I&0xff]))
 
-static const u32 s1[256] = {
+const u32 cast6_s1[256] = {
 	0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f,
 	0x9c004dd3, 0x6003e540, 0xcf9fc949,
 	0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, 0x6e63a0e0,
@@ -108,8 +105,9 @@ static const u32 s1[256] = {
 	0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, 0x427b169c,
 	0x5ac9f049, 0xdd8f0f00, 0x5c8165bf
 };
+EXPORT_SYMBOL_GPL(cast6_s1);
 
-static const u32 s2[256] = {
+const u32 cast6_s2[256] = {
 	0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, 0xfe61cf7a,
 	0xeec5207a, 0x55889c94, 0x72fc0651,
 	0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, 0x99c430ef,
@@ -175,8 +173,9 @@ static const u32 s2[256] = {
 	0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, 0x7160a539,
 	0x73bfbe70, 0x83877605, 0x4523ecf1
 };
+EXPORT_SYMBOL_GPL(cast6_s2);
 
-static const u32 s3[256] = {
+const u32 cast6_s3[256] = {
 	0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, 0x47607fff,
 	0x369fe44b, 0x8c1fc644, 0xaececa90,
 	0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, 0x920e8806,
@@ -242,8 +241,9 @@ static const u32 s3[256] = {
 	0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, 0xdfef4636,
 	0xa133c501, 0xe9d3531c, 0xee353783
 };
+EXPORT_SYMBOL_GPL(cast6_s3);
 
-static const u32 s4[256] = {
+const u32 cast6_s4[256] = {
 	0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb,
 	0x64ad8c57, 0x85510443, 0xfa020ed1,
 	0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, 0xfd059d43,
@@ -309,6 +309,7 @@ static const u32 s4[256] = {
 	0x7ae5290c, 0x3cb9536b, 0x851e20fe, 0x9833557e, 0x13ecf0b0,
 	0xd3ffb372, 0x3f85c5c1, 0x0aef7ed2
 };
+EXPORT_SYMBOL_GPL(cast6_s4);
 
 static const u32 Tm[24][8] = {
 	{ 0x5a827999, 0xc95c653a, 0x383650db, 0xa7103c7c, 0x15ea281d,
@@ -369,7 +370,7 @@ static const u8 Tr[4][8] = {
 };
 
 /* forward octave */
-static void W(u32 *key, unsigned int i)
+static inline void W(u32 *key, unsigned int i)
 {
 	u32 I;
 	key[6] ^= F1(key[7], Tr[i % 4][0], Tm[i][0]);
@@ -382,14 +383,12 @@ static void W(u32 *key, unsigned int i)
 	key[7] ^= F2(key[0], Tr[i % 4][7], Tm[i][7]);
 }
 
-static int cast6_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-			unsigned key_len)
+int __cast6_setkey(struct cast6_ctx *c, const u8 *in_key,
+		   unsigned key_len, u32 *flags)
 {
 	int i;
 	u32 key[8];
 	__be32 p_key[8]; /* padded key */
-	struct cast6_ctx *c = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
 
 	if (key_len % 4 != 0) {
 		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
@@ -425,9 +424,17 @@ static int cast6_setkey(struct crypto_tfm *tfm, const u8 *in_key,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__cast6_setkey);
+
+int cast6_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
+{
+	return __cast6_setkey(crypto_tfm_ctx(tfm), key, keylen,
+			      &tfm->crt_flags);
+}
+EXPORT_SYMBOL_GPL(cast6_setkey);
 
 /*forward quad round*/
-static void Q(u32 *block, u8 *Kr, u32 *Km)
+static inline void Q(u32 *block, u8 *Kr, u32 *Km)
 {
 	u32 I;
 	block[2] ^= F1(block[3], Kr[0], Km[0]);
@@ -437,7 +444,7 @@ static void Q(u32 *block, u8 *Kr, u32 *Km)
 }
 
 /*reverse quad round*/
-static void QBAR(u32 *block, u8 *Kr, u32 *Km)
+static inline void QBAR(u32 *block, u8 *Kr, u32 *Km)
 {
 	u32 I;
 	block[3] ^= F1(block[0], Kr[3], Km[3]);
@@ -446,9 +453,8 @@ static void QBAR(u32 *block, u8 *Kr, u32 *Km)
 	block[2] ^= F1(block[3], Kr[0], Km[0]);
 }
 
-static void cast6_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
+void __cast6_encrypt(struct cast6_ctx *c, u8 *outbuf, const u8 *inbuf)
 {
-	struct cast6_ctx *c = crypto_tfm_ctx(tfm);
 	const __be32 *src = (const __be32 *)inbuf;
 	__be32 *dst = (__be32 *)outbuf;
 	u32 block[4];
@@ -478,10 +484,15 @@ static void cast6_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
 	dst[2] = cpu_to_be32(block[2]);
 	dst[3] = cpu_to_be32(block[3]);
 }
+EXPORT_SYMBOL_GPL(__cast6_encrypt);
 
-static void cast6_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
+static void cast6_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
+{
+	__cast6_encrypt(crypto_tfm_ctx(tfm), outbuf, inbuf);
+}
+
+void __cast6_decrypt(struct cast6_ctx *c, u8 *outbuf, const u8 *inbuf)
 {
-	struct cast6_ctx *c = crypto_tfm_ctx(tfm);
 	const __be32 *src = (const __be32 *)inbuf;
 	__be32 *dst = (__be32 *)outbuf;
 	u32 block[4];
@@ -511,15 +522,22 @@ static void cast6_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
 	dst[2] = cpu_to_be32(block[2]);
 	dst[3] = cpu_to_be32(block[3]);
 }
+EXPORT_SYMBOL_GPL(__cast6_decrypt);
+
+static void cast6_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
+{
+	__cast6_decrypt(crypto_tfm_ctx(tfm), outbuf, inbuf);
+}
 
 static struct crypto_alg alg = {
 	.cra_name = "cast6",
+	.cra_driver_name = "cast6-generic",
+	.cra_priority = 100,
 	.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize = CAST6_BLOCK_SIZE,
 	.cra_ctxsize = sizeof(struct cast6_ctx),
 	.cra_alignmask = 3,
 	.cra_module = THIS_MODULE,
-	.cra_list = LIST_HEAD_INIT(alg.cra_list),
 	.cra_u = {
 		  .cipher = {
 			     .cia_min_keysize = CAST6_MIN_KEY_SIZE,
@@ -545,3 +563,4 @@ module_exit(cast6_mod_fini);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Cast6 Cipher Algorithm");
+MODULE_ALIAS("cast6");

+ 18 - 39
crypto/crypto_null.c

@@ -94,18 +94,6 @@ static int skcipher_null_crypt(struct blkcipher_desc *desc,
 	return err;
 }
 
-static struct crypto_alg compress_null = {
-	.cra_name		=	"compress_null",
-	.cra_flags		=	CRYPTO_ALG_TYPE_COMPRESS,
-	.cra_blocksize		=	NULL_BLOCK_SIZE,
-	.cra_ctxsize		=	0,
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=       LIST_HEAD_INIT(compress_null.cra_list),
-	.cra_u			=	{ .compress = {
-	.coa_compress 		=	null_compress,
-	.coa_decompress		=	null_compress } }
-};
-
 static struct shash_alg digest_null = {
 	.digestsize		=	NULL_DIGEST_SIZE,
 	.setkey   		=	null_hash_setkey,
@@ -122,22 +110,19 @@ static struct shash_alg digest_null = {
 	}
 };
 
-static struct crypto_alg cipher_null = {
+static struct crypto_alg null_algs[3] = { {
 	.cra_name		=	"cipher_null",
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	NULL_BLOCK_SIZE,
 	.cra_ctxsize		=	0,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(cipher_null.cra_list),
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	NULL_KEY_SIZE,
 	.cia_max_keysize	=	NULL_KEY_SIZE,
 	.cia_setkey		= 	null_setkey,
 	.cia_encrypt		=	null_crypt,
 	.cia_decrypt		=	null_crypt } }
-};
-
-static struct crypto_alg skcipher_null = {
+}, {
 	.cra_name		=	"ecb(cipher_null)",
 	.cra_driver_name	=	"ecb-cipher_null",
 	.cra_priority		=	100,
@@ -146,7 +131,6 @@ static struct crypto_alg skcipher_null = {
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_ctxsize		=	0,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(skcipher_null.cra_list),
 	.cra_u			=	{ .blkcipher = {
 	.min_keysize		=	NULL_KEY_SIZE,
 	.max_keysize		=	NULL_KEY_SIZE,
@@ -154,7 +138,16 @@ static struct crypto_alg skcipher_null = {
 	.setkey			= 	null_setkey,
 	.encrypt		=	skcipher_null_crypt,
 	.decrypt		=	skcipher_null_crypt } }
-};
+}, {
+	.cra_name		=	"compress_null",
+	.cra_flags		=	CRYPTO_ALG_TYPE_COMPRESS,
+	.cra_blocksize		=	NULL_BLOCK_SIZE,
+	.cra_ctxsize		=	0,
+	.cra_module		=	THIS_MODULE,
+	.cra_u			=	{ .compress = {
+	.coa_compress		=	null_compress,
+	.coa_decompress		=	null_compress } }
+} };
 
 MODULE_ALIAS("compress_null");
 MODULE_ALIAS("digest_null");
@@ -164,40 +157,26 @@ static int __init crypto_null_mod_init(void)
 {
 	int ret = 0;
 
-	ret = crypto_register_alg(&cipher_null);
+	ret = crypto_register_algs(null_algs, ARRAY_SIZE(null_algs));
 	if (ret < 0)
 		goto out;
 
-	ret = crypto_register_alg(&skcipher_null);
-	if (ret < 0)
-		goto out_unregister_cipher;
-
 	ret = crypto_register_shash(&digest_null);
 	if (ret < 0)
-		goto out_unregister_skcipher;
+		goto out_unregister_algs;
 
-	ret = crypto_register_alg(&compress_null);
-	if (ret < 0)
-		goto out_unregister_digest;
+	return 0;
 
+out_unregister_algs:
+	crypto_unregister_algs(null_algs, ARRAY_SIZE(null_algs));
 out:
 	return ret;
-
-out_unregister_digest:
-	crypto_unregister_shash(&digest_null);
-out_unregister_skcipher:
-	crypto_unregister_alg(&skcipher_null);
-out_unregister_cipher:
-	crypto_unregister_alg(&cipher_null);
-	goto out;
 }
 
 static void __exit crypto_null_mod_fini(void)
 {
-	crypto_unregister_alg(&compress_null);
 	crypto_unregister_shash(&digest_null);
-	crypto_unregister_alg(&skcipher_null);
-	crypto_unregister_alg(&cipher_null);
+	crypto_unregister_algs(null_algs, ARRAY_SIZE(null_algs));
 }
 
 module_init(crypto_null_mod_init);

+ 1 - 1
crypto/crypto_user.c

@@ -30,7 +30,7 @@
 
 #include "internal.h"
 
-DEFINE_MUTEX(crypto_cfg_mutex);
+static DEFINE_MUTEX(crypto_cfg_mutex);
 
 /* The crypto netlink socket */
 static struct sock *crypto_nlsk;

+ 0 - 1
crypto/deflate.c

@@ -199,7 +199,6 @@ static struct crypto_alg alg = {
 	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
 	.cra_ctxsize		= sizeof(struct deflate_ctx),
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(alg.cra_list),
 	.cra_init		= deflate_init,
 	.cra_exit		= deflate_exit,
 	.cra_u			= { .compress = {

+ 5 - 20
crypto/des_generic.c

@@ -943,59 +943,44 @@ static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	d[1] = cpu_to_le32(L);
 }
 
-static struct crypto_alg des_alg = {
+static struct crypto_alg des_algs[2] = { {
 	.cra_name		=	"des",
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	DES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct des_ctx),
 	.cra_module		=	THIS_MODULE,
 	.cra_alignmask		=	3,
-	.cra_list		=	LIST_HEAD_INIT(des_alg.cra_list),
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	DES_KEY_SIZE,
 	.cia_max_keysize	=	DES_KEY_SIZE,
 	.cia_setkey		=	des_setkey,
 	.cia_encrypt		=	des_encrypt,
 	.cia_decrypt		=	des_decrypt } }
-};
-
-static struct crypto_alg des3_ede_alg = {
+}, {
 	.cra_name		=	"des3_ede",
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	DES3_EDE_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct des3_ede_ctx),
 	.cra_module		=	THIS_MODULE,
 	.cra_alignmask		=	3,
-	.cra_list		=	LIST_HEAD_INIT(des3_ede_alg.cra_list),
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	DES3_EDE_KEY_SIZE,
 	.cia_max_keysize	=	DES3_EDE_KEY_SIZE,
 	.cia_setkey		=	des3_ede_setkey,
 	.cia_encrypt		=	des3_ede_encrypt,
 	.cia_decrypt		=	des3_ede_decrypt } }
-};
+} };
 
 MODULE_ALIAS("des3_ede");
 
 static int __init des_generic_mod_init(void)
 {
-	int ret = 0;
-
-	ret = crypto_register_alg(&des_alg);
-	if (ret < 0)
-		goto out;
-
-	ret = crypto_register_alg(&des3_ede_alg);
-	if (ret < 0)
-		crypto_unregister_alg(&des_alg);
-out:
-	return ret;
+	return crypto_register_algs(des_algs, ARRAY_SIZE(des_algs));
 }
 
 static void __exit des_generic_mod_fini(void)
 {
-	crypto_unregister_alg(&des3_ede_alg);
-	crypto_unregister_alg(&des_alg);
+	crypto_unregister_algs(des_algs, ARRAY_SIZE(des_algs));
 }
 
 module_init(des_generic_mod_init);

+ 0 - 1
crypto/fcrypt.c

@@ -396,7 +396,6 @@ static struct crypto_alg fcrypt_alg = {
 	.cra_ctxsize		=	sizeof(struct fcrypt_ctx),
 	.cra_module		=	THIS_MODULE,
 	.cra_alignmask		=	3,
-	.cra_list		=	LIST_HEAD_INIT(fcrypt_alg.cra_list),
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	8,
 	.cia_max_keysize	=	8,

+ 0 - 1
crypto/ghash-generic.c

@@ -153,7 +153,6 @@ static struct shash_alg ghash_alg = {
 		.cra_blocksize		= GHASH_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct ghash_ctx),
 		.cra_module		= THIS_MODULE,
-		.cra_list		= LIST_HEAD_INIT(ghash_alg.base.cra_list),
 		.cra_exit		= ghash_exit_tfm,
 	},
 };

+ 0 - 1
crypto/khazad.c

@@ -853,7 +853,6 @@ static struct crypto_alg khazad_alg = {
 	.cra_ctxsize		=	sizeof (struct khazad_ctx),
 	.cra_alignmask		=	7,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(khazad_alg.cra_list),
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	KHAZAD_KEY_SIZE,
 	.cia_max_keysize	=	KHAZAD_KEY_SIZE,

+ 0 - 1
crypto/krng.c

@@ -35,7 +35,6 @@ static struct crypto_alg krng_alg = {
 	.cra_ctxsize		= 0,
 	.cra_type		= &crypto_rng_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(krng_alg.cra_list),
 	.cra_u			= {
 		.rng = {
 			.rng_make_random	= krng_get_random,

+ 0 - 1
crypto/lzo.c

@@ -81,7 +81,6 @@ static struct crypto_alg alg = {
 	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
 	.cra_ctxsize		= sizeof(struct lzo_ctx),
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(alg.cra_list),
 	.cra_init		= lzo_init,
 	.cra_exit		= lzo_exit,
 	.cra_u			= { .compress = {

+ 0 - 1
crypto/salsa20_generic.c

@@ -221,7 +221,6 @@ static struct crypto_alg alg = {
 	.cra_ctxsize        =   sizeof(struct salsa20_ctx),
 	.cra_alignmask      =	3,
 	.cra_module         =   THIS_MODULE,
-	.cra_list           =   LIST_HEAD_INIT(alg.cra_list),
 	.cra_u              =   {
 		.blkcipher = {
 			.setkey         =   setkey,

+ 0 - 1
crypto/seed.c

@@ -449,7 +449,6 @@ static struct crypto_alg seed_alg = {
 	.cra_ctxsize		=	sizeof(struct seed_ctx),
 	.cra_alignmask		=	3,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(seed_alg.cra_list),
 	.cra_u			=	{
 		.cipher = {
 			.cia_min_keysize	=	SEED_KEY_SIZE,

+ 19 - 34
crypto/serpent_generic.c

@@ -567,24 +567,6 @@ static void serpent_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	__serpent_decrypt(ctx, dst, src);
 }
 
-static struct crypto_alg serpent_alg = {
-	.cra_name		=	"serpent",
-	.cra_driver_name	=	"serpent-generic",
-	.cra_priority		=	100,
-	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
-	.cra_blocksize		=	SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct serpent_ctx),
-	.cra_alignmask		=	3,
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(serpent_alg.cra_list),
-	.cra_u			=	{ .cipher = {
-	.cia_min_keysize	=	SERPENT_MIN_KEY_SIZE,
-	.cia_max_keysize	=	SERPENT_MAX_KEY_SIZE,
-	.cia_setkey		=	serpent_setkey,
-	.cia_encrypt		=	serpent_encrypt,
-	.cia_decrypt		=	serpent_decrypt } }
-};
-
 static int tnepres_setkey(struct crypto_tfm *tfm, const u8 *key,
 			  unsigned int keylen)
 {
@@ -637,41 +619,44 @@ static void tnepres_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	d[3] = swab32(rd[0]);
 }
 
-static struct crypto_alg tnepres_alg = {
+static struct crypto_alg srp_algs[2] = { {
+	.cra_name		=	"serpent",
+	.cra_driver_name	=	"serpent-generic",
+	.cra_priority		=	100,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	SERPENT_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct serpent_ctx),
+	.cra_alignmask		=	3,
+	.cra_module		=	THIS_MODULE,
+	.cra_u			=	{ .cipher = {
+	.cia_min_keysize	=	SERPENT_MIN_KEY_SIZE,
+	.cia_max_keysize	=	SERPENT_MAX_KEY_SIZE,
+	.cia_setkey		=	serpent_setkey,
+	.cia_encrypt		=	serpent_encrypt,
+	.cia_decrypt		=	serpent_decrypt } }
+}, {
 	.cra_name		=	"tnepres",
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	SERPENT_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct serpent_ctx),
 	.cra_alignmask		=	3,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(serpent_alg.cra_list),
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	SERPENT_MIN_KEY_SIZE,
 	.cia_max_keysize	=	SERPENT_MAX_KEY_SIZE,
 	.cia_setkey		=	tnepres_setkey,
 	.cia_encrypt		=	tnepres_encrypt,
 	.cia_decrypt		=	tnepres_decrypt } }
-};
+} };
 
 static int __init serpent_mod_init(void)
 {
-	int ret = crypto_register_alg(&serpent_alg);
-
-	if (ret)
-		return ret;
-
-	ret = crypto_register_alg(&tnepres_alg);
-
-	if (ret)
-		crypto_unregister_alg(&serpent_alg);
-
-	return ret;
+	return crypto_register_algs(srp_algs, ARRAY_SIZE(srp_algs));
 }
 
 static void __exit serpent_mod_fini(void)
 {
-	crypto_unregister_alg(&tnepres_alg);
-	crypto_unregister_alg(&serpent_alg);
+	crypto_unregister_algs(srp_algs, ARRAY_SIZE(srp_algs));
 }
 
 module_init(serpent_mod_init);

+ 5 - 20
crypto/sha256_generic.c

@@ -336,7 +336,7 @@ static int sha256_import(struct shash_desc *desc, const void *in)
 	return 0;
 }
 
-static struct shash_alg sha256 = {
+static struct shash_alg sha256_algs[2] = { {
 	.digestsize	=	SHA256_DIGEST_SIZE,
 	.init		=	sha256_init,
 	.update		=	sha256_update,
@@ -352,9 +352,7 @@ static struct shash_alg sha256 = {
 		.cra_blocksize	=	SHA256_BLOCK_SIZE,
 		.cra_module	=	THIS_MODULE,
 	}
-};
-
-static struct shash_alg sha224 = {
+}, {
 	.digestsize	=	SHA224_DIGEST_SIZE,
 	.init		=	sha224_init,
 	.update		=	sha256_update,
@@ -367,29 +365,16 @@ static struct shash_alg sha224 = {
 		.cra_blocksize	=	SHA224_BLOCK_SIZE,
 		.cra_module	=	THIS_MODULE,
 	}
-};
+} };
 
 static int __init sha256_generic_mod_init(void)
 {
-	int ret = 0;
-
-	ret = crypto_register_shash(&sha224);
-
-	if (ret < 0)
-		return ret;
-
-	ret = crypto_register_shash(&sha256);
-
-	if (ret < 0)
-		crypto_unregister_shash(&sha224);
-
-	return ret;
+	return crypto_register_shashes(sha256_algs, ARRAY_SIZE(sha256_algs));
 }
 
 static void __exit sha256_generic_mod_fini(void)
 {
-	crypto_unregister_shash(&sha224);
-	crypto_unregister_shash(&sha256);
+	crypto_unregister_shashes(sha256_algs, ARRAY_SIZE(sha256_algs));
 }
 
 module_init(sha256_generic_mod_init);

+ 5 - 15
crypto/sha512_generic.c

@@ -242,7 +242,7 @@ static int sha384_final(struct shash_desc *desc, u8 *hash)
 	return 0;
 }
 
-static struct shash_alg sha512 = {
+static struct shash_alg sha512_algs[2] = { {
 	.digestsize	=	SHA512_DIGEST_SIZE,
 	.init		=	sha512_init,
 	.update		=	sha512_update,
@@ -254,9 +254,7 @@ static struct shash_alg sha512 = {
 		.cra_blocksize	=	SHA512_BLOCK_SIZE,
 		.cra_module	=	THIS_MODULE,
 	}
-};
-
-static struct shash_alg sha384 = {
+}, {
 	.digestsize	=	SHA384_DIGEST_SIZE,
 	.init		=	sha384_init,
 	.update		=	sha512_update,
@@ -268,24 +266,16 @@ static struct shash_alg sha384 = {
 		.cra_blocksize	=	SHA384_BLOCK_SIZE,
 		.cra_module	=	THIS_MODULE,
 	}
-};
+} };
 
 static int __init sha512_generic_mod_init(void)
 {
-        int ret = 0;
-
-        if ((ret = crypto_register_shash(&sha384)) < 0)
-                goto out;
-        if ((ret = crypto_register_shash(&sha512)) < 0)
-                crypto_unregister_shash(&sha384);
-out:
-        return ret;
+	return crypto_register_shashes(sha512_algs, ARRAY_SIZE(sha512_algs));
 }
 
 static void __exit sha512_generic_mod_fini(void)
 {
-        crypto_unregister_shash(&sha384);
-        crypto_unregister_shash(&sha512);
+	crypto_unregister_shashes(sha512_algs, ARRAY_SIZE(sha512_algs));
 }
 
 module_init(sha512_generic_mod_init);

+ 36 - 0
crypto/shash.c

@@ -629,6 +629,42 @@ int crypto_unregister_shash(struct shash_alg *alg)
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_shash);
 
+int crypto_register_shashes(struct shash_alg *algs, int count)
+{
+	int i, ret;
+
+	for (i = 0; i < count; i++) {
+		ret = crypto_register_shash(&algs[i]);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	for (--i; i >= 0; --i)
+		crypto_unregister_shash(&algs[i]);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_register_shashes);
+
+int crypto_unregister_shashes(struct shash_alg *algs, int count)
+{
+	int i, ret;
+
+	for (i = count - 1; i >= 0; --i) {
+		ret = crypto_unregister_shash(&algs[i]);
+		if (ret)
+			pr_err("Failed to unregister %s %s: %d\n",
+			       algs[i].base.cra_driver_name,
+			       algs[i].base.cra_name, ret);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_shashes);
+
 int shash_register_instance(struct crypto_template *tmpl,
 			    struct shash_instance *inst)
 {

+ 89 - 6
crypto/tcrypt.c

@@ -97,7 +97,6 @@ static int test_cipher_cycles(struct blkcipher_desc *desc, int enc,
 	int ret = 0;
 	int i;
 
-	local_bh_disable();
 	local_irq_disable();
 
 	/* Warm-up run. */
@@ -130,7 +129,6 @@ static int test_cipher_cycles(struct blkcipher_desc *desc, int enc,
 
 out:
 	local_irq_enable();
-	local_bh_enable();
 
 	if (ret == 0)
 		printk("1 operation in %lu cycles (%d bytes)\n",
@@ -300,7 +298,6 @@ static int test_hash_cycles_digest(struct hash_desc *desc,
 	int i;
 	int ret;
 
-	local_bh_disable();
 	local_irq_disable();
 
 	/* Warm-up run. */
@@ -327,7 +324,6 @@ static int test_hash_cycles_digest(struct hash_desc *desc,
 
 out:
 	local_irq_enable();
-	local_bh_enable();
 
 	if (ret)
 		return ret;
@@ -348,7 +344,6 @@ static int test_hash_cycles(struct hash_desc *desc, struct scatterlist *sg,
 	if (plen == blen)
 		return test_hash_cycles_digest(desc, sg, blen, out);
 
-	local_bh_disable();
 	local_irq_disable();
 
 	/* Warm-up run. */
@@ -391,7 +386,6 @@ static int test_hash_cycles(struct hash_desc *desc, struct scatterlist *sg,
 
 out:
 	local_irq_enable();
-	local_bh_enable();
 
 	if (ret)
 		return ret;
@@ -1037,10 +1031,16 @@ static int do_test(int m)
 
 	case 14:
 		ret += tcrypt_test("ecb(cast5)");
+		ret += tcrypt_test("cbc(cast5)");
+		ret += tcrypt_test("ctr(cast5)");
 		break;
 
 	case 15:
 		ret += tcrypt_test("ecb(cast6)");
+		ret += tcrypt_test("cbc(cast6)");
+		ret += tcrypt_test("ctr(cast6)");
+		ret += tcrypt_test("lrw(cast6)");
+		ret += tcrypt_test("xts(cast6)");
 		break;
 
 	case 16:
@@ -1112,6 +1112,9 @@ static int do_test(int m)
 	case 32:
 		ret += tcrypt_test("ecb(camellia)");
 		ret += tcrypt_test("cbc(camellia)");
+		ret += tcrypt_test("ctr(camellia)");
+		ret += tcrypt_test("lrw(camellia)");
+		ret += tcrypt_test("xts(camellia)");
 		break;
 	case 33:
 		ret += tcrypt_test("sha224");
@@ -1165,6 +1168,10 @@ static int do_test(int m)
 		ret += tcrypt_test("rfc4309(ccm(aes))");
 		break;
 
+	case 46:
+		ret += tcrypt_test("ghash");
+		break;
+
 	case 100:
 		ret += tcrypt_test("hmac(md5)");
 		break;
@@ -1359,6 +1366,44 @@ static int do_test(int m)
 				  speed_template_8);
 		break;
 
+	case 209:
+		test_cipher_speed("ecb(cast5)", ENCRYPT, sec, NULL, 0,
+				  speed_template_8_16);
+		test_cipher_speed("ecb(cast5)", DECRYPT, sec, NULL, 0,
+				  speed_template_8_16);
+		test_cipher_speed("cbc(cast5)", ENCRYPT, sec, NULL, 0,
+				  speed_template_8_16);
+		test_cipher_speed("cbc(cast5)", DECRYPT, sec, NULL, 0,
+				  speed_template_8_16);
+		test_cipher_speed("ctr(cast5)", ENCRYPT, sec, NULL, 0,
+				  speed_template_8_16);
+		test_cipher_speed("ctr(cast5)", DECRYPT, sec, NULL, 0,
+				  speed_template_8_16);
+		break;
+
+	case 210:
+		test_cipher_speed("ecb(cast6)", ENCRYPT, sec, NULL, 0,
+				  speed_template_16_32);
+		test_cipher_speed("ecb(cast6)", DECRYPT, sec, NULL, 0,
+				  speed_template_16_32);
+		test_cipher_speed("cbc(cast6)", ENCRYPT, sec, NULL, 0,
+				  speed_template_16_32);
+		test_cipher_speed("cbc(cast6)", DECRYPT, sec, NULL, 0,
+				  speed_template_16_32);
+		test_cipher_speed("ctr(cast6)", ENCRYPT, sec, NULL, 0,
+				  speed_template_16_32);
+		test_cipher_speed("ctr(cast6)", DECRYPT, sec, NULL, 0,
+				  speed_template_16_32);
+		test_cipher_speed("lrw(cast6)", ENCRYPT, sec, NULL, 0,
+				  speed_template_32_48);
+		test_cipher_speed("lrw(cast6)", DECRYPT, sec, NULL, 0,
+				  speed_template_32_48);
+		test_cipher_speed("xts(cast6)", ENCRYPT, sec, NULL, 0,
+				  speed_template_32_64);
+		test_cipher_speed("xts(cast6)", DECRYPT, sec, NULL, 0,
+				  speed_template_32_64);
+		break;
+
 	case 300:
 		/* fall through */
 
@@ -1639,6 +1684,44 @@ static int do_test(int m)
 				   speed_template_8);
 		break;
 
+	case 506:
+		test_acipher_speed("ecb(cast5)", ENCRYPT, sec, NULL, 0,
+				   speed_template_8_16);
+		test_acipher_speed("ecb(cast5)", DECRYPT, sec, NULL, 0,
+				   speed_template_8_16);
+		test_acipher_speed("cbc(cast5)", ENCRYPT, sec, NULL, 0,
+				   speed_template_8_16);
+		test_acipher_speed("cbc(cast5)", DECRYPT, sec, NULL, 0,
+				   speed_template_8_16);
+		test_acipher_speed("ctr(cast5)", ENCRYPT, sec, NULL, 0,
+				   speed_template_8_16);
+		test_acipher_speed("ctr(cast5)", DECRYPT, sec, NULL, 0,
+				   speed_template_8_16);
+		break;
+
+	case 507:
+		test_acipher_speed("ecb(cast6)", ENCRYPT, sec, NULL, 0,
+				   speed_template_16_32);
+		test_acipher_speed("ecb(cast6)", DECRYPT, sec, NULL, 0,
+				   speed_template_16_32);
+		test_acipher_speed("cbc(cast6)", ENCRYPT, sec, NULL, 0,
+				   speed_template_16_32);
+		test_acipher_speed("cbc(cast6)", DECRYPT, sec, NULL, 0,
+				   speed_template_16_32);
+		test_acipher_speed("ctr(cast6)", ENCRYPT, sec, NULL, 0,
+				   speed_template_16_32);
+		test_acipher_speed("ctr(cast6)", DECRYPT, sec, NULL, 0,
+				   speed_template_16_32);
+		test_acipher_speed("lrw(cast6)", ENCRYPT, sec, NULL, 0,
+				   speed_template_32_48);
+		test_acipher_speed("lrw(cast6)", DECRYPT, sec, NULL, 0,
+				   speed_template_32_48);
+		test_acipher_speed("xts(cast6)", ENCRYPT, sec, NULL, 0,
+				   speed_template_32_64);
+		test_acipher_speed("xts(cast6)", DECRYPT, sec, NULL, 0,
+				   speed_template_32_64);
+		break;
+
 	case 1000:
 		test_available();
 		break;

+ 1 - 0
crypto/tcrypt.h

@@ -47,6 +47,7 @@ static struct cipher_speed_template des3_speed_template[] = {
  */
 static u8 speed_template_8[] = {8, 0};
 static u8 speed_template_24[] = {24, 0};
+static u8 speed_template_8_16[] = {8, 16, 0};
 static u8 speed_template_8_32[] = {8, 32, 0};
 static u8 speed_template_16_32[] = {16, 32, 0};
 static u8 speed_template_16_24_32[] = {16, 24, 32, 0};

+ 6 - 35
crypto/tea.c

@@ -219,84 +219,55 @@ static void xeta_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	out[1] = cpu_to_le32(z);
 }
 
-static struct crypto_alg tea_alg = {
+static struct crypto_alg tea_algs[3] = { {
 	.cra_name		=	"tea",
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	TEA_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof (struct tea_ctx),
 	.cra_alignmask		=	3,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(tea_alg.cra_list),
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	TEA_KEY_SIZE,
 	.cia_max_keysize	=	TEA_KEY_SIZE,
 	.cia_setkey		= 	tea_setkey,
 	.cia_encrypt		=	tea_encrypt,
 	.cia_decrypt		=	tea_decrypt } }
-};
-
-static struct crypto_alg xtea_alg = {
+}, {
 	.cra_name		=	"xtea",
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	XTEA_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof (struct xtea_ctx),
 	.cra_alignmask		=	3,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(xtea_alg.cra_list),
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	XTEA_KEY_SIZE,
 	.cia_max_keysize	=	XTEA_KEY_SIZE,
 	.cia_setkey		= 	xtea_setkey,
 	.cia_encrypt		=	xtea_encrypt,
 	.cia_decrypt		=	xtea_decrypt } }
-};
-
-static struct crypto_alg xeta_alg = {
+}, {
 	.cra_name		=	"xeta",
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	XTEA_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof (struct xtea_ctx),
 	.cra_alignmask		=	3,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(xtea_alg.cra_list),
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	XTEA_KEY_SIZE,
 	.cia_max_keysize	=	XTEA_KEY_SIZE,
 	.cia_setkey		= 	xtea_setkey,
 	.cia_encrypt		=	xeta_encrypt,
 	.cia_decrypt		=	xeta_decrypt } }
-};
+} };
 
 static int __init tea_mod_init(void)
 {
-	int ret = 0;
-	
-	ret = crypto_register_alg(&tea_alg);
-	if (ret < 0)
-		goto out;
-
-	ret = crypto_register_alg(&xtea_alg);
-	if (ret < 0) {
-		crypto_unregister_alg(&tea_alg);
-		goto out;
-	}
-
-	ret = crypto_register_alg(&xeta_alg);
-	if (ret < 0) {
-		crypto_unregister_alg(&tea_alg);
-		crypto_unregister_alg(&xtea_alg);
-		goto out;
-	}
-
-out:	
-	return ret;
+	return crypto_register_algs(tea_algs, ARRAY_SIZE(tea_algs));
 }
 
 static void __exit tea_mod_fini(void)
 {
-	crypto_unregister_alg(&tea_alg);
-	crypto_unregister_alg(&xtea_alg);
-	crypto_unregister_alg(&xeta_alg);
+	crypto_unregister_algs(tea_algs, ARRAY_SIZE(tea_algs));
 }
 
 MODULE_ALIAS("xtea");

+ 392 - 80
crypto/testmgr.c

@@ -358,8 +358,9 @@ out_nobuf:
 	return ret;
 }
 
-static int test_aead(struct crypto_aead *tfm, int enc,
-		     struct aead_testvec *template, unsigned int tcount)
+static int __test_aead(struct crypto_aead *tfm, int enc,
+		       struct aead_testvec *template, unsigned int tcount,
+		       const bool diff_dst)
 {
 	const char *algo = crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm));
 	unsigned int i, j, k, n, temp;
@@ -367,15 +368,18 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 	char *q;
 	char *key;
 	struct aead_request *req;
-	struct scatterlist sg[8];
-	struct scatterlist asg[8];
-	const char *e;
+	struct scatterlist *sg;
+	struct scatterlist *asg;
+	struct scatterlist *sgout;
+	const char *e, *d;
 	struct tcrypt_result result;
 	unsigned int authsize;
 	void *input;
+	void *output;
 	void *assoc;
 	char iv[MAX_IVLEN];
 	char *xbuf[XBUFSIZE];
+	char *xoutbuf[XBUFSIZE];
 	char *axbuf[XBUFSIZE];
 
 	if (testmgr_alloc_buf(xbuf))
@@ -383,6 +387,21 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 	if (testmgr_alloc_buf(axbuf))
 		goto out_noaxbuf;
 
+	if (diff_dst && testmgr_alloc_buf(xoutbuf))
+		goto out_nooutbuf;
+
+	/* avoid "the frame size is larger than 1024 bytes" compiler warning */
+	sg = kmalloc(sizeof(*sg) * 8 * (diff_dst ? 3 : 2), GFP_KERNEL);
+	if (!sg)
+		goto out_nosg;
+	asg = &sg[8];
+	sgout = &asg[8];
+
+	if (diff_dst)
+		d = "-ddst";
+	else
+		d = "";
+
 	if (enc == ENCRYPT)
 		e = "encryption";
 	else
@@ -392,8 +411,8 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 
 	req = aead_request_alloc(tfm, GFP_KERNEL);
 	if (!req) {
-		printk(KERN_ERR "alg: aead: Failed to allocate request for "
-		       "%s\n", algo);
+		pr_err("alg: aead%s: Failed to allocate request for %s\n",
+		       d, algo);
 		goto out;
 	}
 
@@ -432,9 +451,8 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 			ret = crypto_aead_setkey(tfm, key,
 						 template[i].klen);
 			if (!ret == template[i].fail) {
-				printk(KERN_ERR "alg: aead: setkey failed on "
-				       "test %d for %s: flags=%x\n", j, algo,
-				       crypto_aead_get_flags(tfm));
+				pr_err("alg: aead%s: setkey failed on test %d for %s: flags=%x\n",
+				       d, j, algo, crypto_aead_get_flags(tfm));
 				goto out;
 			} else if (ret)
 				continue;
@@ -442,18 +460,26 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 			authsize = abs(template[i].rlen - template[i].ilen);
 			ret = crypto_aead_setauthsize(tfm, authsize);
 			if (ret) {
-				printk(KERN_ERR "alg: aead: Failed to set "
-				       "authsize to %u on test %d for %s\n",
-				       authsize, j, algo);
+				pr_err("alg: aead%s: Failed to set authsize to %u on test %d for %s\n",
+				       d, authsize, j, algo);
 				goto out;
 			}
 
 			sg_init_one(&sg[0], input,
 				    template[i].ilen + (enc ? authsize : 0));
 
+			if (diff_dst) {
+				output = xoutbuf[0];
+				sg_init_one(&sgout[0], output,
+					    template[i].ilen +
+						(enc ? authsize : 0));
+			} else {
+				output = input;
+			}
+
 			sg_init_one(&asg[0], assoc, template[i].alen);
 
-			aead_request_set_crypt(req, sg, sg,
+			aead_request_set_crypt(req, sg, (diff_dst) ? sgout : sg,
 					       template[i].ilen, iv);
 
 			aead_request_set_assoc(req, asg, template[i].alen);
@@ -466,10 +492,8 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 			case 0:
 				if (template[i].novrfy) {
 					/* verification was supposed to fail */
-					printk(KERN_ERR "alg: aead: %s failed "
-					       "on test %d for %s: ret was 0, "
-					       "expected -EBADMSG\n",
-					       e, j, algo);
+					pr_err("alg: aead%s: %s failed on test %d for %s: ret was 0, expected -EBADMSG\n",
+					       d, e, j, algo);
 					/* so really, we got a bad message */
 					ret = -EBADMSG;
 					goto out;
@@ -489,15 +513,15 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 					continue;
 				/* fall through */
 			default:
-				printk(KERN_ERR "alg: aead: %s failed on test "
-				       "%d for %s: ret=%d\n", e, j, algo, -ret);
+				pr_err("alg: aead%s: %s failed on test %d for %s: ret=%d\n",
+				       d, e, j, algo, -ret);
 				goto out;
 			}
 
-			q = input;
+			q = output;
 			if (memcmp(q, template[i].result, template[i].rlen)) {
-				printk(KERN_ERR "alg: aead: Test %d failed on "
-				       "%s for %s\n", j, e, algo);
+				pr_err("alg: aead%s: Test %d failed on %s for %s\n",
+				       d, j, e, algo);
 				hexdump(q, template[i].rlen);
 				ret = -EINVAL;
 				goto out;
@@ -522,9 +546,8 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 
 			ret = crypto_aead_setkey(tfm, key, template[i].klen);
 			if (!ret == template[i].fail) {
-				printk(KERN_ERR "alg: aead: setkey failed on "
-				       "chunk test %d for %s: flags=%x\n", j,
-				       algo, crypto_aead_get_flags(tfm));
+				pr_err("alg: aead%s: setkey failed on chunk test %d for %s: flags=%x\n",
+				       d, j, algo, crypto_aead_get_flags(tfm));
 				goto out;
 			} else if (ret)
 				continue;
@@ -533,6 +556,8 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 
 			ret = -EINVAL;
 			sg_init_table(sg, template[i].np);
+			if (diff_dst)
+				sg_init_table(sgout, template[i].np);
 			for (k = 0, temp = 0; k < template[i].np; k++) {
 				if (WARN_ON(offset_in_page(IDX[k]) +
 					    template[i].tap[k] > PAGE_SIZE))
@@ -551,14 +576,26 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 					q[n] = 0;
 
 				sg_set_buf(&sg[k], q, template[i].tap[k]);
+
+				if (diff_dst) {
+					q = xoutbuf[IDX[k] >> PAGE_SHIFT] +
+					    offset_in_page(IDX[k]);
+
+					memset(q, 0, template[i].tap[k]);
+					if (offset_in_page(q) + n < PAGE_SIZE)
+						q[n] = 0;
+
+					sg_set_buf(&sgout[k], q,
+						   template[i].tap[k]);
+				}
+
 				temp += template[i].tap[k];
 			}
 
 			ret = crypto_aead_setauthsize(tfm, authsize);
 			if (ret) {
-				printk(KERN_ERR "alg: aead: Failed to set "
-				       "authsize to %u on chunk test %d for "
-				       "%s\n", authsize, j, algo);
+				pr_err("alg: aead%s: Failed to set authsize to %u on chunk test %d for %s\n",
+				       d, authsize, j, algo);
 				goto out;
 			}
 
@@ -571,6 +608,9 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 				}
 
 				sg[k - 1].length += authsize;
+
+				if (diff_dst)
+					sgout[k - 1].length += authsize;
 			}
 
 			sg_init_table(asg, template[i].anp);
@@ -588,7 +628,7 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 				temp += template[i].atap[k];
 			}
 
-			aead_request_set_crypt(req, sg, sg,
+			aead_request_set_crypt(req, sg, (diff_dst) ? sgout : sg,
 					       template[i].ilen,
 					       iv);
 
@@ -602,10 +642,8 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 			case 0:
 				if (template[i].novrfy) {
 					/* verification was supposed to fail */
-					printk(KERN_ERR "alg: aead: %s failed "
-					       "on chunk test %d for %s: ret "
-					       "was 0, expected -EBADMSG\n",
-					       e, j, algo);
+					pr_err("alg: aead%s: %s failed on chunk test %d for %s: ret was 0, expected -EBADMSG\n",
+					       d, e, j, algo);
 					/* so really, we got a bad message */
 					ret = -EBADMSG;
 					goto out;
@@ -625,32 +663,35 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 					continue;
 				/* fall through */
 			default:
-				printk(KERN_ERR "alg: aead: %s failed on "
-				       "chunk test %d for %s: ret=%d\n", e, j,
-				       algo, -ret);
+				pr_err("alg: aead%s: %s failed on chunk test %d for %s: ret=%d\n",
+				       d, e, j, algo, -ret);
 				goto out;
 			}
 
 			ret = -EINVAL;
 			for (k = 0, temp = 0; k < template[i].np; k++) {
-				q = xbuf[IDX[k] >> PAGE_SHIFT] +
-				    offset_in_page(IDX[k]);
+				if (diff_dst)
+					q = xoutbuf[IDX[k] >> PAGE_SHIFT] +
+					    offset_in_page(IDX[k]);
+				else
+					q = xbuf[IDX[k] >> PAGE_SHIFT] +
+					    offset_in_page(IDX[k]);
 
 				n = template[i].tap[k];
 				if (k == template[i].np - 1)
 					n += enc ? authsize : -authsize;
 
 				if (memcmp(q, template[i].result + temp, n)) {
-					printk(KERN_ERR "alg: aead: Chunk "
-					       "test %d failed on %s at page "
-					       "%u for %s\n", j, e, k, algo);
+					pr_err("alg: aead%s: Chunk test %d failed on %s at page %u for %s\n",
+					       d, j, e, k, algo);
 					hexdump(q, n);
 					goto out;
 				}
 
 				q += n;
 				if (k == template[i].np - 1 && !enc) {
-					if (memcmp(q, template[i].input +
+					if (!diff_dst &&
+						memcmp(q, template[i].input +
 						      temp + n, authsize))
 						n = authsize;
 					else
@@ -661,11 +702,8 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 						;
 				}
 				if (n) {
-					printk(KERN_ERR "alg: aead: Result "
-					       "buffer corruption in chunk "
-					       "test %d on %s at page %u for "
-					       "%s: %u bytes:\n", j, e, k,
-					       algo, n);
+					pr_err("alg: aead%s: Result buffer corruption in chunk test %d on %s at page %u for %s: %u bytes:\n",
+					       d, j, e, k, algo, n);
 					hexdump(q, n);
 					goto out;
 				}
@@ -679,6 +717,11 @@ static int test_aead(struct crypto_aead *tfm, int enc,
 
 out:
 	aead_request_free(req);
+	kfree(sg);
+out_nosg:
+	if (diff_dst)
+		testmgr_free_buf(xoutbuf);
+out_nooutbuf:
 	testmgr_free_buf(axbuf);
 out_noaxbuf:
 	testmgr_free_buf(xbuf);
@@ -686,6 +729,20 @@ out_noxbuf:
 	return ret;
 }
 
+static int test_aead(struct crypto_aead *tfm, int enc,
+		     struct aead_testvec *template, unsigned int tcount)
+{
+	int ret;
+
+	/* test 'dst == src' case */
+	ret = __test_aead(tfm, enc, template, tcount, false);
+	if (ret)
+		return ret;
+
+	/* test 'dst != src' case */
+	return __test_aead(tfm, enc, template, tcount, true);
+}
+
 static int test_cipher(struct crypto_cipher *tfm, int enc,
 		       struct cipher_testvec *template, unsigned int tcount)
 {
@@ -761,8 +818,9 @@ out_nobuf:
 	return ret;
 }
 
-static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
-			 struct cipher_testvec *template, unsigned int tcount)
+static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc,
+			   struct cipher_testvec *template, unsigned int tcount,
+			   const bool diff_dst)
 {
 	const char *algo =
 		crypto_tfm_alg_driver_name(crypto_ablkcipher_tfm(tfm));
@@ -770,16 +828,26 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
 	char *q;
 	struct ablkcipher_request *req;
 	struct scatterlist sg[8];
-	const char *e;
+	struct scatterlist sgout[8];
+	const char *e, *d;
 	struct tcrypt_result result;
 	void *data;
 	char iv[MAX_IVLEN];
 	char *xbuf[XBUFSIZE];
+	char *xoutbuf[XBUFSIZE];
 	int ret = -ENOMEM;
 
 	if (testmgr_alloc_buf(xbuf))
 		goto out_nobuf;
 
+	if (diff_dst && testmgr_alloc_buf(xoutbuf))
+		goto out_nooutbuf;
+
+	if (diff_dst)
+		d = "-ddst";
+	else
+		d = "";
+
 	if (enc == ENCRYPT)
 	        e = "encryption";
 	else
@@ -789,8 +857,8 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
 
 	req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
 	if (!req) {
-		printk(KERN_ERR "alg: skcipher: Failed to allocate request "
-		       "for %s\n", algo);
+		pr_err("alg: skcipher%s: Failed to allocate request for %s\n",
+		       d, algo);
 		goto out;
 	}
 
@@ -804,7 +872,7 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
 		else
 			memset(iv, 0, MAX_IVLEN);
 
-		if (!(template[i].np)) {
+		if (!(template[i].np) || (template[i].also_non_np)) {
 			j++;
 
 			ret = -EINVAL;
@@ -822,16 +890,21 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
 			ret = crypto_ablkcipher_setkey(tfm, template[i].key,
 						       template[i].klen);
 			if (!ret == template[i].fail) {
-				printk(KERN_ERR "alg: skcipher: setkey failed "
-				       "on test %d for %s: flags=%x\n", j,
-				       algo, crypto_ablkcipher_get_flags(tfm));
+				pr_err("alg: skcipher%s: setkey failed on test %d for %s: flags=%x\n",
+				       d, j, algo,
+				       crypto_ablkcipher_get_flags(tfm));
 				goto out;
 			} else if (ret)
 				continue;
 
 			sg_init_one(&sg[0], data, template[i].ilen);
+			if (diff_dst) {
+				data = xoutbuf[0];
+				sg_init_one(&sgout[0], data, template[i].ilen);
+			}
 
-			ablkcipher_request_set_crypt(req, sg, sg,
+			ablkcipher_request_set_crypt(req, sg,
+						     (diff_dst) ? sgout : sg,
 						     template[i].ilen, iv);
 			ret = enc ?
 				crypto_ablkcipher_encrypt(req) :
@@ -850,16 +923,15 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
 				}
 				/* fall through */
 			default:
-				printk(KERN_ERR "alg: skcipher: %s failed on "
-				       "test %d for %s: ret=%d\n", e, j, algo,
-				       -ret);
+				pr_err("alg: skcipher%s: %s failed on test %d for %s: ret=%d\n",
+				       d, e, j, algo, -ret);
 				goto out;
 			}
 
 			q = data;
 			if (memcmp(q, template[i].result, template[i].rlen)) {
-				printk(KERN_ERR "alg: skcipher: Test %d "
-				       "failed on %s for %s\n", j, e, algo);
+				pr_err("alg: skcipher%s: Test %d failed on %s for %s\n",
+				       d, j, e, algo);
 				hexdump(q, template[i].rlen);
 				ret = -EINVAL;
 				goto out;
@@ -886,9 +958,8 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
 			ret = crypto_ablkcipher_setkey(tfm, template[i].key,
 						       template[i].klen);
 			if (!ret == template[i].fail) {
-				printk(KERN_ERR "alg: skcipher: setkey failed "
-				       "on chunk test %d for %s: flags=%x\n",
-				       j, algo,
+				pr_err("alg: skcipher%s: setkey failed on chunk test %d for %s: flags=%x\n",
+				       d, j, algo,
 				       crypto_ablkcipher_get_flags(tfm));
 				goto out;
 			} else if (ret)
@@ -897,6 +968,8 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
 			temp = 0;
 			ret = -EINVAL;
 			sg_init_table(sg, template[i].np);
+			if (diff_dst)
+				sg_init_table(sgout, template[i].np);
 			for (k = 0; k < template[i].np; k++) {
 				if (WARN_ON(offset_in_page(IDX[k]) +
 					    template[i].tap[k] > PAGE_SIZE))
@@ -913,11 +986,24 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
 					q[template[i].tap[k]] = 0;
 
 				sg_set_buf(&sg[k], q, template[i].tap[k]);
+				if (diff_dst) {
+					q = xoutbuf[IDX[k] >> PAGE_SHIFT] +
+					    offset_in_page(IDX[k]);
+
+					sg_set_buf(&sgout[k], q,
+						   template[i].tap[k]);
+
+					memset(q, 0, template[i].tap[k]);
+					if (offset_in_page(q) +
+					    template[i].tap[k] < PAGE_SIZE)
+						q[template[i].tap[k]] = 0;
+				}
 
 				temp += template[i].tap[k];
 			}
 
-			ablkcipher_request_set_crypt(req, sg, sg,
+			ablkcipher_request_set_crypt(req, sg,
+					(diff_dst) ? sgout : sg,
 					template[i].ilen, iv);
 
 			ret = enc ?
@@ -937,23 +1023,25 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
 				}
 				/* fall through */
 			default:
-				printk(KERN_ERR "alg: skcipher: %s failed on "
-				       "chunk test %d for %s: ret=%d\n", e, j,
-				       algo, -ret);
+				pr_err("alg: skcipher%s: %s failed on chunk test %d for %s: ret=%d\n",
+				       d, e, j, algo, -ret);
 				goto out;
 			}
 
 			temp = 0;
 			ret = -EINVAL;
 			for (k = 0; k < template[i].np; k++) {
-				q = xbuf[IDX[k] >> PAGE_SHIFT] +
-				    offset_in_page(IDX[k]);
+				if (diff_dst)
+					q = xoutbuf[IDX[k] >> PAGE_SHIFT] +
+					    offset_in_page(IDX[k]);
+				else
+					q = xbuf[IDX[k] >> PAGE_SHIFT] +
+					    offset_in_page(IDX[k]);
 
 				if (memcmp(q, template[i].result + temp,
 					   template[i].tap[k])) {
-					printk(KERN_ERR "alg: skcipher: Chunk "
-					       "test %d failed on %s at page "
-					       "%u for %s\n", j, e, k, algo);
+					pr_err("alg: skcipher%s: Chunk test %d failed on %s at page %u for %s\n",
+					       d, j, e, k, algo);
 					hexdump(q, template[i].tap[k]);
 					goto out;
 				}
@@ -962,11 +1050,8 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
 				for (n = 0; offset_in_page(q + n) && q[n]; n++)
 					;
 				if (n) {
-					printk(KERN_ERR "alg: skcipher: "
-					       "Result buffer corruption in "
-					       "chunk test %d on %s at page "
-					       "%u for %s: %u bytes:\n", j, e,
-					       k, algo, n);
+					pr_err("alg: skcipher%s: Result buffer corruption in chunk test %d on %s at page %u for %s: %u bytes:\n",
+					       d, j, e, k, algo, n);
 					hexdump(q, n);
 					goto out;
 				}
@@ -979,11 +1064,28 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
 
 out:
 	ablkcipher_request_free(req);
+	if (diff_dst)
+		testmgr_free_buf(xoutbuf);
+out_nooutbuf:
 	testmgr_free_buf(xbuf);
 out_nobuf:
 	return ret;
 }
 
+static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
+			 struct cipher_testvec *template, unsigned int tcount)
+{
+	int ret;
+
+	/* test 'dst == src' case */
+	ret = __test_skcipher(tfm, enc, template, tcount, false);
+	if (ret)
+		return ret;
+
+	/* test 'dst != src' case */
+	return __test_skcipher(tfm, enc, template, tcount, true);
+}
+
 static int test_comp(struct crypto_comp *tfm, struct comp_testvec *ctemplate,
 		     struct comp_testvec *dtemplate, int ctcount, int dtcount)
 {
@@ -1534,6 +1636,36 @@ static int alg_test_null(const struct alg_test_desc *desc,
 /* Please keep this list sorted by algorithm name. */
 static const struct alg_test_desc alg_test_descs[] = {
 	{
+		.alg = "__cbc-cast5-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
+	}, {
+		.alg = "__cbc-cast6-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
+	}, {
 		.alg = "__cbc-serpent-avx",
 		.test = alg_test_null,
 		.suite = {
@@ -1594,6 +1726,36 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "__driver-cbc-cast5-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
+	}, {
+		.alg = "__driver-cbc-cast6-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "__driver-cbc-serpent-avx",
 		.test = alg_test_null,
@@ -1655,6 +1817,36 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "__driver-ecb-cast5-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
+	}, {
+		.alg = "__driver-ecb-cast6-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "__driver-ecb-serpent-avx",
 		.test = alg_test_null,
@@ -1817,6 +2009,36 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "cbc(cast5)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = cast5_cbc_enc_tv_template,
+					.count = CAST5_CBC_ENC_TEST_VECTORS
+				},
+				.dec = {
+					.vecs = cast5_cbc_dec_tv_template,
+					.count = CAST5_CBC_DEC_TEST_VECTORS
+				}
+			}
+		}
+	}, {
+		.alg = "cbc(cast6)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = cast6_cbc_enc_tv_template,
+					.count = CAST6_CBC_ENC_TEST_VECTORS
+				},
+				.dec = {
+					.vecs = cast6_cbc_dec_tv_template,
+					.count = CAST6_CBC_DEC_TEST_VECTORS
+				}
+			}
+		}
 	}, {
 		.alg = "cbc(des)",
 		.test = alg_test_skcipher,
@@ -1936,6 +2158,36 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "cryptd(__driver-ecb-cast5-avx)",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
+	}, {
+		.alg = "cryptd(__driver-ecb-cast6-avx)",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "cryptd(__driver-ecb-serpent-avx)",
 		.test = alg_test_null,
@@ -2053,6 +2305,36 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "ctr(cast5)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = cast5_ctr_enc_tv_template,
+					.count = CAST5_CTR_ENC_TEST_VECTORS
+				},
+				.dec = {
+					.vecs = cast5_ctr_dec_tv_template,
+					.count = CAST5_CTR_DEC_TEST_VECTORS
+				}
+			}
+		}
+	}, {
+		.alg = "ctr(cast6)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = cast6_ctr_enc_tv_template,
+					.count = CAST6_CTR_ENC_TEST_VECTORS
+				},
+				.dec = {
+					.vecs = cast6_ctr_dec_tv_template,
+					.count = CAST6_CTR_DEC_TEST_VECTORS
+				}
+			}
+		}
 	}, {
 		.alg = "ctr(serpent)",
 		.test = alg_test_skcipher,
@@ -2529,6 +2811,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "lrw(cast6)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = cast6_lrw_enc_tv_template,
+					.count = CAST6_LRW_ENC_TEST_VECTORS
+				},
+				.dec = {
+					.vecs = cast6_lrw_dec_tv_template,
+					.count = CAST6_LRW_DEC_TEST_VECTORS
+				}
+			}
+		}
 	}, {
 		.alg = "lrw(serpent)",
 		.test = alg_test_skcipher,
@@ -2881,6 +3178,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "xts(cast6)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = cast6_xts_enc_tv_template,
+					.count = CAST6_XTS_ENC_TEST_VECTORS
+				},
+				.dec = {
+					.vecs = cast6_xts_dec_tv_template,
+					.count = CAST6_XTS_DEC_TEST_VECTORS
+				}
+			}
+		}
 	}, {
 		.alg = "xts(serpent)",
 		.test = alg_test_skcipher,

File diff suppressed because it is too large
+ 973 - 100
crypto/testmgr.h


+ 6 - 32
crypto/tgr192.c

@@ -628,7 +628,7 @@ static int tgr128_final(struct shash_desc *desc, u8 * out)
 	return 0;
 }
 
-static struct shash_alg tgr192 = {
+static struct shash_alg tgr_algs[3] = { {
 	.digestsize	=	TGR192_DIGEST_SIZE,
 	.init		=	tgr192_init,
 	.update		=	tgr192_update,
@@ -640,9 +640,7 @@ static struct shash_alg tgr192 = {
 		.cra_blocksize	=	TGR192_BLOCK_SIZE,
 		.cra_module	=	THIS_MODULE,
 	}
-};
-
-static struct shash_alg tgr160 = {
+}, {
 	.digestsize	=	TGR160_DIGEST_SIZE,
 	.init		=	tgr192_init,
 	.update		=	tgr192_update,
@@ -654,9 +652,7 @@ static struct shash_alg tgr160 = {
 		.cra_blocksize	=	TGR192_BLOCK_SIZE,
 		.cra_module	=	THIS_MODULE,
 	}
-};
-
-static struct shash_alg tgr128 = {
+}, {
 	.digestsize	=	TGR128_DIGEST_SIZE,
 	.init		=	tgr192_init,
 	.update		=	tgr192_update,
@@ -668,38 +664,16 @@ static struct shash_alg tgr128 = {
 		.cra_blocksize	=	TGR192_BLOCK_SIZE,
 		.cra_module	=	THIS_MODULE,
 	}
-};
+} };
 
 static int __init tgr192_mod_init(void)
 {
-	int ret = 0;
-
-	ret = crypto_register_shash(&tgr192);
-
-	if (ret < 0) {
-		goto out;
-	}
-
-	ret = crypto_register_shash(&tgr160);
-	if (ret < 0) {
-		crypto_unregister_shash(&tgr192);
-		goto out;
-	}
-
-	ret = crypto_register_shash(&tgr128);
-	if (ret < 0) {
-		crypto_unregister_shash(&tgr192);
-		crypto_unregister_shash(&tgr160);
-	}
-      out:
-	return ret;
+	return crypto_register_shashes(tgr_algs, ARRAY_SIZE(tgr_algs));
 }
 
 static void __exit tgr192_mod_fini(void)
 {
-	crypto_unregister_shash(&tgr192);
-	crypto_unregister_shash(&tgr160);
-	crypto_unregister_shash(&tgr128);
+	crypto_unregister_shashes(tgr_algs, ARRAY_SIZE(tgr_algs));
 }
 
 MODULE_ALIAS("tgr160");

+ 0 - 1
crypto/twofish_generic.c

@@ -188,7 +188,6 @@ static struct crypto_alg alg = {
 	.cra_ctxsize        =   sizeof(struct twofish_ctx),
 	.cra_alignmask      =	3,
 	.cra_module         =   THIS_MODULE,
-	.cra_list           =   LIST_HEAD_INIT(alg.cra_list),
 	.cra_u              =   { .cipher = {
 	.cia_min_keysize    =   TF_MIN_KEY_SIZE,
 	.cia_max_keysize    =   TF_MAX_KEY_SIZE,

+ 5 - 5
crypto/vmac.c

@@ -38,11 +38,11 @@
  * Constants and masks
  */
 #define UINT64_C(x) x##ULL
-const u64 p64   = UINT64_C(0xfffffffffffffeff);  /* 2^64 - 257 prime  */
-const u64 m62   = UINT64_C(0x3fffffffffffffff);  /* 62-bit mask       */
-const u64 m63   = UINT64_C(0x7fffffffffffffff);  /* 63-bit mask       */
-const u64 m64   = UINT64_C(0xffffffffffffffff);  /* 64-bit mask       */
-const u64 mpoly = UINT64_C(0x1fffffff1fffffff);  /* Poly key mask     */
+static const u64 p64   = UINT64_C(0xfffffffffffffeff);	/* 2^64 - 257 prime  */
+static const u64 m62   = UINT64_C(0x3fffffffffffffff);	/* 62-bit mask       */
+static const u64 m63   = UINT64_C(0x7fffffffffffffff);	/* 63-bit mask       */
+static const u64 m64   = UINT64_C(0xffffffffffffffff);	/* 64-bit mask       */
+static const u64 mpoly = UINT64_C(0x1fffffff1fffffff);	/* Poly key mask     */
 
 #define pe64_to_cpup le64_to_cpup		/* Prefer little endian */
 

+ 6 - 33
crypto/wp512.c

@@ -1119,7 +1119,7 @@ static int wp256_final(struct shash_desc *desc, u8 *out)
 	return 0;
 }
 
-static struct shash_alg wp512 = {
+static struct shash_alg wp_algs[3] = { {
 	.digestsize	=	WP512_DIGEST_SIZE,
 	.init		=	wp512_init,
 	.update		=	wp512_update,
@@ -1131,9 +1131,7 @@ static struct shash_alg wp512 = {
 		.cra_blocksize	=	WP512_BLOCK_SIZE,
 		.cra_module	=	THIS_MODULE,
 	}
-};
-
-static struct shash_alg wp384 = {
+}, {
 	.digestsize	=	WP384_DIGEST_SIZE,
 	.init		=	wp512_init,
 	.update		=	wp512_update,
@@ -1145,9 +1143,7 @@ static struct shash_alg wp384 = {
 		.cra_blocksize	=	WP512_BLOCK_SIZE,
 		.cra_module	=	THIS_MODULE,
 	}
-};
-
-static struct shash_alg wp256 = {
+}, {
 	.digestsize	=	WP256_DIGEST_SIZE,
 	.init		=	wp512_init,
 	.update		=	wp512_update,
@@ -1159,39 +1155,16 @@ static struct shash_alg wp256 = {
 		.cra_blocksize	=	WP512_BLOCK_SIZE,
 		.cra_module	=	THIS_MODULE,
 	}
-};
+} };
 
 static int __init wp512_mod_init(void)
 {
-	int ret = 0;
-
-	ret = crypto_register_shash(&wp512);
-
-	if (ret < 0)
-		goto out;
-
-	ret = crypto_register_shash(&wp384);
-	if (ret < 0)
-	{
-		crypto_unregister_shash(&wp512);
-		goto out;
-	}
-
-	ret = crypto_register_shash(&wp256);
-	if (ret < 0)
-	{
-		crypto_unregister_shash(&wp512);
-		crypto_unregister_shash(&wp384);
-	}
-out:
-	return ret;
+	return crypto_register_shashes(wp_algs, ARRAY_SIZE(wp_algs));
 }
 
 static void __exit wp512_mod_fini(void)
 {
-	crypto_unregister_shash(&wp512);
-	crypto_unregister_shash(&wp384);
-	crypto_unregister_shash(&wp256);
+	crypto_unregister_shashes(wp_algs, ARRAY_SIZE(wp_algs));
 }
 
 MODULE_ALIAS("wp384");

+ 51 - 57
drivers/char/hw_random/mxc-rnga.c

@@ -59,16 +59,21 @@
 #define RNGA_STATUS_LAST_READ_STATUS	0x00000002
 #define RNGA_STATUS_SECURITY_VIOLATION	0x00000001
 
-static struct platform_device *rng_dev;
+struct mxc_rng {
+	struct device *dev;
+	struct hwrng rng;
+	void __iomem *mem;
+	struct clk *clk;
+};
 
 static int mxc_rnga_data_present(struct hwrng *rng, int wait)
 {
-	void __iomem *rng_base = (void __iomem *)rng->priv;
 	int i;
+	struct mxc_rng *mxc_rng = container_of(rng, struct mxc_rng, rng);
 
 	for (i = 0; i < 20; i++) {
 		/* how many random numbers are in FIFO? [0-16] */
-		int level = (__raw_readl(rng_base + RNGA_STATUS) &
+		int level = (__raw_readl(mxc_rng->mem + RNGA_STATUS) &
 				RNGA_STATUS_LEVEL_MASK) >> 8;
 		if (level || !wait)
 			return !!level;
@@ -81,20 +86,20 @@ static int mxc_rnga_data_read(struct hwrng *rng, u32 * data)
 {
 	int err;
 	u32 ctrl;
-	void __iomem *rng_base = (void __iomem *)rng->priv;
+	struct mxc_rng *mxc_rng = container_of(rng, struct mxc_rng, rng);
 
 	/* retrieve a random number from FIFO */
-	*data = __raw_readl(rng_base + RNGA_OUTPUT_FIFO);
+	*data = __raw_readl(mxc_rng->mem + RNGA_OUTPUT_FIFO);
 
 	/* some error while reading this random number? */
-	err = __raw_readl(rng_base + RNGA_STATUS) & RNGA_STATUS_ERROR_INT;
+	err = __raw_readl(mxc_rng->mem + RNGA_STATUS) & RNGA_STATUS_ERROR_INT;
 
 	/* if error: clear error interrupt, but doesn't return random number */
 	if (err) {
-		dev_dbg(&rng_dev->dev, "Error while reading random number!\n");
-		ctrl = __raw_readl(rng_base + RNGA_CONTROL);
+		dev_dbg(mxc_rng->dev, "Error while reading random number!\n");
+		ctrl = __raw_readl(mxc_rng->mem + RNGA_CONTROL);
 		__raw_writel(ctrl | RNGA_CONTROL_CLEAR_INT,
-					rng_base + RNGA_CONTROL);
+					mxc_rng->mem + RNGA_CONTROL);
 		return 0;
 	} else
 		return 4;
@@ -103,22 +108,22 @@ static int mxc_rnga_data_read(struct hwrng *rng, u32 * data)
 static int mxc_rnga_init(struct hwrng *rng)
 {
 	u32 ctrl, osc;
-	void __iomem *rng_base = (void __iomem *)rng->priv;
+	struct mxc_rng *mxc_rng = container_of(rng, struct mxc_rng, rng);
 
 	/* wake up */
-	ctrl = __raw_readl(rng_base + RNGA_CONTROL);
-	__raw_writel(ctrl & ~RNGA_CONTROL_SLEEP, rng_base + RNGA_CONTROL);
+	ctrl = __raw_readl(mxc_rng->mem + RNGA_CONTROL);
+	__raw_writel(ctrl & ~RNGA_CONTROL_SLEEP, mxc_rng->mem + RNGA_CONTROL);
 
 	/* verify if oscillator is working */
-	osc = __raw_readl(rng_base + RNGA_STATUS);
+	osc = __raw_readl(mxc_rng->mem + RNGA_STATUS);
 	if (osc & RNGA_STATUS_OSC_DEAD) {
-		dev_err(&rng_dev->dev, "RNGA Oscillator is dead!\n");
+		dev_err(mxc_rng->dev, "RNGA Oscillator is dead!\n");
 		return -ENODEV;
 	}
 
 	/* go running */
-	ctrl = __raw_readl(rng_base + RNGA_CONTROL);
-	__raw_writel(ctrl | RNGA_CONTROL_GO, rng_base + RNGA_CONTROL);
+	ctrl = __raw_readl(mxc_rng->mem + RNGA_CONTROL);
+	__raw_writel(ctrl | RNGA_CONTROL_GO, mxc_rng->mem + RNGA_CONTROL);
 
 	return 0;
 }
@@ -126,40 +131,40 @@ static int mxc_rnga_init(struct hwrng *rng)
 static void mxc_rnga_cleanup(struct hwrng *rng)
 {
 	u32 ctrl;
-	void __iomem *rng_base = (void __iomem *)rng->priv;
+	struct mxc_rng *mxc_rng = container_of(rng, struct mxc_rng, rng);
 
-	ctrl = __raw_readl(rng_base + RNGA_CONTROL);
+	ctrl = __raw_readl(mxc_rng->mem + RNGA_CONTROL);
 
 	/* stop rnga */
-	__raw_writel(ctrl & ~RNGA_CONTROL_GO, rng_base + RNGA_CONTROL);
+	__raw_writel(ctrl & ~RNGA_CONTROL_GO, mxc_rng->mem + RNGA_CONTROL);
 }
 
-static struct hwrng mxc_rnga = {
-	.name = "mxc-rnga",
-	.init = mxc_rnga_init,
-	.cleanup = mxc_rnga_cleanup,
-	.data_present = mxc_rnga_data_present,
-	.data_read = mxc_rnga_data_read
-};
-
 static int __init mxc_rnga_probe(struct platform_device *pdev)
 {
 	int err = -ENODEV;
-	struct clk *clk;
 	struct resource *res, *mem;
-	void __iomem *rng_base = NULL;
-
-	if (rng_dev)
-		return -EBUSY;
-
-	clk = clk_get(&pdev->dev, "rng");
-	if (IS_ERR(clk)) {
+	struct mxc_rng *mxc_rng;
+
+	mxc_rng = devm_kzalloc(&pdev->dev, sizeof(struct mxc_rng),
+					GFP_KERNEL);
+	if (!mxc_rng)
+		return -ENOMEM;
+
+	mxc_rng->dev = &pdev->dev;
+	mxc_rng->rng.name = "mxc-rnga";
+	mxc_rng->rng.init = mxc_rnga_init;
+	mxc_rng->rng.cleanup = mxc_rnga_cleanup,
+	mxc_rng->rng.data_present = mxc_rnga_data_present,
+	mxc_rng->rng.data_read = mxc_rnga_data_read,
+
+	mxc_rng->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(mxc_rng->clk)) {
 		dev_err(&pdev->dev, "Could not get rng_clk!\n");
-		err = PTR_ERR(clk);
+		err = PTR_ERR(mxc_rng->clk);
 		goto out;
 	}
 
-	clk_enable(clk);
+	clk_prepare_enable(mxc_rng->clk);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res) {
@@ -173,36 +178,27 @@ static int __init mxc_rnga_probe(struct platform_device *pdev)
 		goto err_region;
 	}
 
-	rng_base = ioremap(res->start, resource_size(res));
-	if (!rng_base) {
+	mxc_rng->mem = ioremap(res->start, resource_size(res));
+	if (!mxc_rng->mem) {
 		err = -ENOMEM;
 		goto err_ioremap;
 	}
 
-	mxc_rnga.priv = (unsigned long)rng_base;
-
-	err = hwrng_register(&mxc_rnga);
+	err = hwrng_register(&mxc_rng->rng);
 	if (err) {
 		dev_err(&pdev->dev, "MXC RNGA registering failed (%d)\n", err);
-		goto err_register;
+		goto err_ioremap;
 	}
 
-	rng_dev = pdev;
-
 	dev_info(&pdev->dev, "MXC RNGA Registered.\n");
 
 	return 0;
 
-err_register:
-	iounmap(rng_base);
-	rng_base = NULL;
-
 err_ioremap:
 	release_mem_region(res->start, resource_size(res));
 
 err_region:
-	clk_disable(clk);
-	clk_put(clk);
+	clk_disable_unprepare(mxc_rng->clk);
 
 out:
 	return err;
@@ -211,17 +207,15 @@ out:
 static int __exit mxc_rnga_remove(struct platform_device *pdev)
 {
 	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	void __iomem *rng_base = (void __iomem *)mxc_rnga.priv;
-	struct clk *clk = clk_get(&pdev->dev, "rng");
+	struct mxc_rng *mxc_rng = platform_get_drvdata(pdev);
 
-	hwrng_unregister(&mxc_rnga);
+	hwrng_unregister(&mxc_rng->rng);
 
-	iounmap(rng_base);
+	iounmap(mxc_rng->mem);
 
 	release_mem_region(res->start, resource_size(res));
 
-	clk_disable(clk);
-	clk_put(clk);
+	clk_disable_unprepare(mxc_rng->clk);
 
 	return 0;
 }

+ 5 - 12
drivers/char/hw_random/octeon-rng.c

@@ -75,42 +75,35 @@ static int __devinit octeon_rng_probe(struct platform_device *pdev)
 
 	res_ports = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res_ports)
-		goto err_ports;
+		return -ENOENT;
 
 	res_result = platform_get_resource(pdev, IORESOURCE_MEM, 1);
 	if (!res_result)
-		goto err_ports;
+		return -ENOENT;
 
 
 	rng->control_status = devm_ioremap_nocache(&pdev->dev,
 						   res_ports->start,
 						   sizeof(u64));
 	if (!rng->control_status)
-		goto err_ports;
+		return -ENOENT;
 
 	rng->result = devm_ioremap_nocache(&pdev->dev,
 					   res_result->start,
 					   sizeof(u64));
 	if (!rng->result)
-		goto err_r;
+		return -ENOENT;
 
 	rng->ops = ops;
 
 	dev_set_drvdata(&pdev->dev, &rng->ops);
 	ret = hwrng_register(&rng->ops);
 	if (ret)
-		goto err;
+		return -ENOENT;
 
 	dev_info(&pdev->dev, "Octeon Random Number Generator\n");
 
 	return 0;
-err:
-	devm_iounmap(&pdev->dev, rng->control_status);
-err_r:
-	devm_iounmap(&pdev->dev, rng->result);
-err_ports:
-	devm_kfree(&pdev->dev, rng);
-	return -ENOENT;
 }
 
 static int __exit octeon_rng_remove(struct platform_device *pdev)

+ 8 - 14
drivers/crypto/Kconfig

@@ -298,21 +298,15 @@ config CRYPTO_DEV_TEGRA_AES
 	  will be called tegra-aes.
 
 config CRYPTO_DEV_NX
-	tristate "Support for Power7+ in-Nest cryptographic acceleration"
+	bool "Support for IBM Power7+ in-Nest cryptographic acceleration"
 	depends on PPC64 && IBMVIO
-	select CRYPTO_AES
-	select CRYPTO_CBC
-	select CRYPTO_ECB
-	select CRYPTO_CCM
-	select CRYPTO_GCM
-	select CRYPTO_AUTHENC
-	select CRYPTO_XCBC
-	select CRYPTO_SHA256
-	select CRYPTO_SHA512
+	default n
 	help
-	  Support for Power7+ in-Nest cryptographic acceleration. This
-	  module supports acceleration for AES and SHA2 algorithms. If you
-	  choose 'M' here, this module will be called nx_crypto.
+	  Support for Power7+ in-Nest cryptographic acceleration.
+
+if CRYPTO_DEV_NX
+	source "drivers/crypto/nx/Kconfig"
+endif
 
 config CRYPTO_DEV_UX500
 	tristate "Driver for ST-Ericsson UX500 crypto hardware acceleration"
@@ -340,7 +334,7 @@ config CRYPTO_DEV_ATMEL_AES
 	select CRYPTO_AES
 	select CRYPTO_ALGAPI
 	select CRYPTO_BLKCIPHER
-	select CONFIG_AT_HDMAC
+	select AT_HDMAC
 	help
 	  Some Atmel processors have AES hw accelerator.
 	  Select this if you want to use the Atmel module for

+ 1 - 0
drivers/crypto/amcc/crypto4xx_core.c

@@ -1226,6 +1226,7 @@ static int __init crypto4xx_probe(struct platform_device *ofdev)
 	core_dev->dev->ce_base = of_iomap(ofdev->dev.of_node, 0);
 	if (!core_dev->dev->ce_base) {
 		dev_err(dev, "failed to of_iomap\n");
+		rc = -ENOMEM;
 		goto err_iomap;
 	}
 

+ 0 - 7
drivers/crypto/atmel-aes.c

@@ -24,15 +24,10 @@
 #include <linux/platform_device.h>
 
 #include <linux/device.h>
-#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/clk.h>
 #include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
@@ -1017,7 +1012,6 @@ static int atmel_aes_register_algs(struct atmel_aes_dev *dd)
 	int err, i, j;
 
 	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
-		INIT_LIST_HEAD(&aes_algs[i].cra_list);
 		err = crypto_register_alg(&aes_algs[i]);
 		if (err)
 			goto err_aes_algs;
@@ -1026,7 +1020,6 @@ static int atmel_aes_register_algs(struct atmel_aes_dev *dd)
 	atmel_aes_hw_version_init(dd);
 
 	if (dd->hw_version >= 0x130) {
-		INIT_LIST_HEAD(&aes_cfb64_alg[0].cra_list);
 		err = crypto_register_alg(&aes_cfb64_alg[0]);
 		if (err)
 			goto err_aes_cfb64_alg;

+ 0 - 5
drivers/crypto/atmel-sha.c

@@ -24,15 +24,10 @@
 #include <linux/platform_device.h>
 
 #include <linux/device.h>
-#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/clk.h>
 #include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>

+ 0 - 6
drivers/crypto/atmel-tdes.c

@@ -24,15 +24,10 @@
 #include <linux/platform_device.h>
 
 #include <linux/device.h>
-#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/clk.h>
 #include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
@@ -1044,7 +1039,6 @@ static int atmel_tdes_register_algs(struct atmel_tdes_dev *dd)
 	int err, i, j;
 
 	for (i = 0; i < ARRAY_SIZE(tdes_algs); i++) {
-		INIT_LIST_HEAD(&tdes_algs[i].cra_list);
 		err = crypto_register_alg(&tdes_algs[i]);
 		if (err)
 			goto err_tdes_algs;

+ 37 - 14
drivers/crypto/caam/caamalg.c

@@ -205,7 +205,7 @@ static void init_sh_desc_key_aead(u32 *desc, struct caam_ctx *ctx,
 {
 	u32 *key_jump_cmd;
 
-	init_sh_desc(desc, HDR_SHARE_WAIT);
+	init_sh_desc(desc, HDR_SHARE_SERIAL);
 
 	/* Skip if already shared */
 	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
@@ -224,7 +224,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	struct aead_tfm *tfm = &aead->base.crt_aead;
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
-	bool keys_fit_inline = 0;
+	bool keys_fit_inline = false;
 	u32 *key_jump_cmd, *jump_cmd;
 	u32 geniv, moveiv;
 	u32 *desc;
@@ -239,7 +239,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	if (DESC_AEAD_ENC_LEN + DESC_JOB_IO_LEN +
 	    ctx->split_key_pad_len + ctx->enckeylen <=
 	    CAAM_DESC_BYTES_MAX)
-		keys_fit_inline = 1;
+		keys_fit_inline = true;
 
 	/* aead_encrypt shared descriptor */
 	desc = ctx->sh_desc_enc;
@@ -297,12 +297,12 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	if (DESC_AEAD_DEC_LEN + DESC_JOB_IO_LEN +
 	    ctx->split_key_pad_len + ctx->enckeylen <=
 	    CAAM_DESC_BYTES_MAX)
-		keys_fit_inline = 1;
+		keys_fit_inline = true;
 
 	desc = ctx->sh_desc_dec;
 
 	/* aead_decrypt shared descriptor */
-	init_sh_desc(desc, HDR_SHARE_WAIT);
+	init_sh_desc(desc, HDR_SHARE_SERIAL);
 
 	/* Skip if already shared */
 	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
@@ -365,7 +365,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	if (DESC_AEAD_GIVENC_LEN + DESC_JOB_IO_LEN +
 	    ctx->split_key_pad_len + ctx->enckeylen <=
 	    CAAM_DESC_BYTES_MAX)
-		keys_fit_inline = 1;
+		keys_fit_inline = true;
 
 	/* aead_givencrypt shared descriptor */
 	desc = ctx->sh_desc_givenc;
@@ -564,7 +564,7 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
 	/* ablkcipher_encrypt shared descriptor */
 	desc = ctx->sh_desc_enc;
-	init_sh_desc(desc, HDR_SHARE_WAIT);
+	init_sh_desc(desc, HDR_SHARE_SERIAL);
 	/* Skip if already shared */
 	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
 				   JUMP_COND_SHRD);
@@ -605,7 +605,7 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 	/* ablkcipher_decrypt shared descriptor */
 	desc = ctx->sh_desc_dec;
 
-	init_sh_desc(desc, HDR_SHARE_WAIT);
+	init_sh_desc(desc, HDR_SHARE_SERIAL);
 	/* Skip if already shared */
 	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
 				   JUMP_COND_SHRD);
@@ -1354,10 +1354,10 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request
 		contig &= ~GIV_SRC_CONTIG;
 	if (dst_nents || iv_dma + ivsize != sg_dma_address(req->dst))
 		contig &= ~GIV_DST_CONTIG;
-		if (unlikely(req->src != req->dst)) {
-			dst_nents = dst_nents ? : 1;
-			sec4_sg_len += 1;
-		}
+	if (unlikely(req->src != req->dst)) {
+		dst_nents = dst_nents ? : 1;
+		sec4_sg_len += 1;
+	}
 	if (!(contig & GIV_SRC_CONTIG)) {
 		assoc_nents = assoc_nents ? : 1;
 		src_nents = src_nents ? : 1;
@@ -1650,7 +1650,11 @@ struct caam_alg_template {
 };
 
 static struct caam_alg_template driver_algs[] = {
-	/* single-pass ipsec_esp descriptor */
+	/*
+	 * single-pass ipsec_esp descriptor
+	 * authencesn(*,*) is also registered, although not present
+	 * explicitly here.
+	 */
 	{
 		.name = "authenc(hmac(md5),cbc(aes))",
 		.driver_name = "authenc-hmac-md5-cbc-aes-caam",
@@ -2213,7 +2217,9 @@ static int __init caam_algapi_init(void)
 	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
 		/* TODO: check if h/w supports alg */
 		struct caam_crypto_alg *t_alg;
+		bool done = false;
 
+authencesn:
 		t_alg = caam_alg_alloc(ctrldev, &driver_algs[i]);
 		if (IS_ERR(t_alg)) {
 			err = PTR_ERR(t_alg);
@@ -2227,8 +2233,25 @@ static int __init caam_algapi_init(void)
 			dev_warn(ctrldev, "%s alg registration failed\n",
 				t_alg->crypto_alg.cra_driver_name);
 			kfree(t_alg);
-		} else
+		} else {
 			list_add_tail(&t_alg->entry, &priv->alg_list);
+			if (driver_algs[i].type == CRYPTO_ALG_TYPE_AEAD &&
+			    !memcmp(driver_algs[i].name, "authenc", 7) &&
+			    !done) {
+				char *name;
+
+				name = driver_algs[i].name;
+				memmove(name + 10, name + 7, strlen(name) - 7);
+				memcpy(name + 7, "esn", 3);
+
+				name = driver_algs[i].driver_name;
+				memmove(name + 10, name + 7, strlen(name) - 7);
+				memcpy(name + 7, "esn", 3);
+
+				done = true;
+				goto authencesn;
+			}
+		}
 	}
 	if (!list_empty(&priv->alg_list))
 		dev_info(ctrldev, "%s algorithms registered in /proc/crypto\n",

+ 16 - 6
drivers/crypto/caam/caamhash.c

@@ -225,7 +225,7 @@ static inline void init_sh_desc_key_ahash(u32 *desc, struct caam_hash_ctx *ctx)
 {
 	u32 *key_jump_cmd;
 
-	init_sh_desc(desc, HDR_SHARE_WAIT);
+	init_sh_desc(desc, HDR_SHARE_SERIAL);
 
 	if (ctx->split_key_len) {
 		/* Skip if already shared */
@@ -311,7 +311,7 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 	/* ahash_update shared descriptor */
 	desc = ctx->sh_desc_update;
 
-	init_sh_desc(desc, HDR_SHARE_WAIT);
+	init_sh_desc(desc, HDR_SHARE_SERIAL);
 
 	/* Import context from software */
 	append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT |
@@ -430,6 +430,10 @@ static u32 hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in,
 	int ret = 0;
 
 	desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
+	if (!desc) {
+		dev_err(jrdev, "unable to allocate key input memory\n");
+		return -ENOMEM;
+	}
 
 	init_job_desc(desc, 0);
 
@@ -1736,8 +1740,11 @@ static void __exit caam_algapi_hash_exit(void)
 	struct caam_hash_alg *t_alg, *n;
 
 	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
-	if (!dev_node)
-		return;
+	if (!dev_node) {
+		dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0");
+		if (!dev_node)
+			return;
+	}
 
 	pdev = of_find_device_by_node(dev_node);
 	if (!pdev)
@@ -1812,8 +1819,11 @@ static int __init caam_algapi_hash_init(void)
 	int i = 0, err = 0;
 
 	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
-	if (!dev_node)
-		return -ENODEV;
+	if (!dev_node) {
+		dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0");
+		if (!dev_node)
+			return -ENODEV;
+	}
 
 	pdev = of_find_device_by_node(dev_node);
 	if (!pdev)

+ 6 - 3
drivers/crypto/caam/caamrng.c

@@ -193,7 +193,7 @@ static inline void rng_create_sh_desc(struct caam_rng_ctx *ctx)
 	struct device *jrdev = ctx->jrdev;
 	u32 *desc = ctx->sh_desc;
 
-	init_sh_desc(desc, HDR_SHARE_WAIT);
+	init_sh_desc(desc, HDR_SHARE_SERIAL);
 
 	/* Propagate errors from shared to job descriptor */
 	append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD);
@@ -284,8 +284,11 @@ static int __init caam_rng_init(void)
 	struct caam_drv_private *priv;
 
 	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
-	if (!dev_node)
-		return -ENODEV;
+	if (!dev_node) {
+		dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0");
+		if (!dev_node)
+			return -ENODEV;
+	}
 
 	pdev = of_find_device_by_node(dev_node);
 	if (!pdev)

+ 1 - 0
drivers/crypto/caam/compat.h

@@ -23,6 +23,7 @@
 #include <linux/types.h>
 #include <linux/debugfs.h>
 #include <linux/circ_buf.h>
+#include <linux/string.h>
 #include <net/xfrm.h>
 
 #include <crypto/algapi.h>

+ 3 - 3
drivers/crypto/caam/ctrl.c

@@ -129,7 +129,7 @@ static int instantiate_rng(struct device *jrdev)
 
 /*
  * By default, the TRNG runs for 200 clocks per sample;
- * 800 clocks per sample generates better entropy.
+ * 1600 clocks per sample generates better entropy.
  */
 static void kick_trng(struct platform_device *pdev)
 {
@@ -144,9 +144,9 @@ static void kick_trng(struct platform_device *pdev)
 
 	/* put RNG4 into program mode */
 	setbits32(&r4tst->rtmctl, RTMCTL_PRGM);
-	/* 800 clocks per sample */
+	/* 1600 clocks per sample */
 	val = rd_reg32(&r4tst->rtsdctl);
-	val = (val & ~RTSDCTL_ENT_DLY_MASK) | (800 << RTSDCTL_ENT_DLY_SHIFT);
+	val = (val & ~RTSDCTL_ENT_DLY_MASK) | (1600 << RTSDCTL_ENT_DLY_SHIFT);
 	wr_reg32(&r4tst->rtsdctl, val);
 	/* min. freq. count */
 	wr_reg32(&r4tst->rtfrqmin, 400);

+ 0 - 2
drivers/crypto/caam/error.c

@@ -77,10 +77,8 @@ static void report_ccb_status(u32 status, char *outstr)
 		"Not instantiated",
 		"Test instantiate",
 		"Prediction resistance",
-		"",
 		"Prediction resistance and test request",
 		"Uninstantiate",
-		"",
 		"Secure key generation",
 	};
 	u8 cha_id = (status & JRSTA_CCBERR_CHAID_MASK) >>

+ 4 - 0
drivers/crypto/caam/key_gen.c

@@ -54,6 +54,10 @@ u32 gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len,
 	int ret = 0;
 
 	desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
+	if (!desc) {
+		dev_err(jrdev, "unable to allocate key input memory\n");
+		return -ENOMEM;
+	}
 
 	init_job_desc(desc, 0);
 

+ 1 - 17
drivers/crypto/geode-aes.c

@@ -289,7 +289,6 @@ static struct crypto_alg geode_alg = {
 	.cra_blocksize		=	AES_MIN_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct geode_aes_op),
 	.cra_module			=	THIS_MODULE,
-	.cra_list			=	LIST_HEAD_INIT(geode_alg.cra_list),
 	.cra_u				=	{
 		.cipher	=	{
 			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
@@ -402,7 +401,6 @@ static struct crypto_alg geode_cbc_alg = {
 	.cra_alignmask		=	15,
 	.cra_type			=	&crypto_blkcipher_type,
 	.cra_module			=	THIS_MODULE,
-	.cra_list			=	LIST_HEAD_INIT(geode_cbc_alg.cra_list),
 	.cra_u				=	{
 		.blkcipher	=	{
 			.min_keysize	=	AES_MIN_KEY_SIZE,
@@ -489,7 +487,6 @@ static struct crypto_alg geode_ecb_alg = {
 	.cra_alignmask		=	15,
 	.cra_type			=	&crypto_blkcipher_type,
 	.cra_module			=	THIS_MODULE,
-	.cra_list			=	LIST_HEAD_INIT(geode_ecb_alg.cra_list),
 	.cra_u				=	{
 		.blkcipher	=	{
 			.min_keysize	=	AES_MIN_KEY_SIZE,
@@ -588,21 +585,8 @@ static struct pci_driver geode_aes_driver = {
 	.remove = __devexit_p(geode_aes_remove)
 };
 
-static int __init
-geode_aes_init(void)
-{
-	return pci_register_driver(&geode_aes_driver);
-}
-
-static void __exit
-geode_aes_exit(void)
-{
-	pci_unregister_driver(&geode_aes_driver);
-}
+module_pci_driver(geode_aes_driver);
 
 MODULE_AUTHOR("Advanced Micro Devices, Inc.");
 MODULE_DESCRIPTION("Geode LX Hardware AES driver");
 MODULE_LICENSE("GPL");
-
-module_init(geode_aes_init);
-module_exit(geode_aes_exit);

+ 4 - 1
drivers/crypto/hifn_795x.c

@@ -2611,14 +2611,17 @@ static int __devinit hifn_probe(struct pci_dev *pdev, const struct pci_device_id
 		size = pci_resource_len(pdev, i);
 
 		dev->bar[i] = ioremap_nocache(addr, size);
-		if (!dev->bar[i])
+		if (!dev->bar[i]) {
+			err = -ENOMEM;
 			goto err_out_unmap_bars;
+		}
 	}
 
 	dev->desc_virt = pci_alloc_consistent(pdev, sizeof(struct hifn_dma),
 			&dev->desc_dma);
 	if (!dev->desc_virt) {
 		dprintk("Failed to allocate descriptor rings.\n");
+		err = -ENOMEM;
 		goto err_out_unmap_bars;
 	}
 	memset(dev->desc_virt, 0, sizeof(struct hifn_dma));

+ 26 - 0
drivers/crypto/nx/Kconfig

@@ -0,0 +1,26 @@
+config CRYPTO_DEV_NX_ENCRYPT
+	tristate "Encryption acceleration support"
+	depends on PPC64 && IBMVIO
+	default y
+	select CRYPTO_AES
+	select CRYPTO_CBC
+	select CRYPTO_ECB
+	select CRYPTO_CCM
+	select CRYPTO_GCM
+	select CRYPTO_AUTHENC
+	select CRYPTO_XCBC
+	select CRYPTO_SHA256
+	select CRYPTO_SHA512
+	help
+	  Support for Power7+ in-Nest encryption acceleration. This
+	  module supports acceleration for AES and SHA2 algorithms. If you
+	  choose 'M' here, this module will be called nx_crypto.
+
+config CRYPTO_DEV_NX_COMPRESS
+	tristate "Compression acceleration support"
+	depends on PPC64 && IBMVIO
+	default y
+	help
+	  Support for Power7+ in-Nest compression acceleration. This
+	  module supports acceleration for AES and SHA2 algorithms. If you
+	  choose 'M' here, this module will be called nx_compress.

+ 4 - 1
drivers/crypto/nx/Makefile

@@ -1,4 +1,4 @@
-obj-$(CONFIG_CRYPTO_DEV_NX) += nx-crypto.o
+obj-$(CONFIG_CRYPTO_DEV_NX_ENCRYPT) += nx-crypto.o
 nx-crypto-objs := nx.o \
 		  nx_debugfs.o \
 		  nx-aes-cbc.o \
@@ -9,3 +9,6 @@ nx-crypto-objs := nx.o \
 		  nx-aes-xcbc.o \
 		  nx-sha256.o \
 		  nx-sha512.o
+
+obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o
+nx-compress-objs := nx-842.o

+ 1617 - 0
drivers/crypto/nx/nx-842.c

@@ -0,0 +1,1617 @@
+/*
+ * Driver for IBM Power 842 compression accelerator
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
+ *          Seth Jennings <sjenning@linux.vnet.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/nx842.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+
+#include <asm/page.h>
+#include <asm/pSeries_reconfig.h>
+#include <asm/vio.h>
+
+#include "nx_csbcpb.h" /* struct nx_csbcpb */
+
+#define MODULE_NAME "nx-compress"
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
+
+#define SHIFT_4K 12
+#define SHIFT_64K 16
+#define SIZE_4K (1UL << SHIFT_4K)
+#define SIZE_64K (1UL << SHIFT_64K)
+
+/* IO buffer must be 128 byte aligned */
+#define IO_BUFFER_ALIGN 128
+
+struct nx842_header {
+	int blocks_nr; /* number of compressed blocks */
+	int offset; /* offset of the first block (from beginning of header) */
+	int sizes[0]; /* size of compressed blocks */
+};
+
+static inline int nx842_header_size(const struct nx842_header *hdr)
+{
+	return sizeof(struct nx842_header) +
+			hdr->blocks_nr * sizeof(hdr->sizes[0]);
+}
+
+/* Macros for fields within nx_csbcpb */
+/* Check the valid bit within the csbcpb valid field */
+#define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7))
+
+/* CE macros operate on the completion_extension field bits in the csbcpb.
+ * CE0 0=full completion, 1=partial completion
+ * CE1 0=CE0 indicates completion, 1=termination (output may be modified)
+ * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */
+#define NX842_CSBCPB_CE0(x)	(x & BIT_MASK(7))
+#define NX842_CSBCPB_CE1(x)	(x & BIT_MASK(6))
+#define NX842_CSBCPB_CE2(x)	(x & BIT_MASK(5))
+
+/* The NX unit accepts data only on 4K page boundaries */
+#define NX842_HW_PAGE_SHIFT	SHIFT_4K
+#define NX842_HW_PAGE_SIZE	(ASM_CONST(1) << NX842_HW_PAGE_SHIFT)
+#define NX842_HW_PAGE_MASK	(~(NX842_HW_PAGE_SIZE-1))
+
+enum nx842_status {
+	UNAVAILABLE,
+	AVAILABLE
+};
+
+struct ibm_nx842_counters {
+	atomic64_t comp_complete;
+	atomic64_t comp_failed;
+	atomic64_t decomp_complete;
+	atomic64_t decomp_failed;
+	atomic64_t swdecomp;
+	atomic64_t comp_times[32];
+	atomic64_t decomp_times[32];
+};
+
+static struct nx842_devdata {
+	struct vio_dev *vdev;
+	struct device *dev;
+	struct ibm_nx842_counters *counters;
+	unsigned int max_sg_len;
+	unsigned int max_sync_size;
+	unsigned int max_sync_sg;
+	enum nx842_status status;
+} __rcu *devdata;
+static DEFINE_SPINLOCK(devdata_mutex);
+
+#define NX842_COUNTER_INC(_x) \
+static inline void nx842_inc_##_x( \
+	const struct nx842_devdata *dev) { \
+	if (dev) \
+		atomic64_inc(&dev->counters->_x); \
+}
+NX842_COUNTER_INC(comp_complete);
+NX842_COUNTER_INC(comp_failed);
+NX842_COUNTER_INC(decomp_complete);
+NX842_COUNTER_INC(decomp_failed);
+NX842_COUNTER_INC(swdecomp);
+
+#define NX842_HIST_SLOTS 16
+
+static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time)
+{
+	int bucket = fls(time);
+
+	if (bucket)
+		bucket = min((NX842_HIST_SLOTS - 1), bucket - 1);
+
+	atomic64_inc(&times[bucket]);
+}
+
+/* NX unit operation flags */
+#define NX842_OP_COMPRESS	0x0
+#define NX842_OP_CRC		0x1
+#define NX842_OP_DECOMPRESS	0x2
+#define NX842_OP_COMPRESS_CRC   (NX842_OP_COMPRESS | NX842_OP_CRC)
+#define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC)
+#define NX842_OP_ASYNC		(1<<23)
+#define NX842_OP_NOTIFY		(1<<22)
+#define NX842_OP_NOTIFY_INT(x)	((x & 0xff)<<8)
+
+static unsigned long nx842_get_desired_dma(struct vio_dev *viodev)
+{
+	/* No use of DMA mappings within the driver. */
+	return 0;
+}
+
+struct nx842_slentry {
+	unsigned long ptr; /* Real address (use __pa()) */
+	unsigned long len;
+};
+
+/* pHyp scatterlist entry */
+struct nx842_scatterlist {
+	int entry_nr; /* number of slentries */
+	struct nx842_slentry *entries; /* ptr to array of slentries */
+};
+
+/* Does not include sizeof(entry_nr) in the size */
+static inline unsigned long nx842_get_scatterlist_size(
+				struct nx842_scatterlist *sl)
+{
+	return sl->entry_nr * sizeof(struct nx842_slentry);
+}
+
+static int nx842_build_scatterlist(unsigned long buf, int len,
+			struct nx842_scatterlist *sl)
+{
+	unsigned long nextpage;
+	struct nx842_slentry *entry;
+
+	sl->entry_nr = 0;
+
+	entry = sl->entries;
+	while (len) {
+		entry->ptr = __pa(buf);
+		nextpage = ALIGN(buf + 1, NX842_HW_PAGE_SIZE);
+		if (nextpage < buf + len) {
+			/* we aren't at the end yet */
+			if (IS_ALIGNED(buf, NX842_HW_PAGE_SIZE))
+				/* we are in the middle (or beginning) */
+				entry->len = NX842_HW_PAGE_SIZE;
+			else
+				/* we are at the beginning */
+				entry->len = nextpage - buf;
+		} else {
+			/* at the end */
+			entry->len = len;
+		}
+
+		len -= entry->len;
+		buf += entry->len;
+		sl->entry_nr++;
+		entry++;
+	}
+
+	return 0;
+}
+
+/*
+ * Working memory for software decompression
+ */
+struct sw842_fifo {
+	union {
+		char f8[256][8];
+		char f4[512][4];
+	};
+	char f2[256][2];
+	unsigned char f84_full;
+	unsigned char f2_full;
+	unsigned char f8_count;
+	unsigned char f2_count;
+	unsigned int f4_count;
+};
+
+/*
+ * Working memory for crypto API
+ */
+struct nx842_workmem {
+	char bounce[PAGE_SIZE]; /* bounce buffer for decompression input */
+	union {
+		/* hardware working memory */
+		struct {
+			/* scatterlist */
+			char slin[SIZE_4K];
+			char slout[SIZE_4K];
+			/* coprocessor status/parameter block */
+			struct nx_csbcpb csbcpb;
+		};
+		/* software working memory */
+		struct sw842_fifo swfifo; /* software decompression fifo */
+	};
+};
+
+int nx842_get_workmem_size(void)
+{
+	return sizeof(struct nx842_workmem) + NX842_HW_PAGE_SIZE;
+}
+EXPORT_SYMBOL_GPL(nx842_get_workmem_size);
+
+int nx842_get_workmem_size_aligned(void)
+{
+	return sizeof(struct nx842_workmem);
+}
+EXPORT_SYMBOL_GPL(nx842_get_workmem_size_aligned);
+
+static int nx842_validate_result(struct device *dev,
+	struct cop_status_block *csb)
+{
+	/* The csb must be valid after returning from vio_h_cop_sync */
+	if (!NX842_CSBCBP_VALID_CHK(csb->valid)) {
+		dev_err(dev, "%s: cspcbp not valid upon completion.\n",
+				__func__);
+		dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n",
+				csb->valid,
+				csb->crb_seq_number,
+				csb->completion_code,
+				csb->completion_extension);
+		dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n",
+				csb->processed_byte_count,
+				(unsigned long)csb->address);
+		return -EIO;
+	}
+
+	/* Check return values from the hardware in the CSB */
+	switch (csb->completion_code) {
+	case 0:	/* Completed without error */
+		break;
+	case 64: /* Target bytes > Source bytes during compression */
+	case 13: /* Output buffer too small */
+		dev_dbg(dev, "%s: Compression output larger than input\n",
+					__func__);
+		return -ENOSPC;
+	case 66: /* Input data contains an illegal template field */
+	case 67: /* Template indicates data past the end of the input stream */
+		dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n",
+					__func__, csb->completion_code);
+		return -EINVAL;
+	default:
+		dev_dbg(dev, "%s: Unspecified error (code:%d)\n",
+					__func__, csb->completion_code);
+		return -EIO;
+	}
+
+	/* Hardware sanity check */
+	if (!NX842_CSBCPB_CE2(csb->completion_extension)) {
+		dev_err(dev, "%s: No error returned by hardware, but "
+				"data returned is unusable, contact support.\n"
+				"(Additional info: csbcbp->processed bytes "
+				"does not specify processed bytes for the "
+				"target buffer.)\n", __func__);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * nx842_compress - Compress data using the 842 algorithm
+ *
+ * Compression provide by the NX842 coprocessor on IBM Power systems.
+ * The input buffer is compressed and the result is stored in the
+ * provided output buffer.
+ *
+ * Upon return from this function @outlen contains the length of the
+ * compressed data.  If there is an error then @outlen will be 0 and an
+ * error will be specified by the return code from this function.
+ *
+ * @in: Pointer to input buffer, must be page aligned
+ * @inlen: Length of input buffer, must be PAGE_SIZE
+ * @out: Pointer to output buffer
+ * @outlen: Length of output buffer
+ * @wrkmem: ptr to buffer for working memory, size determined by
+ *          nx842_get_workmem_size()
+ *
+ * Returns:
+ *   0		Success, output of length @outlen stored in the buffer at @out
+ *   -ENOMEM	Unable to allocate internal buffers
+ *   -ENOSPC	Output buffer is to small
+ *   -EMSGSIZE	XXX Difficult to describe this limitation
+ *   -EIO	Internal error
+ *   -ENODEV	Hardware unavailable
+ */
+int nx842_compress(const unsigned char *in, unsigned int inlen,
+		       unsigned char *out, unsigned int *outlen, void *wmem)
+{
+	struct nx842_header *hdr;
+	struct nx842_devdata *local_devdata;
+	struct device *dev = NULL;
+	struct nx842_workmem *workmem;
+	struct nx842_scatterlist slin, slout;
+	struct nx_csbcpb *csbcpb;
+	int ret = 0, max_sync_size, i, bytesleft, size, hdrsize;
+	unsigned long inbuf, outbuf, padding;
+	struct vio_pfo_op op = {
+		.done = NULL,
+		.handle = 0,
+		.timeout = 0,
+	};
+	unsigned long start_time = get_tb();
+
+	/*
+	 * Make sure input buffer is 64k page aligned.  This is assumed since
+	 * this driver is designed for page compression only (for now).  This
+	 * is very nice since we can now use direct DDE(s) for the input and
+	 * the alignment is guaranteed.
+	*/
+	inbuf = (unsigned long)in;
+	if (!IS_ALIGNED(inbuf, PAGE_SIZE) || inlen != PAGE_SIZE)
+		return -EINVAL;
+
+	rcu_read_lock();
+	local_devdata = rcu_dereference(devdata);
+	if (!local_devdata || !local_devdata->dev) {
+		rcu_read_unlock();
+		return -ENODEV;
+	}
+	max_sync_size = local_devdata->max_sync_size;
+	dev = local_devdata->dev;
+
+	/* Create the header */
+	hdr = (struct nx842_header *)out;
+	hdr->blocks_nr = PAGE_SIZE / max_sync_size;
+	hdrsize = nx842_header_size(hdr);
+	outbuf = (unsigned long)out + hdrsize;
+	bytesleft = *outlen - hdrsize;
+
+	/* Init scatterlist */
+	workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem,
+		NX842_HW_PAGE_SIZE);
+	slin.entries = (struct nx842_slentry *)workmem->slin;
+	slout.entries = (struct nx842_slentry *)workmem->slout;
+
+	/* Init operation */
+	op.flags = NX842_OP_COMPRESS;
+	csbcpb = &workmem->csbcpb;
+	memset(csbcpb, 0, sizeof(*csbcpb));
+	op.csbcpb = __pa(csbcpb);
+	op.out = __pa(slout.entries);
+
+	for (i = 0; i < hdr->blocks_nr; i++) {
+		/*
+		 * Aligning the output blocks to 128 bytes does waste space,
+		 * but it prevents the need for bounce buffers and memory
+		 * copies.  It also simplifies the code a lot.  In the worst
+		 * case (64k page, 4k max_sync_size), you lose up to
+		 * (128*16)/64k = ~3% the compression factor. For 64k
+		 * max_sync_size, the loss would be at most 128/64k = ~0.2%.
+		 */
+		padding = ALIGN(outbuf, IO_BUFFER_ALIGN) - outbuf;
+		outbuf += padding;
+		bytesleft -= padding;
+		if (i == 0)
+			/* save offset into first block in header */
+			hdr->offset = padding + hdrsize;
+
+		if (bytesleft <= 0) {
+			ret = -ENOSPC;
+			goto unlock;
+		}
+
+		/*
+		 * NOTE: If the default max_sync_size is changed from 4k
+		 * to 64k, remove the "likely" case below, since a
+		 * scatterlist will always be needed.
+		 */
+		if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) {
+			/* Create direct DDE */
+			op.in = __pa(inbuf);
+			op.inlen = max_sync_size;
+
+		} else {
+			/* Create indirect DDE (scatterlist) */
+			nx842_build_scatterlist(inbuf, max_sync_size, &slin);
+			op.in = __pa(slin.entries);
+			op.inlen = -nx842_get_scatterlist_size(&slin);
+		}
+
+		/*
+		 * If max_sync_size != NX842_HW_PAGE_SIZE, an indirect
+		 * DDE is required for the outbuf.
+		 * If max_sync_size == NX842_HW_PAGE_SIZE, outbuf must
+		 * also be page aligned (1 in 128/4k=32 chance) in order
+		 * to use a direct DDE.
+		 * This is unlikely, just use an indirect DDE always.
+		 */
+		nx842_build_scatterlist(outbuf,
+			min(bytesleft, max_sync_size), &slout);
+		/* op.out set before loop */
+		op.outlen = -nx842_get_scatterlist_size(&slout);
+
+		/* Send request to pHyp */
+		ret = vio_h_cop_sync(local_devdata->vdev, &op);
+
+		/* Check for pHyp error */
+		if (ret) {
+			dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
+				__func__, ret, op.hcall_err);
+			ret = -EIO;
+			goto unlock;
+		}
+
+		/* Check for hardware error */
+		ret = nx842_validate_result(dev, &csbcpb->csb);
+		if (ret && ret != -ENOSPC)
+			goto unlock;
+
+		/* Handle incompressible data */
+		if (unlikely(ret == -ENOSPC)) {
+			if (bytesleft < max_sync_size) {
+				/*
+				 * Not enough space left in the output buffer
+				 * to store uncompressed block
+				 */
+				goto unlock;
+			} else {
+				/* Store incompressible block */
+				memcpy((void *)outbuf, (void *)inbuf,
+					max_sync_size);
+				hdr->sizes[i] = -max_sync_size;
+				outbuf += max_sync_size;
+				bytesleft -= max_sync_size;
+				/* Reset ret, incompressible data handled */
+				ret = 0;
+			}
+		} else {
+			/* Normal case, compression was successful */
+			size = csbcpb->csb.processed_byte_count;
+			dev_dbg(dev, "%s: processed_bytes=%d\n",
+				__func__, size);
+			hdr->sizes[i] = size;
+			outbuf += size;
+			bytesleft -= size;
+		}
+
+		inbuf += max_sync_size;
+	}
+
+	*outlen = (unsigned int)(outbuf - (unsigned long)out);
+
+unlock:
+	if (ret)
+		nx842_inc_comp_failed(local_devdata);
+	else {
+		nx842_inc_comp_complete(local_devdata);
+		ibm_nx842_incr_hist(local_devdata->counters->comp_times,
+			(get_tb() - start_time) / tb_ticks_per_usec);
+	}
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nx842_compress);
+
+static int sw842_decompress(const unsigned char *, int, unsigned char *, int *,
+			const void *);
+
+/**
+ * nx842_decompress - Decompress data using the 842 algorithm
+ *
+ * Decompression provide by the NX842 coprocessor on IBM Power systems.
+ * The input buffer is decompressed and the result is stored in the
+ * provided output buffer.  The size allocated to the output buffer is
+ * provided by the caller of this function in @outlen.  Upon return from
+ * this function @outlen contains the length of the decompressed data.
+ * If there is an error then @outlen will be 0 and an error will be
+ * specified by the return code from this function.
+ *
+ * @in: Pointer to input buffer, will use bounce buffer if not 128 byte
+ *      aligned
+ * @inlen: Length of input buffer
+ * @out: Pointer to output buffer, must be page aligned
+ * @outlen: Length of output buffer, must be PAGE_SIZE
+ * @wrkmem: ptr to buffer for working memory, size determined by
+ *          nx842_get_workmem_size()
+ *
+ * Returns:
+ *   0		Success, output of length @outlen stored in the buffer at @out
+ *   -ENODEV	Hardware decompression device is unavailable
+ *   -ENOMEM	Unable to allocate internal buffers
+ *   -ENOSPC	Output buffer is to small
+ *   -EINVAL	Bad input data encountered when attempting decompress
+ *   -EIO	Internal error
+ */
+int nx842_decompress(const unsigned char *in, unsigned int inlen,
+			 unsigned char *out, unsigned int *outlen, void *wmem)
+{
+	struct nx842_header *hdr;
+	struct nx842_devdata *local_devdata;
+	struct device *dev = NULL;
+	struct nx842_workmem *workmem;
+	struct nx842_scatterlist slin, slout;
+	struct nx_csbcpb *csbcpb;
+	int ret = 0, i, size, max_sync_size;
+	unsigned long inbuf, outbuf;
+	struct vio_pfo_op op = {
+		.done = NULL,
+		.handle = 0,
+		.timeout = 0,
+	};
+	unsigned long start_time = get_tb();
+
+	/* Ensure page alignment and size */
+	outbuf = (unsigned long)out;
+	if (!IS_ALIGNED(outbuf, PAGE_SIZE) || *outlen != PAGE_SIZE)
+		return -EINVAL;
+
+	rcu_read_lock();
+	local_devdata = rcu_dereference(devdata);
+	if (local_devdata)
+		dev = local_devdata->dev;
+
+	/* Get header */
+	hdr = (struct nx842_header *)in;
+
+	workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem,
+		NX842_HW_PAGE_SIZE);
+
+	inbuf = (unsigned long)in + hdr->offset;
+	if (likely(!IS_ALIGNED(inbuf, IO_BUFFER_ALIGN))) {
+		/* Copy block(s) into bounce buffer for alignment */
+		memcpy(workmem->bounce, in + hdr->offset, inlen - hdr->offset);
+		inbuf = (unsigned long)workmem->bounce;
+	}
+
+	/* Init scatterlist */
+	slin.entries = (struct nx842_slentry *)workmem->slin;
+	slout.entries = (struct nx842_slentry *)workmem->slout;
+
+	/* Init operation */
+	op.flags = NX842_OP_DECOMPRESS;
+	csbcpb = &workmem->csbcpb;
+	memset(csbcpb, 0, sizeof(*csbcpb));
+	op.csbcpb = __pa(csbcpb);
+
+	/*
+	 * max_sync_size may have changed since compression,
+	 * so we can't read it from the device info. We need
+	 * to derive it from hdr->blocks_nr.
+	 */
+	max_sync_size = PAGE_SIZE / hdr->blocks_nr;
+
+	for (i = 0; i < hdr->blocks_nr; i++) {
+		/* Skip padding */
+		inbuf = ALIGN(inbuf, IO_BUFFER_ALIGN);
+
+		if (hdr->sizes[i] < 0) {
+			/* Negative sizes indicate uncompressed data blocks */
+			size = abs(hdr->sizes[i]);
+			memcpy((void *)outbuf, (void *)inbuf, size);
+			outbuf += size;
+			inbuf += size;
+			continue;
+		}
+
+		if (!dev)
+			goto sw;
+
+		/*
+		 * The better the compression, the more likely the "likely"
+		 * case becomes.
+		 */
+		if (likely((inbuf & NX842_HW_PAGE_MASK) ==
+			((inbuf + hdr->sizes[i] - 1) & NX842_HW_PAGE_MASK))) {
+			/* Create direct DDE */
+			op.in = __pa(inbuf);
+			op.inlen = hdr->sizes[i];
+		} else {
+			/* Create indirect DDE (scatterlist) */
+			nx842_build_scatterlist(inbuf, hdr->sizes[i] , &slin);
+			op.in = __pa(slin.entries);
+			op.inlen = -nx842_get_scatterlist_size(&slin);
+		}
+
+		/*
+		 * NOTE: If the default max_sync_size is changed from 4k
+		 * to 64k, remove the "likely" case below, since a
+		 * scatterlist will always be needed.
+		 */
+		if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) {
+			/* Create direct DDE */
+			op.out = __pa(outbuf);
+			op.outlen = max_sync_size;
+		} else {
+			/* Create indirect DDE (scatterlist) */
+			nx842_build_scatterlist(outbuf, max_sync_size, &slout);
+			op.out = __pa(slout.entries);
+			op.outlen = -nx842_get_scatterlist_size(&slout);
+		}
+
+		/* Send request to pHyp */
+		ret = vio_h_cop_sync(local_devdata->vdev, &op);
+
+		/* Check for pHyp error */
+		if (ret) {
+			dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
+				__func__, ret, op.hcall_err);
+			dev = NULL;
+			goto sw;
+		}
+
+		/* Check for hardware error */
+		ret = nx842_validate_result(dev, &csbcpb->csb);
+		if (ret) {
+			dev = NULL;
+			goto sw;
+		}
+
+		/* HW decompression success */
+		inbuf += hdr->sizes[i];
+		outbuf += csbcpb->csb.processed_byte_count;
+		continue;
+
+sw:
+		/* software decompression */
+		size = max_sync_size;
+		ret = sw842_decompress(
+			(unsigned char *)inbuf, hdr->sizes[i],
+			(unsigned char *)outbuf, &size, wmem);
+		if (ret)
+			pr_debug("%s: sw842_decompress failed with %d\n",
+				__func__, ret);
+
+		if (ret) {
+			if (ret != -ENOSPC && ret != -EINVAL &&
+					ret != -EMSGSIZE)
+				ret = -EIO;
+			goto unlock;
+		}
+
+		/* SW decompression success */
+		inbuf += hdr->sizes[i];
+		outbuf += size;
+	}
+
+	*outlen = (unsigned int)(outbuf - (unsigned long)out);
+
+unlock:
+	if (ret)
+		/* decompress fail */
+		nx842_inc_decomp_failed(local_devdata);
+	else {
+		if (!dev)
+			/* software decompress */
+			nx842_inc_swdecomp(local_devdata);
+		nx842_inc_decomp_complete(local_devdata);
+		ibm_nx842_incr_hist(local_devdata->counters->decomp_times,
+			(get_tb() - start_time) / tb_ticks_per_usec);
+	}
+
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nx842_decompress);
+
+/**
+ * nx842_OF_set_defaults -- Set default (disabled) values for devdata
+ *
+ * @devdata - struct nx842_devdata to update
+ *
+ * Returns:
+ *  0 on success
+ *  -ENOENT if @devdata ptr is NULL
+ */
+static int nx842_OF_set_defaults(struct nx842_devdata *devdata)
+{
+	if (devdata) {
+		devdata->max_sync_size = 0;
+		devdata->max_sync_sg = 0;
+		devdata->max_sg_len = 0;
+		devdata->status = UNAVAILABLE;
+		return 0;
+	} else
+		return -ENOENT;
+}
+
+/**
+ * nx842_OF_upd_status -- Update the device info from OF status prop
+ *
+ * The status property indicates if the accelerator is enabled.  If the
+ * device is in the OF tree it indicates that the hardware is present.
+ * The status field indicates if the device is enabled when the status
+ * is 'okay'.  Otherwise the device driver will be disabled.
+ *
+ * @devdata - struct nx842_devdata to update
+ * @prop - struct property point containing the maxsyncop for the update
+ *
+ * Returns:
+ *  0 - Device is available
+ *  -EINVAL - Device is not available
+ */
+static int nx842_OF_upd_status(struct nx842_devdata *devdata,
+					struct property *prop) {
+	int ret = 0;
+	const char *status = (const char *)prop->value;
+
+	if (!strncmp(status, "okay", (size_t)prop->length)) {
+		devdata->status = AVAILABLE;
+	} else {
+		dev_info(devdata->dev, "%s: status '%s' is not 'okay'\n",
+				__func__, status);
+		devdata->status = UNAVAILABLE;
+	}
+
+	return ret;
+}
+
+/**
+ * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop
+ *
+ * Definition of the 'ibm,max-sg-len' OF property:
+ *  This field indicates the maximum byte length of a scatter list
+ *  for the platform facility. It is a single cell encoded as with encode-int.
+ *
+ * Example:
+ *  # od -x ibm,max-sg-len
+ *  0000000 0000 0ff0
+ *
+ *  In this example, the maximum byte length of a scatter list is
+ *  0x0ff0 (4,080).
+ *
+ * @devdata - struct nx842_devdata to update
+ * @prop - struct property point containing the maxsyncop for the update
+ *
+ * Returns:
+ *  0 on success
+ *  -EINVAL on failure
+ */
+static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata,
+					struct property *prop) {
+	int ret = 0;
+	const int *maxsglen = prop->value;
+
+	if (prop->length != sizeof(*maxsglen)) {
+		dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__);
+		dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__,
+				prop->length, sizeof(*maxsglen));
+		ret = -EINVAL;
+	} else {
+		devdata->max_sg_len = (unsigned int)min(*maxsglen,
+				(int)NX842_HW_PAGE_SIZE);
+	}
+
+	return ret;
+}
+
+/**
+ * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop
+ *
+ * Definition of the 'ibm,max-sync-cop' OF property:
+ *  Two series of cells.  The first series of cells represents the maximums
+ *  that can be synchronously compressed. The second series of cells
+ *  represents the maximums that can be synchronously decompressed.
+ *  1. The first cell in each series contains the count of the number of
+ *     data length, scatter list elements pairs that follow – each being
+ *     of the form
+ *    a. One cell data byte length
+ *    b. One cell total number of scatter list elements
+ *
+ * Example:
+ *  # od -x ibm,max-sync-cop
+ *  0000000 0000 0001 0000 1000 0000 01fe 0000 0001
+ *  0000020 0000 1000 0000 01fe
+ *
+ *  In this example, compression supports 0x1000 (4,096) data byte length
+ *  and 0x1fe (510) total scatter list elements.  Decompression supports
+ *  0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list
+ *  elements.
+ *
+ * @devdata - struct nx842_devdata to update
+ * @prop - struct property point containing the maxsyncop for the update
+ *
+ * Returns:
+ *  0 on success
+ *  -EINVAL on failure
+ */
+static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata,
+					struct property *prop) {
+	int ret = 0;
+	const struct maxsynccop_t {
+		int comp_elements;
+		int comp_data_limit;
+		int comp_sg_limit;
+		int decomp_elements;
+		int decomp_data_limit;
+		int decomp_sg_limit;
+	} *maxsynccop;
+
+	if (prop->length != sizeof(*maxsynccop)) {
+		dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__);
+		dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length,
+				sizeof(*maxsynccop));
+		ret = -EINVAL;
+		goto out;
+	}
+
+	maxsynccop = (const struct maxsynccop_t *)prop->value;
+
+	/* Use one limit rather than separate limits for compression and
+	 * decompression. Set a maximum for this so as not to exceed the
+	 * size that the header can support and round the value down to
+	 * the hardware page size (4K) */
+	devdata->max_sync_size =
+			(unsigned int)min(maxsynccop->comp_data_limit,
+					maxsynccop->decomp_data_limit);
+
+	devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size,
+					SIZE_64K);
+
+	if (devdata->max_sync_size < SIZE_4K) {
+		dev_err(devdata->dev, "%s: hardware max data size (%u) is "
+				"less than the driver minimum, unable to use "
+				"the hardware device\n",
+				__func__, devdata->max_sync_size);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	devdata->max_sync_sg = (unsigned int)min(maxsynccop->comp_sg_limit,
+						maxsynccop->decomp_sg_limit);
+	if (devdata->max_sync_sg < 1) {
+		dev_err(devdata->dev, "%s: hardware max sg size (%u) is "
+				"less than the driver minimum, unable to use "
+				"the hardware device\n",
+				__func__, devdata->max_sync_sg);
+		ret = -EINVAL;
+		goto out;
+	}
+
+out:
+	return ret;
+}
+
+/**
+ *
+ * nx842_OF_upd -- Handle OF properties updates for the device.
+ *
+ * Set all properties from the OF tree.  Optionally, a new property
+ * can be provided by the @new_prop pointer to overwrite an existing value.
+ * The device will remain disabled until all values are valid, this function
+ * will return an error for updates unless all values are valid.
+ *
+ * @new_prop: If not NULL, this property is being updated.  If NULL, update
+ *  all properties from the current values in the OF tree.
+ *
+ * Returns:
+ *  0 - Success
+ *  -ENOMEM - Could not allocate memory for new devdata structure
+ *  -EINVAL - property value not found, new_prop is not a recognized
+ *	property for the device or property value is not valid.
+ *  -ENODEV - Device is not available
+ */
+static int nx842_OF_upd(struct property *new_prop)
+{
+	struct nx842_devdata *old_devdata = NULL;
+	struct nx842_devdata *new_devdata = NULL;
+	struct device_node *of_node = NULL;
+	struct property *status = NULL;
+	struct property *maxsglen = NULL;
+	struct property *maxsyncop = NULL;
+	int ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&devdata_mutex, flags);
+	old_devdata = rcu_dereference_check(devdata,
+			lockdep_is_held(&devdata_mutex));
+	if (old_devdata)
+		of_node = old_devdata->dev->of_node;
+
+	if (!old_devdata || !of_node) {
+		pr_err("%s: device is not available\n", __func__);
+		spin_unlock_irqrestore(&devdata_mutex, flags);
+		return -ENODEV;
+	}
+
+	new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
+	if (!new_devdata) {
+		dev_err(old_devdata->dev, "%s: Could not allocate memory for device data\n", __func__);
+		ret = -ENOMEM;
+		goto error_out;
+	}
+
+	memcpy(new_devdata, old_devdata, sizeof(*old_devdata));
+	new_devdata->counters = old_devdata->counters;
+
+	/* Set ptrs for existing properties */
+	status = of_find_property(of_node, "status", NULL);
+	maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL);
+	maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL);
+	if (!status || !maxsglen || !maxsyncop) {
+		dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__);
+		ret = -EINVAL;
+		goto error_out;
+	}
+
+	/* Set ptr to new property if provided */
+	if (new_prop) {
+		/* Single property */
+		if (!strncmp(new_prop->name, "status", new_prop->length)) {
+			status = new_prop;
+
+		} else if (!strncmp(new_prop->name, "ibm,max-sg-len",
+					new_prop->length)) {
+			maxsglen = new_prop;
+
+		} else if (!strncmp(new_prop->name, "ibm,max-sync-cop",
+					new_prop->length)) {
+			maxsyncop = new_prop;
+
+		} else {
+			/*
+			 * Skip the update, the property being updated
+			 * has no impact.
+			 */
+			goto out;
+		}
+	}
+
+	/* Perform property updates */
+	ret = nx842_OF_upd_status(new_devdata, status);
+	if (ret)
+		goto error_out;
+
+	ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen);
+	if (ret)
+		goto error_out;
+
+	ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop);
+	if (ret)
+		goto error_out;
+
+out:
+	dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n",
+			__func__, new_devdata->max_sync_size,
+			old_devdata->max_sync_size);
+	dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n",
+			__func__, new_devdata->max_sync_sg,
+			old_devdata->max_sync_sg);
+	dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n",
+			__func__, new_devdata->max_sg_len,
+			old_devdata->max_sg_len);
+
+	rcu_assign_pointer(devdata, new_devdata);
+	spin_unlock_irqrestore(&devdata_mutex, flags);
+	synchronize_rcu();
+	dev_set_drvdata(new_devdata->dev, new_devdata);
+	kfree(old_devdata);
+	return 0;
+
+error_out:
+	if (new_devdata) {
+		dev_info(old_devdata->dev, "%s: device disabled\n", __func__);
+		nx842_OF_set_defaults(new_devdata);
+		rcu_assign_pointer(devdata, new_devdata);
+		spin_unlock_irqrestore(&devdata_mutex, flags);
+		synchronize_rcu();
+		dev_set_drvdata(new_devdata->dev, new_devdata);
+		kfree(old_devdata);
+	} else {
+		dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__);
+		spin_unlock_irqrestore(&devdata_mutex, flags);
+	}
+
+	if (!ret)
+		ret = -EINVAL;
+	return ret;
+}
+
+/**
+ * nx842_OF_notifier - Process updates to OF properties for the device
+ *
+ * @np: notifier block
+ * @action: notifier action
+ * @update: struct pSeries_reconfig_prop_update pointer if action is
+ *	PSERIES_UPDATE_PROPERTY
+ *
+ * Returns:
+ *	NOTIFY_OK on success
+ *	NOTIFY_BAD encoded with error number on failure, use
+ *		notifier_to_errno() to decode this value
+ */
+static int nx842_OF_notifier(struct notifier_block *np,
+					unsigned long action,
+					void *update)
+{
+	struct pSeries_reconfig_prop_update *upd;
+	struct nx842_devdata *local_devdata;
+	struct device_node *node = NULL;
+
+	upd = (struct pSeries_reconfig_prop_update *)update;
+
+	rcu_read_lock();
+	local_devdata = rcu_dereference(devdata);
+	if (local_devdata)
+		node = local_devdata->dev->of_node;
+
+	if (local_devdata &&
+			action == PSERIES_UPDATE_PROPERTY &&
+			!strcmp(upd->node->name, node->name)) {
+		rcu_read_unlock();
+		nx842_OF_upd(upd->property);
+	} else
+		rcu_read_unlock();
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block nx842_of_nb = {
+	.notifier_call = nx842_OF_notifier,
+};
+
+#define nx842_counter_read(_name)					\
+static ssize_t nx842_##_name##_show(struct device *dev,		\
+		struct device_attribute *attr,				\
+		char *buf) {						\
+	struct nx842_devdata *local_devdata;			\
+	int p = 0;							\
+	rcu_read_lock();						\
+	local_devdata = rcu_dereference(devdata);			\
+	if (local_devdata)						\
+		p = snprintf(buf, PAGE_SIZE, "%ld\n",			\
+		       atomic64_read(&local_devdata->counters->_name));	\
+	rcu_read_unlock();						\
+	return p;							\
+}
+
+#define NX842DEV_COUNTER_ATTR_RO(_name)					\
+	nx842_counter_read(_name);					\
+	static struct device_attribute dev_attr_##_name = __ATTR(_name,	\
+						0444,			\
+						nx842_##_name##_show,\
+						NULL);
+
+NX842DEV_COUNTER_ATTR_RO(comp_complete);
+NX842DEV_COUNTER_ATTR_RO(comp_failed);
+NX842DEV_COUNTER_ATTR_RO(decomp_complete);
+NX842DEV_COUNTER_ATTR_RO(decomp_failed);
+NX842DEV_COUNTER_ATTR_RO(swdecomp);
+
+static ssize_t nx842_timehist_show(struct device *,
+		struct device_attribute *, char *);
+
+static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444,
+		nx842_timehist_show, NULL);
+static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times,
+		0444, nx842_timehist_show, NULL);
+
+static ssize_t nx842_timehist_show(struct device *dev,
+		struct device_attribute *attr, char *buf) {
+	char *p = buf;
+	struct nx842_devdata *local_devdata;
+	atomic64_t *times;
+	int bytes_remain = PAGE_SIZE;
+	int bytes;
+	int i;
+
+	rcu_read_lock();
+	local_devdata = rcu_dereference(devdata);
+	if (!local_devdata) {
+		rcu_read_unlock();
+		return 0;
+	}
+
+	if (attr == &dev_attr_comp_times)
+		times = local_devdata->counters->comp_times;
+	else if (attr == &dev_attr_decomp_times)
+		times = local_devdata->counters->decomp_times;
+	else {
+		rcu_read_unlock();
+		return 0;
+	}
+
+	for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) {
+		bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n",
+			       i ? (2<<(i-1)) : 0, (2<<i)-1,
+			       atomic64_read(&times[i]));
+		bytes_remain -= bytes;
+		p += bytes;
+	}
+	/* The last bucket holds everything over
+	 * 2<<(NX842_HIST_SLOTS - 2) us */
+	bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n",
+			2<<(NX842_HIST_SLOTS - 2),
+			atomic64_read(&times[(NX842_HIST_SLOTS - 1)]));
+	p += bytes;
+
+	rcu_read_unlock();
+	return p - buf;
+}
+
+static struct attribute *nx842_sysfs_entries[] = {
+	&dev_attr_comp_complete.attr,
+	&dev_attr_comp_failed.attr,
+	&dev_attr_decomp_complete.attr,
+	&dev_attr_decomp_failed.attr,
+	&dev_attr_swdecomp.attr,
+	&dev_attr_comp_times.attr,
+	&dev_attr_decomp_times.attr,
+	NULL,
+};
+
+static struct attribute_group nx842_attribute_group = {
+	.name = NULL,		/* put in device directory */
+	.attrs = nx842_sysfs_entries,
+};
+
+static int __init nx842_probe(struct vio_dev *viodev,
+				  const struct vio_device_id *id)
+{
+	struct nx842_devdata *old_devdata, *new_devdata = NULL;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&devdata_mutex, flags);
+	old_devdata = rcu_dereference_check(devdata,
+			lockdep_is_held(&devdata_mutex));
+
+	if (old_devdata && old_devdata->vdev != NULL) {
+		dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__);
+		ret = -1;
+		goto error_unlock;
+	}
+
+	dev_set_drvdata(&viodev->dev, NULL);
+
+	new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
+	if (!new_devdata) {
+		dev_err(&viodev->dev, "%s: Could not allocate memory for device data\n", __func__);
+		ret = -ENOMEM;
+		goto error_unlock;
+	}
+
+	new_devdata->counters = kzalloc(sizeof(*new_devdata->counters),
+			GFP_NOFS);
+	if (!new_devdata->counters) {
+		dev_err(&viodev->dev, "%s: Could not allocate memory for performance counters\n", __func__);
+		ret = -ENOMEM;
+		goto error_unlock;
+	}
+
+	new_devdata->vdev = viodev;
+	new_devdata->dev = &viodev->dev;
+	nx842_OF_set_defaults(new_devdata);
+
+	rcu_assign_pointer(devdata, new_devdata);
+	spin_unlock_irqrestore(&devdata_mutex, flags);
+	synchronize_rcu();
+	kfree(old_devdata);
+
+	pSeries_reconfig_notifier_register(&nx842_of_nb);
+
+	ret = nx842_OF_upd(NULL);
+	if (ret && ret != -ENODEV) {
+		dev_err(&viodev->dev, "could not parse device tree. %d\n", ret);
+		ret = -1;
+		goto error;
+	}
+
+	rcu_read_lock();
+	if (dev_set_drvdata(&viodev->dev, rcu_dereference(devdata))) {
+		rcu_read_unlock();
+		dev_err(&viodev->dev, "failed to set driver data for device\n");
+		ret = -1;
+		goto error;
+	}
+	rcu_read_unlock();
+
+	if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) {
+		dev_err(&viodev->dev, "could not create sysfs device attributes\n");
+		ret = -1;
+		goto error;
+	}
+
+	return 0;
+
+error_unlock:
+	spin_unlock_irqrestore(&devdata_mutex, flags);
+	if (new_devdata)
+		kfree(new_devdata->counters);
+	kfree(new_devdata);
+error:
+	return ret;
+}
+
+static int __exit nx842_remove(struct vio_dev *viodev)
+{
+	struct nx842_devdata *old_devdata;
+	unsigned long flags;
+
+	pr_info("Removing IBM Power 842 compression device\n");
+	sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group);
+
+	spin_lock_irqsave(&devdata_mutex, flags);
+	old_devdata = rcu_dereference_check(devdata,
+			lockdep_is_held(&devdata_mutex));
+	pSeries_reconfig_notifier_unregister(&nx842_of_nb);
+	rcu_assign_pointer(devdata, NULL);
+	spin_unlock_irqrestore(&devdata_mutex, flags);
+	synchronize_rcu();
+	dev_set_drvdata(&viodev->dev, NULL);
+	if (old_devdata)
+		kfree(old_devdata->counters);
+	kfree(old_devdata);
+	return 0;
+}
+
+static struct vio_device_id nx842_driver_ids[] = {
+	{"ibm,compression-v1", "ibm,compression"},
+	{"", ""},
+};
+
+static struct vio_driver nx842_driver = {
+	.name = MODULE_NAME,
+	.probe = nx842_probe,
+	.remove = nx842_remove,
+	.get_desired_dma = nx842_get_desired_dma,
+	.id_table = nx842_driver_ids,
+};
+
+static int __init nx842_init(void)
+{
+	struct nx842_devdata *new_devdata;
+	pr_info("Registering IBM Power 842 compression driver\n");
+
+	RCU_INIT_POINTER(devdata, NULL);
+	new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL);
+	if (!new_devdata) {
+		pr_err("Could not allocate memory for device data\n");
+		return -ENOMEM;
+	}
+	new_devdata->status = UNAVAILABLE;
+	RCU_INIT_POINTER(devdata, new_devdata);
+
+	return vio_register_driver(&nx842_driver);
+}
+
+module_init(nx842_init);
+
+static void __exit nx842_exit(void)
+{
+	struct nx842_devdata *old_devdata;
+	unsigned long flags;
+
+	pr_info("Exiting IBM Power 842 compression driver\n");
+	spin_lock_irqsave(&devdata_mutex, flags);
+	old_devdata = rcu_dereference_check(devdata,
+			lockdep_is_held(&devdata_mutex));
+	rcu_assign_pointer(devdata, NULL);
+	spin_unlock_irqrestore(&devdata_mutex, flags);
+	synchronize_rcu();
+	if (old_devdata)
+		dev_set_drvdata(old_devdata->dev, NULL);
+	kfree(old_devdata);
+	vio_unregister_driver(&nx842_driver);
+}
+
+module_exit(nx842_exit);
+
+/*********************************
+ * 842 software decompressor
+*********************************/
+typedef int (*sw842_template_op)(const char **, int *, unsigned char **,
+						struct sw842_fifo *);
+
+static int sw842_data8(const char **, int *, unsigned char **,
+						struct sw842_fifo *);
+static int sw842_data4(const char **, int *, unsigned char **,
+						struct sw842_fifo *);
+static int sw842_data2(const char **, int *, unsigned char **,
+						struct sw842_fifo *);
+static int sw842_ptr8(const char **, int *, unsigned char **,
+						struct sw842_fifo *);
+static int sw842_ptr4(const char **, int *, unsigned char **,
+						struct sw842_fifo *);
+static int sw842_ptr2(const char **, int *, unsigned char **,
+						struct sw842_fifo *);
+
+/* special templates */
+#define SW842_TMPL_REPEAT 0x1B
+#define SW842_TMPL_ZEROS 0x1C
+#define SW842_TMPL_EOF 0x1E
+
+static sw842_template_op sw842_tmpl_ops[26][4] = {
+	{ sw842_data8, NULL}, /* 0 (00000) */
+	{ sw842_data4, sw842_data2, sw842_ptr2,  NULL},
+	{ sw842_data4, sw842_ptr2,  sw842_data2, NULL},
+	{ sw842_data4, sw842_ptr2,  sw842_ptr2,  NULL},
+	{ sw842_data4, sw842_ptr4,  NULL},
+	{ sw842_data2, sw842_ptr2,  sw842_data4, NULL},
+	{ sw842_data2, sw842_ptr2,  sw842_data2, sw842_ptr2},
+	{ sw842_data2, sw842_ptr2,  sw842_ptr2,  sw842_data2},
+	{ sw842_data2, sw842_ptr2,  sw842_ptr2,  sw842_ptr2,},
+	{ sw842_data2, sw842_ptr2,  sw842_ptr4,  NULL},
+	{ sw842_ptr2,  sw842_data2, sw842_data4, NULL}, /* 10 (01010) */
+	{ sw842_ptr2,  sw842_data4, sw842_ptr2,  NULL},
+	{ sw842_ptr2,  sw842_data2, sw842_ptr2,  sw842_data2},
+	{ sw842_ptr2,  sw842_data2, sw842_ptr2,  sw842_ptr2},
+	{ sw842_ptr2,  sw842_data2, sw842_ptr4,  NULL},
+	{ sw842_ptr2,  sw842_ptr2,  sw842_data4, NULL},
+	{ sw842_ptr2,  sw842_ptr2,  sw842_data2, sw842_ptr2},
+	{ sw842_ptr2,  sw842_ptr2,  sw842_ptr2,  sw842_data2},
+	{ sw842_ptr2,  sw842_ptr2,  sw842_ptr2,  sw842_ptr2},
+	{ sw842_ptr2,  sw842_ptr2,  sw842_ptr4,  NULL},
+	{ sw842_ptr4,  sw842_data4, NULL}, /* 20 (10100) */
+	{ sw842_ptr4,  sw842_data2, sw842_ptr2,  NULL},
+	{ sw842_ptr4,  sw842_ptr2,  sw842_data2, NULL},
+	{ sw842_ptr4,  sw842_ptr2,  sw842_ptr2,  NULL},
+	{ sw842_ptr4,  sw842_ptr4,  NULL},
+	{ sw842_ptr8,  NULL}
+};
+
+/* Software decompress helpers */
+
+static uint8_t sw842_get_byte(const char *buf, int bit)
+{
+	uint8_t tmpl;
+	uint16_t tmp;
+	tmp = htons(*(uint16_t *)(buf));
+	tmp = (uint16_t)(tmp << bit);
+	tmp = ntohs(tmp);
+	memcpy(&tmpl, &tmp, 1);
+	return tmpl;
+}
+
+static uint8_t sw842_get_template(const char **buf, int *bit)
+{
+	uint8_t byte;
+	byte = sw842_get_byte(*buf, *bit);
+	byte = byte >> 3;
+	byte &= 0x1F;
+	*buf += (*bit + 5) / 8;
+	*bit = (*bit + 5) % 8;
+	return byte;
+}
+
+/* repeat_count happens to be 5-bit too (like the template) */
+static uint8_t sw842_get_repeat_count(const char **buf, int *bit)
+{
+	uint8_t byte;
+	byte = sw842_get_byte(*buf, *bit);
+	byte = byte >> 2;
+	byte &= 0x3F;
+	*buf += (*bit + 6) / 8;
+	*bit = (*bit + 6) % 8;
+	return byte;
+}
+
+static uint8_t sw842_get_ptr2(const char **buf, int *bit)
+{
+	uint8_t ptr;
+	ptr = sw842_get_byte(*buf, *bit);
+	(*buf)++;
+	return ptr;
+}
+
+static uint16_t sw842_get_ptr4(const char **buf, int *bit,
+		struct sw842_fifo *fifo)
+{
+	uint16_t ptr;
+	ptr = htons(*(uint16_t *)(*buf));
+	ptr = (uint16_t)(ptr << *bit);
+	ptr = ptr >> 7;
+	ptr &= 0x01FF;
+	*buf += (*bit + 9) / 8;
+	*bit = (*bit + 9) % 8;
+	return ptr;
+}
+
+static uint8_t sw842_get_ptr8(const char **buf, int *bit,
+		struct sw842_fifo *fifo)
+{
+	return sw842_get_ptr2(buf, bit);
+}
+
+/* Software decompress template ops */
+
+static int sw842_data8(const char **inbuf, int *inbit,
+		unsigned char **outbuf, struct sw842_fifo *fifo)
+{
+	int ret;
+
+	ret = sw842_data4(inbuf, inbit, outbuf, fifo);
+	if (ret)
+		return ret;
+	ret = sw842_data4(inbuf, inbit, outbuf, fifo);
+	return ret;
+}
+
+static int sw842_data4(const char **inbuf, int *inbit,
+		unsigned char **outbuf, struct sw842_fifo *fifo)
+{
+	int ret;
+
+	ret = sw842_data2(inbuf, inbit, outbuf, fifo);
+	if (ret)
+		return ret;
+	ret = sw842_data2(inbuf, inbit, outbuf, fifo);
+	return ret;
+}
+
+static int sw842_data2(const char **inbuf, int *inbit,
+		unsigned char **outbuf, struct sw842_fifo *fifo)
+{
+	**outbuf = sw842_get_byte(*inbuf, *inbit);
+	(*inbuf)++;
+	(*outbuf)++;
+	**outbuf = sw842_get_byte(*inbuf, *inbit);
+	(*inbuf)++;
+	(*outbuf)++;
+	return 0;
+}
+
+static int sw842_ptr8(const char **inbuf, int *inbit,
+		unsigned char **outbuf, struct sw842_fifo *fifo)
+{
+	uint8_t ptr;
+	ptr = sw842_get_ptr8(inbuf, inbit, fifo);
+	if (!fifo->f84_full && (ptr >= fifo->f8_count))
+		return 1;
+	memcpy(*outbuf, fifo->f8[ptr], 8);
+	*outbuf += 8;
+	return 0;
+}
+
+static int sw842_ptr4(const char **inbuf, int *inbit,
+		unsigned char **outbuf, struct sw842_fifo *fifo)
+{
+	uint16_t ptr;
+	ptr = sw842_get_ptr4(inbuf, inbit, fifo);
+	if (!fifo->f84_full && (ptr >= fifo->f4_count))
+		return 1;
+	memcpy(*outbuf, fifo->f4[ptr], 4);
+	*outbuf += 4;
+	return 0;
+}
+
+static int sw842_ptr2(const char **inbuf, int *inbit,
+		unsigned char **outbuf, struct sw842_fifo *fifo)
+{
+	uint8_t ptr;
+	ptr = sw842_get_ptr2(inbuf, inbit);
+	if (!fifo->f2_full && (ptr >= fifo->f2_count))
+		return 1;
+	memcpy(*outbuf, fifo->f2[ptr], 2);
+	*outbuf += 2;
+	return 0;
+}
+
+static void sw842_copy_to_fifo(const char *buf, struct sw842_fifo *fifo)
+{
+	unsigned char initial_f2count = fifo->f2_count;
+
+	memcpy(fifo->f8[fifo->f8_count], buf, 8);
+	fifo->f4_count += 2;
+	fifo->f8_count += 1;
+
+	if (!fifo->f84_full && fifo->f4_count >= 512) {
+		fifo->f84_full = 1;
+		fifo->f4_count /= 512;
+	}
+
+	memcpy(fifo->f2[fifo->f2_count++], buf, 2);
+	memcpy(fifo->f2[fifo->f2_count++], buf + 2, 2);
+	memcpy(fifo->f2[fifo->f2_count++], buf + 4, 2);
+	memcpy(fifo->f2[fifo->f2_count++], buf + 6, 2);
+	if (fifo->f2_count < initial_f2count)
+		fifo->f2_full = 1;
+}
+
+static int sw842_decompress(const unsigned char *src, int srclen,
+			unsigned char *dst, int *destlen,
+			const void *wrkmem)
+{
+	uint8_t tmpl;
+	const char *inbuf;
+	int inbit = 0;
+	unsigned char *outbuf, *outbuf_end, *origbuf, *prevbuf;
+	const char *inbuf_end;
+	sw842_template_op op;
+	int opindex;
+	int i, repeat_count;
+	struct sw842_fifo *fifo;
+	int ret = 0;
+
+	fifo = &((struct nx842_workmem *)(wrkmem))->swfifo;
+	memset(fifo, 0, sizeof(*fifo));
+
+	origbuf = NULL;
+	inbuf = src;
+	inbuf_end = src + srclen;
+	outbuf = dst;
+	outbuf_end = dst + *destlen;
+
+	while ((tmpl = sw842_get_template(&inbuf, &inbit)) != SW842_TMPL_EOF) {
+		if (inbuf >= inbuf_end) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		opindex = 0;
+		prevbuf = origbuf;
+		origbuf = outbuf;
+		switch (tmpl) {
+		case SW842_TMPL_REPEAT:
+			if (prevbuf == NULL) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			repeat_count = sw842_get_repeat_count(&inbuf,
+								&inbit) + 1;
+
+			/* Did the repeat count advance past the end of input */
+			if (inbuf > inbuf_end) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			for (i = 0; i < repeat_count; i++) {
+				/* Would this overflow the output buffer */
+				if ((outbuf + 8) > outbuf_end) {
+					ret = -ENOSPC;
+					goto out;
+				}
+
+				memcpy(outbuf, prevbuf, 8);
+				sw842_copy_to_fifo(outbuf, fifo);
+				outbuf += 8;
+			}
+			break;
+
+		case SW842_TMPL_ZEROS:
+			/* Would this overflow the output buffer */
+			if ((outbuf + 8) > outbuf_end) {
+				ret = -ENOSPC;
+				goto out;
+			}
+
+			memset(outbuf, 0, 8);
+			sw842_copy_to_fifo(outbuf, fifo);
+			outbuf += 8;
+			break;
+
+		default:
+			if (tmpl > 25) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			/* Does this go past the end of the input buffer */
+			if ((inbuf + 2) > inbuf_end) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			/* Would this overflow the output buffer */
+			if ((outbuf + 8) > outbuf_end) {
+				ret = -ENOSPC;
+				goto out;
+			}
+
+			while (opindex < 4 &&
+				(op = sw842_tmpl_ops[tmpl][opindex++])
+					!= NULL) {
+				ret = (*op)(&inbuf, &inbit, &outbuf, fifo);
+				if (ret) {
+					ret = -EINVAL;
+					goto out;
+				}
+				sw842_copy_to_fifo(origbuf, fifo);
+			}
+		}
+	}
+
+out:
+	if (!ret)
+		*destlen = (unsigned int)(outbuf - dst);
+	else
+		*destlen = 0;
+
+	return ret;
+}

+ 0 - 1
drivers/crypto/nx/nx-aes-cbc.c

@@ -127,7 +127,6 @@ struct crypto_alg nx_cbc_aes_alg = {
 	.cra_ctxsize     = sizeof(struct nx_crypto_ctx),
 	.cra_type        = &crypto_blkcipher_type,
 	.cra_module      = THIS_MODULE,
-	.cra_list        = LIST_HEAD_INIT(nx_cbc_aes_alg.cra_list),
 	.cra_init        = nx_crypto_ctx_aes_cbc_init,
 	.cra_exit        = nx_crypto_ctx_exit,
 	.cra_blkcipher = {

+ 0 - 2
drivers/crypto/nx/nx-aes-ccm.c

@@ -430,7 +430,6 @@ struct crypto_alg nx_ccm_aes_alg = {
 	.cra_ctxsize     = sizeof(struct nx_crypto_ctx),
 	.cra_type        = &crypto_aead_type,
 	.cra_module      = THIS_MODULE,
-	.cra_list        = LIST_HEAD_INIT(nx_ccm_aes_alg.cra_list),
 	.cra_init        = nx_crypto_ctx_aes_ccm_init,
 	.cra_exit        = nx_crypto_ctx_exit,
 	.cra_aead = {
@@ -453,7 +452,6 @@ struct crypto_alg nx_ccm4309_aes_alg = {
 	.cra_ctxsize     = sizeof(struct nx_crypto_ctx),
 	.cra_type        = &crypto_nivaead_type,
 	.cra_module      = THIS_MODULE,
-	.cra_list        = LIST_HEAD_INIT(nx_ccm4309_aes_alg.cra_list),
 	.cra_init        = nx_crypto_ctx_aes_ccm_init,
 	.cra_exit        = nx_crypto_ctx_exit,
 	.cra_aead = {

+ 0 - 2
drivers/crypto/nx/nx-aes-ctr.c

@@ -141,7 +141,6 @@ struct crypto_alg nx_ctr_aes_alg = {
 	.cra_ctxsize     = sizeof(struct nx_crypto_ctx),
 	.cra_type        = &crypto_blkcipher_type,
 	.cra_module      = THIS_MODULE,
-	.cra_list        = LIST_HEAD_INIT(nx_ctr_aes_alg.cra_list),
 	.cra_init        = nx_crypto_ctx_aes_ctr_init,
 	.cra_exit        = nx_crypto_ctx_exit,
 	.cra_blkcipher = {
@@ -163,7 +162,6 @@ struct crypto_alg nx_ctr3686_aes_alg = {
 	.cra_ctxsize     = sizeof(struct nx_crypto_ctx),
 	.cra_type        = &crypto_blkcipher_type,
 	.cra_module      = THIS_MODULE,
-	.cra_list        = LIST_HEAD_INIT(nx_ctr3686_aes_alg.cra_list),
 	.cra_init        = nx_crypto_ctx_aes_ctr_init,
 	.cra_exit        = nx_crypto_ctx_exit,
 	.cra_blkcipher = {

+ 0 - 1
drivers/crypto/nx/nx-aes-ecb.c

@@ -126,7 +126,6 @@ struct crypto_alg nx_ecb_aes_alg = {
 	.cra_ctxsize     = sizeof(struct nx_crypto_ctx),
 	.cra_type        = &crypto_blkcipher_type,
 	.cra_module      = THIS_MODULE,
-	.cra_list        = LIST_HEAD_INIT(nx_ecb_aes_alg.cra_list),
 	.cra_init        = nx_crypto_ctx_aes_ecb_init,
 	.cra_exit        = nx_crypto_ctx_exit,
 	.cra_blkcipher = {

+ 0 - 2
drivers/crypto/nx/nx-aes-gcm.c

@@ -316,7 +316,6 @@ struct crypto_alg nx_gcm_aes_alg = {
 	.cra_ctxsize     = sizeof(struct nx_crypto_ctx),
 	.cra_type        = &crypto_aead_type,
 	.cra_module      = THIS_MODULE,
-	.cra_list        = LIST_HEAD_INIT(nx_gcm_aes_alg.cra_list),
 	.cra_init        = nx_crypto_ctx_aes_gcm_init,
 	.cra_exit        = nx_crypto_ctx_exit,
 	.cra_aead = {
@@ -338,7 +337,6 @@ struct crypto_alg nx_gcm4106_aes_alg = {
 	.cra_ctxsize     = sizeof(struct nx_crypto_ctx),
 	.cra_type        = &crypto_nivaead_type,
 	.cra_module      = THIS_MODULE,
-	.cra_list        = LIST_HEAD_INIT(nx_gcm4106_aes_alg.cra_list),
 	.cra_init        = nx_crypto_ctx_aes_gcm_init,
 	.cra_exit        = nx_crypto_ctx_exit,
 	.cra_aead = {

+ 0 - 1
drivers/crypto/omap-aes.c

@@ -876,7 +876,6 @@ static int omap_aes_probe(struct platform_device *pdev)
 
 	for (i = 0; i < ARRAY_SIZE(algs); i++) {
 		pr_debug("i: %d\n", i);
-		INIT_LIST_HEAD(&algs[i].cra_list);
 		err = crypto_register_alg(&algs[i]);
 		if (err)
 			goto err_algs;

+ 0 - 3
drivers/crypto/padlock-aes.c

@@ -328,7 +328,6 @@ static struct crypto_alg aes_alg = {
 	.cra_ctxsize		=	sizeof(struct aes_ctx),
 	.cra_alignmask		=	PADLOCK_ALIGNMENT - 1,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(aes_alg.cra_list),
 	.cra_u			=	{
 		.cipher = {
 			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
@@ -408,7 +407,6 @@ static struct crypto_alg ecb_aes_alg = {
 	.cra_alignmask		=	PADLOCK_ALIGNMENT - 1,
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(ecb_aes_alg.cra_list),
 	.cra_u			=	{
 		.blkcipher = {
 			.min_keysize		=	AES_MIN_KEY_SIZE,
@@ -491,7 +489,6 @@ static struct crypto_alg cbc_aes_alg = {
 	.cra_alignmask		=	PADLOCK_ALIGNMENT - 1,
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(cbc_aes_alg.cra_list),
 	.cra_u			=	{
 		.blkcipher = {
 			.min_keysize		=	AES_MIN_KEY_SIZE,

+ 0 - 1
drivers/crypto/s5p-sss.c

@@ -626,7 +626,6 @@ static int s5p_aes_probe(struct platform_device *pdev)
 	crypto_init_queue(&pdata->queue, CRYPTO_QUEUE_LEN);
 
 	for (i = 0; i < ARRAY_SIZE(algs); i++) {
-		INIT_LIST_HEAD(&algs[i].cra_list);
 		err = crypto_register_alg(&algs[i]);
 		if (err)
 			goto err_algs;

File diff suppressed because it is too large
+ 172 - 247
drivers/crypto/talitos.c


+ 1 - 2
drivers/crypto/tegra-aes.c

@@ -969,6 +969,7 @@ static int tegra_aes_probe(struct platform_device *pdev)
 	aes_wq = alloc_workqueue("tegra_aes_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
 	if (!aes_wq) {
 		dev_err(dev, "alloc_workqueue failed\n");
+		err = -ENOMEM;
 		goto out;
 	}
 
@@ -1004,8 +1005,6 @@ static int tegra_aes_probe(struct platform_device *pdev)
 
 	aes_dev = dd;
 	for (i = 0; i < ARRAY_SIZE(algs); i++) {
-		INIT_LIST_HEAD(&algs[i].cra_list);
-
 		algs[i].cra_priority = 300;
 		algs[i].cra_ctxsize = sizeof(struct tegra_aes_ctx);
 		algs[i].cra_module = THIS_MODULE;

+ 1 - 0
drivers/crypto/ux500/cryp/cryp_core.c

@@ -1486,6 +1486,7 @@ static int ux500_cryp_probe(struct platform_device *pdev)
 	if (!res_irq) {
 		dev_err(dev, "[%s]: IORESOURCE_IRQ unavailable",
 			__func__);
+		ret = -ENODEV;
 		goto out_power;
 	}
 

+ 0 - 1
drivers/crypto/ux500/hash/hash_core.c

@@ -1991,7 +1991,6 @@ static int __init ux500_hash_mod_init(void)
 static void __exit ux500_hash_mod_fini(void)
 {
 	platform_driver_unregister(&hash_driver);
-	return;
 }
 
 module_init(ux500_hash_mod_init);

+ 27 - 0
include/crypto/cast5.h

@@ -0,0 +1,27 @@
+#ifndef _CRYPTO_CAST5_H
+#define _CRYPTO_CAST5_H
+
+#include <linux/types.h>
+#include <linux/crypto.h>
+
+#define CAST5_BLOCK_SIZE 8
+#define CAST5_MIN_KEY_SIZE 5
+#define CAST5_MAX_KEY_SIZE 16
+
+struct cast5_ctx {
+	u32 Km[16];
+	u8 Kr[16];
+	int rr;	/* rr ? rounds = 12 : rounds = 16; (rfc 2144) */
+};
+
+int cast5_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen);
+
+void __cast5_encrypt(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
+void __cast5_decrypt(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
+
+extern const u32 cast5_s1[256];
+extern const u32 cast5_s2[256];
+extern const u32 cast5_s3[256];
+extern const u32 cast5_s4[256];
+
+#endif

+ 28 - 0
include/crypto/cast6.h

@@ -0,0 +1,28 @@
+#ifndef _CRYPTO_CAST6_H
+#define _CRYPTO_CAST6_H
+
+#include <linux/types.h>
+#include <linux/crypto.h>
+
+#define CAST6_BLOCK_SIZE 16
+#define CAST6_MIN_KEY_SIZE 16
+#define CAST6_MAX_KEY_SIZE 32
+
+struct cast6_ctx {
+	u32 Km[12][4];
+	u8 Kr[12][4];
+};
+
+int __cast6_setkey(struct cast6_ctx *ctx, const u8 *key,
+		   unsigned int keylen, u32 *flags);
+int cast6_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen);
+
+void __cast6_encrypt(struct cast6_ctx *ctx, u8 *dst, const u8 *src);
+void __cast6_decrypt(struct cast6_ctx *ctx, u8 *dst, const u8 *src);
+
+extern const u32 cast6_s1[256];
+extern const u32 cast6_s2[256];
+extern const u32 cast6_s3[256];
+extern const u32 cast6_s4[256];
+
+#endif

+ 2 - 0
include/crypto/internal/hash.h

@@ -83,6 +83,8 @@ struct hash_alg_common *ahash_attr_alg(struct rtattr *rta, u32 type, u32 mask);
 
 int crypto_register_shash(struct shash_alg *alg);
 int crypto_unregister_shash(struct shash_alg *alg);
+int crypto_register_shashes(struct shash_alg *algs, int count);
+int crypto_unregister_shashes(struct shash_alg *algs, int count);
 int shash_register_instance(struct crypto_template *tmpl,
 			    struct shash_instance *inst);
 void shash_free_instance(struct crypto_instance *inst);

+ 11 - 0
include/linux/nx842.h

@@ -0,0 +1,11 @@
+#ifndef __NX842_H__
+#define __NX842_H__
+
+int nx842_get_workmem_size(void);
+int nx842_get_workmem_size_aligned(void);
+int nx842_compress(const unsigned char *in, unsigned int in_len,
+		unsigned char *out, unsigned int *out_len, void *wrkmem);
+int nx842_decompress(const unsigned char *in, unsigned int in_len,
+		unsigned char *out, unsigned int *out_len, void *wrkmem);
+
+#endif

Some files were not shown because too many files changed in this diff