/*
 * Cryptographic API.
 *
 * Glue code for the SHA1 Secure Hash Algorithm assembler implementation using
 * Supplemental SSE3 instructions.
 *
 * This file is based on sha1_generic.c
 *
 * Copyright (c) Alan Smithee.
 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
 * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
 * Copyright (c) Mathias Krause <minipli@googlemail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>

asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
				     unsigned int rounds);
#ifdef SHA1_ENABLE_AVX_SUPPORT
asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
				   unsigned int rounds);
#endif
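
/*
 * Filled in at module init with the fastest transform the CPU supports:
 * sha1_transform_avx() when AVX is usable, otherwise sha1_transform_ssse3().
 */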
static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int);

static int sha1_ssse3_init(struct shash_desc *desc)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);

	*sctx = (struct sha1_state){
		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
	};

	return 0;
}
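
/*
 * Core update step. Must run between kernel_fpu_begin()/kernel_fpu_end(),
 * and callers guarantee that partial + len covers at least one full block
 * (the short-input fast path is handled in sha1_ssse3_update()).
 */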
static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
			       unsigned int len, unsigned int partial)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	unsigned int done = 0;

	sctx->count += len;

	/* Complete and hash a previously buffered partial block first. */
	if (partial) {
		done = SHA1_BLOCK_SIZE - partial;
		memcpy(sctx->buffer + partial, data, done);
		sha1_transform_asm(sctx->state, sctx->buffer, 1);
	}

	/* Hash as many full blocks as possible directly from the input. */
	if (len - done >= SHA1_BLOCK_SIZE) {
		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;

		sha1_transform_asm(sctx->state, data + done, rounds);
		done += rounds * SHA1_BLOCK_SIZE;
	}

	/* Buffer any remaining tail for the next update or for final. */
	memcpy(sctx->buffer, data + done, len - done);

	return 0;
}
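
/*
 * crypto_shash update callback. Falls back to the generic C implementation
 * via crypto_sha1_update() when the FPU cannot be used in this context.
 */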
static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
			     unsigned int len)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
	int res;

	/* Handle the fast case right here */
	if (partial + len < SHA1_BLOCK_SIZE) {
		sctx->count += len;
		memcpy(sctx->buffer + partial, data, len);
		return 0;
	}

	if (!irq_fpu_usable()) {
		res = crypto_sha1_update(desc, data, len);
	} else {
		kernel_fpu_begin();
		res = __sha1_ssse3_update(desc, data, len, partial);
		kernel_fpu_end();
	}

	return res;
}

/* Add padding and return the message digest. */
static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	unsigned int i, index, padlen;
	__be32 *dst = (__be32 *)out;
	__be64 bits;
	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };

	bits = cpu_to_be64(sctx->count << 3);

	/* Pad out to 56 mod 64 and append length */
	index = sctx->count % SHA1_BLOCK_SIZE;
	padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
	if (!irq_fpu_usable()) {
		crypto_sha1_update(desc, padding, padlen);
		crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
	} else {
		kernel_fpu_begin();
		/* We need to fill a whole block for __sha1_ssse3_update() */
		if (padlen <= 56) {
			sctx->count += padlen;
			memcpy(sctx->buffer + index, padding, padlen);
		} else {
			__sha1_ssse3_update(desc, padding, padlen, index);
		}
		__sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56);
		kernel_fpu_end();
	}

	/* Store state in digest */
	for (i = 0; i < 5; i++)
		dst[i] = cpu_to_be32(sctx->state[i]);

	/* Wipe context */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}
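
/*
 * Export/import copy the raw sha1_state, letting an in-progress hash
 * operation be suspended and resumed through the shash API.
 */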
static int sha1_ssse3_export(struct shash_desc *desc, void *out)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);

	memcpy(out, sctx, sizeof(*sctx));

	return 0;
}

static int sha1_ssse3_import(struct shash_desc *desc, const void *in)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);

	memcpy(sctx, in, sizeof(*sctx));

	return 0;
}
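
/*
 * Registered under the generic name "sha1"; the higher cra_priority (150)
 * makes the crypto API prefer this driver over the generic C implementation
 * whenever the module is loaded.
 */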
static struct shash_alg alg = {
	.digestsize	= SHA1_DIGEST_SIZE,
	.init		= sha1_ssse3_init,
	.update		= sha1_ssse3_update,
	.final		= sha1_ssse3_final,
	.export		= sha1_ssse3_export,
	.import		= sha1_ssse3_import,
	.descsize	= sizeof(struct sha1_state),
	.statesize	= sizeof(struct sha1_state),
	.base		= {
		.cra_name	 = "sha1",
		.cra_driver_name = "sha1-ssse3",
		.cra_priority	 = 150,
		.cra_flags	 = CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	 = SHA1_BLOCK_SIZE,
		.cra_module	 = THIS_MODULE,
	}
};

#ifdef SHA1_ENABLE_AVX_SUPPORT
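/*
 * AVX is only safe to use when the CPU advertises it *and* the OS has
 * enabled saving the YMM register state (OSXSAVE set, SSE and YMM bits
 * enabled in XCR0); checking CPUID alone is not enough.
 */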
static bool __init avx_usable(void)
{
	u64 xcr0;

	if (!cpu_has_avx || !cpu_has_osxsave)
		return false;

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
		pr_info("AVX detected but unusable.\n");
		return false;
	}

	return true;
}
#endif
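
/*
 * Probe for SSSE3 first, let AVX override it, and register the shash
 * driver only if at least one accelerated transform is available.
 */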
static int __init sha1_ssse3_mod_init(void)
{
	/* test for SSSE3 first */
	if (cpu_has_ssse3)
		sha1_transform_asm = sha1_transform_ssse3;

#ifdef SHA1_ENABLE_AVX_SUPPORT
	/* allow AVX to override SSSE3, it's a little faster */
	if (avx_usable())
		sha1_transform_asm = sha1_transform_avx;
#endif

	if (sha1_transform_asm) {
		pr_info("Using %s optimized SHA-1 implementation\n",
			sha1_transform_asm == sha1_transform_ssse3 ? "SSSE3"
								   : "AVX");
		return crypto_register_shash(&alg);
	}
	pr_info("Neither AVX nor SSSE3 is available/usable.\n");

	return -ENODEV;
}

static void __exit sha1_ssse3_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}

module_init(sha1_ssse3_mod_init);
module_exit(sha1_ssse3_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated");

MODULE_ALIAS("sha1");