crc32c-intel.c 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. /*
  2. * Using the hardware-provided CRC32 instruction to accelerate CRC32C computation.
  3. * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
  4. * CRC32 is a new instruction in Intel SSE4.2; the reference can be found at:
  5. * http://www.intel.com/products/processor/manuals/
  6. * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
  7. * Volume 2A: Instruction Set Reference, A-M
  8. *
  9. * Copyright (c) 2008 Austin Zhang <austin_zhang@linux.intel.com>
  10. * Copyright (c) 2008 Kent Liu <kent.liu@intel.com>
  11. *
  12. * This program is free software; you can redistribute it and/or modify it
  13. * under the terms of the GNU General Public License as published by the Free
  14. * Software Foundation; either version 2 of the License, or (at your option)
  15. * any later version.
  16. *
  17. */
  18. #include <linux/init.h>
  19. #include <linux/module.h>
  20. #include <linux/string.h>
  21. #include <linux/kernel.h>
  22. #include <crypto/internal/hash.h>
  23. #include <asm/cpufeature.h>
  /* Block size and digest size (in bytes) advertised in the algorithm descriptor. */
  #define CHKSUM_BLOCK_SIZE 1
  #define CHKSUM_DIGEST_SIZE 4

  /* Number of bytes consumed by each word-sized step of crc32c_intel_le_hw(). */
  #define SCALE_F sizeof(unsigned long)

  /*
   * Extra opcode byte spliced into the hand-assembled ".byte" sequence of the
   * word-sized CRC step: 0x48 (the x86-64 REX.W prefix) on 64-bit builds,
   * nothing on 32-bit builds.
   */
  #if defined(CONFIG_X86_64)
  #define REX_PRE "0x48, "
  #else
  #define REX_PRE
  #endif
  32. static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
  33. {
  34. while (length--) {
  35. __asm__ __volatile__(
  36. ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
  37. :"=S"(crc)
  38. :"0"(crc), "c"(*data)
  39. );
  40. data++;
  41. }
  42. return crc;
  43. }
  44. static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len)
  45. {
  46. unsigned int iquotient = len / SCALE_F;
  47. unsigned int iremainder = len % SCALE_F;
  48. unsigned long *ptmp = (unsigned long *)p;
  49. while (iquotient--) {
  50. __asm__ __volatile__(
  51. ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
  52. :"=S"(crc)
  53. :"0"(crc), "c"(*ptmp)
  54. );
  55. ptmp++;
  56. }
  57. if (iremainder)
  58. crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
  59. iremainder);
  60. return crc;
  61. }
  62. /*
  63. * Setting the seed allows arbitrary accumulators and flexible XOR policy
  64. * If your algorithm starts with ~0, then XOR with ~0 before you set
  65. * the seed.
  66. */
  67. static int crc32c_intel_setkey(struct crypto_ahash *hash, const u8 *key,
  68. unsigned int keylen)
  69. {
  70. u32 *mctx = crypto_ahash_ctx(hash);
  71. if (keylen != sizeof(u32)) {
  72. crypto_ahash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
  73. return -EINVAL;
  74. }
  75. *mctx = le32_to_cpup((__le32 *)key);
  76. return 0;
  77. }
  78. static int crc32c_intel_init(struct ahash_request *req)
  79. {
  80. u32 *mctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
  81. u32 *crcp = ahash_request_ctx(req);
  82. *crcp = *mctx;
  83. return 0;
  84. }
  85. static int crc32c_intel_update(struct ahash_request *req)
  86. {
  87. struct crypto_hash_walk walk;
  88. u32 *crcp = ahash_request_ctx(req);
  89. u32 crc = *crcp;
  90. int nbytes;
  91. for (nbytes = crypto_hash_walk_first(req, &walk); nbytes;
  92. nbytes = crypto_hash_walk_done(&walk, 0))
  93. crc = crc32c_intel_le_hw(crc, walk.data, nbytes);
  94. *crcp = crc;
  95. return 0;
  96. }
  97. static int crc32c_intel_final(struct ahash_request *req)
  98. {
  99. u32 *crcp = ahash_request_ctx(req);
  100. *(__le32 *)req->result = ~cpu_to_le32p(crcp);
  101. return 0;
  102. }
  103. static int crc32c_intel_digest(struct ahash_request *req)
  104. {
  105. struct crypto_hash_walk walk;
  106. u32 *mctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
  107. u32 crc = *mctx;
  108. int nbytes;
  109. for (nbytes = crypto_hash_walk_first(req, &walk); nbytes;
  110. nbytes = crypto_hash_walk_done(&walk, 0))
  111. crc = crc32c_intel_le_hw(crc, walk.data, nbytes);
  112. *(__le32 *)req->result = ~cpu_to_le32(crc);
  113. return 0;
  114. }
  115. static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
  116. {
  117. u32 *key = crypto_tfm_ctx(tfm);
  118. *key = ~0;
  119. tfm->crt_ahash.reqsize = sizeof(u32);
  120. return 0;
  121. }
  122. static struct crypto_alg alg = {
  123. .cra_name = "crc32c",
  124. .cra_driver_name = "crc32c-intel",
  125. .cra_priority = 200,
  126. .cra_flags = CRYPTO_ALG_TYPE_AHASH,
  127. .cra_blocksize = CHKSUM_BLOCK_SIZE,
  128. .cra_alignmask = 3,
  129. .cra_ctxsize = sizeof(u32),
  130. .cra_module = THIS_MODULE,
  131. .cra_list = LIST_HEAD_INIT(alg.cra_list),
  132. .cra_init = crc32c_intel_cra_init,
  133. .cra_type = &crypto_ahash_type,
  134. .cra_u = {
  135. .ahash = {
  136. .digestsize = CHKSUM_DIGEST_SIZE,
  137. .setkey = crc32c_intel_setkey,
  138. .init = crc32c_intel_init,
  139. .update = crc32c_intel_update,
  140. .final = crc32c_intel_final,
  141. .digest = crc32c_intel_digest,
  142. }
  143. }
  144. };
  145. static int __init crc32c_intel_mod_init(void)
  146. {
  147. if (cpu_has_xmm4_2)
  148. return crypto_register_alg(&alg);
  149. else
  150. return -ENODEV;
  151. }
  152. static void __exit crc32c_intel_mod_fini(void)
  153. {
  154. crypto_unregister_alg(&alg);
  155. }
  156. module_init(crc32c_intel_mod_init);
  157. module_exit(crc32c_intel_mod_fini);
  158. MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.com>");
  159. MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware.");
  160. MODULE_LICENSE("GPL");
  161. MODULE_ALIAS("crc32c");
  162. MODULE_ALIAS("crc32c-intel");