|
@@ -25,6 +25,7 @@
|
|
|
*
|
|
|
*/
|
|
|
|
|
|
+#include <asm/processor.h>
|
|
|
#include <linux/crypto.h>
|
|
|
#include <linux/init.h>
|
|
|
#include <linux/module.h>
|
|
@@ -637,10 +638,56 @@ static struct crypto_alg blk_xts_alg = {
|
|
|
},
|
|
|
};
|
|
|
|
|
|
+static bool is_blacklisted_cpu(void)
|
|
|
+{
|
|
|
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
|
|
|
+ return false;
|
|
|
+
|
|
|
+ if (boot_cpu_data.x86 == 0x06 &&
|
|
|
+ (boot_cpu_data.x86_model == 0x1c ||
|
|
|
+ boot_cpu_data.x86_model == 0x26 ||
|
|
|
+ boot_cpu_data.x86_model == 0x36)) {
|
|
|
+ /*
|
|
|
+ * On Atom, twofish-3way is slower than original assembler
|
|
|
+ * implementation. Twofish-3way trades off some performance in
|
|
|
+ * storing blocks in 64bit registers to allow three blocks to
|
|
|
+ * be processed parallel. Parallel operation then allows gaining
|
|
|
+ * more performance than was trade off, on out-of-order CPUs.
|
|
|
+ * However Atom does not benefit from this parallellism and
|
|
|
+ * should be blacklisted.
|
|
|
+ */
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (boot_cpu_data.x86 == 0x0f) {
|
|
|
+ /*
|
|
|
+ * On Pentium 4, twofish-3way is slower than original assembler
|
|
|
+ * implementation because excessive uses of 64bit rotate and
|
|
|
+ * left-shifts (which are really slow on P4) needed to store and
|
|
|
+ * handle 128bit block in two 64bit registers.
|
|
|
+ */
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+
|
|
|
+ return false;
|
|
|
+}
|
|
|
+
|
|
|
+static int force;
|
|
|
+module_param(force, int, 0);
|
|
|
+MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
|
|
|
+
|
|
|
int __init init(void)
|
|
|
{
|
|
|
int err;
|
|
|
|
|
|
+ if (!force && is_blacklisted_cpu()) {
|
|
|
+ printk(KERN_INFO
|
|
|
+ "twofish-x86_64-3way: performance on this CPU "
|
|
|
+ "would be suboptimal: disabling "
|
|
|
+ "twofish-x86_64-3way.\n");
|
|
|
+ return -ENODEV;
|
|
|
+ }
|
|
|
+
|
|
|
err = crypto_register_alg(&blk_ecb_alg);
|
|
|
if (err)
|
|
|
goto ecb_err;
|