1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Glue Code for 3-way parallel assembler optimized version of Twofish |
4 | * |
5 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> |
6 | */ |
7 | |
8 | #include <crypto/algapi.h> |
9 | #include <crypto/twofish.h> |
10 | #include <linux/crypto.h> |
11 | #include <linux/init.h> |
12 | #include <linux/module.h> |
13 | #include <linux/types.h> |
14 | |
15 | #include "twofish.h" |
16 | #include "ecb_cbc_helpers.h" |
17 | |
/*
 * Make the 3-way block primitives available to other GPL modules.
 * NOTE(review): neither symbol is defined in this file; presumably they are
 * the assembler routines declared in "twofish.h" -- confirm.
 */
EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
20 | |
21 | static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, |
22 | const u8 *key, unsigned int keylen) |
23 | { |
24 | return twofish_setkey(tfm: &tfm->base, key, key_len: keylen); |
25 | } |
26 | |
27 | static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) |
28 | { |
29 | __twofish_enc_blk_3way(ctx, dst, src, false); |
30 | } |
31 | |
32 | void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src) |
33 | { |
34 | u8 buf[2][TF_BLOCK_SIZE]; |
35 | const u8 *s = src; |
36 | |
37 | if (dst == src) |
38 | s = memcpy(buf, src, sizeof(buf)); |
39 | twofish_dec_blk_3way(ctx, dst, src); |
40 | crypto_xor(dst: dst + TF_BLOCK_SIZE, src: s, size: sizeof(buf)); |
41 | |
42 | } |
43 | EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); |
44 | |
45 | static int ecb_encrypt(struct skcipher_request *req) |
46 | { |
47 | ECB_WALK_START(req, TF_BLOCK_SIZE, -1); |
48 | ECB_BLOCK(3, twofish_enc_blk_3way); |
49 | ECB_BLOCK(1, twofish_enc_blk); |
50 | ECB_WALK_END(); |
51 | } |
52 | |
53 | static int ecb_decrypt(struct skcipher_request *req) |
54 | { |
55 | ECB_WALK_START(req, TF_BLOCK_SIZE, -1); |
56 | ECB_BLOCK(3, twofish_dec_blk_3way); |
57 | ECB_BLOCK(1, twofish_dec_blk); |
58 | ECB_WALK_END(); |
59 | } |
60 | |
61 | static int cbc_encrypt(struct skcipher_request *req) |
62 | { |
63 | CBC_WALK_START(req, TF_BLOCK_SIZE, -1); |
64 | CBC_ENC_BLOCK(twofish_enc_blk); |
65 | CBC_WALK_END(); |
66 | } |
67 | |
68 | static int cbc_decrypt(struct skcipher_request *req) |
69 | { |
70 | CBC_WALK_START(req, TF_BLOCK_SIZE, -1); |
71 | CBC_DEC_BLOCK(3, twofish_dec_blk_cbc_3way); |
72 | CBC_DEC_BLOCK(1, twofish_dec_blk); |
73 | CBC_WALK_END(); |
74 | } |
75 | |
76 | static struct skcipher_alg tf_skciphers[] = { |
77 | { |
78 | .base.cra_name = "ecb(twofish)" , |
79 | .base.cra_driver_name = "ecb-twofish-3way" , |
80 | .base.cra_priority = 300, |
81 | .base.cra_blocksize = TF_BLOCK_SIZE, |
82 | .base.cra_ctxsize = sizeof(struct twofish_ctx), |
83 | .base.cra_module = THIS_MODULE, |
84 | .min_keysize = TF_MIN_KEY_SIZE, |
85 | .max_keysize = TF_MAX_KEY_SIZE, |
86 | .setkey = twofish_setkey_skcipher, |
87 | .encrypt = ecb_encrypt, |
88 | .decrypt = ecb_decrypt, |
89 | }, { |
90 | .base.cra_name = "cbc(twofish)" , |
91 | .base.cra_driver_name = "cbc-twofish-3way" , |
92 | .base.cra_priority = 300, |
93 | .base.cra_blocksize = TF_BLOCK_SIZE, |
94 | .base.cra_ctxsize = sizeof(struct twofish_ctx), |
95 | .base.cra_module = THIS_MODULE, |
96 | .min_keysize = TF_MIN_KEY_SIZE, |
97 | .max_keysize = TF_MAX_KEY_SIZE, |
98 | .ivsize = TF_BLOCK_SIZE, |
99 | .setkey = twofish_setkey_skcipher, |
100 | .encrypt = cbc_encrypt, |
101 | .decrypt = cbc_decrypt, |
102 | }, |
103 | }; |
104 | |
105 | static bool is_blacklisted_cpu(void) |
106 | { |
107 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) |
108 | return false; |
109 | |
110 | if (boot_cpu_data.x86 == 0x06 && |
111 | (boot_cpu_data.x86_model == 0x1c || |
112 | boot_cpu_data.x86_model == 0x26 || |
113 | boot_cpu_data.x86_model == 0x36)) { |
114 | /* |
115 | * On Atom, twofish-3way is slower than original assembler |
116 | * implementation. Twofish-3way trades off some performance in |
117 | * storing blocks in 64bit registers to allow three blocks to |
118 | * be processed parallel. Parallel operation then allows gaining |
119 | * more performance than was trade off, on out-of-order CPUs. |
120 | * However Atom does not benefit from this parallelism and |
121 | * should be blacklisted. |
122 | */ |
123 | return true; |
124 | } |
125 | |
126 | if (boot_cpu_data.x86 == 0x0f) { |
127 | /* |
128 | * On Pentium 4, twofish-3way is slower than original assembler |
129 | * implementation because excessive uses of 64bit rotate and |
130 | * left-shifts (which are really slow on P4) needed to store and |
131 | * handle 128bit block in two 64bit registers. |
132 | */ |
133 | return true; |
134 | } |
135 | |
136 | return false; |
137 | } |
138 | |
139 | static int force; |
140 | module_param(force, int, 0); |
141 | MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist" ); |
142 | |
143 | static int __init twofish_3way_init(void) |
144 | { |
145 | if (!force && is_blacklisted_cpu()) { |
146 | printk(KERN_INFO |
147 | "twofish-x86_64-3way: performance on this CPU " |
148 | "would be suboptimal: disabling " |
149 | "twofish-x86_64-3way.\n" ); |
150 | return -ENODEV; |
151 | } |
152 | |
153 | return crypto_register_skciphers(algs: tf_skciphers, |
154 | ARRAY_SIZE(tf_skciphers)); |
155 | } |
156 | |
157 | static void __exit twofish_3way_fini(void) |
158 | { |
159 | crypto_unregister_skciphers(algs: tf_skciphers, ARRAY_SIZE(tf_skciphers)); |
160 | } |
161 | |
162 | module_init(twofish_3way_init); |
163 | module_exit(twofish_3way_fini); |
164 | |
165 | MODULE_LICENSE("GPL" ); |
166 | MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized" ); |
167 | MODULE_ALIAS_CRYPTO("twofish" ); |
168 | MODULE_ALIAS_CRYPTO("twofish-asm" ); |
169 | |