1 | /* |
2 | * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. |
3 | * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. |
4 | * |
5 | * This software is available to you under a choice of one of two |
6 | * licenses. You may choose to be licensed under the terms of the GNU |
7 | * General Public License (GPL) Version 2, available from the file |
8 | * COPYING in the main directory of this source tree, or the |
9 | * OpenIB.org BSD license below: |
10 | * |
11 | * Redistribution and use in source and binary forms, with or |
12 | * without modification, are permitted provided that the following |
13 | * conditions are met: |
14 | * |
15 | * - Redistributions of source code must retain the above |
16 | * copyright notice, this list of conditions and the following |
17 | * disclaimer. |
18 | * |
19 | * - Redistributions in binary form must reproduce the above |
20 | * copyright notice, this list of conditions and the following |
21 | * disclaimer in the documentation and/or other materials |
22 | * provided with the distribution. |
23 | * |
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
26 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
28 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
29 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
30 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
31 | * SOFTWARE. |
32 | */ |
33 | |
34 | #ifndef _TLS_OFFLOAD_H |
35 | #define _TLS_OFFLOAD_H |
36 | |
37 | #include <linux/types.h> |
38 | #include <asm/byteorder.h> |
39 | #include <linux/crypto.h> |
40 | #include <linux/socket.h> |
41 | #include <linux/tcp.h> |
42 | #include <linux/mutex.h> |
43 | #include <linux/netdevice.h> |
44 | #include <linux/rcupdate.h> |
45 | |
46 | #include <net/net_namespace.h> |
47 | #include <net/tcp.h> |
48 | #include <net/strparser.h> |
49 | #include <crypto/aead.h> |
50 | #include <uapi/linux/tls.h> |
51 | |
/* Forward declaration: this header only refers to struct tls_rec by pointer. */
struct tls_rec;

/* Maximum data size carried in a TLS record */
#define TLS_MAX_PAYLOAD_SIZE		((size_t)1 << 14)

/* Size in bytes of the TLS record header (type[1] || version[2] ||
 * length[2]).  The nonce offset is defined to be equal to it.
 */
#define TLS_HEADER_SIZE			5
#define TLS_NONCE_OFFSET		TLS_HEADER_SIZE

/* Evaluates to the cipher_type stored in @info, i.e. non-zero once a cipher
 * type has been set.
 */
#define TLS_CRYPTO_INFO_READY(info)	((info)->cipher_type)

#define TLS_AAD_SPACE_SIZE		13

/* Size limits.  TLS_MAX_IV_SIZE, TLS_MAX_SALT_SIZE and TLS_MAX_REC_SEQ_SIZE
 * size the buffers in struct cipher_context.
 */
#define TLS_MAX_IV_SIZE			16
#define TLS_MAX_SALT_SIZE		4
#define TLS_TAG_SIZE			16
#define TLS_MAX_REC_SEQ_SIZE		8
#define TLS_MAX_AAD_SIZE		TLS_AAD_SPACE_SIZE
69 | |
/* For CCM mode, the full 16-byte IV is made of four fields of the given
 * sizes:
 *
 * IV[16] = b0[1] || implicit nonce[4] || explicit nonce[8] || length[3]
 *
 * The width of the 'length' field is encoded in field 'b0' as
 * '(length width - 1)'.  Hence b0 contains (3 - 1) = 2.
 *
 * The AES-CCM and SM4-CCM constants below carry the same value.
 */
#define TLS_AES_CCM_IV_B0_BYTE		2
#define TLS_SM4_CCM_IV_B0_BYTE		2
79 | |
/* Configuration values.  tls_context.rx_conf is compared against TLS_HW in
 * tls_is_sk_rx_device_offloaded().
 * NOTE(review): presumably tx_conf holds the same values, and the names
 * suggest base / software / device / device-record modes - confirm.
 */
enum {
	TLS_BASE,
	TLS_SW,
	TLS_HW,
	TLS_HW_RECORD,
	TLS_NUM_CONFIG,	/* last enumerator: equals the number of values above */
};
87 | |
/* A delayed work item paired with a socket pointer.  Embedded by value in
 * struct tls_sw_context_tx.
 */
struct tx_work {
	struct delayed_work work;
	struct sock *sk;
};
92 | |
/* TX-side software context.  tls_sw_ctx_tx() returns
 * tls_context.priv_ctx_tx interpreted as a pointer to this structure.
 */
struct tls_sw_context_tx {
	struct crypto_aead *aead_send;
	struct crypto_wait async_wait;
	struct tx_work tx_work;
	struct tls_rec *open_rec;
	struct list_head tx_list;
	atomic_t encrypt_pending;
	u8 async_capable:1;

/* NOTE(review): presumably bit numbers within tx_bitmask below - confirm */
#define BIT_TX_SCHEDULED	0
#define BIT_TX_CLOSING		1
	unsigned long tx_bitmask;
};
106 | |
/* Stream parser state.  Embedded by value as the 'strp' member of
 * struct tls_sw_context_rx.
 */
struct tls_strparser {
	struct sock *sk;

	/* 8-bit mark plus four single-bit flags, declared as u32 bitfields */
	u32 mark : 8;
	u32 stopped : 1;
	u32 copy_mode : 1;
	u32 mixed_decrypted : 1;
	u32 msg_ready : 1;

	struct strp_msg stm;

	struct sk_buff *anchor;
	struct work_struct work;
};
121 | |
/* RX-side software context.  tls_sw_ctx_rx() returns
 * tls_context.priv_ctx_rx interpreted as a pointer to this structure; it is
 * also the first member of struct tls_offload_context_rx.
 */
struct tls_sw_context_rx {
	struct crypto_aead *aead_recv;
	struct crypto_wait async_wait;
	struct sk_buff_head rx_list;	/* list of decrypted 'data' records */
	void (*saved_data_ready)(struct sock *sk);

	/* reader_present is a full byte; the three flags below are 1-bit */
	u8 reader_present;
	u8 async_capable:1;
	u8 zc_capable:1;
	u8 reader_contended:1;

	struct tls_strparser strp;

	atomic_t decrypt_pending;
	struct sk_buff_head async_hold;
	struct wait_queue_head wq;
};
139 | |
/* Bookkeeping for one record, linked through @list.
 *
 * tls_record_start_seq() derives the record's starting sequence as
 * end_seq - len, and tls_record_is_start_marker() treats len == 0 as a
 * start marker.
 */
struct tls_record_info {
	struct list_head list;
	u32 end_seq;
	int len;
	int num_frags;
	skb_frag_t frags[MAX_SKB_FRAGS];	/* capacity is MAX_SKB_FRAGS */
};
147 | |
/* Bytes of driver-private TX state reserved in tls_offload_context_tx */
#define TLS_DRIVER_STATE_SIZE_TX	16

/* TX-side offload context.  tls_offload_ctx_tx() returns
 * tls_context.priv_ctx_tx interpreted as a pointer to this structure, and
 * __tls_driver_ctx() hands out @driver_state for the TX direction.
 */
struct tls_offload_context_tx {
	struct crypto_aead *aead_send;
	spinlock_t lock;	/* protects records list */
	struct list_head records_list;
	struct tls_record_info *open_record;
	struct tls_record_info *retransmit_hint;
	u64 hint_record_sn;
	u64 unacked_record_sn;

	struct scatterlist sg_tx_data[MAX_SKB_FRAGS];
	void (*sk_destruct)(struct sock *sk);
	struct work_struct destruct_work;
	struct tls_context *ctx;
	/* The TLS layer reserves room for driver specific state
	 * Currently the belief is that there is not enough
	 * driver specific state to justify another layer of indirection
	 */
	u8 driver_state[TLS_DRIVER_STATE_SIZE_TX] __aligned(8);
};
168 | |
/* Bit numbers for tls_context.flags; tls_offload_tx_resync_pending() tests
 * TLS_TX_SYNC_SCHED there with test_bit().
 */
enum tls_context_flags {
	/* tls_device_down was called after the netdev went down, device state
	 * was released, and kTLS works in software, even though rx_conf is
	 * still TLS_HW (needed for transition).
	 */
	TLS_RX_DEV_DEGRADED = 0,
	/* Unlike RX, where resync is driven entirely by the core, in TX only
	 * the driver knows when things went out of sync, so we need the flag
	 * to be atomic.
	 */
	TLS_TX_SYNC_SCHED = 1,
	/* tls_dev_del was called for the RX side, device state was released,
	 * but tls_ctx->netdev might still be kept, because TX-side driver
	 * resources might not be released yet. Used to prevent the second
	 * tls_dev_del call in tls_device_down if it happens simultaneously.
	 */
	TLS_RX_DEV_CLOSED = 2,
};
187 | |
/* Per-direction cipher state; struct tls_context holds one for TX and one
 * for RX.
 */
struct cipher_context {
	/* Sized for the largest IV plus the largest salt.
	 * NOTE(review): presumably stored as salt followed by IV - confirm.
	 */
	char iv[TLS_MAX_IV_SIZE + TLS_MAX_SALT_SIZE];
	/* Sized for the largest record sequence number */
	char rec_seq[TLS_MAX_REC_SEQ_SIZE];
};
192 | |
/* Storage large enough for any of the listed cipher-specific crypto info
 * structures, overlaid with the generic struct tls_crypto_info.
 * NOTE(review): presumably every cipher-specific struct begins with a
 * struct tls_crypto_info so that @info is always valid - confirm against
 * uapi/linux/tls.h.
 */
union tls_crypto_context {
	struct tls_crypto_info info;
	union {
		struct tls12_crypto_info_aes_gcm_128 aes_gcm_128;
		struct tls12_crypto_info_aes_gcm_256 aes_gcm_256;
		struct tls12_crypto_info_chacha20_poly1305 chacha20_poly1305;
		struct tls12_crypto_info_sm4_gcm sm4_gcm;
		struct tls12_crypto_info_sm4_ccm sm4_ccm;
	};
};
203 | |
/* Protocol parameters, all stored as u16.  Embedded as the first member of
 * struct tls_context.
 * NOTE(review): the *_size members are presumably byte counts derived from
 * the negotiated version and cipher - confirm where they are filled in.
 */
struct tls_prot_info {
	u16 version;
	u16 cipher_type;
	u16 prepend_size;
	u16 tag_size;
	u16 overhead_size;
	u16 iv_size;
	u16 salt_size;
	u16 rec_seq_size;
	u16 aad_size;
	u16 tail_size;
};
216 | |
/* Per-socket TLS context, obtained from a socket with tls_get_ctx().
 * Members are grouped by access pattern (see the section comments), so the
 * member order is significant.
 */
struct tls_context {
	/* read-only cache line */
	struct tls_prot_info prot_info;

	/* 3-bit configuration values; rx_conf is compared against TLS_HW */
	u8 tx_conf:3;
	u8 rx_conf:3;
	u8 zerocopy_sendfile:1;
	u8 rx_no_pad:1;

	int (*push_pending_record)(struct sock *sk, int flags);
	void (*sk_write_space)(struct sock *sk);

	/* Direction-private contexts; read through tls_sw_ctx_tx/rx() and
	 * tls_offload_ctx_tx/rx().
	 */
	void *priv_ctx_tx;
	void *priv_ctx_rx;

	struct net_device __rcu *netdev;

	/* rw cache line */
	struct cipher_context tx;
	struct cipher_context rx;

	struct scatterlist *partially_sent_record;
	u16 partially_sent_offset;

	bool splicing_pages;
	bool pending_open_record_frags;

	struct mutex tx_lock;	/* protects partially_sent_* fields and
				 * per-type TX fields
				 */
	/* Bit field indexed by enum tls_context_flags */
	unsigned long flags;

	/* cache cold stuff */
	struct proto *sk_proto;
	struct sock *sk;

	void (*sk_destruct)(struct sock *sk);

	union tls_crypto_context crypto_send;
	union tls_crypto_context crypto_recv;

	struct list_head list;
	refcount_t refcount;
	struct rcu_head rcu;
};
262 | |
/* Direction selector passed to the tlsdev_ops callbacks and to
 * __tls_driver_ctx() / tls_driver_ctx().
 */
enum tls_offload_ctx_dir {
	TLS_OFFLOAD_CTX_DIR_RX,
	TLS_OFFLOAD_CTX_DIR_TX,
};
267 | |
/* Table of per-netdev TLS callbacks.  Each takes the net_device and an
 * enum tls_offload_ctx_dir direction.
 * NOTE(review): presumably implemented by drivers that support TLS offload
 * and invoked by the TLS core - confirm against the callers.
 */
struct tlsdev_ops {
	/* add: returns int (NOTE(review): presumably 0 or -errno - confirm) */
	int (*tls_dev_add)(struct net_device *netdev, struct sock *sk,
			   enum tls_offload_ctx_dir direction,
			   struct tls_crypto_info *crypto_info,
			   u32 start_offload_tcp_sn);
	/* del: no return value */
	void (*tls_dev_del)(struct net_device *netdev,
			    struct tls_context *ctx,
			    enum tls_offload_ctx_dir direction);
	/* resync: takes a TCP sequence and a pointer to a record sequence */
	int (*tls_dev_resync)(struct net_device *netdev,
			      struct sock *sk, u32 seq, u8 *rcd_sn,
			      enum tls_offload_ctx_dir direction);
};
280 | |
/* RX resync method, stored in tls_offload_context_rx.resync_type.  Each
 * value has a matching member in the union inside that structure.
 */
enum tls_offload_sync_type {
	TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ = 0,
	TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT = 1,
	TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC = 2,
};
286 | |
/* NOTE(review): presumably the initial and maximum intervals for the
 * CORE_NEXT_HINT resync method - confirm against the users.
 */
#define TLS_DEVICE_RESYNC_NH_START_IVAL		2
#define TLS_DEVICE_RESYNC_NH_MAX_IVAL		128

/* Number of entries in tls_offload_resync_async.log */
#define TLS_DEVICE_RESYNC_ASYNC_LOGMAX		13

/* State for the DRIVER_REQ_ASYNC resync method.
 *
 * @req is written by tls_offload_rx_resync_async_request_start() and
 * tls_offload_rx_resync_async_request_end(): the sequence number goes in
 * the upper 32 bits, the start helper additionally stores a length at bit
 * 16, and RESYNC_REQ / RESYNC_REQ_ASYNC occupy the low bits.  The start
 * helper also resets @loglen and @rcd_delta to 0.
 */
struct tls_offload_resync_async {
	atomic64_t req;
	u16 loglen;
	u16 rcd_delta;
	u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX];
};
297 | |
/* Bytes of driver-private RX state reserved in tls_offload_context_rx */
#define TLS_DRIVER_STATE_SIZE_RX	8

/* RX-side offload context.  tls_offload_ctx_rx() returns
 * tls_context.priv_ctx_rx interpreted as a pointer to this structure, and
 * __tls_driver_ctx() hands out @driver_state for the RX direction.
 */
struct tls_offload_context_rx {
	/* sw must be the first member of tls_offload_context_rx */
	struct tls_sw_context_rx sw;
	/* Selects which member of the union below is in use */
	enum tls_offload_sync_type resync_type;
	/* this member is set regardless of resync_type, to avoid branches */
	u8 resync_nh_reset:1;
	/* CORE_NEXT_HINT-only member, but use the hole here */
	u8 resync_nh_do_now:1;
	union {
		/* TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ */
		struct {
			atomic64_t resync_req;
		};
		/* TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT */
		struct {
			u32 decrypted_failed;
			u32 decrypted_tgt;
		} resync_nh;
		/* TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC */
		struct {
			struct tls_offload_resync_async *resync_async;
		};
	};
	/* The TLS layer reserves room for driver specific state
	 * Currently the belief is that there is not enough
	 * driver specific state to justify another layer of indirection
	 */
	u8 driver_state[TLS_DRIVER_STATE_SIZE_RX] __aligned(8);
};
328 | |
/* Defined outside this header.
 * NOTE(review): presumably returns the record in @context that covers TCP
 * sequence @seq and reports its record sequence number through
 * @p_record_sn - confirm against the implementation.
 */
struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
				       u32 seq, u64 *p_record_sn);
331 | |
332 | static inline bool tls_record_is_start_marker(struct tls_record_info *rec) |
333 | { |
334 | return rec->len == 0; |
335 | } |
336 | |
337 | static inline u32 tls_record_start_seq(struct tls_record_info *rec) |
338 | { |
339 | return rec->end_seq - rec->len; |
340 | } |
341 | |
/* Defined outside this header.  tls_is_skb_tx_device_offloaded() compares
 * a socket's sk_validate_xmit_skb hook against the address of
 * tls_validate_xmit_skb.
 * NOTE(review): the _sw variant is presumably the software counterpart -
 * confirm against the implementation.
 */
struct sk_buff *
tls_validate_xmit_skb(struct sock *sk, struct net_device *dev,
		      struct sk_buff *skb);
struct sk_buff *
tls_validate_xmit_skb_sw(struct sock *sk, struct net_device *dev,
			 struct sk_buff *skb);
348 | |
349 | static inline bool tls_is_skb_tx_device_offloaded(const struct sk_buff *skb) |
350 | { |
351 | #ifdef CONFIG_TLS_DEVICE |
352 | struct sock *sk = skb->sk; |
353 | |
354 | return sk && sk_fullsock(sk) && |
355 | (smp_load_acquire(&sk->sk_validate_xmit_skb) == |
356 | &tls_validate_xmit_skb); |
357 | #else |
358 | return false; |
359 | #endif |
360 | } |
361 | |
362 | static inline struct tls_context *tls_get_ctx(const struct sock *sk) |
363 | { |
364 | struct inet_connection_sock *icsk = inet_csk(sk); |
365 | |
366 | /* Use RCU on icsk_ulp_data only for sock diag code, |
367 | * TLS data path doesn't need rcu_dereference(). |
368 | */ |
369 | return (__force void *)icsk->icsk_ulp_data; |
370 | } |
371 | |
372 | static inline struct tls_sw_context_rx *tls_sw_ctx_rx( |
373 | const struct tls_context *tls_ctx) |
374 | { |
375 | return (struct tls_sw_context_rx *)tls_ctx->priv_ctx_rx; |
376 | } |
377 | |
378 | static inline struct tls_sw_context_tx *tls_sw_ctx_tx( |
379 | const struct tls_context *tls_ctx) |
380 | { |
381 | return (struct tls_sw_context_tx *)tls_ctx->priv_ctx_tx; |
382 | } |
383 | |
384 | static inline struct tls_offload_context_tx * |
385 | tls_offload_ctx_tx(const struct tls_context *tls_ctx) |
386 | { |
387 | return (struct tls_offload_context_tx *)tls_ctx->priv_ctx_tx; |
388 | } |
389 | |
390 | static inline bool tls_sw_has_ctx_tx(const struct sock *sk) |
391 | { |
392 | struct tls_context *ctx = tls_get_ctx(sk); |
393 | |
394 | if (!ctx) |
395 | return false; |
396 | return !!tls_sw_ctx_tx(tls_ctx: ctx); |
397 | } |
398 | |
399 | static inline bool tls_sw_has_ctx_rx(const struct sock *sk) |
400 | { |
401 | struct tls_context *ctx = tls_get_ctx(sk); |
402 | |
403 | if (!ctx) |
404 | return false; |
405 | return !!tls_sw_ctx_rx(tls_ctx: ctx); |
406 | } |
407 | |
408 | static inline struct tls_offload_context_rx * |
409 | tls_offload_ctx_rx(const struct tls_context *tls_ctx) |
410 | { |
411 | return (struct tls_offload_context_rx *)tls_ctx->priv_ctx_rx; |
412 | } |
413 | |
414 | static inline void *__tls_driver_ctx(struct tls_context *tls_ctx, |
415 | enum tls_offload_ctx_dir direction) |
416 | { |
417 | if (direction == TLS_OFFLOAD_CTX_DIR_TX) |
418 | return tls_offload_ctx_tx(tls_ctx)->driver_state; |
419 | else |
420 | return tls_offload_ctx_rx(tls_ctx)->driver_state; |
421 | } |
422 | |
423 | static inline void * |
424 | tls_driver_ctx(const struct sock *sk, enum tls_offload_ctx_dir direction) |
425 | { |
426 | return __tls_driver_ctx(tls_ctx: tls_get_ctx(sk), direction); |
427 | } |
428 | |
429 | #define RESYNC_REQ BIT(0) |
430 | #define RESYNC_REQ_ASYNC BIT(1) |
431 | /* The TLS context is valid until sk_destruct is called */ |
432 | static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq) |
433 | { |
434 | struct tls_context *tls_ctx = tls_get_ctx(sk); |
435 | struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); |
436 | |
437 | atomic64_set(v: &rx_ctx->resync_req, i: ((u64)ntohl(seq) << 32) | RESYNC_REQ); |
438 | } |
439 | |
440 | /* Log all TLS record header TCP sequences in [seq, seq+len] */ |
441 | static inline void |
442 | tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len) |
443 | { |
444 | struct tls_context *tls_ctx = tls_get_ctx(sk); |
445 | struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); |
446 | |
447 | atomic64_set(v: &rx_ctx->resync_async->req, i: ((u64)ntohl(seq) << 32) | |
448 | ((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC); |
449 | rx_ctx->resync_async->loglen = 0; |
450 | rx_ctx->resync_async->rcd_delta = 0; |
451 | } |
452 | |
453 | static inline void |
454 | tls_offload_rx_resync_async_request_end(struct sock *sk, __be32 seq) |
455 | { |
456 | struct tls_context *tls_ctx = tls_get_ctx(sk); |
457 | struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); |
458 | |
459 | atomic64_set(v: &rx_ctx->resync_async->req, |
460 | i: ((u64)ntohl(seq) << 32) | RESYNC_REQ); |
461 | } |
462 | |
463 | static inline void |
464 | tls_offload_rx_resync_set_type(struct sock *sk, enum tls_offload_sync_type type) |
465 | { |
466 | struct tls_context *tls_ctx = tls_get_ctx(sk); |
467 | |
468 | tls_offload_ctx_rx(tls_ctx)->resync_type = type; |
469 | } |
470 | |
471 | /* Driver's seq tracking has to be disabled until resync succeeded */ |
472 | static inline bool tls_offload_tx_resync_pending(struct sock *sk) |
473 | { |
474 | struct tls_context *tls_ctx = tls_get_ctx(sk); |
475 | bool ret; |
476 | |
477 | ret = test_bit(TLS_TX_SYNC_SCHED, &tls_ctx->flags); |
478 | smp_mb__after_atomic(); |
479 | return ret; |
480 | } |
481 | |
/* Defined outside this header.
 * NOTE(review): presumably returns an encrypted version of @skb - confirm
 * ownership and failure semantics against the implementation.
 */
struct sk_buff *tls_encrypt_skb(struct sk_buff *skb);
483 | |
484 | #ifdef CONFIG_TLS_DEVICE |
/* Defined outside this header; only declared when CONFIG_TLS_DEVICE is set.
 * tls_is_sk_rx_device_offloaded() compares a socket's sk_destruct hook
 * against tls_device_sk_destruct.
 */
void tls_device_sk_destruct(struct sock *sk);
void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq);
487 | |
488 | static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk) |
489 | { |
490 | if (!sk_fullsock(sk) || |
491 | smp_load_acquire(&sk->sk_destruct) != tls_device_sk_destruct) |
492 | return false; |
493 | return tls_get_ctx(sk)->rx_conf == TLS_HW; |
494 | } |
495 | #endif |
496 | #endif /* _TLS_OFFLOAD_H */ |
497 | |