1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Just-In-Time compiler for eBPF bytecode on MIPS. |
4 | * Implementation of JIT functions common to 32-bit and 64-bit CPUs. |
5 | * |
6 | * Copyright (c) 2021 Anyfi Networks AB. |
7 | * Author: Johan Almbladh <johan.almbladh@gmail.com> |
8 | * |
9 | * Based on code and ideas from |
10 | * Copyright (c) 2017 Cavium, Inc. |
11 | * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com> |
12 | * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> |
13 | */ |
14 | |
15 | /* |
16 | * Code overview |
17 | * ============= |
18 | * |
19 | * - bpf_jit_comp.h |
20 | * Common definitions and utilities. |
21 | * |
22 | * - bpf_jit_comp.c |
23 | * Implementation of JIT top-level logic and exported JIT API functions. |
24 | * Implementation of internal operations shared by 32-bit and 64-bit code. |
25 | * JMP and ALU JIT control code, register control code, shared ALU and |
26 | * JMP/JMP32 JIT operations. |
27 | * |
28 | * - bpf_jit_comp32.c |
29 | * Implementation of functions to JIT prologue, epilogue and a single eBPF |
30 | * instruction for 32-bit MIPS CPUs. The functions use shared operations |
31 | * where possible, and implement the rest for 32-bit MIPS such as ALU64 |
32 | * operations. |
33 | * |
34 | * - bpf_jit_comp64.c |
35 | * Ditto, for 64-bit MIPS CPUs. |
36 | * |
37 | * Zero and sign extension |
38 | * ======================== |
39 | * 32-bit MIPS instructions on 64-bit MIPS registers use sign extension, |
40 | * but the eBPF instruction set mandates zero extension. We let the verifier |
41 | * insert explicit zero-extensions after 32-bit ALU operations, both for |
 * 32-bit and 64-bit MIPS JITs. Conditional JMP32 operations on 64-bit MIPS
 * are JITed with sign extensions inserted when so expected.
44 | * |
45 | * ALU operations |
46 | * ============== |
47 | * ALU operations on 32/64-bit MIPS and ALU64 operations on 64-bit MIPS are |
48 | * JITed in the following steps. ALU64 operations on 32-bit MIPS are more |
49 | * complicated and therefore only processed by special implementations in |
50 | * step (3). |
51 | * |
52 | * 1) valid_alu_i: |
53 | * Determine if an immediate operation can be emitted as such, or if |
54 | * we must fall back to the register version. |
55 | * |
56 | * 2) rewrite_alu_i: |
57 | * Convert BPF operation and immediate value to a canonical form for |
58 | * JITing. In some degenerate cases this form may be a no-op. |
59 | * |
60 | * 3) emit_alu_{i,i64,r,64}: |
61 | * Emit instructions for an ALU or ALU64 immediate or register operation. |
62 | * |
63 | * JMP operations |
64 | * ============== |
 * JMP and JMP32 operations require a JIT instruction offset table for
66 | * translating the jump offset. This table is computed by dry-running the |
67 | * JIT without actually emitting anything. However, the computed PC-relative |
68 | * offset may overflow the 18-bit offset field width of the native MIPS |
69 | * branch instruction. In such cases, the long jump is converted into the |
70 | * following sequence. |
71 | * |
72 | * <branch> !<cond> +2 Inverted PC-relative branch |
73 | * nop Delay slot |
74 | * j <offset> Unconditional absolute long jump |
75 | * nop Delay slot |
76 | * |
77 | * Since this converted sequence alters the offset table, all offsets must |
78 | * be re-calculated. This may in turn trigger new branch conversions, so |
79 | * the process is repeated until no further changes are made. Normally it |
 * completes in 1-2 iterations. If JIT_MAX_ITERATIONS is reached, we
81 | * fall back to converting every remaining jump operation. The branch |
82 | * conversion is independent of how the JMP or JMP32 condition is JITed. |
83 | * |
84 | * JMP32 and JMP operations are JITed as follows. |
85 | * |
86 | * 1) setup_jmp_{i,r}: |
87 | * Convert jump conditional and offset into a form that can be JITed. |
88 | * This form may be a no-op, a canonical form, or an inverted PC-relative |
89 | * jump if branch conversion is necessary. |
90 | * |
91 | * 2) valid_jmp_i: |
 *    Determine if an immediate operation can be emitted as such, or if
93 | * we must fall back to the register version. Applies to JMP32 for 32-bit |
94 | * MIPS, and both JMP and JMP32 for 64-bit MIPS. |
95 | * |
96 | * 3) emit_jmp_{i,i64,r,r64}: |
 *    Emit instructions for a JMP or JMP32 immediate or register operation.
98 | * |
99 | * 4) finish_jmp_{i,r}: |
100 | * Emit any instructions needed to finish the jump. This includes a nop |
101 | * for the delay slot if a branch was emitted, and a long absolute jump |
102 | * if the branch was converted. |
103 | */ |
104 | |
105 | #include <linux/limits.h> |
106 | #include <linux/bitops.h> |
107 | #include <linux/errno.h> |
108 | #include <linux/filter.h> |
109 | #include <linux/bpf.h> |
110 | #include <linux/slab.h> |
111 | #include <asm/bitops.h> |
112 | #include <asm/cacheflush.h> |
113 | #include <asm/cpu-features.h> |
114 | #include <asm/isa-rev.h> |
115 | #include <asm/uasm.h> |
116 | |
117 | #include "bpf_jit_comp.h" |
118 | |
119 | /* Convenience macros for descriptor access */ |
120 | #define CONVERTED(desc) ((desc) & JIT_DESC_CONVERT) |
121 | #define INDEX(desc) ((desc) & ~JIT_DESC_CONVERT) |
122 | |
123 | /* |
124 | * Push registers on the stack, starting at a given depth from the stack |
125 | * pointer and increasing. The next depth to be written is returned. |
126 | */ |
127 | int push_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth) |
128 | { |
129 | int reg; |
130 | |
131 | for (reg = 0; reg < BITS_PER_BYTE * sizeof(mask); reg++) |
132 | if (mask & BIT(reg)) { |
133 | if ((excl & BIT(reg)) == 0) { |
134 | if (sizeof(long) == 4) |
135 | emit(ctx, sw, reg, depth, MIPS_R_SP); |
136 | else /* sizeof(long) == 8 */ |
137 | emit(ctx, sd, reg, depth, MIPS_R_SP); |
138 | } |
139 | depth += sizeof(long); |
140 | } |
141 | |
142 | ctx->stack_used = max((int)ctx->stack_used, depth); |
143 | return depth; |
144 | } |
145 | |
146 | /* |
147 | * Pop registers from the stack, starting at a given depth from the stack |
148 | * pointer and increasing. The next depth to be read is returned. |
149 | */ |
150 | int pop_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth) |
151 | { |
152 | int reg; |
153 | |
154 | for (reg = 0; reg < BITS_PER_BYTE * sizeof(mask); reg++) |
155 | if (mask & BIT(reg)) { |
156 | if ((excl & BIT(reg)) == 0) { |
157 | if (sizeof(long) == 4) |
158 | emit(ctx, lw, reg, depth, MIPS_R_SP); |
159 | else /* sizeof(long) == 8 */ |
160 | emit(ctx, ld, reg, depth, MIPS_R_SP); |
161 | } |
162 | depth += sizeof(long); |
163 | } |
164 | |
165 | return depth; |
166 | } |
167 | |
/*
 * Compute the 28-bit jump target address from a BPF program location.
 * Returns 0 during the dry-run passes (no JIT image allocated yet),
 * and -1 if the target lies outside the 256 MB segment reachable by
 * the MIPS 'j' instruction from the current JIT position.
 */
int get_target(struct jit_context *ctx, u32 loc)
{
	u32 index = INDEX(ctx->descriptors[loc]);
	unsigned long pc = (unsigned long)&ctx->target[ctx->jit_index];
	unsigned long addr = (unsigned long)&ctx->target[index];

	/* Dry run: no image yet, so no real address to compute */
	if (!ctx->target)
		return 0;

	/* Absolute jump must not cross a segment boundary */
	if ((addr ^ pc) & ~MIPS_JMP_MASK)
		return -1;

	return addr & MIPS_JMP_MASK;
}
183 | |
184 | /* Compute the PC-relative offset to relative BPF program offset */ |
185 | int get_offset(const struct jit_context *ctx, int off) |
186 | { |
187 | return (INDEX(ctx->descriptors[ctx->bpf_index + off]) - |
188 | ctx->jit_index - 1) * sizeof(u32); |
189 | } |
190 | |
191 | /* dst = imm (register width) */ |
192 | void emit_mov_i(struct jit_context *ctx, u8 dst, s32 imm) |
193 | { |
194 | if (imm >= -0x8000 && imm <= 0x7fff) { |
195 | emit(ctx, addiu, dst, MIPS_R_ZERO, imm); |
196 | } else { |
197 | emit(ctx, lui, dst, (s16)((u32)imm >> 16)); |
198 | emit(ctx, ori, dst, dst, (u16)(imm & 0xffff)); |
199 | } |
200 | clobber_reg(ctx, reg: dst); |
201 | } |
202 | |
203 | /* dst = src (register width) */ |
204 | void emit_mov_r(struct jit_context *ctx, u8 dst, u8 src) |
205 | { |
206 | emit(ctx, ori, dst, src, 0); |
207 | clobber_reg(ctx, reg: dst); |
208 | } |
209 | |
210 | /* Validate ALU immediate range */ |
211 | bool valid_alu_i(u8 op, s32 imm) |
212 | { |
213 | switch (BPF_OP(op)) { |
214 | case BPF_NEG: |
215 | case BPF_LSH: |
216 | case BPF_RSH: |
217 | case BPF_ARSH: |
218 | /* All legal eBPF values are valid */ |
219 | return true; |
220 | case BPF_ADD: |
221 | if (IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS)) |
222 | return false; |
223 | /* imm must be 16 bits */ |
224 | return imm >= -0x8000 && imm <= 0x7fff; |
225 | case BPF_SUB: |
226 | if (IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS)) |
227 | return false; |
228 | /* -imm must be 16 bits */ |
229 | return imm >= -0x7fff && imm <= 0x8000; |
230 | case BPF_AND: |
231 | case BPF_OR: |
232 | case BPF_XOR: |
233 | /* imm must be 16 bits unsigned */ |
234 | return imm >= 0 && imm <= 0xffff; |
235 | case BPF_MUL: |
236 | /* imm must be zero or a positive power of two */ |
237 | return imm == 0 || (imm > 0 && is_power_of_2(n: imm)); |
238 | case BPF_DIV: |
239 | case BPF_MOD: |
240 | /* imm must be an 17-bit power of two */ |
241 | return (u32)imm <= 0x10000 && is_power_of_2(n: (u32)imm); |
242 | } |
243 | return false; |
244 | } |
245 | |
246 | /* Rewrite ALU immediate operation */ |
247 | bool rewrite_alu_i(u8 op, s32 imm, u8 *alu, s32 *val) |
248 | { |
249 | bool act = true; |
250 | |
251 | switch (BPF_OP(op)) { |
252 | case BPF_LSH: |
253 | case BPF_RSH: |
254 | case BPF_ARSH: |
255 | case BPF_ADD: |
256 | case BPF_SUB: |
257 | case BPF_OR: |
258 | case BPF_XOR: |
259 | /* imm == 0 is a no-op */ |
260 | act = imm != 0; |
261 | break; |
262 | case BPF_MUL: |
263 | if (imm == 1) { |
264 | /* dst * 1 is a no-op */ |
265 | act = false; |
266 | } else if (imm == 0) { |
267 | /* dst * 0 is dst & 0 */ |
268 | op = BPF_AND; |
269 | } else { |
270 | /* dst * (1 << n) is dst << n */ |
271 | op = BPF_LSH; |
272 | imm = ilog2(abs(imm)); |
273 | } |
274 | break; |
275 | case BPF_DIV: |
276 | if (imm == 1) { |
277 | /* dst / 1 is a no-op */ |
278 | act = false; |
279 | } else { |
280 | /* dst / (1 << n) is dst >> n */ |
281 | op = BPF_RSH; |
282 | imm = ilog2(imm); |
283 | } |
284 | break; |
285 | case BPF_MOD: |
286 | /* dst % (1 << n) is dst & ((1 << n) - 1) */ |
287 | op = BPF_AND; |
288 | imm--; |
289 | break; |
290 | } |
291 | |
292 | *alu = op; |
293 | *val = imm; |
294 | return act; |
295 | } |
296 | |
297 | /* ALU immediate operation (32-bit) */ |
298 | void emit_alu_i(struct jit_context *ctx, u8 dst, s32 imm, u8 op) |
299 | { |
300 | switch (BPF_OP(op)) { |
301 | /* dst = -dst */ |
302 | case BPF_NEG: |
303 | emit(ctx, subu, dst, MIPS_R_ZERO, dst); |
304 | break; |
305 | /* dst = dst & imm */ |
306 | case BPF_AND: |
307 | emit(ctx, andi, dst, dst, (u16)imm); |
308 | break; |
309 | /* dst = dst | imm */ |
310 | case BPF_OR: |
311 | emit(ctx, ori, dst, dst, (u16)imm); |
312 | break; |
313 | /* dst = dst ^ imm */ |
314 | case BPF_XOR: |
315 | emit(ctx, xori, dst, dst, (u16)imm); |
316 | break; |
317 | /* dst = dst << imm */ |
318 | case BPF_LSH: |
319 | emit(ctx, sll, dst, dst, imm); |
320 | break; |
321 | /* dst = dst >> imm */ |
322 | case BPF_RSH: |
323 | emit(ctx, srl, dst, dst, imm); |
324 | break; |
325 | /* dst = dst >> imm (arithmetic) */ |
326 | case BPF_ARSH: |
327 | emit(ctx, sra, dst, dst, imm); |
328 | break; |
329 | /* dst = dst + imm */ |
330 | case BPF_ADD: |
331 | emit(ctx, addiu, dst, dst, imm); |
332 | break; |
333 | /* dst = dst - imm */ |
334 | case BPF_SUB: |
335 | emit(ctx, addiu, dst, dst, -imm); |
336 | break; |
337 | } |
338 | clobber_reg(ctx, reg: dst); |
339 | } |
340 | |
341 | /* ALU register operation (32-bit) */ |
342 | void emit_alu_r(struct jit_context *ctx, u8 dst, u8 src, u8 op) |
343 | { |
344 | switch (BPF_OP(op)) { |
345 | /* dst = dst & src */ |
346 | case BPF_AND: |
347 | emit(ctx, and, dst, dst, src); |
348 | break; |
349 | /* dst = dst | src */ |
350 | case BPF_OR: |
351 | emit(ctx, or, dst, dst, src); |
352 | break; |
353 | /* dst = dst ^ src */ |
354 | case BPF_XOR: |
355 | emit(ctx, xor, dst, dst, src); |
356 | break; |
357 | /* dst = dst << src */ |
358 | case BPF_LSH: |
359 | emit(ctx, sllv, dst, dst, src); |
360 | break; |
361 | /* dst = dst >> src */ |
362 | case BPF_RSH: |
363 | emit(ctx, srlv, dst, dst, src); |
364 | break; |
365 | /* dst = dst >> src (arithmetic) */ |
366 | case BPF_ARSH: |
367 | emit(ctx, srav, dst, dst, src); |
368 | break; |
369 | /* dst = dst + src */ |
370 | case BPF_ADD: |
371 | emit(ctx, addu, dst, dst, src); |
372 | break; |
373 | /* dst = dst - src */ |
374 | case BPF_SUB: |
375 | emit(ctx, subu, dst, dst, src); |
376 | break; |
377 | /* dst = dst * src */ |
378 | case BPF_MUL: |
379 | if (cpu_has_mips32r1 || cpu_has_mips32r6) { |
380 | emit(ctx, mul, dst, dst, src); |
381 | } else { |
382 | emit(ctx, multu, dst, src); |
383 | emit(ctx, mflo, dst); |
384 | } |
385 | break; |
386 | /* dst = dst / src */ |
387 | case BPF_DIV: |
388 | if (cpu_has_mips32r6) { |
389 | emit(ctx, divu_r6, dst, dst, src); |
390 | } else { |
391 | emit(ctx, divu, dst, src); |
392 | emit(ctx, mflo, dst); |
393 | } |
394 | break; |
395 | /* dst = dst % src */ |
396 | case BPF_MOD: |
397 | if (cpu_has_mips32r6) { |
398 | emit(ctx, modu, dst, dst, src); |
399 | } else { |
400 | emit(ctx, divu, dst, src); |
401 | emit(ctx, mfhi, dst); |
402 | } |
403 | break; |
404 | } |
405 | clobber_reg(ctx, reg: dst); |
406 | } |
407 | |
408 | /* Atomic read-modify-write (32-bit) */ |
409 | void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code) |
410 | { |
411 | LLSC_sync(ctx); |
412 | emit(ctx, ll, MIPS_R_T9, off, dst); |
413 | switch (code) { |
414 | case BPF_ADD: |
415 | case BPF_ADD | BPF_FETCH: |
416 | emit(ctx, addu, MIPS_R_T8, MIPS_R_T9, src); |
417 | break; |
418 | case BPF_AND: |
419 | case BPF_AND | BPF_FETCH: |
420 | emit(ctx, and, MIPS_R_T8, MIPS_R_T9, src); |
421 | break; |
422 | case BPF_OR: |
423 | case BPF_OR | BPF_FETCH: |
424 | emit(ctx, or, MIPS_R_T8, MIPS_R_T9, src); |
425 | break; |
426 | case BPF_XOR: |
427 | case BPF_XOR | BPF_FETCH: |
428 | emit(ctx, xor, MIPS_R_T8, MIPS_R_T9, src); |
429 | break; |
430 | case BPF_XCHG: |
431 | emit(ctx, move, MIPS_R_T8, src); |
432 | break; |
433 | } |
434 | emit(ctx, sc, MIPS_R_T8, off, dst); |
435 | emit(ctx, LLSC_beqz, MIPS_R_T8, -16 - LLSC_offset); |
436 | emit(ctx, nop); /* Delay slot */ |
437 | |
438 | if (code & BPF_FETCH) { |
439 | emit(ctx, move, src, MIPS_R_T9); |
440 | clobber_reg(ctx, reg: src); |
441 | } |
442 | } |
443 | |
444 | /* Atomic compare-and-exchange (32-bit) */ |
445 | void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off) |
446 | { |
447 | LLSC_sync(ctx); |
448 | emit(ctx, ll, MIPS_R_T9, off, dst); |
449 | emit(ctx, bne, MIPS_R_T9, res, 12); |
450 | emit(ctx, move, MIPS_R_T8, src); /* Delay slot */ |
451 | emit(ctx, sc, MIPS_R_T8, off, dst); |
452 | emit(ctx, LLSC_beqz, MIPS_R_T8, -20 - LLSC_offset); |
453 | emit(ctx, move, res, MIPS_R_T9); /* Delay slot */ |
454 | clobber_reg(ctx, reg: res); |
455 | } |
456 | |
457 | /* Swap bytes and truncate a register word or half word */ |
458 | void emit_bswap_r(struct jit_context *ctx, u8 dst, u32 width) |
459 | { |
460 | u8 tmp = MIPS_R_T8; |
461 | u8 msk = MIPS_R_T9; |
462 | |
463 | switch (width) { |
464 | /* Swap bytes in a word */ |
465 | case 32: |
466 | if (cpu_has_mips32r2 || cpu_has_mips32r6) { |
467 | emit(ctx, wsbh, dst, dst); |
468 | emit(ctx, rotr, dst, dst, 16); |
469 | } else { |
470 | emit(ctx, sll, tmp, dst, 16); /* tmp = dst << 16 */ |
471 | emit(ctx, srl, dst, dst, 16); /* dst = dst >> 16 */ |
472 | emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */ |
473 | |
474 | emit(ctx, lui, msk, 0xff); /* msk = 0x00ff0000 */ |
475 | emit(ctx, ori, msk, msk, 0xff); /* msk = msk | 0xff */ |
476 | |
477 | emit(ctx, and, tmp, dst, msk); /* tmp = dst & msk */ |
478 | emit(ctx, sll, tmp, tmp, 8); /* tmp = tmp << 8 */ |
479 | emit(ctx, srl, dst, dst, 8); /* dst = dst >> 8 */ |
480 | emit(ctx, and, dst, dst, msk); /* dst = dst & msk */ |
481 | emit(ctx, or, dst, dst, tmp); /* reg = dst | tmp */ |
482 | } |
483 | break; |
484 | /* Swap bytes in a half word */ |
485 | case 16: |
486 | if (cpu_has_mips32r2 || cpu_has_mips32r6) { |
487 | emit(ctx, wsbh, dst, dst); |
488 | emit(ctx, andi, dst, dst, 0xffff); |
489 | } else { |
490 | emit(ctx, andi, tmp, dst, 0xff00); /* t = d & 0xff00 */ |
491 | emit(ctx, srl, tmp, tmp, 8); /* t = t >> 8 */ |
492 | emit(ctx, andi, dst, dst, 0x00ff); /* d = d & 0x00ff */ |
493 | emit(ctx, sll, dst, dst, 8); /* d = d << 8 */ |
494 | emit(ctx, or, dst, dst, tmp); /* d = d | t */ |
495 | } |
496 | break; |
497 | } |
498 | clobber_reg(ctx, reg: dst); |
499 | } |
500 | |
501 | /* Validate jump immediate range */ |
502 | bool valid_jmp_i(u8 op, s32 imm) |
503 | { |
504 | switch (op) { |
505 | case JIT_JNOP: |
506 | /* Immediate value not used */ |
507 | return true; |
508 | case BPF_JEQ: |
509 | case BPF_JNE: |
510 | /* No immediate operation */ |
511 | return false; |
512 | case BPF_JSET: |
513 | case JIT_JNSET: |
514 | /* imm must be 16 bits unsigned */ |
515 | return imm >= 0 && imm <= 0xffff; |
516 | case BPF_JGE: |
517 | case BPF_JLT: |
518 | case BPF_JSGE: |
519 | case BPF_JSLT: |
520 | /* imm must be 16 bits */ |
521 | return imm >= -0x8000 && imm <= 0x7fff; |
522 | case BPF_JGT: |
523 | case BPF_JLE: |
524 | case BPF_JSGT: |
525 | case BPF_JSLE: |
526 | /* imm + 1 must be 16 bits */ |
527 | return imm >= -0x8001 && imm <= 0x7ffe; |
528 | } |
529 | return false; |
530 | } |
531 | |
532 | /* Invert a conditional jump operation */ |
533 | static u8 invert_jmp(u8 op) |
534 | { |
535 | switch (op) { |
536 | case BPF_JA: return JIT_JNOP; |
537 | case BPF_JEQ: return BPF_JNE; |
538 | case BPF_JNE: return BPF_JEQ; |
539 | case BPF_JSET: return JIT_JNSET; |
540 | case BPF_JGT: return BPF_JLE; |
541 | case BPF_JGE: return BPF_JLT; |
542 | case BPF_JLT: return BPF_JGE; |
543 | case BPF_JLE: return BPF_JGT; |
544 | case BPF_JSGT: return BPF_JSLE; |
545 | case BPF_JSGE: return BPF_JSLT; |
546 | case BPF_JSLT: return BPF_JSGE; |
547 | case BPF_JSLE: return BPF_JSGT; |
548 | } |
549 | return 0; |
550 | } |
551 | |
552 | /* Prepare a PC-relative jump operation */ |
553 | static void setup_jmp(struct jit_context *ctx, u8 bpf_op, |
554 | s16 bpf_off, u8 *jit_op, s32 *jit_off) |
555 | { |
556 | u32 *descp = &ctx->descriptors[ctx->bpf_index]; |
557 | int op = bpf_op; |
558 | int offset = 0; |
559 | |
560 | /* Do not compute offsets on the first pass */ |
561 | if (INDEX(*descp) == 0) |
562 | goto done; |
563 | |
564 | /* Skip jumps never taken */ |
565 | if (bpf_op == JIT_JNOP) |
566 | goto done; |
567 | |
568 | /* Convert jumps always taken */ |
569 | if (bpf_op == BPF_JA) |
570 | *descp |= JIT_DESC_CONVERT; |
571 | |
572 | /* |
573 | * Current ctx->jit_index points to the start of the branch preamble. |
574 | * Since the preamble differs among different branch conditionals, |
575 | * the current index cannot be used to compute the branch offset. |
576 | * Instead, we use the offset table value for the next instruction, |
577 | * which gives the index immediately after the branch delay slot. |
578 | */ |
579 | if (!CONVERTED(*descp)) { |
580 | int target = ctx->bpf_index + bpf_off + 1; |
581 | int origin = ctx->bpf_index + 1; |
582 | |
583 | offset = (INDEX(ctx->descriptors[target]) - |
584 | INDEX(ctx->descriptors[origin]) + 1) * sizeof(u32); |
585 | } |
586 | |
587 | /* |
588 | * The PC-relative branch offset field on MIPS is 18 bits signed, |
589 | * so if the computed offset is larger than this we generate a an |
590 | * absolute jump that we skip with an inverted conditional branch. |
591 | */ |
592 | if (CONVERTED(*descp) || offset < -0x20000 || offset > 0x1ffff) { |
593 | offset = 3 * sizeof(u32); |
594 | op = invert_jmp(op: bpf_op); |
595 | ctx->changes += !CONVERTED(*descp); |
596 | *descp |= JIT_DESC_CONVERT; |
597 | } |
598 | |
599 | done: |
600 | *jit_off = offset; |
601 | *jit_op = op; |
602 | } |
603 | |
604 | /* Prepare a PC-relative jump operation with immediate conditional */ |
605 | void setup_jmp_i(struct jit_context *ctx, s32 imm, u8 width, |
606 | u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off) |
607 | { |
608 | bool always = false; |
609 | bool never = false; |
610 | |
611 | switch (bpf_op) { |
612 | case BPF_JEQ: |
613 | case BPF_JNE: |
614 | break; |
615 | case BPF_JSET: |
616 | case BPF_JLT: |
617 | never = imm == 0; |
618 | break; |
619 | case BPF_JGE: |
620 | always = imm == 0; |
621 | break; |
622 | case BPF_JGT: |
623 | never = (u32)imm == U32_MAX; |
624 | break; |
625 | case BPF_JLE: |
626 | always = (u32)imm == U32_MAX; |
627 | break; |
628 | case BPF_JSGT: |
629 | never = imm == S32_MAX && width == 32; |
630 | break; |
631 | case BPF_JSGE: |
632 | always = imm == S32_MIN && width == 32; |
633 | break; |
634 | case BPF_JSLT: |
635 | never = imm == S32_MIN && width == 32; |
636 | break; |
637 | case BPF_JSLE: |
638 | always = imm == S32_MAX && width == 32; |
639 | break; |
640 | } |
641 | |
642 | if (never) |
643 | bpf_op = JIT_JNOP; |
644 | if (always) |
645 | bpf_op = BPF_JA; |
646 | setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off); |
647 | } |
648 | |
649 | /* Prepare a PC-relative jump operation with register conditional */ |
650 | void setup_jmp_r(struct jit_context *ctx, bool same_reg, |
651 | u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off) |
652 | { |
653 | switch (bpf_op) { |
654 | case BPF_JSET: |
655 | break; |
656 | case BPF_JEQ: |
657 | case BPF_JGE: |
658 | case BPF_JLE: |
659 | case BPF_JSGE: |
660 | case BPF_JSLE: |
661 | if (same_reg) |
662 | bpf_op = BPF_JA; |
663 | break; |
664 | case BPF_JNE: |
665 | case BPF_JLT: |
666 | case BPF_JGT: |
667 | case BPF_JSGT: |
668 | case BPF_JSLT: |
669 | if (same_reg) |
670 | bpf_op = JIT_JNOP; |
671 | break; |
672 | } |
673 | setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off); |
674 | } |
675 | |
676 | /* Finish a PC-relative jump operation */ |
677 | int finish_jmp(struct jit_context *ctx, u8 jit_op, s16 bpf_off) |
678 | { |
679 | /* Emit conditional branch delay slot */ |
680 | if (jit_op != JIT_JNOP) |
681 | emit(ctx, nop); |
682 | /* |
683 | * Emit an absolute long jump with delay slot, |
684 | * if the PC-relative branch was converted. |
685 | */ |
686 | if (CONVERTED(ctx->descriptors[ctx->bpf_index])) { |
687 | int target = get_target(ctx, loc: ctx->bpf_index + bpf_off + 1); |
688 | |
689 | if (target < 0) |
690 | return -1; |
691 | emit(ctx, j, target); |
692 | emit(ctx, nop); |
693 | } |
694 | return 0; |
695 | } |
696 | |
/*
 * Jump immediate (32-bit).
 * Comparisons are reduced to slti/sltiu against the immediate, with T9
 * as a scratch register, followed by a branch on zero/non-zero. The
 * delay slot of the emitted branch is filled later by finish_jmp().
 * Strict/non-strict pairs are handled by biasing the immediate by one,
 * which valid_jmp_i() has already checked stays within 16 bits.
 */
void emit_jmp_i(struct jit_context *ctx, u8 dst, s32 imm, s32 off, u8 op)
{
	switch (op) {
	/* No-op, used internally for branch optimization */
	case JIT_JNOP:
		break;
	/* PC += off if dst & imm */
	case BPF_JSET:
		emit(ctx, andi, MIPS_R_T9, dst, (u16)imm);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */
	case JIT_JNSET:
		emit(ctx, andi, MIPS_R_T9, dst, (u16)imm);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst > imm */
	case BPF_JGT:
		emit(ctx, sltiu, MIPS_R_T9, dst, imm + 1);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= imm */
	case BPF_JGE:
		emit(ctx, sltiu, MIPS_R_T9, dst, imm);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < imm */
	case BPF_JLT:
		emit(ctx, sltiu, MIPS_R_T9, dst, imm);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= imm */
	case BPF_JLE:
		emit(ctx, sltiu, MIPS_R_T9, dst, imm + 1);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst > imm (signed) */
	case BPF_JSGT:
		emit(ctx, slti, MIPS_R_T9, dst, imm + 1);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= imm (signed) */
	case BPF_JSGE:
		emit(ctx, slti, MIPS_R_T9, dst, imm);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < imm (signed) */
	case BPF_JSLT:
		emit(ctx, slti, MIPS_R_T9, dst, imm);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= imm (signed) */
	case BPF_JSLE:
		emit(ctx, slti, MIPS_R_T9, dst, imm + 1);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	}
}
756 | |
/*
 * Jump register (32-bit).
 * Equality tests map directly to beq/bne; ordered comparisons go via
 * slt/sltu into the T9 scratch register, swapping the operands where
 * needed. The delay slot of the emitted branch is filled later by
 * finish_jmp().
 */
void emit_jmp_r(struct jit_context *ctx, u8 dst, u8 src, s32 off, u8 op)
{
	switch (op) {
	/* No-op, used internally for branch optimization */
	case JIT_JNOP:
		break;
	/* PC += off if dst == src */
	case BPF_JEQ:
		emit(ctx, beq, dst, src, off);
		break;
	/* PC += off if dst != src */
	case BPF_JNE:
		emit(ctx, bne, dst, src, off);
		break;
	/* PC += off if dst & src */
	case BPF_JSET:
		emit(ctx, and, MIPS_R_T9, dst, src);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if (dst & src) == 0 (not in BPF, used for long jumps) */
	case JIT_JNSET:
		emit(ctx, and, MIPS_R_T9, dst, src);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst > src */
	case BPF_JGT:
		emit(ctx, sltu, MIPS_R_T9, src, dst);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= src */
	case BPF_JGE:
		emit(ctx, sltu, MIPS_R_T9, dst, src);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < src */
	case BPF_JLT:
		emit(ctx, sltu, MIPS_R_T9, dst, src);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= src */
	case BPF_JLE:
		emit(ctx, sltu, MIPS_R_T9, src, dst);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst > src (signed) */
	case BPF_JSGT:
		emit(ctx, slt, MIPS_R_T9, src, dst);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= src (signed) */
	case BPF_JSGE:
		emit(ctx, slt, MIPS_R_T9, dst, src);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < src (signed) */
	case BPF_JSLT:
		emit(ctx, slt, MIPS_R_T9, dst, src);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= src (signed) */
	case BPF_JSLE:
		emit(ctx, slt, MIPS_R_T9, src, dst);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	}
}
824 | |
825 | /* Jump always */ |
826 | int emit_ja(struct jit_context *ctx, s16 off) |
827 | { |
828 | int target = get_target(ctx, loc: ctx->bpf_index + off + 1); |
829 | |
830 | if (target < 0) |
831 | return -1; |
832 | emit(ctx, j, target); |
833 | emit(ctx, nop); |
834 | return 0; |
835 | } |
836 | |
837 | /* Jump to epilogue */ |
838 | int emit_exit(struct jit_context *ctx) |
839 | { |
840 | int target = get_target(ctx, loc: ctx->program->len); |
841 | |
842 | if (target < 0) |
843 | return -1; |
844 | emit(ctx, j, target); |
845 | emit(ctx, nop); |
846 | return 0; |
847 | } |
848 | |
849 | /* Build the program body from eBPF bytecode */ |
850 | static int build_body(struct jit_context *ctx) |
851 | { |
852 | const struct bpf_prog *prog = ctx->program; |
853 | unsigned int i; |
854 | |
855 | ctx->stack_used = 0; |
856 | for (i = 0; i < prog->len; i++) { |
857 | const struct bpf_insn *insn = &prog->insnsi[i]; |
858 | u32 *descp = &ctx->descriptors[i]; |
859 | int ret; |
860 | |
861 | access_reg(ctx, reg: insn->src_reg); |
862 | access_reg(ctx, reg: insn->dst_reg); |
863 | |
864 | ctx->bpf_index = i; |
865 | if (ctx->target == NULL) { |
866 | ctx->changes += INDEX(*descp) != ctx->jit_index; |
867 | *descp &= JIT_DESC_CONVERT; |
868 | *descp |= ctx->jit_index; |
869 | } |
870 | |
871 | ret = build_insn(insn, ctx); |
872 | if (ret < 0) |
873 | return ret; |
874 | |
875 | if (ret > 0) { |
876 | i++; |
877 | if (ctx->target == NULL) |
878 | descp[1] = ctx->jit_index; |
879 | } |
880 | } |
881 | |
882 | /* Store the end offset, where the epilogue begins */ |
883 | ctx->descriptors[prog->len] = ctx->jit_index; |
884 | return 0; |
885 | } |
886 | |
887 | /* Set the branch conversion flag on all instructions */ |
888 | static void set_convert_flag(struct jit_context *ctx, bool enable) |
889 | { |
890 | const struct bpf_prog *prog = ctx->program; |
891 | u32 flag = enable ? JIT_DESC_CONVERT : 0; |
892 | unsigned int i; |
893 | |
894 | for (i = 0; i <= prog->len; i++) |
895 | ctx->descriptors[i] = INDEX(ctx->descriptors[i]) | flag; |
896 | } |
897 | |
898 | static void jit_fill_hole(void *area, unsigned int size) |
899 | { |
900 | u32 *p; |
901 | |
902 | /* We are guaranteed to have aligned memory. */ |
903 | for (p = area; size >= sizeof(u32); size -= sizeof(u32)) |
904 | uasm_i_break(&p, BRK_BUG); /* Increments p */ |
905 | } |
906 | |
/*
 * Tell the eBPF verifier that this JIT wants explicit zero-extension
 * instructions inserted after 32-bit ALU operations (see the "Zero and
 * sign extension" note at the top of this file).
 */
bool bpf_jit_needs_zext(void)
{
	return true;
}
911 | |
912 | struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) |
913 | { |
914 | struct bpf_prog *tmp, *orig_prog = prog; |
915 | struct bpf_binary_header * = NULL; |
916 | struct jit_context ctx; |
917 | bool tmp_blinded = false; |
918 | unsigned int tmp_idx; |
919 | unsigned int image_size; |
920 | u8 *image_ptr; |
921 | int tries; |
922 | |
923 | /* |
924 | * If BPF JIT was not enabled then we must fall back to |
925 | * the interpreter. |
926 | */ |
927 | if (!prog->jit_requested) |
928 | return orig_prog; |
929 | /* |
930 | * If constant blinding was enabled and we failed during blinding |
931 | * then we must fall back to the interpreter. Otherwise, we save |
932 | * the new JITed code. |
933 | */ |
934 | tmp = bpf_jit_blind_constants(fp: prog); |
935 | if (IS_ERR(ptr: tmp)) |
936 | return orig_prog; |
937 | if (tmp != prog) { |
938 | tmp_blinded = true; |
939 | prog = tmp; |
940 | } |
941 | |
942 | memset(&ctx, 0, sizeof(ctx)); |
943 | ctx.program = prog; |
944 | |
945 | /* |
946 | * Not able to allocate memory for descriptors[], then |
947 | * we must fall back to the interpreter |
948 | */ |
949 | ctx.descriptors = kcalloc(n: prog->len + 1, size: sizeof(*ctx.descriptors), |
950 | GFP_KERNEL); |
951 | if (ctx.descriptors == NULL) |
952 | goto out_err; |
953 | |
954 | /* First pass discovers used resources */ |
955 | if (build_body(ctx: &ctx) < 0) |
956 | goto out_err; |
957 | /* |
958 | * Second pass computes instruction offsets. |
959 | * If any PC-relative branches are out of range, a sequence of |
960 | * a PC-relative branch + a jump is generated, and we have to |
961 | * try again from the beginning to generate the new offsets. |
962 | * This is done until no additional conversions are necessary. |
963 | * The last two iterations are done with all branches being |
964 | * converted, to guarantee offset table convergence within a |
965 | * fixed number of iterations. |
966 | */ |
967 | ctx.jit_index = 0; |
968 | build_prologue(ctx: &ctx); |
969 | tmp_idx = ctx.jit_index; |
970 | |
971 | tries = JIT_MAX_ITERATIONS; |
972 | do { |
973 | ctx.jit_index = tmp_idx; |
974 | ctx.changes = 0; |
975 | if (tries == 2) |
976 | set_convert_flag(ctx: &ctx, enable: true); |
977 | if (build_body(ctx: &ctx) < 0) |
978 | goto out_err; |
979 | } while (ctx.changes > 0 && --tries > 0); |
980 | |
981 | if (WARN_ONCE(ctx.changes > 0, "JIT offsets failed to converge" )) |
982 | goto out_err; |
983 | |
984 | build_epilogue(ctx: &ctx, MIPS_R_RA); |
985 | |
986 | /* Now we know the size of the structure to make */ |
987 | image_size = sizeof(u32) * ctx.jit_index; |
988 | header = bpf_jit_binary_alloc(proglen: image_size, image_ptr: &image_ptr, |
989 | alignment: sizeof(u32), bpf_fill_ill_insns: jit_fill_hole); |
990 | /* |
991 | * Not able to allocate memory for the structure then |
992 | * we must fall back to the interpretation |
993 | */ |
994 | if (header == NULL) |
995 | goto out_err; |
996 | |
997 | /* Actual pass to generate final JIT code */ |
998 | ctx.target = (u32 *)image_ptr; |
999 | ctx.jit_index = 0; |
1000 | |
1001 | /* |
1002 | * If building the JITed code fails somehow, |
1003 | * we fall back to the interpretation. |
1004 | */ |
1005 | build_prologue(ctx: &ctx); |
1006 | if (build_body(ctx: &ctx) < 0) |
1007 | goto out_err; |
1008 | build_epilogue(ctx: &ctx, MIPS_R_RA); |
1009 | |
1010 | /* Populate line info meta data */ |
1011 | set_convert_flag(ctx: &ctx, enable: false); |
1012 | bpf_prog_fill_jited_linfo(prog, insn_to_jit_off: &ctx.descriptors[1]); |
1013 | |
1014 | /* Set as read-only exec and flush instruction cache */ |
1015 | bpf_jit_binary_lock_ro(hdr: header); |
1016 | flush_icache_range(start: (unsigned long)header, |
1017 | end: (unsigned long)&ctx.target[ctx.jit_index]); |
1018 | |
1019 | if (bpf_jit_enable > 1) |
1020 | bpf_jit_dump(flen: prog->len, proglen: image_size, pass: 2, image: ctx.target); |
1021 | |
1022 | prog->bpf_func = (void *)ctx.target; |
1023 | prog->jited = 1; |
1024 | prog->jited_len = image_size; |
1025 | |
1026 | out: |
1027 | if (tmp_blinded) |
1028 | bpf_jit_prog_release_other(fp: prog, fp_other: prog == orig_prog ? |
1029 | tmp : orig_prog); |
1030 | kfree(objp: ctx.descriptors); |
1031 | return prog; |
1032 | |
1033 | out_err: |
1034 | prog = orig_prog; |
1035 | if (header) |
1036 | bpf_jit_binary_free(hdr: header); |
1037 | goto out; |
1038 | } |
1039 | |