// SPDX-License-Identifier: GPL-2.0-only
/*
 * Just-In-Time compiler for eBPF bytecode on MIPS.
 * Implementation of JIT functions for 32-bit CPUs.
 *
 * Copyright (c) 2021 Anyfi Networks AB.
 * Author: Johan Almbladh <johan.almbladh@gmail.com>
 *
 * Based on code and ideas from
 * Copyright (c) 2017 Cavium, Inc.
 * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
 */

#include <linux/math64.h>
#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <asm/cpu-features.h>
#include <asm/isa-rev.h>
#include <asm/uasm.h>

#include "bpf_jit_comp.h"

/* MIPS a4-a7 are not available in the o32 ABI */
#undef MIPS_R_A4
#undef MIPS_R_A5
#undef MIPS_R_A6
#undef MIPS_R_A7

/* Stack is 8-byte aligned in o32 ABI */
#define MIPS_STACK_ALIGNMENT 8

/*
 * The top 16 bytes of a stack frame are reserved for the callee in the
 * o32 ABI. This corresponds to stack space for the register arguments a0-a3.
 */
#define JIT_RESERVED_STACK 16

/* Temporary 64-bit register used by JIT */
#define JIT_REG_TMP MAX_BPF_JIT_REG

/*
 * Number of prologue bytes to skip when doing a tail call.
 * Tail call count (TCC) initialization (8 bytes) always, plus
 * initialization of eBPF R1 from register a0 (4 bytes) if big endian.
 */
#ifdef __BIG_ENDIAN
#define JIT_TCALL_SKIP 12
#else
#define JIT_TCALL_SKIP 8
#endif

/* CPU registers holding the callee return value */
#define JIT_RETURN_REGS	  \
	(BIT(MIPS_R_V0) | \
	 BIT(MIPS_R_V1))

/* CPU registers for arguments passed directly to the callee */
#define JIT_ARG_REGS	  \
	(BIT(MIPS_R_A0) | \
	 BIT(MIPS_R_A1) | \
	 BIT(MIPS_R_A2) | \
	 BIT(MIPS_R_A3))

/* CPU register arguments passed to callee on stack */
#define JIT_STACK_REGS	  \
	(BIT(MIPS_R_T0) | \
	 BIT(MIPS_R_T1) | \
	 BIT(MIPS_R_T2) | \
	 BIT(MIPS_R_T3) | \
	 BIT(MIPS_R_T4) | \
	 BIT(MIPS_R_T5))

/* Caller-saved CPU registers */
#define JIT_CALLER_REGS	   \
	(JIT_RETURN_REGS | \
	 JIT_ARG_REGS	 | \
	 JIT_STACK_REGS)

/* Callee-saved CPU registers */
#define JIT_CALLEE_REGS	  \
	(BIT(MIPS_R_S0) | \
	 BIT(MIPS_R_S1) | \
	 BIT(MIPS_R_S2) | \
	 BIT(MIPS_R_S3) | \
	 BIT(MIPS_R_S4) | \
	 BIT(MIPS_R_S5) | \
	 BIT(MIPS_R_S6) | \
	 BIT(MIPS_R_S7) | \
	 BIT(MIPS_R_GP) | \
	 BIT(MIPS_R_FP) | \
	 BIT(MIPS_R_RA))

/*
 * Mapping of 64-bit eBPF registers to 32-bit native MIPS registers.
 *
 * 1) Native register pairs are ordered according to CPU endianness, following
 *    the MIPS convention for passing 64-bit arguments and return values.
 * 2) The eBPF return value, arguments and callee-saved registers are mapped
 *    to their native MIPS equivalents.
 * 3) Since the 32 highest bits in the eBPF FP register are always zero,
 *    only one general-purpose register is actually needed for the mapping.
 *    We use the fp register for this purpose, and map the highest bits to
 *    the MIPS register r0 (zero).
 * 4) We use the MIPS gp and at registers as internal temporary registers
 *    for constant blinding. The gp register is callee-saved.
 * 5) One 64-bit temporary register is mapped for use when sign-extending
 *    immediate operands. MIPS registers t6-t9 are available to the JIT
 *    as temporaries when implementing complex 64-bit operations.
 *
 * With this scheme all eBPF registers are mapped to native MIPS
 * registers without having to use any stack scratch space. The direct
 * register mapping (2) simplifies the handling of function calls.
 */
static const u8 bpf2mips32[][2] = {
	/* Return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = {MIPS_R_V1, MIPS_R_V0},
	/* Arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = {MIPS_R_A1, MIPS_R_A0},
	[BPF_REG_2] = {MIPS_R_A3, MIPS_R_A2},
	/* Remaining arguments, to be passed on the stack per o32 ABI */
	[BPF_REG_3] = {MIPS_R_T1, MIPS_R_T0},
	[BPF_REG_4] = {MIPS_R_T3, MIPS_R_T2},
	[BPF_REG_5] = {MIPS_R_T5, MIPS_R_T4},
	/* Callee-saved registers that in-kernel function will preserve */
	[BPF_REG_6] = {MIPS_R_S1, MIPS_R_S0},
	[BPF_REG_7] = {MIPS_R_S3, MIPS_R_S2},
	[BPF_REG_8] = {MIPS_R_S5, MIPS_R_S4},
	[BPF_REG_9] = {MIPS_R_S7, MIPS_R_S6},
	/* Read-only frame pointer to access the eBPF stack */
#ifdef __BIG_ENDIAN
	[BPF_REG_FP] = {MIPS_R_FP, MIPS_R_ZERO},
#else
	[BPF_REG_FP] = {MIPS_R_ZERO, MIPS_R_FP},
#endif
	/* Temporary register for blinding constants */
	[BPF_REG_AX] = {MIPS_R_GP, MIPS_R_AT},
	/* Temporary register for internal JIT use */
	[JIT_REG_TMP] = {MIPS_R_T7, MIPS_R_T6},
};
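
/*
 * For example, the 64-bit eBPF register R1 lives in the (a0, a1) pair:
 * on a little-endian CPU the low word is in a0 and the high word in a1,
 * while big endian swaps the two, matching how the o32 ABI passes a
 * 64-bit argument.
 */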

/* Get low CPU register for a 64-bit eBPF register mapping */
static inline u8 lo(const u8 reg[])
{
#ifdef __BIG_ENDIAN
	return reg[0];
#else
	return reg[1];
#endif
}

/* Get high CPU register for a 64-bit eBPF register mapping */
static inline u8 hi(const u8 reg[])
{
#ifdef __BIG_ENDIAN
	return reg[1];
#else
	return reg[0];
#endif
}

/*
 * Mark a 64-bit CPU register pair as clobbered; it needs to be
 * saved/restored by the program if callee-saved.
 */
static void clobber_reg64(struct jit_context *ctx, const u8 reg[])
{
	clobber_reg(ctx, reg[0]);
	clobber_reg(ctx, reg[1]);
}

/* dst = imm (sign-extended) */
static void emit_mov_se_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
{
	emit_mov_i(ctx, lo(dst), imm);
	if (imm < 0)
		emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1);
	else
		emit(ctx, move, hi(dst), MIPS_R_ZERO);
	clobber_reg64(ctx, dst);
}

/* Zero extension, if verifier does not do it for us */
static void emit_zext_ver(struct jit_context *ctx, const u8 dst[])
{
	if (!ctx->program->aux->verifier_zext) {
		emit(ctx, move, hi(dst), MIPS_R_ZERO);
		clobber_reg(ctx, hi(dst));
	}
}

/* Load delay slot, if ISA mandates it */
static void emit_load_delay(struct jit_context *ctx)
{
	if (!cpu_has_mips_2_3_4_5_r)
		emit(ctx, nop);
}

/* ALU immediate operation (64-bit) */
static void emit_alu_i64(struct jit_context *ctx,
			 const u8 dst[], s32 imm, u8 op)
{
	u8 src = MIPS_R_T6;

	/*
	 * ADD/SUB with all but the max negative imm can be handled by
	 * inverting the operation and the imm value, saving one insn.
	 */
	if (imm > S32_MIN && imm < 0)
		switch (op) {
		case BPF_ADD:
			op = BPF_SUB;
			imm = -imm;
			break;
		case BPF_SUB:
			op = BPF_ADD;
			imm = -imm;
			break;
		}

	/* Move immediate to temporary register */
	emit_mov_i(ctx, src, imm);

	switch (op) {
	/* dst = dst + imm */
	case BPF_ADD:
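		/*
		 * The carry out of the low-word addition is recovered with
		 * sltu: the unsigned sum wrapped around iff it ended up
		 * below the addend, and the carry is then added into the
		 * high word.
		 */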
		emit(ctx, addu, lo(dst), lo(dst), src);
		emit(ctx, sltu, MIPS_R_T9, lo(dst), src);
		emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9);
		if (imm < 0)
			emit(ctx, addiu, hi(dst), hi(dst), -1);
		break;
	/* dst = dst - imm */
	case BPF_SUB:
		emit(ctx, sltu, MIPS_R_T9, lo(dst), src);
		emit(ctx, subu, lo(dst), lo(dst), src);
		emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
		if (imm < 0)
			emit(ctx, addiu, hi(dst), hi(dst), 1);
		break;
	/* dst = dst | imm */
	case BPF_OR:
		emit(ctx, or, lo(dst), lo(dst), src);
		if (imm < 0)
			emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1);
		break;
	/* dst = dst & imm */
	case BPF_AND:
		emit(ctx, and, lo(dst), lo(dst), src);
		if (imm >= 0)
			emit(ctx, move, hi(dst), MIPS_R_ZERO);
		break;
	/* dst = dst ^ imm */
	case BPF_XOR:
		emit(ctx, xor, lo(dst), lo(dst), src);
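		/*
		 * A negative imm sign-extends to ~0 in the high word, and
		 * hi ^= ~0 is a bitwise not: in two's complement
		 * ~hi = -hi - 1, computed by the two instructions below.
		 */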
		if (imm < 0) {
			emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst));
			emit(ctx, addiu, hi(dst), hi(dst), -1);
		}
		break;
	}
	clobber_reg64(ctx, dst);
}

/* ALU register operation (64-bit) */
static void emit_alu_r64(struct jit_context *ctx,
			 const u8 dst[], const u8 src[], u8 op)
{
	switch (BPF_OP(op)) {
	/* dst = dst + src */
	case BPF_ADD:
		if (src == dst) {
			emit(ctx, srl, MIPS_R_T9, lo(dst), 31);
			emit(ctx, addu, lo(dst), lo(dst), lo(dst));
		} else {
			emit(ctx, addu, lo(dst), lo(dst), lo(src));
			emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src));
		}
		emit(ctx, addu, hi(dst), hi(dst), hi(src));
		emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9);
		break;
	/* dst = dst - src */
	case BPF_SUB:
		emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src));
		emit(ctx, subu, lo(dst), lo(dst), lo(src));
		emit(ctx, subu, hi(dst), hi(dst), hi(src));
		emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
		break;
	/* dst = dst | src */
	case BPF_OR:
		emit(ctx, or, lo(dst), lo(dst), lo(src));
		emit(ctx, or, hi(dst), hi(dst), hi(src));
		break;
	/* dst = dst & src */
	case BPF_AND:
		emit(ctx, and, lo(dst), lo(dst), lo(src));
		emit(ctx, and, hi(dst), hi(dst), hi(src));
		break;
	/* dst = dst ^ src */
	case BPF_XOR:
		emit(ctx, xor, lo(dst), lo(dst), lo(src));
		emit(ctx, xor, hi(dst), hi(dst), hi(src));
		break;
	}
	clobber_reg64(ctx, dst);
}

/* ALU invert (64-bit) */
static void emit_neg_i64(struct jit_context *ctx, const u8 dst[])
{
	emit(ctx, sltu, MIPS_R_T9, MIPS_R_ZERO, lo(dst));
	emit(ctx, subu, lo(dst), MIPS_R_ZERO, lo(dst));
	emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst));
	emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);

	clobber_reg64(ctx, dst);
}

/* ALU shift immediate (64-bit) */
static void emit_shift_i64(struct jit_context *ctx,
			   const u8 dst[], u32 imm, u8 op)
{
	switch (BPF_OP(op)) {
	/* dst = dst << imm */
	case BPF_LSH:
		if (imm < 32) {
			emit(ctx, srl, MIPS_R_T9, lo(dst), 32 - imm);
			emit(ctx, sll, lo(dst), lo(dst), imm);
			emit(ctx, sll, hi(dst), hi(dst), imm);
			emit(ctx, or, hi(dst), hi(dst), MIPS_R_T9);
		} else {
			emit(ctx, sll, hi(dst), lo(dst), imm - 32);
			emit(ctx, move, lo(dst), MIPS_R_ZERO);
		}
		break;
	/* dst = dst >> imm */
	case BPF_RSH:
		if (imm < 32) {
			emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm);
			emit(ctx, srl, lo(dst), lo(dst), imm);
			emit(ctx, srl, hi(dst), hi(dst), imm);
			emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9);
		} else {
			emit(ctx, srl, lo(dst), hi(dst), imm - 32);
			emit(ctx, move, hi(dst), MIPS_R_ZERO);
		}
		break;
	/* dst = dst >> imm (arithmetic) */
	case BPF_ARSH:
		if (imm < 32) {
			emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm);
			emit(ctx, srl, lo(dst), lo(dst), imm);
			emit(ctx, sra, hi(dst), hi(dst), imm);
			emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9);
		} else {
			emit(ctx, sra, lo(dst), hi(dst), imm - 32);
			emit(ctx, sra, hi(dst), hi(dst), 31);
		}
		break;
	}
	clobber_reg64(ctx, dst);
}

/* ALU shift register (64-bit) */
static void emit_shift_r64(struct jit_context *ctx,
			   const u8 dst[], u8 src, u8 op)
{
	u8 t1 = MIPS_R_T8;
	u8 t2 = MIPS_R_T9;

	emit(ctx, andi, t1, src, 32);		/* t1 = src & 32          */
	emit(ctx, beqz, t1, 16);		/* PC += 16 if t1 == 0    */
	emit(ctx, nor, t2, src, MIPS_R_ZERO);	/* t2 = ~src (delay slot) */
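
	/*
	 * Each case below first emits the sequence for shifts of 32 bits
	 * or more, then branches over the sequence for shifts of less
	 * than 32 bits. As usual on MIPS, branch offsets are taken
	 * relative to the instruction in the branch delay slot.
	 */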

	switch (BPF_OP(op)) {
	/* dst = dst << src */
	case BPF_LSH:
		/* Next: shift >= 32 */
		emit(ctx, sllv, hi(dst), lo(dst), src);	/* dh = dl << src */
		emit(ctx, move, lo(dst), MIPS_R_ZERO);	/* dl = 0 */
		emit(ctx, b, 20);			/* PC += 20 */
		/* +16: shift < 32 */
		emit(ctx, srl, t1, lo(dst), 1);		/* t1 = dl >> 1 */
		emit(ctx, srlv, t1, t1, t2);		/* t1 = t1 >> t2 */
		emit(ctx, sllv, lo(dst), lo(dst), src);	/* dl = dl << src */
		emit(ctx, sllv, hi(dst), hi(dst), src);	/* dh = dh << src */
		emit(ctx, or, hi(dst), hi(dst), t1);	/* dh = dh | t1 */
		break;
	/* dst = dst >> src */
	case BPF_RSH:
		/* Next: shift >= 32 */
		emit(ctx, srlv, lo(dst), hi(dst), src);	/* dl = dh >> src */
		emit(ctx, move, hi(dst), MIPS_R_ZERO);	/* dh = 0 */
		emit(ctx, b, 20);			/* PC += 20 */
		/* +16: shift < 32 */
		emit(ctx, sll, t1, hi(dst), 1);		/* t1 = dh << 1 */
		emit(ctx, sllv, t1, t1, t2);		/* t1 = t1 << t2 */
		emit(ctx, srlv, lo(dst), lo(dst), src);	/* dl = dl >> src */
		emit(ctx, srlv, hi(dst), hi(dst), src);	/* dh = dh >> src */
		emit(ctx, or, lo(dst), lo(dst), t1);	/* dl = dl | t1 */
		break;
	/* dst = dst >> src (arithmetic) */
	case BPF_ARSH:
		/* Next: shift >= 32 */
		emit(ctx, srav, lo(dst), hi(dst), src);	/* dl = dh >>a src */
		emit(ctx, sra, hi(dst), hi(dst), 31);	/* dh = dh >>a 31 */
		emit(ctx, b, 20);			/* PC += 20 */
		/* +16: shift < 32 */
		emit(ctx, sll, t1, hi(dst), 1);		/* t1 = dh << 1 */
		emit(ctx, sllv, t1, t1, t2);		/* t1 = t1 << t2 */
		emit(ctx, srlv, lo(dst), lo(dst), src);	/* dl = dl >> src */
		emit(ctx, srav, hi(dst), hi(dst), src);	/* dh = dh >>a src */
		emit(ctx, or, lo(dst), lo(dst), t1);	/* dl = dl | t1 */
		break;
	}

	/* +20: Done */
	clobber_reg64(ctx, dst);
}

/* ALU mul immediate (64x32-bit) */
static void emit_mul_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
{
	u8 src = MIPS_R_T6;
	u8 tmp = MIPS_R_T9;

	switch (imm) {
	/* dst = dst * 1 is a no-op */
	case 1:
		break;
	/* dst = dst * -1 */
	case -1:
		emit_neg_i64(ctx, dst);
		break;
	case 0:
		emit_mov_r(ctx, lo(dst), MIPS_R_ZERO);
		emit_mov_r(ctx, hi(dst), MIPS_R_ZERO);
		break;
	/* Full 64x32 multiply */
	default:
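		/*
		 * Decompose the product modulo 2^64: with the immediate
		 * sign-extended to 64 bits, the high word of the result is
		 * hi(dst) * imm + lo(dst) * hi(imm) + carry, where hi(imm)
		 * is -1 for a negative immediate (hence the subtraction of
		 * lo(dst) below) and the carry is the high word of the
		 * unsigned product lo(dst) * imm.
		 */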
		/* hi(dst) = hi(dst) * src(imm) */
		emit_mov_i(ctx, src, imm);
		if (cpu_has_mips32r1 || cpu_has_mips32r6) {
			emit(ctx, mul, hi(dst), hi(dst), src);
		} else {
			emit(ctx, multu, hi(dst), src);
			emit(ctx, mflo, hi(dst));
		}

		/* hi(dst) = hi(dst) - lo(dst) */
		if (imm < 0)
			emit(ctx, subu, hi(dst), hi(dst), lo(dst));

		/* tmp = lo(dst) * src(imm) >> 32 */
		/* lo(dst) = lo(dst) * src(imm) */
		if (cpu_has_mips32r6) {
			emit(ctx, muhu, tmp, lo(dst), src);
			emit(ctx, mulu, lo(dst), lo(dst), src);
		} else {
			emit(ctx, multu, lo(dst), src);
			emit(ctx, mflo, lo(dst));
			emit(ctx, mfhi, tmp);
		}

		/* hi(dst) += tmp */
		emit(ctx, addu, hi(dst), hi(dst), tmp);
		clobber_reg64(ctx, dst);
		break;
	}
}

/* ALU mul register (64x64-bit) */
static void emit_mul_r64(struct jit_context *ctx,
			 const u8 dst[], const u8 src[])
{
	u8 acc = MIPS_R_T8;
	u8 tmp = MIPS_R_T9;
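
	/*
	 * Schoolbook multiplication modulo 2^64: only the three partial
	 * products that reach the low 64 bits are needed, i.e. the cross
	 * products hi(dst) * lo(src) and lo(dst) * hi(src), which land in
	 * the high word, plus the full 64-bit product lo(dst) * lo(src).
	 */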

	/* acc = hi(dst) * lo(src) */
	if (cpu_has_mips32r1 || cpu_has_mips32r6) {
		emit(ctx, mul, acc, hi(dst), lo(src));
	} else {
		emit(ctx, multu, hi(dst), lo(src));
		emit(ctx, mflo, acc);
	}

	/* tmp = lo(dst) * hi(src) */
	if (cpu_has_mips32r1 || cpu_has_mips32r6) {
		emit(ctx, mul, tmp, lo(dst), hi(src));
	} else {
		emit(ctx, multu, lo(dst), hi(src));
		emit(ctx, mflo, tmp);
	}

	/* acc += tmp */
	emit(ctx, addu, acc, acc, tmp);

	/* tmp = lo(dst) * lo(src) >> 32 */
	/* lo(dst) = lo(dst) * lo(src) */
	if (cpu_has_mips32r6) {
		emit(ctx, muhu, tmp, lo(dst), lo(src));
		emit(ctx, mulu, lo(dst), lo(dst), lo(src));
	} else {
		emit(ctx, multu, lo(dst), lo(src));
		emit(ctx, mflo, lo(dst));
		emit(ctx, mfhi, tmp);
	}

	/* hi(dst) = acc + tmp */
	emit(ctx, addu, hi(dst), acc, tmp);
	clobber_reg64(ctx, dst);
}

/* Helper function for 64-bit modulo */
static u64 jit_mod64(u64 a, u64 b)
{
	u64 rem;

	div64_u64_rem(a, b, &rem);
	return rem;
}

/* ALU div/mod register (64-bit) */
static void emit_divmod_r64(struct jit_context *ctx,
			    const u8 dst[], const u8 src[], u8 op)
{
	const u8 *r0 = bpf2mips32[BPF_REG_0];	/* Mapped to v0-v1 */
	const u8 *r1 = bpf2mips32[BPF_REG_1];	/* Mapped to a0-a1 */
	const u8 *r2 = bpf2mips32[BPF_REG_2];	/* Mapped to a2-a3 */
	int exclude, k;
	u32 addr = 0;

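	/*
	 * MIPS32 has no native 64-bit divide instruction, so the
	 * operation is lowered to a call to the kernel helpers
	 * div64_u64() and jit_mod64() under the o32 calling convention.
	 */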
	/* Push caller-saved registers on stack */
	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		  0, JIT_RESERVED_STACK);

	/* Put 64-bit arguments 1 and 2 in registers a0-a3 */
	for (k = 0; k < 2; k++) {
		emit(ctx, move, MIPS_R_T9, src[k]);
		emit(ctx, move, r1[k], dst[k]);
		emit(ctx, move, r2[k], MIPS_R_T9);
	}

	/* Emit function call */
	switch (BPF_OP(op)) {
	/* dst = dst / src */
	case BPF_DIV:
		addr = (u32)&div64_u64;
		break;
	/* dst = dst % src */
	case BPF_MOD:
		addr = (u32)&jit_mod64;
		break;
	}
	emit_mov_i(ctx, MIPS_R_T9, addr);
	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
	emit(ctx, nop); /* Delay slot */

	/* Store the 64-bit result in dst */
	emit(ctx, move, dst[0], r0[0]);
	emit(ctx, move, dst[1], r0[1]);

	/* Restore caller-saved registers, excluding the computed result */
	exclude = BIT(lo(dst)) | BIT(hi(dst));
	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		 exclude, JIT_RESERVED_STACK);
	emit_load_delay(ctx);

	clobber_reg64(ctx, dst);
	clobber_reg(ctx, MIPS_R_V0);
	clobber_reg(ctx, MIPS_R_V1);
	clobber_reg(ctx, MIPS_R_RA);
}

/* Swap bytes in a register word */
static void emit_swap8_r(struct jit_context *ctx, u8 dst, u8 src, u8 mask)
{
	u8 tmp = MIPS_R_T9;

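	/*
	 * Example: src = 0xaabbccdd with mask = 0x00ff00ff gives
	 * dst = 0xbbaaddcc, i.e. the bytes of each half word swapped.
	 */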
	emit(ctx, and, tmp, src, mask);	/* tmp = src & 0x00ff00ff */
	emit(ctx, sll, tmp, tmp, 8);	/* tmp = tmp << 8         */
	emit(ctx, srl, dst, src, 8);	/* dst = src >> 8         */
	emit(ctx, and, dst, dst, mask);	/* dst = dst & 0x00ff00ff */
	emit(ctx, or, dst, dst, tmp);	/* dst = dst | tmp        */
}

/* Swap half words in a register word */
static void emit_swap16_r(struct jit_context *ctx, u8 dst, u8 src)
{
	u8 tmp = MIPS_R_T9;

	emit(ctx, sll, tmp, src, 16);	/* tmp = src << 16 */
	emit(ctx, srl, dst, src, 16);	/* dst = src >> 16 */
	emit(ctx, or, dst, dst, tmp);	/* dst = dst | tmp */
}

/* Swap bytes and truncate a register double word, word or half word */
static void emit_bswap_r64(struct jit_context *ctx, const u8 dst[], u32 width)
{
	u8 tmp = MIPS_R_T8;

	switch (width) {
	/* Swap bytes in a double word */
	case 64:
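		/*
		 * A 64-bit byte swap also exchanges the two words. On
		 * MIPS32r2 and later, rotating a word by 16 bits swaps its
		 * half words and wsbh then swaps the bytes within each
		 * half word, which together reverse all four bytes.
		 */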
		if (cpu_has_mips32r2 || cpu_has_mips32r6) {
			emit(ctx, rotr, tmp, hi(dst), 16);
			emit(ctx, rotr, hi(dst), lo(dst), 16);
			emit(ctx, wsbh, lo(dst), tmp);
			emit(ctx, wsbh, hi(dst), hi(dst));
		} else {
			emit_swap16_r(ctx, tmp, lo(dst));
			emit_swap16_r(ctx, lo(dst), hi(dst));
			emit(ctx, move, hi(dst), tmp);

			emit(ctx, lui, tmp, 0xff);	/* tmp = 0x00ff0000 */
			emit(ctx, ori, tmp, tmp, 0xff);	/* tmp = 0x00ff00ff */
			emit_swap8_r(ctx, lo(dst), lo(dst), tmp);
			emit_swap8_r(ctx, hi(dst), hi(dst), tmp);
		}
		break;
	/* Swap bytes in a word */
	/* Swap bytes in a half word */
	case 32:
	case 16:
		emit_bswap_r(ctx, lo(dst), width);
		emit(ctx, move, hi(dst), MIPS_R_ZERO);
		break;
	}
	clobber_reg64(ctx, dst);
}

/* Truncate a register double word, word or half word */
static void emit_trunc_r64(struct jit_context *ctx, const u8 dst[], u32 width)
{
	switch (width) {
	case 64:
		break;
	/* Zero-extend a word */
	case 32:
		emit(ctx, move, hi(dst), MIPS_R_ZERO);
		clobber_reg(ctx, hi(dst));
		break;
	/* Zero-extend a half word */
	case 16:
		emit(ctx, move, hi(dst), MIPS_R_ZERO);
		emit(ctx, andi, lo(dst), lo(dst), 0xffff);
		clobber_reg64(ctx, dst);
		break;
	}
}

/* Load operation: dst = *(size*)(src + off) */
static void emit_ldx(struct jit_context *ctx,
		     const u8 dst[], u8 src, s16 off, u8 size)
{
	switch (size) {
	/* Load a byte */
	case BPF_B:
		emit(ctx, lbu, lo(dst), off, src);
		emit(ctx, move, hi(dst), MIPS_R_ZERO);
		break;
	/* Load a half word */
	case BPF_H:
		emit(ctx, lhu, lo(dst), off, src);
		emit(ctx, move, hi(dst), MIPS_R_ZERO);
		break;
	/* Load a word */
	case BPF_W:
		emit(ctx, lw, lo(dst), off, src);
		emit(ctx, move, hi(dst), MIPS_R_ZERO);
		break;
	/* Load a double word */
	case BPF_DW:
		if (dst[1] == src) {
			emit(ctx, lw, dst[0], off + 4, src);
			emit(ctx, lw, dst[1], off, src);
		} else {
			emit(ctx, lw, dst[1], off, src);
			emit(ctx, lw, dst[0], off + 4, src);
		}
		emit_load_delay(ctx);
		break;
	}
	clobber_reg64(ctx, dst);
}

/* Store operation: *(size *)(dst + off) = src */
static void emit_stx(struct jit_context *ctx,
		     const u8 dst, const u8 src[], s16 off, u8 size)
{
	switch (size) {
	/* Store a byte */
	case BPF_B:
		emit(ctx, sb, lo(src), off, dst);
		break;
	/* Store a half word */
	case BPF_H:
		emit(ctx, sh, lo(src), off, dst);
		break;
	/* Store a word */
	case BPF_W:
		emit(ctx, sw, lo(src), off, dst);
		break;
	/* Store a double word */
	case BPF_DW:
		emit(ctx, sw, src[1], off, dst);
		emit(ctx, sw, src[0], off + 4, dst);
		break;
	}
}

/* Atomic read-modify-write (32-bit, non-ll/sc fallback) */
static void emit_atomic_r32(struct jit_context *ctx,
			    u8 dst, u8 src, s16 off, u8 code)
{
	u32 exclude = 0;
	u32 addr = 0;

	/* Push caller-saved registers on stack */
	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		  0, JIT_RESERVED_STACK);
	/*
	 * Argument 1: dst+off if xchg, otherwise src, passed in register a0
	 * Argument 2: src if xchg, otherwise dst+off, passed in register a1
	 */
	emit(ctx, move, MIPS_R_T9, dst);
	if (code == BPF_XCHG) {
		emit(ctx, move, MIPS_R_A1, src);
		emit(ctx, addiu, MIPS_R_A0, MIPS_R_T9, off);
	} else {
		emit(ctx, move, MIPS_R_A0, src);
		emit(ctx, addiu, MIPS_R_A1, MIPS_R_T9, off);
	}

	/* Emit function call */
	switch (code) {
	case BPF_ADD:
		addr = (u32)&atomic_add;
		break;
	case BPF_ADD | BPF_FETCH:
		addr = (u32)&atomic_fetch_add;
		break;
	case BPF_SUB:
		addr = (u32)&atomic_sub;
		break;
	case BPF_SUB | BPF_FETCH:
		addr = (u32)&atomic_fetch_sub;
		break;
	case BPF_OR:
		addr = (u32)&atomic_or;
		break;
	case BPF_OR | BPF_FETCH:
		addr = (u32)&atomic_fetch_or;
		break;
	case BPF_AND:
		addr = (u32)&atomic_and;
		break;
	case BPF_AND | BPF_FETCH:
		addr = (u32)&atomic_fetch_and;
		break;
	case BPF_XOR:
		addr = (u32)&atomic_xor;
		break;
	case BPF_XOR | BPF_FETCH:
		addr = (u32)&atomic_fetch_xor;
		break;
	case BPF_XCHG:
		addr = (u32)&atomic_xchg;
		break;
	}
	emit_mov_i(ctx, MIPS_R_T9, addr);
	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
	emit(ctx, nop); /* Delay slot */

	/* Update src register with old value, if specified */
	if (code & BPF_FETCH) {
		emit(ctx, move, src, MIPS_R_V0);
		exclude = BIT(src);
		clobber_reg(ctx, src);
	}

	/* Restore caller-saved registers, except any fetched value */
	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		 exclude, JIT_RESERVED_STACK);
	emit_load_delay(ctx);
	clobber_reg(ctx, MIPS_R_RA);
}

/* Helper function for 64-bit atomic exchange */
static s64 jit_xchg64(s64 a, atomic64_t *v)
{
	return atomic64_xchg(v, a);
}

/* Atomic read-modify-write (64-bit) */
static void emit_atomic_r64(struct jit_context *ctx,
			    u8 dst, const u8 src[], s16 off, u8 code)
{
	const u8 *r0 = bpf2mips32[BPF_REG_0];	/* Mapped to v0-v1 */
	const u8 *r1 = bpf2mips32[BPF_REG_1];	/* Mapped to a0-a1 */
	u32 exclude = 0;
	u32 addr = 0;

	/* Push caller-saved registers on stack */
	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		  0, JIT_RESERVED_STACK);
	/*
	 * Argument 1: 64-bit src, passed in registers a0-a1
	 * Argument 2: 32-bit dst+off, passed in register a2
	 */
	emit(ctx, move, MIPS_R_T9, dst);
	emit(ctx, move, r1[0], src[0]);
	emit(ctx, move, r1[1], src[1]);
	emit(ctx, addiu, MIPS_R_A2, MIPS_R_T9, off);

	/* Emit function call */
	switch (code) {
	case BPF_ADD:
		addr = (u32)&atomic64_add;
		break;
	case BPF_ADD | BPF_FETCH:
		addr = (u32)&atomic64_fetch_add;
		break;
	case BPF_SUB:
		addr = (u32)&atomic64_sub;
		break;
	case BPF_SUB | BPF_FETCH:
		addr = (u32)&atomic64_fetch_sub;
		break;
	case BPF_OR:
		addr = (u32)&atomic64_or;
		break;
	case BPF_OR | BPF_FETCH:
		addr = (u32)&atomic64_fetch_or;
		break;
	case BPF_AND:
		addr = (u32)&atomic64_and;
		break;
	case BPF_AND | BPF_FETCH:
		addr = (u32)&atomic64_fetch_and;
		break;
	case BPF_XOR:
		addr = (u32)&atomic64_xor;
		break;
	case BPF_XOR | BPF_FETCH:
		addr = (u32)&atomic64_fetch_xor;
		break;
	case BPF_XCHG:
		addr = (u32)&jit_xchg64;
		break;
	}
	emit_mov_i(ctx, MIPS_R_T9, addr);
	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
	emit(ctx, nop); /* Delay slot */

	/* Update src register with old value, if specified */
	if (code & BPF_FETCH) {
		emit(ctx, move, lo(src), lo(r0));
		emit(ctx, move, hi(src), hi(r0));
		exclude = BIT(src[0]) | BIT(src[1]);
		clobber_reg64(ctx, src);
	}

	/* Restore caller-saved registers, except any fetched value */
	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		 exclude, JIT_RESERVED_STACK);
	emit_load_delay(ctx);
	clobber_reg(ctx, MIPS_R_RA);
}

/* Atomic compare-and-exchange (32-bit, non-ll/sc fallback) */
static void emit_cmpxchg_r32(struct jit_context *ctx, u8 dst, u8 src, s16 off)
{
	const u8 *r0 = bpf2mips32[BPF_REG_0];

	/* Push caller-saved registers on stack */
	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		  JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
	/*
	 * Argument 1: 32-bit dst+off, passed in register a0
	 * Argument 2: 32-bit r0, passed in register a1
	 * Argument 3: 32-bit src, passed in register a2
	 */
	emit(ctx, addiu, MIPS_R_T9, dst, off);
	emit(ctx, move, MIPS_R_T8, src);
	emit(ctx, move, MIPS_R_A1, lo(r0));
	emit(ctx, move, MIPS_R_A0, MIPS_R_T9);
	emit(ctx, move, MIPS_R_A2, MIPS_R_T8);

	/* Emit function call */
	emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic_cmpxchg);
	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
	emit(ctx, nop); /* Delay slot */

#ifdef __BIG_ENDIAN
	emit(ctx, move, lo(r0), MIPS_R_V0);
#endif
	/* Restore caller-saved registers, except the return value */
	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		 JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
	emit_load_delay(ctx);
	clobber_reg(ctx, MIPS_R_V0);
	clobber_reg(ctx, MIPS_R_V1);
	clobber_reg(ctx, MIPS_R_RA);
}

/* Atomic compare-and-exchange (64-bit) */
static void emit_cmpxchg_r64(struct jit_context *ctx,
			     u8 dst, const u8 src[], s16 off)
{
	const u8 *r0 = bpf2mips32[BPF_REG_0];
	const u8 *r2 = bpf2mips32[BPF_REG_2];

	/* Push caller-saved registers on stack */
	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		  JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
	/*
	 * Argument 1: 32-bit dst+off, passed in register a0 (a1 unused)
	 * Argument 2: 64-bit r0, passed in registers a2-a3
	 * Argument 3: 64-bit src, passed on stack
	 */
	push_regs(ctx, BIT(src[0]) | BIT(src[1]), 0, JIT_RESERVED_STACK);
	emit(ctx, addiu, MIPS_R_T9, dst, off);
	emit(ctx, move, r2[0], r0[0]);
	emit(ctx, move, r2[1], r0[1]);
	emit(ctx, move, MIPS_R_A0, MIPS_R_T9);

	/* Emit function call */
	emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic64_cmpxchg);
	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
	emit(ctx, nop); /* Delay slot */

	/* Restore caller-saved registers, except the return value */
	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		 JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
	emit_load_delay(ctx);
	clobber_reg(ctx, MIPS_R_V0);
	clobber_reg(ctx, MIPS_R_V1);
	clobber_reg(ctx, MIPS_R_RA);
}

/*
 * Conditional movz or an emulated equivalent.
 * Note that the rs register may be modified.
 */
static void emit_movz_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt)
{
	if (cpu_has_mips_2) {
		emit(ctx, movz, rd, rs, rt);		/* rd = rt ? rd : rs */
	} else if (cpu_has_mips32r6) {
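		/*
		 * MIPS32r6 dropped movz: seleqz keeps its source operand
		 * when rt is zero and clears it otherwise, selnez is the
		 * complement, and the or below merges the two halves.
		 */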
		if (rs != MIPS_R_ZERO)
			emit(ctx, seleqz, rs, rs, rt);	/* rs = 0 if rt != 0 */
		emit(ctx, selnez, rd, rd, rt);		/* rd = 0 if rt == 0 */
		if (rs != MIPS_R_ZERO)
			emit(ctx, or, rd, rd, rs);	/* rd = rd | rs */
	} else {
		emit(ctx, bnez, rt, 8);			/* PC += 8 if rt != 0 */
		emit(ctx, nop);				/* +0: delay slot */
		emit(ctx, or, rd, rs, MIPS_R_ZERO);	/* +4: rd = rs */
	}
	clobber_reg(ctx, rd);
	clobber_reg(ctx, rs);
}

/*
 * Conditional movn or an emulated equivalent.
 * Note that the rs register may be modified.
 */
static void emit_movn_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt)
{
	if (cpu_has_mips_2) {
		emit(ctx, movn, rd, rs, rt);		/* rd = rt ? rs : rd */
	} else if (cpu_has_mips32r6) {
		if (rs != MIPS_R_ZERO)
			emit(ctx, selnez, rs, rs, rt);	/* rs = 0 if rt == 0 */
		emit(ctx, seleqz, rd, rd, rt);		/* rd = 0 if rt != 0 */
		if (rs != MIPS_R_ZERO)
			emit(ctx, or, rd, rd, rs);	/* rd = rd | rs */
	} else {
		emit(ctx, beqz, rt, 8);			/* PC += 8 if rt == 0 */
		emit(ctx, nop);				/* +0: delay slot */
		emit(ctx, or, rd, rs, MIPS_R_ZERO);	/* +4: rd = rs */
	}
	clobber_reg(ctx, rd);
	clobber_reg(ctx, rs);
}

/* Emulation of 64-bit sltiu rd, rs, imm, where imm may be S32_MAX + 1 */
static void emit_sltiu_r64(struct jit_context *ctx, u8 rd,
			   const u8 rs[], s64 imm)
{
	u8 tmp = MIPS_R_T9;

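	/*
	 * A negative immediate sign-extends to ~0 in its high word, so
	 * the unsigned 64-bit comparison holds whenever hi(rs) is below
	 * ~0; when the high words are equal, the low-word comparison
	 * decides, which is what the or of the two terms computes.
	 */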
	if (imm < 0) {
		emit_mov_i(ctx, rd, imm);		/* rd = imm */
		emit(ctx, sltu, rd, lo(rs), rd);	/* rd = rsl < rd */
		emit(ctx, sltiu, tmp, hi(rs), -1);	/* tmp = rsh < ~0U */
		emit(ctx, or, rd, rd, tmp);		/* rd = rd | tmp */
	} else { /* imm >= 0 */
		if (imm > 0x7fff) {
			emit_mov_i(ctx, rd, (s32)imm);	/* rd = imm */
			emit(ctx, sltu, rd, lo(rs), rd); /* rd = rsl < rd */
		} else {
			emit(ctx, sltiu, rd, lo(rs), imm); /* rd = rsl < imm */
		}
		emit_movn_r(ctx, rd, MIPS_R_ZERO, hi(rs)); /* rd = 0 if rsh != 0 */
	}
}

/* Emulation of 64-bit sltu rd, rs, rt */
static void emit_sltu_r64(struct jit_context *ctx, u8 rd,
			  const u8 rs[], const u8 rt[])
{
	u8 tmp = MIPS_R_T9;

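	/*
	 * The low-word comparison only counts when the high words are
	 * equal (tmp == 0 below); otherwise the high-word comparison
	 * alone decides the 64-bit result.
	 */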
	emit(ctx, sltu, rd, lo(rs), lo(rt));	/* rd = rsl < rtl */
	emit(ctx, subu, tmp, hi(rs), hi(rt));	/* tmp = rsh - rth */
	emit_movn_r(ctx, rd, MIPS_R_ZERO, tmp);	/* rd = 0 if tmp != 0 */
	emit(ctx, sltu, tmp, hi(rs), hi(rt));	/* tmp = rsh < rth */
	emit(ctx, or, rd, rd, tmp);		/* rd = rd | tmp */
}

/* Emulation of 64-bit slti rd, rs, imm, where imm may be S32_MAX + 1 */
static void emit_slti_r64(struct jit_context *ctx, u8 rd,
			  const u8 rs[], s64 imm)
{
	u8 t1 = MIPS_R_T8;
	u8 t2 = MIPS_R_T9;
	u8 cmp;

	/*
	 * if ((rs < 0) ^ (imm < 0)) t1 = imm >u rsl
	 * else                      t1 = rsl <u imm
	 */
	emit_mov_i(ctx, rd, (s32)imm);
	emit(ctx, sltu, t1, lo(rs), rd);	/* t1 = rsl <u imm */
	emit(ctx, sltu, t2, rd, lo(rs));	/* t2 = imm <u rsl */
	emit(ctx, srl, rd, hi(rs), 31);		/* rd = rsh >> 31 */
	if (imm < 0)
		emit_movz_r(ctx, t1, t2, rd);	/* t1 = rd ? t1 : t2 */
	else
		emit_movn_r(ctx, t1, t2, rd);	/* t1 = rd ? t2 : t1 */
	/*
	 * if ((imm < 0 && rsh != 0xffffffff) ||
	 *     (imm >= 0 && rsh != 0))
	 *	t1 = 0
	 */
	if (imm < 0) {
		emit(ctx, addiu, rd, hi(rs), 1);	/* rd = rsh + 1 */
		cmp = rd;
	} else { /* imm >= 0 */
		cmp = hi(rs);
	}
	emit_movn_r(ctx, t1, MIPS_R_ZERO, cmp);	/* t1 = 0 if cmp != 0 */

	/*
	 * if (imm < 0) rd = rsh < -1
	 * else         rd = rsh != 0
	 * rd = rd | t1
	 */
	emit(ctx, slti, rd, hi(rs), imm < 0 ? -1 : 0); /* rd = rsh < hi(imm) */
	emit(ctx, or, rd, rd, t1);		/* rd = rd | t1 */
}

/* Emulation of 64-bit slt rd, rs, rt */
static void emit_slt_r64(struct jit_context *ctx, u8 rd,
			 const u8 rs[], const u8 rt[])
{
	u8 t1 = MIPS_R_T7;
	u8 t2 = MIPS_R_T8;
	u8 t3 = MIPS_R_T9;

	/*
	 * if ((rs < 0) ^ (rt < 0)) t1 = rtl <u rsl
	 * else                     t1 = rsl <u rtl
	 * if (rsh == rth)          t1 = 0
	 */
	emit(ctx, sltu, t1, lo(rs), lo(rt));	/* t1 = rsl <u rtl */
	emit(ctx, sltu, t2, lo(rt), lo(rs));	/* t2 = rtl <u rsl */
	emit(ctx, xor, t3, hi(rs), hi(rt));	/* t3 = rsh ^ rth */
	emit(ctx, srl, rd, t3, 31);		/* rd = t3 >> 31 */
	emit_movn_r(ctx, t1, t2, rd);		/* t1 = rd ? t2 : t1 */
	emit_movn_r(ctx, t1, MIPS_R_ZERO, t3);	/* t1 = 0 if t3 != 0 */

	/* rd = (rsh < rth) | t1 */
	emit(ctx, slt, rd, hi(rs), hi(rt));	/* rd = rsh <s rth */
	emit(ctx, or, rd, rd, t1);		/* rd = rd | t1 */
}

/* Jump immediate (64-bit) */
static void emit_jmp_i64(struct jit_context *ctx,
			 const u8 dst[], s32 imm, s32 off, u8 op)
{
	u8 tmp = MIPS_R_T6;

	switch (op) {
	/* No-op, used internally for branch optimization */
	case JIT_JNOP:
		break;
	/* PC += off if dst == imm */
	/* PC += off if dst != imm */
	case BPF_JEQ:
	case BPF_JNE:
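		/*
		 * Compute a value that is zero iff lo(dst) == imm, using
		 * whichever of addiu with the negated immediate or xori
		 * fits, then fold in a check of the high word against the
		 * immediate's sign extension so that a single branch tests
		 * the full 64-bit equality.
		 */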
		if (imm >= -0x7fff && imm <= 0x8000) {
			emit(ctx, addiu, tmp, lo(dst), -imm);
		} else if ((u32)imm <= 0xffff) {
			emit(ctx, xori, tmp, lo(dst), imm);
		} else { /* Register fallback */
			emit_mov_i(ctx, tmp, imm);
			emit(ctx, xor, tmp, lo(dst), tmp);
		}
		if (imm < 0) { /* Compare sign extension */
			emit(ctx, addiu, MIPS_R_T9, hi(dst), 1);
			emit(ctx, or, tmp, tmp, MIPS_R_T9);
		} else { /* Compare zero extension */
			emit(ctx, or, tmp, tmp, hi(dst));
		}
		if (op == BPF_JEQ)
			emit(ctx, beqz, tmp, off);
		else /* BPF_JNE */
			emit(ctx, bnez, tmp, off);
		break;
	/* PC += off if dst & imm */
	/* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */
	case BPF_JSET:
	case JIT_JNSET:
		if ((u32)imm <= 0xffff) {
			emit(ctx, andi, tmp, lo(dst), imm);
		} else { /* Register fallback */
			emit_mov_i(ctx, tmp, imm);
			emit(ctx, and, tmp, lo(dst), tmp);
		}
		if (imm < 0) /* Sign-extension pulls in high word */
			emit(ctx, or, tmp, tmp, hi(dst));
		if (op == BPF_JSET)
			emit(ctx, bnez, tmp, off);
		else /* JIT_JNSET */
			emit(ctx, beqz, tmp, off);
		break;
	/* PC += off if dst > imm */
	case BPF_JGT:
		emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1);
		emit(ctx, beqz, tmp, off);
		break;
	/* PC += off if dst >= imm */
	case BPF_JGE:
		emit_sltiu_r64(ctx, tmp, dst, imm);
		emit(ctx, beqz, tmp, off);
		break;
	/* PC += off if dst < imm */
	case BPF_JLT:
		emit_sltiu_r64(ctx, tmp, dst, imm);
		emit(ctx, bnez, tmp, off);
		break;
	/* PC += off if dst <= imm */
	case BPF_JLE:
		emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1);
		emit(ctx, bnez, tmp, off);
		break;
	/* PC += off if dst > imm (signed) */
	case BPF_JSGT:
		emit_slti_r64(ctx, tmp, dst, (s64)imm + 1);
		emit(ctx, beqz, tmp, off);
		break;
	/* PC += off if dst >= imm (signed) */
	case BPF_JSGE:
		emit_slti_r64(ctx, tmp, dst, imm);
		emit(ctx, beqz, tmp, off);
		break;
	/* PC += off if dst < imm (signed) */
	case BPF_JSLT:
		emit_slti_r64(ctx, tmp, dst, imm);
		emit(ctx, bnez, tmp, off);
		break;
	/* PC += off if dst <= imm (signed) */
	case BPF_JSLE:
		emit_slti_r64(ctx, tmp, dst, (s64)imm + 1);
		emit(ctx, bnez, tmp, off);
		break;
	}
}

/* Jump register (64-bit) */
static void emit_jmp_r64(struct jit_context *ctx,
			 const u8 dst[], const u8 src[], s32 off, u8 op)
{
	u8 t1 = MIPS_R_T6;
	u8 t2 = MIPS_R_T7;

	switch (op) {
	/* No-op, used internally for branch optimization */
	case JIT_JNOP:
		break;
	/* PC += off if dst == src */
	/* PC += off if dst != src */
	case BPF_JEQ:
	case BPF_JNE:
		emit(ctx, subu, t1, lo(dst), lo(src));
		emit(ctx, subu, t2, hi(dst), hi(src));
		emit(ctx, or, t1, t1, t2);
		if (op == BPF_JEQ)
			emit(ctx, beqz, t1, off);
		else /* BPF_JNE */
			emit(ctx, bnez, t1, off);
		break;
	/* PC += off if dst & src */
	/* PC += off if (dst & src) == 0 (not in BPF, used for long jumps) */
	case BPF_JSET:
	case JIT_JNSET:
		emit(ctx, and, t1, lo(dst), lo(src));
		emit(ctx, and, t2, hi(dst), hi(src));
		emit(ctx, or, t1, t1, t2);
		if (op == BPF_JSET)
			emit(ctx, bnez, t1, off);
		else /* JIT_JNSET */
			emit(ctx, beqz, t1, off);
		break;
	/* PC += off if dst > src */
	case BPF_JGT:
		emit_sltu_r64(ctx, t1, src, dst);
		emit(ctx, bnez, t1, off);
		break;
	/* PC += off if dst >= src */
	case BPF_JGE:
		emit_sltu_r64(ctx, t1, dst, src);
		emit(ctx, beqz, t1, off);
		break;
	/* PC += off if dst < src */
	case BPF_JLT:
		emit_sltu_r64(ctx, t1, dst, src);
		emit(ctx, bnez, t1, off);
		break;
	/* PC += off if dst <= src */
	case BPF_JLE:
		emit_sltu_r64(ctx, t1, src, dst);
		emit(ctx, beqz, t1, off);
		break;
	/* PC += off if dst > src (signed) */
	case BPF_JSGT:
		emit_slt_r64(ctx, t1, src, dst);
		emit(ctx, bnez, t1, off);
		break;
	/* PC += off if dst >= src (signed) */
	case BPF_JSGE:
		emit_slt_r64(ctx, t1, dst, src);
		emit(ctx, beqz, t1, off);
		break;
	/* PC += off if dst < src (signed) */
	case BPF_JSLT:
		emit_slt_r64(ctx, t1, dst, src);
		emit(ctx, bnez, t1, off);
		break;
	/* PC += off if dst <= src (signed) */
	case BPF_JSLE:
		emit_slt_r64(ctx, t1, src, dst);
		emit(ctx, beqz, t1, off);
		break;
	}
}

/* Function call */
static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn)
{
	bool fixed;
	u64 addr;

	/* Decode the call address */
	if (bpf_jit_get_func_addr(ctx->program, insn, false,
				  &addr, &fixed) < 0)
		return -1;
	if (!fixed)
		return -1;

	/* Push stack arguments */
	push_regs(ctx, JIT_STACK_REGS, 0, JIT_RESERVED_STACK);

	/* Emit function call */
	emit_mov_i(ctx, MIPS_R_T9, addr);
	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
	emit(ctx, nop); /* Delay slot */

	clobber_reg(ctx, MIPS_R_RA);
	clobber_reg(ctx, MIPS_R_V0);
	clobber_reg(ctx, MIPS_R_V1);
	return 0;
}

/* Function tail call */
static int emit_tail_call(struct jit_context *ctx)
{
	u8 ary = lo(bpf2mips32[BPF_REG_2]);
	u8 ind = lo(bpf2mips32[BPF_REG_3]);
	u8 t1 = MIPS_R_T8;
	u8 t2 = MIPS_R_T9;
	int off;

	/*
	 * Tail call:
	 * eBPF R1 - function argument (context ptr), passed in a0-a1
	 * eBPF R2 - ptr to object with array of function entry points
	 * eBPF R3 - array index of function to be called
	 * stack[sz] - remaining tail call count, initialized in prologue
	 */

	/* if (ind >= ary->map.max_entries) goto out */
	off = offsetof(struct bpf_array, map.max_entries);
	if (off > 0x7fff)
		return -1;
	emit(ctx, lw, t1, off, ary);		/* t1 = ary->map.max_entries */
	emit_load_delay(ctx);			/* Load delay slot */
	emit(ctx, sltu, t1, ind, t1);		/* t1 = ind < t1 */
	emit(ctx, beqz, t1, get_offset(ctx, 1)); /* PC += off(1) if t1 == 0 */
						 /* (next insn delay slot) */
	/* if (TCC-- <= 0) goto out */
	emit(ctx, lw, t2, ctx->stack_size, MIPS_R_SP); /* t2 = *(SP + size) */
	emit_load_delay(ctx);			/* Load delay slot */
	emit(ctx, blez, t2, get_offset(ctx, 1)); /* PC += off(1) if t2 <= 0 */
	emit(ctx, addiu, t2, t2, -1);		/* t2-- (delay slot) */
	emit(ctx, sw, t2, ctx->stack_size, MIPS_R_SP); /* *(SP + size) = t2 */

	/* prog = ary->ptrs[ind] */
	off = offsetof(struct bpf_array, ptrs);
	if (off > 0x7fff)
		return -1;
	emit(ctx, sll, t1, ind, 2);		/* t1 = ind << 2 */
	emit(ctx, addu, t1, t1, ary);		/* t1 += ary */
	emit(ctx, lw, t2, off, t1);		/* t2 = *(t1 + off) */
	emit_load_delay(ctx);			/* Load delay slot */

	/* if (prog == 0) goto out */
	emit(ctx, beqz, t2, get_offset(ctx, 1)); /* PC += off(1) if t2 == 0 */
	emit(ctx, nop);				/* Delay slot */

	/* func = prog->bpf_func + JIT_TCALL_SKIP (prologue skip offset) */
	off = offsetof(struct bpf_prog, bpf_func);
	if (off > 0x7fff)
		return -1;
	emit(ctx, lw, t1, off, t2);		/* t1 = *(t2 + off) */
	emit_load_delay(ctx);			/* Load delay slot */
	emit(ctx, addiu, t1, t1, JIT_TCALL_SKIP); /* t1 += skip (8 or 12) */

	/* goto func */
	build_epilogue(ctx, t1);
	return 0;
}

/*
 * Stack frame layout for a JITed program (stack grows down).
 *
 * Higher address  : Caller's stack frame        :
 *                 :----------------------------:
 *                 : 64-bit eBPF args r3-r5     :
 *                 :----------------------------:
 *                 : Reserved / tail call count :
 *                 +============================+ <--- MIPS sp before call
 *                 | Callee-saved registers,    |
 *                 | including RA and FP        |
 *                 +----------------------------+ <--- eBPF FP (MIPS zero,fp)
 *                 | Local eBPF variables       |
 *                 | allocated by program       |
 *                 +----------------------------+
 *                 | Reserved for caller-saved  |
 *                 | registers                  |
 *                 +----------------------------+
 *                 | Reserved for 64-bit eBPF   |
 *                 | args r3-r5 & args passed   |
 *                 | on stack in kernel calls   |
 * Lower address   +============================+ <--- MIPS sp
 */

/* Build program prologue to set up the stack and registers */
void build_prologue(struct jit_context *ctx)
{
	const u8 *r1 = bpf2mips32[BPF_REG_1];
	const u8 *fp = bpf2mips32[BPF_REG_FP];
	int stack, saved, locals, reserved;

	/*
	 * In the unlikely event that the TCC limit is raised beyond what
	 * fits in the 16-bit immediate allowed in the generated code
	 * (0xffff), the build fails here. It is better to fail to compile
	 * than to degrade gracefully.
	 */
	BUILD_BUG_ON(MAX_TAIL_CALL_CNT > 0xffff);

	/*
	 * The first two instructions initialize TCC in the reserved (for us)
	 * 16-byte area in the parent's stack frame. On a tail call, the
	 * calling function jumps into the prologue after these instructions.
	 */
	emit(ctx, ori, MIPS_R_T9, MIPS_R_ZERO, MAX_TAIL_CALL_CNT);
	emit(ctx, sw, MIPS_R_T9, 0, MIPS_R_SP);

	/*
	 * Register eBPF R1 contains the 32-bit context pointer argument.
	 * A 32-bit argument is always passed in MIPS register a0, regardless
	 * of CPU endianness. Initialize R1 accordingly and zero-extend.
	 */
#ifdef __BIG_ENDIAN
	emit(ctx, move, lo(r1), MIPS_R_A0);
#endif

	/* === Entry-point for tail calls === */

	/* Zero-extend the 32-bit argument */
	emit(ctx, move, hi(r1), MIPS_R_ZERO);

	/* If the eBPF frame pointer was accessed it must be saved */
	if (ctx->accessed & BIT(BPF_REG_FP))
		clobber_reg64(ctx, fp);

	/* Compute the stack space needed for callee-saved registers */
	saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u32);
	saved = ALIGN(saved, MIPS_STACK_ALIGNMENT);

	/* Stack space used by eBPF program local data */
	locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT);

	/*
	 * If we are emitting function calls, reserve extra stack space for
	 * caller-saved registers and function arguments passed on the stack.
	 * The required space is computed automatically during resource
	 * usage discovery (pass 1).
	 */
	reserved = ctx->stack_used;

	/* Allocate the stack frame */
	stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT);
	emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, -stack);

	/* Store callee-saved registers on stack */
	push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved);

	/* Initialize the eBPF frame pointer if accessed */
	if (ctx->accessed & BIT(BPF_REG_FP))
		emit(ctx, addiu, lo(fp), MIPS_R_SP, stack - saved);

	ctx->saved_size = saved;
	ctx->stack_size = stack;
}

/* Build the program epilogue to restore the stack and registers */
void build_epilogue(struct jit_context *ctx, int dest_reg)
{
	/* Restore callee-saved registers from stack */
	pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0,
		 ctx->stack_size - ctx->saved_size);
	/*
	 * A 32-bit return value is always passed in MIPS register v0,
	 * but on big-endian targets the low part of R0 is mapped to v1.
	 */
#ifdef __BIG_ENDIAN
	emit(ctx, move, MIPS_R_V0, MIPS_R_V1);
#endif

	/* Jump to the return address and adjust the stack pointer */
	emit(ctx, jr, dest_reg);
	emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size);
}

/* Build one eBPF instruction */
int build_insn(const struct bpf_insn *insn, struct jit_context *ctx)
{
	const u8 *dst = bpf2mips32[insn->dst_reg];
	const u8 *src = bpf2mips32[insn->src_reg];
	const u8 *res = bpf2mips32[BPF_REG_0];
	const u8 *tmp = bpf2mips32[JIT_REG_TMP];
	u8 code = insn->code;
	s16 off = insn->off;
	s32 imm = insn->imm;
	s32 val, rel;
	u8 alu, jmp;

	switch (code) {
	/* ALU operations */
	/* dst = imm */
	case BPF_ALU | BPF_MOV | BPF_K:
		emit_mov_i(ctx, lo(dst), imm);
		emit_zext_ver(ctx, dst);
		break;
	/* dst = src */
	case BPF_ALU | BPF_MOV | BPF_X:
		if (imm == 1) {
			/* Special mov32 for zext */
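			/*
			 * The verifier requests an explicit zero-extension
			 * by emitting a 32-bit register move with imm set
			 * to 1, so only the high word is cleared here.
			 */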
			emit_mov_i(ctx, hi(dst), 0);
		} else {
			emit_mov_r(ctx, lo(dst), lo(src));
			emit_zext_ver(ctx, dst);
		}
		break;
	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
		emit_alu_i(ctx, lo(dst), 0, BPF_NEG);
		emit_zext_ver(ctx, dst);
		break;
	/* dst = dst & imm */
	/* dst = dst | imm */
	/* dst = dst ^ imm */
	/* dst = dst << imm */
	/* dst = dst >> imm */
	/* dst = dst >> imm (arithmetic) */
	/* dst = dst + imm */
	/* dst = dst - imm */
	/* dst = dst * imm */
	/* dst = dst / imm */
	/* dst = dst % imm */
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU | BPF_LSH | BPF_K:
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU | BPF_ARSH | BPF_K:
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU | BPF_MOD | BPF_K:
		if (!valid_alu_i(BPF_OP(code), imm)) {
			emit_mov_i(ctx, MIPS_R_T6, imm);
			emit_alu_r(ctx, lo(dst), MIPS_R_T6, BPF_OP(code));
		} else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
			emit_alu_i(ctx, lo(dst), val, alu);
		}
		emit_zext_ver(ctx, dst);
		break;
1527 | /* dst = dst & src */ |
1528 | /* dst = dst | src */ |
1529 | /* dst = dst ^ src */ |
1530 | /* dst = dst << src */ |
1531 | /* dst = dst >> src */ |
1532 | /* dst = dst >> src (arithmetic) */ |
1533 | /* dst = dst + src */ |
1534 | /* dst = dst - src */ |
1535 | /* dst = dst * src */ |
1536 | /* dst = dst / src */ |
1537 | /* dst = dst % src */ |
1538 | case BPF_ALU | BPF_AND | BPF_X: |
1539 | case BPF_ALU | BPF_OR | BPF_X: |
1540 | case BPF_ALU | BPF_XOR | BPF_X: |
1541 | case BPF_ALU | BPF_LSH | BPF_X: |
1542 | case BPF_ALU | BPF_RSH | BPF_X: |
1543 | case BPF_ALU | BPF_ARSH | BPF_X: |
1544 | case BPF_ALU | BPF_ADD | BPF_X: |
1545 | case BPF_ALU | BPF_SUB | BPF_X: |
1546 | case BPF_ALU | BPF_MUL | BPF_X: |
1547 | case BPF_ALU | BPF_DIV | BPF_X: |
1548 | case BPF_ALU | BPF_MOD | BPF_X: |
1549 | emit_alu_r(ctx, dst: lo(reg: dst), src: lo(reg: src), BPF_OP(code)); |
1550 | emit_zext_ver(ctx, dst); |
1551 | break; |
1552 | /* dst = imm (64-bit) */ |
1553 | case BPF_ALU64 | BPF_MOV | BPF_K: |
1554 | emit_mov_se_i64(ctx, dst, imm); |
1555 | break; |
1556 | /* dst = src (64-bit) */ |
1557 | case BPF_ALU64 | BPF_MOV | BPF_X: |
1558 | emit_mov_r(ctx, dst: lo(reg: dst), src: lo(reg: src)); |
1559 | emit_mov_r(ctx, dst: hi(reg: dst), src: hi(reg: src)); |
1560 | break; |
	/* dst = -dst (64-bit) */
	case BPF_ALU64 | BPF_NEG:
		emit_neg_i64(ctx, dst);
		break;
	/* dst = dst & imm (64-bit) */
	case BPF_ALU64 | BPF_AND | BPF_K:
		emit_alu_i64(ctx, dst, imm, BPF_OP(code));
		break;
	/* dst = dst | imm (64-bit) */
	/* dst = dst ^ imm (64-bit) */
	/* dst = dst + imm (64-bit) */
	/* dst = dst - imm (64-bit) */
	case BPF_ALU64 | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
		if (imm)
			emit_alu_i64(ctx, dst, imm, BPF_OP(code));
		break;
	/* dst = dst << imm (64-bit) */
	/* dst = dst >> imm (64-bit) */
	/* dst = dst >> imm (64-bit, arithmetic) */
	case BPF_ALU64 | BPF_LSH | BPF_K:
	case BPF_ALU64 | BPF_RSH | BPF_K:
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		if (imm)
			emit_shift_i64(ctx, dst, imm, BPF_OP(code));
		break;
	/* dst = dst * imm (64-bit) */
	case BPF_ALU64 | BPF_MUL | BPF_K:
		emit_mul_i64(ctx, dst, imm);
		break;
	/* dst = dst / imm (64-bit) */
	/* dst = dst % imm (64-bit) */
	case BPF_ALU64 | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		/*
		 * Sign-extend the immediate value into a temporary register,
		 * and then do the operation on this register.
		 */
		emit_mov_se_i64(ctx, tmp, imm);
		emit_divmod_r64(ctx, dst, tmp, BPF_OP(code));
		break;
	/* dst = dst & src (64-bit) */
	/* dst = dst | src (64-bit) */
	/* dst = dst ^ src (64-bit) */
	/* dst = dst + src (64-bit) */
	/* dst = dst - src (64-bit) */
	case BPF_ALU64 | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_X:
		emit_alu_r64(ctx, dst, src, BPF_OP(code));
		break;
	/* dst = dst << src (64-bit) */
	/* dst = dst >> src (64-bit) */
	/* dst = dst >> src (64-bit, arithmetic) */
	case BPF_ALU64 | BPF_LSH | BPF_X:
	case BPF_ALU64 | BPF_RSH | BPF_X:
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit_shift_r64(ctx, dst, lo(src), BPF_OP(code));
		break;
	/* dst = dst * src (64-bit) */
	case BPF_ALU64 | BPF_MUL | BPF_X:
		emit_mul_r64(ctx, dst, src);
		break;
	/* dst = dst / src (64-bit) */
	/* dst = dst % src (64-bit) */
	case BPF_ALU64 | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_X:
		emit_divmod_r64(ctx, dst, src, BPF_OP(code));
		break;
	/* dst = htole(dst) */
	/* dst = htobe(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
	case BPF_ALU | BPF_END | BPF_FROM_BE:
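		/*
		 * Converting to the opposite of the native byte order is
		 * a byte swap; converting to the native order only needs
		 * a truncation to the operand width given by imm.
		 */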
		if (BPF_SRC(code) ==
#ifdef __BIG_ENDIAN
		    BPF_FROM_LE
#else
		    BPF_FROM_BE
#endif
		    )
			emit_bswap_r64(ctx, dst, imm);
		else
			emit_trunc_r64(ctx, dst, imm);
		break;
	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
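		/*
		 * A 64-bit immediate load occupies two instruction slots,
		 * with the upper 32 bits in the second slot. Returning 1
		 * makes the caller skip that slot.
		 */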
		emit_mov_i(ctx, lo(dst), imm);
		emit_mov_i(ctx, hi(dst), insn[1].imm);
		return 1;
	/* LDX: dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_DW:
		emit_ldx(ctx, dst, lo(src), off, BPF_SIZE(code));
		break;
	/* ST: *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_DW:
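		/*
		 * MIPS stores have no immediate operand, so the value is
		 * first materialized in the temporary register pair; only
		 * BPF_DW needs the sign-extended 64-bit form.
		 */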
		switch (BPF_SIZE(code)) {
		case BPF_DW:
			/* Sign-extend immediate value into temporary reg */
			emit_mov_se_i64(ctx, tmp, imm);
			break;
		case BPF_W:
		case BPF_H:
		case BPF_B:
			emit_mov_i(ctx, lo(tmp), imm);
			break;
		}
		emit_stx(ctx, lo(dst), tmp, off, BPF_SIZE(code));
		break;
	/* STX: *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_DW:
		emit_stx(ctx, lo(dst), src, off, BPF_SIZE(code));
		break;
	/* Speculation barrier */
	case BPF_ST | BPF_NOSPEC:
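		/*
		 * Inserted by the verifier to fence speculative execution;
		 * this JIT emits no instruction for it.
		 */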
		break;
	/* Atomics */
	case BPF_STX | BPF_ATOMIC | BPF_W:
		switch (imm) {
		case BPF_ADD:
		case BPF_ADD | BPF_FETCH:
		case BPF_AND:
		case BPF_AND | BPF_FETCH:
		case BPF_OR:
		case BPF_OR | BPF_FETCH:
		case BPF_XOR:
		case BPF_XOR | BPF_FETCH:
		case BPF_XCHG:
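			/*
			 * Use a native ll/sc loop when the CPU supports
			 * it; otherwise fall back to a sequence that does
			 * not rely on ll/sc (see emit_atomic_r32()).
			 */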
			if (cpu_has_llsc)
				emit_atomic_r(ctx, lo(dst), lo(src), off, imm);
			else /* Non-ll/sc fallback */
				emit_atomic_r32(ctx, lo(dst), lo(src),
						off, imm);
			/* FETCH ops return the old value in src */
			if (imm & BPF_FETCH)
				emit_zext_ver(ctx, src);
			break;
		case BPF_CMPXCHG:
			if (cpu_has_llsc)
				emit_cmpxchg_r(ctx, lo(dst), lo(src),
					       lo(res), off);
			else /* Non-ll/sc fallback */
				emit_cmpxchg_r32(ctx, lo(dst), lo(src), off);
			/* Result zero-extension inserted by verifier */
			break;
		default:
			goto notyet;
		}
		break;
	/* Atomics (64-bit) */
	case BPF_STX | BPF_ATOMIC | BPF_DW:
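		/*
		 * MIPS32 has no 64-bit ll/sc, so emit_atomic_r64() and
		 * emit_cmpxchg_r64() implement these by calling out to
		 * kernel helpers.
		 */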
		switch (imm) {
		case BPF_ADD:
		case BPF_ADD | BPF_FETCH:
		case BPF_AND:
		case BPF_AND | BPF_FETCH:
		case BPF_OR:
		case BPF_OR | BPF_FETCH:
		case BPF_XOR:
		case BPF_XOR | BPF_FETCH:
		case BPF_XCHG:
			emit_atomic_r64(ctx, lo(dst), src, off, imm);
			break;
		case BPF_CMPXCHG:
			emit_cmpxchg_r64(ctx, lo(dst), src, off);
			break;
		default:
			goto notyet;
		}
		break;
	/* PC += off if dst == src */
	/* PC += off if dst != src */
	/* PC += off if dst & src */
	/* PC += off if dst > src */
	/* PC += off if dst >= src */
	/* PC += off if dst < src */
	/* PC += off if dst <= src */
	/* PC += off if dst > src (signed) */
	/* PC += off if dst >= src (signed) */
	/* PC += off if dst < src (signed) */
	/* PC += off if dst <= src (signed) */
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
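		/*
		 * JMP32 compares only the low register halves. A zero
		 * offset targets the next instruction and is a no-op.
		 * setup_jmp_r() picks the native branch condition and
		 * offset; for targets beyond conditional-branch range the
		 * condition is inverted so the branch skips a long jump
		 * emitted by finish_jmp(), which fails if even that
		 * cannot reach the target.
		 */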
		if (off == 0)
			break;
		setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
		emit_jmp_r(ctx, lo(dst), lo(src), rel, jmp);
		if (finish_jmp(ctx, jmp, off) < 0)
			goto toofar;
		break;
	/* PC += off if dst == imm */
	/* PC += off if dst != imm */
	/* PC += off if dst & imm */
	/* PC += off if dst > imm */
	/* PC += off if dst >= imm */
	/* PC += off if dst < imm */
	/* PC += off if dst <= imm */
	/* PC += off if dst > imm (signed) */
	/* PC += off if dst >= imm (signed) */
	/* PC += off if dst < imm (signed) */
	/* PC += off if dst <= imm (signed) */
	case BPF_JMP32 | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_K:
		if (off == 0)
			break;
		setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel);
		if (valid_jmp_i(jmp, imm)) {
			emit_jmp_i(ctx, lo(dst), imm, rel, jmp);
		} else {
			/* Move large immediate to register */
			emit_mov_i(ctx, MIPS_R_T6, imm);
			emit_jmp_r(ctx, lo(dst), MIPS_R_T6, rel, jmp);
		}
		if (finish_jmp(ctx, jmp, off) < 0)
			goto toofar;
		break;
	/* PC += off if dst == src */
	/* PC += off if dst != src */
	/* PC += off if dst & src */
	/* PC += off if dst > src */
	/* PC += off if dst >= src */
	/* PC += off if dst < src */
	/* PC += off if dst <= src */
	/* PC += off if dst > src (signed) */
	/* PC += off if dst >= src (signed) */
	/* PC += off if dst < src (signed) */
	/* PC += off if dst <= src (signed) */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_X:
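		/* 64-bit compares must examine both register halves */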
		if (off == 0)
			break;
		setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
		emit_jmp_r64(ctx, dst, src, rel, jmp);
		if (finish_jmp(ctx, jmp, off) < 0)
			goto toofar;
		break;
	/* PC += off if dst == imm */
	/* PC += off if dst != imm */
	/* PC += off if dst & imm */
	/* PC += off if dst > imm */
	/* PC += off if dst >= imm */
	/* PC += off if dst < imm */
	/* PC += off if dst <= imm */
	/* PC += off if dst > imm (signed) */
	/* PC += off if dst >= imm (signed) */
	/* PC += off if dst < imm (signed) */
	/* PC += off if dst <= imm (signed) */
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
		if (off == 0)
			break;
		setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel);
		emit_jmp_i64(ctx, dst, imm, rel, jmp);
		if (finish_jmp(ctx, jmp, off) < 0)
			goto toofar;
		break;
	/* PC += off */
	case BPF_JMP | BPF_JA:
		if (off == 0)
			break;
		if (emit_ja(ctx, off) < 0)
			goto toofar;
		break;
	/* Tail call */
	case BPF_JMP | BPF_TAIL_CALL:
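		/*
		 * emit_tail_call() bounds-checks the program array index,
		 * enforces the tail call count limit, and jumps into the
		 * selected program.
		 */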
		if (emit_tail_call(ctx) < 0)
			goto invalid;
		break;
	/* Function call */
	case BPF_JMP | BPF_CALL:
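		/*
		 * emit_call() moves the BPF argument registers into place
		 * for the o32 calling convention before emitting the call.
		 */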
		if (emit_call(ctx, insn) < 0)
			goto invalid;
		break;
	/* Function return */
	case BPF_JMP | BPF_EXIT:
		/*
		 * Optimization: when the last instruction is an EXIT, we
		 * can simply fall through to the epilogue.
		 */
		if (ctx->bpf_index == ctx->program->len - 1)
			break;
		if (emit_exit(ctx) < 0)
			goto toofar;
		break;

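	/*
	 * Shared error exits: -EINVAL for an opcode the JIT does not
	 * recognize, -EFAULT for one that is recognized but not yet
	 * implemented, and -E2BIG for a branch target out of reach.
	 */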
	default:
invalid:
		pr_err_once("unknown opcode %02x\n", code);
		return -EINVAL;
notyet:
		pr_info_once("*** NOT YET: opcode %02x ***\n", code);
		return -EFAULT;
toofar:
		pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n",
			     ctx->bpf_index, code);
		return -E2BIG;
	}
	return 0;
}