// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2016-2018 Netronome Systems, Inc. */

#define pr_fmt(fmt)	"NFP net bpf: " fmt

#include <linux/bug.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/pkt_cls.h>
#include <linux/reciprocal_div.h>
#include <linux/unistd.h>

#include "main.h"
#include "../nfp_asm.h"
#include "../nfp_net_ctrl.h"
/* --- NFP prog --- */
/* The "walk multiple" for-each macros provide pos and next<n> pointers.
 * It's safe to modify the next pointers (but not pos).
 */
#define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l);				\
	     &(nfp_prog)->insns != &pos->l &&				\
	     &(nfp_prog)->insns != &next->l;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos))

#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l),				\
	     next2 = list_next_entry(next, l);				\
	     &(nfp_prog)->insns != &pos->l &&				\
	     &(nfp_prog)->insns != &next->l &&				\
	     &(nfp_prog)->insns != &next2->l;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos),					\
	     next2 = nfp_meta_next(next))

static bool
nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return meta->l.prev != &nfp_prog->insns;
}

static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
{
	if (nfp_prog->__prog_alloc_len / sizeof(u64) == nfp_prog->prog_len) {
		pr_warn("instruction limit reached (%u NFP instructions)\n",
			nfp_prog->prog_len);
		nfp_prog->error = -ENOSPC;
		return;
	}

	nfp_prog->prog[nfp_prog->prog_len] = insn;
	nfp_prog->prog_len++;
}

static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
{
	return nfp_prog->prog_len;
}

static bool
nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
{
	/* If there is a recorded error we may have dropped instructions;
	 * that doesn't have to be due to a translator bug, and the
	 * translation will fail anyway, so just return OK.
	 */
	if (nfp_prog->error)
		return true;
	return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off);
}

/* --- Emitters --- */
static void
__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx,
	   bool indir)
{
	u64 insn;

	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
		FIELD_PREP(OP_CMD_CTX, ctx) |
		FIELD_PREP(OP_CMD_B_SRC, breg) |
		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
		FIELD_PREP(OP_CMD_XFER, xfer) |
		FIELD_PREP(OP_CMD_CNT, size) |
		FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) |
		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
		FIELD_PREP(OP_CMD_INDIR, indir) |
		FIELD_PREP(OP_CMD_MODE, mode);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	     swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir)
{
	struct nfp_insn_re_regs reg;
	int err;

	err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
	if (err) {
		nfp_prog->error = err;
		return;
	}
	if (reg.swap) {
		pr_err("cmd can't swap arguments\n");
		nfp_prog->error = -EFAULT;
		return;
	}
	if (reg.dst_lmextn || reg.src_lmextn) {
		pr_err("cmd can't use LMextn\n");
		nfp_prog->error = -EFAULT;
		return;
	}

	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx,
		   indir);
}

static void
emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
{
	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false);
}

static void
emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	       swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
{
	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true);
}

static void
__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
	  enum br_ctx_signal_state css, u16 addr, u8 defer)
{
	u16 addr_lo, addr_hi;
	u64 insn;

	addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
	addr_hi = addr != addr_lo;

	insn = OP_BR_BASE |
		FIELD_PREP(OP_BR_MASK, mask) |
		FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
		FIELD_PREP(OP_BR_CSS, css) |
		FIELD_PREP(OP_BR_DEFBR, defer) |
		FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
		FIELD_PREP(OP_BR_ADDR_HI, addr_hi);

	nfp_prog_push(nfp_prog, insn);
}
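
/* Editor's note (not from the original source): the branch target is encoded
 * as addr_lo, i.e. the bits of addr that fit in OP_BR_ADDR_LO, plus a one-bit
 * addr_hi flag which is set whenever addr has bits above that field
 * (addr != addr_lo).
 */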

static void
emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer,
	     enum nfp_relo_type relo)
{
	if (mask == BR_UNC && defer > 2) {
		pr_err("BUG: branch defer out of bounds %d\n", defer);
		nfp_prog->error = -EFAULT;
		return;
	}

	__emit_br(nfp_prog, mask,
		  mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
		  BR_CSS_NONE, addr, defer);

	nfp_prog->prog[nfp_prog->prog_len - 1] |=
		FIELD_PREP(OP_RELO_TYPE, relo);
}

static void
emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
{
	emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
}

static void
__emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer,
	      bool set, bool src_lmextn)
{
	u16 addr_lo, addr_hi;
	u64 insn;

	addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO));
	addr_hi = addr != addr_lo;

	insn = OP_BR_BIT_BASE |
		FIELD_PREP(OP_BR_BIT_A_SRC, areg) |
		FIELD_PREP(OP_BR_BIT_B_SRC, breg) |
		FIELD_PREP(OP_BR_BIT_BV, set) |
		FIELD_PREP(OP_BR_BIT_DEFBR, defer) |
		FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) |
		FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) |
		FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr,
		 u8 defer, bool set, enum nfp_relo_type relo)
{
	struct nfp_insn_re_regs reg;
	int err;

	/* NOTE: The bit to test is specified as a rotation amount, such that
	 *	 the bit to test will be placed on the MSB of the result when
	 *	 doing a rotate right. For bit X, we need right rotate X + 1.
	 */
	bit += 1;

	err = swreg_to_restricted(reg_none(), src, reg_imm(bit), &reg, false);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set,
		      reg.src_lmextn);

	nfp_prog->prog[nfp_prog->prog_len - 1] |=
		FIELD_PREP(OP_RELO_TYPE, relo);
}

static void
emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer)
{
	emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL);
}

static void
__emit_br_alu(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
	      u8 defer, bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_BR_ALU_BASE |
		FIELD_PREP(OP_BR_ALU_A_SRC, areg) |
		FIELD_PREP(OP_BR_ALU_B_SRC, breg) |
		FIELD_PREP(OP_BR_ALU_DEFBR, defer) |
		FIELD_PREP(OP_BR_ALU_IMM_HI, imm_hi) |
		FIELD_PREP(OP_BR_ALU_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_BR_ALU_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void emit_rtn(struct nfp_prog *nfp_prog, swreg base, u8 defer)
{
	struct nfp_insn_ur_regs reg;
	int err;

	err = swreg_to_unrestricted(reg_none(), base, reg_imm(0), &reg);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_br_alu(nfp_prog, reg.areg, reg.breg, 0, defer, reg.dst_lmextn,
		      reg.src_lmextn);
}

static void
__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
	     enum immed_width width, bool invert,
	     enum immed_shift shift, bool wr_both,
	     bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_IMMED_BASE |
		FIELD_PREP(OP_IMMED_A_SRC, areg) |
		FIELD_PREP(OP_IMMED_B_SRC, breg) |
		FIELD_PREP(OP_IMMED_IMM, imm_hi) |
		FIELD_PREP(OP_IMMED_WIDTH, width) |
		FIELD_PREP(OP_IMMED_INV, invert) |
		FIELD_PREP(OP_IMMED_SHIFT, shift) |
		FIELD_PREP(OP_IMMED_WR_AB, wr_both) |
		FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
	   enum immed_width width, bool invert, enum immed_shift shift)
{
	struct nfp_insn_ur_regs reg;
	int err;

	if (swreg_type(dst) == NN_REG_IMM) {
		nfp_prog->error = -EFAULT;
		return;
	}

	err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	/* Use reg.dst when destination is No-Dest. */
	__emit_immed(nfp_prog,
		     swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg,
		     reg.breg, imm >> 8, width, invert, shift,
		     reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
}

static void
__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
	   enum shf_sc sc, u8 shift,
	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
	   bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
		nfp_prog->error = -EFAULT;
		return;
	}

	/* The NFP shift instruction has something special.  If the shift
	 * direction is left then a shift amount of 1 to 31 is specified as
	 * 32 minus the amount to shift.
	 *
	 * There is no need to do this for an indirect shift, which has a
	 * shift amount of 0.  If we did the subtraction, a shift amount of 0
	 * would be turned into 32, which would eventually be encoded the same
	 * as 0 because only the low 5 bits are encoded - but a shift amount
	 * of 32 would fail the FIELD_PREP check done later on the shift mask
	 * (0x1f), because 32 is out of the mask range.
	 */
	if (sc == SHF_SC_L_SHF && shift)
		shift = 32 - shift;

	insn = OP_SHF_BASE |
		FIELD_PREP(OP_SHF_A_SRC, areg) |
		FIELD_PREP(OP_SHF_SC, sc) |
		FIELD_PREP(OP_SHF_B_SRC, breg) |
		FIELD_PREP(OP_SHF_I8, i8) |
		FIELD_PREP(OP_SHF_SW, sw) |
		FIELD_PREP(OP_SHF_DST, dst) |
		FIELD_PREP(OP_SHF_SHIFT, shift) |
		FIELD_PREP(OP_SHF_OP, op) |
		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
		FIELD_PREP(OP_SHF_WR_AB, wr_both) |
		FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}
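
/* Worked example (editor's illustration): a left shift by 10 via
 * SHF_SC_L_SHF is encoded with 32 - 10 = 22 in the shift field; an indirect
 * shift passes shift == 0 and skips the subtraction entirely.
 */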

static void
emit_shf(struct nfp_prog *nfp_prog, swreg dst,
	 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
{
	struct nfp_insn_re_regs reg;
	int err;

	err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
		   reg.dst_lmextn, reg.src_lmextn);
}

static void
emit_shf_indir(struct nfp_prog *nfp_prog, swreg dst,
	       swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc)
{
	if (sc == SHF_SC_R_ROT) {
		pr_err("indirect shift is not allowed on rotation\n");
		nfp_prog->error = -EFAULT;
		return;
	}

	emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0);
}

static void
__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
	   bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_ALU_BASE |
		FIELD_PREP(OP_ALU_A_SRC, areg) |
		FIELD_PREP(OP_ALU_B_SRC, breg) |
		FIELD_PREP(OP_ALU_DST, dst) |
		FIELD_PREP(OP_ALU_SW, swap) |
		FIELD_PREP(OP_ALU_OP, op) |
		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
		FIELD_PREP(OP_ALU_WR_AB, wr_both) |
		FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_alu(struct nfp_prog *nfp_prog, swreg dst,
	 swreg lreg, enum alu_op op, swreg rreg)
{
	struct nfp_insn_ur_regs reg;
	int err;

	err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
		   reg.areg, op, reg.breg, reg.swap, reg.wr_both,
		   reg.dst_lmextn, reg.src_lmextn);
}

static void
__emit_mul(struct nfp_prog *nfp_prog, enum alu_dst_ab dst_ab, u16 areg,
	   enum mul_type type, enum mul_step step, u16 breg, bool swap,
	   bool wr_both, bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_MUL_BASE |
		FIELD_PREP(OP_MUL_A_SRC, areg) |
		FIELD_PREP(OP_MUL_B_SRC, breg) |
		FIELD_PREP(OP_MUL_STEP, step) |
		FIELD_PREP(OP_MUL_DST_AB, dst_ab) |
		FIELD_PREP(OP_MUL_SW, swap) |
		FIELD_PREP(OP_MUL_TYPE, type) |
		FIELD_PREP(OP_MUL_WR_AB, wr_both) |
		FIELD_PREP(OP_MUL_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_MUL_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_mul(struct nfp_prog *nfp_prog, swreg lreg, enum mul_type type,
	 enum mul_step step, swreg rreg)
{
	struct nfp_insn_ur_regs reg;
	u16 areg;
	int err;

	if (type == MUL_TYPE_START && step != MUL_STEP_NONE) {
		nfp_prog->error = -EINVAL;
		return;
	}

	if (step == MUL_LAST || step == MUL_LAST_2) {
		/* When the step number is MUL_LAST or MUL_LAST_2, the left
		 * source is used as the destination.
		 */
		err = swreg_to_unrestricted(lreg, reg_none(), rreg, &reg);
		areg = reg.dst;
	} else {
		err = swreg_to_unrestricted(reg_none(), lreg, rreg, &reg);
		areg = reg.areg;
	}

	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_mul(nfp_prog, reg.dst_ab, areg, type, step, reg.breg, reg.swap,
		   reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
}

static void
__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
		bool zero, bool swap, bool wr_both,
		bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_LDF_BASE |
		FIELD_PREP(OP_LDF_A_SRC, areg) |
		FIELD_PREP(OP_LDF_SC, sc) |
		FIELD_PREP(OP_LDF_B_SRC, breg) |
		FIELD_PREP(OP_LDF_I8, imm8) |
		FIELD_PREP(OP_LDF_SW, swap) |
		FIELD_PREP(OP_LDF_ZF, zero) |
		FIELD_PREP(OP_LDF_BMASK, bmask) |
		FIELD_PREP(OP_LDF_SHF, shift) |
		FIELD_PREP(OP_LDF_WR_AB, wr_both) |
		FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
		  enum shf_sc sc, u8 shift, bool zero)
{
	struct nfp_insn_re_regs reg;
	int err;

	/* Note: ld_field is special as it uses one of the src regs as dst */
	err = swreg_to_restricted(dst, dst, src, &reg, true);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
			reg.i8, zero, reg.swap, reg.wr_both,
			reg.dst_lmextn, reg.src_lmextn);
}

static void
emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
	      enum shf_sc sc, u8 shift)
{
	emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false);
}

static void
__emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
	    bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_LCSR_BASE |
		FIELD_PREP(OP_LCSR_A_SRC, areg) |
		FIELD_PREP(OP_LCSR_B_SRC, breg) |
		FIELD_PREP(OP_LCSR_WRITE, wr) |
		FIELD_PREP(OP_LCSR_ADDR, addr / 4) |
		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
{
	struct nfp_insn_ur_regs reg;
	int err;

	/* This instruction takes immeds instead of reg_none() for the ignored
	 * operand, but we can't encode 2 immeds in one instr with our normal
	 * swreg infra so if param is an immed, we encode as reg_none() and
	 * copy the immed to both operands.
	 */
	if (swreg_type(src) == NN_REG_IMM) {
		err = swreg_to_unrestricted(reg_none(), src, reg_none(), &reg);
		reg.breg = reg.areg;
	} else {
		err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), &reg);
	}
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr,
		    false, reg.src_lmextn);
}

/* CSR value is read in following immed[gpr, 0] */
static void __emit_csr_rd(struct nfp_prog *nfp_prog, u16 addr)
{
	__emit_lcsr(nfp_prog, 0, 0, false, addr, false, false);
}

static void emit_nop(struct nfp_prog *nfp_prog)
{
	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
}

/* --- Wrappers --- */
static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
{
	if (!(imm & 0xffff0000)) {
		*val = imm;
		*shift = IMMED_SHIFT_0B;
	} else if (!(imm & 0xff0000ff)) {
		*val = imm >> 8;
		*shift = IMMED_SHIFT_1B;
	} else if (!(imm & 0x0000ffff)) {
		*val = imm >> 16;
		*shift = IMMED_SHIFT_2B;
	} else {
		return false;
	}

	return true;
}
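
/* Worked example (editor's illustration): imm = 0x00ab0000 fails the first
 * test (bits above 0xffff) but has no bits in 0xff0000ff, so it packs as
 * val = 0xab00 with IMMED_SHIFT_1B (0xab00 << 8 reconstructs the value).
 * imm = 0x12345678 has bits in every byte and cannot be packed at all.
 */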

static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
{
	enum immed_shift shift;
	u16 val;

	if (pack_immed(imm, &val, &shift)) {
		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
	} else if (pack_immed(~imm, &val, &shift)) {
		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
	} else {
		emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
			   false, IMMED_SHIFT_0B);
		emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
			   false, IMMED_SHIFT_2B);
	}
}
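
/* Worked example (editor's illustration): imm = 0x12345678 can't be packed
 * even inverted (~imm = 0xedcba987 still touches every byte), so it costs two
 * instructions: one IMMED_WIDTH_ALL write of the low half (0x5678) and one
 * IMMED_WIDTH_WORD write of 0x1234 shifted into the upper half.
 */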

static void
wrp_zext(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst)
{
	if (meta->flags & FLAG_INSN_DO_ZEXT)
		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
}

static void
wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
	       enum nfp_relo_type relo)
{
	if (imm > 0xffff) {
		pr_err("relocation of a large immediate!\n");
		nfp_prog->error = -EFAULT;
		return;
	}
	emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);

	nfp_prog->prog[nfp_prog->prog_len - 1] |=
		FIELD_PREP(OP_RELO_TYPE, relo);
}

/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
 * If the @imm is small enough encode it directly in the operand and return,
 * otherwise load @imm to a spare register and return its encoding.
 */
static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
{
	if (FIELD_FIT(UR_REG_IMM_MAX, imm))
		return reg_imm(imm);

	wrp_immed(nfp_prog, tmp_reg, imm);
	return tmp_reg;
}

/* re_load_imm_any() - encode immediate or use tmp register (restricted)
 * If the @imm is small enough encode it directly in the operand and return,
 * otherwise load @imm to a spare register and return its encoding.
 */
static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
{
	if (FIELD_FIT(RE_REG_IMM_MAX, imm))
		return reg_imm(imm);

	wrp_immed(nfp_prog, tmp_reg, imm);
	return tmp_reg;
}

static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count)
{
	while (count--)
		emit_nop(nfp_prog);
}

static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src)
{
	emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src);
}

static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
{
	wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
}

/* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the
 * result to @dst from the low end.
 */
static void
wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
		u8 offset)
{
	enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE;
	u8 mask = (1 << field_len) - 1;

	emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
}
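
/* Worked example (editor's illustration): field_len = 2, offset = 1 gives a
 * byte mask of 0x3 and a right shift of 8, i.e. bytes 1-2 of @src land in
 * bytes 0-1 of @dst and the remaining bytes are zeroed.
 */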

/* wrp_reg_or_subpart() - load @field_len bytes from the low end of @src, OR
 * the result into @dst at @offset; the other bits of @dst are unchanged.
 */
static void
wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src,
		   u8 field_len, u8 offset)
{
	enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE;
	u8 mask = ((1 << field_len) - 1) << offset;

	emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8);
}
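
/* Worked example (editor's illustration): field_len = 2, offset = 1 gives a
 * byte mask of 0x6 and a left shift of 8 (passed as 32 - 8 = 24 to match the
 * shift encoding), so bytes 0-1 of @src are merged into bytes 1-2 of @dst.
 */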

static void
addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
	      swreg *rega, swreg *regb)
{
	if (offset == reg_imm(0)) {
		*rega = reg_a(src_gpr);
		*regb = reg_b(src_gpr + 1);
		return;
	}

	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset);
	emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C,
		 reg_imm(0));
	*rega = imm_a(nfp_prog);
	*regb = imm_b(nfp_prog);
}
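
/* Editor's note: a 40-bit address lives in a GPR pair - low 32 bits in
 * src_gpr, upper 8 bits in src_gpr + 1 - so adding a non-zero offset takes
 * two ALU ops: ALU_OP_ADD on the low word and ALU_OP_ADD_C on the high word
 * to propagate the carry, staged in the imm_a()/imm_b() scratch registers.
 */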

/* NFP has a Command Push Pull bus which supports bulk memory operations. */
static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	bool descending_seq = meta->ldst_gather_len < 0;
	s16 len = abs(meta->ldst_gather_len);
	swreg src_base, off;
	bool src_40bit_addr;
	unsigned int i;
	u8 xfer_num;

	off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
	src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE;
	src_base = reg_a(meta->insn.src_reg * 2);
	xfer_num = round_up(len, 4) / 4;

	if (src_40bit_addr)
		addr40_offset(nfp_prog, meta->insn.src_reg * 2, off, &src_base,
			      &off);

	/* Setup PREV_ALU fields to override memory read length. */
	if (len > 32)
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));

	/* Memory read from source addr into transfer-in registers. */
	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
		     src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
		     src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32);

	/* Move from transfer-in to transfer-out. */
	for (i = 0; i < xfer_num; i++)
		wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i));

	off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog));

	if (len <= 8) {
		/* Use single direct_ref write8. */
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
			 CMD_CTX_SWAP);
	} else if (len <= 32 && IS_ALIGNED(len, 4)) {
		/* Use single direct_ref write32. */
		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
			 CMD_CTX_SWAP);
	} else if (len <= 32) {
		/* Use single indirect_ref write8. */
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       len - 1, CMD_CTX_SWAP);
	} else if (IS_ALIGNED(len, 4)) {
		/* Use single indirect_ref write32. */
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       xfer_num - 1, CMD_CTX_SWAP);
	} else if (len <= 40) {
		/* Use one direct_ref write32 to write the first 32 bytes, then
		 * another direct_ref write8 to write the remaining bytes.
		 */
		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, 7,
			 CMD_CTX_SWAP);

		off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
				      imm_b(nfp_prog));
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
			 reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
			 CMD_CTX_SWAP);
	} else {
		/* Use one indirect_ref write32 to write the 4-byte-aligned
		 * part of the length, then another direct_ref write8 to
		 * write the remaining bytes.
		 */
		u8 new_off;

		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       xfer_num - 2, CMD_CTX_SWAP);
		new_off = meta->paired_st->off + (xfer_num - 1) * 4;
		off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
			 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
			 (len & 0x3) - 1, CMD_CTX_SWAP);
	}

	/* TODO: The following extra load is to make sure data flow is
	 * identical before and after we do memory copy optimization.
	 *
	 * The load destination register is not guaranteed to be dead, so we
	 * need to make sure it is loaded with the same value it held before
	 * this transformation.
	 *
	 * These extra loads could be removed once we have accurate register
	 * usage information.
	 */
	if (descending_seq)
		xfer_num = 0;
	else if (BPF_SIZE(meta->insn.code) != BPF_DW)
		xfer_num = xfer_num - 1;
	else
		xfer_num = xfer_num - 2;

	switch (BPF_SIZE(meta->insn.code)) {
	case BPF_B:
		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
				reg_xfer(xfer_num), 1,
				IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1);
		break;
	case BPF_H:
		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
				reg_xfer(xfer_num), 2, (len & 3) ^ 2);
		break;
	case BPF_W:
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
			reg_xfer(0));
		break;
	case BPF_DW:
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
			reg_xfer(xfer_num));
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1),
			reg_xfer(xfer_num + 1));
		break;
	}

	if (BPF_SIZE(meta->insn.code) != BPF_DW)
		wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}
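
/* Worked example for nfp_cpp_memcpy() above (editor's illustration): a
 * gathered copy with len = 20 gives xfer_num = 5, so one read32_swap pulls
 * five words into the transfer registers and, because len is 4-byte aligned
 * and <= 32, a single direct_ref write32 with size field xfer_num - 1 = 4
 * pushes them back out.  Only len > 32 needs the PREV_ALU length override
 * and the indirect_ref command forms.
 */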

static int
data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, swreg offset,
	u8 dst_gpr, int size)
{
	unsigned int i;
	u16 shift, sz;

	/* We load the value from the address indicated in @offset and then
	 * shift out the data we don't need.  Note: this is big endian!
	 */
	sz = max(size, 4);
	shift = size < 4 ? 4 - size : 0;

	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
		 pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP);

	i = 0;
	if (shift)
		emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE,
			 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
	else
		for (; i * 4 < size; i++)
			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));

	if (i < 2)
		wrp_zext(nfp_prog, meta, dst_gpr);

	return 0;
}
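
/* Worked example (editor's illustration): a 2-byte data_ld() issues a 4-byte
 * CMD_TGT_READ8 (sz = 4) and then shifts the transfer register right by
 * (4 - 2) * 8 = 16 bits, so the big-endian value lands at the low end of the
 * destination GPR before the upper word is zero-extended.
 */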

static int
data_ld_host_order(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
		   u8 dst_gpr, swreg lreg, swreg rreg, int size,
		   enum cmd_mode mode)
{
	unsigned int i;
	u8 mask, sz;

	/* We load the value from the address indicated in rreg + lreg and then
	 * mask out the data we don't need.  Note: this is little endian!
	 */
	sz = max(size, 4);
	mask = size < 4 ? GENMASK(size - 1, 0) : 0;

	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
		 lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP);

	i = 0;
	if (mask)
		emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask,
				  reg_xfer(0), SHF_SC_NONE, 0, true);
	else
		for (; i * 4 < size; i++)
			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));

	if (i < 2)
		wrp_zext(nfp_prog, meta, dst_gpr);

	return 0;
}
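
/* Worked example (editor's illustration): a 2-byte host-order load still
 * reads a full word (sz = 4) but, being little endian, keeps the low bytes
 * with mask = GENMASK(1, 0) = 0x3 via ld_field instead of shifting like the
 * big-endian data_ld() above.
 */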

static int
data_ld_host_order_addr32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
			  u8 src_gpr, swreg offset, u8 dst_gpr, u8 size)
{
	return data_ld_host_order(nfp_prog, meta, dst_gpr, reg_a(src_gpr),
				  offset, size, CMD_MODE_32b);
}

static int
data_ld_host_order_addr40(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
			  u8 src_gpr, swreg offset, u8 dst_gpr, u8 size)
{
	swreg rega, regb;

	addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);

	return data_ld_host_order(nfp_prog, meta, dst_gpr, rega, regb,
				  size, CMD_MODE_40b_BA);
}

static int
construct_data_ind_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
		      u16 offset, u16 src, u8 size)
{
	swreg tmp_reg;

	/* Calculate the true offset (src_reg + imm) */
	tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
	emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg);

	/* Check packet length (size guaranteed to fit b/c it's u8) */
	emit_alu(nfp_prog, imm_a(nfp_prog),
		 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
	emit_alu(nfp_prog, reg_none(),
		 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog));
	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);

	/* Load data */
	return data_ld(nfp_prog, meta, imm_b(nfp_prog), 0, size);
}

static int
construct_data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
		  u16 offset, u8 size)
{
	swreg tmp_reg;

	/* Check packet length */
	tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog));
	emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);

	/* Load data */
	tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
	return data_ld(nfp_prog, meta, tmp_reg, 0, size);
}

static int
data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
		    u8 src_gpr, u8 size)
{
	unsigned int i;

	for (i = 0; i * 4 < size; i++)
		wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));

	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);

	return 0;
}

static int
data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
		   u64 imm, u8 size)
{
	wrp_immed(nfp_prog, reg_xfer(0), imm);
	if (size == 8)
		wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);

	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);

	return 0;
}

typedef int
(*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off,
	     unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	     bool needs_inc);

static int
wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
	      unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	      bool needs_inc)
{
	bool should_inc = needs_inc && new_gpr && !last;
	u32 idx, src_byte;
	enum shf_sc sc;
	swreg reg;
	int shf;
	u8 mask;

	if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4))
		return -EOPNOTSUPP;

	idx = off / 4;

	/* Move the entire word */
	if (size == 4) {
		wrp_mov(nfp_prog, reg_both(dst),
			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx));
		return 0;
	}

	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
		return -EOPNOTSUPP;

	src_byte = off % 4;

	mask = (1 << size) - 1;
	mask <<= dst_byte;

	if (WARN_ON_ONCE(mask > 0xf))
		return -EOPNOTSUPP;

	shf = abs(src_byte - dst_byte) * 8;
	if (src_byte == dst_byte) {
		sc = SHF_SC_NONE;
	} else if (src_byte < dst_byte) {
		shf = 32 - shf;
		sc = SHF_SC_L_SHF;
	} else {
		sc = SHF_SC_R_SHF;
	}

	/* ld_field can address fewer indexes, if offset too large do RMW.
	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
	 */
	if (idx <= RE_REG_LM_IDX_MAX) {
		reg = reg_lm(lm3 ? 3 : 0, idx);
	} else {
		reg = imm_a(nfp_prog);
		/* If it's not the first part of the load and we start a new
		 * GPR, that means we are loading a second part of the LMEM
		 * word into a new GPR.  IOW we've already looked at that LMEM
		 * word and therefore it has been loaded into imm_a().
		 */
		if (first || !new_gpr)
			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
	}

	emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);

	if (should_inc)
		wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));

	return 0;
}
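
/* Worked example (editor's illustration): loading 2 bytes from LMEM offset 6
 * into dst_byte 0 gives idx = 1, src_byte = 2, mask = 0x3 and a right shift
 * of (2 - 0) * 8 = 16, so ld_field moves bytes 2-3 of the LMEM word into
 * bytes 0-1 of the GPR.
 */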

static int
wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
	       unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	       bool needs_inc)
{
	bool should_inc = needs_inc && new_gpr && !last;
	u32 idx, dst_byte;
	enum shf_sc sc;
	swreg reg;
	int shf;
	u8 mask;

	if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4))
		return -EOPNOTSUPP;

	idx = off / 4;

	/* Move the entire word */
	if (size == 4) {
		wrp_mov(nfp_prog,
			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx),
			reg_b(src));
		return 0;
	}

	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
		return -EOPNOTSUPP;

	dst_byte = off % 4;

	mask = (1 << size) - 1;
	mask <<= dst_byte;

	if (WARN_ON_ONCE(mask > 0xf))
		return -EOPNOTSUPP;

	shf = abs(src_byte - dst_byte) * 8;
	if (src_byte == dst_byte) {
		sc = SHF_SC_NONE;
	} else if (src_byte < dst_byte) {
		shf = 32 - shf;
		sc = SHF_SC_L_SHF;
	} else {
		sc = SHF_SC_R_SHF;
	}

	/* ld_field can address fewer indexes, if offset too large do RMW.
	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
	 */
	if (idx <= RE_REG_LM_IDX_MAX) {
		reg = reg_lm(lm3 ? 3 : 0, idx);
	} else {
		reg = imm_a(nfp_prog);
		/* Only the first and last LMEM locations need RMW, the middle
		 * locations are overwritten fully.
		 */
		if (first || last)
			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
	}

	emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf);

	if (new_gpr || last) {
		if (idx > RE_REG_LM_IDX_MAX)
			wrp_mov(nfp_prog, reg_lm(0, idx), reg);
		if (should_inc)
			wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
	}

	return 0;
}

static int
mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
	     bool clr_gpr, lmem_step step)
{
	s32 off = nfp_prog->stack_frame_depth + meta->insn.off + ptr_off;
	bool first = true, narrow_ld, last;
	bool needs_inc = false;
	swreg stack_off_reg;
	u8 prev_gpr = 255;
	u32 gpr_byte = 0;
	bool lm3 = true;
	int ret;

	if (meta->ptr_not_const ||
	    meta->flags & FLAG_INSN_PTR_CALLER_STACK_FRAME) {
		/* Use of the last encountered ptr_off is OK, they all have
		 * the same alignment.  Depend on low bits of value being
		 * discarded when written to LMaddr register.
		 */
		stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off,
						stack_imm(nfp_prog));

		emit_alu(nfp_prog, imm_b(nfp_prog),
			 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg);

		needs_inc = true;
	} else if (off + size <= 64) {
		/* We can reach bottom 64B with LMaddr0 */
		lm3 = false;
	} else if (round_down(off, 32) == round_down(off + size - 1, 32)) {
		/* We have to set up a new pointer.  If we know the offset
		 * and the entire access falls into a single 32 byte aligned
		 * window we won't have to increment the LM pointer.
		 * The 32 byte alignment is important because offset is ORed
		 * in, not added, when doing *l$indexN[off].
		 */
		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32),
						stack_imm(nfp_prog));
		emit_alu(nfp_prog, imm_b(nfp_prog),
			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);

		off %= 32;
	} else {
		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4),
						stack_imm(nfp_prog));

		emit_alu(nfp_prog, imm_b(nfp_prog),
			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);

		needs_inc = true;
	}

	narrow_ld = clr_gpr && size < 8;

	if (lm3) {
		unsigned int nop_cnt;

		emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
		/* For size < 4 one slot will be filled by zeroing of upper,
		 * but be careful, that zeroing could be eliminated by zext
		 * optimization.
		 */
		nop_cnt = narrow_ld && meta->flags & FLAG_INSN_DO_ZEXT ? 2 : 3;
		wrp_nops(nfp_prog, nop_cnt);
	}

	if (narrow_ld)
		wrp_zext(nfp_prog, meta, gpr);

	while (size) {
		u32 slice_end;
		u8 slice_size;

		slice_size = min(size, 4 - gpr_byte);
		slice_end = min(off + slice_size, round_up(off + 1, 4));
		slice_size = slice_end - off;

		last = slice_size == size;

		if (needs_inc)
			off %= 4;

		ret = step(nfp_prog, gpr, gpr_byte, off, slice_size,
			   first, gpr != prev_gpr, last, lm3, needs_inc);
		if (ret)
			return ret;

		prev_gpr = gpr;
		first = false;

		gpr_byte += slice_size;
		if (gpr_byte >= 4) {
			gpr_byte -= 4;
			gpr++;
		}

		size -= slice_size;
		off += slice_size;
	}

	return 0;
}
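
/* Worked example (editor's illustration): a 4-byte access at off = 70 is out
 * of reach of LMaddr0 (off + size > 64), but round_down(70, 32) ==
 * round_down(73, 32), so the access fits one 32-byte window: LMaddr3 is
 * pointed at offset 64 and the walk continues at off % 32 = 6 with no
 * pointer increments needed.
 */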

static void
wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
{
	swreg tmp_reg;

	if (alu_op == ALU_OP_AND) {
		if (!imm)
			wrp_immed(nfp_prog, reg_both(dst), 0);
		if (!imm || !~imm)
			return;
	}
	if (alu_op == ALU_OP_OR) {
		if (!~imm)
			wrp_immed(nfp_prog, reg_both(dst), ~0U);
		if (!imm || !~imm)
			return;
	}
	if (alu_op == ALU_OP_XOR) {
		if (!~imm)
			emit_alu(nfp_prog, reg_both(dst), reg_none(),
				 ALU_OP_NOT, reg_b(dst));
		if (!imm || !~imm)
			return;
	}

	tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
}

static int
wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op, bool skip)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */

	if (skip) {
		meta->flags |= FLAG_INSN_SKIP_NOOP;
		return 0;
	}

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);

	return 0;
}

static int
wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op)
{
	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;

	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
	emit_alu(nfp_prog, reg_both(dst + 1),
		 reg_a(dst + 1), alu_op, reg_b(src + 1));

	return 0;
}

static int
wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op)
{
	const struct bpf_insn *insn = &meta->insn;
	u8 dst = insn->dst_reg * 2;

	wrp_alu_imm(nfp_prog, dst, alu_op, insn->imm);
	wrp_zext(nfp_prog, meta, dst);

	return 0;
}

static int
wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op)
{
	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;

	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
	wrp_zext(nfp_prog, meta, dst);

	return 0;
}

static void
wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
		 enum br_mask br_mask, u16 off)
{
	emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
	emit_br(nfp_prog, br_mask, off, 0);
}

static int
wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     enum alu_op alu_op, enum br_mask br_mask)
{
	const struct bpf_insn *insn = &meta->insn;

	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
			 insn->src_reg * 2, br_mask, insn->off);
	if (is_mbpf_jmp64(meta))
		wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
				 insn->src_reg * 2 + 1, br_mask, insn->off);

	return 0;
}

static const struct jmp_code_map {
	enum br_mask br_mask;
	bool swap;
} jmp_code_map[] = {
	[BPF_JGT >> 4]	= { BR_BLO, true },
	[BPF_JGE >> 4]	= { BR_BHS, false },
	[BPF_JLT >> 4]	= { BR_BLO, false },
	[BPF_JLE >> 4]	= { BR_BHS, true },
	[BPF_JSGT >> 4]	= { BR_BLT, true },
	[BPF_JSGE >> 4]	= { BR_BGE, false },
	[BPF_JSLT >> 4]	= { BR_BLT, false },
	[BPF_JSLE >> 4]	= { BR_BGE, true },
};
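
/* Editor's note: the "swap" flag covers comparisons with no direct branch
 * mask.  E.g. BPF_JGT dst, src is evaluated as src - dst with BR_BLO (branch
 * if borrow), which fires exactly when dst > src unsigned.
 */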

static const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta)
{
	unsigned int op;

	op = BPF_OP(meta->insn.code) >> 4;
	/* br_mask of 0 is BR_BEQ which we don't use in jump code table */
	if (WARN_ONCE(op >= ARRAY_SIZE(jmp_code_map) ||
		      !jmp_code_map[op].br_mask,
		      "no code found for jump instruction"))
		return NULL;

	return &jmp_code_map[op];
}

static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */
	const struct jmp_code_map *code;
	enum alu_op alu_op, carry_op;
	u8 reg = insn->dst_reg * 2;
	swreg tmp_reg;

	code = nfp_jmp_code_get(meta);
	if (!code)
		return -EINVAL;

	alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB;
	carry_op = meta->jump_neg_op ? ALU_OP_ADD_C : ALU_OP_SUB_C;

	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
	if (!code->swap)
		emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg);
	else
		emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg));

	if (is_mbpf_jmp64(meta)) {
		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
		if (!code->swap)
			emit_alu(nfp_prog, reg_none(),
				 reg_a(reg + 1), carry_op, tmp_reg);
		else
			emit_alu(nfp_prog, reg_none(),
				 tmp_reg, carry_op, reg_a(reg + 1));
	}

	emit_br(nfp_prog, code->br_mask, insn->off, 0);

	return 0;
}
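
/* Editor's note: for 64-bit compares, cmp_imm()/cmp_reg() subtract the low
 * words first and then use the carry variant (ALU_OP_SUB_C / ALU_OP_ADD_C)
 * on the high words, so the borrow propagates and the condition codes match
 * a full 64-bit subtraction before the branch.
 */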

static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	const struct jmp_code_map *code;
	u8 areg, breg;

	code = nfp_jmp_code_get(meta);
	if (!code)
		return -EINVAL;

	areg = insn->dst_reg * 2;
	breg = insn->src_reg * 2;

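	/* Swap areg/breg in place (XOR swap) when the comparison direction
	 * must be reversed for the available branch masks.
	 */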
	if (code->swap) {
		areg ^= breg;
		breg ^= areg;
		areg ^= breg;
	}

	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
	if (is_mbpf_jmp64(meta))
		emit_alu(nfp_prog, reg_none(),
			 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
	emit_br(nfp_prog, code->br_mask, insn->off, 0);

	return 0;
}

static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
{
	emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in,
		      SHF_SC_R_ROT, 8);
	emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out),
		      SHF_SC_R_ROT, 16);
}
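
/* Worked example (editor's illustration): wrp_end32() byte-swaps a 32-bit
 * word with two ld_field rotations.  For 0xaabbccdd: rotate right by 8 with
 * mask 0xf writes all four bytes, giving 0xddaabbcc; rotate right by 16 with
 * mask 0x5 then replaces bytes 0 and 2 from 0xbbccddaa, yielding 0xddccbbaa.
 */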

static void
wrp_mul_u32(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
	    swreg rreg, bool gen_high_half)
{
	emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_1, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_2, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_3, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_4, rreg);
	emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_32x32, MUL_LAST, reg_none());
	if (gen_high_half)
		emit_mul(nfp_prog, dst_hi, MUL_TYPE_STEP_32x32, MUL_LAST_2,
			 reg_none());
	else
		wrp_immed(nfp_prog, dst_hi, 0);
}

static void
wrp_mul_u16(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
	    swreg rreg)
{
	emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_1, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_2, rreg);
	emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_16x16, MUL_LAST, reg_none());
}
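
/* Editor's note: the NFP multiplier is a multi-step unit - a 32x32 multiply
 * takes the START step, four partial steps and MUL_LAST to collect the low
 * 32 bits (plus MUL_LAST_2 for the high half), while the 16x16 form needs
 * only two partial steps, which is why wrp_mul() below prefers it when both
 * operands are known to fit in 16 bits.
 */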

static int
wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	bool gen_high_half, bool ropnd_from_reg)
{
	swreg multiplier, multiplicand, dst_hi, dst_lo;
	const struct bpf_insn *insn = &meta->insn;
	u32 lopnd_max, ropnd_max;
	u8 dst_reg;

	dst_reg = insn->dst_reg;
	multiplicand = reg_a(dst_reg * 2);
	dst_hi = reg_both(dst_reg * 2 + 1);
	dst_lo = reg_both(dst_reg * 2);
	lopnd_max = meta->umax_dst;
	if (ropnd_from_reg) {
		multiplier = reg_b(insn->src_reg * 2);
		ropnd_max = meta->umax_src;
	} else {
		u32 imm = insn->imm;

		multiplier = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
		ropnd_max = imm;
	}
	if (lopnd_max > U16_MAX || ropnd_max > U16_MAX)
		wrp_mul_u32(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier,
			    gen_high_half);
	else
		wrp_mul_u16(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier);

	return 0;
}

1532 | static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm) |
1533 | { |
1534 | swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst); |
1535 | struct reciprocal_value_adv rvalue; |
1536 | u8 pre_shift, exp; |
1537 | swreg magic; |
1538 | |
1539 | if (imm > U32_MAX) { |
1540 | wrp_immed(nfp_prog, dst: dst_both, imm: 0); |
1541 | return 0; |
1542 | } |
1543 | |
1544 | /* NOTE: because we are using "reciprocal_value_adv" which doesn't |
1545 | * support "divisor > (1u << 31)", we need to JIT separate NFP sequence |
1546 | * to handle such case which actually equals to the result of unsigned |
1547 | * comparison "dst >= imm" which could be calculated using the following |
1548 | * NFP sequence: |
1549 | * |
1550 | * alu[--, dst, -, imm] |
1551 | * immed[imm, 0] |
1552 | * alu[dst, imm, +carry, 0] |
1553 | * |
1554 | */ |
1555 | if (imm > 1U << 31) { |
1556 | swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); |
1557 | |
1558 | emit_alu(nfp_prog, reg_none(), lreg: dst_a, op: ALU_OP_SUB, rreg: tmp_b); |
1559 | wrp_immed(nfp_prog, imm_a(nfp_prog), imm: 0); |
1560 | emit_alu(nfp_prog, dst: dst_both, imm_a(nfp_prog), op: ALU_OP_ADD_C, |
1561 | reg_imm(0)); |
1562 | return 0; |
1563 | } |
1564 | |
1565 | rvalue = reciprocal_value_adv(d: imm, prec: 32); |
1566 | exp = rvalue.exp; |
1567 | if (rvalue.is_wide_m && !(imm & 1)) { |
1568 | pre_shift = fls(x: imm & -imm) - 1; |
1569 | rvalue = reciprocal_value_adv(d: imm >> pre_shift, prec: 32 - pre_shift); |
1570 | } else { |
1571 | pre_shift = 0; |
1572 | } |
1573 | magic = ur_load_imm_any(nfp_prog, imm: rvalue.m, imm_b(nfp_prog)); |
	if (imm == 1U << exp) {
		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
			 SHF_SC_R_SHF, exp);
	} else if (rvalue.is_wide_m) {
		wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a,
			    magic, true);
		emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB,
			 imm_b(nfp_prog));
		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
			 SHF_SC_R_SHF, 1);
		emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD,
			 imm_b(nfp_prog));
		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
			 SHF_SC_R_SHF, rvalue.sh - 1);
	} else {
		if (pre_shift)
			emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
				 dst_b, SHF_SC_R_SHF, pre_shift);
		wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true);
		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
			 dst_b, SHF_SC_R_SHF, rvalue.sh);
	}
1596 | |
1597 | return 0; |
1598 | } |
1599 | |
1600 | static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1601 | { |
1602 | swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog); |
1603 | struct nfp_bpf_cap_adjust_head *adjust_head; |
1604 | u32 ret_einval, end; |
1605 | |
1606 | adjust_head = &nfp_prog->bpf->adjust_head; |
1607 | |
1608 | /* Optimized version - 5 vs 14 cycles */ |
1609 | if (nfp_prog->adjust_head_location != UINT_MAX) { |
1610 | if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n)) |
1611 | return -EINVAL; |
1612 | |
		emit_alu(nfp_prog, pptr_reg(nfp_prog),
			 reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog));
		emit_alu(nfp_prog, plen_reg(nfp_prog),
			 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
		emit_alu(nfp_prog, pv_len(nfp_prog),
			 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1619 | |
		wrp_immed(nfp_prog, reg_both(0), 0);
		wrp_immed(nfp_prog, reg_both(1), 0);
1622 | |
1623 | /* TODO: when adjust head is guaranteed to succeed we can |
1624 | * also eliminate the following if (r0 == 0) branch. |
1625 | */ |
1626 | |
1627 | return 0; |
1628 | } |
1629 | |
1630 | ret_einval = nfp_prog_current_offset(nfp_prog) + 14; |
1631 | end = ret_einval + 2; |
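
	/* Offset bookkeeping (ours, for illustration): the fallback path
	 * emits exactly 14 words before the -EINVAL target -- the pointer
	 * add, two words each for the min and max checks, three for the
	 * ETH_HLEN check, two ret-code immeds, the ld_field, the branch
	 * and its two deferred ALUs -- and "end" then skips the two
	 * -EINVAL immeds.
	 */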
1632 | |
1633 | /* We need to use a temp because offset is just a part of the pkt ptr */ |
	emit_alu(nfp_prog, tmp,
		 reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog));

	/* Validate result will fit within FW datapath constraints */
	emit_alu(nfp_prog, reg_none(),
		 tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min));
	emit_br(nfp_prog, BR_BLO, ret_einval, 0);
	emit_alu(nfp_prog, reg_none(),
		 reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp);
	emit_br(nfp_prog, BR_BLO, ret_einval, 0);
1644 | |
1645 | /* Validate the length is at least ETH_HLEN */ |
	emit_alu(nfp_prog, tmp_len,
		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
	emit_alu(nfp_prog, reg_none(),
		 tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN));
	emit_br(nfp_prog, BR_BMI, ret_einval, 0);

	/* Load the ret code */
	wrp_immed(nfp_prog, reg_both(0), 0);
	wrp_immed(nfp_prog, reg_both(1), 0);

	/* Modify the packet metadata */
	emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0);
1658 | |
1659 | /* Skip over the -EINVAL ret code (defer 2) */ |
	emit_br(nfp_prog, BR_UNC, end, 2);

	emit_alu(nfp_prog, plen_reg(nfp_prog),
		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
	emit_alu(nfp_prog, pv_len(nfp_prog),
		 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));

	/* return -EINVAL target */
	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
		return -EINVAL;

	wrp_immed(nfp_prog, reg_both(0), -22);
	wrp_immed(nfp_prog, reg_both(1), ~0);

	if (!nfp_prog_confirm_current_offset(nfp_prog, end))
1675 | return -EINVAL; |
1676 | |
1677 | return 0; |
1678 | } |
1679 | |
1680 | static int adjust_tail(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1681 | { |
1682 | u32 ret_einval, end; |
1683 | swreg plen, delta; |
1684 | |
1685 | BUILD_BUG_ON(plen_reg(nfp_prog) != reg_b(STATIC_REG_PKT_LEN)); |
1686 | |
1687 | plen = imm_a(nfp_prog); |
1688 | delta = reg_a(2 * 2); |
1689 | |
1690 | ret_einval = nfp_prog_current_offset(nfp_prog) + 9; |
1691 | end = nfp_prog_current_offset(nfp_prog) + 11; |
1692 | |
1693 | /* Calculate resulting length */ |
	emit_alu(nfp_prog, plen, plen_reg(nfp_prog), ALU_OP_ADD, delta);
	/* delta == 0 is not allowed by the kernel, add must overflow to make
	 * length smaller.
	 */
	emit_br(nfp_prog, BR_BCC, ret_einval, 0);
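	/* Worked example (ours): plen = 100, delta = -40 -> the 32-bit add
	 * wraps (carry set) leaving 60, so we fall through; delta = -200 ->
	 * no carry, the trim would underflow, and BR_BCC above takes the
	 * -EINVAL path.
	 */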
1699 | |
1700 | /* if (new_len < 14) then -EINVAL */ |
	emit_alu(nfp_prog, reg_none(), plen, ALU_OP_SUB, reg_imm(ETH_HLEN));
	emit_br(nfp_prog, BR_BMI, ret_einval, 0);
1703 | |
	emit_alu(nfp_prog, plen_reg(nfp_prog),
		 plen_reg(nfp_prog), ALU_OP_ADD, delta);
	emit_alu(nfp_prog, pv_len(nfp_prog),
		 pv_len(nfp_prog), ALU_OP_ADD, delta);

	emit_br(nfp_prog, BR_UNC, end, 2);
	wrp_immed(nfp_prog, reg_both(0), 0);
	wrp_immed(nfp_prog, reg_both(1), 0);

	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
		return -EINVAL;

	wrp_immed(nfp_prog, reg_both(0), -22);
	wrp_immed(nfp_prog, reg_both(1), ~0);

	if (!nfp_prog_confirm_current_offset(nfp_prog, end))
1720 | return -EINVAL; |
1721 | |
1722 | return 0; |
1723 | } |
1724 | |
1725 | static int |
1726 | map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1727 | { |
1728 | bool load_lm_ptr; |
1729 | u32 ret_tgt; |
1730 | s64 lm_off; |
1731 | |
1732 | /* We only have to reload LM0 if the key is not at start of stack */ |
1733 | lm_off = nfp_prog->stack_frame_depth; |
1734 | lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off; |
1735 | load_lm_ptr = meta->arg2.var_off || lm_off; |
1736 | |
1737 | /* Set LM0 to start of key */ |
1738 | if (load_lm_ptr) |
1739 | emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0); |
1740 | if (meta->func_id == BPF_FUNC_map_update_elem) |
1741 | emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2); |
1742 | |
	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
		     2, RELO_BR_HELPER);
	ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;

	/* Load map ID into A0 */
	wrp_mov(nfp_prog, reg_a(0), reg_a(2));

	/* Load the return address into B0 */
	wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);

	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
1754 | return -EINVAL; |
1755 | |
1756 | /* Reset the LM0 pointer */ |
1757 | if (!load_lm_ptr) |
1758 | return 0; |
1759 | |
1760 | emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0); |
	wrp_nops(nfp_prog, 3);
1762 | |
1763 | return 0; |
1764 | } |
1765 | |
1766 | static int |
1767 | nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1768 | { |
1769 | __emit_csr_rd(nfp_prog, NFP_CSR_PSEUDO_RND_NUM); |
1770 | /* CSR value is read in following immed[gpr, 0] */ |
	emit_immed(nfp_prog, reg_both(0), 0,
		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
	emit_immed(nfp_prog, reg_both(1), 0,
		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
1775 | return 0; |
1776 | } |
1777 | |
1778 | static int |
1779 | nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1780 | { |
1781 | swreg ptr_type; |
1782 | u32 ret_tgt; |
1783 | |
	ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog));

	ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;

	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
		     2, RELO_BR_HELPER);

	/* Load ptr type into A1 */
	wrp_mov(nfp_prog, reg_a(1), ptr_type);

	/* Load the return address into B0 */
	wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);

	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
1798 | return -EINVAL; |
1799 | |
1800 | return 0; |
1801 | } |
1802 | |
1803 | static int |
1804 | nfp_queue_select(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1805 | { |
1806 | u32 jmp_tgt; |
1807 | |
1808 | jmp_tgt = nfp_prog_current_offset(nfp_prog) + 5; |
1809 | |
1810 | /* Make sure the queue id fits into FW field */ |
	emit_alu(nfp_prog, reg_none(), reg_a(meta->insn.src_reg * 2),
		 ALU_OP_AND_NOT_B, reg_imm(0xff));
	emit_br(nfp_prog, BR_BEQ, jmp_tgt, 2);

	/* Set the 'queue selected' bit and the queue value */
	emit_shf(nfp_prog, pv_qsel_set(nfp_prog),
		 pv_qsel_set(nfp_prog), SHF_OP_OR, reg_imm(1),
		 SHF_SC_L_SHF, PKT_VEL_QSEL_SET_BIT);
	emit_ld_field(nfp_prog,
		      pv_qsel_val(nfp_prog), 0x1, reg_b(meta->insn.src_reg * 2),
		      SHF_SC_NONE, 0);
	/* Delay slots end here, we will jump over next instruction if queue
	 * value fits into the field.
	 */
	emit_ld_field(nfp_prog,
		      pv_qsel_val(nfp_prog), 0x1, reg_imm(NFP_NET_RXR_MAX),
		      SHF_SC_NONE, 0);

	if (!nfp_prog_confirm_current_offset(nfp_prog, jmp_tgt))
1830 | return -EINVAL; |
1831 | |
1832 | return 0; |
1833 | } |
1834 | |
1835 | /* --- Callbacks --- */ |
1836 | static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1837 | { |
1838 | const struct bpf_insn *insn = &meta->insn; |
1839 | u8 dst = insn->dst_reg * 2; |
1840 | u8 src = insn->src_reg * 2; |
1841 | |
1842 | if (insn->src_reg == BPF_REG_10) { |
1843 | swreg stack_depth_reg; |
1844 | |
		stack_depth_reg = ur_load_imm_any(nfp_prog,
						  nfp_prog->stack_frame_depth,
						  stack_imm(nfp_prog));
		emit_alu(nfp_prog, reg_both(dst), stack_reg(nfp_prog),
			 ALU_OP_ADD, stack_depth_reg);
		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	} else {
		wrp_reg_mov(nfp_prog, dst, src);
		wrp_reg_mov(nfp_prog, dst + 1, src + 1);
1854 | } |
1855 | |
1856 | return 0; |
1857 | } |
1858 | |
1859 | static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1860 | { |
1861 | u64 imm = meta->insn.imm; /* sign extend */ |
1862 | |
	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);
1865 | |
1866 | return 0; |
1867 | } |
1868 | |
1869 | static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1870 | { |
	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
}

static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
}

static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
}

static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
}

static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
}

static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
}
1897 | } |
1898 | |
1899 | static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1900 | { |
1901 | const struct bpf_insn *insn = &meta->insn; |
1902 | |
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
		 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
		 reg_b(insn->src_reg * 2));
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
		 reg_b(insn->src_reg * 2 + 1));
1909 | |
1910 | return 0; |
1911 | } |
1912 | |
1913 | static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1914 | { |
1915 | const struct bpf_insn *insn = &meta->insn; |
1916 | u64 imm = insn->imm; /* sign extend */ |
1917 | |
	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);
1920 | |
1921 | return 0; |
1922 | } |
1923 | |
1924 | static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1925 | { |
1926 | const struct bpf_insn *insn = &meta->insn; |
1927 | |
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
		 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
		 reg_b(insn->src_reg * 2));
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
		 reg_b(insn->src_reg * 2 + 1));
1934 | |
1935 | return 0; |
1936 | } |
1937 | |
1938 | static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1939 | { |
1940 | const struct bpf_insn *insn = &meta->insn; |
1941 | u64 imm = insn->imm; /* sign extend */ |
1942 | |
	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);
1945 | |
1946 | return 0; |
1947 | } |
1948 | |
1949 | static int mul_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1950 | { |
	return wrp_mul(nfp_prog, meta, true, true);
}

static int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_mul(nfp_prog, meta, true, false);
1957 | } |
1958 | |
1959 | static int div_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1960 | { |
1961 | const struct bpf_insn *insn = &meta->insn; |
1962 | |
	return wrp_div_imm(nfp_prog, insn->dst_reg * 2, insn->imm);
1964 | } |
1965 | |
1966 | static int div_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1967 | { |
	/* NOTE: the verifier hook has already rejected cases where it could
	 * not prove the source operand to be constant.
	 */
	return wrp_div_imm(nfp_prog, meta->insn.dst_reg * 2, meta->umin_src);
1972 | } |
1973 | |
1974 | static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1975 | { |
1976 | const struct bpf_insn *insn = &meta->insn; |
1977 | |
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0),
		 ALU_OP_SUB, reg_b(insn->dst_reg * 2));
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0),
		 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1));
1982 | |
1983 | return 0; |
1984 | } |
1985 | |
1986 | /* Pseudo code: |
1987 | * if shift_amt >= 32 |
1988 | * dst_high = dst_low << shift_amt[4:0] |
1989 | * dst_low = 0; |
1990 | * else |
1991 | * dst_high = (dst_high, dst_low) >> (32 - shift_amt) |
1992 | * dst_low = dst_low << shift_amt |
1993 | * |
1994 | * The indirect shift will use the same logic at runtime. |
1995 | */ |
1996 | static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) |
1997 | { |
1998 | if (!shift_amt) |
1999 | return 0; |
2000 | |
	if (shift_amt < 32) {
		emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1),
			 SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF,
			 32 - shift_amt);
		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
			 reg_b(dst), SHF_SC_L_SHF, shift_amt);
	} else if (shift_amt == 32) {
		wrp_reg_mov(nfp_prog, dst + 1, dst);
		wrp_immed(nfp_prog, reg_both(dst), 0);
	} else if (shift_amt > 32) {
		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
			 reg_b(dst), SHF_SC_L_SHF, shift_amt - 32);
		wrp_immed(nfp_prog, reg_both(dst), 0);
	}
2015 | |
2016 | return 0; |
2017 | } |
2018 | |
2019 | static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2020 | { |
2021 | const struct bpf_insn *insn = &meta->insn; |
2022 | u8 dst = insn->dst_reg * 2; |
2023 | |
	return __shl_imm64(nfp_prog, dst, insn->imm);
2025 | } |
2026 | |
2027 | static void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src) |
2028 | { |
	emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB,
		 reg_b(src));
	emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE,
		       reg_b(dst), SHF_SC_R_DSHF);
2034 | } |
2035 | |
2036 | /* NOTE: for indirect left shift, HIGH part should be calculated first. */ |
2037 | static void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src) |
2038 | { |
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
		       reg_b(dst), SHF_SC_L_SHF);
2042 | } |
2043 | |
2044 | static void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src) |
2045 | { |
2046 | shl_reg64_lt32_high(nfp_prog, dst, src); |
2047 | shl_reg64_lt32_low(nfp_prog, dst, src); |
2048 | } |
2049 | |
2050 | static void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src) |
2051 | { |
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
		       reg_b(dst), SHF_SC_L_SHF);
	wrp_immed(nfp_prog, reg_both(dst), 0);
2056 | } |
2057 | |
2058 | static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2059 | { |
2060 | const struct bpf_insn *insn = &meta->insn; |
2061 | u64 umin, umax; |
2062 | u8 dst, src; |
2063 | |
2064 | dst = insn->dst_reg * 2; |
2065 | umin = meta->umin_src; |
2066 | umax = meta->umax_src; |
2067 | if (umin == umax) |
		return __shl_imm64(nfp_prog, dst, umin);
2069 | |
2070 | src = insn->src_reg * 2; |
2071 | if (umax < 32) { |
2072 | shl_reg64_lt32(nfp_prog, dst, src); |
2073 | } else if (umin >= 32) { |
2074 | shl_reg64_ge32(nfp_prog, dst, src); |
2075 | } else { |
2076 | /* Generate different instruction sequences depending on runtime |
2077 | * value of shift amount. |
2078 | */ |
2079 | u16 label_ge32, label_end; |
2080 | |
		label_ge32 = nfp_prog_current_offset(nfp_prog) + 7;
		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);

		shl_reg64_lt32_high(nfp_prog, dst, src);
		label_end = nfp_prog_current_offset(nfp_prog) + 6;
		emit_br(nfp_prog, BR_UNC, label_end, 2);
		/* shl_reg64_lt32_low packed in delay slot. */
		shl_reg64_lt32_low(nfp_prog, dst, src);

		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
			return -EINVAL;
		shl_reg64_ge32(nfp_prog, dst, src);

		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
2095 | return -EINVAL; |
2096 | } |
2097 | |
2098 | return 0; |
2099 | } |
2100 | |
2101 | /* Pseudo code: |
2102 | * if shift_amt >= 32 |
2103 | * dst_high = 0; |
2104 | * dst_low = dst_high >> shift_amt[4:0] |
2105 | * else |
2106 | * dst_high = dst_high >> shift_amt |
2107 | * dst_low = (dst_high, dst_low) >> shift_amt |
2108 | * |
2109 | * The indirect shift will use the same logic at runtime. |
2110 | */ |
2111 | static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) |
2112 | { |
2113 | if (!shift_amt) |
2114 | return 0; |
2115 | |
	if (shift_amt < 32) {
		emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
			 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
	} else if (shift_amt == 32) {
		wrp_reg_mov(nfp_prog, dst, dst + 1);
		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	} else if (shift_amt > 32) {
		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	}
2129 | |
2130 | return 0; |
2131 | } |
2132 | |
2133 | static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2134 | { |
2135 | const struct bpf_insn *insn = &meta->insn; |
2136 | u8 dst = insn->dst_reg * 2; |
2137 | |
	return __shr_imm64(nfp_prog, dst, insn->imm);
2139 | } |
2140 | |
2141 | /* NOTE: for indirect right shift, LOW part should be calculated first. */ |
2142 | static void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src) |
2143 | { |
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
		       reg_b(dst + 1), SHF_SC_R_SHF);
2147 | } |
2148 | |
2149 | static void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src) |
2150 | { |
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
		       reg_b(dst), SHF_SC_R_DSHF);
2154 | } |
2155 | |
2156 | static void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src) |
2157 | { |
2158 | shr_reg64_lt32_low(nfp_prog, dst, src); |
2159 | shr_reg64_lt32_high(nfp_prog, dst, src); |
2160 | } |
2161 | |
2162 | static void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src) |
2163 | { |
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
		       reg_b(dst + 1), SHF_SC_R_SHF);
	wrp_immed(nfp_prog, reg_both(dst + 1), 0);
2168 | } |
2169 | |
2170 | static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2171 | { |
2172 | const struct bpf_insn *insn = &meta->insn; |
2173 | u64 umin, umax; |
2174 | u8 dst, src; |
2175 | |
2176 | dst = insn->dst_reg * 2; |
2177 | umin = meta->umin_src; |
2178 | umax = meta->umax_src; |
2179 | if (umin == umax) |
		return __shr_imm64(nfp_prog, dst, umin);
2181 | |
2182 | src = insn->src_reg * 2; |
2183 | if (umax < 32) { |
2184 | shr_reg64_lt32(nfp_prog, dst, src); |
2185 | } else if (umin >= 32) { |
2186 | shr_reg64_ge32(nfp_prog, dst, src); |
2187 | } else { |
2188 | /* Generate different instruction sequences depending on runtime |
2189 | * value of shift amount. |
2190 | */ |
2191 | u16 label_ge32, label_end; |
2192 | |
		label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
		shr_reg64_lt32_low(nfp_prog, dst, src);
		label_end = nfp_prog_current_offset(nfp_prog) + 6;
		emit_br(nfp_prog, BR_UNC, label_end, 2);
		/* shr_reg64_lt32_high packed in delay slot. */
		shr_reg64_lt32_high(nfp_prog, dst, src);

		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
			return -EINVAL;
		shr_reg64_ge32(nfp_prog, dst, src);

		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
2206 | return -EINVAL; |
2207 | } |
2208 | |
2209 | return 0; |
2210 | } |
2211 | |
/* Code logic is the same as __shr_imm64, except that ashr needs the
 * signedness bit, which is communicated through the PREV_ALU result.
 */
2215 | static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) |
2216 | { |
2217 | if (!shift_amt) |
2218 | return 0; |
2219 | |
	if (shift_amt < 32) {
		emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
			 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
		/* Set signedness bit. */
		emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
			 reg_imm(0));
		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
	} else if (shift_amt == 32) {
		/* NOTE: this also helps setting signedness bit. */
		wrp_reg_mov(nfp_prog, dst, dst + 1);
		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
			 reg_b(dst + 1), SHF_SC_R_SHF, 31);
	} else if (shift_amt > 32) {
		emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
			 reg_imm(0));
		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
			 reg_b(dst + 1), SHF_SC_R_SHF, 31);
	}
2241 | |
2242 | return 0; |
2243 | } |
2244 | |
2245 | static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2246 | { |
2247 | const struct bpf_insn *insn = &meta->insn; |
2248 | u8 dst = insn->dst_reg * 2; |
2249 | |
	return __ashr_imm64(nfp_prog, dst, insn->imm);
2251 | } |
2252 | |
2253 | static void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src) |
2254 | { |
2255 | /* NOTE: the first insn will set both indirect shift amount (source A) |
2256 | * and signedness bit (MSB of result). |
2257 | */ |
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
		       reg_b(dst + 1), SHF_SC_R_SHF);
2261 | } |
2262 | |
2263 | static void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src) |
2264 | { |
2265 | /* NOTE: it is the same as logic shift because we don't need to shift in |
2266 | * signedness bit when the shift amount is less than 32. |
2267 | */ |
2268 | return shr_reg64_lt32_low(nfp_prog, dst, src); |
2269 | } |
2270 | |
2271 | static void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src) |
2272 | { |
2273 | ashr_reg64_lt32_low(nfp_prog, dst, src); |
2274 | ashr_reg64_lt32_high(nfp_prog, dst, src); |
2275 | } |
2276 | |
2277 | static void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src) |
2278 | { |
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
		       reg_b(dst + 1), SHF_SC_R_SHF);
	emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
		 reg_b(dst + 1), SHF_SC_R_SHF, 31);
2284 | } |
2285 | |
2286 | /* Like ashr_imm64, but need to use indirect shift. */ |
2287 | static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2288 | { |
2289 | const struct bpf_insn *insn = &meta->insn; |
2290 | u64 umin, umax; |
2291 | u8 dst, src; |
2292 | |
2293 | dst = insn->dst_reg * 2; |
2294 | umin = meta->umin_src; |
2295 | umax = meta->umax_src; |
2296 | if (umin == umax) |
		return __ashr_imm64(nfp_prog, dst, umin);
2298 | |
2299 | src = insn->src_reg * 2; |
2300 | if (umax < 32) { |
2301 | ashr_reg64_lt32(nfp_prog, dst, src); |
2302 | } else if (umin >= 32) { |
2303 | ashr_reg64_ge32(nfp_prog, dst, src); |
2304 | } else { |
2305 | u16 label_ge32, label_end; |
2306 | |
		label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
		ashr_reg64_lt32_low(nfp_prog, dst, src);
		label_end = nfp_prog_current_offset(nfp_prog) + 6;
		emit_br(nfp_prog, BR_UNC, label_end, 2);
		/* ashr_reg64_lt32_high packed in delay slot. */
		ashr_reg64_lt32_high(nfp_prog, dst, src);

		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
			return -EINVAL;
		ashr_reg64_ge32(nfp_prog, dst, src);

		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
2320 | return -EINVAL; |
2321 | } |
2322 | |
2323 | return 0; |
2324 | } |
2325 | |
2326 | static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2327 | { |
2328 | const struct bpf_insn *insn = &meta->insn; |
2329 | |
	wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2);
	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
2332 | |
2333 | return 0; |
2334 | } |
2335 | |
2336 | static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2337 | { |
2338 | const struct bpf_insn *insn = &meta->insn; |
2339 | |
	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
2342 | |
2343 | return 0; |
2344 | } |
2345 | |
2346 | static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2347 | { |
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
}

static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR);
}

static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
}

static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND);
}

static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
}

static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR);
}

static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
}

static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD);
}

static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
}

static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB);
}
2394 | } |
2395 | |
2396 | static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2397 | { |
	return wrp_mul(nfp_prog, meta, false, true);
}

static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_mul(nfp_prog, meta, false, false);
2404 | } |
2405 | |
2406 | static int div_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2407 | { |
2408 | return div_reg64(nfp_prog, meta); |
2409 | } |
2410 | |
2411 | static int div_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2412 | { |
2413 | return div_imm64(nfp_prog, meta); |
2414 | } |
2415 | |
2416 | static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2417 | { |
2418 | u8 dst = meta->insn.dst_reg * 2; |
2419 | |
	emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
2421 | wrp_zext(nfp_prog, meta, dst); |
2422 | |
2423 | return 0; |
2424 | } |
2425 | |
2426 | static int |
2427 | __ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, |
2428 | u8 shift_amt) |
2429 | { |
2430 | if (shift_amt) { |
2431 | /* Set signedness bit (MSB of result). */ |
		emit_alu(nfp_prog, reg_none(), reg_a(dst), ALU_OP_OR,
			 reg_imm(0));
		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
			 reg_b(dst), SHF_SC_R_SHF, shift_amt);
2436 | } |
2437 | wrp_zext(nfp_prog, meta, dst); |
2438 | |
2439 | return 0; |
2440 | } |
2441 | |
2442 | static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2443 | { |
2444 | const struct bpf_insn *insn = &meta->insn; |
2445 | u64 umin, umax; |
2446 | u8 dst, src; |
2447 | |
2448 | dst = insn->dst_reg * 2; |
2449 | umin = meta->umin_src; |
2450 | umax = meta->umax_src; |
2451 | if (umin == umax) |
		return __ashr_imm(nfp_prog, meta, dst, umin);
2453 | |
2454 | src = insn->src_reg * 2; |
2455 | /* NOTE: the first insn will set both indirect shift amount (source A) |
2456 | * and signedness bit (MSB of result). |
2457 | */ |
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst));
	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
		       reg_b(dst), SHF_SC_R_SHF);
2461 | wrp_zext(nfp_prog, meta, dst); |
2462 | |
2463 | return 0; |
2464 | } |
2465 | |
2466 | static int ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2467 | { |
2468 | const struct bpf_insn *insn = &meta->insn; |
2469 | u8 dst = insn->dst_reg * 2; |
2470 | |
	return __ashr_imm(nfp_prog, meta, dst, insn->imm);
2472 | } |
2473 | |
2474 | static int |
2475 | __shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, |
2476 | u8 shift_amt) |
2477 | { |
2478 | if (shift_amt) |
		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
			 reg_b(dst), SHF_SC_R_SHF, shift_amt);
2481 | wrp_zext(nfp_prog, meta, dst); |
2482 | return 0; |
2483 | } |
2484 | |
2485 | static int shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2486 | { |
2487 | const struct bpf_insn *insn = &meta->insn; |
2488 | u8 dst = insn->dst_reg * 2; |
2489 | |
	return __shr_imm(nfp_prog, meta, dst, insn->imm);
2491 | } |
2492 | |
2493 | static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2494 | { |
2495 | const struct bpf_insn *insn = &meta->insn; |
2496 | u64 umin, umax; |
2497 | u8 dst, src; |
2498 | |
2499 | dst = insn->dst_reg * 2; |
2500 | umin = meta->umin_src; |
2501 | umax = meta->umax_src; |
2502 | if (umin == umax) |
		return __shr_imm(nfp_prog, meta, dst, umin);
2504 | |
2505 | src = insn->src_reg * 2; |
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
		       reg_b(dst), SHF_SC_R_SHF);
2509 | wrp_zext(nfp_prog, meta, dst); |
2510 | return 0; |
2511 | } |
2512 | |
2513 | static int |
2514 | __shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, |
2515 | u8 shift_amt) |
2516 | { |
2517 | if (shift_amt) |
		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
			 reg_b(dst), SHF_SC_L_SHF, shift_amt);
2520 | wrp_zext(nfp_prog, meta, dst); |
2521 | return 0; |
2522 | } |
2523 | |
2524 | static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2525 | { |
2526 | const struct bpf_insn *insn = &meta->insn; |
2527 | u8 dst = insn->dst_reg * 2; |
2528 | |
	return __shl_imm(nfp_prog, meta, dst, insn->imm);
2530 | } |
2531 | |
2532 | static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2533 | { |
2534 | const struct bpf_insn *insn = &meta->insn; |
2535 | u64 umin, umax; |
2536 | u8 dst, src; |
2537 | |
2538 | dst = insn->dst_reg * 2; |
2539 | umin = meta->umin_src; |
2540 | umax = meta->umax_src; |
2541 | if (umin == umax) |
		return __shl_imm(nfp_prog, meta, dst, umin);
2543 | |
2544 | src = insn->src_reg * 2; |
2545 | shl_reg64_lt32_low(nfp_prog, dst, src); |
2546 | wrp_zext(nfp_prog, meta, dst); |
2547 | return 0; |
2548 | } |
2549 | |
2550 | static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2551 | { |
2552 | const struct bpf_insn *insn = &meta->insn; |
2553 | u8 gpr = insn->dst_reg * 2; |
2554 | |
2555 | switch (insn->imm) { |
	case 16:
		emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr),
			      SHF_SC_R_ROT, 8);
		emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr),
			      SHF_SC_R_SHF, 16);

		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
		break;
	case 32:
		wrp_end32(nfp_prog, reg_a(gpr), gpr);
		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
		break;
	case 64:
		wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1));

		wrp_end32(nfp_prog, reg_a(gpr), gpr + 1);
		wrp_end32(nfp_prog, imm_a(nfp_prog), gpr);
2573 | break; |
2574 | } |
2575 | |
2576 | return 0; |
2577 | } |
2578 | |
2579 | static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2580 | { |
2581 | struct nfp_insn_meta *prev = nfp_meta_prev(meta); |
2582 | u32 imm_lo, imm_hi; |
2583 | u8 dst; |
2584 | |
2585 | dst = prev->insn.dst_reg * 2; |
2586 | imm_lo = prev->insn.imm; |
2587 | imm_hi = meta->insn.imm; |
2588 | |
	wrp_immed(nfp_prog, reg_both(dst), imm_lo);

	/* mov is always 1 insn, load imm may be two, so try to use mov */
	if (imm_hi == imm_lo)
		wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst));
	else
		wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi);
2596 | |
2597 | return 0; |
2598 | } |
2599 | |
2600 | static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2601 | { |
2602 | meta->double_cb = imm_ld8_part2; |
2603 | return 0; |
2604 | } |
2605 | |
2606 | static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2607 | { |
	return construct_data_ld(nfp_prog, meta, meta->insn.imm, 1);
}

static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ld(nfp_prog, meta, meta->insn.imm, 2);
}

static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ld(nfp_prog, meta, meta->insn.imm, 4);
2619 | } |
2620 | |
2621 | static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2622 | { |
	return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
				     meta->insn.src_reg * 2, 1);
}

static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
				     meta->insn.src_reg * 2, 2);
}

static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm,
				     meta->insn.src_reg * 2, 4);
2637 | } |
2638 | |
2639 | static int |
2640 | mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, |
2641 | unsigned int size, unsigned int ptr_off) |
2642 | { |
	return mem_op_stack(nfp_prog, meta, size, ptr_off,
			    meta->insn.dst_reg * 2, meta->insn.src_reg * 2,
			    true, wrp_lmem_load);
2646 | } |
2647 | |
2648 | static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, |
2649 | u8 size) |
2650 | { |
2651 | swreg dst = reg_both(meta->insn.dst_reg * 2); |
2652 | |
2653 | switch (meta->insn.off) { |
2654 | case offsetof(struct __sk_buff, len): |
2655 | if (size != sizeof_field(struct __sk_buff, len)) |
2656 | return -EOPNOTSUPP; |
2657 | wrp_mov(nfp_prog, dst, plen_reg(nfp_prog)); |
2658 | break; |
2659 | case offsetof(struct __sk_buff, data): |
2660 | if (size != sizeof_field(struct __sk_buff, data)) |
2661 | return -EOPNOTSUPP; |
2662 | wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); |
2663 | break; |
2664 | case offsetof(struct __sk_buff, data_end): |
2665 | if (size != sizeof_field(struct __sk_buff, data_end)) |
2666 | return -EOPNOTSUPP; |
		emit_alu(nfp_prog, dst,
			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
		break;
	default:
		return -EOPNOTSUPP;
	}

	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
2675 | |
2676 | return 0; |
2677 | } |
2678 | |
2679 | static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, |
2680 | u8 size) |
2681 | { |
2682 | swreg dst = reg_both(meta->insn.dst_reg * 2); |
2683 | |
2684 | switch (meta->insn.off) { |
2685 | case offsetof(struct xdp_md, data): |
2686 | if (size != sizeof_field(struct xdp_md, data)) |
2687 | return -EOPNOTSUPP; |
2688 | wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); |
2689 | break; |
2690 | case offsetof(struct xdp_md, data_end): |
2691 | if (size != sizeof_field(struct xdp_md, data_end)) |
2692 | return -EOPNOTSUPP; |
		emit_alu(nfp_prog, dst,
			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
		break;
	default:
		return -EOPNOTSUPP;
	}

	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
2701 | |
2702 | return 0; |
2703 | } |
2704 | |
2705 | static int |
2706 | mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, |
2707 | unsigned int size) |
2708 | { |
2709 | swreg tmp_reg; |
2710 | |
	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));

	return data_ld_host_order_addr32(nfp_prog, meta, meta->insn.src_reg * 2,
					 tmp_reg, meta->insn.dst_reg * 2, size);
2715 | } |
2716 | |
2717 | static int |
2718 | mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, |
2719 | unsigned int size) |
2720 | { |
2721 | swreg tmp_reg; |
2722 | |
	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));

	return data_ld_host_order_addr40(nfp_prog, meta, meta->insn.src_reg * 2,
					 tmp_reg, meta->insn.dst_reg * 2, size);
2727 | } |
2728 | |
2729 | static void |
2730 | mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog, |
2731 | struct nfp_insn_meta *meta) |
2732 | { |
2733 | s16 range_start = meta->pkt_cache.range_start; |
2734 | s16 range_end = meta->pkt_cache.range_end; |
2735 | swreg src_base, off; |
2736 | u8 xfer_num, len; |
2737 | bool indir; |
2738 | |
	off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog));
2740 | src_base = reg_a(meta->insn.src_reg * 2); |
2741 | len = range_end - range_start; |
2742 | xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH; |
2743 | |
2744 | indir = len > 8 * REG_WIDTH; |
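	/* e.g. (ours): a 40-byte cached range with REG_WIDTH == 4 needs
	 * xfer_num == 10 transfer registers -- more than the 8 a direct
	 * command can encode, hence the indirect form below.
	 */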
2745 | /* Setup PREV_ALU for indirect mode. */ |
2746 | if (indir) |
2747 | wrp_immed(nfp_prog, reg_none(), |
2748 | CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); |
2749 | |
2750 | /* Cache memory into transfer-in registers. */ |
	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
		     off, xfer_num - 1, CMD_CTX_SWAP, indir);
2753 | } |
2754 | |
2755 | static int |
2756 | mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog, |
2757 | struct nfp_insn_meta *meta, |
2758 | unsigned int size) |
2759 | { |
2760 | s16 range_start = meta->pkt_cache.range_start; |
2761 | s16 insn_off = meta->insn.off - range_start; |
2762 | swreg dst_lo, dst_hi, src_lo, src_mid; |
2763 | u8 dst_gpr = meta->insn.dst_reg * 2; |
2764 | u8 len_lo = size, len_mid = 0; |
2765 | u8 idx = insn_off / REG_WIDTH; |
2766 | u8 off = insn_off % REG_WIDTH; |
2767 | |
2768 | dst_hi = reg_both(dst_gpr + 1); |
2769 | dst_lo = reg_both(dst_gpr); |
2770 | src_lo = reg_xfer(idx); |
2771 | |
2772 | /* The read length could involve as many as three registers. */ |
2773 | if (size > REG_WIDTH - off) { |
2774 | /* Calculate the part in the second register. */ |
2775 | len_lo = REG_WIDTH - off; |
2776 | len_mid = size - len_lo; |
2777 | |
2778 | /* Calculate the part in the third register. */ |
2779 | if (size > 2 * REG_WIDTH - off) |
2780 | len_mid = REG_WIDTH; |
2781 | } |
2782 | |
	wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);

	if (!len_mid) {
		wrp_zext(nfp_prog, meta, dst_gpr);
		return 0;
	}

	src_mid = reg_xfer(idx + 1);

	if (size <= REG_WIDTH) {
		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
		wrp_zext(nfp_prog, meta, dst_gpr);
	} else {
		swreg src_hi = reg_xfer(idx + 2);

		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid,
				   REG_WIDTH - len_lo, len_lo);
		wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo,
				REG_WIDTH - len_lo);
		wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo,
				   len_lo);
	}
2805 | |
2806 | return 0; |
2807 | } |
2808 | |
2809 | static int |
2810 | mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog, |
2811 | struct nfp_insn_meta *meta, |
2812 | unsigned int size) |
2813 | { |
2814 | swreg dst_lo, dst_hi, src_lo; |
2815 | u8 dst_gpr, idx; |
2816 | |
2817 | idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH; |
2818 | dst_gpr = meta->insn.dst_reg * 2; |
2819 | dst_hi = reg_both(dst_gpr + 1); |
2820 | dst_lo = reg_both(dst_gpr); |
2821 | src_lo = reg_xfer(idx); |
2822 | |
	if (size < REG_WIDTH) {
		wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
		wrp_zext(nfp_prog, meta, dst_gpr);
	} else if (size == REG_WIDTH) {
		wrp_mov(nfp_prog, dst_lo, src_lo);
		wrp_zext(nfp_prog, meta, dst_gpr);
	} else {
		swreg src_hi = reg_xfer(idx + 1);

		wrp_mov(nfp_prog, dst_lo, src_lo);
		wrp_mov(nfp_prog, dst_hi, src_hi);
2834 | } |
2835 | |
2836 | return 0; |
2837 | } |
2838 | |
2839 | static int |
2840 | mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog, |
2841 | struct nfp_insn_meta *meta, unsigned int size) |
2842 | { |
2843 | u8 off = meta->insn.off - meta->pkt_cache.range_start; |
2844 | |
2845 | if (IS_ALIGNED(off, REG_WIDTH)) |
2846 | return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size); |
2847 | |
2848 | return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size); |
2849 | } |
2850 | |
2851 | static int |
2852 | mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, |
2853 | unsigned int size) |
2854 | { |
2855 | if (meta->ldst_gather_len) |
2856 | return nfp_cpp_memcpy(nfp_prog, meta); |
2857 | |
2858 | if (meta->ptr.type == PTR_TO_CTX) { |
2859 | if (nfp_prog->type == BPF_PROG_TYPE_XDP) |
2860 | return mem_ldx_xdp(nfp_prog, meta, size); |
2861 | else |
2862 | return mem_ldx_skb(nfp_prog, meta, size); |
2863 | } |
2864 | |
2865 | if (meta->ptr.type == PTR_TO_PACKET) { |
2866 | if (meta->pkt_cache.range_end) { |
2867 | if (meta->pkt_cache.do_init) |
2868 | mem_ldx_data_init_pktcache(nfp_prog, meta); |
2869 | |
2870 | return mem_ldx_data_from_pktcache(nfp_prog, meta, size); |
2871 | } else { |
2872 | return mem_ldx_data(nfp_prog, meta, size); |
2873 | } |
2874 | } |
2875 | |
2876 | if (meta->ptr.type == PTR_TO_STACK) |
		return mem_ldx_stack(nfp_prog, meta, size,
				     meta->ptr.off + meta->ptr.var_off.value);
2879 | |
2880 | if (meta->ptr.type == PTR_TO_MAP_VALUE) |
2881 | return mem_ldx_emem(nfp_prog, meta, size); |
2882 | |
2883 | return -EOPNOTSUPP; |
2884 | } |
2885 | |
2886 | static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2887 | { |
	return mem_ldx(nfp_prog, meta, 1);
}

static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_ldx(nfp_prog, meta, 2);
}

static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_ldx(nfp_prog, meta, 4);
}

static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_ldx(nfp_prog, meta, 8);
2904 | } |
2905 | |
2906 | static int |
2907 | mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, |
2908 | unsigned int size) |
2909 | { |
2910 | u64 imm = meta->insn.imm; /* sign extend */ |
2911 | swreg off_reg; |
2912 | |
	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));

	return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
				  imm, size);
2917 | } |
2918 | |
2919 | static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, |
2920 | unsigned int size) |
2921 | { |
2922 | if (meta->ptr.type == PTR_TO_PACKET) |
2923 | return mem_st_data(nfp_prog, meta, size); |
2924 | |
2925 | return -EOPNOTSUPP; |
2926 | } |
2927 | |
2928 | static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2929 | { |
	return mem_st(nfp_prog, meta, 1);
}

static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_st(nfp_prog, meta, 2);
}

static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_st(nfp_prog, meta, 4);
}

static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_st(nfp_prog, meta, 8);
2946 | } |
2947 | |
2948 | static int |
2949 | mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, |
2950 | unsigned int size) |
2951 | { |
2952 | swreg off_reg; |
2953 | |
	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));

	return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
				   meta->insn.src_reg * 2, size);
2958 | } |
2959 | |
2960 | static int |
2961 | mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, |
2962 | unsigned int size, unsigned int ptr_off) |
2963 | { |
	return mem_op_stack(nfp_prog, meta, size, ptr_off,
			    meta->insn.src_reg * 2, meta->insn.dst_reg * 2,
			    false, wrp_lmem_store);
2967 | } |
2968 | |
2969 | static int mem_stx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2970 | { |
2971 | switch (meta->insn.off) { |
2972 | case offsetof(struct xdp_md, rx_queue_index): |
2973 | return nfp_queue_select(nfp_prog, meta); |
2974 | } |
2975 | |
2976 | WARN_ON_ONCE(1); /* verifier should have rejected bad accesses */ |
2977 | return -EOPNOTSUPP; |
2978 | } |
2979 | |
2980 | static int |
2981 | mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, |
2982 | unsigned int size) |
2983 | { |
2984 | if (meta->ptr.type == PTR_TO_PACKET) |
2985 | return mem_stx_data(nfp_prog, meta, size); |
2986 | |
2987 | if (meta->ptr.type == PTR_TO_STACK) |
		return mem_stx_stack(nfp_prog, meta, size,
				     meta->ptr.off + meta->ptr.var_off.value);
2990 | |
2991 | return -EOPNOTSUPP; |
2992 | } |
2993 | |
2994 | static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2995 | { |
	return mem_stx(nfp_prog, meta, 1);
}

static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_stx(nfp_prog, meta, 2);
}

static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	if (meta->ptr.type == PTR_TO_CTX)
		if (nfp_prog->type == BPF_PROG_TYPE_XDP)
			return mem_stx_xdp(nfp_prog, meta);
	return mem_stx(nfp_prog, meta, 4);
}

static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_stx(nfp_prog, meta, 8);
3015 | } |
3016 | |
3017 | static int |
3018 | mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64) |
3019 | { |
3020 | u8 dst_gpr = meta->insn.dst_reg * 2; |
3021 | u8 src_gpr = meta->insn.src_reg * 2; |
3022 | unsigned int full_add, out; |
3023 | swreg addra, addrb, off; |
3024 | |
	off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
3026 | |
	/* We can fit 16 bits into the command immediate. If we know the
	 * immediate is guaranteed to either always or never fit into 16
	 * bits, we only generate code for that particular case; otherwise
	 * we generate code for both.
	 */
3032 | out = nfp_prog_current_offset(nfp_prog); |
3033 | full_add = nfp_prog_current_offset(nfp_prog); |
3034 | |
3035 | if (meta->insn.off) { |
3036 | out += 2; |
3037 | full_add += 2; |
3038 | } |
3039 | if (meta->xadd_maybe_16bit) { |
3040 | out += 3; |
3041 | full_add += 3; |
3042 | } |
3043 | if (meta->xadd_over_16bit) |
3044 | out += 2 + is64; |
3045 | if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) { |
3046 | out += 5; |
3047 | full_add += 5; |
3048 | } |
3049 | |
3050 | /* Generate the branch for choosing add_imm vs add */ |
3051 | if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) { |
3052 | swreg max_imm = imm_a(nfp_prog); |
3053 | |
		wrp_immed(nfp_prog, max_imm, 0xffff);
		emit_alu(nfp_prog, reg_none(),
			 max_imm, ALU_OP_SUB, reg_b(src_gpr));
		emit_alu(nfp_prog, reg_none(),
			 reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1));
		emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0);
3060 | /* defer for add */ |
3061 | } |
3062 | |
3063 | /* If insn has an offset add to the address */ |
3064 | if (!meta->insn.off) { |
3065 | addra = reg_a(dst_gpr); |
3066 | addrb = reg_b(dst_gpr + 1); |
3067 | } else { |
		emit_alu(nfp_prog, imma_a(nfp_prog),
			 reg_a(dst_gpr), ALU_OP_ADD, off);
		emit_alu(nfp_prog, imma_b(nfp_prog),
			 reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0));
3072 | addra = imma_a(nfp_prog); |
3073 | addrb = imma_b(nfp_prog); |
3074 | } |
3075 | |
3076 | /* Generate the add_imm if 16 bits are possible */ |
3077 | if (meta->xadd_maybe_16bit) { |
3078 | swreg prev_alu = imm_a(nfp_prog); |
3079 | |
		wrp_immed(nfp_prog, prev_alu,
			  FIELD_PREP(CMD_OVE_DATA, 2) |
			  CMD_OVE_LEN |
			  FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
		wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
		emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
			       addra, addrb, 0, CMD_CTX_NO_SWAP);

		if (meta->xadd_over_16bit)
			emit_br(nfp_prog, BR_UNC, out, 0);
3090 | } |
3091 | |
	if (!nfp_prog_confirm_current_offset(nfp_prog, full_add))
3093 | return -EINVAL; |
3094 | |
3095 | /* Generate the add if 16 bits are not guaranteed */ |
3096 | if (meta->xadd_over_16bit) { |
		emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0,
			 addra, addrb, is64 << 2,
			 is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1);
3100 | |
3101 | wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr)); |
3102 | if (is64) |
3103 | wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1)); |
3104 | } |
3105 | |
	if (!nfp_prog_confirm_current_offset(nfp_prog, out))
3107 | return -EINVAL; |
3108 | |
3109 | return 0; |
3110 | } |
3111 | |
3112 | static int mem_atomic4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
3113 | { |
3114 | if (meta->insn.imm != BPF_ADD) |
3115 | return -EOPNOTSUPP; |
3116 | |
	return mem_xadd(nfp_prog, meta, false);
3118 | } |
3119 | |
3120 | static int mem_atomic8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
3121 | { |
3122 | if (meta->insn.imm != BPF_ADD) |
3123 | return -EOPNOTSUPP; |
3124 | |
	return mem_xadd(nfp_prog, meta, true);
3126 | } |
3127 | |
3128 | static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
3129 | { |
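	/* The branch target is still the BPF-level jump offset at this point;
	 * nfp_fixup_branches() rewrites it to the NFP offset of the
	 * destination instruction.
	 */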
	emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);

	return 0;
}

static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */
	swreg or1, or2, tmp_reg;

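	/* Test 64-bit equality by XORing each 32-bit half with the matching
	 * immediate word and ORing the results: the OR is zero iff all 64
	 * bits are equal. Halves whose immediate word is zero are tested
	 * directly.
	 */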
	or1 = reg_a(insn->dst_reg * 2);
	or2 = reg_b(insn->dst_reg * 2 + 1);

	if (imm & ~0U) {
		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
		emit_alu(nfp_prog, imm_a(nfp_prog),
			 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
		or1 = imm_a(nfp_prog);
	}

	if (imm >> 32) {
		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
		emit_alu(nfp_prog, imm_b(nfp_prog),
			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
		or2 = imm_b(nfp_prog);
	}

	emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
	emit_br(nfp_prog, BR_BEQ, insn->off, 0);

	return 0;
}

static int jeq32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	swreg tmp_reg;

	tmp_reg = ur_load_imm_any(nfp_prog, insn->imm, imm_b(nfp_prog));
	emit_alu(nfp_prog, reg_none(),
		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
	emit_br(nfp_prog, BR_BEQ, insn->off, 0);

	return 0;
}

static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */
	u8 dst_gpr = insn->dst_reg * 2;
	swreg tmp_reg;

	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
	emit_alu(nfp_prog, imm_b(nfp_prog),
		 reg_a(dst_gpr), ALU_OP_AND, tmp_reg);
	/* Upper word of the mask can only be 0 or ~0 from sign extension,
	 * so either ignore it or OR the whole thing in.
	 */
	if (is_mbpf_jmp64(meta) && imm >> 32)
		emit_alu(nfp_prog, reg_none(),
			 reg_a(dst_gpr + 1), ALU_OP_OR, imm_b(nfp_prog));
	emit_br(nfp_prog, BR_BNE, insn->off, 0);

	return 0;
}

static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */
	bool is_jmp32 = is_mbpf_jmp32(meta);
	swreg tmp_reg;

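	/* A zero immediate needs no XOR: test the register (pair) for any
	 * set bit directly.
	 */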
	if (!imm) {
		if (is_jmp32)
			emit_alu(nfp_prog, reg_none(), reg_none(), ALU_OP_NONE,
				 reg_b(insn->dst_reg * 2));
		else
			emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
				 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
		emit_br(nfp_prog, BR_BNE, insn->off, 0);
		return 0;
	}

	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
	emit_alu(nfp_prog, reg_none(),
		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
	emit_br(nfp_prog, BR_BNE, insn->off, 0);

	if (is_jmp32)
		return 0;

	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
	emit_alu(nfp_prog, reg_none(),
		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
	emit_br(nfp_prog, BR_BNE, insn->off, 0);

	return 0;
}

static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
		 ALU_OP_XOR, reg_b(insn->src_reg * 2));
	if (is_mbpf_jmp64(meta)) {
		emit_alu(nfp_prog, imm_b(nfp_prog),
			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR,
			 reg_b(insn->src_reg * 2 + 1));
		emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR,
			 imm_b(nfp_prog));
	}
	emit_br(nfp_prog, BR_BEQ, insn->off, 0);

	return 0;
}

static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
}

static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
}

static int
bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	u32 ret_tgt, stack_depth, offset_br;
	swreg tmp_reg;

	stack_depth = round_up(nfp_prog->stack_frame_depth, STACK_FRAME_ALIGN);
	/* Space for saving the return address is accounted for by the callee,
	 * so stack_depth can be zero for the main function.
	 */
	if (stack_depth) {
		tmp_reg = ur_load_imm_any(nfp_prog, stack_depth,
					  stack_imm(nfp_prog));
		emit_alu(nfp_prog, stack_reg(nfp_prog),
			 stack_reg(nfp_prog), ALU_OP_ADD, tmp_reg);
		emit_csr_wr(nfp_prog, stack_reg(nfp_prog),
			    NFP_CSR_ACT_LM_ADDR0);
	}

	/* Two cases for jumping to the callee:
	 *
	 * - If the callee uses and needs to save R6~R9 then:
	 *     1. Put the start offset of the callee into imm_b(). This will
	 *        require a fixup step, as we do not necessarily know this
	 *        address yet.
	 *     2. Put the return address from the callee to the caller into
	 *        register ret_reg().
	 *     3. (After defer slots are consumed) Jump to the subroutine that
	 *        pushes the registers to the stack.
	 *   The subroutine acts as a trampoline, and returns to the address in
	 *   imm_b(), i.e. jumps to the callee.
	 *
	 * - If the callee does not need to save R6~R9 then just load the
	 *   return address to the caller in ret_reg(), and jump to the callee
	 *   directly.
	 *
	 * Using ret_reg() to pass the return address to the callee is set here
	 * as a convention. The callee can then push this address onto its
	 * stack frame in its prologue. The advantages of passing the return
	 * address through ret_reg(), instead of pushing it to the stack right
	 * here, are the following:
	 * - It looks cleaner.
	 * - If the called function is called multiple times, we get a lower
	 *   program size.
	 * - We save the two no-op instructions that would otherwise be needed
	 *   just before the emit_br() when stack depth is not null.
	 * - If we ever find a register to hold the return address during the
	 *   whole execution of the callee, we will not have to push the
	 *   return address to the stack for leaf functions.
	 */
	if (!meta->jmp_dst) {
		pr_err("BUG: BPF-to-BPF call has no destination recorded\n");
		return -ELOOP;
	}
	if (nfp_prog->subprog[meta->jmp_dst->subprog_idx].needs_reg_push) {
		ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
		emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2,
			     RELO_BR_GO_CALL_PUSH_REGS);
		offset_br = nfp_prog_current_offset(nfp_prog);
		wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL);
	} else {
		ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
		emit_br(nfp_prog, BR_UNC, meta->insn.imm, 1);
		offset_br = nfp_prog_current_offset(nfp_prog);
	}
	wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL);

	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
		return -EINVAL;

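	/* The callee returns here (ret_tgt); rewind LM_ADDR0 by the stack
	 * depth added above to restore the caller's stack frame.
	 */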
	if (stack_depth) {
		tmp_reg = ur_load_imm_any(nfp_prog, stack_depth,
					  stack_imm(nfp_prog));
		emit_alu(nfp_prog, stack_reg(nfp_prog),
			 stack_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
		emit_csr_wr(nfp_prog, stack_reg(nfp_prog),
			    NFP_CSR_ACT_LM_ADDR0);
		wrp_nops(nfp_prog, 3);
	}

	meta->num_insns_after_br = nfp_prog_current_offset(nfp_prog);
	meta->num_insns_after_br -= offset_br;

	return 0;
}

static int helper_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	switch (meta->insn.imm) {
	case BPF_FUNC_xdp_adjust_head:
		return adjust_head(nfp_prog, meta);
	case BPF_FUNC_xdp_adjust_tail:
		return adjust_tail(nfp_prog, meta);
	case BPF_FUNC_map_lookup_elem:
	case BPF_FUNC_map_update_elem:
	case BPF_FUNC_map_delete_elem:
		return map_call_stack_common(nfp_prog, meta);
	case BPF_FUNC_get_prandom_u32:
		return nfp_get_prandom_u32(nfp_prog, meta);
	case BPF_FUNC_perf_event_output:
		return nfp_perf_event_output(nfp_prog, meta);
	default:
		WARN_ONCE(1, "verifier allowed unsupported function\n");
		return -EOPNOTSUPP;
	}
}

static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	if (is_mbpf_pseudo_call(meta))
		return bpf_to_bpf_call(nfp_prog, meta);
	else
		return helper_call(nfp_prog, meta);
}

static bool nfp_is_main_function(struct nfp_insn_meta *meta)
{
	return meta->subprog_idx == 0;
}

static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT);

	return 0;
}

static int
nfp_subprog_epilogue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	if (nfp_prog->subprog[meta->subprog_idx].needs_reg_push) {
		/* Pop R6~R9 from the stack via the related subroutine.
		 * We loaded the return address to the caller into ret_reg(),
		 * which means the subroutine does not come back here; we
		 * make it jump back to the subprogram caller directly!
		 */
		emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1,
			     RELO_BR_GO_CALL_POP_REGS);
		/* Pop return address from the stack. */
		wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
	} else {
		/* Pop return address from the stack. */
		wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
		/* Jump back to caller if no callee-saved registers were used
		 * by the subprogram.
		 */
		emit_rtn(nfp_prog, ret_reg(nfp_prog), 0);
	}

	return 0;
}

static int jmp_exit(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	if (nfp_is_main_function(meta))
		return goto_out(nfp_prog, meta);
	else
		return nfp_subprog_epilogue(nfp_prog, meta);
}

static const instr_cb_t instr_cb[256] = {
	[BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64,
	[BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64,
	[BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64,
	[BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64,
	[BPF_ALU64 | BPF_AND | BPF_X] = and_reg64,
	[BPF_ALU64 | BPF_AND | BPF_K] = and_imm64,
	[BPF_ALU64 | BPF_OR | BPF_X] = or_reg64,
	[BPF_ALU64 | BPF_OR | BPF_K] = or_imm64,
	[BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64,
	[BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64,
	[BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64,
	[BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64,
	[BPF_ALU64 | BPF_MUL | BPF_X] = mul_reg64,
	[BPF_ALU64 | BPF_MUL | BPF_K] = mul_imm64,
	[BPF_ALU64 | BPF_DIV | BPF_X] = div_reg64,
	[BPF_ALU64 | BPF_DIV | BPF_K] = div_imm64,
	[BPF_ALU64 | BPF_NEG] = neg_reg64,
	[BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64,
	[BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64,
	[BPF_ALU64 | BPF_RSH | BPF_X] = shr_reg64,
	[BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64,
	[BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64,
	[BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64,
	[BPF_ALU | BPF_MOV | BPF_X] = mov_reg,
	[BPF_ALU | BPF_MOV | BPF_K] = mov_imm,
	[BPF_ALU | BPF_XOR | BPF_X] = xor_reg,
	[BPF_ALU | BPF_XOR | BPF_K] = xor_imm,
	[BPF_ALU | BPF_AND | BPF_X] = and_reg,
	[BPF_ALU | BPF_AND | BPF_K] = and_imm,
	[BPF_ALU | BPF_OR | BPF_X] = or_reg,
	[BPF_ALU | BPF_OR | BPF_K] = or_imm,
	[BPF_ALU | BPF_ADD | BPF_X] = add_reg,
	[BPF_ALU | BPF_ADD | BPF_K] = add_imm,
	[BPF_ALU | BPF_SUB | BPF_X] = sub_reg,
	[BPF_ALU | BPF_SUB | BPF_K] = sub_imm,
	[BPF_ALU | BPF_MUL | BPF_X] = mul_reg,
	[BPF_ALU | BPF_MUL | BPF_K] = mul_imm,
	[BPF_ALU | BPF_DIV | BPF_X] = div_reg,
	[BPF_ALU | BPF_DIV | BPF_K] = div_imm,
	[BPF_ALU | BPF_NEG] = neg_reg,
	[BPF_ALU | BPF_LSH | BPF_X] = shl_reg,
	[BPF_ALU | BPF_LSH | BPF_K] = shl_imm,
	[BPF_ALU | BPF_RSH | BPF_X] = shr_reg,
	[BPF_ALU | BPF_RSH | BPF_K] = shr_imm,
	[BPF_ALU | BPF_ARSH | BPF_X] = ashr_reg,
	[BPF_ALU | BPF_ARSH | BPF_K] = ashr_imm,
	[BPF_ALU | BPF_END | BPF_X] = end_reg32,
	[BPF_LD | BPF_IMM | BPF_DW] = imm_ld8,
	[BPF_LD | BPF_ABS | BPF_B] = data_ld1,
	[BPF_LD | BPF_ABS | BPF_H] = data_ld2,
	[BPF_LD | BPF_ABS | BPF_W] = data_ld4,
	[BPF_LD | BPF_IND | BPF_B] = data_ind_ld1,
	[BPF_LD | BPF_IND | BPF_H] = data_ind_ld2,
	[BPF_LD | BPF_IND | BPF_W] = data_ind_ld4,
	[BPF_LDX | BPF_MEM | BPF_B] = mem_ldx1,
	[BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2,
	[BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4,
	[BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8,
	[BPF_STX | BPF_MEM | BPF_B] = mem_stx1,
	[BPF_STX | BPF_MEM | BPF_H] = mem_stx2,
	[BPF_STX | BPF_MEM | BPF_W] = mem_stx4,
	[BPF_STX | BPF_MEM | BPF_DW] = mem_stx8,
	[BPF_STX | BPF_ATOMIC | BPF_W] = mem_atomic4,
	[BPF_STX | BPF_ATOMIC | BPF_DW] = mem_atomic8,
	[BPF_ST | BPF_MEM | BPF_B] = mem_st1,
	[BPF_ST | BPF_MEM | BPF_H] = mem_st2,
	[BPF_ST | BPF_MEM | BPF_W] = mem_st4,
	[BPF_ST | BPF_MEM | BPF_DW] = mem_st8,
	[BPF_JMP | BPF_JA | BPF_K] = jump,
	[BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm,
	[BPF_JMP | BPF_JGT | BPF_K] = cmp_imm,
	[BPF_JMP | BPF_JGE | BPF_K] = cmp_imm,
	[BPF_JMP | BPF_JLT | BPF_K] = cmp_imm,
	[BPF_JMP | BPF_JLE | BPF_K] = cmp_imm,
	[BPF_JMP | BPF_JSGT | BPF_K] = cmp_imm,
	[BPF_JMP | BPF_JSGE | BPF_K] = cmp_imm,
	[BPF_JMP | BPF_JSLT | BPF_K] = cmp_imm,
	[BPF_JMP | BPF_JSLE | BPF_K] = cmp_imm,
	[BPF_JMP | BPF_JSET | BPF_K] = jset_imm,
	[BPF_JMP | BPF_JNE | BPF_K] = jne_imm,
	[BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg,
	[BPF_JMP | BPF_JGT | BPF_X] = cmp_reg,
	[BPF_JMP | BPF_JGE | BPF_X] = cmp_reg,
	[BPF_JMP | BPF_JLT | BPF_X] = cmp_reg,
	[BPF_JMP | BPF_JLE | BPF_X] = cmp_reg,
	[BPF_JMP | BPF_JSGT | BPF_X] = cmp_reg,
	[BPF_JMP | BPF_JSGE | BPF_X] = cmp_reg,
	[BPF_JMP | BPF_JSLT | BPF_X] = cmp_reg,
	[BPF_JMP | BPF_JSLE | BPF_X] = cmp_reg,
	[BPF_JMP | BPF_JSET | BPF_X] = jset_reg,
	[BPF_JMP | BPF_JNE | BPF_X] = jne_reg,
	[BPF_JMP32 | BPF_JEQ | BPF_K] = jeq32_imm,
	[BPF_JMP32 | BPF_JGT | BPF_K] = cmp_imm,
	[BPF_JMP32 | BPF_JGE | BPF_K] = cmp_imm,
	[BPF_JMP32 | BPF_JLT | BPF_K] = cmp_imm,
	[BPF_JMP32 | BPF_JLE | BPF_K] = cmp_imm,
	[BPF_JMP32 | BPF_JSGT | BPF_K] = cmp_imm,
	[BPF_JMP32 | BPF_JSGE | BPF_K] = cmp_imm,
	[BPF_JMP32 | BPF_JSLT | BPF_K] = cmp_imm,
	[BPF_JMP32 | BPF_JSLE | BPF_K] = cmp_imm,
	[BPF_JMP32 | BPF_JSET | BPF_K] = jset_imm,
	[BPF_JMP32 | BPF_JNE | BPF_K] = jne_imm,
	[BPF_JMP32 | BPF_JEQ | BPF_X] = jeq_reg,
	[BPF_JMP32 | BPF_JGT | BPF_X] = cmp_reg,
	[BPF_JMP32 | BPF_JGE | BPF_X] = cmp_reg,
	[BPF_JMP32 | BPF_JLT | BPF_X] = cmp_reg,
	[BPF_JMP32 | BPF_JLE | BPF_X] = cmp_reg,
	[BPF_JMP32 | BPF_JSGT | BPF_X] = cmp_reg,
	[BPF_JMP32 | BPF_JSGE | BPF_X] = cmp_reg,
	[BPF_JMP32 | BPF_JSLT | BPF_X] = cmp_reg,
	[BPF_JMP32 | BPF_JSLE | BPF_X] = cmp_reg,
	[BPF_JMP32 | BPF_JSET | BPF_X] = jset_reg,
	[BPF_JMP32 | BPF_JNE | BPF_X] = jne_reg,
	[BPF_JMP | BPF_CALL] = call,
	[BPF_JMP | BPF_EXIT] = jmp_exit,
};

/* --- Assembler logic --- */
static int
nfp_fixup_immed_relo(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
		     struct nfp_insn_meta *jmp_dst, u32 br_idx)
{
	if (immed_get_value(nfp_prog->prog[br_idx + 1])) {
		pr_err("BUG: failed to fix up callee register saving\n");
		return -EINVAL;
	}

	immed_set_value(&nfp_prog->prog[br_idx + 1], jmp_dst->off);

	return 0;
}

static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta, *jmp_dst;
	u32 idx, br_idx;
	int err;

	list_for_each_entry(meta, &nfp_prog->insns, l) {
		if (meta->flags & FLAG_INSN_SKIP_MASK)
			continue;
		if (!is_mbpf_jmp(meta))
			continue;
		if (meta->insn.code == (BPF_JMP | BPF_EXIT) &&
		    !nfp_is_main_function(meta))
			continue;
		if (is_mbpf_helper_call(meta))
			continue;

		if (list_is_last(&meta->l, &nfp_prog->insns))
			br_idx = nfp_prog->last_bpf_off;
		else
			br_idx = list_next_entry(meta, l)->off - 1;

		/* For a BPF-to-BPF function call, a stack adjustment sequence
		 * is generated after the return instruction. Therefore, we
		 * must subtract the length of this sequence to have br_idx
		 * pointing to where the "branch" NFP instruction is expected
		 * to be.
		 */
		if (is_mbpf_pseudo_call(meta))
			br_idx -= meta->num_insns_after_br;

		if (!nfp_is_br(nfp_prog->prog[br_idx])) {
			pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
			       br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
			return -ELOOP;
		}

		if (meta->insn.code == (BPF_JMP | BPF_EXIT))
			continue;

		/* Leave special branches for later */
		if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
		    RELO_BR_REL && !is_mbpf_pseudo_call(meta))
			continue;

		if (!meta->jmp_dst) {
			pr_err("Non-exit jump doesn't have destination info recorded!!\n");
			return -ELOOP;
		}

		jmp_dst = meta->jmp_dst;

		if (jmp_dst->flags & FLAG_INSN_SKIP_PREC_DEPENDENT) {
			pr_err("Branch landing on removed instruction!!\n");
			return -ELOOP;
		}

		if (is_mbpf_pseudo_call(meta) &&
		    nfp_prog->subprog[jmp_dst->subprog_idx].needs_reg_push) {
			err = nfp_fixup_immed_relo(nfp_prog, meta,
						   jmp_dst, br_idx);
			if (err)
				return err;
		}

		if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
		    RELO_BR_REL)
			continue;

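		/* A single BPF jump may expand to more than one NFP branch
		 * (e.g. 64-bit compares), so point every branch in the
		 * translated range at the destination.
		 */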
		for (idx = meta->off; idx <= br_idx; idx++) {
			if (!nfp_is_br(nfp_prog->prog[idx]))
				continue;
			br_set_offset(&nfp_prog->prog[idx], jmp_dst->off);
		}
	}

	return 0;
}

static void nfp_intro(struct nfp_prog *nfp_prog)
{
	wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0));
	emit_alu(nfp_prog, plen_reg(nfp_prog),
		 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
}

static void
nfp_subprog_prologue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	/* Save return address into the stack. */
	wrp_mov(nfp_prog, reg_lm(0, 0), ret_reg(nfp_prog));
}

static void
nfp_start_subprog(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	unsigned int depth = nfp_prog->subprog[meta->subprog_idx].stack_depth;

	nfp_prog->stack_frame_depth = round_up(depth, 4);
	nfp_subprog_prologue(nfp_prog, meta);
}

bool nfp_is_subprog_start(struct nfp_insn_meta *meta)
{
	return meta->flags & FLAG_INSN_IS_SUBPROG_START;
}

static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
{
	/* TC direct-action mode:
	 *   0,1   ok        NOT SUPPORTED[1]
	 *   2     drop      0x22 -> drop,  count as stat1
	 *   4,5   nuke      0x02 -> drop
	 *   7     redir     0x44 -> redir, count as stat2
	 *   *     unspec    0x11 -> pass,  count as stat0
	 *
	 * [1] We can't support OK and RECLASSIFY because we can't tell TC
	 *     the exact decision made. We are forced to support UNSPEC
	 *     to handle aborts so that's the only one we handle for passing
	 *     packets up the stack.
	 */
	/* Target for aborts */
	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);

	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);

	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);

	/* Target for normal exits */
	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);

	/* if R0 > 7 jump to abort */
	emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);

	wrp_immed(nfp_prog, reg_b(2), 0x41221211);
	wrp_immed(nfp_prog, reg_b(3), 0x41001211);
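
	/* 0x41221211 and 0x41001211 are nibble lookup tables indexed by the
	 * return code in R0 (via the indirect shifts below); the two
	 * extracted nibbles are combined into the result byte shown in the
	 * table above.
	 */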

	emit_shf(nfp_prog, reg_a(1),
		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);

	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
	emit_shf(nfp_prog, reg_a(2),
		 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);

	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
	emit_shf(nfp_prog, reg_b(2),
		 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);

	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);

	emit_shf(nfp_prog, reg_b(2),
		 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
}

static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
{
	/* XDP return codes:
	 *   0  aborted  0x82 -> drop,  count as stat3
	 *   1  drop     0x22 -> drop,  count as stat1
	 *   2  pass     0x11 -> pass,  count as stat0
	 *   3  tx       0x44 -> redir, count as stat2
	 *   *  unknown  0x82 -> drop,  count as stat3
	 */
	/* Target for aborts */
	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);

	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);

	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);

	/* Target for normal exits */
	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);

	/* if R0 > 3 jump to abort */
	emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0));
	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);

	wrp_immed(nfp_prog, reg_b(2), 0x44112282);
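
	/* 0x44112282 is a byte lookup table indexed by the XDP return code
	 * in R0; the indirect shift below extracts the result byte shown in
	 * the table above.
	 */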

	emit_shf(nfp_prog, reg_a(1),
		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3);

	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
	emit_shf(nfp_prog, reg_b(2),
		 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);

	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);

	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
}

static bool nfp_prog_needs_callee_reg_save(struct nfp_prog *nfp_prog)
{
	unsigned int idx;

	for (idx = 1; idx < nfp_prog->subprog_cnt; idx++)
		if (nfp_prog->subprog[idx].needs_reg_push)
			return true;

	return false;
}

static void nfp_push_callee_registers(struct nfp_prog *nfp_prog)
{
	u8 reg;

	/* Subroutine: Save all callee saved registers (R6 ~ R9).
	 * imm_b() holds the return address.
	 */
	nfp_prog->tgt_call_push_regs = nfp_prog_current_offset(nfp_prog);
	for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) {
		u8 adj = (reg - BPF_REG_0) * 2;
		u8 idx = (reg - BPF_REG_6) * 2;

		/* The first slot in the stack frame is used to push the return
		 * address in bpf_to_bpf_call(), start just after.
		 */
		wrp_mov(nfp_prog, reg_lm(0, 1 + idx), reg_b(adj));

		if (reg == BPF_REG_8)
			/* Prepare to jump back, last 3 insns use defer slots */
			emit_rtn(nfp_prog, imm_b(nfp_prog), 3);

		wrp_mov(nfp_prog, reg_lm(0, 1 + idx + 1), reg_b(adj + 1));
	}
}

static void nfp_pop_callee_registers(struct nfp_prog *nfp_prog)
{
	u8 reg;

	/* Subroutine: Restore all callee saved registers (R6 ~ R9).
	 * ret_reg() holds the return address.
	 */
	nfp_prog->tgt_call_pop_regs = nfp_prog_current_offset(nfp_prog);
	for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) {
		u8 adj = (reg - BPF_REG_0) * 2;
		u8 idx = (reg - BPF_REG_6) * 2;

		/* The first slot in the stack frame holds the return address,
		 * start popping just after that.
		 */
		wrp_mov(nfp_prog, reg_both(adj), reg_lm(0, 1 + idx));

		if (reg == BPF_REG_8)
			/* Prepare to jump back, last 3 insns use defer slots */
			emit_rtn(nfp_prog, ret_reg(nfp_prog), 3);

		wrp_mov(nfp_prog, reg_both(adj + 1), reg_lm(0, 1 + idx + 1));
	}
}

static void nfp_outro(struct nfp_prog *nfp_prog)
{
	switch (nfp_prog->type) {
	case BPF_PROG_TYPE_SCHED_CLS:
		nfp_outro_tc_da(nfp_prog);
		break;
	case BPF_PROG_TYPE_XDP:
		nfp_outro_xdp(nfp_prog);
		break;
	default:
		WARN_ON(1);
	}

	if (!nfp_prog_needs_callee_reg_save(nfp_prog))
		return;

	nfp_push_callee_registers(nfp_prog);
	nfp_pop_callee_registers(nfp_prog);
}

static int nfp_translate(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta;
	unsigned int depth;
	int err;

	depth = nfp_prog->subprog[0].stack_depth;
	nfp_prog->stack_frame_depth = round_up(depth, 4);

	nfp_intro(nfp_prog);
	if (nfp_prog->error)
		return nfp_prog->error;

	list_for_each_entry(meta, &nfp_prog->insns, l) {
		instr_cb_t cb = instr_cb[meta->insn.code];

		meta->off = nfp_prog_current_offset(nfp_prog);

		if (nfp_is_subprog_start(meta)) {
			nfp_start_subprog(nfp_prog, meta);
			if (nfp_prog->error)
				return nfp_prog->error;
		}

		if (meta->flags & FLAG_INSN_SKIP_MASK) {
			nfp_prog->n_translated++;
			continue;
		}

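		/* An instruction that spans two BPF insns (e.g. LD_IMM64)
		 * registers the callback for its second half via double_cb.
		 */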
		if (nfp_meta_has_prev(nfp_prog, meta) &&
		    nfp_meta_prev(meta)->double_cb)
			cb = nfp_meta_prev(meta)->double_cb;
		if (!cb)
			return -ENOENT;
		err = cb(nfp_prog, meta);
		if (err)
			return err;
		if (nfp_prog->error)
			return nfp_prog->error;

		nfp_prog->n_translated++;
	}

	nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1;

	nfp_outro(nfp_prog);
	if (nfp_prog->error)
		return nfp_prog->error;

	wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW);
	if (nfp_prog->error)
		return nfp_prog->error;

	return nfp_fixup_branches(nfp_prog);
}

/* --- Optimizations --- */
static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta;

	list_for_each_entry(meta, &nfp_prog->insns, l) {
		struct bpf_insn insn = meta->insn;

		/* Programs converted from cBPF start with register xoring */
		if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
		    insn.src_reg == insn.dst_reg)
			continue;

		/* Programs start with R6 = R1 but we ignore the skb pointer */
		if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
		    insn.src_reg == 1 && insn.dst_reg == 6)
			meta->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;

		/* Return as soon as something doesn't match */
		if (!(meta->flags & FLAG_INSN_SKIP_MASK))
			return;
	}
}

/* abs(insn.imm) will fit better into an unrestricted reg immediate -
 * convert add/sub of a negative number into a sub/add of a positive one.
 */
static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta;

	list_for_each_entry(meta, &nfp_prog->insns, l) {
		struct bpf_insn insn = meta->insn;

		if (meta->flags & FLAG_INSN_SKIP_MASK)
			continue;

		if (!is_mbpf_alu(meta) && !is_mbpf_jmp(meta))
			continue;
		if (BPF_SRC(insn.code) != BPF_K)
			continue;
		if (insn.imm >= 0)
			continue;

		if (is_mbpf_jmp(meta)) {
			switch (BPF_OP(insn.code)) {
			case BPF_JGE:
			case BPF_JSGE:
			case BPF_JLT:
			case BPF_JSLT:
				meta->jump_neg_op = true;
				break;
			default:
				continue;
			}
		} else {
			if (BPF_OP(insn.code) == BPF_ADD)
				insn.code = BPF_CLASS(insn.code) | BPF_SUB;
			else if (BPF_OP(insn.code) == BPF_SUB)
				insn.code = BPF_CLASS(insn.code) | BPF_ADD;
			else
				continue;

			meta->insn.code = insn.code | BPF_K;
		}

		meta->insn.imm = -insn.imm;
	}
}

/* Remove masking after load since our load guarantees this is not needed */
static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta1, *meta2;
	static const s32 exp_mask[] = {
		[BPF_B] = 0x000000ffU,
		[BPF_H] = 0x0000ffffU,
		[BPF_W] = 0xffffffffU,
	};

	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
		struct bpf_insn insn, next;

		insn = meta1->insn;
		next = meta2->insn;

		if (BPF_CLASS(insn.code) != BPF_LD)
			continue;
		if (BPF_MODE(insn.code) != BPF_ABS &&
		    BPF_MODE(insn.code) != BPF_IND)
			continue;

		if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
			continue;

		if (!exp_mask[BPF_SIZE(insn.code)])
			continue;
		if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
			continue;

		if (next.src_reg || next.dst_reg)
			continue;

		if (meta2->flags & FLAG_INSN_IS_JUMP_DST)
			continue;

		meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
	}
}

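/* Like the masking above, a shift pair of exactly 32 bits following a
 * 32-bit classic packet load can be elided, as long as no jump lands on
 * either shift.
 */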
static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta1, *meta2, *meta3;

	nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
		struct bpf_insn insn, next1, next2;

		insn = meta1->insn;
		next1 = meta2->insn;
		next2 = meta3->insn;

		if (BPF_CLASS(insn.code) != BPF_LD)
			continue;
		if (BPF_MODE(insn.code) != BPF_ABS &&
		    BPF_MODE(insn.code) != BPF_IND)
			continue;
		if (BPF_SIZE(insn.code) != BPF_W)
			continue;

		if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
		      next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
		    !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
		      next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
			continue;

		if (next1.src_reg || next1.dst_reg ||
		    next2.src_reg || next2.dst_reg)
			continue;

		if (next1.imm != 0x20 || next2.imm != 0x20)
			continue;

		if (meta2->flags & FLAG_INSN_IS_JUMP_DST ||
		    meta3->flags & FLAG_INSN_IS_JUMP_DST)
			continue;

		meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
		meta3->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
	}
}

/* A load/store pair that forms a memory copy should look like the following:
 *
 *   ld_width R, [addr_src + offset_src]
 *   st_width [addr_dest + offset_dest], R
 *
 * The destination register of the load and the source register of the store
 * should be the same, and the load and store should operate at the same
 * width. If either of addr_src or addr_dest is the stack pointer, we don't
 * do the CPP optimization as the stack is modelled by registers on the NFP.
 */
static bool
curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta,
		    struct nfp_insn_meta *st_meta)
{
	struct bpf_insn *ld = &ld_meta->insn;
	struct bpf_insn *st = &st_meta->insn;

	if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta))
		return false;

	if (ld_meta->ptr.type != PTR_TO_PACKET &&
	    ld_meta->ptr.type != PTR_TO_MAP_VALUE)
		return false;

	if (st_meta->ptr.type != PTR_TO_PACKET)
		return false;

	if (BPF_SIZE(ld->code) != BPF_SIZE(st->code))
		return false;

	if (ld->dst_reg != st->src_reg)
		return false;

	/* There is a jump to the store insn of this pair. */
	if (st_meta->flags & FLAG_INSN_IS_JUMP_DST)
		return false;

	return true;
}

/* Currently, we only support chaining load/store pairs if:
 *
 * - Their address base registers are the same.
 * - Their address offsets are in the same order.
 * - They operate at the same memory width.
 * - There is no jump into the middle of them.
 */
static bool
curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta,
			      struct nfp_insn_meta *st_meta,
			      struct bpf_insn *prev_ld,
			      struct bpf_insn *prev_st)
{
	u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst;
	struct bpf_insn *ld = &ld_meta->insn;
	struct bpf_insn *st = &st_meta->insn;
	s16 prev_ld_off, prev_st_off;

	/* This pair is the start pair. */
	if (!prev_ld)
		return true;

	prev_size = BPF_LDST_BYTES(prev_ld);
	curr_size = BPF_LDST_BYTES(ld);
	prev_ld_base = prev_ld->src_reg;
	prev_st_base = prev_st->dst_reg;
	prev_ld_dst = prev_ld->dst_reg;
	prev_ld_off = prev_ld->off;
	prev_st_off = prev_st->off;

	if (ld->dst_reg != prev_ld_dst)
		return false;

	if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base)
		return false;

	if (curr_size != prev_size)
		return false;

	/* There is a jump to the head of this pair. */
	if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST)
		return false;

	/* Both in ascending order. */
	if (prev_ld_off + prev_size == ld->off &&
	    prev_st_off + prev_size == st->off)
		return true;

	/* Both in descending order. */
	if (ld->off + curr_size == prev_ld_off &&
	    st->off + curr_size == prev_st_off)
		return true;

	return false;
}

/* Return TRUE if a cross memory access happens, i.e. the store area overlaps
 * the load area such that a later load might read a value written by a
 * previous store; in that case we can't treat the sequence as a memory copy.
 */
static bool
cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta,
		 struct nfp_insn_meta *head_st_meta)
{
	s16 head_ld_off, head_st_off, ld_off;

	/* Different pointer types do not overlap. */
	if (head_ld_meta->ptr.type != head_st_meta->ptr.type)
		return false;

	/* Load and store are both PTR_TO_PACKET, check ID info. */
	if (head_ld_meta->ptr.id != head_st_meta->ptr.id)
		return true;

	/* Canonicalize the offsets. Turn all of them against the original
	 * base register.
	 */
	head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off;
	head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off;
	ld_off = ld->off + head_ld_meta->ptr.off;

	/* Ascending order cross. */
	if (ld_off > head_ld_off &&
	    head_ld_off < head_st_off && ld_off >= head_st_off)
		return true;

	/* Descending order cross. */
	if (ld_off < head_ld_off &&
	    head_ld_off > head_st_off && ld_off <= head_st_off)
		return true;

	return false;
}

/* This pass tries to identify the following instruction sequences.
 *
 *   load R, [regA + offA]
 *   store [regB + offB], R
 *   load R, [regA + offA + const_imm_A]
 *   store [regB + offB + const_imm_A], R
 *   load R, [regA + offA + 2 * const_imm_A]
 *   store [regB + offB + 2 * const_imm_A], R
 *   ...
 *
 * The sequence above is typically generated by the compiler when lowering
 * memcpy. The NFP prefers using CPP instructions to accelerate it.
 */
static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *head_ld_meta = NULL;
	struct nfp_insn_meta *head_st_meta = NULL;
	struct nfp_insn_meta *meta1, *meta2;
	struct bpf_insn *prev_ld = NULL;
	struct bpf_insn *prev_st = NULL;
	u8 count = 0;

	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
		struct bpf_insn *ld = &meta1->insn;
		struct bpf_insn *st = &meta2->insn;

		/* Reset record status if any of the following is true:
		 * - The current insn pair is not load/store.
		 * - The load/store pair doesn't chain with the previous one.
		 * - The chained load/store pair crossed with the previous
		 *   pair.
		 * - The chained load/store pair has a total memory copy size
		 *   beyond 128 bytes, which is the maximum length a single
		 *   NFP CPP command can transfer.
		 */
		if (!curr_pair_is_memcpy(meta1, meta2) ||
		    !curr_pair_chain_with_previous(meta1, meta2, prev_ld,
						   prev_st) ||
		    (head_ld_meta && (cross_mem_access(ld, head_ld_meta,
						       head_st_meta) ||
				      head_ld_meta->ldst_gather_len >= 128))) {
			if (!count)
				continue;

			if (count > 1) {
				s16 prev_ld_off = prev_ld->off;
				s16 prev_st_off = prev_st->off;
				s16 head_ld_off = head_ld_meta->insn.off;

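				/* A descending chain is canonicalized here:
				 * rewrite the head pair to the lowest
				 * offsets and negate the gather length to
				 * record the copy direction.
				 */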
				if (prev_ld_off < head_ld_off) {
					head_ld_meta->insn.off = prev_ld_off;
					head_st_meta->insn.off = prev_st_off;
					head_ld_meta->ldst_gather_len =
						-head_ld_meta->ldst_gather_len;
				}

				head_ld_meta->paired_st = &head_st_meta->insn;
				head_st_meta->flags |=
					FLAG_INSN_SKIP_PREC_DEPENDENT;
			} else {
				head_ld_meta->ldst_gather_len = 0;
			}

			/* If the chain is ended by a load/store pair then
			 * this pair could serve as the new head of the next
			 * chain.
			 */
			if (curr_pair_is_memcpy(meta1, meta2)) {
				head_ld_meta = meta1;
				head_st_meta = meta2;
				head_ld_meta->ldst_gather_len =
					BPF_LDST_BYTES(ld);
				meta1 = nfp_meta_next(meta1);
				meta2 = nfp_meta_next(meta2);
				prev_ld = ld;
				prev_st = st;
				count = 1;
			} else {
				head_ld_meta = NULL;
				head_st_meta = NULL;
				prev_ld = NULL;
				prev_st = NULL;
				count = 0;
			}

			continue;
		}

		if (!head_ld_meta) {
			head_ld_meta = meta1;
			head_st_meta = meta2;
		} else {
			meta1->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
			meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
		}

		head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld);
		meta1 = nfp_meta_next(meta1);
		meta2 = nfp_meta_next(meta2);
		prev_ld = ld;
		prev_st = st;
		count++;
	}
}

static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta, *range_node = NULL;
	s16 range_start = 0, range_end = 0;
	bool cache_avail = false;
	struct bpf_insn *insn;
	s32 range_ptr_off = 0;
	u32 range_ptr_id = 0;

	list_for_each_entry(meta, &nfp_prog->insns, l) {
		if (meta->flags & FLAG_INSN_IS_JUMP_DST)
			cache_avail = false;

		if (meta->flags & FLAG_INSN_SKIP_MASK)
			continue;

		insn = &meta->insn;

		if (is_mbpf_store_pkt(meta) ||
		    insn->code == (BPF_JMP | BPF_CALL) ||
		    is_mbpf_classic_store_pkt(meta) ||
		    is_mbpf_classic_load(meta)) {
			cache_avail = false;
			continue;
		}

		if (!is_mbpf_load(meta))
			continue;

		if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) {
			cache_avail = false;
			continue;
		}

		if (!cache_avail) {
			cache_avail = true;
			if (range_node)
				goto end_current_then_start_new;
			goto start_new;
		}

		/* Check ID to make sure two reads share the same
		 * variable offset against PTR_TO_PACKET, and check OFF
		 * to make sure they also share the same constant
		 * offset.
		 *
		 * OFFs don't really need to be the same, because they
		 * are the constant offsets against PTR_TO_PACKET, so
		 * for different OFFs, we could canonicalize them to
		 * offsets against the original packet pointer. We don't
		 * support this.
		 */
		if (meta->ptr.id == range_ptr_id &&
		    meta->ptr.off == range_ptr_off) {
			s16 new_start = range_start;
			s16 end, off = insn->off;
			s16 new_end = range_end;
			bool changed = false;

			if (off < range_start) {
				new_start = off;
				changed = true;
			}

			end = off + BPF_LDST_BYTES(insn);
			if (end > range_end) {
				new_end = end;
				changed = true;
			}

			if (!changed)
				continue;

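			/* A packet cache region is limited to 64 bytes; if
			 * growing the range would exceed that, end the
			 * current region and start a new one below.
			 */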
			if (new_end - new_start <= 64) {
				/* Install new range. */
				range_start = new_start;
				range_end = new_end;
				continue;
			}
		}

end_current_then_start_new:
		range_node->pkt_cache.range_start = range_start;
		range_node->pkt_cache.range_end = range_end;
start_new:
		range_node = meta;
		range_node->pkt_cache.do_init = true;
		range_ptr_id = range_node->ptr.id;
		range_ptr_off = range_node->ptr.off;
		range_start = insn->off;
		range_end = insn->off + BPF_LDST_BYTES(insn);
	}

	if (range_node) {
		range_node->pkt_cache.range_start = range_start;
		range_node->pkt_cache.range_end = range_end;
	}

	list_for_each_entry(meta, &nfp_prog->insns, l) {
		if (meta->flags & FLAG_INSN_SKIP_MASK)
			continue;

		if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) {
			if (meta->pkt_cache.do_init) {
				range_start = meta->pkt_cache.range_start;
				range_end = meta->pkt_cache.range_end;
			} else {
				meta->pkt_cache.range_start = range_start;
				meta->pkt_cache.range_end = range_end;
			}
		}
	}
}

static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
{
	nfp_bpf_opt_reg_init(nfp_prog);

	nfp_bpf_opt_neg_add_sub(nfp_prog);
	nfp_bpf_opt_ld_mask(nfp_prog);
	nfp_bpf_opt_ld_shift(nfp_prog);
	nfp_bpf_opt_ldst_gather(nfp_prog);
	nfp_bpf_opt_pkt_cache(nfp_prog);

	return 0;
}

static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta1, *meta2;
	struct nfp_bpf_map *nfp_map;
	struct bpf_map *map;
	u32 id;

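	/* The verifier replaced each map fd in a LD_IMM64 with the host map
	 * pointer, split across the two instruction halves. Reassemble the
	 * pointer and substitute the id the datapath uses: the device table
	 * id, or the host map id for offload-neutral maps.
	 */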
	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
		if (meta1->flags & FLAG_INSN_SKIP_MASK ||
		    meta2->flags & FLAG_INSN_SKIP_MASK)
			continue;

		if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) ||
		    meta1->insn.src_reg != BPF_PSEUDO_MAP_FD)
			continue;

		map = (void *)(unsigned long)((u32)meta1->insn.imm |
					      (u64)meta2->insn.imm << 32);
		if (bpf_map_offload_neutral(map)) {
			id = map->id;
		} else {
			nfp_map = map_to_offmap(map)->dev_priv;
			id = nfp_map->tid;
		}

		meta1->insn.imm = id;
		meta2->insn.imm = 0;
	}

	return 0;
}

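/* Validate each instruction and fold in the ECC bits the microstore
 * expects, converting the image to little-endian in place.
 */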
static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len)
{
	__le64 *ustore = (__force __le64 *)prog;
	int i;

	for (i = 0; i < len; i++) {
		int err;

		err = nfp_ustore_check_valid_no_ecc(prog[i]);
		if (err)
			return err;

		ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i]));
	}

	return 0;
}

static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog)
{
	void *prog;

	prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL);
	if (!prog)
		return;

	nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64);
	memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len);
	kvfree(nfp_prog->prog);
	nfp_prog->prog = prog;
}

int nfp_bpf_jit(struct nfp_prog *nfp_prog)
{
	int ret;

	ret = nfp_bpf_replace_map_ptrs(nfp_prog);
	if (ret)
		return ret;

	ret = nfp_bpf_optimize(nfp_prog);
	if (ret)
		return ret;

	ret = nfp_translate(nfp_prog);
	if (ret) {
		pr_err("Translation failed with error %d (translated: %u)\n",
		       ret, nfp_prog->n_translated);
		return -EINVAL;
	}

	nfp_bpf_prog_trim(nfp_prog);

	return ret;
}

void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta;

	/* Another pass to record jump information. */
	list_for_each_entry(meta, &nfp_prog->insns, l) {
		struct nfp_insn_meta *dst_meta;
		u64 code = meta->insn.code;
		unsigned int dst_idx;
		bool pseudo_call;

		if (!is_mbpf_jmp(meta))
			continue;
		if (BPF_OP(code) == BPF_EXIT)
			continue;
		if (is_mbpf_helper_call(meta))
			continue;

		/* If the opcode is BPF_CALL at this point, this can only be a
		 * BPF-to-BPF call (a.k.a. pseudo call).
		 */
		pseudo_call = BPF_OP(code) == BPF_CALL;

		if (pseudo_call)
			dst_idx = meta->n + 1 + meta->insn.imm;
		else
			dst_idx = meta->n + 1 + meta->insn.off;

		dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_idx);

		if (pseudo_call)
			dst_meta->flags |= FLAG_INSN_IS_SUBPROG_START;

		dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
		meta->jmp_dst = dst_meta;
	}
}

bool nfp_bpf_supported_opcode(u8 code)
{
	return !!instr_cb[code];
}

void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
{
	unsigned int i;
	u64 *prog;
	int err;

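	/* Branch and immediate relocations depend on where this vNIC loads
	 * the program, so patch a private copy rather than the shared image.
	 */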
	prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64),
		       GFP_KERNEL);
	if (!prog)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < nfp_prog->prog_len; i++) {
		enum nfp_relo_type special;
		u32 val;
		u16 off;

		special = FIELD_GET(OP_RELO_TYPE, prog[i]);
		switch (special) {
		case RELO_NONE:
			continue;
		case RELO_BR_REL:
			br_add_offset(&prog[i], bv->start_off);
			break;
		case RELO_BR_GO_OUT:
			br_set_offset(&prog[i],
				      nfp_prog->tgt_out + bv->start_off);
			break;
		case RELO_BR_GO_ABORT:
			br_set_offset(&prog[i],
				      nfp_prog->tgt_abort + bv->start_off);
			break;
		case RELO_BR_GO_CALL_PUSH_REGS:
			if (!nfp_prog->tgt_call_push_regs) {
				pr_err("BUG: failed to detect subprogram registers needs\n");
				err = -EINVAL;
				goto err_free_prog;
			}
			off = nfp_prog->tgt_call_push_regs + bv->start_off;
			br_set_offset(&prog[i], off);
			break;
		case RELO_BR_GO_CALL_POP_REGS:
			if (!nfp_prog->tgt_call_pop_regs) {
				pr_err("BUG: failed to detect subprogram registers needs\n");
				err = -EINVAL;
				goto err_free_prog;
			}
			off = nfp_prog->tgt_call_pop_regs + bv->start_off;
			br_set_offset(&prog[i], off);
			break;
		case RELO_BR_NEXT_PKT:
			br_set_offset(&prog[i], bv->tgt_done);
			break;
		case RELO_BR_HELPER:
			val = br_get_offset(prog[i]);
			val -= BR_OFF_RELO;
			switch (val) {
			case BPF_FUNC_map_lookup_elem:
				val = nfp_prog->bpf->helpers.map_lookup;
				break;
			case BPF_FUNC_map_update_elem:
				val = nfp_prog->bpf->helpers.map_update;
				break;
			case BPF_FUNC_map_delete_elem:
				val = nfp_prog->bpf->helpers.map_delete;
				break;
			case BPF_FUNC_perf_event_output:
				val = nfp_prog->bpf->helpers.perf_event_output;
				break;
			default:
				pr_err("relocation of unknown helper %d\n",
				       val);
				err = -EINVAL;
				goto err_free_prog;
			}
			br_set_offset(&prog[i], val);
			break;
		case RELO_IMMED_REL:
			immed_add_value(&prog[i], bv->start_off);
			break;
		}

		prog[i] &= ~OP_RELO_TYPE;
	}

	err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len);
	if (err)
		goto err_free_prog;

	return prog;

err_free_prog:
	kfree(prog);
	return ERR_PTR(err);
}