/* SPDX-License-Identifier: GPL-2.0-only */
/* include/net/xdp.h
 *
 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
 */
#ifndef __LINUX_NET_XDP_H__
#define __LINUX_NET_XDP_H__

#include <linux/bitfield.h>
#include <linux/filter.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h> /* skb_shared_info */

/**
 * DOC: XDP RX-queue information
 *
 * The XDP RX-queue info (xdp_rxq_info) is associated with the driver
 * level RX-ring queues. It is information that is specific to how
 * the driver has configured a given RX-ring queue.
 *
 * Each xdp_buff frame received in the driver carries a (pointer)
 * reference to this xdp_rxq_info structure. This provides the XDP
 * data-path read-access to RX-info for both kernel and bpf-side
 * (limited subset).
 *
 * For now, direct access is only safe while running in NAPI/softirq
 * context. Contents are read-mostly and must not be updated during
 * driver NAPI/softirq poll.
 *
 * The driver usage API is a register and unregister API.
 *
 * The struct is not directly tied to the XDP prog. A new XDP prog
 * can be attached as long as it doesn't change the underlying
 * RX-ring. If the RX-ring does change significantly, the NIC driver
 * naturally needs to stop the RX-ring before purging and reallocating
 * memory. In that process the driver MUST call unregister (which
 * also applies for driver shutdown and unload). The register API is
 * also mandatory during RX-ring setup.
 */
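
/* Example: the register/unregister contract described above, as a minimal
 * sketch. The "ring" structure, its members and the error labels are
 * hypothetical driver names:
 *
 *	err = xdp_rxq_info_reg(&ring->xdp_rxq, dev, ring->queue_index,
 *			       napi_id);
 *	if (err)
 *		goto err_ring_setup;
 *	err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 *					 MEM_TYPE_PAGE_POOL,
 *					 ring->page_pool);
 *	if (err)
 *		goto err_unreg;
 *	...
 *	// on RX-ring teardown (and driver shutdown/unload):
 *	xdp_rxq_info_unreg(&ring->xdp_rxq);
 */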

enum xdp_mem_type {
	MEM_TYPE_PAGE_SHARED = 0, /* Split-page refcnt based model */
	MEM_TYPE_PAGE_ORDER0,     /* Orig XDP full page model */
	MEM_TYPE_PAGE_POOL,
	MEM_TYPE_XSK_BUFF_POOL,
	MEM_TYPE_MAX,
};

/* XDP flags for ndo_xdp_xmit */
#define XDP_XMIT_FLUSH		(1U << 0)	/* doorbell signal consumer */
#define XDP_XMIT_FLAGS_MASK	XDP_XMIT_FLUSH

struct xdp_mem_info {
	u32 type; /* enum xdp_mem_type, but known size type */
	u32 id;
};

struct page_pool;

struct xdp_rxq_info {
	struct net_device *dev;
	u32 queue_index;
	u32 reg_state;
	struct xdp_mem_info mem;
	unsigned int napi_id;
	u32 frag_size;
} ____cacheline_aligned; /* perf critical, avoid false-sharing */

struct xdp_txq_info {
	struct net_device *dev;
};

enum xdp_buff_flags {
	XDP_FLAGS_HAS_FRAGS		= BIT(0), /* non-linear xdp buff */
	XDP_FLAGS_FRAGS_PF_MEMALLOC	= BIT(1), /* xdp paged memory is under
						   * pressure
						   */
};

struct xdp_buff {
	void *data;
	void *data_end;
	void *data_meta;
	void *data_hard_start;
	struct xdp_rxq_info *rxq;
	struct xdp_txq_info *txq;
	u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom */
	u32 flags; /* supported values defined in xdp_buff_flags */
};

static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
{
	return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
}

static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp)
{
	xdp->flags |= XDP_FLAGS_HAS_FRAGS;
}

static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp)
{
	xdp->flags &= ~XDP_FLAGS_HAS_FRAGS;
}

static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp)
{
	return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
}

static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
{
	xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
}

static __always_inline void
xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
{
	xdp->frame_sz = frame_sz;
	xdp->rxq = rxq;
	xdp->flags = 0;
}

static __always_inline void
xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start,
		 int headroom, int data_len, const bool meta_valid)
{
	unsigned char *data = hard_start + headroom;

	xdp->data_hard_start = hard_start;
	xdp->data = data;
	xdp->data_end = data + data_len;
	xdp->data_meta = meta_valid ? data : data + 1;
}
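
/* Example: per-buffer setup in a driver RX path. A minimal sketch;
 * "ring", "page", "pad" and "len" are hypothetical driver names:
 *
 *	struct xdp_buff xdp;
 *
 *	xdp_init_buff(&xdp, PAGE_SIZE, &ring->xdp_rxq);
 *	xdp_prepare_buff(&xdp, page_address(page),
 *			 pad + XDP_PACKET_HEADROOM, len, true);
 */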

/* Reserve memory area at the end of the data area.
 *
 * This macro reserves tailroom in the XDP buffer by limiting the
 * XDP/BPF data access to data_hard_end. Notice that the same area
 * (and size) is used for XDP_PASS, when constructing the SKB via
 * build_skb().
 */
#define xdp_data_hard_end(xdp)				\
	((xdp)->data_hard_start + (xdp)->frame_sz -	\
	 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
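
/* For illustration, the frame_sz bytes addressed by an xdp_buff are laid
 * out as:
 *
 *	data_hard_start .. data_meta .. data .. data_end .. data_hard_end
 *	[   headroom    ][   meta   ][ packet data ][ reserved tailroom ]
 *
 * xdp_data_hard_end() points at the start of the reserved tailroom,
 * which must stay large enough to hold a struct skb_shared_info.
 */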

static inline struct skb_shared_info *
xdp_get_shared_info_from_buff(struct xdp_buff *xdp)
{
	return (struct skb_shared_info *)xdp_data_hard_end(xdp);
}

static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp)
{
	unsigned int len = xdp->data_end - xdp->data;
	struct skb_shared_info *sinfo;

	if (likely(!xdp_buff_has_frags(xdp)))
		goto out;

	sinfo = xdp_get_shared_info_from_buff(xdp);
	len += sinfo->xdp_frags_size;
out:
	return len;
}

struct xdp_frame {
	void *data;
	u16 len;
	u16 headroom;
	u32 metasize; /* uses lower 8-bits */
	/* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
	 * while mem info is valid on remote CPU.
	 */
	struct xdp_mem_info mem;
	struct net_device *dev_rx; /* used by cpumap */
	u32 frame_sz;
	u32 flags; /* supported values defined in xdp_buff_flags */
};

static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame)
{
	return !!(frame->flags & XDP_FLAGS_HAS_FRAGS);
}

static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame)
{
	return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
}

#define XDP_BULK_QUEUE_SIZE	16
struct xdp_frame_bulk {
	int count;
	void *xa;
	void *q[XDP_BULK_QUEUE_SIZE];
};

static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq)
{
	/* bq->count will be zero'ed when bq->xa gets updated */
	bq->xa = NULL;
}
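
/* Example: bulk-freeing a burst of frames amortizes the per-frame return
 * cost. A minimal sketch, assuming "frames[]" and "n" come from
 * hypothetical driver TX-completion handling:
 *
 *	struct xdp_frame_bulk bq;
 *	int i;
 *
 *	xdp_frame_bulk_init(&bq);
 *	rcu_read_lock();	// needed by xdp_return_frame_bulk()
 *	for (i = 0; i < n; i++)
 *		xdp_return_frame_bulk(frames[i], &bq);
 *	xdp_flush_frame_bulk(&bq);
 *	rcu_read_unlock();
 */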

static inline struct skb_shared_info *
xdp_get_shared_info_from_frame(struct xdp_frame *frame)
{
	void *data_hard_start = frame->data - frame->headroom - sizeof(*frame);

	return (struct skb_shared_info *)(data_hard_start + frame->frame_sz -
				SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
}

struct xdp_cpumap_stats {
	unsigned int redirect;
	unsigned int pass;
	unsigned int drop;
};

/* Clear kernel pointers in xdp_frame */
static inline void xdp_scrub_frame(struct xdp_frame *frame)
{
	frame->data = NULL;
	frame->dev_rx = NULL;
}

static inline void
xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
			   unsigned int size, unsigned int truesize,
			   bool pfmemalloc)
{
	skb_shinfo(skb)->nr_frags = nr_frags;

	skb->len += size;
	skb->data_len += size;
	skb->truesize += truesize;
	skb->pfmemalloc |= pfmemalloc;
}
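
/* Example: when an skb is built over the xdp_buff memory, the skb's
 * skb_shared_info is the same tailroom area as the xdp frags, but
 * build_skb() clears nr_frags, so the driver re-accounts the frags
 * afterwards. A minimal sketch; "truesize" is driver specific:
 *
 *	sinfo = xdp_get_shared_info_from_buff(xdp);
 *	nr_frags = sinfo->nr_frags;	// snapshot: build_skb() clears it
 *	skb = build_skb(xdp->data_hard_start, truesize);
 *	...
 *	if (unlikely(xdp_buff_has_frags(xdp)))
 *		xdp_update_skb_shared_info(skb, nr_frags,
 *					   sinfo->xdp_frags_size, truesize,
 *					   xdp_buff_is_frag_pfmemalloc(xdp));
 */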

/* Avoids inlining WARN macro in fast-path */
void xdp_warn(const char *msg, const char *func, const int line);
#define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)

struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
					   struct sk_buff *skb,
					   struct net_device *dev);
struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
					 struct net_device *dev);
int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp);
struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf);

static inline
void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
{
	xdp->data_hard_start = frame->data - frame->headroom - sizeof(*frame);
	xdp->data = frame->data;
	xdp->data_end = frame->data + frame->len;
	xdp->data_meta = frame->data - frame->metasize;
	xdp->frame_sz = frame->frame_sz;
	xdp->flags = frame->flags;
}

static inline
int xdp_update_frame_from_buff(struct xdp_buff *xdp,
			       struct xdp_frame *xdp_frame)
{
	int metasize, headroom;

	/* Assure headroom is available for storing info */
	headroom = xdp->data - xdp->data_hard_start;
	metasize = xdp->data - xdp->data_meta;
	metasize = metasize > 0 ? metasize : 0;
	if (unlikely((headroom - metasize) < sizeof(*xdp_frame)))
		return -ENOSPC;

	/* Catch if driver didn't reserve tailroom for skb_shared_info */
	if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
		XDP_WARN("Driver BUG: missing reserved tailroom");
		return -ENOSPC;
	}

	xdp_frame->data = xdp->data;
	xdp_frame->len = xdp->data_end - xdp->data;
	xdp_frame->headroom = headroom - sizeof(*xdp_frame);
	xdp_frame->metasize = metasize;
	xdp_frame->frame_sz = xdp->frame_sz;
	xdp_frame->flags = xdp->flags;

	return 0;
}

/* Convert xdp_buff to xdp_frame */
static inline
struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
{
	struct xdp_frame *xdp_frame;

	if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
		return xdp_convert_zc_to_xdp_frame(xdp);

	/* Store info in top of packet */
	xdp_frame = xdp->data_hard_start;
	if (unlikely(xdp_update_frame_from_buff(xdp, xdp_frame) < 0))
		return NULL;

	/* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
	xdp_frame->mem = xdp->rxq->mem;

	return xdp_frame;
}
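
/* Example: XDP_TX/redirect paths typically convert the buff to a frame so
 * the packet can outlive the NAPI cycle. A minimal sketch; the queueing
 * helper is hypothetical driver code:
 *
 *	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
 *
 *	if (unlikely(!xdpf))
 *		return -EOVERFLOW;	// headroom/tailroom too small
 *	mydrv_queue_for_tx(ring, xdpf);
 *	...
 *	// once the TX completion fires:
 *	xdp_return_frame(xdpf);
 */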

void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
		  struct xdp_buff *xdp);
void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);
void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
			   struct xdp_frame_bulk *bq);

static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
{
	struct skb_shared_info *sinfo;
	unsigned int len = xdpf->len;

	if (likely(!xdp_frame_has_frags(xdpf)))
		goto out;

	sinfo = xdp_get_shared_info_from_frame(xdpf);
	len += sinfo->xdp_frags_size;
out:
	return len;
}

int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
		       struct net_device *dev, u32 queue_index,
		       unsigned int napi_id, u32 frag_size);
static inline int
xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
		 struct net_device *dev, u32 queue_index,
		 unsigned int napi_id)
{
	return __xdp_rxq_info_reg(xdp_rxq, dev, queue_index, napi_id, 0);
}

void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
			       enum xdp_mem_type type, void *allocator);
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq);
int xdp_reg_mem_model(struct xdp_mem_info *mem,
		      enum xdp_mem_type type, void *allocator);
void xdp_unreg_mem_model(struct xdp_mem_info *mem);

/* Drivers not supporting XDP metadata can use this helper, which
 * rejects any room expansion for metadata as a result.
 */
static __always_inline void
xdp_set_data_meta_invalid(struct xdp_buff *xdp)
{
	xdp->data_meta = xdp->data + 1;
}

static __always_inline bool
xdp_data_meta_unsupported(const struct xdp_buff *xdp)
{
	return unlikely(xdp->data_meta > xdp->data);
}

static inline bool xdp_metalen_invalid(unsigned long metalen)
{
	return (metalen & (sizeof(__u32) - 1)) || (metalen > 32);
}
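
/* Example: metadata must be a multiple of 4 bytes and at most 32 bytes
 * long. These helpers combine roughly as in the bpf_xdp_adjust_meta()
 * helper (a sketch, not the verbatim implementation):
 *
 *	if (unlikely(xdp_data_meta_unsupported(xdp)))
 *		return -ENOTSUPP;	// driver opted out, see above
 *	if (unlikely(meta < xdp_frame_end || meta > xdp->data))
 *		return -EINVAL;
 *	if (unlikely(xdp_metalen_invalid(metalen)))
 *		return -EACCES;
 *	xdp->data_meta = meta;
 */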

struct xdp_attachment_info {
	struct bpf_prog *prog;
	u32 flags;
};

struct netdev_bpf;
void xdp_attachment_setup(struct xdp_attachment_info *info,
			  struct netdev_bpf *bpf);

#define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE

/* Define the relationship between xdp-rx-metadata kfunc and
 * various other entities:
 * - xdp_rx_metadata enum
 * - netdev netlink enum (Documentation/netlink/specs/netdev.yaml)
 * - kfunc name
 * - xdp_metadata_ops field
 */
#define XDP_METADATA_KFUNC_xxx	\
	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \
			   NETDEV_XDP_RX_METADATA_TIMESTAMP, \
			   bpf_xdp_metadata_rx_timestamp, \
			   xmo_rx_timestamp) \
	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
			   NETDEV_XDP_RX_METADATA_HASH, \
			   bpf_xdp_metadata_rx_hash, \
			   xmo_rx_hash) \

enum xdp_rx_metadata {
#define XDP_METADATA_KFUNC(name, _, __, ___) name,
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
	MAX_XDP_METADATA_KFUNC,
};
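
/* For reference, with the enum-generating definition of
 * XDP_METADATA_KFUNC() used above, the x-macro expands to:
 *
 *	enum xdp_rx_metadata {
 *		XDP_METADATA_KFUNC_RX_TIMESTAMP,
 *		XDP_METADATA_KFUNC_RX_HASH,
 *		MAX_XDP_METADATA_KFUNC,
 *	};
 */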

enum xdp_rss_hash_type {
	/* First part: Individual bits for L3/L4 types */
	XDP_RSS_L3_IPV4		= BIT(0),
	XDP_RSS_L3_IPV6		= BIT(1),

	/* The fixed (L3) IPv4 and IPv6 headers can both be followed by
	 * variable/dynamic headers, IPv4 called Options and IPv6 called
	 * Extension Headers. HW RSS type can contain this info.
	 */
	XDP_RSS_L3_DYNHDR	= BIT(2),

	/* When RSS hash covers L4 then drivers MUST set XDP_RSS_L4 bit in
	 * addition to the protocol specific bit. This eases interaction with
	 * SKBs and avoids reserving a fixed mask for future L4 protocol bits.
	 */
	XDP_RSS_L4		= BIT(3), /* L4 based hash, proto can be unknown */
	XDP_RSS_L4_TCP		= BIT(4),
	XDP_RSS_L4_UDP		= BIT(5),
	XDP_RSS_L4_SCTP		= BIT(6),
	XDP_RSS_L4_IPSEC	= BIT(7), /* L4 based hash include IPSEC SPI */

	/* Second part: RSS hash type combinations used for driver HW mapping */
	XDP_RSS_TYPE_NONE            = 0,
	XDP_RSS_TYPE_L2              = XDP_RSS_TYPE_NONE,

	XDP_RSS_TYPE_L3_IPV4         = XDP_RSS_L3_IPV4,
	XDP_RSS_TYPE_L3_IPV6         = XDP_RSS_L3_IPV6,
	XDP_RSS_TYPE_L3_IPV4_OPT     = XDP_RSS_L3_IPV4 | XDP_RSS_L3_DYNHDR,
	XDP_RSS_TYPE_L3_IPV6_EX      = XDP_RSS_L3_IPV6 | XDP_RSS_L3_DYNHDR,

	XDP_RSS_TYPE_L4_ANY          = XDP_RSS_L4,
	XDP_RSS_TYPE_L4_IPV4_TCP     = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_TCP,
	XDP_RSS_TYPE_L4_IPV4_UDP     = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_UDP,
	XDP_RSS_TYPE_L4_IPV4_SCTP    = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_SCTP,
	XDP_RSS_TYPE_L4_IPV4_IPSEC   = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC,

	XDP_RSS_TYPE_L4_IPV6_TCP     = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_TCP,
	XDP_RSS_TYPE_L4_IPV6_UDP     = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_UDP,
	XDP_RSS_TYPE_L4_IPV6_SCTP    = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_SCTP,
	XDP_RSS_TYPE_L4_IPV6_IPSEC   = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC,

	XDP_RSS_TYPE_L4_IPV6_TCP_EX  = XDP_RSS_TYPE_L4_IPV6_TCP  | XDP_RSS_L3_DYNHDR,
	XDP_RSS_TYPE_L4_IPV6_UDP_EX  = XDP_RSS_TYPE_L4_IPV6_UDP  | XDP_RSS_L3_DYNHDR,
	XDP_RSS_TYPE_L4_IPV6_SCTP_EX = XDP_RSS_TYPE_L4_IPV6_SCTP | XDP_RSS_L3_DYNHDR,
};

struct xdp_metadata_ops {
	int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
	int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
			   enum xdp_rss_hash_type *rss_type);
};
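
/* Example: a driver exposes metadata kfuncs by implementing
 * xdp_metadata_ops and pointing net_device->xdp_metadata_ops at it.
 * A minimal sketch; all "mydrv" names and descriptor fields are
 * hypothetical:
 *
 *	static int mydrv_xmo_rx_hash(const struct xdp_md *ctx, u32 *hash,
 *				     enum xdp_rss_hash_type *rss_type)
 *	{
 *		const struct mydrv_xdp_buff *mxbuf = (void *)ctx;
 *
 *		if (!(mxbuf->desc->status & MYDRV_RX_HASH_VALID))
 *			return -ENODATA;
 *		*hash = le32_to_cpu(mxbuf->desc->rss_hash);
 *		*rss_type = XDP_RSS_TYPE_L4_IPV4_TCP;
 *		return 0;
 *	}
 *
 *	static const struct xdp_metadata_ops mydrv_xdp_metadata_ops = {
 *		.xmo_rx_hash = mydrv_xmo_rx_hash,
 *	};
 */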

#ifdef CONFIG_NET
u32 bpf_xdp_metadata_kfunc_id(int id);
bool bpf_dev_bound_kfunc_id(u32 btf_id);
void xdp_set_features_flag(struct net_device *dev, xdp_features_t val);
void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg);
void xdp_features_clear_redirect_target(struct net_device *dev);
#else
static inline u32 bpf_xdp_metadata_kfunc_id(int id) { return 0; }
static inline bool bpf_dev_bound_kfunc_id(u32 btf_id) { return false; }

static inline void
xdp_set_features_flag(struct net_device *dev, xdp_features_t val)
{
}

static inline void
xdp_features_set_redirect_target(struct net_device *dev, bool support_sg)
{
}

static inline void
xdp_features_clear_redirect_target(struct net_device *dev)
{
}
#endif

static inline void xdp_clear_features_flag(struct net_device *dev)
{
	xdp_set_features_flag(dev, 0);
}

static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
					    struct xdp_buff *xdp)
{
	/* Driver XDP hooks are invoked within a single NAPI poll cycle and thus
	 * under local_bh_disable(), which provides the needed RCU protection
	 * for accessing map entries.
	 */
	u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp));

	if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
		if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
			act = xdp_master_redirect(xdp);
	}

	return act;
}
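
/* Example: typical driver-side verdict handling around bpf_prog_run_xdp().
 * A minimal sketch; mydrv_xmit_back() and the drop label are hypothetical
 * driver code:
 *
 *	act = bpf_prog_run_xdp(prog, &xdp);
 *	switch (act) {
 *	case XDP_PASS:
 *		break;	// fall through to build an skb
 *	case XDP_TX:
 *		mydrv_xmit_back(ring, &xdp);
 *		break;
 *	case XDP_REDIRECT:
 *		if (xdp_do_redirect(dev, &xdp, prog))
 *			goto drop;
 *		break;
 *	default:
 *		bpf_warn_invalid_xdp_action(dev, prog, act);
 *		fallthrough;
 *	case XDP_ABORTED:
 *		trace_xdp_exception(dev, prog, act);
 *		fallthrough;
 *	case XDP_DROP:
 *		goto drop;
 *	}
 */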
#endif /* __LINUX_NET_XDP_H__ */