1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
2 | /**************************************************************************** |
3 | * Driver for Solarflare network controllers and boards |
4 | * Copyright 2019 Solarflare Communications Inc. |
5 | * Copyright 2020-2022 Xilinx Inc. |
6 | * |
7 | * This program is free software; you can redistribute it and/or modify it |
8 | * under the terms of the GNU General Public License version 2 as published |
9 | * by the Free Software Foundation, incorporated herein by reference. |
10 | */ |
11 | |
12 | #ifndef EFX_TC_H |
13 | #define EFX_TC_H |
14 | #include <net/flow_offload.h> |
15 | #include <linux/rhashtable.h> |
16 | #include "net_driver.h" |
17 | #include "tc_counters.h" |
18 | |
19 | #define IS_ALL_ONES(v) (!(typeof (v))~(v)) |
20 | |
21 | /** |
22 | * struct efx_tc_mac_pedit_action - mac pedit action fields |
23 | * |
24 | * @h_addr: mac address field of ethernet header |
25 | * @linkage: rhashtable reference |
26 | * @ref: reference count |
27 | * @fw_id: index of this entry in firmware MAC address table |
28 | * |
29 | * MAC address edits are indirected through a table in the hardware |
30 | */ |
31 | struct efx_tc_mac_pedit_action { |
32 | u8 h_addr[ETH_ALEN]; |
33 | struct rhash_head linkage; |
34 | refcount_t ref; |
35 | u32 fw_id; /* index of this entry in firmware MAC address table */ |
36 | }; |
37 | |
38 | static inline bool efx_ipv6_addr_all_ones(struct in6_addr *addr) |
39 | { |
40 | return !memchr_inv(p: addr, c: 0xff, size: sizeof(*addr)); |
41 | } |
42 | |
43 | struct efx_tc_encap_action; /* see tc_encap_actions.h */ |
44 | |
45 | /** |
46 | * struct efx_tc_action_set - collection of tc action fields |
47 | * |
48 | * @vlan_push: the number of vlan headers to push |
49 | * @vlan_pop: the number of vlan headers to pop |
50 | * @decap: used to indicate a tunnel header decapsulation should take place |
51 | * @do_nat: perform NAT/NPT with values returned by conntrack match |
52 | * @do_ttl_dec: used to indicate IP TTL / Hop Limit should be decremented |
53 | * @deliver: used to indicate a deliver action should take place |
54 | * @vlan_tci: tci fields for vlan push actions |
55 | * @vlan_proto: ethernet types for vlan push actions |
56 | * @count: counter mapping |
57 | * @encap_md: encap entry in tc_encap_ht table |
58 | * @encap_user: linked list of encap users (encap_md->users) |
59 | * @user: owning action-set-list. Only populated if @encap_md is; used by efx_tc_update_encap() fallback handling |
60 | * @count_user: linked list of counter users (counter->users) |
61 | * @dest_mport: destination mport |
62 | * @src_mac: source mac entry in tc_mac_ht table |
63 | * @dst_mac: destination mac entry in tc_mac_ht table |
64 | * @fw_id: index of this entry in firmware actions table |
65 | * @list: linked list of tc actions |
66 | * |
67 | */ |
68 | struct efx_tc_action_set { |
69 | u16 vlan_push:2; |
70 | u16 vlan_pop:2; |
71 | u16 decap:1; |
72 | u16 do_nat:1; |
73 | u16 do_ttl_dec:1; |
74 | u16 deliver:1; |
75 | __be16 vlan_tci[2]; |
76 | __be16 vlan_proto[2]; |
77 | struct efx_tc_counter_index *count; |
78 | struct efx_tc_encap_action *encap_md; |
79 | struct list_head encap_user; |
80 | struct efx_tc_action_set_list *user; |
81 | struct list_head count_user; |
82 | u32 dest_mport; |
83 | struct efx_tc_mac_pedit_action *src_mac; |
84 | struct efx_tc_mac_pedit_action *dst_mac; |
85 | u32 fw_id; |
86 | struct list_head list; |
87 | }; |
88 | |
89 | struct efx_tc_match_fields { |
90 | /* L1 */ |
91 | u32 ingress_port; |
92 | u8 recirc_id; /* mapped from (u32) TC chain_index to smaller space */ |
93 | /* L2 (inner when encap) */ |
94 | __be16 eth_proto; |
95 | __be16 vlan_tci[2], vlan_proto[2]; |
96 | u8 eth_saddr[ETH_ALEN], eth_daddr[ETH_ALEN]; |
97 | /* L3 (when IP) */ |
98 | u8 ip_proto, ip_tos, ip_ttl; |
99 | __be32 src_ip, dst_ip; |
100 | #ifdef CONFIG_IPV6 |
101 | struct in6_addr src_ip6, dst_ip6; |
102 | #endif |
103 | bool ip_frag, ip_firstfrag; |
104 | /* L4 */ |
105 | __be16 l4_sport, l4_dport; /* Ports (UDP, TCP) */ |
106 | __be16 tcp_flags; |
107 | bool tcp_syn_fin_rst; /* true if ANY of SYN/FIN/RST are set */ |
108 | /* Encap. The following are *outer* fields. Note that there are no |
109 | * outer eth (L2) fields; this is because TC doesn't have them. |
110 | */ |
111 | __be32 enc_src_ip, enc_dst_ip; |
112 | struct in6_addr enc_src_ip6, enc_dst_ip6; |
113 | u8 enc_ip_tos, enc_ip_ttl; |
114 | __be16 enc_sport, enc_dport; |
115 | __be32 enc_keyid; /* e.g. VNI, VSID */ |
116 | /* Conntrack. */ |
117 | u16 ct_state_trk:1, ct_state_est:1; |
118 | u32 ct_mark; |
119 | u16 ct_zone; |
120 | }; |
121 | |
122 | static inline bool efx_tc_match_is_encap(const struct efx_tc_match_fields *mask) |
123 | { |
124 | return mask->enc_src_ip || mask->enc_dst_ip || |
125 | !ipv6_addr_any(a: &mask->enc_src_ip6) || |
126 | !ipv6_addr_any(a: &mask->enc_dst_ip6) || mask->enc_ip_tos || |
127 | mask->enc_ip_ttl || mask->enc_sport || mask->enc_dport; |
128 | } |
129 | |
130 | /** |
131 | * enum efx_tc_em_pseudo_type - &struct efx_tc_encap_match pseudo type |
132 | * |
133 | * These are used to classify "pseudo" encap matches, which don't refer |
134 | * to an entry in hardware but rather indicate that a section of the |
135 | * match space is in use by another Outer Rule. |
136 | * |
137 | * @EFX_TC_EM_DIRECT: real HW entry in Outer Rule table; not a pseudo. |
138 | * Hardware index in &struct efx_tc_encap_match.fw_id is valid. |
139 | * @EFX_TC_EM_PSEUDO_MASK: registered by an encap match which includes a |
140 | * match on an optional field (currently ip_tos and/or udp_sport), |
141 | * to prevent an overlapping encap match _without_ optional fields. |
142 | * The pseudo encap match may be referenced again by an encap match |
143 | * with different values for these fields, but all masks must match the |
144 | * first (stored in our child_* fields). |
145 | * @EFX_TC_EM_PSEUDO_OR: registered by an fLHS rule that fits in the OR |
146 | * table. The &struct efx_tc_lhs_rule already holds the HW OR entry. |
147 | * Only one reference to this encap match may exist. |
148 | */ |
149 | enum efx_tc_em_pseudo_type { |
150 | EFX_TC_EM_DIRECT, |
151 | EFX_TC_EM_PSEUDO_MASK, |
152 | EFX_TC_EM_PSEUDO_OR, |
153 | }; |
154 | |
155 | struct efx_tc_encap_match { |
156 | __be32 src_ip, dst_ip; |
157 | struct in6_addr src_ip6, dst_ip6; |
158 | __be16 udp_dport; |
159 | __be16 udp_sport, udp_sport_mask; |
160 | u8 ip_tos, ip_tos_mask; |
161 | struct rhash_head linkage; |
162 | enum efx_encap_type tun_type; |
163 | u8 child_ip_tos_mask; |
164 | __be16 child_udp_sport_mask; |
165 | refcount_t ref; |
166 | enum efx_tc_em_pseudo_type type; |
167 | u32 fw_id; /* index of this entry in firmware encap match table */ |
168 | struct efx_tc_encap_match *pseudo; /* Referenced pseudo EM if needed */ |
169 | }; |
170 | |
171 | struct efx_tc_recirc_id { |
172 | u32 chain_index; |
173 | struct net_device *net_dev; |
174 | struct rhash_head linkage; |
175 | refcount_t ref; |
176 | u8 fw_id; /* index allocated for use in the MAE */ |
177 | }; |
178 | |
179 | struct efx_tc_match { |
180 | struct efx_tc_match_fields value; |
181 | struct efx_tc_match_fields mask; |
182 | struct efx_tc_encap_match *encap; |
183 | struct efx_tc_recirc_id *rid; |
184 | }; |
185 | |
186 | struct efx_tc_action_set_list { |
187 | struct list_head list; |
188 | u32 fw_id; |
189 | }; |
190 | |
191 | struct efx_tc_lhs_action { |
192 | enum efx_encap_type tun_type; |
193 | struct efx_tc_recirc_id *rid; |
194 | struct efx_tc_ct_zone *zone; |
195 | struct efx_tc_counter_index *count; |
196 | }; |
197 | |
198 | struct efx_tc_flow_rule { |
199 | unsigned long cookie; |
200 | struct rhash_head linkage; |
201 | struct efx_tc_match match; |
202 | struct efx_tc_action_set_list acts; |
203 | struct efx_tc_action_set_list *fallback; /* what to use when unready? */ |
204 | u32 fw_id; |
205 | }; |
206 | |
207 | struct efx_tc_lhs_rule { |
208 | unsigned long cookie; |
209 | struct efx_tc_match match; |
210 | struct efx_tc_lhs_action lhs_act; |
211 | struct rhash_head linkage; |
212 | u32 fw_id; |
213 | bool is_ar; /* Action Rule (for OR-AR-CT-AR sequence) */ |
214 | }; |
215 | |
216 | enum efx_tc_rule_prios { |
217 | EFX_TC_PRIO_TC, /* Rule inserted by TC */ |
218 | EFX_TC_PRIO_DFLT, /* Default switch rule; one of efx_tc_default_rules */ |
219 | EFX_TC_PRIO__NUM |
220 | }; |
221 | |
222 | struct efx_tc_table_field_fmt { |
223 | u16 field_id; |
224 | u16 lbn; |
225 | u16 width; |
226 | u8 masking; |
227 | u8 scheme; |
228 | }; |
229 | |
230 | struct efx_tc_table_desc { |
231 | u16 type; |
232 | u16 key_width; |
233 | u16 resp_width; |
234 | u16 n_keys; |
235 | u16 n_resps; |
236 | u16 n_prios; |
237 | u8 flags; |
238 | u8 scheme; |
239 | struct efx_tc_table_field_fmt *keys; |
240 | struct efx_tc_table_field_fmt *resps; |
241 | }; |
242 | |
243 | struct efx_tc_table_ct { /* TABLE_ID_CONNTRACK_TABLE */ |
244 | struct efx_tc_table_desc desc; |
245 | bool hooked; |
246 | struct { /* indices of named fields within @desc.keys */ |
247 | u8 eth_proto_idx; |
248 | u8 ip_proto_idx; |
249 | u8 src_ip_idx; /* either v4 or v6 */ |
250 | u8 dst_ip_idx; |
251 | u8 l4_sport_idx; |
252 | u8 l4_dport_idx; |
253 | u8 zone_idx; /* for TABLE_FIELD_ID_DOMAIN */ |
254 | } keys; |
255 | struct { /* indices of named fields within @desc.resps */ |
256 | u8 dnat_idx; |
257 | u8 nat_ip_idx; |
258 | u8 l4_natport_idx; |
259 | u8 mark_idx; |
260 | u8 counter_id_idx; |
261 | } resps; |
262 | }; |
263 | |
264 | /** |
265 | * struct efx_tc_state - control plane data for TC offload |
266 | * |
267 | * @caps: MAE capabilities reported by MCDI |
268 | * @block_list: List of &struct efx_tc_block_binding |
269 | * @mutex: Used to serialise operations on TC hashtables |
270 | * @counter_ht: Hashtable of TC counters (FW IDs and counter values) |
271 | * @counter_id_ht: Hashtable mapping TC counter cookies to counters |
272 | * @encap_ht: Hashtable of TC encap actions |
273 | * @mac_ht: Hashtable of MAC address entries (for pedits) |
274 | * @encap_match_ht: Hashtable of TC encap matches |
275 | * @match_action_ht: Hashtable of TC match-action rules |
276 | * @lhs_rule_ht: Hashtable of TC left-hand (act ct & goto chain) rules |
277 | * @ct_zone_ht: Hashtable of TC conntrack flowtable bindings |
278 | * @ct_ht: Hashtable of TC conntrack flow entries |
279 | * @neigh_ht: Hashtable of neighbour watches (&struct efx_neigh_binder) |
280 | * @recirc_ht: Hashtable of recirculation ID mappings (&struct efx_tc_recirc_id) |
281 | * @recirc_ida: Recirculation ID allocator |
282 | * @meta_ct: MAE table layout for conntrack table |
283 | * @reps_mport_id: MAE port allocated for representor RX |
284 | * @reps_filter_uc: VNIC filter for representor unicast RX (promisc) |
285 | * @reps_filter_mc: VNIC filter for representor multicast RX (allmulti) |
286 | * @reps_mport_vport_id: vport_id for representor RX filters |
287 | * @flush_counters: counters have been stopped, waiting for drain |
288 | * @flush_gen: final generation count per type array as reported by |
289 | * MC_CMD_MAE_COUNTERS_STREAM_STOP |
290 | * @seen_gen: most recent generation count per type as seen by efx_tc_rx() |
291 | * @flush_wq: wait queue used by efx_mae_stop_counters() to wait for |
292 | * MAE counters RXQ to finish draining |
293 | * @dflt: Match-action rules for default switching; at priority |
294 | * %EFX_TC_PRIO_DFLT. Named by *ingress* port |
295 | * @dflt.pf: rule for traffic ingressing from PF (egresses to wire) |
296 | * @dflt.wire: rule for traffic ingressing from wire (egresses to PF) |
297 | * @facts: Fallback action-set-lists for unready rules. Named by *egress* port |
298 | * @facts.pf: action-set-list for unready rules on PF netdev, hence applying to |
299 | * traffic from wire, and egressing to PF |
300 | * @facts.reps: action-set-list for unready rules on representors, hence |
301 | * applying to traffic from representees, and egressing to the reps mport |
302 | * @up: have TC datastructures been set up? |
303 | */ |
304 | struct efx_tc_state { |
305 | struct mae_caps *caps; |
306 | struct list_head block_list; |
307 | struct mutex mutex; |
308 | struct rhashtable counter_ht; |
309 | struct rhashtable counter_id_ht; |
310 | struct rhashtable encap_ht; |
311 | struct rhashtable mac_ht; |
312 | struct rhashtable encap_match_ht; |
313 | struct rhashtable match_action_ht; |
314 | struct rhashtable lhs_rule_ht; |
315 | struct rhashtable ct_zone_ht; |
316 | struct rhashtable ct_ht; |
317 | struct rhashtable neigh_ht; |
318 | struct rhashtable recirc_ht; |
319 | struct ida recirc_ida; |
320 | struct efx_tc_table_ct meta_ct; |
321 | u32 reps_mport_id, reps_mport_vport_id; |
322 | s32 reps_filter_uc, reps_filter_mc; |
323 | bool flush_counters; |
324 | u32 flush_gen[EFX_TC_COUNTER_TYPE_MAX]; |
325 | u32 seen_gen[EFX_TC_COUNTER_TYPE_MAX]; |
326 | wait_queue_head_t flush_wq; |
327 | struct { |
328 | struct efx_tc_flow_rule pf; |
329 | struct efx_tc_flow_rule wire; |
330 | } dflt; |
331 | struct { |
332 | struct efx_tc_action_set_list pf; |
333 | struct efx_tc_action_set_list reps; |
334 | } facts; |
335 | bool up; |
336 | }; |
337 | |
338 | struct efx_rep; |
339 | |
340 | enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev); |
341 | struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx, |
342 | struct net_device *dev); |
343 | s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv); |
344 | int efx_tc_configure_default_rule_rep(struct efx_rep *efv); |
345 | void efx_tc_deconfigure_default_rule(struct efx_nic *efx, |
346 | struct efx_tc_flow_rule *rule); |
347 | int efx_tc_flower(struct efx_nic *efx, struct net_device *net_dev, |
348 | struct flow_cls_offload *tc, struct efx_rep *efv); |
349 | |
350 | int efx_tc_insert_rep_filters(struct efx_nic *efx); |
351 | void efx_tc_remove_rep_filters(struct efx_nic *efx); |
352 | |
353 | int efx_init_tc(struct efx_nic *efx); |
354 | void efx_fini_tc(struct efx_nic *efx); |
355 | |
356 | int efx_init_struct_tc(struct efx_nic *efx); |
357 | void efx_fini_struct_tc(struct efx_nic *efx); |
358 | |
359 | #endif /* EFX_TC_H */ |
360 | |