1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
4 | * operating system. INET is implemented using the BSD Socket |
5 | * interface as the means of communication with the user level. |
6 | * |
7 | * Generic INET6 transport hashtables |
8 | * |
9 | * Authors: Lotsa people, from code originally in tcp, generalised here |
10 | * by Arnaldo Carvalho de Melo <acme@mandriva.com> |
11 | */ |
12 | |
13 | #include <linux/module.h> |
14 | #include <linux/random.h> |
15 | |
16 | #include <net/addrconf.h> |
17 | #include <net/inet_connection_sock.h> |
18 | #include <net/inet_hashtables.h> |
19 | #include <net/inet6_hashtables.h> |
20 | #include <net/secure_seq.h> |
21 | #include <net/ip.h> |
22 | #include <net/sock_reuseport.h> |
23 | |
24 | u32 inet6_ehashfn(const struct net *net, |
25 | const struct in6_addr *laddr, const u16 lport, |
26 | const struct in6_addr *faddr, const __be16 fport) |
27 | { |
28 | static u32 inet6_ehash_secret __read_mostly; |
29 | static u32 ipv6_hash_secret __read_mostly; |
30 | |
31 | u32 lhash, fhash; |
32 | |
33 | net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret)); |
34 | net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); |
35 | |
36 | lhash = (__force u32)laddr->s6_addr32[3]; |
37 | fhash = __ipv6_addr_jhash(a: faddr, initval: ipv6_hash_secret); |
38 | |
39 | return __inet6_ehashfn(lhash, lport, fhash, fport, |
40 | initval: inet6_ehash_secret + net_hash_mix(net)); |
41 | } |
42 | EXPORT_SYMBOL_GPL(inet6_ehashfn); |
43 | |
44 | /* |
45 | * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so |
46 | * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM |
47 | * |
48 | * The sockhash lock must be held as a reader here. |
49 | */ |
50 | struct sock *__inet6_lookup_established(struct net *net, |
51 | struct inet_hashinfo *hashinfo, |
52 | const struct in6_addr *saddr, |
53 | const __be16 sport, |
54 | const struct in6_addr *daddr, |
55 | const u16 hnum, |
56 | const int dif, const int sdif) |
57 | { |
58 | struct sock *sk; |
59 | const struct hlist_nulls_node *node; |
60 | const __portpair ports = INET_COMBINED_PORTS(sport, hnum); |
61 | /* Optimize here for direct hit, only listening connections can |
62 | * have wildcards anyways. |
63 | */ |
64 | unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); |
65 | unsigned int slot = hash & hashinfo->ehash_mask; |
66 | struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; |
67 | |
68 | |
69 | begin: |
70 | sk_nulls_for_each_rcu(sk, node, &head->chain) { |
71 | if (sk->sk_hash != hash) |
72 | continue; |
73 | if (!inet6_match(net, sk, saddr, daddr, ports, dif, sdif)) |
74 | continue; |
75 | if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) |
76 | goto out; |
77 | |
78 | if (unlikely(!inet6_match(net, sk, saddr, daddr, ports, dif, sdif))) { |
79 | sock_gen_put(sk); |
80 | goto begin; |
81 | } |
82 | goto found; |
83 | } |
84 | if (get_nulls_value(ptr: node) != slot) |
85 | goto begin; |
86 | out: |
87 | sk = NULL; |
88 | found: |
89 | return sk; |
90 | } |
91 | EXPORT_SYMBOL(__inet6_lookup_established); |
92 | |
93 | static inline int compute_score(struct sock *sk, struct net *net, |
94 | const unsigned short hnum, |
95 | const struct in6_addr *daddr, |
96 | const int dif, const int sdif) |
97 | { |
98 | int score = -1; |
99 | |
100 | if (net_eq(net1: sock_net(sk), net2: net) && inet_sk(sk)->inet_num == hnum && |
101 | sk->sk_family == PF_INET6) { |
102 | if (!ipv6_addr_equal(a1: &sk->sk_v6_rcv_saddr, a2: daddr)) |
103 | return -1; |
104 | |
105 | if (!inet_sk_bound_dev_eq(net, bound_dev_if: sk->sk_bound_dev_if, dif, sdif)) |
106 | return -1; |
107 | |
108 | score = sk->sk_bound_dev_if ? 2 : 1; |
109 | if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) |
110 | score++; |
111 | } |
112 | return score; |
113 | } |
114 | |
115 | /** |
116 | * inet6_lookup_reuseport() - execute reuseport logic on AF_INET6 socket if necessary. |
117 | * @net: network namespace. |
118 | * @sk: AF_INET6 socket, must be in TCP_LISTEN state for TCP or TCP_CLOSE for UDP. |
119 | * @skb: context for a potential SK_REUSEPORT program. |
120 | * @doff: header offset. |
121 | * @saddr: source address. |
122 | * @sport: source port. |
123 | * @daddr: destination address. |
124 | * @hnum: destination port in host byte order. |
125 | * @ehashfn: hash function used to generate the fallback hash. |
126 | * |
127 | * Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to |
128 | * the selected sock or an error. |
129 | */ |
130 | struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk, |
131 | struct sk_buff *skb, int doff, |
132 | const struct in6_addr *saddr, |
133 | __be16 sport, |
134 | const struct in6_addr *daddr, |
135 | unsigned short hnum, |
136 | inet6_ehashfn_t *ehashfn) |
137 | { |
138 | struct sock *reuse_sk = NULL; |
139 | u32 phash; |
140 | |
141 | if (sk->sk_reuseport) { |
142 | phash = INDIRECT_CALL_INET(ehashfn, udp6_ehashfn, inet6_ehashfn, |
143 | net, daddr, hnum, saddr, sport); |
144 | reuse_sk = reuseport_select_sock(sk, hash: phash, skb, hdr_len: doff); |
145 | } |
146 | return reuse_sk; |
147 | } |
148 | EXPORT_SYMBOL_GPL(inet6_lookup_reuseport); |
149 | |
150 | /* called with rcu_read_lock() */ |
151 | static struct sock *inet6_lhash2_lookup(struct net *net, |
152 | struct inet_listen_hashbucket *ilb2, |
153 | struct sk_buff *skb, int doff, |
154 | const struct in6_addr *saddr, |
155 | const __be16 sport, const struct in6_addr *daddr, |
156 | const unsigned short hnum, const int dif, const int sdif) |
157 | { |
158 | struct sock *sk, *result = NULL; |
159 | struct hlist_nulls_node *node; |
160 | int score, hiscore = 0; |
161 | |
162 | sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) { |
163 | score = compute_score(sk, net, hnum, daddr, dif, sdif); |
164 | if (score > hiscore) { |
165 | result = inet6_lookup_reuseport(net, sk, skb, doff, |
166 | saddr, sport, daddr, hnum, inet6_ehashfn); |
167 | if (result) |
168 | return result; |
169 | |
170 | result = sk; |
171 | hiscore = score; |
172 | } |
173 | } |
174 | |
175 | return result; |
176 | } |
177 | |
178 | struct sock *inet6_lookup_run_sk_lookup(struct net *net, |
179 | int protocol, |
180 | struct sk_buff *skb, int doff, |
181 | const struct in6_addr *saddr, |
182 | const __be16 sport, |
183 | const struct in6_addr *daddr, |
184 | const u16 hnum, const int dif, |
185 | inet6_ehashfn_t *ehashfn) |
186 | { |
187 | struct sock *sk, *reuse_sk; |
188 | bool no_reuseport; |
189 | |
190 | no_reuseport = bpf_sk_lookup_run_v6(net, protocol, saddr, sport, |
191 | daddr, dport: hnum, ifindex: dif, psk: &sk); |
192 | if (no_reuseport || IS_ERR_OR_NULL(ptr: sk)) |
193 | return sk; |
194 | |
195 | reuse_sk = inet6_lookup_reuseport(net, sk, skb, doff, |
196 | saddr, sport, daddr, hnum, ehashfn); |
197 | if (reuse_sk) |
198 | sk = reuse_sk; |
199 | return sk; |
200 | } |
201 | EXPORT_SYMBOL_GPL(inet6_lookup_run_sk_lookup); |
202 | |
203 | struct sock *inet6_lookup_listener(struct net *net, |
204 | struct inet_hashinfo *hashinfo, |
205 | struct sk_buff *skb, int doff, |
206 | const struct in6_addr *saddr, |
207 | const __be16 sport, const struct in6_addr *daddr, |
208 | const unsigned short hnum, const int dif, const int sdif) |
209 | { |
210 | struct inet_listen_hashbucket *ilb2; |
211 | struct sock *result = NULL; |
212 | unsigned int hash2; |
213 | |
214 | /* Lookup redirect from BPF */ |
215 | if (static_branch_unlikely(&bpf_sk_lookup_enabled) && |
216 | hashinfo == net->ipv4.tcp_death_row.hashinfo) { |
217 | result = inet6_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff, |
218 | saddr, sport, daddr, hnum, dif, |
219 | inet6_ehashfn); |
220 | if (result) |
221 | goto done; |
222 | } |
223 | |
224 | hash2 = ipv6_portaddr_hash(net, addr6: daddr, port: hnum); |
225 | ilb2 = inet_lhash2_bucket(h: hashinfo, hash: hash2); |
226 | |
227 | result = inet6_lhash2_lookup(net, ilb2, skb, doff, |
228 | saddr, sport, daddr, hnum, |
229 | dif, sdif); |
230 | if (result) |
231 | goto done; |
232 | |
233 | /* Lookup lhash2 with in6addr_any */ |
234 | hash2 = ipv6_portaddr_hash(net, addr6: &in6addr_any, port: hnum); |
235 | ilb2 = inet_lhash2_bucket(h: hashinfo, hash: hash2); |
236 | |
237 | result = inet6_lhash2_lookup(net, ilb2, skb, doff, |
238 | saddr, sport, daddr: &in6addr_any, hnum, |
239 | dif, sdif); |
240 | done: |
241 | if (IS_ERR(ptr: result)) |
242 | return NULL; |
243 | return result; |
244 | } |
245 | EXPORT_SYMBOL_GPL(inet6_lookup_listener); |
246 | |
247 | struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, |
248 | struct sk_buff *skb, int doff, |
249 | const struct in6_addr *saddr, const __be16 sport, |
250 | const struct in6_addr *daddr, const __be16 dport, |
251 | const int dif) |
252 | { |
253 | struct sock *sk; |
254 | bool refcounted; |
255 | |
256 | sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, |
257 | ntohs(dport), dif, sdif: 0, refcounted: &refcounted); |
258 | if (sk && !refcounted && !refcount_inc_not_zero(r: &sk->sk_refcnt)) |
259 | sk = NULL; |
260 | return sk; |
261 | } |
262 | EXPORT_SYMBOL_GPL(inet6_lookup); |
263 | |
264 | static int __inet6_check_established(struct inet_timewait_death_row *death_row, |
265 | struct sock *sk, const __u16 lport, |
266 | struct inet_timewait_sock **twp) |
267 | { |
268 | struct inet_hashinfo *hinfo = death_row->hashinfo; |
269 | struct inet_sock *inet = inet_sk(sk); |
270 | const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; |
271 | const struct in6_addr *saddr = &sk->sk_v6_daddr; |
272 | const int dif = sk->sk_bound_dev_if; |
273 | struct net *net = sock_net(sk); |
274 | const int sdif = l3mdev_master_ifindex_by_index(net, ifindex: dif); |
275 | const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); |
276 | const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, |
277 | inet->inet_dport); |
278 | struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo: hinfo, hash); |
279 | spinlock_t *lock = inet_ehash_lockp(hashinfo: hinfo, hash); |
280 | struct sock *sk2; |
281 | const struct hlist_nulls_node *node; |
282 | struct inet_timewait_sock *tw = NULL; |
283 | |
284 | spin_lock(lock); |
285 | |
286 | sk_nulls_for_each(sk2, node, &head->chain) { |
287 | if (sk2->sk_hash != hash) |
288 | continue; |
289 | |
290 | if (likely(inet6_match(net, sk2, saddr, daddr, ports, |
291 | dif, sdif))) { |
292 | if (sk2->sk_state == TCP_TIME_WAIT) { |
293 | tw = inet_twsk(sk: sk2); |
294 | if (twsk_unique(sk, sktw: sk2, twp)) |
295 | break; |
296 | } |
297 | goto not_unique; |
298 | } |
299 | } |
300 | |
301 | /* Must record num and sport now. Otherwise we will see |
302 | * in hash table socket with a funny identity. |
303 | */ |
304 | inet->inet_num = lport; |
305 | inet->inet_sport = htons(lport); |
306 | sk->sk_hash = hash; |
307 | WARN_ON(!sk_unhashed(sk)); |
308 | __sk_nulls_add_node_rcu(sk, list: &head->chain); |
309 | if (tw) { |
310 | sk_nulls_del_node_init_rcu(sk: (struct sock *)tw); |
311 | __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED); |
312 | } |
313 | spin_unlock(lock); |
314 | sock_prot_inuse_add(net: sock_net(sk), prot: sk->sk_prot, val: 1); |
315 | |
316 | if (twp) { |
317 | *twp = tw; |
318 | } else if (tw) { |
319 | /* Silly. Should hash-dance instead... */ |
320 | inet_twsk_deschedule_put(tw); |
321 | } |
322 | return 0; |
323 | |
324 | not_unique: |
325 | spin_unlock(lock); |
326 | return -EADDRNOTAVAIL; |
327 | } |
328 | |
329 | static u64 inet6_sk_port_offset(const struct sock *sk) |
330 | { |
331 | const struct inet_sock *inet = inet_sk(sk); |
332 | |
333 | return secure_ipv6_port_ephemeral(saddr: sk->sk_v6_rcv_saddr.s6_addr32, |
334 | daddr: sk->sk_v6_daddr.s6_addr32, |
335 | dport: inet->inet_dport); |
336 | } |
337 | |
338 | int inet6_hash_connect(struct inet_timewait_death_row *death_row, |
339 | struct sock *sk) |
340 | { |
341 | u64 port_offset = 0; |
342 | |
343 | if (!inet_sk(sk)->inet_num) |
344 | port_offset = inet6_sk_port_offset(sk); |
345 | return __inet_hash_connect(death_row, sk, port_offset, |
346 | check_established: __inet6_check_established); |
347 | } |
348 | EXPORT_SYMBOL_GPL(inet6_hash_connect); |
349 | |
350 | int inet6_hash(struct sock *sk) |
351 | { |
352 | int err = 0; |
353 | |
354 | if (sk->sk_state != TCP_CLOSE) |
355 | err = __inet_hash(sk, NULL); |
356 | |
357 | return err; |
358 | } |
359 | EXPORT_SYMBOL_GPL(inet6_hash); |
360 | |