1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * ip6_flowlabel.c IPv6 flowlabel manager. |
4 | * |
5 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
6 | */ |
7 | |
8 | #include <linux/capability.h> |
9 | #include <linux/errno.h> |
10 | #include <linux/types.h> |
11 | #include <linux/socket.h> |
12 | #include <linux/net.h> |
13 | #include <linux/netdevice.h> |
14 | #include <linux/in6.h> |
15 | #include <linux/proc_fs.h> |
16 | #include <linux/seq_file.h> |
17 | #include <linux/slab.h> |
18 | #include <linux/export.h> |
19 | #include <linux/pid_namespace.h> |
20 | #include <linux/jump_label_ratelimit.h> |
21 | |
22 | #include <net/net_namespace.h> |
23 | #include <net/sock.h> |
24 | |
25 | #include <net/ipv6.h> |
26 | #include <net/rawv6.h> |
27 | #include <net/transp_v6.h> |
28 | |
29 | #include <linux/uaccess.h> |
30 | |
/*
 * Linger bounds, in seconds.  The 6 second minimum was specified in an old
 * IPv6 RFC and is still a reasonable value.
 */
#define FL_MIN_LINGER	6	/* Minimal linger timeout */
#define FL_MAX_LINGER	150	/* Maximal linger timeout */
35 | |
/* FL hash table */

#define FL_MAX_PER_SOCK	32	/* per-socket label count tested by mem_check() */
#define FL_MAX_SIZE	4096	/* global label count tested by mem_check() */
#define FL_HASH_MASK	255	/* the table has FL_HASH_MASK + 1 chains */
#define FL_HASH(l)	(ntohl(l) & FL_HASH_MASK)
42 | |
43 | static atomic_t fl_size = ATOMIC_INIT(0); |
44 | static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1]; |
45 | |
46 | static void ip6_fl_gc(struct timer_list *unused); |
47 | static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc); |
48 | |
/* FL hash table lock: it only protects against the GC */

static DEFINE_SPINLOCK(ip6_fl_lock);

/* Big socket lock: a single lock for the flow label list of every socket */

static DEFINE_SPINLOCK(ip6_sk_fl_lock);
56 | |
/*
 * Deferred static key: incremented in fl_create() and decremented in
 * fl_free() for labels that are exclusive or carry options.
 */
DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ);
EXPORT_SYMBOL(ipv6_flowlabel_exclusive);
59 | |
/* Walk every label on hash chain @hash using rcu_dereference(). */
#define for_each_fl_rcu(hash, fl)				\
	for (fl = rcu_dereference(fl_ht[(hash)]);		\
	     fl != NULL;					\
	     fl = rcu_dereference(fl->next))

/* Resume a chain walk at the entry that follows @fl. */
#define for_each_fl_continue_rcu(fl)				\
	for (fl = rcu_dereference(fl->next);			\
	     fl != NULL;					\
	     fl = rcu_dereference(fl->next))
68 | |
/* Walk the flow label list attached to the socket's ipv6_pinfo @np. */
#define for_each_sk_fl_rcu(np, sfl)				\
	for (sfl = rcu_dereference(np->ipv6_fl_list);		\
	     sfl != NULL;					\
	     sfl = rcu_dereference(sfl->next))
73 | |
74 | static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label) |
75 | { |
76 | struct ip6_flowlabel *fl; |
77 | |
78 | for_each_fl_rcu(FL_HASH(label), fl) { |
79 | if (fl->label == label && net_eq(net1: fl->fl_net, net2: net)) |
80 | return fl; |
81 | } |
82 | return NULL; |
83 | } |
84 | |
85 | static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) |
86 | { |
87 | struct ip6_flowlabel *fl; |
88 | |
89 | rcu_read_lock(); |
90 | fl = __fl_lookup(net, label); |
91 | if (fl && !atomic_inc_not_zero(v: &fl->users)) |
92 | fl = NULL; |
93 | rcu_read_unlock(); |
94 | return fl; |
95 | } |
96 | |
97 | static bool fl_shared_exclusive(struct ip6_flowlabel *fl) |
98 | { |
99 | return fl->share == IPV6_FL_S_EXCL || |
100 | fl->share == IPV6_FL_S_PROCESS || |
101 | fl->share == IPV6_FL_S_USER; |
102 | } |
103 | |
104 | static void fl_free_rcu(struct rcu_head *head) |
105 | { |
106 | struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu); |
107 | |
108 | if (fl->share == IPV6_FL_S_PROCESS) |
109 | put_pid(pid: fl->owner.pid); |
110 | kfree(objp: fl->opt); |
111 | kfree(objp: fl); |
112 | } |
113 | |
114 | |
115 | static void fl_free(struct ip6_flowlabel *fl) |
116 | { |
117 | if (!fl) |
118 | return; |
119 | |
120 | if (fl_shared_exclusive(fl) || fl->opt) |
121 | static_branch_slow_dec_deferred(&ipv6_flowlabel_exclusive); |
122 | |
123 | call_rcu(head: &fl->rcu, func: fl_free_rcu); |
124 | } |
125 | |
126 | static void fl_release(struct ip6_flowlabel *fl) |
127 | { |
128 | spin_lock_bh(lock: &ip6_fl_lock); |
129 | |
130 | fl->lastuse = jiffies; |
131 | if (atomic_dec_and_test(v: &fl->users)) { |
132 | unsigned long ttd = fl->lastuse + fl->linger; |
133 | if (time_after(ttd, fl->expires)) |
134 | fl->expires = ttd; |
135 | ttd = fl->expires; |
136 | if (fl->opt && fl->share == IPV6_FL_S_EXCL) { |
137 | struct ipv6_txoptions *opt = fl->opt; |
138 | fl->opt = NULL; |
139 | kfree(objp: opt); |
140 | } |
141 | if (!timer_pending(timer: &ip6_fl_gc_timer) || |
142 | time_after(ip6_fl_gc_timer.expires, ttd)) |
143 | mod_timer(timer: &ip6_fl_gc_timer, expires: ttd); |
144 | } |
145 | spin_unlock_bh(lock: &ip6_fl_lock); |
146 | } |
147 | |
148 | static void ip6_fl_gc(struct timer_list *unused) |
149 | { |
150 | int i; |
151 | unsigned long now = jiffies; |
152 | unsigned long sched = 0; |
153 | |
154 | spin_lock(lock: &ip6_fl_lock); |
155 | |
156 | for (i = 0; i <= FL_HASH_MASK; i++) { |
157 | struct ip6_flowlabel *fl; |
158 | struct ip6_flowlabel __rcu **flp; |
159 | |
160 | flp = &fl_ht[i]; |
161 | while ((fl = rcu_dereference_protected(*flp, |
162 | lockdep_is_held(&ip6_fl_lock))) != NULL) { |
163 | if (atomic_read(v: &fl->users) == 0) { |
164 | unsigned long ttd = fl->lastuse + fl->linger; |
165 | if (time_after(ttd, fl->expires)) |
166 | fl->expires = ttd; |
167 | ttd = fl->expires; |
168 | if (time_after_eq(now, ttd)) { |
169 | *flp = fl->next; |
170 | fl_free(fl); |
171 | atomic_dec(v: &fl_size); |
172 | continue; |
173 | } |
174 | if (!sched || time_before(ttd, sched)) |
175 | sched = ttd; |
176 | } |
177 | flp = &fl->next; |
178 | } |
179 | } |
180 | if (!sched && atomic_read(v: &fl_size)) |
181 | sched = now + FL_MAX_LINGER; |
182 | if (sched) { |
183 | mod_timer(timer: &ip6_fl_gc_timer, expires: sched); |
184 | } |
185 | spin_unlock(lock: &ip6_fl_lock); |
186 | } |
187 | |
188 | static void __net_exit ip6_fl_purge(struct net *net) |
189 | { |
190 | int i; |
191 | |
192 | spin_lock_bh(lock: &ip6_fl_lock); |
193 | for (i = 0; i <= FL_HASH_MASK; i++) { |
194 | struct ip6_flowlabel *fl; |
195 | struct ip6_flowlabel __rcu **flp; |
196 | |
197 | flp = &fl_ht[i]; |
198 | while ((fl = rcu_dereference_protected(*flp, |
199 | lockdep_is_held(&ip6_fl_lock))) != NULL) { |
200 | if (net_eq(net1: fl->fl_net, net2: net) && |
201 | atomic_read(v: &fl->users) == 0) { |
202 | *flp = fl->next; |
203 | fl_free(fl); |
204 | atomic_dec(v: &fl_size); |
205 | continue; |
206 | } |
207 | flp = &fl->next; |
208 | } |
209 | } |
210 | spin_unlock_bh(lock: &ip6_fl_lock); |
211 | } |
212 | |
213 | static struct ip6_flowlabel *fl_intern(struct net *net, |
214 | struct ip6_flowlabel *fl, __be32 label) |
215 | { |
216 | struct ip6_flowlabel *lfl; |
217 | |
218 | fl->label = label & IPV6_FLOWLABEL_MASK; |
219 | |
220 | rcu_read_lock(); |
221 | spin_lock_bh(lock: &ip6_fl_lock); |
222 | if (label == 0) { |
223 | for (;;) { |
224 | fl->label = htonl(get_random_u32())&IPV6_FLOWLABEL_MASK; |
225 | if (fl->label) { |
226 | lfl = __fl_lookup(net, label: fl->label); |
227 | if (!lfl) |
228 | break; |
229 | } |
230 | } |
231 | } else { |
232 | /* |
233 | * we dropper the ip6_fl_lock, so this entry could reappear |
234 | * and we need to recheck with it. |
235 | * |
236 | * OTOH no need to search the active socket first, like it is |
237 | * done in ipv6_flowlabel_opt - sock is locked, so new entry |
238 | * with the same label can only appear on another sock |
239 | */ |
240 | lfl = __fl_lookup(net, label: fl->label); |
241 | if (lfl) { |
242 | atomic_inc(v: &lfl->users); |
243 | spin_unlock_bh(lock: &ip6_fl_lock); |
244 | rcu_read_unlock(); |
245 | return lfl; |
246 | } |
247 | } |
248 | |
249 | fl->lastuse = jiffies; |
250 | fl->next = fl_ht[FL_HASH(fl->label)]; |
251 | rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl); |
252 | atomic_inc(v: &fl_size); |
253 | spin_unlock_bh(lock: &ip6_fl_lock); |
254 | rcu_read_unlock(); |
255 | return NULL; |
256 | } |
257 | |
258 | |
259 | |
260 | /* Socket flowlabel lists */ |
261 | |
262 | struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label) |
263 | { |
264 | struct ipv6_fl_socklist *sfl; |
265 | struct ipv6_pinfo *np = inet6_sk(sk: sk); |
266 | |
267 | label &= IPV6_FLOWLABEL_MASK; |
268 | |
269 | rcu_read_lock(); |
270 | for_each_sk_fl_rcu(np, sfl) { |
271 | struct ip6_flowlabel *fl = sfl->fl; |
272 | |
273 | if (fl->label == label && atomic_inc_not_zero(v: &fl->users)) { |
274 | fl->lastuse = jiffies; |
275 | rcu_read_unlock(); |
276 | return fl; |
277 | } |
278 | } |
279 | rcu_read_unlock(); |
280 | return NULL; |
281 | } |
282 | EXPORT_SYMBOL_GPL(__fl6_sock_lookup); |
283 | |
284 | void fl6_free_socklist(struct sock *sk) |
285 | { |
286 | struct ipv6_pinfo *np = inet6_sk(sk: sk); |
287 | struct ipv6_fl_socklist *sfl; |
288 | |
289 | if (!rcu_access_pointer(np->ipv6_fl_list)) |
290 | return; |
291 | |
292 | spin_lock_bh(lock: &ip6_sk_fl_lock); |
293 | while ((sfl = rcu_dereference_protected(np->ipv6_fl_list, |
294 | lockdep_is_held(&ip6_sk_fl_lock))) != NULL) { |
295 | np->ipv6_fl_list = sfl->next; |
296 | spin_unlock_bh(lock: &ip6_sk_fl_lock); |
297 | |
298 | fl_release(fl: sfl->fl); |
299 | kfree_rcu(sfl, rcu); |
300 | |
301 | spin_lock_bh(lock: &ip6_sk_fl_lock); |
302 | } |
303 | spin_unlock_bh(lock: &ip6_sk_fl_lock); |
304 | } |
305 | |
306 | /* Service routines */ |
307 | |
308 | |
309 | /* |
310 | It is the only difficult place. flowlabel enforces equal headers |
311 | before and including routing header, however user may supply options |
312 | following rthdr. |
313 | */ |
314 | |
315 | struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, |
316 | struct ip6_flowlabel *fl, |
317 | struct ipv6_txoptions *fopt) |
318 | { |
319 | struct ipv6_txoptions *fl_opt = fl->opt; |
320 | |
321 | if (!fopt || fopt->opt_flen == 0) |
322 | return fl_opt; |
323 | |
324 | if (fl_opt) { |
325 | opt_space->hopopt = fl_opt->hopopt; |
326 | opt_space->dst0opt = fl_opt->dst0opt; |
327 | opt_space->srcrt = fl_opt->srcrt; |
328 | opt_space->opt_nflen = fl_opt->opt_nflen; |
329 | } else { |
330 | if (fopt->opt_nflen == 0) |
331 | return fopt; |
332 | opt_space->hopopt = NULL; |
333 | opt_space->dst0opt = NULL; |
334 | opt_space->srcrt = NULL; |
335 | opt_space->opt_nflen = 0; |
336 | } |
337 | opt_space->dst1opt = fopt->dst1opt; |
338 | opt_space->opt_flen = fopt->opt_flen; |
339 | opt_space->tot_len = fopt->tot_len; |
340 | return opt_space; |
341 | } |
342 | EXPORT_SYMBOL_GPL(fl6_merge_options); |
343 | |
344 | static unsigned long check_linger(unsigned long ttl) |
345 | { |
346 | if (ttl < FL_MIN_LINGER) |
347 | return FL_MIN_LINGER*HZ; |
348 | if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN)) |
349 | return 0; |
350 | return ttl*HZ; |
351 | } |
352 | |
353 | static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires) |
354 | { |
355 | linger = check_linger(ttl: linger); |
356 | if (!linger) |
357 | return -EPERM; |
358 | expires = check_linger(ttl: expires); |
359 | if (!expires) |
360 | return -EPERM; |
361 | |
362 | spin_lock_bh(lock: &ip6_fl_lock); |
363 | fl->lastuse = jiffies; |
364 | if (time_before(fl->linger, linger)) |
365 | fl->linger = linger; |
366 | if (time_before(expires, fl->linger)) |
367 | expires = fl->linger; |
368 | if (time_before(fl->expires, fl->lastuse + expires)) |
369 | fl->expires = fl->lastuse + expires; |
370 | spin_unlock_bh(lock: &ip6_fl_lock); |
371 | |
372 | return 0; |
373 | } |
374 | |
375 | static struct ip6_flowlabel * |
376 | fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, |
377 | sockptr_t optval, int optlen, int *err_p) |
378 | { |
379 | struct ip6_flowlabel *fl = NULL; |
380 | int olen; |
381 | int addr_type; |
382 | int err; |
383 | |
384 | olen = optlen - CMSG_ALIGN(sizeof(*freq)); |
385 | err = -EINVAL; |
386 | if (olen > 64 * 1024) |
387 | goto done; |
388 | |
389 | err = -ENOMEM; |
390 | fl = kzalloc(size: sizeof(*fl), GFP_KERNEL); |
391 | if (!fl) |
392 | goto done; |
393 | |
394 | if (olen > 0) { |
395 | struct msghdr msg; |
396 | struct flowi6 flowi6; |
397 | struct ipcm6_cookie ipc6; |
398 | |
399 | err = -ENOMEM; |
400 | fl->opt = kmalloc(size: sizeof(*fl->opt) + olen, GFP_KERNEL); |
401 | if (!fl->opt) |
402 | goto done; |
403 | |
404 | memset(fl->opt, 0, sizeof(*fl->opt)); |
405 | fl->opt->tot_len = sizeof(*fl->opt) + olen; |
406 | err = -EFAULT; |
407 | if (copy_from_sockptr_offset(dst: fl->opt + 1, src: optval, |
408 | CMSG_ALIGN(sizeof(*freq)), size: olen)) |
409 | goto done; |
410 | |
411 | msg.msg_controllen = olen; |
412 | msg.msg_control = (void *)(fl->opt+1); |
413 | memset(&flowi6, 0, sizeof(flowi6)); |
414 | |
415 | ipc6.opt = fl->opt; |
416 | err = ip6_datagram_send_ctl(net, sk, msg: &msg, fl6: &flowi6, ipc6: &ipc6); |
417 | if (err) |
418 | goto done; |
419 | err = -EINVAL; |
420 | if (fl->opt->opt_flen) |
421 | goto done; |
422 | if (fl->opt->opt_nflen == 0) { |
423 | kfree(objp: fl->opt); |
424 | fl->opt = NULL; |
425 | } |
426 | } |
427 | |
428 | fl->fl_net = net; |
429 | fl->expires = jiffies; |
430 | err = fl6_renew(fl, linger: freq->flr_linger, expires: freq->flr_expires); |
431 | if (err) |
432 | goto done; |
433 | fl->share = freq->flr_share; |
434 | addr_type = ipv6_addr_type(addr: &freq->flr_dst); |
435 | if ((addr_type & IPV6_ADDR_MAPPED) || |
436 | addr_type == IPV6_ADDR_ANY) { |
437 | err = -EINVAL; |
438 | goto done; |
439 | } |
440 | fl->dst = freq->flr_dst; |
441 | atomic_set(v: &fl->users, i: 1); |
442 | switch (fl->share) { |
443 | case IPV6_FL_S_EXCL: |
444 | case IPV6_FL_S_ANY: |
445 | break; |
446 | case IPV6_FL_S_PROCESS: |
447 | fl->owner.pid = get_task_pid(current, type: PIDTYPE_PID); |
448 | break; |
449 | case IPV6_FL_S_USER: |
450 | fl->owner.uid = current_euid(); |
451 | break; |
452 | default: |
453 | err = -EINVAL; |
454 | goto done; |
455 | } |
456 | if (fl_shared_exclusive(fl) || fl->opt) { |
457 | WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1); |
458 | static_branch_deferred_inc(&ipv6_flowlabel_exclusive); |
459 | } |
460 | return fl; |
461 | |
462 | done: |
463 | if (fl) { |
464 | kfree(objp: fl->opt); |
465 | kfree(objp: fl); |
466 | } |
467 | *err_p = err; |
468 | return NULL; |
469 | } |
470 | |
471 | static int mem_check(struct sock *sk) |
472 | { |
473 | struct ipv6_pinfo *np = inet6_sk(sk: sk); |
474 | struct ipv6_fl_socklist *sfl; |
475 | int room = FL_MAX_SIZE - atomic_read(v: &fl_size); |
476 | int count = 0; |
477 | |
478 | if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) |
479 | return 0; |
480 | |
481 | rcu_read_lock(); |
482 | for_each_sk_fl_rcu(np, sfl) |
483 | count++; |
484 | rcu_read_unlock(); |
485 | |
486 | if (room <= 0 || |
487 | ((count >= FL_MAX_PER_SOCK || |
488 | (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) && |
489 | !capable(CAP_NET_ADMIN))) |
490 | return -ENOBUFS; |
491 | |
492 | return 0; |
493 | } |
494 | |
495 | static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, |
496 | struct ip6_flowlabel *fl) |
497 | { |
498 | spin_lock_bh(lock: &ip6_sk_fl_lock); |
499 | sfl->fl = fl; |
500 | sfl->next = np->ipv6_fl_list; |
501 | rcu_assign_pointer(np->ipv6_fl_list, sfl); |
502 | spin_unlock_bh(lock: &ip6_sk_fl_lock); |
503 | } |
504 | |
505 | int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, |
506 | int flags) |
507 | { |
508 | struct ipv6_pinfo *np = inet6_sk(sk: sk); |
509 | struct ipv6_fl_socklist *sfl; |
510 | |
511 | if (flags & IPV6_FL_F_REMOTE) { |
512 | freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK; |
513 | return 0; |
514 | } |
515 | |
516 | if (inet6_test_bit(REPFLOW, sk)) { |
517 | freq->flr_label = np->flow_label; |
518 | return 0; |
519 | } |
520 | |
521 | rcu_read_lock(); |
522 | |
523 | for_each_sk_fl_rcu(np, sfl) { |
524 | if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) { |
525 | spin_lock_bh(lock: &ip6_fl_lock); |
526 | freq->flr_label = sfl->fl->label; |
527 | freq->flr_dst = sfl->fl->dst; |
528 | freq->flr_share = sfl->fl->share; |
529 | freq->flr_expires = (sfl->fl->expires - jiffies) / HZ; |
530 | freq->flr_linger = sfl->fl->linger / HZ; |
531 | |
532 | spin_unlock_bh(lock: &ip6_fl_lock); |
533 | rcu_read_unlock(); |
534 | return 0; |
535 | } |
536 | } |
537 | rcu_read_unlock(); |
538 | |
539 | return -ENOENT; |
540 | } |
541 | |
/* Dereference a socket list pointer while ip6_sk_fl_lock is held. */
#define socklist_dereference(__sflp)					\
	rcu_dereference_protected(__sflp, lockdep_is_held(&ip6_sk_fl_lock))
544 | |
545 | static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq) |
546 | { |
547 | struct ipv6_pinfo *np = inet6_sk(sk: sk); |
548 | struct ipv6_fl_socklist __rcu **sflp; |
549 | struct ipv6_fl_socklist *sfl; |
550 | |
551 | if (freq->flr_flags & IPV6_FL_F_REFLECT) { |
552 | if (sk->sk_protocol != IPPROTO_TCP) |
553 | return -ENOPROTOOPT; |
554 | if (!inet6_test_bit(REPFLOW, sk)) |
555 | return -ESRCH; |
556 | np->flow_label = 0; |
557 | inet6_clear_bit(REPFLOW, sk); |
558 | return 0; |
559 | } |
560 | |
561 | spin_lock_bh(lock: &ip6_sk_fl_lock); |
562 | for (sflp = &np->ipv6_fl_list; |
563 | (sfl = socklist_dereference(*sflp)) != NULL; |
564 | sflp = &sfl->next) { |
565 | if (sfl->fl->label == freq->flr_label) |
566 | goto found; |
567 | } |
568 | spin_unlock_bh(lock: &ip6_sk_fl_lock); |
569 | return -ESRCH; |
570 | found: |
571 | if (freq->flr_label == (np->flow_label & IPV6_FLOWLABEL_MASK)) |
572 | np->flow_label &= ~IPV6_FLOWLABEL_MASK; |
573 | *sflp = sfl->next; |
574 | spin_unlock_bh(lock: &ip6_sk_fl_lock); |
575 | fl_release(fl: sfl->fl); |
576 | kfree_rcu(sfl, rcu); |
577 | return 0; |
578 | } |
579 | |
580 | static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq) |
581 | { |
582 | struct ipv6_pinfo *np = inet6_sk(sk: sk); |
583 | struct net *net = sock_net(sk); |
584 | struct ipv6_fl_socklist *sfl; |
585 | int err; |
586 | |
587 | rcu_read_lock(); |
588 | for_each_sk_fl_rcu(np, sfl) { |
589 | if (sfl->fl->label == freq->flr_label) { |
590 | err = fl6_renew(fl: sfl->fl, linger: freq->flr_linger, |
591 | expires: freq->flr_expires); |
592 | rcu_read_unlock(); |
593 | return err; |
594 | } |
595 | } |
596 | rcu_read_unlock(); |
597 | |
598 | if (freq->flr_share == IPV6_FL_S_NONE && |
599 | ns_capable(ns: net->user_ns, CAP_NET_ADMIN)) { |
600 | struct ip6_flowlabel *fl = fl_lookup(net, label: freq->flr_label); |
601 | |
602 | if (fl) { |
603 | err = fl6_renew(fl, linger: freq->flr_linger, |
604 | expires: freq->flr_expires); |
605 | fl_release(fl); |
606 | return err; |
607 | } |
608 | } |
609 | return -ESRCH; |
610 | } |
611 | |
612 | static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, |
613 | sockptr_t optval, int optlen) |
614 | { |
615 | struct ipv6_fl_socklist *sfl, *sfl1 = NULL; |
616 | struct ip6_flowlabel *fl, *fl1 = NULL; |
617 | struct ipv6_pinfo *np = inet6_sk(sk: sk); |
618 | struct net *net = sock_net(sk); |
619 | int err; |
620 | |
621 | if (freq->flr_flags & IPV6_FL_F_REFLECT) { |
622 | if (net->ipv6.sysctl.flowlabel_consistency) { |
623 | net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n" ); |
624 | return -EPERM; |
625 | } |
626 | |
627 | if (sk->sk_protocol != IPPROTO_TCP) |
628 | return -ENOPROTOOPT; |
629 | inet6_set_bit(REPFLOW, sk); |
630 | return 0; |
631 | } |
632 | |
633 | if (freq->flr_label & ~IPV6_FLOWLABEL_MASK) |
634 | return -EINVAL; |
635 | if (net->ipv6.sysctl.flowlabel_state_ranges && |
636 | (freq->flr_label & IPV6_FLOWLABEL_STATELESS_FLAG)) |
637 | return -ERANGE; |
638 | |
639 | fl = fl_create(net, sk, freq, optval, optlen, err_p: &err); |
640 | if (!fl) |
641 | return err; |
642 | |
643 | sfl1 = kmalloc(size: sizeof(*sfl1), GFP_KERNEL); |
644 | |
645 | if (freq->flr_label) { |
646 | err = -EEXIST; |
647 | rcu_read_lock(); |
648 | for_each_sk_fl_rcu(np, sfl) { |
649 | if (sfl->fl->label == freq->flr_label) { |
650 | if (freq->flr_flags & IPV6_FL_F_EXCL) { |
651 | rcu_read_unlock(); |
652 | goto done; |
653 | } |
654 | fl1 = sfl->fl; |
655 | if (!atomic_inc_not_zero(v: &fl1->users)) |
656 | fl1 = NULL; |
657 | break; |
658 | } |
659 | } |
660 | rcu_read_unlock(); |
661 | |
662 | if (!fl1) |
663 | fl1 = fl_lookup(net, label: freq->flr_label); |
664 | if (fl1) { |
665 | recheck: |
666 | err = -EEXIST; |
667 | if (freq->flr_flags&IPV6_FL_F_EXCL) |
668 | goto release; |
669 | err = -EPERM; |
670 | if (fl1->share == IPV6_FL_S_EXCL || |
671 | fl1->share != fl->share || |
672 | ((fl1->share == IPV6_FL_S_PROCESS) && |
673 | (fl1->owner.pid != fl->owner.pid)) || |
674 | ((fl1->share == IPV6_FL_S_USER) && |
675 | !uid_eq(left: fl1->owner.uid, right: fl->owner.uid))) |
676 | goto release; |
677 | |
678 | err = -ENOMEM; |
679 | if (!sfl1) |
680 | goto release; |
681 | if (fl->linger > fl1->linger) |
682 | fl1->linger = fl->linger; |
683 | if ((long)(fl->expires - fl1->expires) > 0) |
684 | fl1->expires = fl->expires; |
685 | fl_link(np, sfl: sfl1, fl: fl1); |
686 | fl_free(fl); |
687 | return 0; |
688 | |
689 | release: |
690 | fl_release(fl: fl1); |
691 | goto done; |
692 | } |
693 | } |
694 | err = -ENOENT; |
695 | if (!(freq->flr_flags & IPV6_FL_F_CREATE)) |
696 | goto done; |
697 | |
698 | err = -ENOMEM; |
699 | if (!sfl1) |
700 | goto done; |
701 | |
702 | err = mem_check(sk); |
703 | if (err != 0) |
704 | goto done; |
705 | |
706 | fl1 = fl_intern(net, fl, label: freq->flr_label); |
707 | if (fl1) |
708 | goto recheck; |
709 | |
710 | if (!freq->flr_label) { |
711 | size_t offset = offsetof(struct in6_flowlabel_req, flr_label); |
712 | |
713 | if (copy_to_sockptr_offset(dst: optval, offset, src: &fl->label, |
714 | size: sizeof(fl->label))) { |
715 | /* Intentionally ignore fault. */ |
716 | } |
717 | } |
718 | |
719 | fl_link(np, sfl: sfl1, fl); |
720 | return 0; |
721 | done: |
722 | fl_free(fl); |
723 | kfree(objp: sfl1); |
724 | return err; |
725 | } |
726 | |
727 | int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen) |
728 | { |
729 | struct in6_flowlabel_req freq; |
730 | |
731 | if (optlen < sizeof(freq)) |
732 | return -EINVAL; |
733 | if (copy_from_sockptr(dst: &freq, src: optval, size: sizeof(freq))) |
734 | return -EFAULT; |
735 | |
736 | switch (freq.flr_action) { |
737 | case IPV6_FL_A_PUT: |
738 | return ipv6_flowlabel_put(sk, freq: &freq); |
739 | case IPV6_FL_A_RENEW: |
740 | return ipv6_flowlabel_renew(sk, freq: &freq); |
741 | case IPV6_FL_A_GET: |
742 | return ipv6_flowlabel_get(sk, freq: &freq, optval, optlen); |
743 | default: |
744 | return -EINVAL; |
745 | } |
746 | } |
747 | |
748 | #ifdef CONFIG_PROC_FS |
749 | |
750 | struct ip6fl_iter_state { |
751 | struct seq_net_private p; |
752 | struct pid_namespace *pid_ns; |
753 | int bucket; |
754 | }; |
755 | |
/* Iterator state stored in the seq_file's private pointer. */
#define ip6fl_seq_private(seq)	((struct ip6fl_iter_state *)(seq)->private)
757 | |
758 | static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq) |
759 | { |
760 | struct ip6_flowlabel *fl = NULL; |
761 | struct ip6fl_iter_state *state = ip6fl_seq_private(seq); |
762 | struct net *net = seq_file_net(seq); |
763 | |
764 | for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) { |
765 | for_each_fl_rcu(state->bucket, fl) { |
766 | if (net_eq(net1: fl->fl_net, net2: net)) |
767 | goto out; |
768 | } |
769 | } |
770 | fl = NULL; |
771 | out: |
772 | return fl; |
773 | } |
774 | |
775 | static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl) |
776 | { |
777 | struct ip6fl_iter_state *state = ip6fl_seq_private(seq); |
778 | struct net *net = seq_file_net(seq); |
779 | |
780 | for_each_fl_continue_rcu(fl) { |
781 | if (net_eq(net1: fl->fl_net, net2: net)) |
782 | goto out; |
783 | } |
784 | |
785 | try_again: |
786 | if (++state->bucket <= FL_HASH_MASK) { |
787 | for_each_fl_rcu(state->bucket, fl) { |
788 | if (net_eq(net1: fl->fl_net, net2: net)) |
789 | goto out; |
790 | } |
791 | goto try_again; |
792 | } |
793 | fl = NULL; |
794 | |
795 | out: |
796 | return fl; |
797 | } |
798 | |
799 | static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos) |
800 | { |
801 | struct ip6_flowlabel *fl = ip6fl_get_first(seq); |
802 | if (fl) |
803 | while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL) |
804 | --pos; |
805 | return pos ? NULL : fl; |
806 | } |
807 | |
808 | static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos) |
809 | __acquires(RCU) |
810 | { |
811 | struct ip6fl_iter_state *state = ip6fl_seq_private(seq); |
812 | |
813 | state->pid_ns = proc_pid_ns(sb: file_inode(f: seq->file)->i_sb); |
814 | |
815 | rcu_read_lock(); |
816 | return *pos ? ip6fl_get_idx(seq, pos: *pos - 1) : SEQ_START_TOKEN; |
817 | } |
818 | |
819 | static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
820 | { |
821 | struct ip6_flowlabel *fl; |
822 | |
823 | if (v == SEQ_START_TOKEN) |
824 | fl = ip6fl_get_first(seq); |
825 | else |
826 | fl = ip6fl_get_next(seq, fl: v); |
827 | ++*pos; |
828 | return fl; |
829 | } |
830 | |
831 | static void ip6fl_seq_stop(struct seq_file *seq, void *v) |
832 | __releases(RCU) |
833 | { |
834 | rcu_read_unlock(); |
835 | } |
836 | |
837 | static int ip6fl_seq_show(struct seq_file *seq, void *v) |
838 | { |
839 | struct ip6fl_iter_state *state = ip6fl_seq_private(seq); |
840 | if (v == SEQ_START_TOKEN) { |
841 | seq_puts(m: seq, s: "Label S Owner Users Linger Expires Dst Opt\n" ); |
842 | } else { |
843 | struct ip6_flowlabel *fl = v; |
844 | seq_printf(m: seq, |
845 | fmt: "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n" , |
846 | (unsigned int)ntohl(fl->label), |
847 | fl->share, |
848 | ((fl->share == IPV6_FL_S_PROCESS) ? |
849 | pid_nr_ns(pid: fl->owner.pid, ns: state->pid_ns) : |
850 | ((fl->share == IPV6_FL_S_USER) ? |
851 | from_kuid_munged(to: seq_user_ns(seq), uid: fl->owner.uid) : |
852 | 0)), |
853 | atomic_read(v: &fl->users), |
854 | fl->linger/HZ, |
855 | (long)(fl->expires - jiffies)/HZ, |
856 | &fl->dst, |
857 | fl->opt ? fl->opt->opt_nflen : 0); |
858 | } |
859 | return 0; |
860 | } |
861 | |
862 | static const struct seq_operations ip6fl_seq_ops = { |
863 | .start = ip6fl_seq_start, |
864 | .next = ip6fl_seq_next, |
865 | .stop = ip6fl_seq_stop, |
866 | .show = ip6fl_seq_show, |
867 | }; |
868 | |
869 | static int __net_init ip6_flowlabel_proc_init(struct net *net) |
870 | { |
871 | if (!proc_create_net("ip6_flowlabel" , 0444, net->proc_net, |
872 | &ip6fl_seq_ops, sizeof(struct ip6fl_iter_state))) |
873 | return -ENOMEM; |
874 | return 0; |
875 | } |
876 | |
877 | static void __net_exit ip6_flowlabel_proc_fini(struct net *net) |
878 | { |
879 | remove_proc_entry("ip6_flowlabel" , net->proc_net); |
880 | } |
881 | #else |
/* Without CONFIG_PROC_FS there is no proc entry to create. */
static inline int ip6_flowlabel_proc_init(struct net *net)
{
	return 0;
}
/* Without CONFIG_PROC_FS there is no proc entry to remove. */
static inline void ip6_flowlabel_proc_fini(struct net *net)
{
}
889 | #endif |
890 | |
891 | static void __net_exit ip6_flowlabel_net_exit(struct net *net) |
892 | { |
893 | ip6_fl_purge(net); |
894 | ip6_flowlabel_proc_fini(net); |
895 | } |
896 | |
897 | static struct pernet_operations ip6_flowlabel_net_ops = { |
898 | .init = ip6_flowlabel_proc_init, |
899 | .exit = ip6_flowlabel_net_exit, |
900 | }; |
901 | |
902 | int ip6_flowlabel_init(void) |
903 | { |
904 | return register_pernet_subsys(&ip6_flowlabel_net_ops); |
905 | } |
906 | |
907 | void ip6_flowlabel_cleanup(void) |
908 | { |
909 | static_key_deferred_flush(&ipv6_flowlabel_exclusive); |
910 | del_timer(timer: &ip6_fl_gc_timer); |
911 | unregister_pernet_subsys(&ip6_flowlabel_net_ops); |
912 | } |
913 | |