1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * ip_vs_app.c: Application module support for IPVS |
4 | * |
5 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> |
6 | * |
7 | * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference |
8 | * is that ip_vs_app module handles the reverse direction (incoming requests |
9 | * and outgoing responses). |
10 | * |
11 | * IP_MASQ_APP application masquerading module |
12 | * |
13 | * Author: Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar> |
14 | */ |
15 | |
/* Component tag that pr_fmt() below places in front of message formats. */
16 | #define KMSG_COMPONENT "IPVS" |
/* Expands a format string to "IPVS: " followed by the original format. */
17 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
18 | |
19 | #include <linux/module.h> |
20 | #include <linux/kernel.h> |
21 | #include <linux/skbuff.h> |
22 | #include <linux/in.h> |
23 | #include <linux/ip.h> |
24 | #include <linux/netfilter.h> |
25 | #include <linux/slab.h> |
26 | #include <net/net_namespace.h> |
27 | #include <net/protocol.h> |
28 | #include <net/tcp.h> |
29 | #include <linux/stat.h> |
30 | #include <linux/proc_fs.h> |
31 | #include <linux/seq_file.h> |
32 | #include <linux/mutex.h> |
33 | |
34 | #include <net/ip_vs.h> |
35 | |
/* Registration entry points defined further down in this file. */
36 | EXPORT_SYMBOL(register_ip_vs_app); |
37 | EXPORT_SYMBOL(unregister_ip_vs_app); |
38 | EXPORT_SYMBOL(register_ip_vs_app_inc); |
39 | |
/*
 * Taken by register_ip_vs_app(), register_ip_vs_app_inc() and
 * unregister_ip_vs_app(), and held from seq start to seq stop while the
 * application lists are walked for the proc file.
 */
40 | static DEFINE_MUTEX(__ip_vs_app_mutex); |
41 | |
42 | /* |
43 | * Get an ip_vs_app object |
44 | */ |
45 | static inline int ip_vs_app_get(struct ip_vs_app *app) |
46 | { |
47 | return try_module_get(module: app->module); |
48 | } |
49 | |
50 | |
51 | static inline void ip_vs_app_put(struct ip_vs_app *app) |
52 | { |
53 | module_put(module: app->module); |
54 | } |
55 | |
56 | static void ip_vs_app_inc_destroy(struct ip_vs_app *inc) |
57 | { |
58 | kfree(objp: inc->timeout_table); |
59 | kfree(objp: inc); |
60 | } |
61 | |
62 | static void ip_vs_app_inc_rcu_free(struct rcu_head *head) |
63 | { |
64 | struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head); |
65 | |
66 | ip_vs_app_inc_destroy(inc); |
67 | } |
68 | |
69 | /* |
70 | * Allocate/initialize app incarnation and register it in proto apps. |
71 | */ |
72 | static int |
73 | ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto, |
74 | __u16 port) |
75 | { |
76 | struct ip_vs_protocol *pp; |
77 | struct ip_vs_app *inc; |
78 | int ret; |
79 | |
80 | if (!(pp = ip_vs_proto_get(proto))) |
81 | return -EPROTONOSUPPORT; |
82 | |
83 | if (!pp->unregister_app) |
84 | return -EOPNOTSUPP; |
85 | |
86 | inc = kmemdup(p: app, size: sizeof(*inc), GFP_KERNEL); |
87 | if (!inc) |
88 | return -ENOMEM; |
89 | INIT_LIST_HEAD(list: &inc->p_list); |
90 | INIT_LIST_HEAD(list: &inc->incs_list); |
91 | inc->app = app; |
92 | inc->port = htons(port); |
93 | atomic_set(v: &inc->usecnt, i: 0); |
94 | |
95 | if (app->timeouts) { |
96 | inc->timeout_table = |
97 | ip_vs_create_timeout_table(table: app->timeouts, |
98 | size: app->timeouts_size); |
99 | if (!inc->timeout_table) { |
100 | ret = -ENOMEM; |
101 | goto out; |
102 | } |
103 | } |
104 | |
105 | ret = pp->register_app(ipvs, inc); |
106 | if (ret) |
107 | goto out; |
108 | |
109 | list_add(new: &inc->a_list, head: &app->incs_list); |
110 | IP_VS_DBG(9, "%s App %s:%u registered\n" , |
111 | pp->name, inc->name, ntohs(inc->port)); |
112 | |
113 | return 0; |
114 | |
115 | out: |
116 | ip_vs_app_inc_destroy(inc); |
117 | return ret; |
118 | } |
119 | |
120 | |
121 | /* |
122 | * Release app incarnation |
123 | */ |
124 | static void |
125 | ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc) |
126 | { |
127 | struct ip_vs_protocol *pp; |
128 | |
129 | if (!(pp = ip_vs_proto_get(proto: inc->protocol))) |
130 | return; |
131 | |
132 | if (pp->unregister_app) |
133 | pp->unregister_app(ipvs, inc); |
134 | |
135 | IP_VS_DBG(9, "%s App %s:%u unregistered\n" , |
136 | pp->name, inc->name, ntohs(inc->port)); |
137 | |
138 | list_del(entry: &inc->a_list); |
139 | |
140 | call_rcu(head: &inc->rcu_head, func: ip_vs_app_inc_rcu_free); |
141 | } |
142 | |
143 | |
144 | /* |
145 | * Get reference to app inc (only called from softirq) |
146 | * |
147 | */ |
148 | int ip_vs_app_inc_get(struct ip_vs_app *inc) |
149 | { |
150 | int result; |
151 | |
152 | result = ip_vs_app_get(app: inc->app); |
153 | if (result) |
154 | atomic_inc(v: &inc->usecnt); |
155 | return result; |
156 | } |
157 | |
158 | |
159 | /* |
160 | * Put the app inc (only called from timer or net softirq) |
161 | */ |
162 | void ip_vs_app_inc_put(struct ip_vs_app *inc) |
163 | { |
164 | atomic_dec(v: &inc->usecnt); |
165 | ip_vs_app_put(app: inc->app); |
166 | } |
167 | |
168 | |
169 | /* |
170 | * Register an application incarnation in protocol applications |
171 | */ |
172 | int |
173 | register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto, |
174 | __u16 port) |
175 | { |
176 | int result; |
177 | |
178 | mutex_lock(&__ip_vs_app_mutex); |
179 | |
180 | result = ip_vs_app_inc_new(ipvs, app, proto, port); |
181 | |
182 | mutex_unlock(lock: &__ip_vs_app_mutex); |
183 | |
184 | return result; |
185 | } |
186 | |
187 | |
188 | /* Register application for netns */ |
189 | struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app) |
190 | { |
191 | struct ip_vs_app *a; |
192 | int err = 0; |
193 | |
194 | mutex_lock(&__ip_vs_app_mutex); |
195 | |
196 | /* increase the module use count */ |
197 | if (!ip_vs_use_count_inc()) { |
198 | err = -ENOENT; |
199 | goto out_unlock; |
200 | } |
201 | |
202 | list_for_each_entry(a, &ipvs->app_list, a_list) { |
203 | if (!strcmp(app->name, a->name)) { |
204 | err = -EEXIST; |
205 | /* decrease the module use count */ |
206 | ip_vs_use_count_dec(); |
207 | goto out_unlock; |
208 | } |
209 | } |
210 | a = kmemdup(p: app, size: sizeof(*app), GFP_KERNEL); |
211 | if (!a) { |
212 | err = -ENOMEM; |
213 | /* decrease the module use count */ |
214 | ip_vs_use_count_dec(); |
215 | goto out_unlock; |
216 | } |
217 | INIT_LIST_HEAD(list: &a->incs_list); |
218 | list_add(new: &a->a_list, head: &ipvs->app_list); |
219 | |
220 | out_unlock: |
221 | mutex_unlock(lock: &__ip_vs_app_mutex); |
222 | |
223 | return err ? ERR_PTR(error: err) : a; |
224 | } |
225 | |
226 | |
227 | /* |
228 | * ip_vs_app unregistration routine |
229 | * We are sure there are no app incarnations attached to services |
230 | * Caller should use synchronize_rcu() or rcu_barrier() |
231 | */ |
232 | void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app) |
233 | { |
234 | struct ip_vs_app *a, *anxt, *inc, *nxt; |
235 | |
236 | mutex_lock(&__ip_vs_app_mutex); |
237 | |
238 | list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) { |
239 | if (app && strcmp(app->name, a->name)) |
240 | continue; |
241 | list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) { |
242 | ip_vs_app_inc_release(ipvs, inc); |
243 | } |
244 | |
245 | list_del(entry: &a->a_list); |
246 | kfree(objp: a); |
247 | |
248 | /* decrease the module use count */ |
249 | ip_vs_use_count_dec(); |
250 | } |
251 | |
252 | mutex_unlock(lock: &__ip_vs_app_mutex); |
253 | } |
254 | |
255 | |
256 | /* |
257 | * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) |
258 | */ |
259 | int ip_vs_bind_app(struct ip_vs_conn *cp, |
260 | struct ip_vs_protocol *pp) |
261 | { |
262 | return pp->app_conn_bind(cp); |
263 | } |
264 | |
265 | |
266 | /* |
267 | * Unbind cp from application incarnation (called by cp destructor) |
268 | */ |
269 | void ip_vs_unbind_app(struct ip_vs_conn *cp) |
270 | { |
271 | struct ip_vs_app *inc = cp->app; |
272 | |
273 | if (!inc) |
274 | return; |
275 | |
276 | if (inc->unbind_conn) |
277 | inc->unbind_conn(inc, cp); |
278 | if (inc->done_conn) |
279 | inc->done_conn(inc, cp); |
280 | ip_vs_app_inc_put(inc); |
281 | cp->app = NULL; |
282 | } |
283 | |
284 | |
285 | /* |
286 | * Fixes th->seq based on ip_vs_seq info. |
287 | */ |
288 | static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) |
289 | { |
290 | __u32 seq = ntohl(th->seq); |
291 | |
292 | /* |
293 | * Adjust seq with delta-offset for all packets after |
294 | * the most recent resized pkt seq and with previous_delta offset |
295 | * for all packets before most recent resized pkt seq. |
296 | */ |
297 | if (vseq->delta || vseq->previous_delta) { |
298 | if(after(seq, vseq->init_seq)) { |
299 | th->seq = htonl(seq + vseq->delta); |
300 | IP_VS_DBG(9, "%s(): added delta (%d) to seq\n" , |
301 | __func__, vseq->delta); |
302 | } else { |
303 | th->seq = htonl(seq + vseq->previous_delta); |
304 | IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n" , |
305 | __func__, vseq->previous_delta); |
306 | } |
307 | } |
308 | } |
309 | |
310 | |
311 | /* |
312 | * Fixes th->ack_seq based on ip_vs_seq info. |
313 | */ |
314 | static inline void |
315 | vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) |
316 | { |
317 | __u32 ack_seq = ntohl(th->ack_seq); |
318 | |
319 | /* |
320 | * Adjust ack_seq with delta-offset for |
321 | * the packets AFTER most recent resized pkt has caused a shift |
322 | * for packets before most recent resized pkt, use previous_delta |
323 | */ |
324 | if (vseq->delta || vseq->previous_delta) { |
325 | /* since ack_seq is the number of octet that is expected |
326 | to receive next, so compare it with init_seq+delta */ |
327 | if(after(ack_seq, vseq->init_seq+vseq->delta)) { |
328 | th->ack_seq = htonl(ack_seq - vseq->delta); |
329 | IP_VS_DBG(9, "%s(): subtracted delta " |
330 | "(%d) from ack_seq\n" , __func__, vseq->delta); |
331 | |
332 | } else { |
333 | th->ack_seq = htonl(ack_seq - vseq->previous_delta); |
334 | IP_VS_DBG(9, "%s(): subtracted " |
335 | "previous_delta (%d) from ack_seq\n" , |
336 | __func__, vseq->previous_delta); |
337 | } |
338 | } |
339 | } |
340 | |
341 | |
342 | /* |
343 | * Updates ip_vs_seq if pkt has been resized |
344 | * Assumes already checked proto==IPPROTO_TCP and diff!=0. |
345 | */ |
346 | static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, |
347 | unsigned int flag, __u32 seq, int diff) |
348 | { |
349 | /* spinlock is to keep updating cp->flags atomic */ |
350 | spin_lock_bh(lock: &cp->lock); |
351 | if (!(cp->flags & flag) || after(seq, vseq->init_seq)) { |
352 | vseq->previous_delta = vseq->delta; |
353 | vseq->delta += diff; |
354 | vseq->init_seq = seq; |
355 | cp->flags |= flag; |
356 | } |
357 | spin_unlock_bh(lock: &cp->lock); |
358 | } |
359 | |
360 | static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, |
361 | struct ip_vs_app *app, |
362 | struct ip_vs_iphdr *ipvsh) |
363 | { |
364 | int diff; |
365 | const unsigned int tcp_offset = ip_hdrlen(skb); |
366 | struct tcphdr *th; |
367 | __u32 seq; |
368 | |
369 | if (skb_ensure_writable(skb, write_len: tcp_offset + sizeof(*th))) |
370 | return 0; |
371 | |
372 | th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); |
373 | |
374 | /* |
375 | * Remember seq number in case this pkt gets resized |
376 | */ |
377 | seq = ntohl(th->seq); |
378 | |
379 | /* |
380 | * Fix seq stuff if flagged as so. |
381 | */ |
382 | if (cp->flags & IP_VS_CONN_F_OUT_SEQ) |
383 | vs_fix_seq(vseq: &cp->out_seq, th); |
384 | if (cp->flags & IP_VS_CONN_F_IN_SEQ) |
385 | vs_fix_ack_seq(vseq: &cp->in_seq, th); |
386 | |
387 | /* |
388 | * Call private output hook function |
389 | */ |
390 | if (app->pkt_out == NULL) |
391 | return 1; |
392 | |
393 | if (!app->pkt_out(app, cp, skb, &diff, ipvsh)) |
394 | return 0; |
395 | |
396 | /* |
397 | * Update ip_vs seq stuff if len has changed. |
398 | */ |
399 | if (diff != 0) |
400 | vs_seq_update(cp, vseq: &cp->out_seq, |
401 | IP_VS_CONN_F_OUT_SEQ, seq, diff); |
402 | |
403 | return 1; |
404 | } |
405 | |
406 | /* |
407 | * Output pkt hook. Will call bound ip_vs_app specific function |
408 | * called by ipvs packet handler, assumes previously checked cp!=NULL |
409 | * returns false if it can't handle packet (oom) |
410 | */ |
411 | int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, |
412 | struct ip_vs_iphdr *ipvsh) |
413 | { |
414 | struct ip_vs_app *app; |
415 | |
416 | /* |
417 | * check if application module is bound to |
418 | * this ip_vs_conn. |
419 | */ |
420 | if ((app = cp->app) == NULL) |
421 | return 1; |
422 | |
423 | /* TCP is complicated */ |
424 | if (cp->protocol == IPPROTO_TCP) |
425 | return app_tcp_pkt_out(cp, skb, app, ipvsh); |
426 | |
427 | /* |
428 | * Call private output hook function |
429 | */ |
430 | if (app->pkt_out == NULL) |
431 | return 1; |
432 | |
433 | return app->pkt_out(app, cp, skb, NULL, ipvsh); |
434 | } |
435 | |
436 | |
437 | static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, |
438 | struct ip_vs_app *app, |
439 | struct ip_vs_iphdr *ipvsh) |
440 | { |
441 | int diff; |
442 | const unsigned int tcp_offset = ip_hdrlen(skb); |
443 | struct tcphdr *th; |
444 | __u32 seq; |
445 | |
446 | if (skb_ensure_writable(skb, write_len: tcp_offset + sizeof(*th))) |
447 | return 0; |
448 | |
449 | th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); |
450 | |
451 | /* |
452 | * Remember seq number in case this pkt gets resized |
453 | */ |
454 | seq = ntohl(th->seq); |
455 | |
456 | /* |
457 | * Fix seq stuff if flagged as so. |
458 | */ |
459 | if (cp->flags & IP_VS_CONN_F_IN_SEQ) |
460 | vs_fix_seq(vseq: &cp->in_seq, th); |
461 | if (cp->flags & IP_VS_CONN_F_OUT_SEQ) |
462 | vs_fix_ack_seq(vseq: &cp->out_seq, th); |
463 | |
464 | /* |
465 | * Call private input hook function |
466 | */ |
467 | if (app->pkt_in == NULL) |
468 | return 1; |
469 | |
470 | if (!app->pkt_in(app, cp, skb, &diff, ipvsh)) |
471 | return 0; |
472 | |
473 | /* |
474 | * Update ip_vs seq stuff if len has changed. |
475 | */ |
476 | if (diff != 0) |
477 | vs_seq_update(cp, vseq: &cp->in_seq, |
478 | IP_VS_CONN_F_IN_SEQ, seq, diff); |
479 | |
480 | return 1; |
481 | } |
482 | |
483 | /* |
484 | * Input pkt hook. Will call bound ip_vs_app specific function |
485 | * called by ipvs packet handler, assumes previously checked cp!=NULL. |
486 | * returns false if can't handle packet (oom). |
487 | */ |
488 | int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, |
489 | struct ip_vs_iphdr *ipvsh) |
490 | { |
491 | struct ip_vs_app *app; |
492 | |
493 | /* |
494 | * check if application module is bound to |
495 | * this ip_vs_conn. |
496 | */ |
497 | if ((app = cp->app) == NULL) |
498 | return 1; |
499 | |
500 | /* TCP is complicated */ |
501 | if (cp->protocol == IPPROTO_TCP) |
502 | return app_tcp_pkt_in(cp, skb, app, ipvsh); |
503 | |
504 | /* |
505 | * Call private input hook function |
506 | */ |
507 | if (app->pkt_in == NULL) |
508 | return 1; |
509 | |
510 | return app->pkt_in(app, cp, skb, NULL, ipvsh); |
511 | } |
512 | |
513 | |
514 | #ifdef CONFIG_PROC_FS |
515 | /* |
516 | * /proc/net/ip_vs_app entry function |
517 | */ |
518 | |
519 | static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos) |
520 | { |
521 | struct ip_vs_app *app, *inc; |
522 | |
523 | list_for_each_entry(app, &ipvs->app_list, a_list) { |
524 | list_for_each_entry(inc, &app->incs_list, a_list) { |
525 | if (pos-- == 0) |
526 | return inc; |
527 | } |
528 | } |
529 | return NULL; |
530 | |
531 | } |
532 | |
533 | static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) |
534 | { |
535 | struct net *net = seq_file_net(seq); |
536 | struct netns_ipvs *ipvs = net_ipvs(net); |
537 | |
538 | mutex_lock(&__ip_vs_app_mutex); |
539 | |
540 | return *pos ? ip_vs_app_idx(ipvs, pos: *pos - 1) : SEQ_START_TOKEN; |
541 | } |
542 | |
543 | static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
544 | { |
545 | struct ip_vs_app *inc, *app; |
546 | struct list_head *e; |
547 | struct net *net = seq_file_net(seq); |
548 | struct netns_ipvs *ipvs = net_ipvs(net); |
549 | |
550 | ++*pos; |
551 | if (v == SEQ_START_TOKEN) |
552 | return ip_vs_app_idx(ipvs, pos: 0); |
553 | |
554 | inc = v; |
555 | app = inc->app; |
556 | |
557 | if ((e = inc->a_list.next) != &app->incs_list) |
558 | return list_entry(e, struct ip_vs_app, a_list); |
559 | |
560 | /* go on to next application */ |
561 | for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) { |
562 | app = list_entry(e, struct ip_vs_app, a_list); |
563 | list_for_each_entry(inc, &app->incs_list, a_list) { |
564 | return inc; |
565 | } |
566 | } |
567 | return NULL; |
568 | } |
569 | |
570 | static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) |
571 | { |
572 | mutex_unlock(lock: &__ip_vs_app_mutex); |
573 | } |
574 | |
575 | static int ip_vs_app_seq_show(struct seq_file *seq, void *v) |
576 | { |
577 | if (v == SEQ_START_TOKEN) |
578 | seq_puts(m: seq, s: "prot port usecnt name\n" ); |
579 | else { |
580 | const struct ip_vs_app *inc = v; |
581 | |
582 | seq_printf(m: seq, fmt: "%-3s %-7u %-6d %-17s\n" , |
583 | ip_vs_proto_name(proto: inc->protocol), |
584 | ntohs(inc->port), |
585 | atomic_read(v: &inc->usecnt), |
586 | inc->name); |
587 | } |
588 | return 0; |
589 | } |
590 | |
591 | static const struct seq_operations ip_vs_app_seq_ops = { |
592 | .start = ip_vs_app_seq_start, |
593 | .next = ip_vs_app_seq_next, |
594 | .stop = ip_vs_app_seq_stop, |
595 | .show = ip_vs_app_seq_show, |
596 | }; |
597 | #endif |
598 | |
599 | int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) |
600 | { |
601 | INIT_LIST_HEAD(list: &ipvs->app_list); |
602 | #ifdef CONFIG_PROC_FS |
603 | if (!proc_create_net("ip_vs_app" , 0, ipvs->net->proc_net, |
604 | &ip_vs_app_seq_ops, |
605 | sizeof(struct seq_net_private))) |
606 | return -ENOMEM; |
607 | #endif |
608 | return 0; |
609 | } |
610 | |
611 | void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) |
612 | { |
613 | unregister_ip_vs_app(ipvs, NULL /* all */); |
614 | #ifdef CONFIG_PROC_FS |
615 | remove_proc_entry("ip_vs_app" , ipvs->net->proc_net); |
616 | #endif |
617 | } |
618 | |