1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * (C) 2008-2009 Pablo Neira Ayuso <pablo@netfilter.org> |
4 | */ |
5 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
6 | #include <linux/module.h> |
7 | #include <linux/skbuff.h> |
8 | #include <linux/jhash.h> |
9 | #include <linux/ip.h> |
10 | #include <net/ipv6.h> |
11 | |
12 | #include <linux/netfilter/x_tables.h> |
13 | #include <net/netfilter/nf_conntrack.h> |
14 | #include <linux/netfilter/xt_cluster.h> |
15 | |
16 | static inline u32 nf_ct_orig_ipv4_src(const struct nf_conn *ct) |
17 | { |
18 | return (__force u32)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; |
19 | } |
20 | |
21 | static inline const u32 *nf_ct_orig_ipv6_src(const struct nf_conn *ct) |
22 | { |
23 | return (__force u32 *)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6; |
24 | } |
25 | |
26 | static inline u_int32_t |
27 | xt_cluster_hash_ipv4(u_int32_t ip, const struct xt_cluster_match_info *info) |
28 | { |
29 | return jhash_1word(a: ip, initval: info->hash_seed); |
30 | } |
31 | |
32 | static inline u_int32_t |
33 | xt_cluster_hash_ipv6(const void *ip, const struct xt_cluster_match_info *info) |
34 | { |
35 | return jhash2(k: ip, NF_CT_TUPLE_L3SIZE / sizeof(__u32), initval: info->hash_seed); |
36 | } |
37 | |
38 | static inline u_int32_t |
39 | xt_cluster_hash(const struct nf_conn *ct, |
40 | const struct xt_cluster_match_info *info) |
41 | { |
42 | u_int32_t hash = 0; |
43 | |
44 | switch(nf_ct_l3num(ct)) { |
45 | case AF_INET: |
46 | hash = xt_cluster_hash_ipv4(ip: nf_ct_orig_ipv4_src(ct), info); |
47 | break; |
48 | case AF_INET6: |
49 | hash = xt_cluster_hash_ipv6(ip: nf_ct_orig_ipv6_src(ct), info); |
50 | break; |
51 | default: |
52 | WARN_ON(1); |
53 | break; |
54 | } |
55 | |
56 | return reciprocal_scale(val: hash, ep_ro: info->total_nodes); |
57 | } |
58 | |
59 | static inline bool |
60 | xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family) |
61 | { |
62 | bool is_multicast = false; |
63 | |
64 | switch(family) { |
65 | case NFPROTO_IPV4: |
66 | is_multicast = ipv4_is_multicast(addr: ip_hdr(skb)->daddr); |
67 | break; |
68 | case NFPROTO_IPV6: |
69 | is_multicast = ipv6_addr_is_multicast(addr: &ipv6_hdr(skb)->daddr); |
70 | break; |
71 | default: |
72 | WARN_ON(1); |
73 | break; |
74 | } |
75 | return is_multicast; |
76 | } |
77 | |
78 | static bool |
79 | xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) |
80 | { |
81 | struct sk_buff *pskb = (struct sk_buff *)skb; |
82 | const struct xt_cluster_match_info *info = par->matchinfo; |
83 | const struct nf_conn *ct; |
84 | enum ip_conntrack_info ctinfo; |
85 | unsigned long hash; |
86 | |
87 | /* This match assumes that all nodes see the same packets. This can be |
88 | * achieved if the switch that connects the cluster nodes support some |
89 | * sort of 'port mirroring'. However, if your switch does not support |
90 | * this, your cluster nodes can reply ARP request using a multicast MAC |
91 | * address. Thus, your switch will flood the same packets to the |
92 | * cluster nodes with the same multicast MAC address. Using a multicast |
93 | * link address is a RFC 1812 (section 3.3.2) violation, but this works |
94 | * fine in practise. |
95 | * |
96 | * Unfortunately, if you use the multicast MAC address, the link layer |
97 | * sets skbuff's pkt_type to PACKET_MULTICAST, which is not accepted |
98 | * by TCP and others for packets coming to this node. For that reason, |
99 | * this match mangles skbuff's pkt_type if it detects a packet |
100 | * addressed to a unicast address but using PACKET_MULTICAST. Yes, I |
101 | * know, matches should not alter packets, but we are doing this here |
102 | * because we would need to add a PKTTYPE target for this sole purpose. |
103 | */ |
104 | if (!xt_cluster_is_multicast_addr(skb, family: xt_family(par)) && |
105 | skb->pkt_type == PACKET_MULTICAST) { |
106 | pskb->pkt_type = PACKET_HOST; |
107 | } |
108 | |
109 | ct = nf_ct_get(skb, ctinfo: &ctinfo); |
110 | if (ct == NULL) |
111 | return false; |
112 | |
113 | if (ct->master) |
114 | hash = xt_cluster_hash(ct: ct->master, info); |
115 | else |
116 | hash = xt_cluster_hash(ct, info); |
117 | |
118 | return !!((1 << hash) & info->node_mask) ^ |
119 | !!(info->flags & XT_CLUSTER_F_INV); |
120 | } |
121 | |
122 | static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) |
123 | { |
124 | struct xt_cluster_match_info *info = par->matchinfo; |
125 | int ret; |
126 | |
127 | if (info->total_nodes > XT_CLUSTER_NODES_MAX) { |
128 | pr_info_ratelimited("you have exceeded the maximum number of cluster nodes (%u > %u)\n" , |
129 | info->total_nodes, XT_CLUSTER_NODES_MAX); |
130 | return -EINVAL; |
131 | } |
132 | if (info->node_mask >= (1ULL << info->total_nodes)) { |
133 | pr_info_ratelimited("node mask cannot exceed total number of nodes\n" ); |
134 | return -EDOM; |
135 | } |
136 | |
137 | ret = nf_ct_netns_get(net: par->net, nfproto: par->family); |
138 | if (ret < 0) |
139 | pr_info_ratelimited("cannot load conntrack support for proto=%u\n" , |
140 | par->family); |
141 | return ret; |
142 | } |
143 | |
144 | static void xt_cluster_mt_destroy(const struct xt_mtdtor_param *par) |
145 | { |
146 | nf_ct_netns_put(net: par->net, nfproto: par->family); |
147 | } |
148 | |
149 | static struct xt_match xt_cluster_match __read_mostly = { |
150 | .name = "cluster" , |
151 | .family = NFPROTO_UNSPEC, |
152 | .match = xt_cluster_mt, |
153 | .checkentry = xt_cluster_mt_checkentry, |
154 | .matchsize = sizeof(struct xt_cluster_match_info), |
155 | .destroy = xt_cluster_mt_destroy, |
156 | .me = THIS_MODULE, |
157 | }; |
158 | |
159 | static int __init xt_cluster_mt_init(void) |
160 | { |
161 | return xt_register_match(target: &xt_cluster_match); |
162 | } |
163 | |
164 | static void __exit xt_cluster_mt_fini(void) |
165 | { |
166 | xt_unregister_match(target: &xt_cluster_match); |
167 | } |
168 | |
169 | MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>" ); |
170 | MODULE_LICENSE("GPL" ); |
171 | MODULE_DESCRIPTION("Xtables: hash-based cluster match" ); |
172 | MODULE_ALIAS("ipt_cluster" ); |
173 | MODULE_ALIAS("ip6t_cluster" ); |
174 | module_init(xt_cluster_mt_init); |
175 | module_exit(xt_cluster_mt_fini); |
176 | |