1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * xt_HMARK - Netfilter module to set mark by means of hashing
4 *
5 * (C) 2012 by Hans Schillstrom <hans.schillstrom@ericsson.com>
6 * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
7 */
8
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11#include <linux/module.h>
12#include <linux/skbuff.h>
13#include <linux/icmp.h>
14
15#include <linux/netfilter/x_tables.h>
16#include <linux/netfilter/xt_HMARK.h>
17
18#include <net/ip.h>
19#if IS_ENABLED(CONFIG_NF_CONNTRACK)
20#include <net/netfilter/nf_conntrack.h>
21#endif
22#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
23#include <net/ipv6.h>
24#include <linux/netfilter_ipv6/ip6_tables.h>
25#endif
26
27MODULE_LICENSE("GPL");
28MODULE_AUTHOR("Hans Schillstrom <hans.schillstrom@ericsson.com>");
29MODULE_DESCRIPTION("Xtables: packet marking using hash calculation");
30MODULE_ALIAS("ipt_HMARK");
31MODULE_ALIAS("ip6t_HMARK");
32
33struct hmark_tuple {
34 __be32 src;
35 __be32 dst;
36 union hmark_ports uports;
37 u8 proto;
38};
39
40static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask)
41{
42 return (addr32[0] & mask[0]) ^
43 (addr32[1] & mask[1]) ^
44 (addr32[2] & mask[2]) ^
45 (addr32[3] & mask[3]);
46}
47
48static inline __be32
49hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask)
50{
51 switch (l3num) {
52 case AF_INET:
53 return *addr32 & *mask;
54 case AF_INET6:
55 return hmark_addr6_mask(addr32, mask);
56 }
57 return 0;
58}
59
60static inline void hmark_swap_ports(union hmark_ports *uports,
61 const struct xt_hmark_info *info)
62{
63 union hmark_ports hp;
64 u16 src, dst;
65
66 hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32;
67 src = ntohs(hp.b16.src);
68 dst = ntohs(hp.b16.dst);
69
70 if (dst > src)
71 uports->v32 = (dst << 16) | src;
72 else
73 uports->v32 = (src << 16) | dst;
74}
75
76static int
77hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
78 const struct xt_hmark_info *info)
79{
80#if IS_ENABLED(CONFIG_NF_CONNTRACK)
81 enum ip_conntrack_info ctinfo;
82 struct nf_conn *ct = nf_ct_get(skb, ctinfo: &ctinfo);
83 struct nf_conntrack_tuple *otuple;
84 struct nf_conntrack_tuple *rtuple;
85
86 if (ct == NULL)
87 return -1;
88
89 otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
90 rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
91
92 t->src = hmark_addr_mask(l3num: otuple->src.l3num, addr32: otuple->src.u3.ip6,
93 mask: info->src_mask.ip6);
94 t->dst = hmark_addr_mask(l3num: otuple->src.l3num, addr32: rtuple->src.u3.ip6,
95 mask: info->dst_mask.ip6);
96
97 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
98 return 0;
99
100 t->proto = nf_ct_protonum(ct);
101 if (t->proto != IPPROTO_ICMP) {
102 t->uports.b16.src = otuple->src.u.all;
103 t->uports.b16.dst = rtuple->src.u.all;
104 hmark_swap_ports(uports: &t->uports, info);
105 }
106
107 return 0;
108#else
109 return -1;
110#endif
111}
112
113/* This hash function is endian independent, to ensure consistent hashing if
114 * the cluster is composed of big and little endian systems. */
115static inline u32
116hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
117{
118 u32 hash;
119 u32 src = ntohl(t->src);
120 u32 dst = ntohl(t->dst);
121
122 if (dst < src)
123 swap(src, dst);
124
125 hash = jhash_3words(a: src, b: dst, c: t->uports.v32, initval: info->hashrnd);
126 hash = hash ^ (t->proto & info->proto_mask);
127
128 return reciprocal_scale(val: hash, ep_ro: info->hmodulus) + info->hoffset;
129}
130
131static void
132hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff,
133 struct hmark_tuple *t, const struct xt_hmark_info *info)
134{
135 int protoff;
136
137 protoff = proto_ports_offset(proto: t->proto);
138 if (protoff < 0)
139 return;
140
141 nhoff += protoff;
142 if (skb_copy_bits(skb, offset: nhoff, to: &t->uports, len: sizeof(t->uports)) < 0)
143 return;
144
145 hmark_swap_ports(uports: &t->uports, info);
146}
147
148#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
149static int get_inner6_hdr(const struct sk_buff *skb, int *offset)
150{
151 struct icmp6hdr *icmp6h, _ih6;
152
153 icmp6h = skb_header_pointer(skb, offset: *offset, len: sizeof(_ih6), buffer: &_ih6);
154 if (icmp6h == NULL)
155 return 0;
156
157 if (icmp6h->icmp6_type && icmp6h->icmp6_type < 128) {
158 *offset += sizeof(struct icmp6hdr);
159 return 1;
160 }
161 return 0;
162}
163
164static int
165hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
166 const struct xt_hmark_info *info)
167{
168 struct ipv6hdr *ip6, _ip6;
169 int flag = IP6_FH_F_AUTH;
170 unsigned int nhoff = 0;
171 u16 fragoff = 0;
172 int nexthdr;
173
174 ip6 = (struct ipv6hdr *) (skb->data + skb_network_offset(skb));
175 nexthdr = ipv6_find_hdr(skb, offset: &nhoff, target: -1, fragoff: &fragoff, fragflg: &flag);
176 if (nexthdr < 0)
177 return 0;
178 /* No need to check for icmp errors on fragments */
179 if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6))
180 goto noicmp;
181 /* Use inner header in case of ICMP errors */
182 if (get_inner6_hdr(skb, offset: &nhoff)) {
183 ip6 = skb_header_pointer(skb, offset: nhoff, len: sizeof(_ip6), buffer: &_ip6);
184 if (ip6 == NULL)
185 return -1;
186 /* If AH present, use SPI like in ESP. */
187 flag = IP6_FH_F_AUTH;
188 nexthdr = ipv6_find_hdr(skb, offset: &nhoff, target: -1, fragoff: &fragoff, fragflg: &flag);
189 if (nexthdr < 0)
190 return -1;
191 }
192noicmp:
193 t->src = hmark_addr6_mask(addr32: ip6->saddr.s6_addr32, mask: info->src_mask.ip6);
194 t->dst = hmark_addr6_mask(addr32: ip6->daddr.s6_addr32, mask: info->dst_mask.ip6);
195
196 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
197 return 0;
198
199 t->proto = nexthdr;
200 if (t->proto == IPPROTO_ICMPV6)
201 return 0;
202
203 if (flag & IP6_FH_F_FRAG)
204 return 0;
205
206 hmark_set_tuple_ports(skb, nhoff, t, info);
207 return 0;
208}
209
210static unsigned int
211hmark_tg_v6(struct sk_buff *skb, const struct xt_action_param *par)
212{
213 const struct xt_hmark_info *info = par->targinfo;
214 struct hmark_tuple t;
215
216 memset(&t, 0, sizeof(struct hmark_tuple));
217
218 if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
219 if (hmark_ct_set_htuple(skb, t: &t, info) < 0)
220 return XT_CONTINUE;
221 } else {
222 if (hmark_pkt_set_htuple_ipv6(skb, t: &t, info) < 0)
223 return XT_CONTINUE;
224 }
225
226 skb->mark = hmark_hash(t: &t, info);
227 return XT_CONTINUE;
228}
229#endif
230
231static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff)
232{
233 const struct icmphdr *icmph;
234 struct icmphdr _ih;
235
236 /* Not enough header? */
237 icmph = skb_header_pointer(skb, offset: *nhoff + iphsz, len: sizeof(_ih), buffer: &_ih);
238 if (icmph == NULL || icmph->type > NR_ICMP_TYPES)
239 return 0;
240
241 /* Error message? */
242 if (!icmp_is_err(type: icmph->type))
243 return 0;
244
245 *nhoff += iphsz + sizeof(_ih);
246 return 1;
247}
248
249static int
250hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t,
251 const struct xt_hmark_info *info)
252{
253 struct iphdr *ip, _ip;
254 int nhoff = skb_network_offset(skb);
255
256 ip = (struct iphdr *) (skb->data + nhoff);
257 if (ip->protocol == IPPROTO_ICMP) {
258 /* Use inner header in case of ICMP errors */
259 if (get_inner_hdr(skb, iphsz: ip->ihl * 4, nhoff: &nhoff)) {
260 ip = skb_header_pointer(skb, offset: nhoff, len: sizeof(_ip), buffer: &_ip);
261 if (ip == NULL)
262 return -1;
263 }
264 }
265
266 t->src = ip->saddr & info->src_mask.ip;
267 t->dst = ip->daddr & info->dst_mask.ip;
268
269 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
270 return 0;
271
272 t->proto = ip->protocol;
273
274 /* ICMP has no ports, skip */
275 if (t->proto == IPPROTO_ICMP)
276 return 0;
277
278 /* follow-up fragments don't contain ports, skip all fragments */
279 if (ip_is_fragment(iph: ip))
280 return 0;
281
282 hmark_set_tuple_ports(skb, nhoff: (ip->ihl * 4) + nhoff, t, info);
283
284 return 0;
285}
286
287static unsigned int
288hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par)
289{
290 const struct xt_hmark_info *info = par->targinfo;
291 struct hmark_tuple t;
292
293 memset(&t, 0, sizeof(struct hmark_tuple));
294
295 if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
296 if (hmark_ct_set_htuple(skb, t: &t, info) < 0)
297 return XT_CONTINUE;
298 } else {
299 if (hmark_pkt_set_htuple_ipv4(skb, t: &t, info) < 0)
300 return XT_CONTINUE;
301 }
302
303 skb->mark = hmark_hash(t: &t, info);
304 return XT_CONTINUE;
305}
306
307static int hmark_tg_check(const struct xt_tgchk_param *par)
308{
309 const struct xt_hmark_info *info = par->targinfo;
310 const char *errmsg = "proto mask must be zero with L3 mode";
311
312 if (!info->hmodulus)
313 return -EINVAL;
314
315 if (info->proto_mask &&
316 (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)))
317 goto err;
318
319 if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) &&
320 (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) |
321 XT_HMARK_FLAG(XT_HMARK_DPORT_MASK))))
322 return -EINVAL;
323
324 if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) &&
325 (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) |
326 XT_HMARK_FLAG(XT_HMARK_DPORT)))) {
327 errmsg = "spi-set and port-set can't be combined";
328 goto err;
329 }
330 return 0;
331err:
332 pr_info_ratelimited("%s\n", errmsg);
333 return -EINVAL;
334}
335
336static struct xt_target hmark_tg_reg[] __read_mostly = {
337 {
338 .name = "HMARK",
339 .family = NFPROTO_IPV4,
340 .target = hmark_tg_v4,
341 .targetsize = sizeof(struct xt_hmark_info),
342 .checkentry = hmark_tg_check,
343 .me = THIS_MODULE,
344 },
345#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
346 {
347 .name = "HMARK",
348 .family = NFPROTO_IPV6,
349 .target = hmark_tg_v6,
350 .targetsize = sizeof(struct xt_hmark_info),
351 .checkentry = hmark_tg_check,
352 .me = THIS_MODULE,
353 },
354#endif
355};
356
357static int __init hmark_tg_init(void)
358{
359 return xt_register_targets(target: hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
360}
361
362static void __exit hmark_tg_exit(void)
363{
364 xt_unregister_targets(target: hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
365}
366
/* Hook the init and exit routines into module load and unload. */
module_init(hmark_tg_init);
module_exit(hmark_tg_exit);
369

/* Source code of linux/net/netfilter/xt_HMARK.c */