skbuff.c source code [linux/net/core/skbuff.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2	/*
3	* Routines having to do with the 'struct sk_buff' memory handlers.
4	*
5	* Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
6	* Florian La Roche <rzsfl@rz.uni-sb.de>
7	*
8	* Fixes:
9	* Alan Cox : Fixed the worst of the load
10	* balancer bugs.
11	* Dave Platt : Interrupt stacking fix.
12	* Richard Kooijman : Timestamp fixes.
13	* Alan Cox : Changed buffer format.
14	* Alan Cox : destructor hook for AF_UNIX etc.
15	* Linus Torvalds : Better skb_clone.
16	* Alan Cox : Added skb_copy.
17	* Alan Cox : Added all the changed routines Linus
18	* only put in the headers
19	* Ray VanTassle : Fixed --skb->lock in free
20	* Alan Cox : skb_copy copy arp field
21	* Andi Kleen : slabified it.
22	* Robert Olsson : Removed skb_head_pool
23	*
24	* NOTE:
25	* The __skb_ routines should be called with interrupts
26	* disabled, or you better be real sure that the operation is atomic
27	* with respect to whatever list is being frobbed (e.g. via lock_sock()
28	* or via disabling bottom half handlers, etc).
29	*/
30
31	/*
32	* The functions in this file will not compile correctly with gcc 2.4.x
33	*/
34
35	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
36
37	#include <linux/module.h>
38	#include <linux/types.h>
39	#include <linux/kernel.h>
40	#include <linux/mm.h>
41	#include <linux/interrupt.h>
42	#include <linux/in.h>
43	#include <linux/inet.h>
44	#include <linux/slab.h>
45	#include <linux/tcp.h>
46	#include <linux/udp.h>
47	#include <linux/sctp.h>
48	#include <linux/netdevice.h>
49	#ifdef CONFIG_NET_CLS_ACT
50	#include <net/pkt_sched.h>
51	#endif
52	#include <linux/string.h>
53	#include <linux/skbuff.h>
54	#include <linux/splice.h>
55	#include <linux/cache.h>
56	#include <linux/rtnetlink.h>
57	#include <linux/init.h>
58	#include <linux/scatterlist.h>
59	#include <linux/errqueue.h>
60	#include <linux/prefetch.h>
61	#include <linux/bitfield.h>
62	#include <linux/if_vlan.h>
63	#include <linux/mpls.h>
64	#include <linux/kcov.h>
65	#include <linux/iov_iter.h>
66
67	#include <net/protocol.h>
68	#include <net/dst.h>
69	#include <net/sock.h>
70	#include <net/checksum.h>
71	#include <net/gso.h>
72	#include <net/ip6_checksum.h>
73	#include <net/xfrm.h>
74	#include <net/mpls.h>
75	#include <net/mptcp.h>
76	#include <net/mctp.h>
77	#include <net/page_pool/helpers.h>
78	#include <net/dropreason.h>
79
80	#include <linux/uaccess.h>
81	#include <trace/events/skb.h>
82	#include <linux/highmem.h>
83	#include <linux/capability.h>
84	#include <linux/user_namespace.h>
85	#include <linux/indirect_call_wrapper.h>
86	#include <linux/textsearch.h>
87
88	#include "dev.h"
89	#include "sock_destructor.h"
90
91	struct kmem_cache *skbuff_cache __ro_after_init;
92	static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
93	#ifdef CONFIG_SKB_EXTENSIONS
94	static struct kmem_cache *skbuff_ext_cache __ro_after_init;
95	#endif
96
97
98	static struct kmem_cache *skb_small_head_cache __ro_after_init;
99
100	#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
101
102	/ We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two.*
103	* This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique
104	* size, and we can differentiate heads from skb_small_head_cache
105	* vs system slabs by looking at their size (skb_end_offset()).
106	*/
107	#define SKB_SMALL_HEAD_CACHE_SIZE \
108	(is_power_of_2(SKB_SMALL_HEAD_SIZE) ? \
109	(SKB_SMALL_HEAD_SIZE + L1_CACHE_BYTES) : \
110	SKB_SMALL_HEAD_SIZE)
111
112	#define SKB_SMALL_HEAD_HEADROOM \
113	SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
114
115	int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
116	EXPORT_SYMBOL(sysctl_max_skb_frags);
117
118	#undef FN
119	#define FN(reason) [SKB_DROP_REASON_##reason] = #reason,
120	static const char * const drop_reasons[] = {
121	[SKB_CONSUMED] = "CONSUMED",
122	DEFINE_DROP_REASON(FN, FN)
123	};
124
125	static const struct drop_reason_list drop_reasons_core = {
126	.reasons = drop_reasons,
127	.n_reasons = ARRAY_SIZE(drop_reasons),
128	};
129
130	const struct drop_reason_list __rcu *
131	drop_reasons_by_subsys[SKB_DROP_REASON_SUBSYS_NUM] = {
132	[SKB_DROP_REASON_SUBSYS_CORE] = RCU_INITIALIZER(&drop_reasons_core),
133	};
134	EXPORT_SYMBOL(drop_reasons_by_subsys);
135
136	/**
137	* drop_reasons_register_subsys - register another drop reason subsystem
138	* @subsys: the subsystem to register, must not be the core
139	* @list: the list of drop reasons within the subsystem, must point to
140	* a statically initialized list
141	*/
142	void drop_reasons_register_subsys(enum skb_drop_reason_subsys subsys,
143	const struct drop_reason_list *list)
144	{
145	if (WARN(subsys <= SKB_DROP_REASON_SUBSYS_CORE \|\|
146	subsys >= ARRAY_SIZE(drop_reasons_by_subsys),
147	"invalid subsystem %d\n", subsys))
148	return;
149
150	/ must point to statically allocated memory, so INIT is OK /
151	RCU_INIT_POINTER(drop_reasons_by_subsys[subsys], list);
152	}
153	EXPORT_SYMBOL_GPL(drop_reasons_register_subsys);
154
155	/**
156	* drop_reasons_unregister_subsys - unregister a drop reason subsystem
157	* @subsys: the subsystem to remove, must not be the core
158	*
159	* Note: This will synchronize_rcu() to ensure no users when it returns.
160	*/
161	void drop_reasons_unregister_subsys(enum skb_drop_reason_subsys subsys)
162	{
163	if (WARN(subsys <= SKB_DROP_REASON_SUBSYS_CORE \|\|
164	subsys >= ARRAY_SIZE(drop_reasons_by_subsys),
165	"invalid subsystem %d\n", subsys))
166	return;
167
168	RCU_INIT_POINTER(drop_reasons_by_subsys[subsys], NULL);
169
170	synchronize_rcu();
171	}
172	EXPORT_SYMBOL_GPL(drop_reasons_unregister_subsys);
173
174	/**
175	* skb_panic - private function for out-of-line support
176	* @skb: buffer
177	* @sz: size
178	* @addr: address
179	* @msg: skb_over_panic or skb_under_panic
180	*
181	* Out-of-line support for skb_put() and skb_push().
182	* Called via the wrapper skb_over_panic() or skb_under_panic().
183	* Keep out of line to prevent kernel bloat.
184	* __builtin_return_address is not used because it is not always reliable.
185	*/
186	static void skb_panic(struct sk_buff skb, unsigned* int sz, void *addr,
187	const char msg[])
188	{
189	pr_emerg("%s: text:%px len:%d put:%d head:%px data:%px tail:%#lx end:%#lx dev:%s\n",
190	msg, addr, skb->len, sz, skb->head, skb->data,
191	(unsigned long)skb->tail, (unsigned long)skb->end,
192	skb->dev ? skb->dev->name : "<NULL>");
193	BUG();
194	}
195
196	static void skb_over_panic(struct sk_buff skb, unsigned* int sz, void *addr)
197	{
198	skb_panic(skb, sz, addr, msg: __func__);
199	}
200
201	static void skb_under_panic(struct sk_buff skb, unsigned* int sz, void *addr)
202	{
203	skb_panic(skb, sz, addr, msg: __func__);
204	}
205
206	#define NAPI_SKB_CACHE_SIZE 64
207	#define NAPI_SKB_CACHE_BULK 16
208	#define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)
209
210	#if PAGE_SIZE == SZ_4K
211
212	#define NAPI_HAS_SMALL_PAGE_FRAG 1
213	#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc)
214
215	/ specialized page frag allocator using a single order 0 page*
216	* and slicing it into 1K sized fragment. Constrained to systems
217	* with a very limited amount of 1K fragments fitting a single
218	* page - to avoid excessive truesize underestimation
219	*/
220
221	struct page_frag_1k {
222	void *va;
223	u16 offset;
224	bool pfmemalloc;
225	};
226
227	static void page_frag_alloc_1k(struct* page_frag_1k *nc, gfp_t gfp)
228	{
229	struct page *page;
230	int offset;
231
232	offset = nc->offset - SZ_1K;
233	if (likely(offset >= `0`))
234	goto use_frag;
235
236	page = alloc_pages_node(NUMA_NO_NODE, gfp_mask: gfp, order: `0`);
237	if (!page)
238	return NULL;
239
240	nc->va = page_address(page);
241	nc->pfmemalloc = page_is_pfmemalloc(page);
242	offset = PAGE_SIZE - SZ_1K;
243	page_ref_add(page, nr: offset / SZ_1K);
244
245	use_frag:
246	nc->offset = offset;
247	return nc->va + offset;
248	}
249	#else
250
251	/ the small page is actually unused in this build; add dummy helpers*
252	* to please the compiler and avoid later preprocessor's conditionals
253	*/
254	#define NAPI_HAS_SMALL_PAGE_FRAG 0
255	#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false
256
257	struct page_frag_1k {
258	};
259
260	static void page_frag_alloc_1k(struct* page_frag_1k *nc, gfp_t gfp_mask)
261	{
262	return NULL;
263	}
264
265	#endif
266
267	struct napi_alloc_cache {
268	struct page_frag_cache page;
269	struct page_frag_1k page_small;
270	unsigned int skb_count;
271	void *skb_cache[NAPI_SKB_CACHE_SIZE];
272	};
273
274	static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
275	static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
276
277	/ Double check that napi_get_frags() allocates skbs with*
278	* skb->head being backed by slab, not a page fragment.
279	* This is to make sure bug fixed in 3226b158e67c
280	* ("net: avoid 32 x truesize under-estimation for tiny skbs")
281	* does not accidentally come back.
282	*/
283	void napi_get_frags_check(struct napi_struct *napi)
284	{
285	struct sk_buff *skb;
286
287	local_bh_disable();
288	skb = napi_get_frags(napi);
289	WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
290	napi_free_frags(napi);
291	local_bh_enable();
292	}
293
294	void __napi_alloc_frag_align(unsigned* int fragsz, unsigned int align_mask)
295	{
296	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
297
298	fragsz = SKB_DATA_ALIGN(fragsz);
299
300	return page_frag_alloc_align(nc: &nc->page, fragsz, GFP_ATOMIC, align_mask);
301	}
302	EXPORT_SYMBOL(__napi_alloc_frag_align);
303
304	void __netdev_alloc_frag_align(unsigned* int fragsz, unsigned int align_mask)
305	{
306	void *data;
307
308	fragsz = SKB_DATA_ALIGN(fragsz);
309	if (in_hardirq() \|\| irqs_disabled()) {
310	struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache);
311
312	data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask);
313	} else {
314	struct napi_alloc_cache *nc;
315
316	local_bh_disable();
317	nc = this_cpu_ptr(&napi_alloc_cache);
318	data = page_frag_alloc_align(nc: &nc->page, fragsz, GFP_ATOMIC, align_mask);
319	local_bh_enable();
320	}
321	return data;
322	}
323	EXPORT_SYMBOL(__netdev_alloc_frag_align);
324
325	static struct sk_buff napi_skb_cache_get(void*)
326	{
327	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
328	struct sk_buff *skb;
329
330	if (unlikely(!nc->skb_count)) {
331	nc->skb_count = kmem_cache_alloc_bulk(s: skbuff_cache,
332	GFP_ATOMIC,
333	NAPI_SKB_CACHE_BULK,
334	p: nc->skb_cache);
335	if (unlikely(!nc->skb_count))
336	return NULL;
337	}
338
339	skb = nc->skb_cache[--nc->skb_count];
340	kasan_unpoison_object_data(cache: skbuff_cache, object: skb);
341
342	return skb;
343	}
344
345	static inline void __finalize_skb_around(struct sk_buff skb, void* *data,
346	unsigned int size)
347	{
348	struct skb_shared_info *shinfo;
349
350	size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
351
352	/ Assumes caller memset cleared SKB /
353	skb->truesize = SKB_TRUESIZE(size);
354	refcount_set(r: &skb->users, n: `1`);
355	skb->head = data;
356	skb->data = data;
357	skb_reset_tail_pointer(skb);
358	skb_set_end_offset(skb, offset: size);
359	skb->mac_header = (typeof(skb->mac_header))~`0U`;
360	skb->transport_header = (typeof(skb->transport_header))~`0U`;
361	skb->alloc_cpu = raw_smp_processor_id();
362	/ make sure we initialize shinfo sequentially /
363	shinfo = skb_shinfo(skb);
364	memset(shinfo, `0`, offsetof(struct skb_shared_info, dataref));
365	atomic_set(v: &shinfo->dataref, i: `1`);
366
367	skb_set_kcov_handle(skb, kcov_handle: kcov_common_handle());
368	}
369
370	static inline void __slab_build_skb(struct* sk_buff skb, void* *data,
371	unsigned int *size)
372	{
373	void *resized;
374
375	/ Must find the allocation size (and grow it to match). /
376	*size = ksize(objp: data);
377	/ krealloc() will immediately return "data" when*
378	* "ksize(data)" is requested: it is the existing upper
379	* bounds. As a result, GFP_ATOMIC will be ignored. Note
380	* that this "new" pointer needs to be passed back to the
381	* caller for use so the __alloc_size hinting will be
382	* tracked correctly.
383	*/
384	resized = krealloc(objp: data, new_size: *size, GFP_ATOMIC);
385	WARN_ON_ONCE(resized != data);
386	return resized;
387	}
388
389	/ build_skb() variant which can operate on slab buffers.*
390	* Note that this should be used sparingly as slab buffers
391	* cannot be combined efficiently by GRO!
392	*/
393	struct sk_buff slab_build_skb(void* *data)
394	{
395	struct sk_buff *skb;
396	unsigned int size;
397
398	skb = kmem_cache_alloc(cachep: skbuff_cache, GFP_ATOMIC);
399	if (unlikely(!skb))
400	return NULL;
401
402	memset(skb, `0`, offsetof(struct sk_buff, tail));
403	data = __slab_build_skb(skb, data, size: &size);
404	__finalize_skb_around(skb, data, size);
405
406	return skb;
407	}
408	EXPORT_SYMBOL(slab_build_skb);
409
410	/ Caller must provide SKB that is memset cleared /
411	static void __build_skb_around(struct sk_buff skb, void* *data,
412	unsigned int frag_size)
413	{
414	unsigned int size = frag_size;
415
416	/ frag_size == 0 is considered deprecated now. Callers*
417	* using slab buffer should use slab_build_skb() instead.
418	*/
419	if (WARN_ONCE(size == `0`, "Use slab_build_skb() instead"))
420	data = __slab_build_skb(skb, data, size: &size);
421
422	__finalize_skb_around(skb, data, size);
423	}
424
425	/**
426	* __build_skb - build a network buffer
427	* @data: data buffer provided by caller
428	* @frag_size: size of data (must not be 0)
429	*
430	* Allocate a new &sk_buff. Caller provides space holding head and
431	* skb_shared_info. @data must have been allocated from the page
432	* allocator or vmalloc(). (A @frag_size of 0 to indicate a kmalloc()
433	* allocation is deprecated, and callers should use slab_build_skb()
434	* instead.)
435	* The return is the new skb buffer.
436	* On a failure the return is %NULL, and @data is not freed.
437	* Notes :
438	* Before IO, driver allocates only data buffer where NIC put incoming frame
439	* Driver should add room at head (NET_SKB_PAD) and
440	* MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
441	* After IO, driver calls build_skb(), to allocate sk_buff and populate it
442	* before giving packet to stack.
443	* RX rings only contains data buffers, not full skbs.
444	*/
445	struct sk_buff __build_skb(void* data, unsigned* int frag_size)
446	{
447	struct sk_buff *skb;
448
449	skb = kmem_cache_alloc(cachep: skbuff_cache, GFP_ATOMIC);
450	if (unlikely(!skb))
451	return NULL;
452
453	memset(skb, `0`, offsetof(struct sk_buff, tail));
454	__build_skb_around(skb, data, frag_size);
455
456	return skb;
457	}
458
459	/ build_skb() is wrapper over __build_skb(), that specifically*
460	* takes care of skb->head and skb->pfmemalloc
461	*/
462	struct sk_buff build_skb(void* data, unsigned* int frag_size)
463	{
464	struct sk_buff *skb = __build_skb(data, frag_size);
465
466	if (likely(skb && frag_size)) {
467	skb->head_frag = `1`;
468	skb_propagate_pfmemalloc(page: virt_to_head_page(x: data), skb);
469	}
470	return skb;
471	}
472	EXPORT_SYMBOL(build_skb);
473
474	/**
475	* build_skb_around - build a network buffer around provided skb
476	* @skb: sk_buff provide by caller, must be memset cleared
477	* @data: data buffer provided by caller
478	* @frag_size: size of data
479	*/
480	struct sk_buff build_skb_around(struct* sk_buff *skb,
481	void data, unsigned* int frag_size)
482	{
483	if (unlikely(!skb))
484	return NULL;
485
486	__build_skb_around(skb, data, frag_size);
487
488	if (frag_size) {
489	skb->head_frag = `1`;
490	skb_propagate_pfmemalloc(page: virt_to_head_page(x: data), skb);
491	}
492	return skb;
493	}
494	EXPORT_SYMBOL(build_skb_around);
495
496	/**
497	* __napi_build_skb - build a network buffer
498	* @data: data buffer provided by caller
499	* @frag_size: size of data
500	*
501	* Version of __build_skb() that uses NAPI percpu caches to obtain
502	* skbuff_head instead of inplace allocation.
503	*
504	* Returns a new &sk_buff on success, %NULL on allocation failure.
505	*/
506	static struct sk_buff __napi_build_skb(void* data, unsigned* int frag_size)
507	{
508	struct sk_buff *skb;
509
510	skb = napi_skb_cache_get();
511	if (unlikely(!skb))
512	return NULL;
513
514	memset(skb, `0`, offsetof(struct sk_buff, tail));
515	__build_skb_around(skb, data, frag_size);
516
517	return skb;
518	}
519
520	/**
521	* napi_build_skb - build a network buffer
522	* @data: data buffer provided by caller
523	* @frag_size: size of data
524	*
525	* Version of __napi_build_skb() that takes care of skb->head_frag
526	* and skb->pfmemalloc when the data is a page or page fragment.
527	*
528	* Returns a new &sk_buff on success, %NULL on allocation failure.
529	*/
530	struct sk_buff napi_build_skb(void* data, unsigned* int frag_size)
531	{
532	struct sk_buff *skb = __napi_build_skb(data, frag_size);
533
534	if (likely(skb) && frag_size) {
535	skb->head_frag = `1`;
536	skb_propagate_pfmemalloc(page: virt_to_head_page(x: data), skb);
537	}
538
539	return skb;
540	}
541	EXPORT_SYMBOL(napi_build_skb);
542
543	/*
544	* kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
545	* the caller if emergency pfmemalloc reserves are being used. If it is and
546	* the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
547	* may be used. Otherwise, the packet data may be discarded until enough
548	* memory is free
549	*/
550	static void kmalloc_reserve(unsigned* int size, gfp_t flags, int* node,
551	bool *pfmemalloc)
552	{
553	bool ret_pfmemalloc = false;
554	size_t obj_size;
555	void *obj;
556
557	obj_size = SKB_HEAD_ALIGN(*size);
558	if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
559	!(flags & KMALLOC_NOT_NORMAL_BITS)) {
560	obj = kmem_cache_alloc_node(s: skb_small_head_cache,
561	flags: flags \| __GFP_NOMEMALLOC \| __GFP_NOWARN,
562	node);
563	*size = SKB_SMALL_HEAD_CACHE_SIZE;
564	if (obj \|\| !(gfp_pfmemalloc_allowed(gfp_mask: flags)))
565	goto out;
566	/ Try again but now we are using pfmemalloc reserves /
567	ret_pfmemalloc = true;
568	obj = kmem_cache_alloc_node(s: skb_small_head_cache, flags, node);
569	goto out;
570	}
571
572	obj_size = kmalloc_size_roundup(size: obj_size);
573	/ The following cast might truncate high-order bits of obj_size, this*
574	* is harmless because kmalloc(obj_size >= 2^32) will fail anyway.
575	*/
576	size = (unsigned* int)obj_size;
577
578	/*
579	* Try a regular allocation, when that fails and we're not entitled
580	* to the reserves, fail.
581	*/
582	obj = kmalloc_node_track_caller(obj_size,
583	flags \| __GFP_NOMEMALLOC \| __GFP_NOWARN,
584	node);
585	if (obj \|\| !(gfp_pfmemalloc_allowed(gfp_mask: flags)))
586	goto out;
587
588	/ Try again but now we are using pfmemalloc reserves /
589	ret_pfmemalloc = true;
590	obj = kmalloc_node_track_caller(obj_size, flags, node);
591
592	out:
593	if (pfmemalloc)
594	*pfmemalloc = ret_pfmemalloc;
595
596	return obj;
597	}
598
599	/ Allocate a new skbuff. We do this ourselves so we can fill in a few*
600	* 'private' fields and also do memory statistics to find all the
601	* [BEEP] leaks.
602	*
603	*/
604
605	/**
606	* __alloc_skb - allocate a network buffer
607	* @size: size to allocate
608	* @gfp_mask: allocation mask
609	* @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
610	* instead of head cache and allocate a cloned (child) skb.
611	* If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
612	* allocations in case the data is required for writeback
613	* @node: numa node to allocate memory on
614	*
615	* Allocate a new &sk_buff. The returned buffer has no headroom and a
616	* tail room of at least size bytes. The object has a reference count
617	* of one. The return is the buffer. On a failure the return is %NULL.
618	*
619	* Buffers may only be allocated from interrupts using a @gfp_mask of
620	* %GFP_ATOMIC.
621	*/
622	struct sk_buff __alloc_skb(unsigned* int size, gfp_t gfp_mask,
623	int flags, int node)
624	{
625	struct kmem_cache *cache;
626	struct sk_buff *skb;
627	bool pfmemalloc;
628	u8 *data;
629
630	cache = (flags & SKB_ALLOC_FCLONE)
631	? skbuff_fclone_cache : skbuff_cache;
632
633	if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
634	gfp_mask \|= __GFP_MEMALLOC;
635
636	/ Get the HEAD /
637	if ((flags & (SKB_ALLOC_FCLONE \| SKB_ALLOC_NAPI)) == SKB_ALLOC_NAPI &&
638	likely(node == NUMA_NO_NODE \|\| node == numa_mem_id()))
639	skb = napi_skb_cache_get();
640	else
641	skb = kmem_cache_alloc_node(s: cache, flags: gfp_mask & ~GFP_DMA, node);
642	if (unlikely(!skb))
643	return NULL;
644	prefetchw(x: skb);
645
646	/ We do our best to align skb_shared_info on a separate cache*
647	* line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
648	* aligned memory blocks, unless SLUB/SLAB debug is enabled.
649	* Both skb->head and skb_shared_info are cache line aligned.
650	*/
651	data = kmalloc_reserve(size: &size, flags: gfp_mask, node, pfmemalloc: &pfmemalloc);
652	if (unlikely(!data))
653	goto nodata;
654	/ kmalloc_size_roundup() might give us more room than requested.*
655	* Put skb_shared_info exactly at the end of allocated zone,
656	* to allow max possible filling before reallocation.
657	*/
658	prefetchw(x: data + SKB_WITH_OVERHEAD(size));
659
660	/*
661	* Only clear those fields we need to clear, not those that we will
662	* actually initialise below. Hence, don't put any more fields after
663	* the tail pointer in struct sk_buff!
664	*/
665	memset(skb, `0`, offsetof(struct sk_buff, tail));
666	__build_skb_around(skb, data, frag_size: size);
667	skb->pfmemalloc = pfmemalloc;
668
669	if (flags & SKB_ALLOC_FCLONE) {
670	struct sk_buff_fclones *fclones;
671
672	fclones = container_of(skb, struct sk_buff_fclones, skb1);
673
674	skb->fclone = SKB_FCLONE_ORIG;
675	refcount_set(r: &fclones->fclone_ref, n: `1`);
676	}
677
678	return skb;
679
680	nodata:
681	kmem_cache_free(s: cache, objp: skb);
682	return NULL;
683	}
684	EXPORT_SYMBOL(__alloc_skb);
685
686	/**
687	* __netdev_alloc_skb - allocate an skbuff for rx on a specific device
688	* @dev: network device to receive on
689	* @len: length to allocate
690	* @gfp_mask: get_free_pages mask, passed to alloc_skb
691	*
692	* Allocate a new &sk_buff and assign it a usage count of one. The
693	* buffer has NET_SKB_PAD headroom built in. Users should allocate
694	* the headroom they think they need without accounting for the
695	* built in space. The built in space is used for optimisations.
696	*
697	* %NULL is returned if there is no free memory.
698	*/
699	struct sk_buff __netdev_alloc_skb(struct* net_device dev, unsigned* int len,
700	gfp_t gfp_mask)
701	{
702	struct page_frag_cache *nc;
703	struct sk_buff *skb;
704	bool pfmemalloc;
705	void *data;
706
707	len += NET_SKB_PAD;
708
709	/ If requested length is either too small or too big,*
710	* we use kmalloc() for skb->head allocation.
711	*/
712	if (len <= SKB_WITH_OVERHEAD(`1024`) \|\|
713	len > SKB_WITH_OVERHEAD(PAGE_SIZE) \|\|
714	(gfp_mask & (__GFP_DIRECT_RECLAIM \| GFP_DMA))) {
715	skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
716	if (!skb)
717	goto skb_fail;
718	goto skb_success;
719	}
720
721	len = SKB_HEAD_ALIGN(len);
722
723	if (sk_memalloc_socks())
724	gfp_mask \|= __GFP_MEMALLOC;
725
726	if (in_hardirq() \|\| irqs_disabled()) {
727	nc = this_cpu_ptr(&netdev_alloc_cache);
728	data = page_frag_alloc(nc, fragsz: len, gfp_mask);
729	pfmemalloc = nc->pfmemalloc;
730	} else {
731	local_bh_disable();
732	nc = this_cpu_ptr(&napi_alloc_cache.page);
733	data = page_frag_alloc(nc, fragsz: len, gfp_mask);
734	pfmemalloc = nc->pfmemalloc;
735	local_bh_enable();
736	}
737
738	if (unlikely(!data))
739	return NULL;
740
741	skb = __build_skb(data, frag_size: len);
742	if (unlikely(!skb)) {
743	skb_free_frag(addr: data);
744	return NULL;
745	}
746
747	if (pfmemalloc)
748	skb->pfmemalloc = `1`;
749	skb->head_frag = `1`;
750
751	skb_success:
752	skb_reserve(skb, NET_SKB_PAD);
753	skb->dev = dev;
754
755	skb_fail:
756	return skb;
757	}
758	EXPORT_SYMBOL(__netdev_alloc_skb);
759
760	/**
761	* __napi_alloc_skb - allocate skbuff for rx in a specific NAPI instance
762	* @napi: napi instance this buffer was allocated for
763	* @len: length to allocate
764	* @gfp_mask: get_free_pages mask, passed to alloc_skb and alloc_pages
765	*
766	* Allocate a new sk_buff for use in NAPI receive. This buffer will
767	* attempt to allocate the head from a special reserved region used
768	* only for NAPI Rx allocation. By doing this we can save several
769	* CPU cycles by avoiding having to disable and re-enable IRQs.
770	*
771	* %NULL is returned if there is no free memory.
772	*/
773	struct sk_buff __napi_alloc_skb(struct* napi_struct napi, unsigned* int len,
774	gfp_t gfp_mask)
775	{
776	struct napi_alloc_cache *nc;
777	struct sk_buff *skb;
778	bool pfmemalloc;
779	void *data;
780
781	DEBUG_NET_WARN_ON_ONCE(!in_softirq());
782	len += NET_SKB_PAD + NET_IP_ALIGN;
783
784	/ If requested length is either too small or too big,*
785	* we use kmalloc() for skb->head allocation.
786	* When the small frag allocator is available, prefer it over kmalloc
787	* for small fragments
788	*/
789	if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(`1024`)) \|\|
790	len > SKB_WITH_OVERHEAD(PAGE_SIZE) \|\|
791	(gfp_mask & (__GFP_DIRECT_RECLAIM \| GFP_DMA))) {
792	skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX \| SKB_ALLOC_NAPI,
793	NUMA_NO_NODE);
794	if (!skb)
795	goto skb_fail;
796	goto skb_success;
797	}
798
799	nc = this_cpu_ptr(&napi_alloc_cache);
800
801	if (sk_memalloc_socks())
802	gfp_mask \|= __GFP_MEMALLOC;
803
804	if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(`1024`)) {
805	/ we are artificially inflating the allocation size, but*
806	* that is not as bad as it may look like, as:
807	* - 'len' less than GRO_MAX_HEAD makes little sense
808	* - On most systems, larger 'len' values lead to fragment
809	* size above 512 bytes
810	* - kmalloc would use the kmalloc-1k slab for such values
811	* - Builds with smaller GRO_MAX_HEAD will very likely do
812	* little networking, as that implies no WiFi and no
813	* tunnels support, and 32 bits arches.
814	*/
815	len = SZ_1K;
816
817	data = page_frag_alloc_1k(nc: &nc->page_small, gfp: gfp_mask);
818	pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
819	} else {
820	len = SKB_HEAD_ALIGN(len);
821
822	data = page_frag_alloc(nc: &nc->page, fragsz: len, gfp_mask);
823	pfmemalloc = nc->page.pfmemalloc;
824	}
825
826	if (unlikely(!data))
827	return NULL;
828
829	skb = __napi_build_skb(data, frag_size: len);
830	if (unlikely(!skb)) {
831	skb_free_frag(addr: data);
832	return NULL;
833	}
834
835	if (pfmemalloc)
836	skb->pfmemalloc = `1`;
837	skb->head_frag = `1`;
838
839	skb_success:
840	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
841	skb->dev = napi->dev;
842
843	skb_fail:
844	return skb;
845	}
846	EXPORT_SYMBOL(__napi_alloc_skb);
847
848	void skb_add_rx_frag(struct sk_buff skb, int* i, struct page page, int* off,
849	int size, unsigned int truesize)
850	{
851	DEBUG_NET_WARN_ON_ONCE(size > truesize);
852
853	skb_fill_page_desc(skb, i, page, off, size);
854	skb->len += size;
855	skb->data_len += size;
856	skb->truesize += truesize;
857	}
858	EXPORT_SYMBOL(skb_add_rx_frag);
859
860	void skb_coalesce_rx_frag(struct sk_buff skb, int* i, int size,
861	unsigned int truesize)
862	{
863	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
864
865	DEBUG_NET_WARN_ON_ONCE(size > truesize);
866
867	skb_frag_size_add(frag, delta: size);
868	skb->len += size;
869	skb->data_len += size;
870	skb->truesize += truesize;
871	}
872	EXPORT_SYMBOL(skb_coalesce_rx_frag);
873
874	static void skb_drop_list(struct sk_buff **listp)
875	{
876	kfree_skb_list(segs: *listp);
877	*listp = NULL;
878	}
879
880	static inline void skb_drop_fraglist(struct sk_buff *skb)
881	{
882	skb_drop_list(listp: &skb_shinfo(skb)->frag_list);
883	}
884
885	static void skb_clone_fraglist(struct sk_buff *skb)
886	{
887	struct sk_buff *list;
888
889	skb_walk_frags(skb, list)
890	skb_get(skb: list);
891	}
892
893	#if IS_ENABLED(CONFIG_PAGE_POOL)
894	bool napi_pp_put_page(struct page *page, bool napi_safe)
895	{
896	bool allow_direct = false;
897	struct page_pool *pp;
898
899	page = compound_head(page);
900
901	/ page->pp_magic is OR'ed with PP_SIGNATURE after the allocation*
902	* in order to preserve any existing bits, such as bit 0 for the
903	* head page of compound page and bit 1 for pfmemalloc page, so
904	* mask those bits for freeing side when doing below checking,
905	* and page_is_pfmemalloc() is checked in __page_pool_put_page()
906	* to avoid recycling the pfmemalloc page.
907	*/
908	if (unlikely((page->pp_magic & ~`0x3UL`) != PP_SIGNATURE))
909	return false;
910
911	pp = page->pp;
912
913	/ Allow direct recycle if we have reasons to believe that we are*
914	* in the same context as the consumer would run, so there's
915	* no possible race.
916	* __page_pool_put_page() makes sure we're not in hardirq context
917	* and interrupts are enabled prior to accessing the cache.
918	*/
919	if (napi_safe \|\| in_softirq()) {
920	const struct napi_struct *napi = READ_ONCE(pp->p.napi);
921
922	allow_direct = napi &&
923	READ_ONCE(napi->list_owner) == smp_processor_id();
924	}
925
926	/ Driver set this to memory recycling info. Reset it on recycle.*
927	* This will not work for NIC using a split-page memory model.
928	* The page will be returned to the pool here regardless of the
929	* 'flipped' fragment being in use or not.
930	*/
931	page_pool_put_full_page(pool: pp, page, allow_direct);
932
933	return true;
934	}
935	EXPORT_SYMBOL(napi_pp_put_page);
936	#endif
937
938	static bool skb_pp_recycle(struct sk_buff skb, void* *data, bool napi_safe)
939	{
940	if (!IS_ENABLED(CONFIG_PAGE_POOL) \|\| !skb->pp_recycle)
941	return false;
942	return napi_pp_put_page(virt_to_page(data), napi_safe);
943	}
944
945	static void skb_kfree_head(void head, unsigned* int end_offset)
946	{
947	if (end_offset == SKB_SMALL_HEAD_HEADROOM)
948	kmem_cache_free(s: skb_small_head_cache, objp: head);
949	else
950	kfree(objp: head);
951	}
952
953	static void skb_free_head(struct sk_buff *skb, bool napi_safe)
954	{
955	unsigned char *head = skb->head;
956
957	if (skb->head_frag) {
958	if (skb_pp_recycle(skb, data: head, napi_safe))
959	return;
960	skb_free_frag(addr: head);
961	} else {
962	skb_kfree_head(head, end_offset: skb_end_offset(skb));
963	}
964	}
965
966	static void skb_release_data(struct sk_buff skb, enum* skb_drop_reason reason,
967	bool napi_safe)
968	{
969	struct skb_shared_info *shinfo = skb_shinfo(skb);
970	int i;
971
972	if (skb->cloned &&
973	atomic_sub_return(i: skb->nohdr ? (`1` << SKB_DATAREF_SHIFT) + `1` : `1`,
974	v: &shinfo->dataref))
975	goto exit;
976
977	if (skb_zcopy(skb)) {
978	bool skip_unref = shinfo->flags & SKBFL_MANAGED_FRAG_REFS;
979
980	skb_zcopy_clear(skb, zerocopy_success: true);
981	if (skip_unref)
982	goto free_head;
983	}
984
985	for (i = `0`; i < shinfo->nr_frags; i++)
986	napi_frag_unref(frag: &shinfo->frags[i], recycle: skb->pp_recycle, napi_safe);
987
988	free_head:
989	if (shinfo->frag_list)
990	kfree_skb_list_reason(segs: shinfo->frag_list, reason);
991
992	skb_free_head(skb, napi_safe);
993	exit:
994	/ When we clone an SKB we copy the reycling bit. The pp_recycle*
995	* bit is only set on the head though, so in order to avoid races
996	* while trying to recycle fragments on __skb_frag_unref() we need
997	* to make one SKB responsible for triggering the recycle path.
998	* So disable the recycling bit if an SKB is cloned and we have
999	* additional references to the fragmented part of the SKB.
1000	* Eventually the last SKB will have the recycling bit set and it's
1001	* dataref set to 0, which will trigger the recycling
1002	*/
1003	skb->pp_recycle = `0`;
1004	}
1005
1006	/*
1007	* Free an skbuff by memory without cleaning the state.
1008	*/
1009	static void kfree_skbmem(struct sk_buff *skb)
1010	{
1011	struct sk_buff_fclones *fclones;
1012
1013	switch (skb->fclone) {
1014	case SKB_FCLONE_UNAVAILABLE:
1015	kmem_cache_free(s: skbuff_cache, objp: skb);
1016	return;
1017
1018	case SKB_FCLONE_ORIG:
1019	fclones = container_of(skb, struct sk_buff_fclones, skb1);
1020
1021	/ We usually free the clone (TX completion) before original skb*
1022	* This test would have no chance to be true for the clone,
1023	* while here, branch prediction will be good.
1024	*/
1025	if (refcount_read(r: &fclones->fclone_ref) == `1`)
1026	goto fastpath;
1027	break;
1028
1029	default: / SKB_FCLONE_CLONE /
1030	fclones = container_of(skb, struct sk_buff_fclones, skb2);
1031	break;
1032	}
1033	if (!refcount_dec_and_test(r: &fclones->fclone_ref))
1034	return;
1035	fastpath:
1036	kmem_cache_free(s: skbuff_fclone_cache, objp: fclones);
1037	}
1038
1039	void skb_release_head_state(struct sk_buff *skb)
1040	{
1041	skb_dst_drop(skb);
1042	if (skb->destructor) {
1043	DEBUG_NET_WARN_ON_ONCE(in_hardirq());
1044	skb->destructor(skb);
1045	}
1046	#if IS_ENABLED(CONFIG_NF_CONNTRACK)
1047	nf_conntrack_put(nfct: skb_nfct(skb));
1048	#endif
1049	skb_ext_put(skb);
1050	}
1051
1052	/ Free everything but the sk_buff shell. /
1053	static void skb_release_all(struct sk_buff skb, enum* skb_drop_reason reason,
1054	bool napi_safe)
1055	{
1056	skb_release_head_state(skb);
1057	if (likely(skb->head))
1058	skb_release_data(skb, reason, napi_safe);
1059	}
1060
1061	/**
1062	* __kfree_skb - private function
1063	* @skb: buffer
1064	*
1065	* Free an sk_buff. Release anything attached to the buffer.
1066	* Clean the state. This is an internal helper function. Users should
1067	* always call kfree_skb
1068	*/
1069
1070	void __kfree_skb(struct sk_buff *skb)
1071	{
1072	skb_release_all(skb, reason: SKB_DROP_REASON_NOT_SPECIFIED, napi_safe: false);
1073	kfree_skbmem(skb);
1074	}
1075	EXPORT_SYMBOL(__kfree_skb);
1076
1077	static __always_inline
1078	bool __kfree_skb_reason(struct sk_buff skb, enum* skb_drop_reason reason)
1079	{
1080	if (unlikely(!skb_unref(skb)))
1081	return false;
1082
1083	DEBUG_NET_WARN_ON_ONCE(reason == SKB_NOT_DROPPED_YET \|\|
1084	u32_get_bits(reason,
1085	SKB_DROP_REASON_SUBSYS_MASK) >=
1086	SKB_DROP_REASON_SUBSYS_NUM);
1087
1088	if (reason == SKB_CONSUMED)
1089	trace_consume_skb(skb, location: __builtin_return_address(`0`));
1090	else
1091	trace_kfree_skb(skb, location: __builtin_return_address(`0`), reason);
1092	return true;
1093	}
1094
1095	/**
1096	* kfree_skb_reason - free an sk_buff with special reason
1097	* @skb: buffer to free
1098	* @reason: reason why this skb is dropped
1099	*
1100	* Drop a reference to the buffer and free it if the usage count has
1101	* hit zero. Meanwhile, pass the drop reason to 'kfree_skb'
1102	* tracepoint.
1103	*/
1104	void __fix_address
1105	kfree_skb_reason(struct sk_buff skb, enum* skb_drop_reason reason)
1106	{
1107	if (__kfree_skb_reason(skb, reason))
1108	__kfree_skb(skb);
1109	}
1110	EXPORT_SYMBOL(kfree_skb_reason);
1111
1112	#define KFREE_SKB_BULK_SIZE 16
1113
1114	struct skb_free_array {
1115	unsigned int skb_count;
1116	void *skb_array[KFREE_SKB_BULK_SIZE];
1117	};
1118
1119	static void kfree_skb_add_bulk(struct sk_buff *skb,
1120	struct skb_free_array *sa,
1121	enum skb_drop_reason reason)
1122	{
1123	/ if SKB is a clone, don't handle this case /
1124	if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) {
1125	__kfree_skb(skb);
1126	return;
1127	}
1128
1129	skb_release_all(skb, reason, napi_safe: false);
1130	sa->skb_array[sa->skb_count++] = skb;
1131
1132	if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) {
1133	kmem_cache_free_bulk(s: skbuff_cache, KFREE_SKB_BULK_SIZE,
1134	p: sa->skb_array);
1135	sa->skb_count = `0`;
1136	}
1137	}
1138
1139	void __fix_address
1140	kfree_skb_list_reason(struct sk_buff segs, enum* skb_drop_reason reason)
1141	{
1142	struct skb_free_array sa;
1143
1144	sa.skb_count = `0`;
1145
1146	while (segs) {
1147	struct sk_buff *next = segs->next;
1148
1149	if (__kfree_skb_reason(skb: segs, reason)) {
1150	skb_poison_list(skb: segs);
1151	kfree_skb_add_bulk(skb: segs, sa: &sa, reason);
1152	}
1153
1154	segs = next;
1155	}
1156
1157	if (sa.skb_count)
1158	kmem_cache_free_bulk(s: skbuff_cache, size: sa.skb_count, p: sa.skb_array);
1159	}
1160	EXPORT_SYMBOL(kfree_skb_list_reason);
1161
1162	/ Dump skb information and contents.*
1163	*
1164	* Must only be called from net_ratelimit()-ed paths.
1165	*
1166	* Dumps whole packets if full_pkt, only headers otherwise.
1167	*/
1168	void skb_dump(const char level, const* struct sk_buff *skb, bool full_pkt)
1169	{
1170	struct skb_shared_info *sh = skb_shinfo(skb);
1171	struct net_device *dev = skb->dev;
1172	struct sock *sk = skb->sk;
1173	struct sk_buff *list_skb;
1174	bool has_mac, has_trans;
1175	int headroom, tailroom;
1176	int i, len, seg_len;
1177
1178	if (full_pkt)
1179	len = skb->len;
1180	else
1181	len = min_t(int, skb->len, MAX_HEADER + `128`);
1182
1183	headroom = skb_headroom(skb);
1184	tailroom = skb_tailroom(skb);
1185
1186	has_mac = skb_mac_header_was_set(skb);
1187	has_trans = skb_transport_header_was_set(skb);
1188
1189	printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n"
1190	"mac=(%d,%d) net=(%d,%d) trans=%d\n"
1191	"shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n"
1192	"csum(0x%x ip_summed=%u complete_sw=%u valid=%u level=%u)\n"
1193	"hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n",
1194	level, skb->len, headroom, skb_headlen(skb), tailroom,
1195	has_mac ? skb->mac_header : -`1`,
1196	has_mac ? skb_mac_header_len(skb) : -`1`,
1197	skb->network_header,
1198	has_trans ? skb_network_header_len(skb) : -`1`,
1199	has_trans ? skb->transport_header : -`1`,
1200	sh->tx_flags, sh->nr_frags,
1201	sh->gso_size, sh->gso_type, sh->gso_segs,
1202	skb->csum, skb->ip_summed, skb->csum_complete_sw,
1203	skb->csum_valid, skb->csum_level,
1204	skb->hash, skb->sw_hash, skb->l4_hash,
1205	ntohs(skb->protocol), skb->pkt_type, skb->skb_iif);
1206
1207	if (dev)
1208	printk("%sdev name=%s feat=%pNF\n",
1209	level, dev->name, &dev->features);
1210	if (sk)
1211	printk("%ssk family=%hu type=%u proto=%u\n",
1212	level, sk->sk_family, sk->sk_type, sk->sk_protocol);
1213
1214	if (full_pkt && headroom)
1215	print_hex_dump(level, prefix_str: "skb headroom: ", prefix_type: DUMP_PREFIX_OFFSET,
1216	rowsize: `16`, groupsize: `1`, buf: skb->head, len: headroom, ascii: false);
1217
1218	seg_len = min_t(int, skb_headlen(skb), len);
1219	if (seg_len)
1220	print_hex_dump(level, prefix_str: "skb linear: ", prefix_type: DUMP_PREFIX_OFFSET,
1221	rowsize: `16`, groupsize: `1`, buf: skb->data, len: seg_len, ascii: false);
1222	len -= seg_len;
1223
1224	if (full_pkt && tailroom)
1225	print_hex_dump(level, prefix_str: "skb tailroom: ", prefix_type: DUMP_PREFIX_OFFSET,
1226	rowsize: `16`, groupsize: `1`, buf: skb_tail_pointer(skb), len: tailroom, ascii: false);
1227
1228	for (i = `0`; len && i < skb_shinfo(skb)->nr_frags; i++) {
1229	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1230	u32 p_off, p_len, copied;
1231	struct page *p;
1232	u8 *vaddr;
1233
1234	skb_frag_foreach_page(frag, skb_frag_off(frag),
1235	skb_frag_size(frag), p, p_off, p_len,
1236	copied) {
1237	seg_len = min_t(int, p_len, len);
1238	vaddr = kmap_atomic(page: p);
1239	print_hex_dump(level, prefix_str: "skb frag: ",
1240	prefix_type: DUMP_PREFIX_OFFSET,
1241	rowsize: `16`, groupsize: `1`, buf: vaddr + p_off, len: seg_len, ascii: false);
1242	kunmap_atomic(vaddr);
1243	len -= seg_len;
1244	if (!len)
1245	break;
1246	}
1247	}
1248
1249	if (full_pkt && skb_has_frag_list(skb)) {
1250	printk("skb fraglist:\n");
1251	skb_walk_frags(skb, list_skb)
1252	skb_dump(level, skb: list_skb, full_pkt: true);
1253	}
1254	}
1255	EXPORT_SYMBOL(skb_dump);
1256
1257	/**
1258	* skb_tx_error - report an sk_buff xmit error
1259	* @skb: buffer that triggered an error
1260	*
1261	* Report xmit error if a device callback is tracking this skb.
1262	* skb must be freed afterwards.
1263	*/
1264	void skb_tx_error(struct sk_buff *skb)
1265	{
1266	if (skb) {
1267	skb_zcopy_downgrade_managed(skb);
1268	skb_zcopy_clear(skb, zerocopy_success: true);
1269	}
1270	}
1271	EXPORT_SYMBOL(skb_tx_error);
1272
1273	#ifdef CONFIG_TRACEPOINTS
1274	/**
1275	* consume_skb - free an skbuff
1276	* @skb: buffer to free
1277	*
1278	* Drop a ref to the buffer and free it if the usage count has hit zero
1279	* Functions identically to kfree_skb, but kfree_skb assumes that the frame
1280	* is being dropped after a failure and notes that
1281	*/
1282	void consume_skb(struct sk_buff *skb)
1283	{
1284	if (!skb_unref(skb))
1285	return;
1286
1287	trace_consume_skb(skb, location: __builtin_return_address(`0`));
1288	__kfree_skb(skb);
1289	}
1290	EXPORT_SYMBOL(consume_skb);
1291	#endif
1292
1293	/**
1294	* __consume_stateless_skb - free an skbuff, assuming it is stateless
1295	* @skb: buffer to free
1296	*
1297	* Alike consume_skb(), but this variant assumes that this is the last
1298	* skb reference and all the head states have been already dropped
1299	*/
1300	void __consume_stateless_skb(struct sk_buff *skb)
1301	{
1302	trace_consume_skb(skb, location: __builtin_return_address(`0`));
1303	skb_release_data(skb, reason: SKB_CONSUMED, napi_safe: false);
1304	kfree_skbmem(skb);
1305	}
1306
1307	static void napi_skb_cache_put(struct sk_buff *skb)
1308	{
1309	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
1310	u32 i;
1311
1312	kasan_poison_object_data(cache: skbuff_cache, object: skb);
1313	nc->skb_cache[nc->skb_count++] = skb;
1314
1315	if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
1316	for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++)
1317	kasan_unpoison_object_data(cache: skbuff_cache,
1318	object: nc->skb_cache[i]);
1319
1320	kmem_cache_free_bulk(s: skbuff_cache, NAPI_SKB_CACHE_HALF,
1321	p: nc->skb_cache + NAPI_SKB_CACHE_HALF);
1322	nc->skb_count = NAPI_SKB_CACHE_HALF;
1323	}
1324	}
1325
1326	void __napi_kfree_skb(struct sk_buff skb, enum* skb_drop_reason reason)
1327	{
1328	skb_release_all(skb, reason, napi_safe: true);
1329	napi_skb_cache_put(skb);
1330	}
1331
1332	void napi_skb_free_stolen_head(struct sk_buff *skb)
1333	{
1334	if (unlikely(skb->slow_gro)) {
1335	nf_reset_ct(skb);
1336	skb_dst_drop(skb);
1337	skb_ext_put(skb);
1338	skb_orphan(skb);
1339	skb->slow_gro = `0`;
1340	}
1341	napi_skb_cache_put(skb);
1342	}
1343
1344	void napi_consume_skb(struct sk_buff skb, int* budget)
1345	{
1346	/ Zero budget indicate non-NAPI context called us, like netpoll /
1347	if (unlikely(!budget)) {
1348	dev_consume_skb_any(skb);
1349	return;
1350	}
1351
1352	DEBUG_NET_WARN_ON_ONCE(!in_softirq());
1353
1354	if (!skb_unref(skb))
1355	return;
1356
1357	/ if reaching here SKB is ready to free /
1358	trace_consume_skb(skb, location: __builtin_return_address(`0`));
1359
1360	/ if SKB is a clone, don't handle this case /
1361	if (skb->fclone != SKB_FCLONE_UNAVAILABLE) {
1362	__kfree_skb(skb);
1363	return;
1364	}
1365
1366	skb_release_all(skb, reason: SKB_CONSUMED, napi_safe: !!budget);
1367	napi_skb_cache_put(skb);
1368	}
1369	EXPORT_SYMBOL(napi_consume_skb);
1370
1371	/ Make sure a field is contained by headers group /
1372	#define CHECK_SKB_FIELD(field) \
1373	BUILD_BUG_ON(offsetof(struct sk_buff, field) != \
1374	offsetof(struct sk_buff, headers.field)); \
1375
1376	static void __copy_skb_header(struct sk_buff new, const* struct sk_buff *old)
1377	{
1378	new->tstamp = old->tstamp;
1379	/ We do not copy old->sk /
1380	new->dev = old->dev;
1381	memcpy(new->cb, old->cb, sizeof(old->cb));
1382	skb_dst_copy(nskb: new, oskb: old);
1383	__skb_ext_copy(dst: new, src: old);
1384	__nf_copy(dst: new, src: old, copy: false);
1385
1386	/ Note : this field could be in the headers group.*
1387	* It is not yet because we do not want to have a 16 bit hole
1388	*/
1389	new->queue_mapping = old->queue_mapping;
1390
1391	memcpy(&new->headers, &old->headers, sizeof(new->headers));
1392	CHECK_SKB_FIELD(protocol);
1393	CHECK_SKB_FIELD(csum);
1394	CHECK_SKB_FIELD(hash);
1395	CHECK_SKB_FIELD(priority);
1396	CHECK_SKB_FIELD(skb_iif);
1397	CHECK_SKB_FIELD(vlan_proto);
1398	CHECK_SKB_FIELD(vlan_tci);
1399	CHECK_SKB_FIELD(transport_header);
1400	CHECK_SKB_FIELD(network_header);
1401	CHECK_SKB_FIELD(mac_header);
1402	CHECK_SKB_FIELD(inner_protocol);
1403	CHECK_SKB_FIELD(inner_transport_header);
1404	CHECK_SKB_FIELD(inner_network_header);
1405	CHECK_SKB_FIELD(inner_mac_header);
1406	CHECK_SKB_FIELD(mark);
1407	#ifdef CONFIG_NETWORK_SECMARK
1408	CHECK_SKB_FIELD(secmark);
1409	#endif
1410	#ifdef CONFIG_NET_RX_BUSY_POLL
1411	CHECK_SKB_FIELD(napi_id);
1412	#endif
1413	CHECK_SKB_FIELD(alloc_cpu);
1414	#ifdef CONFIG_XPS
1415	CHECK_SKB_FIELD(sender_cpu);
1416	#endif
1417	#ifdef CONFIG_NET_SCHED
1418	CHECK_SKB_FIELD(tc_index);
1419	#endif
1420
1421	}
1422
1423	/*
1424	* You should not add any new code to this function. Add it to
1425	* __copy_skb_header above instead.
1426	*/
1427	static struct sk_buff __skb_clone(struct* sk_buff n, struct* sk_buff *skb)
1428	{
1429	#define C(x) n->x = skb->x
1430
1431	n->next = n->prev = NULL;
1432	n->sk = NULL;
1433	__copy_skb_header(new: n, old: skb);
1434
1435	C(len);
1436	C(data_len);
1437	C(mac_len);
1438	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
1439	n->cloned = `1`;
1440	n->nohdr = `0`;
1441	n->peeked = `0`;
1442	C(pfmemalloc);
1443	C(pp_recycle);
1444	n->destructor = NULL;
1445	C(tail);
1446	C(end);
1447	C(head);
1448	C(head_frag);
1449	C(data);
1450	C(truesize);
1451	refcount_set(r: &n->users, n: `1`);
1452
1453	atomic_inc(v: &(skb_shinfo(skb)->dataref));
1454	skb->cloned = `1`;
1455
1456	return n;
1457	#undef C
1458	}
1459
1460	/**
1461	* alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg
1462	* @first: first sk_buff of the msg
1463	*/
1464	struct sk_buff alloc_skb_for_msg(struct* sk_buff *first)
1465	{
1466	struct sk_buff *n;
1467
1468	n = alloc_skb(size: `0`, GFP_ATOMIC);
1469	if (!n)
1470	return NULL;
1471
1472	n->len = first->len;
1473	n->data_len = first->len;
1474	n->truesize = first->truesize;
1475
1476	skb_shinfo(n)->frag_list = first;
1477
1478	__copy_skb_header(new: n, old: first);
1479	n->destructor = NULL;
1480
1481	return n;
1482	}
1483	EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
1484
1485	/**
1486	* skb_morph - morph one skb into another
1487	* @dst: the skb to receive the contents
1488	* @src: the skb to supply the contents
1489	*
1490	* This is identical to skb_clone except that the target skb is
1491	* supplied by the user.
1492	*
1493	* The target skb is returned upon exit.
1494	*/
1495	struct sk_buff skb_morph(struct* sk_buff dst, struct* sk_buff *src)
1496	{
1497	skb_release_all(skb: dst, reason: SKB_CONSUMED, napi_safe: false);
1498	return __skb_clone(n: dst, skb: src);
1499	}
1500	EXPORT_SYMBOL_GPL(skb_morph);
1501
1502	int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
1503	{
1504	unsigned long max_pg, num_pg, new_pg, old_pg, rlim;
1505	struct user_struct *user;
1506
1507	if (capable(CAP_IPC_LOCK) \|\| !size)
1508	return `0`;
1509
1510	rlim = rlimit(RLIMIT_MEMLOCK);
1511	if (rlim == RLIM_INFINITY)
1512	return `0`;
1513
1514	num_pg = (size >> PAGE_SHIFT) + `2`; / worst case /
1515	max_pg = rlim >> PAGE_SHIFT;
1516	user = mmp->user ? : current_user();
1517
1518	old_pg = atomic_long_read(v: &user->locked_vm);
1519	do {
1520	new_pg = old_pg + num_pg;
1521	if (new_pg > max_pg)
1522	return -ENOBUFS;
1523	} while (!atomic_long_try_cmpxchg(v: &user->locked_vm, old: &old_pg, new: new_pg));
1524
1525	if (!mmp->user) {
1526	mmp->user = get_uid(u: user);
1527	mmp->num_pg = num_pg;
1528	} else {
1529	mmp->num_pg += num_pg;
1530	}
1531
1532	return `0`;
1533	}
1534	EXPORT_SYMBOL_GPL(mm_account_pinned_pages);
1535
1536	void mm_unaccount_pinned_pages(struct mmpin *mmp)
1537	{
1538	if (mmp->user) {
1539	atomic_long_sub(i: mmp->num_pg, v: &mmp->user->locked_vm);
1540	free_uid(mmp->user);
1541	}
1542	}
1543	EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
1544
1545	static struct ubuf_info msg_zerocopy_alloc(struct* sock *sk, size_t size)
1546	{
1547	struct ubuf_info_msgzc *uarg;
1548	struct sk_buff *skb;
1549
1550	WARN_ON_ONCE(!in_task());
1551
1552	skb = sock_omalloc(sk, size: `0`, GFP_KERNEL);
1553	if (!skb)
1554	return NULL;
1555
1556	BUILD_BUG_ON(sizeof(uarg) > sizeof*(skb->cb));
1557	uarg = (void *)skb->cb;
1558	uarg->mmp.user = NULL;
1559
1560	if (mm_account_pinned_pages(&uarg->mmp, size)) {
1561	kfree_skb(skb);
1562	return NULL;
1563	}
1564
1565	uarg->ubuf.callback = msg_zerocopy_callback;
1566	uarg->id = ((u32)atomic_inc_return(v: &sk->sk_zckey)) - `1`;
1567	uarg->len = `1`;
1568	uarg->bytelen = size;
1569	uarg->zerocopy = `1`;
1570	uarg->ubuf.flags = SKBFL_ZEROCOPY_FRAG \| SKBFL_DONT_ORPHAN;
1571	refcount_set(r: &uarg->ubuf.refcnt, n: `1`);
1572	sock_hold(sk);
1573
1574	return &uarg->ubuf;
1575	}
1576
1577	static inline struct sk_buff skb_from_uarg(struct* ubuf_info_msgzc *uarg)
1578	{
1579	return container_of((void )uarg, struct* sk_buff, cb);
1580	}
1581
1582	struct ubuf_info msg_zerocopy_realloc(struct* sock *sk, size_t size,
1583	struct ubuf_info *uarg)
1584	{
1585	if (uarg) {
1586	struct ubuf_info_msgzc *uarg_zc;
1587	const u32 byte_limit = `1` << `19`; / limit to a few TSO /
1588	u32 bytelen, next;
1589
1590	/ there might be non MSG_ZEROCOPY users /
1591	if (uarg->callback != msg_zerocopy_callback)
1592	return NULL;
1593
1594	/ realloc only when socket is locked (TCP, UDP cork),*
1595	* so uarg->len and sk_zckey access is serialized
1596	*/
1597	if (!sock_owned_by_user(sk)) {
1598	WARN_ON_ONCE(`1`);
1599	return NULL;
1600	}
1601
1602	uarg_zc = uarg_to_msgzc(uarg);
1603	bytelen = uarg_zc->bytelen + size;
1604	if (uarg_zc->len == USHRT_MAX - `1` \|\| bytelen > byte_limit) {
1605	/ TCP can create new skb to attach new uarg /
1606	if (sk->sk_type == SOCK_STREAM)
1607	goto new_alloc;
1608	return NULL;
1609	}
1610
1611	next = (u32)atomic_read(v: &sk->sk_zckey);
1612	if ((u32)(uarg_zc->id + uarg_zc->len) == next) {
1613	if (mm_account_pinned_pages(&uarg_zc->mmp, size))
1614	return NULL;
1615	uarg_zc->len++;
1616	uarg_zc->bytelen = bytelen;
1617	atomic_set(v: &sk->sk_zckey, i: ++next);
1618
1619	/ no extra ref when appending to datagram (MSG_MORE) /
1620	if (sk->sk_type == SOCK_STREAM)
1621	net_zcopy_get(uarg);
1622
1623	return uarg;
1624	}
1625	}
1626
1627	new_alloc:
1628	return msg_zerocopy_alloc(sk, size);
1629	}
1630	EXPORT_SYMBOL_GPL(msg_zerocopy_realloc);
1631
1632	static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
1633	{
1634	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
1635	u32 old_lo, old_hi;
1636	u64 sum_len;
1637
1638	old_lo = serr->ee.ee_info;
1639	old_hi = serr->ee.ee_data;
1640	sum_len = old_hi - old_lo + `1ULL` + len;
1641
1642	if (sum_len >= (`1ULL` << `32`))
1643	return false;
1644
1645	if (lo != old_hi + `1`)
1646	return false;
1647
1648	serr->ee.ee_data += len;
1649	return true;
1650	}
1651
1652	static void __msg_zerocopy_callback(struct ubuf_info_msgzc *uarg)
1653	{
1654	struct sk_buff tail, skb = skb_from_uarg(uarg);
1655	struct sock_exterr_skb *serr;
1656	struct sock *sk = skb->sk;
1657	struct sk_buff_head *q;
1658	unsigned long flags;
1659	bool is_zerocopy;
1660	u32 lo, hi;
1661	u16 len;
1662
1663	mm_unaccount_pinned_pages(&uarg->mmp);
1664
1665	/ if !len, there was only 1 call, and it was aborted*
1666	* so do not queue a completion notification
1667	*/
1668	if (!uarg->len \|\| sock_flag(sk, flag: SOCK_DEAD))
1669	goto release;
1670
1671	len = uarg->len;
1672	lo = uarg->id;
1673	hi = uarg->id + len - `1`;
1674	is_zerocopy = uarg->zerocopy;
1675
1676	serr = SKB_EXT_ERR(skb);
1677	memset(serr, `0`, sizeof(*serr));
1678	serr->ee.ee_errno = `0`;
1679	serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
1680	serr->ee.ee_data = hi;
1681	serr->ee.ee_info = lo;
1682	if (!is_zerocopy)
1683	serr->ee.ee_code \|= SO_EE_CODE_ZEROCOPY_COPIED;
1684
1685	q = &sk->sk_error_queue;
1686	spin_lock_irqsave(&q->lock, flags);
1687	tail = skb_peek_tail(list_: q);
1688	if (!tail \|\| SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY \|\|
1689	!skb_zerocopy_notify_extend(skb: tail, lo, len)) {
1690	__skb_queue_tail(list: q, newsk: skb);
1691	skb = NULL;
1692	}
1693	spin_unlock_irqrestore(lock: &q->lock, flags);
1694
1695	sk_error_report(sk);
1696
1697	release:
1698	consume_skb(skb);
1699	sock_put(sk);
1700	}
1701
1702	void msg_zerocopy_callback(struct sk_buff skb, struct* ubuf_info *uarg,
1703	bool success)
1704	{
1705	struct ubuf_info_msgzc *uarg_zc = uarg_to_msgzc(uarg);
1706
1707	uarg_zc->zerocopy = uarg_zc->zerocopy & success;
1708
1709	if (refcount_dec_and_test(r: &uarg->refcnt))
1710	__msg_zerocopy_callback(uarg: uarg_zc);
1711	}
1712	EXPORT_SYMBOL_GPL(msg_zerocopy_callback);
1713
1714	void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
1715	{
1716	struct sock *sk = skb_from_uarg(uarg_to_msgzc(uarg))->sk;
1717
1718	atomic_dec(v: &sk->sk_zckey);
1719	uarg_to_msgzc(uarg)->len--;
1720
1721	if (have_uref)
1722	msg_zerocopy_callback(NULL, uarg, true);
1723	}
1724	EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort);
1725
1726	int skb_zerocopy_iter_stream(struct sock sk, struct* sk_buff *skb,
1727	struct msghdr msg, int* len,
1728	struct ubuf_info *uarg)
1729	{
1730	struct ubuf_info *orig_uarg = skb_zcopy(skb);
1731	int err, orig_len = skb->len;
1732
1733	/ An skb can only point to one uarg. This edge case happens when*
1734	* TCP appends to an skb, but zerocopy_realloc triggered a new alloc.
1735	*/
1736	if (orig_uarg && uarg != orig_uarg)
1737	return -EEXIST;
1738
1739	err = __zerocopy_sg_from_iter(msg, sk, skb, from: &msg->msg_iter, length: len);
1740	if (err == -EFAULT \|\| (err == -EMSGSIZE && skb->len == orig_len)) {
1741	struct sock *save_sk = skb->sk;
1742
1743	/ Streams do not free skb on error. Reset to prev state. /
1744	iov_iter_revert(i: &msg->msg_iter, bytes: skb->len - orig_len);
1745	skb->sk = sk;
1746	___pskb_trim(skb, len: orig_len);
1747	skb->sk = save_sk;
1748	return err;
1749	}
1750
1751	skb_zcopy_set(skb, uarg, NULL);
1752	return skb->len - orig_len;
1753	}
1754	EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
1755
1756	void __skb_zcopy_downgrade_managed(struct sk_buff *skb)
1757	{
1758	int i;
1759
1760	skb_shinfo(skb)->flags &= ~SKBFL_MANAGED_FRAG_REFS;
1761	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++)
1762	skb_frag_ref(skb, f: i);
1763	}
1764	EXPORT_SYMBOL_GPL(__skb_zcopy_downgrade_managed);
1765
1766	static int skb_zerocopy_clone(struct sk_buff nskb, struct* sk_buff *orig,
1767	gfp_t gfp_mask)
1768	{
1769	if (skb_zcopy(skb: orig)) {
1770	if (skb_zcopy(skb: nskb)) {
1771	/ !gfp_mask callers are verified to !skb_zcopy(nskb) /
1772	if (!gfp_mask) {
1773	WARN_ON_ONCE(`1`);
1774	return -ENOMEM;
1775	}
1776	if (skb_uarg(nskb) == skb_uarg(orig))
1777	return `0`;
1778	if (skb_copy_ubufs(skb: nskb, GFP_ATOMIC))
1779	return -EIO;
1780	}
1781	skb_zcopy_set(skb: nskb, skb_uarg(orig), NULL);
1782	}
1783	return `0`;
1784	}
1785
1786	/**
1787	* skb_copy_ubufs - copy userspace skb frags buffers to kernel
1788	* @skb: the skb to modify
1789	* @gfp_mask: allocation priority
1790	*
1791	* This must be called on skb with SKBFL_ZEROCOPY_ENABLE.
1792	* It will copy all frags into kernel and drop the reference
1793	* to userspace pages.
1794	*
1795	* If this function is called from an interrupt gfp_mask() must be
1796	* %GFP_ATOMIC.
1797	*
1798	* Returns 0 on success or a negative error code on failure
1799	* to allocate kernel memory to copy to.
1800	*/
1801	int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
1802	{
1803	int num_frags = skb_shinfo(skb)->nr_frags;
1804	struct page page, head = NULL;
1805	int i, order, psize, new_frags;
1806	u32 d_off;
1807
1808	if (skb_shared(skb) \|\| skb_unclone(skb, pri: gfp_mask))
1809	return -EINVAL;
1810
1811	if (!num_frags)
1812	goto release;
1813
1814	/ We might have to allocate high order pages, so compute what minimum*
1815	* page order is needed.
1816	*/
1817	order = `0`;
1818	while ((PAGE_SIZE << order) * MAX_SKB_FRAGS < __skb_pagelen(skb))
1819	order++;
1820	psize = (PAGE_SIZE << order);
1821
1822	new_frags = (__skb_pagelen(skb) + psize - `1`) >> (PAGE_SHIFT + order);
1823	for (i = `0`; i < new_frags; i++) {
1824	page = alloc_pages(gfp: gfp_mask \| __GFP_COMP, order);
1825	if (!page) {
1826	while (head) {
1827	struct page next = (struct* page *)page_private(head);
1828	put_page(page: head);
1829	head = next;
1830	}
1831	return -ENOMEM;
1832	}
1833	set_page_private(page, private: (unsigned long)head);
1834	head = page;
1835	}
1836
1837	page = head;
1838	d_off = `0`;
1839	for (i = `0`; i < num_frags; i++) {
1840	skb_frag_t *f = &skb_shinfo(skb)->frags[i];
1841	u32 p_off, p_len, copied;
1842	struct page *p;
1843	u8 *vaddr;
1844
1845	skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f),
1846	p, p_off, p_len, copied) {
1847	u32 copy, done = `0`;
1848	vaddr = kmap_atomic(page: p);
1849
1850	while (done < p_len) {
1851	if (d_off == psize) {
1852	d_off = `0`;
1853	page = (struct page *)page_private(page);
1854	}
1855	copy = min_t(u32, psize - d_off, p_len - done);
1856	memcpy(page_address(page) + d_off,
1857	vaddr + p_off + done, copy);
1858	done += copy;
1859	d_off += copy;
1860	}
1861	kunmap_atomic(vaddr);
1862	}
1863	}
1864
1865	/ skb frags release userspace buffers /
1866	for (i = `0`; i < num_frags; i++)
1867	skb_frag_unref(skb, f: i);
1868
1869	/ skb frags point to kernel buffers /
1870	for (i = `0`; i < new_frags - `1`; i++) {
1871	__skb_fill_page_desc(skb, i, page: head, off: `0`, size: psize);
1872	head = (struct page *)page_private(head);
1873	}
1874	__skb_fill_page_desc(skb, i: new_frags - `1`, page: head, off: `0`, size: d_off);
1875	skb_shinfo(skb)->nr_frags = new_frags;
1876
1877	release:
1878	skb_zcopy_clear(skb, zerocopy_success: false);
1879	return `0`;
1880	}
1881	EXPORT_SYMBOL_GPL(skb_copy_ubufs);
1882
1883	/**
1884	* skb_clone - duplicate an sk_buff
1885	* @skb: buffer to clone
1886	* @gfp_mask: allocation priority
1887	*
1888	* Duplicate an &sk_buff. The new one is not owned by a socket. Both
1889	* copies share the same packet data but not structure. The new
1890	* buffer has a reference count of 1. If the allocation fails the
1891	* function returns %NULL otherwise the new buffer is returned.
1892	*
1893	* If this function is called from an interrupt gfp_mask() must be
1894	* %GFP_ATOMIC.
1895	*/
1896
1897	struct sk_buff skb_clone(struct* sk_buff *skb, gfp_t gfp_mask)
1898	{
1899	struct sk_buff_fclones *fclones = container_of(skb,
1900	struct sk_buff_fclones,
1901	skb1);
1902	struct sk_buff *n;
1903
1904	if (skb_orphan_frags(skb, gfp_mask))
1905	return NULL;
1906
1907	if (skb->fclone == SKB_FCLONE_ORIG &&
1908	refcount_read(r: &fclones->fclone_ref) == `1`) {
1909	n = &fclones->skb2;
1910	refcount_set(r: &fclones->fclone_ref, n: `2`);
1911	n->fclone = SKB_FCLONE_CLONE;
1912	} else {
1913	if (skb_pfmemalloc(skb))
1914	gfp_mask \|= __GFP_MEMALLOC;
1915
1916	n = kmem_cache_alloc(cachep: skbuff_cache, flags: gfp_mask);
1917	if (!n)
1918	return NULL;
1919
1920	n->fclone = SKB_FCLONE_UNAVAILABLE;
1921	}
1922
1923	return __skb_clone(n, skb);
1924	}
1925	EXPORT_SYMBOL(skb_clone);
1926
1927	void skb_headers_offset_update(struct sk_buff skb, int* off)
1928	{
1929	/ Only adjust this if it actually is csum_start rather than csum /
1930	if (skb->ip_summed == CHECKSUM_PARTIAL)
1931	skb->csum_start += off;
1932	/ {transport,network,mac}_header and tail are relative to skb->head /
1933	skb->transport_header += off;
1934	skb->network_header += off;
1935	if (skb_mac_header_was_set(skb))
1936	skb->mac_header += off;
1937	skb->inner_transport_header += off;
1938	skb->inner_network_header += off;
1939	skb->inner_mac_header += off;
1940	}
1941	EXPORT_SYMBOL(skb_headers_offset_update);
1942
1943	void skb_copy_header(struct sk_buff new, const* struct sk_buff *old)
1944	{
1945	__copy_skb_header(new, old);
1946
1947	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
1948	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
1949	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
1950	}
1951	EXPORT_SYMBOL(skb_copy_header);
1952
1953	static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
1954	{
1955	if (skb_pfmemalloc(skb))
1956	return SKB_ALLOC_RX;
1957	return `0`;
1958	}
1959
1960	/**
1961	* skb_copy - create private copy of an sk_buff
1962	* @skb: buffer to copy
1963	* @gfp_mask: allocation priority
1964	*
1965	* Make a copy of both an &sk_buff and its data. This is used when the
1966	* caller wishes to modify the data and needs a private copy of the
1967	* data to alter. Returns %NULL on failure or the pointer to the buffer
1968	* on success. The returned buffer has a reference count of 1.
1969	*
1970	* As by-product this function converts non-linear &sk_buff to linear
1971	* one, so that &sk_buff becomes completely private and caller is allowed
1972	* to modify all the data of returned buffer. This means that this
1973	* function is not recommended for use in circumstances when only
1974	* header is going to be modified. Use pskb_copy() instead.
1975	*/
1976
1977	struct sk_buff skb_copy(const* struct sk_buff *skb, gfp_t gfp_mask)
1978	{
1979	int headerlen = skb_headroom(skb);
1980	unsigned int size = skb_end_offset(skb) + skb->data_len;
1981	struct sk_buff *n = __alloc_skb(size, gfp_mask,
1982	skb_alloc_rx_flag(skb), NUMA_NO_NODE);
1983
1984	if (!n)
1985	return NULL;
1986
1987	/ Set the data pointer /
1988	skb_reserve(skb: n, len: headerlen);
1989	/ Set the tail pointer and length /
1990	skb_put(skb: n, len: skb->len);
1991
1992	BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
1993
1994	skb_copy_header(n, skb);
1995	return n;
1996	}
1997	EXPORT_SYMBOL(skb_copy);
1998
1999	/**
2000	* __pskb_copy_fclone - create copy of an sk_buff with private head.
2001	* @skb: buffer to copy
2002	* @headroom: headroom of new skb
2003	* @gfp_mask: allocation priority
2004	* @fclone: if true allocate the copy of the skb from the fclone
2005	* cache instead of the head cache; it is recommended to set this
2006	* to true for the cases where the copy will likely be cloned
2007	*
2008	* Make a copy of both an &sk_buff and part of its data, located
2009	* in header. Fragmented data remain shared. This is used when
2010	* the caller wishes to modify only header of &sk_buff and needs
2011	* private copy of the header to alter. Returns %NULL on failure
2012	* or the pointer to the buffer on success.
2013	* The returned buffer has a reference count of 1.
2014	*/
2015
2016	struct sk_buff __pskb_copy_fclone(struct* sk_buff skb, int* headroom,
2017	gfp_t gfp_mask, bool fclone)
2018	{
2019	unsigned int size = skb_headlen(skb) + headroom;
2020	int flags = skb_alloc_rx_flag(skb) \| (fclone ? SKB_ALLOC_FCLONE : `0`);
2021	struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE);
2022
2023	if (!n)
2024	goto out;
2025
2026	/ Set the data pointer /
2027	skb_reserve(skb: n, len: headroom);
2028	/ Set the tail pointer and length /
2029	skb_put(skb: n, len: skb_headlen(skb));
2030	/ Copy the bytes /
2031	skb_copy_from_linear_data(skb, to: n->data, len: n->len);
2032
2033	n->truesize += skb->data_len;
2034	n->data_len = skb->data_len;
2035	n->len = skb->len;
2036
2037	if (skb_shinfo(skb)->nr_frags) {
2038	int i;
2039
2040	if (skb_orphan_frags(skb, gfp_mask) \|\|
2041	skb_zerocopy_clone(nskb: n, orig: skb, gfp_mask)) {
2042	kfree_skb(skb: n);
2043	n = NULL;
2044	goto out;
2045	}
2046	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
2047	skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
2048	skb_frag_ref(skb, f: i);
2049	}
2050	skb_shinfo(n)->nr_frags = i;
2051	}
2052
2053	if (skb_has_frag_list(skb)) {
2054	skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
2055	skb_clone_fraglist(skb: n);
2056	}
2057
2058	skb_copy_header(n, skb);
2059	out:
2060	return n;
2061	}
2062	EXPORT_SYMBOL(__pskb_copy_fclone);
2063
2064	/**
2065	* pskb_expand_head - reallocate header of &sk_buff
2066	* @skb: buffer to reallocate
2067	* @nhead: room to add at head
2068	* @ntail: room to add at tail
2069	* @gfp_mask: allocation priority
2070	*
2071	* Expands (or creates identical copy, if @nhead and @ntail are zero)
2072	* header of @skb. &sk_buff itself is not changed. &sk_buff MUST have
2073	* reference count of 1. Returns zero in the case of success or error,
2074	* if expansion failed. In the last case, &sk_buff is not changed.
2075	*
2076	* All the pointers pointing into skb header may change and must be
2077	* reloaded after call to this function.
2078	*/
2079
2080	int pskb_expand_head(struct sk_buff skb, int* nhead, int ntail,
2081	gfp_t gfp_mask)
2082	{
2083	unsigned int osize = skb_end_offset(skb);
2084	unsigned int size = osize + nhead + ntail;
2085	long off;
2086	u8 *data;
2087	int i;
2088
2089	BUG_ON(nhead < `0`);
2090
2091	BUG_ON(skb_shared(skb));
2092
2093	skb_zcopy_downgrade_managed(skb);
2094
2095	if (skb_pfmemalloc(skb))
2096	gfp_mask \|= __GFP_MEMALLOC;
2097
2098	data = kmalloc_reserve(size: &size, flags: gfp_mask, NUMA_NO_NODE, NULL);
2099	if (!data)
2100	goto nodata;
2101	size = SKB_WITH_OVERHEAD(size);
2102
2103	/ Copy only real data... and, alas, header. This should be*
2104	* optimized for the cases when header is void.
2105	*/
2106	memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
2107
2108	memcpy((struct skb_shared_info *)(data + size),
2109	skb_shinfo(skb),
2110	offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
2111
2112	/*
2113	* if shinfo is shared we must drop the old head gracefully, but if it
2114	* is not we can just drop the old head and let the existing refcount
2115	* be since all we did is relocate the values
2116	*/
2117	if (skb_cloned(skb)) {
2118	if (skb_orphan_frags(skb, gfp_mask))
2119	goto nofrags;
2120	if (skb_zcopy(skb))
2121	refcount_inc(r: &skb_uarg(skb)->refcnt);
2122	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++)
2123	skb_frag_ref(skb, f: i);
2124
2125	if (skb_has_frag_list(skb))
2126	skb_clone_fraglist(skb);
2127
2128	skb_release_data(skb, reason: SKB_CONSUMED, napi_safe: false);
2129	} else {
2130	skb_free_head(skb, napi_safe: false);
2131	}
2132	off = (data + nhead) - skb->head;
2133
2134	skb->head = data;
2135	skb->head_frag = `0`;
2136	skb->data += off;
2137
2138	skb_set_end_offset(skb, offset: size);
2139	#ifdef NET_SKBUFF_DATA_USES_OFFSET
2140	off = nhead;
2141	#endif
2142	skb->tail += off;
2143	skb_headers_offset_update(skb, nhead);
2144	skb->cloned = `0`;
2145	skb->hdr_len = `0`;
2146	skb->nohdr = `0`;
2147	atomic_set(v: &skb_shinfo(skb)->dataref, i: `1`);
2148
2149	skb_metadata_clear(skb);
2150
2151	/ It is not generally safe to change skb->truesize.*
2152	* For the moment, we really care of rx path, or
2153	* when skb is orphaned (not attached to a socket).
2154	*/
2155	if (!skb->sk \|\| skb->destructor == sock_edemux)
2156	skb->truesize += size - osize;
2157
2158	return `0`;
2159
2160	nofrags:
2161	skb_kfree_head(head: data, end_offset: size);
2162	nodata:
2163	return -ENOMEM;
2164	}
2165	EXPORT_SYMBOL(pskb_expand_head);
2166
2167	/ Make private copy of skb with writable head and some headroom /
2168
2169	struct sk_buff skb_realloc_headroom(struct* sk_buff skb, unsigned* int headroom)
2170	{
2171	struct sk_buff *skb2;
2172	int delta = headroom - skb_headroom(skb);
2173
2174	if (delta <= `0`)
2175	skb2 = pskb_copy(skb, GFP_ATOMIC);
2176	else {
2177	skb2 = skb_clone(skb, GFP_ATOMIC);
2178	if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), `0`,
2179	GFP_ATOMIC)) {
2180	kfree_skb(skb: skb2);
2181	skb2 = NULL;
2182	}
2183	}
2184	return skb2;
2185	}
2186	EXPORT_SYMBOL(skb_realloc_headroom);
2187
2188	/ Note: We plan to rework this in linux-6.4 /
2189	int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
2190	{
2191	unsigned int saved_end_offset, saved_truesize;
2192	struct skb_shared_info *shinfo;
2193	int res;
2194
2195	saved_end_offset = skb_end_offset(skb);
2196	saved_truesize = skb->truesize;
2197
2198	res = pskb_expand_head(skb, `0`, `0`, pri);
2199	if (res)
2200	return res;
2201
2202	skb->truesize = saved_truesize;
2203
2204	if (likely(skb_end_offset(skb) == saved_end_offset))
2205	return `0`;
2206
2207	/ We can not change skb->end if the original or new value*
2208	* is SKB_SMALL_HEAD_HEADROOM, as it might break skb_kfree_head().
2209	*/
2210	if (saved_end_offset == SKB_SMALL_HEAD_HEADROOM \|\|
2211	skb_end_offset(skb) == SKB_SMALL_HEAD_HEADROOM) {
2212	/ We think this path should not be taken.*
2213	* Add a temporary trace to warn us just in case.
2214	*/
2215	pr_err_once("__skb_unclone_keeptruesize() skb_end_offset() %u -> %u\n",
2216	saved_end_offset, skb_end_offset(skb));
2217	WARN_ON_ONCE(`1`);
2218	return `0`;
2219	}
2220
2221	shinfo = skb_shinfo(skb);
2222
2223	/ We are about to change back skb->end,*
2224	* we need to move skb_shinfo() to its new location.
2225	*/
2226	memmove(skb->head + saved_end_offset,
2227	shinfo,
2228	offsetof(struct skb_shared_info, frags[shinfo->nr_frags]));
2229
2230	skb_set_end_offset(skb, offset: saved_end_offset);
2231
2232	return `0`;
2233	}
2234
2235	/**
2236	* skb_expand_head - reallocate header of &sk_buff
2237	* @skb: buffer to reallocate
2238	* @headroom: needed headroom
2239	*
2240	* Unlike skb_realloc_headroom, this one does not allocate a new skb
2241	* if possible; copies skb->sk to new skb as needed
2242	* and frees original skb in case of failures.
2243	*
2244	* It expect increased headroom and generates warning otherwise.
2245	*/
2246
2247	struct sk_buff skb_expand_head(struct* sk_buff skb, unsigned* int headroom)
2248	{
2249	int delta = headroom - skb_headroom(skb);
2250	int osize = skb_end_offset(skb);
2251	struct sock *sk = skb->sk;
2252
2253	if (WARN_ONCE(delta <= `0`,
2254	"%s is expecting an increase in the headroom", __func__))
2255	return skb;
2256
2257	delta = SKB_DATA_ALIGN(delta);
2258	/ pskb_expand_head() might crash, if skb is shared. /
2259	if (skb_shared(skb) \|\| !is_skb_wmem(skb)) {
2260	struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
2261
2262	if (unlikely(!nskb))
2263	goto fail;
2264
2265	if (sk)
2266	skb_set_owner_w(skb: nskb, sk);
2267	consume_skb(skb);
2268	skb = nskb;
2269	}
2270	if (pskb_expand_head(skb, delta, `0`, GFP_ATOMIC))
2271	goto fail;
2272
2273	if (sk && is_skb_wmem(skb)) {
2274	delta = skb_end_offset(skb) - osize;
2275	refcount_add(i: delta, r: &sk->sk_wmem_alloc);
2276	skb->truesize += delta;
2277	}
2278	return skb;
2279
2280	fail:
2281	kfree_skb(skb);
2282	return NULL;
2283	}
2284	EXPORT_SYMBOL(skb_expand_head);
2285
2286	/**
2287	* skb_copy_expand - copy and expand sk_buff
2288	* @skb: buffer to copy
2289	* @newheadroom: new free bytes at head
2290	* @newtailroom: new free bytes at tail
2291	* @gfp_mask: allocation priority
2292	*
2293	* Make a copy of both an &sk_buff and its data and while doing so
2294	* allocate additional space.
2295	*
2296	* This is used when the caller wishes to modify the data and needs a
2297	* private copy of the data to alter as well as more space for new fields.
2298	* Returns %NULL on failure or the pointer to the buffer
2299	* on success. The returned buffer has a reference count of 1.
2300	*
2301	* You must pass %GFP_ATOMIC as the allocation priority if this function
2302	* is called from an interrupt.
2303	*/
2304	struct sk_buff skb_copy_expand(const* struct sk_buff *skb,
2305	int newheadroom, int newtailroom,
2306	gfp_t gfp_mask)
2307	{
2308	/*
2309	* Allocate the copy buffer
2310	*/
2311	struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
2312	gfp_mask, skb_alloc_rx_flag(skb),
2313	NUMA_NO_NODE);
2314	int oldheadroom = skb_headroom(skb);
2315	int head_copy_len, head_copy_off;
2316
2317	if (!n)
2318	return NULL;
2319
2320	skb_reserve(skb: n, len: newheadroom);
2321
2322	/ Set the tail pointer and length /
2323	skb_put(skb: n, len: skb->len);
2324
2325	head_copy_len = oldheadroom;
2326	head_copy_off = `0`;
2327	if (newheadroom <= head_copy_len)
2328	head_copy_len = newheadroom;
2329	else
2330	head_copy_off = newheadroom - head_copy_len;
2331
2332	/ Copy the linear header and data. /
2333	BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
2334	skb->len + head_copy_len));
2335
2336	skb_copy_header(n, skb);
2337
2338	skb_headers_offset_update(n, newheadroom - oldheadroom);
2339
2340	return n;
2341	}
2342	EXPORT_SYMBOL(skb_copy_expand);
2343
2344	/**
2345	* __skb_pad - zero pad the tail of an skb
2346	* @skb: buffer to pad
2347	* @pad: space to pad
2348	* @free_on_error: free buffer on error
2349	*
2350	* Ensure that a buffer is followed by a padding area that is zero
2351	* filled. Used by network drivers which may DMA or transfer data
2352	* beyond the buffer end onto the wire.
2353	*
2354	* May return error in out of memory cases. The skb is freed on error
2355	* if @free_on_error is true.
2356	*/
2357
2358	int __skb_pad(struct sk_buff skb, int* pad, bool free_on_error)
2359	{
2360	int err;
2361	int ntail;
2362
2363	/ If the skbuff is non linear tailroom is always zero.. /
2364	if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
2365	memset(skb->data+skb->len, `0`, pad);
2366	return `0`;
2367	}
2368
2369	ntail = skb->data_len + pad - (skb->end - skb->tail);
2370	if (likely(skb_cloned(skb) \|\| ntail > `0`)) {
2371	err = pskb_expand_head(skb, `0`, ntail, GFP_ATOMIC);
2372	if (unlikely(err))
2373	goto free_skb;
2374	}
2375
2376	/ FIXME: The use of this function with non-linear skb's really needs*
2377	* to be audited.
2378	*/
2379	err = skb_linearize(skb);
2380	if (unlikely(err))
2381	goto free_skb;
2382
2383	memset(skb->data + skb->len, `0`, pad);
2384	return `0`;
2385
2386	free_skb:
2387	if (free_on_error)
2388	kfree_skb(skb);
2389	return err;
2390	}
2391	EXPORT_SYMBOL(__skb_pad);
2392
2393	/**
2394	* pskb_put - add data to the tail of a potentially fragmented buffer
2395	* @skb: start of the buffer to use
2396	* @tail: tail fragment of the buffer to use
2397	* @len: amount of data to add
2398	*
2399	* This function extends the used data area of the potentially
2400	* fragmented buffer. @tail must be the last fragment of @skb -- or
2401	* @skb itself. If this would exceed the total buffer size the kernel
2402	* will panic. A pointer to the first byte of the extra data is
2403	* returned.
2404	*/
2405
2406	void pskb_put(struct* sk_buff skb, struct* sk_buff tail, int* len)
2407	{
2408	if (tail != skb) {
2409	skb->data_len += len;
2410	skb->len += len;
2411	}
2412	return skb_put(skb: tail, len);
2413	}
2414	EXPORT_SYMBOL_GPL(pskb_put);
2415
2416	/**
2417	* skb_put - add data to a buffer
2418	* @skb: buffer to use
2419	* @len: amount of data to add
2420	*
2421	* This function extends the used data area of the buffer. If this would
2422	* exceed the total buffer size the kernel will panic. A pointer to the
2423	* first byte of the extra data is returned.
2424	*/
2425	void skb_put(struct* sk_buff skb, unsigned* int len)
2426	{
2427	void *tmp = skb_tail_pointer(skb);
2428	SKB_LINEAR_ASSERT(skb);
2429	skb->tail += len;
2430	skb->len += len;
2431	if (unlikely(skb->tail > skb->end))
2432	skb_over_panic(skb, sz: len, addr: __builtin_return_address(`0`));
2433	return tmp;
2434	}
2435	EXPORT_SYMBOL(skb_put);
2436
2437	/**
2438	* skb_push - add data to the start of a buffer
2439	* @skb: buffer to use
2440	* @len: amount of data to add
2441	*
2442	* This function extends the used data area of the buffer at the buffer
2443	* start. If this would exceed the total buffer headroom the kernel will
2444	* panic. A pointer to the first byte of the extra data is returned.
2445	*/
2446	void skb_push(struct* sk_buff skb, unsigned* int len)
2447	{
2448	skb->data -= len;
2449	skb->len += len;
2450	if (unlikely(skb->data < skb->head))
2451	skb_under_panic(skb, sz: len, addr: __builtin_return_address(`0`));
2452	return skb->data;
2453	}
2454	EXPORT_SYMBOL(skb_push);
2455
2456	/**
2457	* skb_pull - remove data from the start of a buffer
2458	* @skb: buffer to use
2459	* @len: amount of data to remove
2460	*
2461	* This function removes data from the start of a buffer, returning
2462	* the memory to the headroom. A pointer to the next data in the buffer
2463	* is returned. Once the data has been pulled future pushes will overwrite
2464	* the old data.
2465	*/
2466	void skb_pull(struct* sk_buff skb, unsigned* int len)
2467	{
2468	return skb_pull_inline(skb, len);
2469	}
2470	EXPORT_SYMBOL(skb_pull);
2471
2472	/**
2473	* skb_pull_data - remove data from the start of a buffer returning its
2474	* original position.
2475	* @skb: buffer to use
2476	* @len: amount of data to remove
2477	*
2478	* This function removes data from the start of a buffer, returning
2479	* the memory to the headroom. A pointer to the original data in the buffer
2480	* is returned after checking if there is enough data to pull. Once the
2481	* data has been pulled future pushes will overwrite the old data.
2482	*/
2483	void skb_pull_data(struct* sk_buff *skb, size_t len)
2484	{
2485	void *data = skb->data;
2486
2487	if (skb->len < len)
2488	return NULL;
2489
2490	skb_pull(skb, len);
2491
2492	return data;
2493	}
2494	EXPORT_SYMBOL(skb_pull_data);
2495
2496	/**
2497	* skb_trim - remove end from a buffer
2498	* @skb: buffer to alter
2499	* @len: new length
2500	*
2501	* Cut the length of a buffer down by removing data from the tail. If
2502	* the buffer is already under the length specified it is not modified.
2503	* The skb must be linear.
2504	*/
2505	void skb_trim(struct sk_buff skb, unsigned* int len)
2506	{
2507	if (skb->len > len)
2508	__skb_trim(skb, len);
2509	}
2510	EXPORT_SYMBOL(skb_trim);
2511
2512	/ Trims skb to length len. It can change skb pointers.*
2513	*/
2514
2515	int ___pskb_trim(struct sk_buff skb, unsigned* int len)
2516	{
2517	struct sk_buff **fragp;
2518	struct sk_buff *frag;
2519	int offset = skb_headlen(skb);
2520	int nfrags = skb_shinfo(skb)->nr_frags;
2521	int i;
2522	int err;
2523
2524	if (skb_cloned(skb) &&
2525	unlikely((err = pskb_expand_head(skb, `0`, `0`, GFP_ATOMIC))))
2526	return err;
2527
2528	i = `0`;
2529	if (offset >= len)
2530	goto drop_pages;
2531
2532	for (; i < nfrags; i++) {
2533	int end = offset + skb_frag_size(frag: &skb_shinfo(skb)->frags[i]);
2534
2535	if (end < len) {
2536	offset = end;
2537	continue;
2538	}
2539
2540	skb_frag_size_set(frag: &skb_shinfo(skb)->frags[i++], size: len - offset);
2541
2542	drop_pages:
2543	skb_shinfo(skb)->nr_frags = i;
2544
2545	for (; i < nfrags; i++)
2546	skb_frag_unref(skb, f: i);
2547
2548	if (skb_has_frag_list(skb))
2549	skb_drop_fraglist(skb);
2550	goto done;
2551	}
2552
2553	for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
2554	fragp = &frag->next) {
2555	int end = offset + frag->len;
2556
2557	if (skb_shared(skb: frag)) {
2558	struct sk_buff *nfrag;
2559
2560	nfrag = skb_clone(frag, GFP_ATOMIC);
2561	if (unlikely(!nfrag))
2562	return -ENOMEM;
2563
2564	nfrag->next = frag->next;
2565	consume_skb(frag);
2566	frag = nfrag;
2567	*fragp = frag;
2568	}
2569
2570	if (end < len) {
2571	offset = end;
2572	continue;
2573	}
2574
2575	if (end > len &&
2576	unlikely((err = pskb_trim(frag, len - offset))))
2577	return err;
2578
2579	if (frag->next)
2580	skb_drop_list(listp: &frag->next);
2581	break;
2582	}
2583
2584	done:
2585	if (len > skb_headlen(skb)) {
2586	skb->data_len -= skb->len - len;
2587	skb->len = len;
2588	} else {
2589	skb->len = len;
2590	skb->data_len = `0`;
2591	skb_set_tail_pointer(skb, offset: len);
2592	}
2593
2594	if (!skb->sk \|\| skb->destructor == sock_edemux)
2595	skb_condense(skb);
2596	return `0`;
2597	}
2598	EXPORT_SYMBOL(___pskb_trim);
2599
2600	/ Note : use pskb_trim_rcsum() instead of calling this directly*
2601	*/
2602	int pskb_trim_rcsum_slow(struct sk_buff skb, unsigned* int len)
2603	{
2604	if (skb->ip_summed == CHECKSUM_COMPLETE) {
2605	int delta = skb->len - len;
2606
2607	skb->csum = csum_block_sub(csum: skb->csum,
2608	csum2: skb_checksum(skb, offset: len, len: delta, csum: `0`),
2609	offset: len);
2610	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
2611	int hdlen = (len > skb_headlen(skb)) ? skb_headlen(skb) : len;
2612	int offset = skb_checksum_start_offset(skb) + skb->csum_offset;
2613
2614	if (offset + sizeof(__sum16) > hdlen)
2615	return -EINVAL;
2616	}
2617	return __pskb_trim(skb, len);
2618	}
2619	EXPORT_SYMBOL(pskb_trim_rcsum_slow);
2620
2621	/**
2622	* __pskb_pull_tail - advance tail of skb header
2623	* @skb: buffer to reallocate
2624	* @delta: number of bytes to advance tail
2625	*
2626	* The function makes a sense only on a fragmented &sk_buff,
2627	* it expands header moving its tail forward and copying necessary
2628	* data from fragmented part.
2629	*
2630	* &sk_buff MUST have reference count of 1.
2631	*
2632	* Returns %NULL (and &sk_buff does not change) if pull failed
2633	* or value of new tail of skb in the case of success.
2634	*
2635	* All the pointers pointing into skb header may change and must be
2636	* reloaded after call to this function.
2637	*/
2638
2639	/ Moves tail of skb head forward, copying data from fragmented part,*
2640	* when it is necessary.
2641	* 1. It may fail due to malloc failure.
2642	* 2. It may change skb pointers.
2643	*
2644	* It is pretty complicated. Luckily, it is called only in exceptional cases.
2645	*/
2646	void __pskb_pull_tail(struct* sk_buff skb, int* delta)
2647	{
2648	/ If skb has not enough free space at tail, get new one*
2649	* plus 128 bytes for future expansions. If we have enough
2650	* room at tail, reallocate without expansion only if skb is cloned.
2651	*/
2652	int i, k, eat = (skb->tail + delta) - skb->end;
2653
2654	if (eat > `0` \|\| skb_cloned(skb)) {
2655	if (pskb_expand_head(skb, `0`, eat > `0` ? eat + `128` : `0`,
2656	GFP_ATOMIC))
2657	return NULL;
2658	}
2659
2660	BUG_ON(skb_copy_bits(skb, skb_headlen(skb),
2661	skb_tail_pointer(skb), delta));
2662
2663	/ Optimization: no fragments, no reasons to preestimate*
2664	* size of pulled pages. Superb.
2665	*/
2666	if (!skb_has_frag_list(skb))
2667	goto pull_pages;
2668
2669	/ Estimate size of pulled pages. /
2670	eat = delta;
2671	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
2672	int size = skb_frag_size(frag: &skb_shinfo(skb)->frags[i]);
2673
2674	if (size >= eat)
2675	goto pull_pages;
2676	eat -= size;
2677	}
2678
2679	/ If we need update frag list, we are in troubles.*
2680	* Certainly, it is possible to add an offset to skb data,
2681	* but taking into account that pulling is expected to
2682	* be very rare operation, it is worth to fight against
2683	* further bloating skb head and crucify ourselves here instead.
2684	* Pure masohism, indeed. 8)8)
2685	*/
2686	if (eat) {
2687	struct sk_buff *list = skb_shinfo(skb)->frag_list;
2688	struct sk_buff *clone = NULL;
2689	struct sk_buff *insp = NULL;
2690
2691	do {
2692	if (list->len <= eat) {
2693	/ Eaten as whole. /
2694	eat -= list->len;
2695	list = list->next;
2696	insp = list;
2697	} else {
2698	/ Eaten partially. /
2699	if (skb_is_gso(skb) && !list->head_frag &&
2700	skb_headlen(skb: list))
2701	skb_shinfo(skb)->gso_type \|= SKB_GSO_DODGY;
2702
2703	if (skb_shared(skb: list)) {
2704	/ Sucks! We need to fork list. :-( /
2705	clone = skb_clone(list, GFP_ATOMIC);
2706	if (!clone)
2707	return NULL;
2708	insp = list->next;
2709	list = clone;
2710	} else {
2711	/ This may be pulled without*
2712	* problems. */
2713	insp = list;
2714	}
2715	if (!pskb_pull(skb: list, len: eat)) {
2716	kfree_skb(skb: clone);
2717	return NULL;
2718	}
2719	break;
2720	}
2721	} while (eat);
2722
2723	/ Free pulled out fragments. /
2724	while ((list = skb_shinfo(skb)->frag_list) != insp) {
2725	skb_shinfo(skb)->frag_list = list->next;
2726	consume_skb(list);
2727	}
2728	/ And insert new clone at head. /
2729	if (clone) {
2730	clone->next = list;
2731	skb_shinfo(skb)->frag_list = clone;
2732	}
2733	}
2734	/ Success! Now we may commit changes to skb data. /
2735
2736	pull_pages:
2737	eat = delta;
2738	k = `0`;
2739	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
2740	int size = skb_frag_size(frag: &skb_shinfo(skb)->frags[i]);
2741
2742	if (size <= eat) {
2743	skb_frag_unref(skb, f: i);
2744	eat -= size;
2745	} else {
2746	skb_frag_t *frag = &skb_shinfo(skb)->frags[k];
2747
2748	*frag = skb_shinfo(skb)->frags[i];
2749	if (eat) {
2750	skb_frag_off_add(frag, delta: eat);
2751	skb_frag_size_sub(frag, delta: eat);
2752	if (!i)
2753	goto end;
2754	eat = `0`;
2755	}
2756	k++;
2757	}
2758	}
2759	skb_shinfo(skb)->nr_frags = k;
2760
2761	end:
2762	skb->tail += delta;
2763	skb->data_len -= delta;
2764
2765	if (!skb->data_len)
2766	skb_zcopy_clear(skb, zerocopy_success: false);
2767
2768	return skb_tail_pointer(skb);
2769	}
2770	EXPORT_SYMBOL(__pskb_pull_tail);
2771
2772	/**
2773	* skb_copy_bits - copy bits from skb to kernel buffer
2774	* @skb: source skb
2775	* @offset: offset in source
2776	* @to: destination buffer
2777	* @len: number of bytes to copy
2778	*
2779	* Copy the specified number of bytes from the source skb to the
2780	* destination buffer.
2781	*
2782	* CAUTION ! :
2783	* If its prototype is ever changed,
2784	* check arch/{}/net/{}.S files,
2785	* since it is called from BPF assembly code.
2786	*/
2787	int skb_copy_bits(const struct sk_buff skb, int* offset, void to, int* len)
2788	{
2789	int start = skb_headlen(skb);
2790	struct sk_buff *frag_iter;
2791	int i, copy;
2792
2793	if (offset > (int)skb->len - len)
2794	goto fault;
2795
2796	/ Copy header. /
2797	if ((copy = start - offset) > `0`) {
2798	if (copy > len)
2799	copy = len;
2800	skb_copy_from_linear_data_offset(skb, offset, to, len: copy);
2801	if ((len -= copy) == `0`)
2802	return `0`;
2803	offset += copy;
2804	to += copy;
2805	}
2806
2807	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
2808	int end;
2809	skb_frag_t *f = &skb_shinfo(skb)->frags[i];
2810
2811	WARN_ON(start > offset + len);
2812
2813	end = start + skb_frag_size(frag: f);
2814	if ((copy = end - offset) > `0`) {
2815	u32 p_off, p_len, copied;
2816	struct page *p;
2817	u8 *vaddr;
2818
2819	if (copy > len)
2820	copy = len;
2821
2822	skb_frag_foreach_page(f,
2823	skb_frag_off(f) + offset - start,
2824	copy, p, p_off, p_len, copied) {
2825	vaddr = kmap_atomic(page: p);
2826	memcpy(to + copied, vaddr + p_off, p_len);
2827	kunmap_atomic(vaddr);
2828	}
2829
2830	if ((len -= copy) == `0`)
2831	return `0`;
2832	offset += copy;
2833	to += copy;
2834	}
2835	start = end;
2836	}
2837
2838	skb_walk_frags(skb, frag_iter) {
2839	int end;
2840
2841	WARN_ON(start > offset + len);
2842
2843	end = start + frag_iter->len;
2844	if ((copy = end - offset) > `0`) {
2845	if (copy > len)
2846	copy = len;
2847	if (skb_copy_bits(skb: frag_iter, offset: offset - start, to, len: copy))
2848	goto fault;
2849	if ((len -= copy) == `0`)
2850	return `0`;
2851	offset += copy;
2852	to += copy;
2853	}
2854	start = end;
2855	}
2856
2857	if (!len)
2858	return `0`;
2859
2860	fault:
2861	return -EFAULT;
2862	}
2863	EXPORT_SYMBOL(skb_copy_bits);
2864
2865	/*
2866	* Callback from splice_to_pipe(), if we need to release some pages
2867	* at the end of the spd in case we error'ed out in filling the pipe.
2868	*/
2869	static void sock_spd_release(struct splice_pipe_desc spd, unsigned* int i)
2870	{
2871	put_page(page: spd->pages[i]);
2872	}
2873
2874	static struct page linear_to_page(struct* page page, unsigned* int *len,
2875	unsigned int *offset,
2876	struct sock *sk)
2877	{
2878	struct page_frag *pfrag = sk_page_frag(sk);
2879
2880	if (!sk_page_frag_refill(sk, pfrag))
2881	return NULL;
2882
2883	len = min_t(unsigned* int, *len, pfrag->size - pfrag->offset);
2884
2885	memcpy(page_address(pfrag->page) + pfrag->offset,
2886	page_address(page) + offset, len);
2887	*offset = pfrag->offset;
2888	pfrag->offset += *len;
2889
2890	return pfrag->page;
2891	}
2892
2893	static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
2894	struct page *page,
2895	unsigned int offset)
2896	{
2897	return spd->nr_pages &&
2898	spd->pages[spd->nr_pages - `1`] == page &&
2899	(spd->partial[spd->nr_pages - `1`].offset +
2900	spd->partial[spd->nr_pages - `1`].len == offset);
2901	}
2902
2903	/*
2904	* Fill page/offset/length into spd, if it can hold more pages.
2905	*/
2906	static bool spd_fill_page(struct splice_pipe_desc *spd,
2907	struct pipe_inode_info pipe, struct* page *page,
2908	unsigned int len, unsigned* int offset,
2909	bool linear,
2910	struct sock *sk)
2911	{
2912	if (unlikely(spd->nr_pages == MAX_SKB_FRAGS))
2913	return true;
2914
2915	if (linear) {
2916	page = linear_to_page(page, len, offset: &offset, sk);
2917	if (!page)
2918	return true;
2919	}
2920	if (spd_can_coalesce(spd, page, offset)) {
2921	spd->partial[spd->nr_pages - `1`].len += *len;
2922	return false;
2923	}
2924	get_page(page);
2925	spd->pages[spd->nr_pages] = page;
2926	spd->partial[spd->nr_pages].len = *len;
2927	spd->partial[spd->nr_pages].offset = offset;
2928	spd->nr_pages++;
2929
2930	return false;
2931	}
2932
2933	static bool __splice_segment(struct page page, unsigned* int poff,
2934	unsigned int plen, unsigned int *off,
2935	unsigned int *len,
2936	struct splice_pipe_desc *spd, bool linear,
2937	struct sock *sk,
2938	struct pipe_inode_info *pipe)
2939	{
2940	if (!*len)
2941	return true;
2942
2943	/ skip this segment if already processed /
2944	if (*off >= plen) {
2945	*off -= plen;
2946	return false;
2947	}
2948
2949	/ ignore any bits we already processed /
2950	poff += *off;
2951	plen -= *off;
2952	*off = `0`;
2953
2954	do {
2955	unsigned int flen = min(*len, plen);
2956
2957	if (spd_fill_page(spd, pipe, page, len: &flen, offset: poff,
2958	linear, sk))
2959	return true;
2960	poff += flen;
2961	plen -= flen;
2962	*len -= flen;
2963	} while (*len && plen);
2964
2965	return false;
2966	}
2967
2968	/*
2969	* Map linear and fragment data from the skb to spd. It reports true if the
2970	* pipe is full or if we already spliced the requested length.
2971	*/
2972	static bool __skb_splice_bits(struct sk_buff skb, struct* pipe_inode_info *pipe,
2973	unsigned int offset, unsigned* int *len,
2974	struct splice_pipe_desc spd, struct* sock *sk)
2975	{
2976	int seg;
2977	struct sk_buff *iter;
2978
2979	/ map the linear part :*
2980	* If skb->head_frag is set, this 'linear' part is backed by a
2981	* fragment, and if the head is not shared with any clones then
2982	* we can avoid a copy since we own the head portion of this page.
2983	*/
2984	if (__splice_segment(virt_to_page(skb->data),
2985	poff: (unsigned long) skb->data & (PAGE_SIZE - `1`),
2986	plen: skb_headlen(skb),
2987	off: offset, len, spd,
2988	linear: skb_head_is_locked(skb),
2989	sk, pipe))
2990	return true;
2991
2992	/*
2993	* then map the fragments
2994	*/
2995	for (seg = `0`; seg < skb_shinfo(skb)->nr_frags; seg++) {
2996	const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
2997
2998	if (__splice_segment(page: skb_frag_page(frag: f),
2999	poff: skb_frag_off(frag: f), plen: skb_frag_size(frag: f),
3000	off: offset, len, spd, linear: false, sk, pipe))
3001	return true;
3002	}
3003
3004	skb_walk_frags(skb, iter) {
3005	if (*offset >= iter->len) {
3006	*offset -= iter->len;
3007	continue;
3008	}
3009	/ __skb_splice_bits() only fails if the output has no room*
3010	* left, so no point in going over the frag_list for the error
3011	* case.
3012	*/
3013	if (__skb_splice_bits(skb: iter, pipe, offset, len, spd, sk))
3014	return true;
3015	}
3016
3017	return false;
3018	}
3019
3020	/*
3021	* Map data from the skb to a pipe. Should handle both the linear part,
3022	* the fragments, and the frag list.
3023	*/
3024	int skb_splice_bits(struct sk_buff skb, struct* sock sk, unsigned* int offset,
3025	struct pipe_inode_info pipe, unsigned* int tlen,
3026	unsigned int flags)
3027	{
3028	struct partial_page partial[MAX_SKB_FRAGS];
3029	struct page *pages[MAX_SKB_FRAGS];
3030	struct splice_pipe_desc spd = {
3031	.pages = pages,
3032	.partial = partial,
3033	.nr_pages_max = MAX_SKB_FRAGS,
3034	.ops = &nosteal_pipe_buf_ops,
3035	.spd_release = sock_spd_release,
3036	};
3037	int ret = `0`;
3038
3039	__skb_splice_bits(skb, pipe, offset: &offset, len: &tlen, spd: &spd, sk);
3040
3041	if (spd.nr_pages)
3042	ret = splice_to_pipe(pipe, &spd);
3043
3044	return ret;
3045	}
3046	EXPORT_SYMBOL_GPL(skb_splice_bits);
3047
3048	static int sendmsg_locked(struct sock sk, struct* msghdr *msg)
3049	{
3050	struct socket *sock = sk->sk_socket;
3051	size_t size = msg_data_left(msg);
3052
3053	if (!sock)
3054	return -EINVAL;
3055
3056	if (!sock->ops->sendmsg_locked)
3057	return sock_no_sendmsg_locked(sk, msg, len: size);
3058
3059	return sock->ops->sendmsg_locked(sk, msg, size);
3060	}
3061
3062	static int sendmsg_unlocked(struct sock sk, struct* msghdr *msg)
3063	{
3064	struct socket *sock = sk->sk_socket;
3065
3066	if (!sock)
3067	return -EINVAL;
3068	return sock_sendmsg(sock, msg);
3069	}
3070
3071	typedef int (sendmsg_func)(struct* sock sk, struct* msghdr *msg);
3072	static int __skb_send_sock(struct sock sk, struct* sk_buff skb, int* offset,
3073	int len, sendmsg_func sendmsg)
3074	{
3075	unsigned int orig_len = len;
3076	struct sk_buff *head = skb;
3077	unsigned short fragidx;
3078	int slen, ret;
3079
3080	do_frag_list:
3081
3082	/ Deal with head data /
3083	while (offset < skb_headlen(skb) && len) {
3084	struct kvec kv;
3085	struct msghdr msg;
3086
3087	slen = min_t(int, len, skb_headlen(skb) - offset);
3088	kv.iov_base = skb->data + offset;
3089	kv.iov_len = slen;
3090	memset(&msg, `0`, sizeof(msg));
3091	msg.msg_flags = MSG_DONTWAIT;
3092
3093	iov_iter_kvec(i: &msg.msg_iter, ITER_SOURCE, kvec: &kv, nr_segs: `1`, count: slen);
3094	ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
3095	sendmsg_unlocked, sk, &msg);
3096	if (ret <= `0`)
3097	goto error;
3098
3099	offset += ret;
3100	len -= ret;
3101	}
3102
3103	/ All the data was skb head? /
3104	if (!len)
3105	goto out;
3106
3107	/ Make offset relative to start of frags /
3108	offset -= skb_headlen(skb);
3109
3110	/ Find where we are in frag list /
3111	for (fragidx = `0`; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
3112	skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
3113
3114	if (offset < skb_frag_size(frag))
3115	break;
3116
3117	offset -= skb_frag_size(frag);
3118	}
3119
3120	for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
3121	skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
3122
3123	slen = min_t(size_t, len, skb_frag_size(frag) - offset);
3124
3125	while (slen) {
3126	struct bio_vec bvec;
3127	struct msghdr msg = {
3128	.msg_flags = MSG_SPLICE_PAGES \| MSG_DONTWAIT,
3129	};
3130
3131	bvec_set_page(bv: &bvec, page: skb_frag_page(frag), len: slen,
3132	offset: skb_frag_off(frag) + offset);
3133	iov_iter_bvec(i: &msg.msg_iter, ITER_SOURCE, bvec: &bvec, nr_segs: `1`,
3134	count: slen);
3135
3136	ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
3137	sendmsg_unlocked, sk, &msg);
3138	if (ret <= `0`)
3139	goto error;
3140
3141	len -= ret;
3142	offset += ret;
3143	slen -= ret;
3144	}
3145
3146	offset = `0`;
3147	}
3148
3149	if (len) {
3150	/ Process any frag lists /
3151
3152	if (skb == head) {
3153	if (skb_has_frag_list(skb)) {
3154	skb = skb_shinfo(skb)->frag_list;
3155	goto do_frag_list;
3156	}
3157	} else if (skb->next) {
3158	skb = skb->next;
3159	goto do_frag_list;
3160	}
3161	}
3162
3163	out:
3164	return orig_len - len;
3165
3166	error:
3167	return orig_len == len ? ret : orig_len - len;
3168	}
3169
3170	/ Send skb data on a socket. Socket must be locked. /
3171	int skb_send_sock_locked(struct sock sk, struct* sk_buff skb, int* offset,
3172	int len)
3173	{
3174	return __skb_send_sock(sk, skb, offset, len, sendmsg: sendmsg_locked);
3175	}
3176	EXPORT_SYMBOL_GPL(skb_send_sock_locked);
3177
3178	/ Send skb data on a socket. Socket must be unlocked. /
3179	int skb_send_sock(struct sock sk, struct* sk_buff skb, int* offset, int len)
3180	{
3181	return __skb_send_sock(sk, skb, offset, len, sendmsg: sendmsg_unlocked);
3182	}
3183
3184	/**
3185	* skb_store_bits - store bits from kernel buffer to skb
3186	* @skb: destination buffer
3187	* @offset: offset in destination
3188	* @from: source buffer
3189	* @len: number of bytes to copy
3190	*
3191	* Copy the specified number of bytes from the source buffer to the
3192	* destination skb. This function handles all the messy bits of
3193	* traversing fragment lists and such.
3194	*/
3195
3196	int skb_store_bits(struct sk_buff skb, int* offset, const void from, int* len)
3197	{
3198	int start = skb_headlen(skb);
3199	struct sk_buff *frag_iter;
3200	int i, copy;
3201
3202	if (offset > (int)skb->len - len)
3203	goto fault;
3204
3205	if ((copy = start - offset) > `0`) {
3206	if (copy > len)
3207	copy = len;
3208	skb_copy_to_linear_data_offset(skb, offset, from, len: copy);
3209	if ((len -= copy) == `0`)
3210	return `0`;
3211	offset += copy;
3212	from += copy;
3213	}
3214
3215	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
3216	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
3217	int end;
3218
3219	WARN_ON(start > offset + len);
3220
3221	end = start + skb_frag_size(frag);
3222	if ((copy = end - offset) > `0`) {
3223	u32 p_off, p_len, copied;
3224	struct page *p;
3225	u8 *vaddr;
3226
3227	if (copy > len)
3228	copy = len;
3229
3230	skb_frag_foreach_page(frag,
3231	skb_frag_off(frag) + offset - start,
3232	copy, p, p_off, p_len, copied) {
3233	vaddr = kmap_atomic(page: p);
3234	memcpy(vaddr + p_off, from + copied, p_len);
3235	kunmap_atomic(vaddr);
3236	}
3237
3238	if ((len -= copy) == `0`)
3239	return `0`;
3240	offset += copy;
3241	from += copy;
3242	}
3243	start = end;
3244	}
3245
3246	skb_walk_frags(skb, frag_iter) {
3247	int end;
3248
3249	WARN_ON(start > offset + len);
3250
3251	end = start + frag_iter->len;
3252	if ((copy = end - offset) > `0`) {
3253	if (copy > len)
3254	copy = len;
3255	if (skb_store_bits(skb: frag_iter, offset: offset - start,
3256	from, len: copy))
3257	goto fault;
3258	if ((len -= copy) == `0`)
3259	return `0`;
3260	offset += copy;
3261	from += copy;
3262	}
3263	start = end;
3264	}
3265	if (!len)
3266	return `0`;
3267
3268	fault:
3269	return -EFAULT;
3270	}
3271	EXPORT_SYMBOL(skb_store_bits);
3272
3273	/ Checksum skb data. /
3274	__wsum __skb_checksum(const struct sk_buff skb, int* offset, int len,
3275	__wsum csum, const struct skb_checksum_ops *ops)
3276	{
3277	int start = skb_headlen(skb);
3278	int i, copy = start - offset;
3279	struct sk_buff *frag_iter;
3280	int pos = `0`;
3281
3282	/ Checksum header. /
3283	if (copy > `0`) {
3284	if (copy > len)
3285	copy = len;
3286	csum = INDIRECT_CALL_1(ops->update, csum_partial_ext,
3287	skb->data + offset, copy, csum);
3288	if ((len -= copy) == `0`)
3289	return csum;
3290	offset += copy;
3291	pos = copy;
3292	}
3293
3294	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
3295	int end;
3296	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
3297
3298	WARN_ON(start > offset + len);
3299
3300	end = start + skb_frag_size(frag);
3301	if ((copy = end - offset) > `0`) {
3302	u32 p_off, p_len, copied;
3303	struct page *p;
3304	__wsum csum2;
3305	u8 *vaddr;
3306
3307	if (copy > len)
3308	copy = len;
3309
3310	skb_frag_foreach_page(frag,
3311	skb_frag_off(frag) + offset - start,
3312	copy, p, p_off, p_len, copied) {
3313	vaddr = kmap_atomic(page: p);
3314	csum2 = INDIRECT_CALL_1(ops->update,
3315	csum_partial_ext,
3316	vaddr + p_off, p_len, `0`);
3317	kunmap_atomic(vaddr);
3318	csum = INDIRECT_CALL_1(ops->combine,
3319	csum_block_add_ext, csum,
3320	csum2, pos, p_len);
3321	pos += p_len;
3322	}
3323
3324	if (!(len -= copy))
3325	return csum;
3326	offset += copy;
3327	}
3328	start = end;
3329	}
3330
3331	skb_walk_frags(skb, frag_iter) {
3332	int end;
3333
3334	WARN_ON(start > offset + len);
3335
3336	end = start + frag_iter->len;
3337	if ((copy = end - offset) > `0`) {
3338	__wsum csum2;
3339	if (copy > len)
3340	copy = len;
3341	csum2 = __skb_checksum(skb: frag_iter, offset: offset - start,
3342	len: copy, csum: `0`, ops);
3343	csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext,
3344	csum, csum2, pos, copy);
3345	if ((len -= copy) == `0`)
3346	return csum;
3347	offset += copy;
3348	pos += copy;
3349	}
3350	start = end;
3351	}
3352	BUG_ON(len);
3353
3354	return csum;
3355	}
3356	EXPORT_SYMBOL(__skb_checksum);
3357
3358	__wsum skb_checksum(const struct sk_buff skb, int* offset,
3359	int len, __wsum csum)
3360	{
3361	const struct skb_checksum_ops ops = {
3362	.update = csum_partial_ext,
3363	.combine = csum_block_add_ext,
3364	};
3365
3366	return __skb_checksum(skb, offset, len, csum, &ops);
3367	}
3368	EXPORT_SYMBOL(skb_checksum);
3369
3370	/ Both of above in one bottle. /
3371
3372	__wsum skb_copy_and_csum_bits(const struct sk_buff skb, int* offset,
3373	u8 to, int* len)
3374	{
3375	int start = skb_headlen(skb);
3376	int i, copy = start - offset;
3377	struct sk_buff *frag_iter;
3378	int pos = `0`;
3379	__wsum csum = `0`;
3380
3381	/ Copy header. /
3382	if (copy > `0`) {
3383	if (copy > len)
3384	copy = len;
3385	csum = csum_partial_copy_nocheck(src: skb->data + offset, dst: to,
3386	len: copy);
3387	if ((len -= copy) == `0`)
3388	return csum;
3389	offset += copy;
3390	to += copy;
3391	pos = copy;
3392	}
3393
3394	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
3395	int end;
3396
3397	WARN_ON(start > offset + len);
3398
3399	end = start + skb_frag_size(frag: &skb_shinfo(skb)->frags[i]);
3400	if ((copy = end - offset) > `0`) {
3401	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
3402	u32 p_off, p_len, copied;
3403	struct page *p;
3404	__wsum csum2;
3405	u8 *vaddr;
3406
3407	if (copy > len)
3408	copy = len;
3409
3410	skb_frag_foreach_page(frag,
3411	skb_frag_off(frag) + offset - start,
3412	copy, p, p_off, p_len, copied) {
3413	vaddr = kmap_atomic(page: p);
3414	csum2 = csum_partial_copy_nocheck(src: vaddr + p_off,
3415	dst: to + copied,
3416	len: p_len);
3417	kunmap_atomic(vaddr);
3418	csum = csum_block_add(csum, csum2, offset: pos);
3419	pos += p_len;
3420	}
3421
3422	if (!(len -= copy))
3423	return csum;
3424	offset += copy;
3425	to += copy;
3426	}
3427	start = end;
3428	}
3429
3430	skb_walk_frags(skb, frag_iter) {
3431	__wsum csum2;
3432	int end;
3433
3434	WARN_ON(start > offset + len);
3435
3436	end = start + frag_iter->len;
3437	if ((copy = end - offset) > `0`) {
3438	if (copy > len)
3439	copy = len;
3440	csum2 = skb_copy_and_csum_bits(skb: frag_iter,
3441	offset: offset - start,
3442	to, len: copy);
3443	csum = csum_block_add(csum, csum2, offset: pos);
3444	if ((len -= copy) == `0`)
3445	return csum;
3446	offset += copy;
3447	to += copy;
3448	pos += copy;
3449	}
3450	start = end;
3451	}
3452	BUG_ON(len);
3453	return csum;
3454	}
3455	EXPORT_SYMBOL(skb_copy_and_csum_bits);
3456
3457	__sum16 __skb_checksum_complete_head(struct sk_buff skb, int* len)
3458	{
3459	__sum16 sum;
3460
3461	sum = csum_fold(sum: skb_checksum(skb, `0`, len, skb->csum));
3462	/ See comments in __skb_checksum_complete(). /
3463	if (likely(!sum)) {
3464	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
3465	!skb->csum_complete_sw)
3466	netdev_rx_csum_fault(dev: skb->dev, skb);
3467	}
3468	if (!skb_shared(skb))
3469	skb->csum_valid = !sum;
3470	return sum;
3471	}
3472	EXPORT_SYMBOL(__skb_checksum_complete_head);
3473
3474	/ This function assumes skb->csum already holds pseudo header's checksum,*
3475	* which has been changed from the hardware checksum, for example, by
3476	* __skb_checksum_validate_complete(). And, the original skb->csum must
3477	* have been validated unsuccessfully for CHECKSUM_COMPLETE case.
3478	*
3479	* It returns non-zero if the recomputed checksum is still invalid, otherwise
3480	* zero. The new checksum is stored back into skb->csum unless the skb is
3481	* shared.
3482	*/
3483	__sum16 __skb_checksum_complete(struct sk_buff *skb)
3484	{
3485	__wsum csum;
3486	__sum16 sum;
3487
3488	csum = skb_checksum(skb, `0`, skb->len, `0`);
3489
3490	sum = csum_fold(sum: csum_add(csum: skb->csum, addend: csum));
3491	/ This check is inverted, because we already knew the hardware*
3492	* checksum is invalid before calling this function. So, if the
3493	* re-computed checksum is valid instead, then we have a mismatch
3494	* between the original skb->csum and skb_checksum(). This means either
3495	* the original hardware checksum is incorrect or we screw up skb->csum
3496	* when moving skb->data around.
3497	*/
3498	if (likely(!sum)) {
3499	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
3500	!skb->csum_complete_sw)
3501	netdev_rx_csum_fault(dev: skb->dev, skb);
3502	}
3503
3504	if (!skb_shared(skb)) {
3505	/ Save full packet checksum /
3506	skb->csum = csum;
3507	skb->ip_summed = CHECKSUM_COMPLETE;
3508	skb->csum_complete_sw = `1`;
3509	skb->csum_valid = !sum;
3510	}
3511
3512	return sum;
3513	}
3514	EXPORT_SYMBOL(__skb_checksum_complete);
3515
3516	static __wsum warn_crc32c_csum_update(const void buff, int* len, __wsum sum)
3517	{
3518	net_warn_ratelimited(
3519	"%s: attempt to compute crc32c without libcrc32c.ko\n",
3520	__func__);
3521	return `0`;
3522	}
3523
3524	static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2,
3525	int offset, int len)
3526	{
3527	net_warn_ratelimited(
3528	"%s: attempt to compute crc32c without libcrc32c.ko\n",
3529	__func__);
3530	return `0`;
3531	}
3532
3533	static const struct skb_checksum_ops default_crc32c_ops = {
3534	.update = warn_crc32c_csum_update,
3535	.combine = warn_crc32c_csum_combine,
3536	};
3537
3538	const struct skb_checksum_ops *crc32c_csum_stub __read_mostly =
3539	&default_crc32c_ops;
3540	EXPORT_SYMBOL(crc32c_csum_stub);
3541
3542	/**
3543	* skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
3544	* @from: source buffer
3545	*
3546	* Calculates the amount of linear headroom needed in the 'to' skb passed
3547	* into skb_zerocopy().
3548	*/
3549	unsigned int
3550	skb_zerocopy_headlen(const struct sk_buff *from)
3551	{
3552	unsigned int hlen = `0`;
3553
3554	if (!from->head_frag \|\|
3555	skb_headlen(skb: from) < L1_CACHE_BYTES \|\|
3556	skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) {
3557	hlen = skb_headlen(skb: from);
3558	if (!hlen)
3559	hlen = from->len;
3560	}
3561
3562	if (skb_has_frag_list(skb: from))
3563	hlen = from->len;
3564
3565	return hlen;
3566	}
3567	EXPORT_SYMBOL_GPL(skb_zerocopy_headlen);
3568
3569	/**
3570	* skb_zerocopy - Zero copy skb to skb
3571	* @to: destination buffer
3572	* @from: source buffer
3573	* @len: number of bytes to copy from source buffer
3574	* @hlen: size of linear headroom in destination buffer
3575	*
3576	* Copies up to `len` bytes from `from` to `to` by creating references
3577	* to the frags in the source buffer.
3578	*
3579	* The `hlen` as calculated by skb_zerocopy_headlen() specifies the
3580	* headroom in the `to` buffer.
3581	*
3582	* Return value:
3583	* 0: everything is OK
3584	* -ENOMEM: couldn't orphan frags of @from due to lack of memory
3585	* -EFAULT: skb_copy_bits() found some problem with skb geometry
3586	*/
3587	int
3588	skb_zerocopy(struct sk_buff to, struct* sk_buff from, int* len, int hlen)
3589	{
3590	int i, j = `0`;
3591	int plen = `0`; / length of skb->head fragment /
3592	int ret;
3593	struct page *page;
3594	unsigned int offset;
3595
3596	BUG_ON(!from->head_frag && !hlen);
3597
3598	/ dont bother with small payloads /
3599	if (len <= skb_tailroom(skb: to))
3600	return skb_copy_bits(from, `0`, skb_put(to, len), len);
3601
3602	if (hlen) {
3603	ret = skb_copy_bits(from, `0`, skb_put(to, hlen), hlen);
3604	if (unlikely(ret))
3605	return ret;
3606	len -= hlen;
3607	} else {
3608	plen = min_t(int, skb_headlen(from), len);
3609	if (plen) {
3610	page = virt_to_head_page(x: from->head);
3611	offset = from->data - (unsigned char *)page_address(page);
3612	__skb_fill_page_desc(skb: to, i: `0`, page, off: offset, size: plen);
3613	get_page(page);
3614	j = `1`;
3615	len -= plen;
3616	}
3617	}
3618
3619	skb_len_add(skb: to, delta: len + plen);
3620
3621	if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) {
3622	skb_tx_error(from);
3623	return -ENOMEM;
3624	}
3625	skb_zerocopy_clone(nskb: to, orig: from, GFP_ATOMIC);
3626
3627	for (i = `0`; i < skb_shinfo(from)->nr_frags; i++) {
3628	int size;
3629
3630	if (!len)
3631	break;
3632	skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
3633	size = min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]),
3634	len);
3635	skb_frag_size_set(frag: &skb_shinfo(to)->frags[j], size);
3636	len -= size;
3637	skb_frag_ref(skb: to, f: j);
3638	j++;
3639	}
3640	skb_shinfo(to)->nr_frags = j;
3641
3642	return `0`;
3643	}
3644	EXPORT_SYMBOL_GPL(skb_zerocopy);
3645
3646	void skb_copy_and_csum_dev(const struct sk_buff skb, u8 to)
3647	{
3648	__wsum csum;
3649	long csstart;
3650
3651	if (skb->ip_summed == CHECKSUM_PARTIAL)
3652	csstart = skb_checksum_start_offset(skb);
3653	else
3654	csstart = skb_headlen(skb);
3655
3656	BUG_ON(csstart > skb_headlen(skb));
3657
3658	skb_copy_from_linear_data(skb, to, len: csstart);
3659
3660	csum = `0`;
3661	if (csstart != skb->len)
3662	csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
3663	skb->len - csstart);
3664
3665	if (skb->ip_summed == CHECKSUM_PARTIAL) {
3666	long csstuff = csstart + skb->csum_offset;
3667
3668	((__sum16 )(to + csstuff)) = csum_fold(sum: csum);
3669	}
3670	}
3671	EXPORT_SYMBOL(skb_copy_and_csum_dev);
3672
3673	/**
3674	* skb_dequeue - remove from the head of the queue
3675	* @list: list to dequeue from
3676	*
3677	* Remove the head of the list. The list lock is taken so the function
3678	* may be used safely with other locking list functions. The head item is
3679	* returned or %NULL if the list is empty.
3680	*/
3681
3682	struct sk_buff skb_dequeue(struct* sk_buff_head *list)
3683	{
3684	unsigned long flags;
3685	struct sk_buff *result;
3686
3687	spin_lock_irqsave(&list->lock, flags);
3688	result = __skb_dequeue(list);
3689	spin_unlock_irqrestore(lock: &list->lock, flags);
3690	return result;
3691	}
3692	EXPORT_SYMBOL(skb_dequeue);
3693
3694	/**
3695	* skb_dequeue_tail - remove from the tail of the queue
3696	* @list: list to dequeue from
3697	*
3698	* Remove the tail of the list. The list lock is taken so the function
3699	* may be used safely with other locking list functions. The tail item is
3700	* returned or %NULL if the list is empty.
3701	*/
3702	struct sk_buff skb_dequeue_tail(struct* sk_buff_head *list)
3703	{
3704	unsigned long flags;
3705	struct sk_buff *result;
3706
3707	spin_lock_irqsave(&list->lock, flags);
3708	result = __skb_dequeue_tail(list);
3709	spin_unlock_irqrestore(lock: &list->lock, flags);
3710	return result;
3711	}
3712	EXPORT_SYMBOL(skb_dequeue_tail);
3713
3714	/**
3715	* skb_queue_purge_reason - empty a list
3716	* @list: list to empty
3717	* @reason: drop reason
3718	*
3719	* Delete all buffers on an &sk_buff list. Each buffer is removed from
3720	* the list and one reference dropped. This function takes the list
3721	* lock and is atomic with respect to other list locking functions.
3722	*/
3723	void skb_queue_purge_reason(struct sk_buff_head *list,
3724	enum skb_drop_reason reason)
3725	{
3726	struct sk_buff_head tmp;
3727	unsigned long flags;
3728
3729	if (skb_queue_empty_lockless(list))
3730	return;
3731
3732	__skb_queue_head_init(list: &tmp);
3733
3734	spin_lock_irqsave(&list->lock, flags);
3735	skb_queue_splice_init(list, head: &tmp);
3736	spin_unlock_irqrestore(lock: &list->lock, flags);
3737
3738	__skb_queue_purge_reason(list: &tmp, reason);
3739	}
3740	EXPORT_SYMBOL(skb_queue_purge_reason);
3741
3742	/**
3743	* skb_rbtree_purge - empty a skb rbtree
3744	* @root: root of the rbtree to empty
3745	* Return value: the sum of truesizes of all purged skbs.
3746	*
3747	* Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
3748	* the list and one reference dropped. This function does not take
3749	* any lock. Synchronization should be handled by the caller (e.g., TCP
3750	* out-of-order queue is protected by the socket lock).
3751	*/
3752	unsigned int skb_rbtree_purge(struct rb_root *root)
3753	{
3754	struct rb_node *p = rb_first(root);
3755	unsigned int sum = `0`;
3756
3757	while (p) {
3758	struct sk_buff skb = rb_entry(p, struct* sk_buff, rbnode);
3759
3760	p = rb_next(p);
3761	rb_erase(&skb->rbnode, root);
3762	sum += skb->truesize;
3763	kfree_skb(skb);
3764	}
3765	return sum;
3766	}
3767
3768	void skb_errqueue_purge(struct sk_buff_head *list)
3769	{
3770	struct sk_buff skb, next;
3771	struct sk_buff_head kill;
3772	unsigned long flags;
3773
3774	__skb_queue_head_init(list: &kill);
3775
3776	spin_lock_irqsave(&list->lock, flags);
3777	skb_queue_walk_safe(list, skb, next) {
3778	if (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ZEROCOPY \|\|
3779	SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING)
3780	continue;
3781	__skb_unlink(skb, list);
3782	__skb_queue_tail(list: &kill, newsk: skb);
3783	}
3784	spin_unlock_irqrestore(lock: &list->lock, flags);
3785	__skb_queue_purge(list: &kill);
3786	}
3787	EXPORT_SYMBOL(skb_errqueue_purge);
3788
3789	/**
3790	* skb_queue_head - queue a buffer at the list head
3791	* @list: list to use
3792	* @newsk: buffer to queue
3793	*
3794	* Queue a buffer at the start of the list. This function takes the
3795	* list lock and can be used safely with other locking &sk_buff functions
3796	* safely.
3797	*
3798	* A buffer cannot be placed on two lists at the same time.
3799	*/
3800	void skb_queue_head(struct sk_buff_head list, struct* sk_buff *newsk)
3801	{
3802	unsigned long flags;
3803
3804	spin_lock_irqsave(&list->lock, flags);
3805	__skb_queue_head(list, newsk);
3806	spin_unlock_irqrestore(lock: &list->lock, flags);
3807	}
3808	EXPORT_SYMBOL(skb_queue_head);
3809
3810	/**
3811	* skb_queue_tail - queue a buffer at the list tail
3812	* @list: list to use
3813	* @newsk: buffer to queue
3814	*
3815	* Queue a buffer at the tail of the list. This function takes the
3816	* list lock and can be used safely with other locking &sk_buff functions
3817	* safely.
3818	*
3819	* A buffer cannot be placed on two lists at the same time.
3820	*/
3821	void skb_queue_tail(struct sk_buff_head list, struct* sk_buff *newsk)
3822	{
3823	unsigned long flags;
3824
3825	spin_lock_irqsave(&list->lock, flags);
3826	__skb_queue_tail(list, newsk);
3827	spin_unlock_irqrestore(lock: &list->lock, flags);
3828	}
3829	EXPORT_SYMBOL(skb_queue_tail);
3830
3831	/**
3832	* skb_unlink - remove a buffer from a list
3833	* @skb: buffer to remove
3834	* @list: list to use
3835	*
3836	* Remove a packet from a list. The list locks are taken and this
3837	* function is atomic with respect to other list locked calls
3838	*
3839	* You must know what list the SKB is on.
3840	*/
3841	void skb_unlink(struct sk_buff skb, struct* sk_buff_head *list)
3842	{
3843	unsigned long flags;
3844
3845	spin_lock_irqsave(&list->lock, flags);
3846	__skb_unlink(skb, list);
3847	spin_unlock_irqrestore(lock: &list->lock, flags);
3848	}
3849	EXPORT_SYMBOL(skb_unlink);
3850
3851	/**
3852	* skb_append - append a buffer
3853	* @old: buffer to insert after
3854	* @newsk: buffer to insert
3855	* @list: list to use
3856	*
3857	* Place a packet after a given packet in a list. The list locks are taken
3858	* and this function is atomic with respect to other list locked calls.
3859	* A buffer cannot be placed on two lists at the same time.
3860	*/
3861	void skb_append(struct sk_buff old, struct* sk_buff newsk, struct* sk_buff_head *list)
3862	{
3863	unsigned long flags;
3864
3865	spin_lock_irqsave(&list->lock, flags);
3866	__skb_queue_after(list, prev: old, newsk);
3867	spin_unlock_irqrestore(lock: &list->lock, flags);
3868	}
3869	EXPORT_SYMBOL(skb_append);
3870
3871	static inline void skb_split_inside_header(struct sk_buff *skb,
3872	struct sk_buff* skb1,
3873	const u32 len, const int pos)
3874	{
3875	int i;
3876
3877	skb_copy_from_linear_data_offset(skb, offset: len, to: skb_put(skb1, pos - len),
3878	len: pos - len);
3879	/ And move data appendix as is. /
3880	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++)
3881	skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
3882
3883	skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
3884	skb_shinfo(skb)->nr_frags = `0`;
3885	skb1->data_len = skb->data_len;
3886	skb1->len += skb1->data_len;
3887	skb->data_len = `0`;
3888	skb->len = len;
3889	skb_set_tail_pointer(skb, offset: len);
3890	}
3891
3892	static inline void skb_split_no_header(struct sk_buff *skb,
3893	struct sk_buff* skb1,
3894	const u32 len, int pos)
3895	{
3896	int i, k = `0`;
3897	const int nfrags = skb_shinfo(skb)->nr_frags;
3898
3899	skb_shinfo(skb)->nr_frags = `0`;
3900	skb1->len = skb1->data_len = skb->len - len;
3901	skb->len = len;
3902	skb->data_len = len - pos;
3903
3904	for (i = `0`; i < nfrags; i++) {
3905	int size = skb_frag_size(frag: &skb_shinfo(skb)->frags[i]);
3906
3907	if (pos + size > len) {
3908	skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
3909
3910	if (pos < len) {
3911	/ Split frag.*
3912	* We have two variants in this case:
3913	* 1. Move all the frag to the second
3914	* part, if it is possible. F.e.
3915	* this approach is mandatory for TUX,
3916	* where splitting is expensive.
3917	* 2. Split is accurately. We make this.
3918	*/
3919	skb_frag_ref(skb, f: i);
3920	skb_frag_off_add(frag: &skb_shinfo(skb1)->frags[`0`], delta: len - pos);
3921	skb_frag_size_sub(frag: &skb_shinfo(skb1)->frags[`0`], delta: len - pos);
3922	skb_frag_size_set(frag: &skb_shinfo(skb)->frags[i], size: len - pos);
3923	skb_shinfo(skb)->nr_frags++;
3924	}
3925	k++;
3926	} else
3927	skb_shinfo(skb)->nr_frags++;
3928	pos += size;
3929	}
3930	skb_shinfo(skb1)->nr_frags = k;
3931	}
3932
3933	/**
3934	* skb_split - Split fragmented skb to two parts at length len.
3935	* @skb: the buffer to split
3936	* @skb1: the buffer to receive the second part
3937	* @len: new length for skb
3938	*/
3939	void skb_split(struct sk_buff skb, struct* sk_buff skb1, const* u32 len)
3940	{
3941	int pos = skb_headlen(skb);
3942	const int zc_flags = SKBFL_SHARED_FRAG \| SKBFL_PURE_ZEROCOPY;
3943
3944	skb_zcopy_downgrade_managed(skb);
3945
3946	skb_shinfo(skb1)->flags \|= skb_shinfo(skb)->flags & zc_flags;
3947	skb_zerocopy_clone(nskb: skb1, orig: skb, gfp_mask: `0`);
3948	if (len < pos) / Split line is inside header. /
3949	skb_split_inside_header(skb, skb1, len, pos);
3950	else / Second chunk has no header, nothing to copy. /
3951	skb_split_no_header(skb, skb1, len, pos);
3952	}
3953	EXPORT_SYMBOL(skb_split);
3954
3955	/ Shifting from/to a cloned skb is a no-go.*
3956	*
3957	* Caller cannot keep skb_shinfo related pointers past calling here!
3958	*/
3959	static int skb_prepare_for_shift(struct sk_buff *skb)
3960	{
3961	return skb_unclone_keeptruesize(skb, GFP_ATOMIC);
3962	}
3963
3964	/**
3965	* skb_shift - Shifts paged data partially from skb to another
3966	* @tgt: buffer into which tail data gets added
3967	* @skb: buffer from which the paged data comes from
3968	* @shiftlen: shift up to this many bytes
3969	*
3970	* Attempts to shift up to shiftlen worth of bytes, which may be less than
3971	* the length of the skb, from skb to tgt. Returns number bytes shifted.
3972	* It's up to caller to free skb if everything was shifted.
3973	*
3974	* If @tgt runs out of frags, the whole operation is aborted.
3975	*
3976	* Skb cannot include anything else but paged data while tgt is allowed
3977	* to have non-paged data as well.
3978	*
3979	* TODO: full sized shift could be optimized but that would need
3980	* specialized skb free'er to handle frags without up-to-date nr_frags.
3981	*/
3982	int skb_shift(struct sk_buff tgt, struct* sk_buff skb, int* shiftlen)
3983	{
3984	int from, to, merge, todo;
3985	skb_frag_t fragfrom, fragto;
3986
3987	BUG_ON(shiftlen > skb->len);
3988
3989	if (skb_headlen(skb))
3990	return `0`;
3991	if (skb_zcopy(skb: tgt) \|\| skb_zcopy(skb))
3992	return `0`;
3993
3994	todo = shiftlen;
3995	from = `0`;
3996	to = skb_shinfo(tgt)->nr_frags;
3997	fragfrom = &skb_shinfo(skb)->frags[from];
3998
3999	/ Actual merge is delayed until the point when we know we can*
4000	* commit all, so that we don't have to undo partial changes
4001	*/
4002	if (!to \|\|
4003	!skb_can_coalesce(skb: tgt, i: to, page: skb_frag_page(frag: fragfrom),
4004	off: skb_frag_off(frag: fragfrom))) {
4005	merge = -`1`;
4006	} else {
4007	merge = to - `1`;
4008
4009	todo -= skb_frag_size(frag: fragfrom);
4010	if (todo < `0`) {
4011	if (skb_prepare_for_shift(skb) \|\|
4012	skb_prepare_for_shift(skb: tgt))
4013	return `0`;
4014
4015	/ All previous frag pointers might be stale! /
4016	fragfrom = &skb_shinfo(skb)->frags[from];
4017	fragto = &skb_shinfo(tgt)->frags[merge];
4018
4019	skb_frag_size_add(frag: fragto, delta: shiftlen);
4020	skb_frag_size_sub(frag: fragfrom, delta: shiftlen);
4021	skb_frag_off_add(frag: fragfrom, delta: shiftlen);
4022
4023	goto onlymerged;
4024	}
4025
4026	from++;
4027	}
4028
4029	/ Skip full, not-fitting skb to avoid expensive operations /
4030	if ((shiftlen == skb->len) &&
4031	(skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to))
4032	return `0`;
4033
4034	if (skb_prepare_for_shift(skb) \|\| skb_prepare_for_shift(skb: tgt))
4035	return `0`;
4036
4037	while ((todo > `0`) && (from < skb_shinfo(skb)->nr_frags)) {
4038	if (to == MAX_SKB_FRAGS)
4039	return `0`;
4040
4041	fragfrom = &skb_shinfo(skb)->frags[from];
4042	fragto = &skb_shinfo(tgt)->frags[to];
4043
4044	if (todo >= skb_frag_size(frag: fragfrom)) {
4045	fragto = fragfrom;
4046	todo -= skb_frag_size(frag: fragfrom);
4047	from++;
4048	to++;
4049
4050	} else {
4051	__skb_frag_ref(frag: fragfrom);
4052	skb_frag_page_copy(fragto, fragfrom);
4053	skb_frag_off_copy(fragto, fragfrom);
4054	skb_frag_size_set(frag: fragto, size: todo);
4055
4056	skb_frag_off_add(frag: fragfrom, delta: todo);
4057	skb_frag_size_sub(frag: fragfrom, delta: todo);
4058	todo = `0`;
4059
4060	to++;
4061	break;
4062	}
4063	}
4064
4065	/ Ready to "commit" this state change to tgt /
4066	skb_shinfo(tgt)->nr_frags = to;
4067
4068	if (merge >= `0`) {
4069	fragfrom = &skb_shinfo(skb)->frags[`0`];
4070	fragto = &skb_shinfo(tgt)->frags[merge];
4071
4072	skb_frag_size_add(frag: fragto, delta: skb_frag_size(frag: fragfrom));
4073	__skb_frag_unref(frag: fragfrom, recycle: skb->pp_recycle);
4074	}
4075
4076	/ Reposition in the original skb /
4077	to = `0`;
4078	while (from < skb_shinfo(skb)->nr_frags)
4079	skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++];
4080	skb_shinfo(skb)->nr_frags = to;
4081
4082	BUG_ON(todo > `0` && !skb_shinfo(skb)->nr_frags);
4083
4084	onlymerged:
4085	/ Most likely the tgt won't ever need its checksum anymore, skb on*
4086	* the other hand might need it if it needs to be resent
4087	*/
4088	tgt->ip_summed = CHECKSUM_PARTIAL;
4089	skb->ip_summed = CHECKSUM_PARTIAL;
4090
4091	skb_len_add(skb, delta: -shiftlen);
4092	skb_len_add(skb: tgt, delta: shiftlen);
4093
4094	return shiftlen;
4095	}
4096
4097	/**
4098	* skb_prepare_seq_read - Prepare a sequential read of skb data
4099	* @skb: the buffer to read
4100	* @from: lower offset of data to be read
4101	* @to: upper offset of data to be read
4102	* @st: state variable
4103	*
4104	* Initializes the specified state variable. Must be called before
4105	* invoking skb_seq_read() for the first time.
4106	*/
4107	void skb_prepare_seq_read(struct sk_buff skb, unsigned* int from,
4108	unsigned int to, struct skb_seq_state *st)
4109	{
4110	st->lower_offset = from;
4111	st->upper_offset = to;
4112	st->root_skb = st->cur_skb = skb;
4113	st->frag_idx = st->stepped_offset = `0`;
4114	st->frag_data = NULL;
4115	st->frag_off = `0`;
4116	}
4117	EXPORT_SYMBOL(skb_prepare_seq_read);
4118
4119	/**
4120	* skb_seq_read - Sequentially read skb data
4121	* @consumed: number of bytes consumed by the caller so far
4122	* @data: destination pointer for data to be returned
4123	* @st: state variable
4124	*
4125	* Reads a block of skb data at @consumed relative to the
4126	* lower offset specified to skb_prepare_seq_read(). Assigns
4127	* the head of the data block to @data and returns the length
4128	* of the block or 0 if the end of the skb data or the upper
4129	* offset has been reached.
4130	*
4131	* The caller is not required to consume all of the data
4132	* returned, i.e. @consumed is typically set to the number
4133	* of bytes already consumed and the next call to
4134	* skb_seq_read() will return the remaining part of the block.
4135	*
4136	* Note 1: The size of each block of data returned can be arbitrary,
4137	* this limitation is the cost for zerocopy sequential
4138	* reads of potentially non linear data.
4139	*
4140	* Note 2: Fragment lists within fragments are not implemented
4141	* at the moment, state->root_skb could be replaced with
4142	* a stack for this purpose.
4143	*/
4144	unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
4145	struct skb_seq_state *st)
4146	{
4147	unsigned int block_limit, abs_offset = consumed + st->lower_offset;
4148	skb_frag_t *frag;
4149
4150	if (unlikely(abs_offset >= st->upper_offset)) {
4151	if (st->frag_data) {
4152	kunmap_atomic(st->frag_data);
4153	st->frag_data = NULL;
4154	}
4155	return `0`;
4156	}
4157
4158	next_skb:
4159	block_limit = skb_headlen(skb: st->cur_skb) + st->stepped_offset;
4160
4161	if (abs_offset < block_limit && !st->frag_data) {
4162	*data = st->cur_skb->data + (abs_offset - st->stepped_offset);
4163	return block_limit - abs_offset;
4164	}
4165
4166	if (st->frag_idx == `0` && !st->frag_data)
4167	st->stepped_offset += skb_headlen(skb: st->cur_skb);
4168
4169	while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
4170	unsigned int pg_idx, pg_off, pg_sz;
4171
4172	frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
4173
4174	pg_idx = `0`;
4175	pg_off = skb_frag_off(frag);
4176	pg_sz = skb_frag_size(frag);
4177
4178	if (skb_frag_must_loop(p: skb_frag_page(frag))) {
4179	pg_idx = (pg_off + st->frag_off) >> PAGE_SHIFT;
4180	pg_off = offset_in_page(pg_off + st->frag_off);
4181	pg_sz = min_t(unsigned int, pg_sz - st->frag_off,
4182	PAGE_SIZE - pg_off);
4183	}
4184
4185	block_limit = pg_sz + st->stepped_offset;
4186	if (abs_offset < block_limit) {
4187	if (!st->frag_data)
4188	st->frag_data = kmap_atomic(page: skb_frag_page(frag) + pg_idx);
4189
4190	data = (u8 )st->frag_data + pg_off +
4191	(abs_offset - st->stepped_offset);
4192
4193	return block_limit - abs_offset;
4194	}
4195
4196	if (st->frag_data) {
4197	kunmap_atomic(st->frag_data);
4198	st->frag_data = NULL;
4199	}
4200
4201	st->stepped_offset += pg_sz;
4202	st->frag_off += pg_sz;
4203	if (st->frag_off == skb_frag_size(frag)) {
4204	st->frag_off = `0`;
4205	st->frag_idx++;
4206	}
4207	}
4208
4209	if (st->frag_data) {
4210	kunmap_atomic(st->frag_data);
4211	st->frag_data = NULL;
4212	}
4213
4214	if (st->root_skb == st->cur_skb && skb_has_frag_list(skb: st->root_skb)) {
4215	st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
4216	st->frag_idx = `0`;
4217	goto next_skb;
4218	} else if (st->cur_skb->next) {
4219	st->cur_skb = st->cur_skb->next;
4220	st->frag_idx = `0`;
4221	goto next_skb;
4222	}
4223
4224	return `0`;
4225	}
4226	EXPORT_SYMBOL(skb_seq_read);
4227
4228	/**
4229	* skb_abort_seq_read - Abort a sequential read of skb data
4230	* @st: state variable
4231	*
4232	* Must be called if skb_seq_read() was not called until it
4233	* returned 0.
4234	*/
4235	void skb_abort_seq_read(struct skb_seq_state *st)
4236	{
4237	if (st->frag_data)
4238	kunmap_atomic(st->frag_data);
4239	}
4240	EXPORT_SYMBOL(skb_abort_seq_read);
4241
4242	#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
4243
4244	static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
4245	struct ts_config *conf,
4246	struct ts_state *state)
4247	{
4248	return skb_seq_read(offset, text, TS_SKB_CB(state));
4249	}
4250
4251	static void skb_ts_finish(struct ts_config conf, struct* ts_state *state)
4252	{
4253	skb_abort_seq_read(TS_SKB_CB(state));
4254	}
4255
4256	/**
4257	* skb_find_text - Find a text pattern in skb data
4258	* @skb: the buffer to look in
4259	* @from: search offset
4260	* @to: search limit
4261	* @config: textsearch configuration
4262	*
4263	* Finds a pattern in the skb data according to the specified
4264	* textsearch configuration. Use textsearch_next() to retrieve
4265	* subsequent occurrences of the pattern. Returns the offset
4266	* to the first occurrence or UINT_MAX if no match was found.
4267	*/
4268	unsigned int skb_find_text(struct sk_buff skb, unsigned* int from,
4269	unsigned int to, struct ts_config *config)
4270	{
4271	unsigned int patlen = config->ops->get_pattern_len(config);
4272	struct ts_state state;
4273	unsigned int ret;
4274
4275	BUILD_BUG_ON(sizeof(struct skb_seq_state) > sizeof(state.cb));
4276
4277	config->get_next_block = skb_ts_get_next_block;
4278	config->finish = skb_ts_finish;
4279
4280	skb_prepare_seq_read(skb, from, to, TS_SKB_CB(&state));
4281
4282	ret = textsearch_find(conf: config, state: &state);
4283	return (ret + patlen <= to - from ? ret : UINT_MAX);
4284	}
4285	EXPORT_SYMBOL(skb_find_text);
4286
4287	int skb_append_pagefrags(struct sk_buff skb, struct* page *page,
4288	int offset, size_t size, size_t max_frags)
4289	{
4290	int i = skb_shinfo(skb)->nr_frags;
4291
4292	if (skb_can_coalesce(skb, i, page, off: offset)) {
4293	skb_frag_size_add(frag: &skb_shinfo(skb)->frags[i - `1`], delta: size);
4294	} else if (i < max_frags) {
4295	skb_zcopy_downgrade_managed(skb);
4296	get_page(page);
4297	skb_fill_page_desc_noacc(skb, i, page, off: offset, size);
4298	} else {
4299	return -EMSGSIZE;
4300	}
4301
4302	return `0`;
4303	}
4304	EXPORT_SYMBOL_GPL(skb_append_pagefrags);
4305
4306	/**
4307	* skb_pull_rcsum - pull skb and update receive checksum
4308	* @skb: buffer to update
4309	* @len: length of data pulled
4310	*
4311	* This function performs an skb_pull on the packet and updates
4312	* the CHECKSUM_COMPLETE checksum. It should be used on
4313	* receive path processing instead of skb_pull unless you know
4314	* that the checksum difference is zero (e.g., a valid IP header)
4315	* or you are setting ip_summed to CHECKSUM_NONE.
4316	*/
4317	void skb_pull_rcsum(struct* sk_buff skb, unsigned* int len)
4318	{
4319	unsigned char *data = skb->data;
4320
4321	BUG_ON(len > skb->len);
4322	__skb_pull(skb, len);
4323	skb_postpull_rcsum(skb, start: data, len);
4324	return skb->data;
4325	}
4326	EXPORT_SYMBOL_GPL(skb_pull_rcsum);
4327
4328	static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
4329	{
4330	skb_frag_t head_frag;
4331	struct page *page;
4332
4333	page = virt_to_head_page(x: frag_skb->head);
4334	skb_frag_fill_page_desc(frag: &head_frag, page, off: frag_skb->data -
4335	(unsigned char *)page_address(page),
4336	size: skb_headlen(skb: frag_skb));
4337	return head_frag;
4338	}
4339
4340	struct sk_buff skb_segment_list(struct* sk_buff *skb,
4341	netdev_features_t features,
4342	unsigned int offset)
4343	{
4344	struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
4345	unsigned int tnl_hlen = skb_tnl_header_len(inner_skb: skb);
4346	unsigned int delta_truesize = `0`;
4347	unsigned int delta_len = `0`;
4348	struct sk_buff *tail = NULL;
4349	struct sk_buff nskb, tmp;
4350	int len_diff, err;
4351
4352	skb_push(skb, -skb_network_offset(skb) + offset);
4353
4354	/ Ensure the head is writeable before touching the shared info /
4355	err = skb_unclone(skb, GFP_ATOMIC);
4356	if (err)
4357	goto err_linearize;
4358
4359	skb_shinfo(skb)->frag_list = NULL;
4360
4361	while (list_skb) {
4362	nskb = list_skb;
4363	list_skb = list_skb->next;
4364
4365	err = `0`;
4366	delta_truesize += nskb->truesize;
4367	if (skb_shared(skb: nskb)) {
4368	tmp = skb_clone(nskb, GFP_ATOMIC);
4369	if (tmp) {
4370	consume_skb(nskb);
4371	nskb = tmp;
4372	err = skb_unclone(skb: nskb, GFP_ATOMIC);
4373	} else {
4374	err = -ENOMEM;
4375	}
4376	}
4377
4378	if (!tail)
4379	skb->next = nskb;
4380	else
4381	tail->next = nskb;
4382
4383	if (unlikely(err)) {
4384	nskb->next = list_skb;
4385	goto err_linearize;
4386	}
4387
4388	tail = nskb;
4389
4390	delta_len += nskb->len;
4391
4392	skb_push(nskb, -skb_network_offset(skb: nskb) + offset);
4393
4394	skb_release_head_state(skb: nskb);
4395	len_diff = skb_network_header_len(skb: nskb) - skb_network_header_len(skb);
4396	__copy_skb_header(new: nskb, old: skb);
4397
4398	skb_headers_offset_update(nskb, skb_headroom(skb: nskb) - skb_headroom(skb));
4399	nskb->transport_header += len_diff;
4400	skb_copy_from_linear_data_offset(skb, offset: -tnl_hlen,
4401	to: nskb->data - tnl_hlen,
4402	len: offset + tnl_hlen);
4403
4404	if (skb_needs_linearize(skb: nskb, features) &&
4405	__skb_linearize(skb: nskb))
4406	goto err_linearize;
4407	}
4408
4409	skb->truesize = skb->truesize - delta_truesize;
4410	skb->data_len = skb->data_len - delta_len;
4411	skb->len = skb->len - delta_len;
4412
4413	skb_gso_reset(skb);
4414
4415	skb->prev = tail;
4416
4417	if (skb_needs_linearize(skb, features) &&
4418	__skb_linearize(skb))
4419	goto err_linearize;
4420
4421	skb_get(skb);
4422
4423	return skb;
4424
4425	err_linearize:
4426	kfree_skb_list(segs: skb->next);
4427	skb->next = NULL;
4428	return ERR_PTR(error: -ENOMEM);
4429	}
4430	EXPORT_SYMBOL_GPL(skb_segment_list);
4431
4432	/**
4433	* skb_segment - Perform protocol segmentation on skb.
4434	* @head_skb: buffer to segment
4435	* @features: features for the output path (see dev->features)
4436	*
4437	* This function performs segmentation on the given skb. It returns
4438	* a pointer to the first in a list of new skbs for the segments.
4439	* In case of error it returns ERR_PTR(err).
4440	*/
4441	struct sk_buff skb_segment(struct* sk_buff *head_skb,
4442	netdev_features_t features)
4443	{
4444	struct sk_buff *segs = NULL;
4445	struct sk_buff *tail = NULL;
4446	struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
4447	unsigned int mss = skb_shinfo(head_skb)->gso_size;
4448	unsigned int doffset = head_skb->data - skb_mac_header(skb: head_skb);
4449	unsigned int offset = doffset;
4450	unsigned int tnl_hlen = skb_tnl_header_len(inner_skb: head_skb);
4451	unsigned int partial_segs = `0`;
4452	unsigned int headroom;
4453	unsigned int len = head_skb->len;
4454	struct sk_buff *frag_skb;
4455	skb_frag_t *frag;
4456	__be16 proto;
4457	bool csum, sg;
4458	int err = -ENOMEM;
4459	int i = `0`;
4460	int nfrags, pos;
4461
4462	if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
4463	mss != GSO_BY_FRAGS && mss != skb_headlen(skb: head_skb)) {
4464	struct sk_buff *check_skb;
4465
4466	for (check_skb = list_skb; check_skb; check_skb = check_skb->next) {
4467	if (skb_headlen(skb: check_skb) && !check_skb->head_frag) {
4468	/ gso_size is untrusted, and we have a frag_list with*
4469	* a linear non head_frag item.
4470	*
4471	* If head_skb's headlen does not fit requested gso_size,
4472	* it means that the frag_list members do NOT terminate
4473	* on exact gso_size boundaries. Hence we cannot perform
4474	* skb_frag_t page sharing. Therefore we must fallback to
4475	* copying the frag_list skbs; we do so by disabling SG.
4476	*/
4477	features &= ~NETIF_F_SG;
4478	break;
4479	}
4480	}
4481	}
4482
4483	__skb_push(skb: head_skb, len: doffset);
4484	proto = skb_network_protocol(skb: head_skb, NULL);
4485	if (unlikely(!proto))
4486	return ERR_PTR(error: -EINVAL);
4487
4488	sg = !!(features & NETIF_F_SG);
4489	csum = !!can_checksum_protocol(features, protocol: proto);
4490
4491	if (sg && csum && (mss != GSO_BY_FRAGS)) {
4492	if (!(features & NETIF_F_GSO_PARTIAL)) {
4493	struct sk_buff *iter;
4494	unsigned int frag_len;
4495
4496	if (!list_skb \|\|
4497	!net_gso_ok(features, skb_shinfo(head_skb)->gso_type))
4498	goto normal;
4499
4500	/ If we get here then all the required*
4501	* GSO features except frag_list are supported.
4502	* Try to split the SKB to multiple GSO SKBs
4503	* with no frag_list.
4504	* Currently we can do that only when the buffers don't
4505	* have a linear part and all the buffers except
4506	* the last are of the same length.
4507	*/
4508	frag_len = list_skb->len;
4509	skb_walk_frags(head_skb, iter) {
4510	if (frag_len != iter->len && iter->next)
4511	goto normal;
4512	if (skb_headlen(skb: iter) && !iter->head_frag)
4513	goto normal;
4514
4515	len -= iter->len;
4516	}
4517
4518	if (len != frag_len)
4519	goto normal;
4520	}
4521
4522	/ GSO partial only requires that we trim off any excess that*
4523	* doesn't fit into an MSS sized block, so take care of that
4524	* now.
4525	*/
4526	partial_segs = len / mss;
4527	if (partial_segs > `1`)
4528	mss *= partial_segs;
4529	else
4530	partial_segs = `0`;
4531	}
4532
4533	normal:
4534	headroom = skb_headroom(skb: head_skb);
4535	pos = skb_headlen(skb: head_skb);
4536
4537	if (skb_orphan_frags(skb: head_skb, GFP_ATOMIC))
4538	return ERR_PTR(error: -ENOMEM);
4539
4540	nfrags = skb_shinfo(head_skb)->nr_frags;
4541	frag = skb_shinfo(head_skb)->frags;
4542	frag_skb = head_skb;
4543
4544	do {
4545	struct sk_buff *nskb;
4546	skb_frag_t *nskb_frag;
4547	int hsize;
4548	int size;
4549
4550	if (unlikely(mss == GSO_BY_FRAGS)) {
4551	len = list_skb->len;
4552	} else {
4553	len = head_skb->len - offset;
4554	if (len > mss)
4555	len = mss;
4556	}
4557
4558	hsize = skb_headlen(skb: head_skb) - offset;
4559
4560	if (hsize <= `0` && i >= nfrags && skb_headlen(skb: list_skb) &&
4561	(skb_headlen(skb: list_skb) == len \|\| sg)) {
4562	BUG_ON(skb_headlen(list_skb) > len);
4563
4564	nskb = skb_clone(list_skb, GFP_ATOMIC);
4565	if (unlikely(!nskb))
4566	goto err;
4567
4568	i = `0`;
4569	nfrags = skb_shinfo(list_skb)->nr_frags;
4570	frag = skb_shinfo(list_skb)->frags;
4571	frag_skb = list_skb;
4572	pos += skb_headlen(skb: list_skb);
4573
4574	while (pos < offset + len) {
4575	BUG_ON(i >= nfrags);
4576
4577	size = skb_frag_size(frag);
4578	if (pos + size > offset + len)
4579	break;
4580
4581	i++;
4582	pos += size;
4583	frag++;
4584	}
4585
4586	list_skb = list_skb->next;
4587
4588	if (unlikely(pskb_trim(nskb, len))) {
4589	kfree_skb(skb: nskb);
4590	goto err;
4591	}
4592
4593	hsize = skb_end_offset(skb: nskb);
4594	if (skb_cow_head(skb: nskb, headroom: doffset + headroom)) {
4595	kfree_skb(skb: nskb);
4596	goto err;
4597	}
4598
4599	nskb->truesize += skb_end_offset(skb: nskb) - hsize;
4600	skb_release_head_state(skb: nskb);
4601	__skb_push(skb: nskb, len: doffset);
4602	} else {
4603	if (hsize < `0`)
4604	hsize = `0`;
4605	if (hsize > len \|\| !sg)
4606	hsize = len;
4607
4608	nskb = __alloc_skb(hsize + doffset + headroom,
4609	GFP_ATOMIC, skb_alloc_rx_flag(skb: head_skb),
4610	NUMA_NO_NODE);
4611
4612	if (unlikely(!nskb))
4613	goto err;
4614
4615	skb_reserve(skb: nskb, len: headroom);
4616	__skb_put(skb: nskb, len: doffset);
4617	}
4618
4619	if (segs)
4620	tail->next = nskb;
4621	else
4622	segs = nskb;
4623	tail = nskb;
4624
4625	__copy_skb_header(new: nskb, old: head_skb);
4626
4627	skb_headers_offset_update(nskb, skb_headroom(skb: nskb) - headroom);
4628	skb_reset_mac_len(skb: nskb);
4629
4630	skb_copy_from_linear_data_offset(skb: head_skb, offset: -tnl_hlen,
4631	to: nskb->data - tnl_hlen,
4632	len: doffset + tnl_hlen);
4633
4634	if (nskb->len == len + doffset)
4635	goto perform_csum_check;
4636
4637	if (!sg) {
4638	if (!csum) {
4639	if (!nskb->remcsum_offload)
4640	nskb->ip_summed = CHECKSUM_NONE;
4641	SKB_GSO_CB(nskb)->csum =
4642	skb_copy_and_csum_bits(head_skb, offset,
4643	skb_put(nskb,
4644	len),
4645	len);
4646	SKB_GSO_CB(nskb)->csum_start =
4647	skb_headroom(skb: nskb) + doffset;
4648	} else {
4649	if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len))
4650	goto err;
4651	}
4652	continue;
4653	}
4654
4655	nskb_frag = skb_shinfo(nskb)->frags;
4656
4657	skb_copy_from_linear_data_offset(skb: head_skb, offset,
4658	to: skb_put(nskb, hsize), len: hsize);
4659
4660	skb_shinfo(nskb)->flags \|= skb_shinfo(head_skb)->flags &
4661	SKBFL_SHARED_FRAG;
4662
4663	if (skb_zerocopy_clone(nskb, orig: frag_skb, GFP_ATOMIC))
4664	goto err;
4665
4666	while (pos < offset + len) {
4667	if (i >= nfrags) {
4668	if (skb_orphan_frags(skb: list_skb, GFP_ATOMIC) \|\|
4669	skb_zerocopy_clone(nskb, orig: list_skb,
4670	GFP_ATOMIC))
4671	goto err;
4672
4673	i = `0`;
4674	nfrags = skb_shinfo(list_skb)->nr_frags;
4675	frag = skb_shinfo(list_skb)->frags;
4676	frag_skb = list_skb;
4677	if (!skb_headlen(skb: list_skb)) {
4678	BUG_ON(!nfrags);
4679	} else {
4680	BUG_ON(!list_skb->head_frag);
4681
4682	/ to make room for head_frag. /
4683	i--;
4684	frag--;
4685	}
4686
4687	list_skb = list_skb->next;
4688	}
4689
4690	if (unlikely(skb_shinfo(nskb)->nr_frags >=
4691	MAX_SKB_FRAGS)) {
4692	net_warn_ratelimited(
4693	"skb_segment: too many frags: %u %u\n",
4694	pos, mss);
4695	err = -EINVAL;
4696	goto err;
4697	}
4698
4699	nskb_frag = (i < `0`) ? skb_head_frag_to_page_desc(frag_skb) : frag;
4700	__skb_frag_ref(frag: nskb_frag);
4701	size = skb_frag_size(frag: nskb_frag);
4702
4703	if (pos < offset) {
4704	skb_frag_off_add(frag: nskb_frag, delta: offset - pos);
4705	skb_frag_size_sub(frag: nskb_frag, delta: offset - pos);
4706	}
4707
4708	skb_shinfo(nskb)->nr_frags++;
4709
4710	if (pos + size <= offset + len) {
4711	i++;
4712	frag++;
4713	pos += size;
4714	} else {
4715	skb_frag_size_sub(frag: nskb_frag, delta: pos + size - (offset + len));
4716	goto skip_fraglist;
4717	}
4718
4719	nskb_frag++;
4720	}
4721
4722	skip_fraglist:
4723	nskb->data_len = len - hsize;
4724	nskb->len += nskb->data_len;
4725	nskb->truesize += nskb->data_len;
4726
4727	perform_csum_check:
4728	if (!csum) {
4729	if (skb_has_shared_frag(skb: nskb) &&
4730	__skb_linearize(skb: nskb))
4731	goto err;
4732
4733	if (!nskb->remcsum_offload)
4734	nskb->ip_summed = CHECKSUM_NONE;
4735	SKB_GSO_CB(nskb)->csum =
4736	skb_checksum(nskb, doffset,
4737	nskb->len - doffset, `0`);
4738	SKB_GSO_CB(nskb)->csum_start =
4739	skb_headroom(skb: nskb) + doffset;
4740	}
4741	} while ((offset += len) < head_skb->len);
4742
4743	/ Some callers want to get the end of the list.*
4744	* Put it in segs->prev to avoid walking the list.
4745	* (see validate_xmit_skb_list() for example)
4746	*/
4747	segs->prev = tail;
4748
4749	if (partial_segs) {
4750	struct sk_buff *iter;
4751	int type = skb_shinfo(head_skb)->gso_type;
4752	unsigned short gso_size = skb_shinfo(head_skb)->gso_size;
4753
4754	/ Update type to add partial and then remove dodgy if set /
4755	type \|= (features & NETIF_F_GSO_PARTIAL) / NETIF_F_GSO_PARTIAL * SKB_GSO_PARTIAL;
4756	type &= ~SKB_GSO_DODGY;
4757
4758	/ Update GSO info and prepare to start updating headers on*
4759	* our way back down the stack of protocols.
4760	*/
4761	for (iter = segs; iter; iter = iter->next) {
4762	skb_shinfo(iter)->gso_size = gso_size;
4763	skb_shinfo(iter)->gso_segs = partial_segs;
4764	skb_shinfo(iter)->gso_type = type;
4765	SKB_GSO_CB(iter)->data_offset = skb_headroom(skb: iter) + doffset;
4766	}
4767
4768	if (tail->len - doffset <= gso_size)
4769	skb_shinfo(tail)->gso_size = `0`;
4770	else if (tail != segs)
4771	skb_shinfo(tail)->gso_segs = DIV_ROUND_UP(tail->len - doffset, gso_size);
4772	}
4773
4774	/ Following permits correct backpressure, for protocols*
4775	* using skb_set_owner_w().
4776	* Idea is to tranfert ownership from head_skb to last segment.
4777	*/
4778	if (head_skb->destructor == sock_wfree) {
4779	swap(tail->truesize, head_skb->truesize);
4780	swap(tail->destructor, head_skb->destructor);
4781	swap(tail->sk, head_skb->sk);
4782	}
4783	return segs;
4784
4785	err:
4786	kfree_skb_list(segs);
4787	return ERR_PTR(error: err);
4788	}
4789	EXPORT_SYMBOL_GPL(skb_segment);
4790
4791	#ifdef CONFIG_SKB_EXTENSIONS
4792	#define SKB_EXT_ALIGN_VALUE 8
4793	#define SKB_EXT_CHUNKSIZEOF(x) (ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE)
4794
4795	static const u8 skb_ext_type_len[] = {
4796	#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
4797	[SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info),
4798	#endif
4799	#ifdef CONFIG_XFRM
4800	[SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
4801	#endif
4802	#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
4803	[TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
4804	#endif
4805	#if IS_ENABLED(CONFIG_MPTCP)
4806	[SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext),
4807	#endif
4808	#if IS_ENABLED(CONFIG_MCTP_FLOWS)
4809	[SKB_EXT_MCTP] = SKB_EXT_CHUNKSIZEOF(struct mctp_flow),
4810	#endif
4811	};
4812
4813	static __always_inline unsigned int skb_ext_total_length(void)
4814	{
4815	unsigned int l = SKB_EXT_CHUNKSIZEOF(struct skb_ext);
4816	int i;
4817
4818	for (i = `0`; i < ARRAY_SIZE(skb_ext_type_len); i++)
4819	l += skb_ext_type_len[i];
4820
4821	return l;
4822	}
4823
4824	static void skb_extensions_init(void)
4825	{
4826	BUILD_BUG_ON(SKB_EXT_NUM >= `8`);
4827	BUILD_BUG_ON(skb_ext_total_length() > `255`);
4828
4829	skbuff_ext_cache = kmem_cache_create(name: "skbuff_ext_cache",
4830	SKB_EXT_ALIGN_VALUE * skb_ext_total_length(),
4831	align: `0`,
4832	SLAB_HWCACHE_ALIGN\|SLAB_PANIC,
4833	NULL);
4834	}
4835	#else
4836	static void skb_extensions_init(void) {}
4837	#endif
4838
4839	/ The SKB kmem_cache slab is critical for network performance. Never*
4840	* merge/alias the slab with similar sized objects. This avoids fragmentation
4841	* that hurts performance of kmem_cache_{alloc,free}_bulk APIs.
4842	*/
4843	#ifndef CONFIG_SLUB_TINY
4844	#define FLAG_SKB_NO_MERGE SLAB_NO_MERGE
4845	#else /* CONFIG_SLUB_TINY - simple loop in kmem_cache_alloc_bulk */
4846	#define FLAG_SKB_NO_MERGE 0
4847	#endif
4848
4849	void __init skb_init(void)
4850	{
4851	skbuff_cache = kmem_cache_create_usercopy(name: "skbuff_head_cache",
4852	size: sizeof(struct sk_buff),
4853	align: `0`,
4854	SLAB_HWCACHE_ALIGN\|SLAB_PANIC\|
4855	FLAG_SKB_NO_MERGE,
4856	offsetof(struct sk_buff, cb),
4857	sizeof_field(struct sk_buff, cb),
4858	NULL);
4859	skbuff_fclone_cache = kmem_cache_create(name: "skbuff_fclone_cache",
4860	size: sizeof(struct sk_buff_fclones),
4861	align: `0`,
4862	SLAB_HWCACHE_ALIGN\|SLAB_PANIC,
4863	NULL);
4864	/ usercopy should only access first SKB_SMALL_HEAD_HEADROOM bytes.*
4865	* struct skb_shared_info is located at the end of skb->head,
4866	* and should not be copied to/from user.
4867	*/
4868	skb_small_head_cache = kmem_cache_create_usercopy(name: "skbuff_small_head",
4869	SKB_SMALL_HEAD_CACHE_SIZE,
4870	align: `0`,
4871	SLAB_HWCACHE_ALIGN \| SLAB_PANIC,
4872	useroffset: `0`,
4873	SKB_SMALL_HEAD_HEADROOM,
4874	NULL);
4875	skb_extensions_init();
4876	}
4877
4878	static int
4879	__skb_to_sgvec(struct sk_buff skb, struct* scatterlist sg, int* offset, int len,
4880	unsigned int recursion_level)
4881	{
4882	int start = skb_headlen(skb);
4883	int i, copy = start - offset;
4884	struct sk_buff *frag_iter;
4885	int elt = `0`;
4886
4887	if (unlikely(recursion_level >= `24`))
4888	return -EMSGSIZE;
4889
4890	if (copy > `0`) {
4891	if (copy > len)
4892	copy = len;
4893	sg_set_buf(sg, buf: skb->data + offset, buflen: copy);
4894	elt++;
4895	if ((len -= copy) == `0`)
4896	return elt;
4897	offset += copy;
4898	}
4899
4900	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++) {
4901	int end;
4902
4903	WARN_ON(start > offset + len);
4904
4905	end = start + skb_frag_size(frag: &skb_shinfo(skb)->frags[i]);
4906	if ((copy = end - offset) > `0`) {
4907	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
4908	if (unlikely(elt && sg_is_last(&sg[elt - `1`])))
4909	return -EMSGSIZE;
4910
4911	if (copy > len)
4912	copy = len;
4913	sg_set_page(sg: &sg[elt], page: skb_frag_page(frag), len: copy,
4914	offset: skb_frag_off(frag) + offset - start);
4915	elt++;
4916	if (!(len -= copy))
4917	return elt;
4918	offset += copy;
4919	}
4920	start = end;
4921	}
4922
4923	skb_walk_frags(skb, frag_iter) {
4924	int end, ret;
4925
4926	WARN_ON(start > offset + len);
4927
4928	end = start + frag_iter->len;
4929	if ((copy = end - offset) > `0`) {
4930	if (unlikely(elt && sg_is_last(&sg[elt - `1`])))
4931	return -EMSGSIZE;
4932
4933	if (copy > len)
4934	copy = len;
4935	ret = __skb_to_sgvec(skb: frag_iter, sg: sg+elt, offset: offset - start,
4936	len: copy, recursion_level: recursion_level + `1`);
4937	if (unlikely(ret < `0`))
4938	return ret;
4939	elt += ret;
4940	if ((len -= copy) == `0`)
4941	return elt;
4942	offset += copy;
4943	}
4944	start = end;
4945	}
4946	BUG_ON(len);
4947	return elt;
4948	}
4949
4950	/**
4951	* skb_to_sgvec - Fill a scatter-gather list from a socket buffer
4952	* @skb: Socket buffer containing the buffers to be mapped
4953	* @sg: The scatter-gather list to map into
4954	* @offset: The offset into the buffer's contents to start mapping
4955	* @len: Length of buffer space to be mapped
4956	*
4957	* Fill the specified scatter-gather list with mappings/pointers into a
4958	* region of the buffer space attached to a socket buffer. Returns either
4959	* the number of scatterlist items used, or -EMSGSIZE if the contents
4960	* could not fit.
4961	*/
4962	int skb_to_sgvec(struct sk_buff skb, struct* scatterlist sg, int* offset, int len)
4963	{
4964	int nsg = __skb_to_sgvec(skb, sg, offset, len, recursion_level: `0`);
4965
4966	if (nsg <= `0`)
4967	return nsg;
4968
4969	sg_mark_end(sg: &sg[nsg - `1`]);
4970
4971	return nsg;
4972	}
4973	EXPORT_SYMBOL_GPL(skb_to_sgvec);
4974
4975	/ As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given*
4976	* sglist without mark the sg which contain last skb data as the end.
4977	* So the caller can mannipulate sg list as will when padding new data after
4978	* the first call without calling sg_unmark_end to expend sg list.
4979	*
4980	* Scenario to use skb_to_sgvec_nomark:
4981	* 1. sg_init_table
4982	* 2. skb_to_sgvec_nomark(payload1)
4983	* 3. skb_to_sgvec_nomark(payload2)
4984	*
4985	* This is equivalent to:
4986	* 1. sg_init_table
4987	* 2. skb_to_sgvec(payload1)
4988	* 3. sg_unmark_end
4989	* 4. skb_to_sgvec(payload2)
4990	*
4991	* When mapping mutilple payload conditionally, skb_to_sgvec_nomark
4992	* is more preferable.
4993	*/
4994	int skb_to_sgvec_nomark(struct sk_buff skb, struct* scatterlist *sg,
4995	int offset, int len)
4996	{
4997	return __skb_to_sgvec(skb, sg, offset, len, recursion_level: `0`);
4998	}
4999	EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
5000
5001
5002
5003	/**
5004	* skb_cow_data - Check that a socket buffer's data buffers are writable
5005	* @skb: The socket buffer to check.
5006	* @tailbits: Amount of trailing space to be added
5007	* @trailer: Returned pointer to the skb where the @tailbits space begins
5008	*
5009	* Make sure that the data buffers attached to a socket buffer are
5010	* writable. If they are not, private copies are made of the data buffers
5011	* and the socket buffer is set to use these instead.
5012	*
5013	* If @tailbits is given, make sure that there is space to write @tailbits
5014	* bytes of data beyond current end of socket buffer. @trailer will be
5015	* set to point to the skb in which this space begins.
5016	*
5017	* The number of scatterlist elements required to completely map the
5018	* COW'd and extended socket buffer will be returned.
5019	*/
5020	int skb_cow_data(struct sk_buff skb, int* tailbits, struct sk_buff **trailer)
5021	{
5022	int copyflag;
5023	int elt;
5024	struct sk_buff skb1, *skb_p;
5025
5026	/ If skb is cloned or its head is paged, reallocate*
5027	* head pulling out all the pages (pages are considered not writable
5028	* at the moment even if they are anonymous).
5029	*/
5030	if ((skb_cloned(skb) \|\| skb_shinfo(skb)->nr_frags) &&
5031	!__pskb_pull_tail(skb, __skb_pagelen(skb)))
5032	return -ENOMEM;
5033
5034	/ Easy case. Most of packets will go this way. /
5035	if (!skb_has_frag_list(skb)) {
5036	/ A little of trouble, not enough of space for trailer.*
5037	* This should not happen, when stack is tuned to generate
5038	* good frames. OK, on miss we reallocate and reserve even more
5039	* space, 128 bytes is fair. */
5040
5041	if (skb_tailroom(skb) < tailbits &&
5042	pskb_expand_head(skb, `0`, tailbits-skb_tailroom(skb)+`128`, GFP_ATOMIC))
5043	return -ENOMEM;
5044
5045	/ Voila! /
5046	*trailer = skb;
5047	return `1`;
5048	}
5049
5050	/ Misery. We are in troubles, going to mincer fragments... /
5051
5052	elt = `1`;
5053	skb_p = &skb_shinfo(skb)->frag_list;
5054	copyflag = `0`;
5055
5056	while ((skb1 = *skb_p) != NULL) {
5057	int ntail = `0`;
5058
5059	/ The fragment is partially pulled by someone,*
5060	* this can happen on input. Copy it and everything
5061	* after it. */
5062
5063	if (skb_shared(skb: skb1))
5064	copyflag = `1`;
5065
5066	/ If the skb is the last, worry about trailer. /
5067
5068	if (skb1->next == NULL && tailbits) {
5069	if (skb_shinfo(skb1)->nr_frags \|\|
5070	skb_has_frag_list(skb: skb1) \|\|
5071	skb_tailroom(skb: skb1) < tailbits)
5072	ntail = tailbits + `128`;
5073	}
5074
5075	if (copyflag \|\|
5076	skb_cloned(skb: skb1) \|\|
5077	ntail \|\|
5078	skb_shinfo(skb1)->nr_frags \|\|
5079	skb_has_frag_list(skb: skb1)) {
5080	struct sk_buff *skb2;
5081
5082	/ Fuck, we are miserable poor guys... /
5083	if (ntail == `0`)
5084	skb2 = skb_copy(skb1, GFP_ATOMIC);
5085	else
5086	skb2 = skb_copy_expand(skb1,
5087	skb_headroom(skb: skb1),
5088	ntail,
5089	GFP_ATOMIC);
5090	if (unlikely(skb2 == NULL))
5091	return -ENOMEM;
5092
5093	if (skb1->sk)
5094	skb_set_owner_w(skb: skb2, sk: skb1->sk);
5095
5096	/ Looking around. Are we still alive?*
5097	* OK, link new skb, drop old one */
5098
5099	skb2->next = skb1->next;
5100	*skb_p = skb2;
5101	kfree_skb(skb: skb1);
5102	skb1 = skb2;
5103	}
5104	elt++;
5105	*trailer = skb1;
5106	skb_p = &skb1->next;
5107	}
5108
5109	return elt;
5110	}
5111	EXPORT_SYMBOL_GPL(skb_cow_data);
5112
5113	static void sock_rmem_free(struct sk_buff *skb)
5114	{
5115	struct sock *sk = skb->sk;
5116
5117	atomic_sub(i: skb->truesize, v: &sk->sk_rmem_alloc);
5118	}
5119
5120	static void skb_set_err_queue(struct sk_buff *skb)
5121	{
5122	/ pkt_type of skbs received on local sockets is never PACKET_OUTGOING.*
5123	* So, it is safe to (mis)use it to mark skbs on the error queue.
5124	*/
5125	skb->pkt_type = PACKET_OUTGOING;
5126	BUILD_BUG_ON(PACKET_OUTGOING == `0`);
5127	}
5128
5129	/*
5130	* Note: We dont mem charge error packets (no sk_forward_alloc changes)
5131	*/
5132	int sock_queue_err_skb(struct sock sk, struct* sk_buff *skb)
5133	{
5134	if (atomic_read(v: &sk->sk_rmem_alloc) + skb->truesize >=
5135	(unsigned int)READ_ONCE(sk->sk_rcvbuf))
5136	return -ENOMEM;
5137
5138	skb_orphan(skb);
5139	skb->sk = sk;
5140	skb->destructor = sock_rmem_free;
5141	atomic_add(i: skb->truesize, v: &sk->sk_rmem_alloc);
5142	skb_set_err_queue(skb);
5143
5144	/ before exiting rcu section, make sure dst is refcounted /
5145	skb_dst_force(skb);
5146
5147	skb_queue_tail(&sk->sk_error_queue, skb);
5148	if (!sock_flag(sk, flag: SOCK_DEAD))
5149	sk_error_report(sk);
5150	return `0`;
5151	}
5152	EXPORT_SYMBOL(sock_queue_err_skb);
5153
5154	static bool is_icmp_err_skb(const struct sk_buff *skb)
5155	{
5156	return skb && (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP \|\|
5157	SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP6);
5158	}
5159
5160	struct sk_buff sock_dequeue_err_skb(struct* sock *sk)
5161	{
5162	struct sk_buff_head *q = &sk->sk_error_queue;
5163	struct sk_buff skb, skb_next = NULL;
5164	bool icmp_next = false;
5165	unsigned long flags;
5166
5167	if (skb_queue_empty_lockless(list: q))
5168	return NULL;
5169
5170	spin_lock_irqsave(&q->lock, flags);
5171	skb = __skb_dequeue(list: q);
5172	if (skb && (skb_next = skb_peek(list_: q))) {
5173	icmp_next = is_icmp_err_skb(skb: skb_next);
5174	if (icmp_next)
5175	sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
5176	}
5177	spin_unlock_irqrestore(lock: &q->lock, flags);
5178
5179	if (is_icmp_err_skb(skb) && !icmp_next)
5180	sk->sk_err = `0`;
5181
5182	if (skb_next)
5183	sk_error_report(sk);
5184
5185	return skb;
5186	}
5187	EXPORT_SYMBOL(sock_dequeue_err_skb);
5188
5189	/**
5190	* skb_clone_sk - create clone of skb, and take reference to socket
5191	* @skb: the skb to clone
5192	*
5193	* This function creates a clone of a buffer that holds a reference on
5194	* sk_refcnt. Buffers created via this function are meant to be
5195	* returned using sock_queue_err_skb, or free via kfree_skb.
5196	*
5197	* When passing buffers allocated with this function to sock_queue_err_skb
5198	* it is necessary to wrap the call with sock_hold/sock_put in order to
5199	* prevent the socket from being released prior to being enqueued on
5200	* the sk_error_queue.
5201	*/
5202	struct sk_buff skb_clone_sk(struct* sk_buff *skb)
5203	{
5204	struct sock *sk = skb->sk;
5205	struct sk_buff *clone;
5206
5207	if (!sk \|\| !refcount_inc_not_zero(r: &sk->sk_refcnt))
5208	return NULL;
5209
5210	clone = skb_clone(skb, GFP_ATOMIC);
5211	if (!clone) {
5212	sock_put(sk);
5213	return NULL;
5214	}
5215
5216	clone->sk = sk;
5217	clone->destructor = sock_efree;
5218
5219	return clone;
5220	}
5221	EXPORT_SYMBOL(skb_clone_sk);
5222
5223	static void __skb_complete_tx_timestamp(struct sk_buff *skb,
5224	struct sock *sk,
5225	int tstype,
5226	bool opt_stats)
5227	{
5228	struct sock_exterr_skb *serr;
5229	int err;
5230
5231	BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));
5232
5233	serr = SKB_EXT_ERR(skb);
5234	memset(serr, `0`, sizeof(*serr));
5235	serr->ee.ee_errno = ENOMSG;
5236	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
5237	serr->ee.ee_info = tstype;
5238	serr->opt_stats = opt_stats;
5239	serr->header.h4.iif = skb->dev ? skb->dev->ifindex : `0`;
5240	if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
5241	serr->ee.ee_data = skb_shinfo(skb)->tskey;
5242	if (sk_is_tcp(sk))
5243	serr->ee.ee_data -= atomic_read(v: &sk->sk_tskey);
5244	}
5245
5246	err = sock_queue_err_skb(sk, skb);
5247
5248	if (err)
5249	kfree_skb(skb);
5250	}
5251
5252	static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
5253	{
5254	bool ret;
5255
5256	if (likely(READ_ONCE(sysctl_tstamp_allow_data) \|\| tsonly))
5257	return true;
5258
5259	read_lock_bh(&sk->sk_callback_lock);
5260	ret = sk->sk_socket && sk->sk_socket->file &&
5261	file_ns_capable(file: sk->sk_socket->file, ns: &init_user_ns, CAP_NET_RAW);
5262	read_unlock_bh(&sk->sk_callback_lock);
5263	return ret;
5264	}
5265
5266	void skb_complete_tx_timestamp(struct sk_buff *skb,
5267	struct skb_shared_hwtstamps *hwtstamps)
5268	{
5269	struct sock *sk = skb->sk;
5270
5271	if (!skb_may_tx_timestamp(sk, tsonly: false))
5272	goto err;
5273
5274	/ Take a reference to prevent skb_orphan() from freeing the socket,*
5275	* but only if the socket refcount is not zero.
5276	*/
5277	if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
5278	skb_hwtstamps(skb) = hwtstamps;
5279	__skb_complete_tx_timestamp(skb, sk, tstype: SCM_TSTAMP_SND, opt_stats: false);
5280	sock_put(sk);
5281	return;
5282	}
5283
5284	err:
5285	kfree_skb(skb);
5286	}
5287	EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
5288
5289	void __skb_tstamp_tx(struct sk_buff *orig_skb,
5290	const struct sk_buff *ack_skb,
5291	struct skb_shared_hwtstamps *hwtstamps,
5292	struct sock sk, int* tstype)
5293	{
5294	struct sk_buff *skb;
5295	bool tsonly, opt_stats = false;
5296	u32 tsflags;
5297
5298	if (!sk)
5299	return;
5300
5301	tsflags = READ_ONCE(sk->sk_tsflags);
5302	if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
5303	skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
5304	return;
5305
5306	tsonly = tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
5307	if (!skb_may_tx_timestamp(sk, tsonly))
5308	return;
5309
5310	if (tsonly) {
5311	#ifdef CONFIG_INET
5312	if ((tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
5313	sk_is_tcp(sk)) {
5314	skb = tcp_get_timestamping_opt_stats(sk, orig_skb,
5315	ack_skb);
5316	opt_stats = true;
5317	} else
5318	#endif
5319	skb = alloc_skb(size: `0`, GFP_ATOMIC);
5320	} else {
5321	skb = skb_clone(orig_skb, GFP_ATOMIC);
5322
5323	if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) {
5324	kfree_skb(skb);
5325	return;
5326	}
5327	}
5328	if (!skb)
5329	return;
5330
5331	if (tsonly) {
5332	skb_shinfo(skb)->tx_flags \|= skb_shinfo(orig_skb)->tx_flags &
5333	SKBTX_ANY_TSTAMP;
5334	skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
5335	}
5336
5337	if (hwtstamps)
5338	skb_hwtstamps(skb) = hwtstamps;
5339	else
5340	__net_timestamp(skb);
5341
5342	__skb_complete_tx_timestamp(skb, sk, tstype, opt_stats);
5343	}
5344	EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
5345
5346	void skb_tstamp_tx(struct sk_buff *orig_skb,
5347	struct skb_shared_hwtstamps *hwtstamps)
5348	{
5349	return __skb_tstamp_tx(orig_skb, NULL, hwtstamps, orig_skb->sk,
5350	SCM_TSTAMP_SND);
5351	}
5352	EXPORT_SYMBOL_GPL(skb_tstamp_tx);
5353
5354	#ifdef CONFIG_WIRELESS
5355	void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
5356	{
5357	struct sock *sk = skb->sk;
5358	struct sock_exterr_skb *serr;
5359	int err = `1`;
5360
5361	skb->wifi_acked_valid = `1`;
5362	skb->wifi_acked = acked;
5363
5364	serr = SKB_EXT_ERR(skb);
5365	memset(serr, `0`, sizeof(*serr));
5366	serr->ee.ee_errno = ENOMSG;
5367	serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
5368
5369	/ Take a reference to prevent skb_orphan() from freeing the socket,*
5370	* but only if the socket refcount is not zero.
5371	*/
5372	if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
5373	err = sock_queue_err_skb(sk, skb);
5374	sock_put(sk);
5375	}
5376	if (err)
5377	kfree_skb(skb);
5378	}
5379	EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
5380	#endif /* CONFIG_WIRELESS */
5381
5382	/**
5383	* skb_partial_csum_set - set up and verify partial csum values for packet
5384	* @skb: the skb to set
5385	* @start: the number of bytes after skb->data to start checksumming.
5386	* @off: the offset from start to place the checksum.
5387	*
5388	* For untrusted partially-checksummed packets, we need to make sure the values
5389	* for skb->csum_start and skb->csum_offset are valid so we don't oops.
5390	*
5391	* This function checks and sets those values and skb->ip_summed: if this
5392	* returns false you should drop the packet.
5393	*/
5394	bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
5395	{
5396	u32 csum_end = (u32)start + (u32)off + sizeof(__sum16);
5397	u32 csum_start = skb_headroom(skb) + (u32)start;
5398
5399	if (unlikely(csum_start >= U16_MAX \|\| csum_end > skb_headlen(skb))) {
5400	net_warn_ratelimited("bad partial csum: csum=%u/%u headroom=%u headlen=%u\n",
5401	start, off, skb_headroom(skb), skb_headlen(skb));
5402	return false;
5403	}
5404	skb->ip_summed = CHECKSUM_PARTIAL;
5405	skb->csum_start = csum_start;
5406	skb->csum_offset = off;
5407	skb->transport_header = csum_start;
5408	return true;
5409	}
5410	EXPORT_SYMBOL_GPL(skb_partial_csum_set);
5411
5412	static int skb_maybe_pull_tail(struct sk_buff skb, unsigned* int len,
5413	unsigned int max)
5414	{
5415	if (skb_headlen(skb) >= len)
5416	return `0`;
5417
5418	/ If we need to pullup then pullup to the max, so we*
5419	* won't need to do it again.
5420	*/
5421	if (max > skb->len)
5422	max = skb->len;
5423
5424	if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
5425	return -ENOMEM;
5426
5427	if (skb_headlen(skb) < len)
5428	return -EPROTO;
5429
5430	return `0`;
5431	}
5432
5433	#define MAX_TCP_HDR_LEN (15 * 4)
5434
5435	static __sum16 skb_checksum_setup_ip(struct* sk_buff *skb,
5436	typeof(IPPROTO_IP) proto,
5437	unsigned int off)
5438	{
5439	int err;
5440
5441	switch (proto) {
5442	case IPPROTO_TCP:
5443	err = skb_maybe_pull_tail(skb, len: off + sizeof(struct tcphdr),
5444	max: off + MAX_TCP_HDR_LEN);
5445	if (!err && !skb_partial_csum_set(skb, off,
5446	offsetof(struct tcphdr,
5447	check)))
5448	err = -EPROTO;
5449	return err ? ERR_PTR(error: err) : &tcp_hdr(skb)->check;
5450
5451	case IPPROTO_UDP:
5452	err = skb_maybe_pull_tail(skb, len: off + sizeof(struct udphdr),
5453	max: off + sizeof(struct udphdr));
5454	if (!err && !skb_partial_csum_set(skb, off,
5455	offsetof(struct udphdr,
5456	check)))
5457	err = -EPROTO;
5458	return err ? ERR_PTR(error: err) : &udp_hdr(skb)->check;
5459	}
5460
5461	return ERR_PTR(error: -EPROTO);
5462	}
5463
5464	/ This value should be large enough to cover a tagged ethernet header plus*
5465	* maximally sized IP and TCP or UDP headers.
5466	*/
5467	#define MAX_IP_HDR_LEN 128
5468
5469	static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate)
5470	{
5471	unsigned int off;
5472	bool fragment;
5473	__sum16 *csum;
5474	int err;
5475
5476	fragment = false;
5477
5478	err = skb_maybe_pull_tail(skb,
5479	len: sizeof(struct iphdr),
5480	MAX_IP_HDR_LEN);
5481	if (err < `0`)
5482	goto out;
5483
5484	if (ip_is_fragment(iph: ip_hdr(skb)))
5485	fragment = true;
5486
5487	off = ip_hdrlen(skb);
5488
5489	err = -EPROTO;
5490
5491	if (fragment)
5492	goto out;
5493
5494	csum = skb_checksum_setup_ip(skb, proto: ip_hdr(skb)->protocol, off);
5495	if (IS_ERR(ptr: csum))
5496	return PTR_ERR(ptr: csum);
5497
5498	if (recalculate)
5499	*csum = ~csum_tcpudp_magic(saddr: ip_hdr(skb)->saddr,
5500	daddr: ip_hdr(skb)->daddr,
5501	len: skb->len - off,
5502	proto: ip_hdr(skb)->protocol, sum: `0`);
5503	err = `0`;
5504
5505	out:
5506	return err;
5507	}
5508
5509	/ This value should be large enough to cover a tagged ethernet header plus*
5510	* an IPv6 header, all options, and a maximal TCP or UDP header.
5511	*/
5512	#define MAX_IPV6_HDR_LEN 256
5513
5514	#define OPT_HDR(type, skb, off) \
5515	(type *)(skb_network_header(skb) + (off))
5516
5517	static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
5518	{
5519	int err;
5520	u8 nexthdr;
5521	unsigned int off;
5522	unsigned int len;
5523	bool fragment;
5524	bool done;
5525	__sum16 *csum;
5526
5527	fragment = false;
5528	done = false;
5529
5530	off = sizeof(struct ipv6hdr);
5531
5532	err = skb_maybe_pull_tail(skb, len: off, MAX_IPV6_HDR_LEN);
5533	if (err < `0`)
5534	goto out;
5535
5536	nexthdr = ipv6_hdr(skb)->nexthdr;
5537
5538	len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
5539	while (off <= len && !done) {
5540	switch (nexthdr) {
5541	case IPPROTO_DSTOPTS:
5542	case IPPROTO_HOPOPTS:
5543	case IPPROTO_ROUTING: {
5544	struct ipv6_opt_hdr *hp;
5545
5546	err = skb_maybe_pull_tail(skb,
5547	len: off +
5548	sizeof(struct ipv6_opt_hdr),
5549	MAX_IPV6_HDR_LEN);
5550	if (err < `0`)
5551	goto out;
5552
5553	hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
5554	nexthdr = hp->nexthdr;
5555	off += ipv6_optlen(hp);
5556	break;
5557	}
5558	case IPPROTO_AH: {
5559	struct ip_auth_hdr *hp;
5560
5561	err = skb_maybe_pull_tail(skb,
5562	len: off +
5563	sizeof(struct ip_auth_hdr),
5564	MAX_IPV6_HDR_LEN);
5565	if (err < `0`)
5566	goto out;
5567
5568	hp = OPT_HDR(struct ip_auth_hdr, skb, off);
5569	nexthdr = hp->nexthdr;
5570	off += ipv6_authlen(hp);
5571	break;
5572	}
5573	case IPPROTO_FRAGMENT: {
5574	struct frag_hdr *hp;
5575
5576	err = skb_maybe_pull_tail(skb,
5577	len: off +
5578	sizeof(struct frag_hdr),
5579	MAX_IPV6_HDR_LEN);
5580	if (err < `0`)
5581	goto out;
5582
5583	hp = OPT_HDR(struct frag_hdr, skb, off);
5584
5585	if (hp->frag_off & htons(IP6_OFFSET \| IP6_MF))
5586	fragment = true;
5587
5588	nexthdr = hp->nexthdr;
5589	off += sizeof(struct frag_hdr);
5590	break;
5591	}
5592	default:
5593	done = true;
5594	break;
5595	}
5596	}
5597
5598	err = -EPROTO;
5599
5600	if (!done \|\| fragment)
5601	goto out;
5602
5603	csum = skb_checksum_setup_ip(skb, proto: nexthdr, off);
5604	if (IS_ERR(ptr: csum))
5605	return PTR_ERR(ptr: csum);
5606
5607	if (recalculate)
5608	*csum = ~csum_ipv6_magic(saddr: &ipv6_hdr(skb)->saddr,
5609	daddr: &ipv6_hdr(skb)->daddr,
5610	len: skb->len - off, proto: nexthdr, sum: `0`);
5611	err = `0`;
5612
5613	out:
5614	return err;
5615	}
5616
5617	/**
5618	* skb_checksum_setup - set up partial checksum offset
5619	* @skb: the skb to set up
5620	* @recalculate: if true the pseudo-header checksum will be recalculated
5621	*/
5622	int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
5623	{
5624	int err;
5625
5626	switch (skb->protocol) {
5627	case htons(ETH_P_IP):
5628	err = skb_checksum_setup_ipv4(skb, recalculate);
5629	break;
5630
5631	case htons(ETH_P_IPV6):
5632	err = skb_checksum_setup_ipv6(skb, recalculate);
5633	break;
5634
5635	default:
5636	err = -EPROTO;
5637	break;
5638	}
5639
5640	return err;
5641	}
5642	EXPORT_SYMBOL(skb_checksum_setup);
5643
5644	/**
5645	* skb_checksum_maybe_trim - maybe trims the given skb
5646	* @skb: the skb to check
5647	* @transport_len: the data length beyond the network header
5648	*
5649	* Checks whether the given skb has data beyond the given transport length.
5650	* If so, returns a cloned skb trimmed to this transport length.
5651	* Otherwise returns the provided skb. Returns NULL in error cases
5652	* (e.g. transport_len exceeds skb length or out-of-memory).
5653	*
5654	* Caller needs to set the skb transport header and free any returned skb if it
5655	* differs from the provided skb.
5656	*/
5657	static struct sk_buff skb_checksum_maybe_trim(struct* sk_buff *skb,
5658	unsigned int transport_len)
5659	{
5660	struct sk_buff *skb_chk;
5661	unsigned int len = skb_transport_offset(skb) + transport_len;
5662	int ret;
5663
5664	if (skb->len < len)
5665	return NULL;
5666	else if (skb->len == len)
5667	return skb;
5668
5669	skb_chk = skb_clone(skb, GFP_ATOMIC);
5670	if (!skb_chk)
5671	return NULL;
5672
5673	ret = pskb_trim_rcsum(skb: skb_chk, len);
5674	if (ret) {
5675	kfree_skb(skb: skb_chk);
5676	return NULL;
5677	}
5678
5679	return skb_chk;
5680	}
5681
5682	/**
5683	* skb_checksum_trimmed - validate checksum of an skb
5684	* @skb: the skb to check
5685	* @transport_len: the data length beyond the network header
5686	* @skb_chkf: checksum function to use
5687	*
5688	* Applies the given checksum function skb_chkf to the provided skb.
5689	* Returns a checked and maybe trimmed skb. Returns NULL on error.
5690	*
5691	* If the skb has data beyond the given transport length, then a
5692	* trimmed & cloned skb is checked and returned.
5693	*
5694	* Caller needs to set the skb transport header and free any returned skb if it
5695	* differs from the provided skb.
5696	*/
5697	struct sk_buff skb_checksum_trimmed(struct* sk_buff *skb,
5698	unsigned int transport_len,
5699	__sum16(skb_chkf)(struct* sk_buff *skb))
5700	{
5701	struct sk_buff *skb_chk;
5702	unsigned int offset = skb_transport_offset(skb);
5703	__sum16 ret;
5704
5705	skb_chk = skb_checksum_maybe_trim(skb, transport_len);
5706	if (!skb_chk)
5707	goto err;
5708
5709	if (!pskb_may_pull(skb: skb_chk, len: offset))
5710	goto err;
5711
5712	skb_pull_rcsum(skb_chk, offset);
5713	ret = skb_chkf(skb_chk);
5714	skb_push_rcsum(skb: skb_chk, len: offset);
5715
5716	if (ret)
5717	goto err;
5718
5719	return skb_chk;
5720
5721	err:
5722	if (skb_chk && skb_chk != skb)
5723	kfree_skb(skb: skb_chk);
5724
5725	return NULL;
5726
5727	}
5728	EXPORT_SYMBOL(skb_checksum_trimmed);
5729
5730	void __skb_warn_lro_forwarding(const struct sk_buff *skb)
5731	{
5732	net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n",
5733	skb->dev->name);
5734	}
5735	EXPORT_SYMBOL(__skb_warn_lro_forwarding);
5736
5737	void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
5738	{
5739	if (head_stolen) {
5740	skb_release_head_state(skb);
5741	kmem_cache_free(s: skbuff_cache, objp: skb);
5742	} else {
5743	__kfree_skb(skb);
5744	}
5745	}
5746	EXPORT_SYMBOL(kfree_skb_partial);
5747
5748	/**
5749	* skb_try_coalesce - try to merge skb to prior one
5750	* @to: prior buffer
5751	* @from: buffer to add
5752	* @fragstolen: pointer to boolean
5753	* @delta_truesize: how much more was allocated than was requested
5754	*/
5755	bool skb_try_coalesce(struct sk_buff to, struct* sk_buff *from,
5756	bool fragstolen, int* *delta_truesize)
5757	{
5758	struct skb_shared_info to_shinfo, from_shinfo;
5759	int i, delta, len = from->len;
5760
5761	*fragstolen = false;
5762
5763	if (skb_cloned(skb: to))
5764	return false;
5765
5766	/ In general, avoid mixing page_pool and non-page_pool allocated*
5767	* pages within the same SKB. Additionally avoid dealing with clones
5768	* with page_pool pages, in case the SKB is using page_pool fragment
5769	* references (page_pool_alloc_frag()). Since we only take full page
5770	* references for cloned SKBs at the moment that would result in
5771	* inconsistent reference counts.
5772	* In theory we could take full references if @from is cloned and
5773	* !@to->pp_recycle but its tricky (due to potential race with
5774	* the clone disappearing) and rare, so not worth dealing with.
5775	*/
5776	if (to->pp_recycle != from->pp_recycle \|\|
5777	(from->pp_recycle && skb_cloned(skb: from)))
5778	return false;
5779
5780	if (len <= skb_tailroom(skb: to)) {
5781	if (len)
5782	BUG_ON(skb_copy_bits(from, `0`, skb_put(to, len), len));
5783	*delta_truesize = `0`;
5784	return true;
5785	}
5786
5787	to_shinfo = skb_shinfo(to);
5788	from_shinfo = skb_shinfo(from);
5789	if (to_shinfo->frag_list \|\| from_shinfo->frag_list)
5790	return false;
5791	if (skb_zcopy(skb: to) \|\| skb_zcopy(skb: from))
5792	return false;
5793
5794	if (skb_headlen(skb: from) != `0`) {
5795	struct page *page;
5796	unsigned int offset;
5797
5798	if (to_shinfo->nr_frags +
5799	from_shinfo->nr_frags >= MAX_SKB_FRAGS)
5800	return false;
5801
5802	if (skb_head_is_locked(skb: from))
5803	return false;
5804
5805	delta = from->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
5806
5807	page = virt_to_head_page(x: from->head);
5808	offset = from->data - (unsigned char *)page_address(page);
5809
5810	skb_fill_page_desc(skb: to, i: to_shinfo->nr_frags,
5811	page, off: offset, size: skb_headlen(skb: from));
5812	*fragstolen = true;
5813	} else {
5814	if (to_shinfo->nr_frags +
5815	from_shinfo->nr_frags > MAX_SKB_FRAGS)
5816	return false;
5817
5818	delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
5819	}
5820
5821	WARN_ON_ONCE(delta < len);
5822
5823	memcpy(to_shinfo->frags + to_shinfo->nr_frags,
5824	from_shinfo->frags,
5825	from_shinfo->nr_frags * sizeof(skb_frag_t));
5826	to_shinfo->nr_frags += from_shinfo->nr_frags;
5827
5828	if (!skb_cloned(skb: from))
5829	from_shinfo->nr_frags = `0`;
5830
5831	/ if the skb is not cloned this does nothing*
5832	* since we set nr_frags to 0.
5833	*/
5834	for (i = `0`; i < from_shinfo->nr_frags; i++)
5835	__skb_frag_ref(frag: &from_shinfo->frags[i]);
5836
5837	to->truesize += delta;
5838	to->len += len;
5839	to->data_len += len;
5840
5841	*delta_truesize = delta;
5842	return true;
5843	}
5844	EXPORT_SYMBOL(skb_try_coalesce);
5845
5846	/**
5847	* skb_scrub_packet - scrub an skb
5848	*
5849	* @skb: buffer to clean
5850	* @xnet: packet is crossing netns
5851	*
5852	* skb_scrub_packet can be used after encapsulating or decapsulting a packet
5853	* into/from a tunnel. Some information have to be cleared during these
5854	* operations.
5855	* skb_scrub_packet can also be used to clean a skb before injecting it in
5856	* another namespace (@xnet == true). We have to clear all information in the
5857	* skb that could impact namespace isolation.
5858	*/
5859	void skb_scrub_packet(struct sk_buff *skb, bool xnet)
5860	{
5861	skb->pkt_type = PACKET_HOST;
5862	skb->skb_iif = `0`;
5863	skb->ignore_df = `0`;
5864	skb_dst_drop(skb);
5865	skb_ext_reset(skb);
5866	nf_reset_ct(skb);
5867	nf_reset_trace(skb);
5868
5869	#ifdef CONFIG_NET_SWITCHDEV
5870	skb->offload_fwd_mark = `0`;
5871	skb->offload_l3_fwd_mark = `0`;
5872	#endif
5873
5874	if (!xnet)
5875	return;
5876
5877	ipvs_reset(skb);
5878	skb->mark = `0`;
5879	skb_clear_tstamp(skb);
5880	}
5881	EXPORT_SYMBOL_GPL(skb_scrub_packet);
5882
5883	static struct sk_buff skb_reorder_vlan_header(struct* sk_buff *skb)
5884	{
5885	int mac_len, meta_len;
5886	void *meta;
5887
5888	if (skb_cow(skb, headroom: skb_headroom(skb)) < `0`) {
5889	kfree_skb(skb);
5890	return NULL;
5891	}
5892
5893	mac_len = skb->data - skb_mac_header(skb);
5894	if (likely(mac_len > VLAN_HLEN + ETH_TLEN)) {
5895	memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
5896	mac_len - VLAN_HLEN - ETH_TLEN);
5897	}
5898
5899	meta_len = skb_metadata_len(skb);
5900	if (meta_len) {
5901	meta = skb_metadata_end(skb) - meta_len;
5902	memmove(meta + VLAN_HLEN, meta, meta_len);
5903	}
5904
5905	skb->mac_header += VLAN_HLEN;
5906	return skb;
5907	}
5908
5909	struct sk_buff skb_vlan_untag(struct* sk_buff *skb)
5910	{
5911	struct vlan_hdr *vhdr;
5912	u16 vlan_tci;
5913
5914	if (unlikely(skb_vlan_tag_present(skb))) {
5915	/ vlan_tci is already set-up so leave this for another time /
5916	return skb;
5917	}
5918
5919	skb = skb_share_check(skb, GFP_ATOMIC);
5920	if (unlikely(!skb))
5921	goto err_free;
5922	/ We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). /
5923	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN + sizeof(unsigned short))))
5924	goto err_free;
5925
5926	vhdr = (struct vlan_hdr *)skb->data;
5927	vlan_tci = ntohs(vhdr->h_vlan_TCI);
5928	__vlan_hwaccel_put_tag(skb, vlan_proto: skb->protocol, vlan_tci);
5929
5930	skb_pull_rcsum(skb, VLAN_HLEN);
5931	vlan_set_encap_proto(skb, vhdr);
5932
5933	skb = skb_reorder_vlan_header(skb);
5934	if (unlikely(!skb))
5935	goto err_free;
5936
5937	skb_reset_network_header(skb);
5938	if (!skb_transport_header_was_set(skb))
5939	skb_reset_transport_header(skb);
5940	skb_reset_mac_len(skb);
5941
5942	return skb;
5943
5944	err_free:
5945	kfree_skb(skb);
5946	return NULL;
5947	}
5948	EXPORT_SYMBOL(skb_vlan_untag);
5949
5950	int skb_ensure_writable(struct sk_buff skb, unsigned* int write_len)
5951	{
5952	if (!pskb_may_pull(skb, len: write_len))
5953	return -ENOMEM;
5954
5955	if (!skb_cloned(skb) \|\| skb_clone_writable(skb, len: write_len))
5956	return `0`;
5957
5958	return pskb_expand_head(skb, `0`, `0`, GFP_ATOMIC);
5959	}
5960	EXPORT_SYMBOL(skb_ensure_writable);
5961
5962	/ remove VLAN header from packet and update csum accordingly.*
5963	* expects a non skb_vlan_tag_present skb with a vlan tag payload
5964	*/
5965	int __skb_vlan_pop(struct sk_buff skb, u16 vlan_tci)
5966	{
5967	int offset = skb->data - skb_mac_header(skb);
5968	int err;
5969
5970	if (WARN_ONCE(offset,
5971	"__skb_vlan_pop got skb with skb->data not at mac header (offset %d)\n",
5972	offset)) {
5973	return -EINVAL;
5974	}
5975
5976	err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
5977	if (unlikely(err))
5978	return err;
5979
5980	skb_postpull_rcsum(skb, start: skb->data + (`2` * ETH_ALEN), VLAN_HLEN);
5981
5982	vlan_remove_tag(skb, vlan_tci);
5983
5984	skb->mac_header += VLAN_HLEN;
5985
5986	if (skb_network_offset(skb) < ETH_HLEN)
5987	skb_set_network_header(skb, ETH_HLEN);
5988
5989	skb_reset_mac_len(skb);
5990
5991	return err;
5992	}
5993	EXPORT_SYMBOL(__skb_vlan_pop);
5994
5995	/ Pop a vlan tag either from hwaccel or from payload.*
5996	* Expects skb->data at mac header.
5997	*/
5998	int skb_vlan_pop(struct sk_buff *skb)
5999	{
6000	u16 vlan_tci;
6001	__be16 vlan_proto;
6002	int err;
6003
6004	if (likely(skb_vlan_tag_present(skb))) {
6005	__vlan_hwaccel_clear_tag(skb);
6006	} else {
6007	if (unlikely(!eth_type_vlan(skb->protocol)))
6008	return `0`;
6009
6010	err = __skb_vlan_pop(skb, &vlan_tci);
6011	if (err)
6012	return err;
6013	}
6014	/ move next vlan tag to hw accel tag /
6015	if (likely(!eth_type_vlan(skb->protocol)))
6016	return `0`;
6017
6018	vlan_proto = skb->protocol;
6019	err = __skb_vlan_pop(skb, &vlan_tci);
6020	if (unlikely(err))
6021	return err;
6022
6023	__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
6024	return `0`;
6025	}
6026	EXPORT_SYMBOL(skb_vlan_pop);
6027
6028	/ Push a vlan tag either into hwaccel or into payload (if hwaccel tag present).*
6029	* Expects skb->data at mac header.
6030	*/
6031	int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
6032	{
6033	if (skb_vlan_tag_present(skb)) {
6034	int offset = skb->data - skb_mac_header(skb);
6035	int err;
6036
6037	if (WARN_ONCE(offset,
6038	"skb_vlan_push got skb with skb->data not at mac header (offset %d)\n",
6039	offset)) {
6040	return -EINVAL;
6041	}
6042
6043	err = __vlan_insert_tag(skb, vlan_proto: skb->vlan_proto,
6044	skb_vlan_tag_get(skb));
6045	if (err)
6046	return err;
6047
6048	skb->protocol = skb->vlan_proto;
6049	skb->mac_len += VLAN_HLEN;
6050
6051	skb_postpush_rcsum(skb, start: skb->data + (`2` * ETH_ALEN), VLAN_HLEN);
6052	}
6053	__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
6054	return `0`;
6055	}
6056	EXPORT_SYMBOL(skb_vlan_push);
6057
6058	/**
6059	* skb_eth_pop() - Drop the Ethernet header at the head of a packet
6060	*
6061	* @skb: Socket buffer to modify
6062	*
6063	* Drop the Ethernet header of @skb.
6064	*
6065	* Expects that skb->data points to the mac header and that no VLAN tags are
6066	* present.
6067	*
6068	* Returns 0 on success, -errno otherwise.
6069	*/
6070	int skb_eth_pop(struct sk_buff *skb)
6071	{
6072	if (!pskb_may_pull(skb, ETH_HLEN) \|\| skb_vlan_tagged(skb) \|\|
6073	skb_network_offset(skb) < ETH_HLEN)
6074	return -EPROTO;
6075
6076	skb_pull_rcsum(skb, ETH_HLEN);
6077	skb_reset_mac_header(skb);
6078	skb_reset_mac_len(skb);
6079
6080	return `0`;
6081	}
6082	EXPORT_SYMBOL(skb_eth_pop);
6083
6084	/**
6085	* skb_eth_push() - Add a new Ethernet header at the head of a packet
6086	*
6087	* @skb: Socket buffer to modify
6088	* @dst: Destination MAC address of the new header
6089	* @src: Source MAC address of the new header
6090	*
6091	* Prepend @skb with a new Ethernet header.
6092	*
6093	* Expects that skb->data points to the mac header, which must be empty.
6094	*
6095	* Returns 0 on success, -errno otherwise.
6096	*/
6097	int skb_eth_push(struct sk_buff skb, const* unsigned char *dst,
6098	const unsigned char *src)
6099	{
6100	struct ethhdr *eth;
6101	int err;
6102
6103	if (skb_network_offset(skb) \|\| skb_vlan_tag_present(skb))
6104	return -EPROTO;
6105
6106	err = skb_cow_head(skb, headroom: sizeof(*eth));
6107	if (err < `0`)
6108	return err;
6109
6110	skb_push(skb, sizeof(*eth));
6111	skb_reset_mac_header(skb);
6112	skb_reset_mac_len(skb);
6113
6114	eth = eth_hdr(skb);
6115	ether_addr_copy(dst: eth->h_dest, src: dst);
6116	ether_addr_copy(dst: eth->h_source, src);
6117	eth->h_proto = skb->protocol;
6118
6119	skb_postpush_rcsum(skb, start: eth, len: sizeof(*eth));
6120
6121	return `0`;
6122	}
6123	EXPORT_SYMBOL(skb_eth_push);
6124
6125	/ Update the ethertype of hdr and the skb csum value if required. /
6126	static void skb_mod_eth_type(struct sk_buff skb, struct* ethhdr *hdr,
6127	__be16 ethertype)
6128	{
6129	if (skb->ip_summed == CHECKSUM_COMPLETE) {
6130	__be16 diff[] = { ~hdr->h_proto, ethertype };
6131
6132	skb->csum = csum_partial(buff: (char )diff, len: sizeof*(diff), sum: skb->csum);
6133	}
6134
6135	hdr->h_proto = ethertype;
6136	}
6137
6138	/**
6139	* skb_mpls_push() - push a new MPLS header after mac_len bytes from start of
6140	* the packet
6141	*
6142	* @skb: buffer
6143	* @mpls_lse: MPLS label stack entry to push
6144	* @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
6145	* @mac_len: length of the MAC header
6146	* @ethernet: flag to indicate if the resulting packet after skb_mpls_push is
6147	* ethernet
6148	*
6149	* Expects skb->data at mac header.
6150	*
6151	* Returns 0 on success, -errno otherwise.
6152	*/
6153	int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
6154	int mac_len, bool ethernet)
6155	{
6156	struct mpls_shim_hdr *lse;
6157	int err;
6158
6159	if (unlikely(!eth_p_mpls(mpls_proto)))
6160	return -EINVAL;
6161
6162	/ Networking stack does not allow simultaneous Tunnel and MPLS GSO. /
6163	if (skb->encapsulation)
6164	return -EINVAL;
6165
6166	err = skb_cow_head(skb, MPLS_HLEN);
6167	if (unlikely(err))
6168	return err;
6169
6170	if (!skb->inner_protocol) {
6171	skb_set_inner_network_header(skb, offset: skb_network_offset(skb));
6172	skb_set_inner_protocol(skb, protocol: skb->protocol);
6173	}
6174
6175	skb_push(skb, MPLS_HLEN);
6176	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
6177	mac_len);
6178	skb_reset_mac_header(skb);
6179	skb_set_network_header(skb, offset: mac_len);
6180	skb_reset_mac_len(skb);
6181
6182	lse = mpls_hdr(skb);
6183	lse->label_stack_entry = mpls_lse;
6184	skb_postpush_rcsum(skb, start: lse, MPLS_HLEN);
6185
6186	if (ethernet && mac_len >= ETH_HLEN)
6187	skb_mod_eth_type(skb, hdr: eth_hdr(skb), ethertype: mpls_proto);
6188	skb->protocol = mpls_proto;
6189
6190	return `0`;
6191	}
6192	EXPORT_SYMBOL_GPL(skb_mpls_push);
6193
6194	/**
6195	* skb_mpls_pop() - pop the outermost MPLS header
6196	*
6197	* @skb: buffer
6198	* @next_proto: ethertype of header after popped MPLS header
6199	* @mac_len: length of the MAC header
6200	* @ethernet: flag to indicate if the packet is ethernet
6201	*
6202	* Expects skb->data at mac header.
6203	*
6204	* Returns 0 on success, -errno otherwise.
6205	*/
6206	int skb_mpls_pop(struct sk_buff skb, __be16 next_proto, int* mac_len,
6207	bool ethernet)
6208	{
6209	int err;
6210
6211	if (unlikely(!eth_p_mpls(skb->protocol)))
6212	return `0`;
6213
6214	err = skb_ensure_writable(skb, mac_len + MPLS_HLEN);
6215	if (unlikely(err))
6216	return err;
6217
6218	skb_postpull_rcsum(skb, start: mpls_hdr(skb), MPLS_HLEN);
6219	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
6220	mac_len);
6221
6222	__skb_pull(skb, MPLS_HLEN);
6223	skb_reset_mac_header(skb);
6224	skb_set_network_header(skb, offset: mac_len);
6225
6226	if (ethernet && mac_len >= ETH_HLEN) {
6227	struct ethhdr *hdr;
6228
6229	/ use mpls_hdr() to get ethertype to account for VLANs. /
6230	hdr = (struct ethhdr )((void* *)mpls_hdr(skb) - ETH_HLEN);
6231	skb_mod_eth_type(skb, hdr, ethertype: next_proto);
6232	}
6233	skb->protocol = next_proto;
6234
6235	return `0`;
6236	}
6237	EXPORT_SYMBOL_GPL(skb_mpls_pop);
6238
6239	/**
6240	* skb_mpls_update_lse() - modify outermost MPLS header and update csum
6241	*
6242	* @skb: buffer
6243	* @mpls_lse: new MPLS label stack entry to update to
6244	*
6245	* Expects skb->data at mac header.
6246	*
6247	* Returns 0 on success, -errno otherwise.
6248	*/
6249	int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse)
6250	{
6251	int err;
6252
6253	if (unlikely(!eth_p_mpls(skb->protocol)))
6254	return -EINVAL;
6255
6256	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
6257	if (unlikely(err))
6258	return err;
6259
6260	if (skb->ip_summed == CHECKSUM_COMPLETE) {
6261	__be32 diff[] = { ~mpls_hdr(skb)->label_stack_entry, mpls_lse };
6262
6263	skb->csum = csum_partial(buff: (char )diff, len: sizeof*(diff), sum: skb->csum);
6264	}
6265
6266	mpls_hdr(skb)->label_stack_entry = mpls_lse;
6267
6268	return `0`;
6269	}
6270	EXPORT_SYMBOL_GPL(skb_mpls_update_lse);
6271
6272	/**
6273	* skb_mpls_dec_ttl() - decrement the TTL of the outermost MPLS header
6274	*
6275	* @skb: buffer
6276	*
6277	* Expects skb->data at mac header.
6278	*
6279	* Returns 0 on success, -errno otherwise.
6280	*/
6281	int skb_mpls_dec_ttl(struct sk_buff *skb)
6282	{
6283	u32 lse;
6284	u8 ttl;
6285
6286	if (unlikely(!eth_p_mpls(skb->protocol)))
6287	return -EINVAL;
6288
6289	if (!pskb_may_pull(skb, len: skb_network_offset(skb) + MPLS_HLEN))
6290	return -ENOMEM;
6291
6292	lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry);
6293	ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
6294	if (!--ttl)
6295	return -EINVAL;
6296
6297	lse &= ~MPLS_LS_TTL_MASK;
6298	lse \|= ttl << MPLS_LS_TTL_SHIFT;
6299
6300	return skb_mpls_update_lse(skb, cpu_to_be32(lse));
6301	}
6302	EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
6303
6304	/**
6305	* alloc_skb_with_frags - allocate skb with page frags
6306	*
6307	* @header_len: size of linear part
6308	* @data_len: needed length in frags
6309	* @order: max page order desired.
6310	* @errcode: pointer to error code if any
6311	* @gfp_mask: allocation mask
6312	*
6313	* This can be used to allocate a paged skb, given a maximal order for frags.
6314	*/
6315	struct sk_buff alloc_skb_with_frags(unsigned* long header_len,
6316	unsigned long data_len,
6317	int order,
6318	int *errcode,
6319	gfp_t gfp_mask)
6320	{
6321	unsigned long chunk;
6322	struct sk_buff *skb;
6323	struct page *page;
6324	int nr_frags = `0`;
6325
6326	*errcode = -EMSGSIZE;
6327	if (unlikely(data_len > MAX_SKB_FRAGS * (PAGE_SIZE << order)))
6328	return NULL;
6329
6330	*errcode = -ENOBUFS;
6331	skb = alloc_skb(size: header_len, priority: gfp_mask);
6332	if (!skb)
6333	return NULL;
6334
6335	while (data_len) {
6336	if (nr_frags == MAX_SKB_FRAGS - `1`)
6337	goto failure;
6338	while (order && PAGE_ALIGN(data_len) < (PAGE_SIZE << order))
6339	order--;
6340
6341	if (order) {
6342	page = alloc_pages(gfp: (gfp_mask & ~__GFP_DIRECT_RECLAIM) \|
6343	__GFP_COMP \|
6344	__GFP_NOWARN,
6345	order);
6346	if (!page) {
6347	order--;
6348	continue;
6349	}
6350	} else {
6351	page = alloc_page(gfp_mask);
6352	if (!page)
6353	goto failure;
6354	}
6355	chunk = min_t(unsigned long, data_len,
6356	PAGE_SIZE << order);
6357	skb_fill_page_desc(skb, i: nr_frags, page, off: `0`, size: chunk);
6358	nr_frags++;
6359	skb->truesize += (PAGE_SIZE << order);
6360	data_len -= chunk;
6361	}
6362	return skb;
6363
6364	failure:
6365	kfree_skb(skb);
6366	return NULL;
6367	}
6368	EXPORT_SYMBOL(alloc_skb_with_frags);
6369
6370	/ carve out the first off bytes from skb when off < headlen /
6371	static int pskb_carve_inside_header(struct sk_buff skb, const* u32 off,
6372	const int headlen, gfp_t gfp_mask)
6373	{
6374	int i;
6375	unsigned int size = skb_end_offset(skb);
6376	int new_hlen = headlen - off;
6377	u8 *data;
6378
6379	if (skb_pfmemalloc(skb))
6380	gfp_mask \|= __GFP_MEMALLOC;
6381
6382	data = kmalloc_reserve(size: &size, flags: gfp_mask, NUMA_NO_NODE, NULL);
6383	if (!data)
6384	return -ENOMEM;
6385	size = SKB_WITH_OVERHEAD(size);
6386
6387	/ Copy real data, and all frags /
6388	skb_copy_from_linear_data_offset(skb, offset: off, to: data, len: new_hlen);
6389	skb->len -= off;
6390
6391	memcpy((struct skb_shared_info *)(data + size),
6392	skb_shinfo(skb),
6393	offsetof(struct skb_shared_info,
6394	frags[skb_shinfo(skb)->nr_frags]));
6395	if (skb_cloned(skb)) {
6396	/ drop the old head gracefully /
6397	if (skb_orphan_frags(skb, gfp_mask)) {
6398	skb_kfree_head(head: data, end_offset: size);
6399	return -ENOMEM;
6400	}
6401	for (i = `0`; i < skb_shinfo(skb)->nr_frags; i++)
6402	skb_frag_ref(skb, f: i);
6403	if (skb_has_frag_list(skb))
6404	skb_clone_fraglist(skb);
6405	skb_release_data(skb, reason: SKB_CONSUMED, napi_safe: false);
6406	} else {
6407	/ we can reuse existing recount- all we did was*
6408	* relocate values
6409	*/
6410	skb_free_head(skb, napi_safe: false);
6411	}
6412
6413	skb->head = data;
6414	skb->data = data;
6415	skb->head_frag = `0`;
6416	skb_set_end_offset(skb, offset: size);
6417	skb_set_tail_pointer(skb, offset: skb_headlen(skb));
6418	skb_headers_offset_update(skb, `0`);
6419	skb->cloned = `0`;
6420	skb->hdr_len = `0`;
6421	skb->nohdr = `0`;
6422	atomic_set(v: &skb_shinfo(skb)->dataref, i: `1`);
6423
6424	return `0`;
6425	}
6426
6427	static int pskb_carve(struct sk_buff skb, const* u32 off, gfp_t gfp);
6428
6429	/ carve out the first eat bytes from skb's frag_list. May recurse into*
6430	* pskb_carve()
6431	*/
6432	static int pskb_carve_frag_list(struct sk_buff *skb,
6433	struct skb_shared_info shinfo, int* eat,
6434	gfp_t gfp_mask)
6435	{
6436	struct sk_buff *list = shinfo->frag_list;
6437	struct sk_buff *clone = NULL;
6438	struct sk_buff *insp = NULL;
6439
6440	do {
6441	if (!list) {
6442	pr_err("Not enough bytes to eat. Want %d\n", eat);
6443	return -EFAULT;
6444	}
6445	if (list->len <= eat) {
6446	/ Eaten as whole. /
6447	eat -= list->len;
6448	list = list->next;
6449	insp = list;
6450	} else {
6451	/ Eaten partially. /
6452	if (skb_shared(skb: list)) {
6453	clone = skb_clone(list, gfp_mask);
6454	if (!clone)
6455	return -ENOMEM;
6456	insp = list->next;
6457	list = clone;
6458	} else {
6459	/ This may be pulled without problems. /
6460	insp = list;
6461	}
6462	if (pskb_carve(skb: list, off: eat, gfp: gfp_mask) < `0`) {
6463	kfree_skb(skb: clone);
6464	return -ENOMEM;
6465	}
6466	break;
6467	}
6468	} while (eat);
6469
6470	/ Free pulled out fragments. /
6471	while ((list = shinfo->frag_list) != insp) {
6472	shinfo->frag_list = list->next;
6473	consume_skb(list);
6474	}
6475	/ And insert new clone at head. /
6476	if (clone) {
6477	clone->next = list;
6478	shinfo->frag_list = clone;
6479	}
6480	return `0`;
6481	}
6482
6483	/ carve off first len bytes from skb. Split line (off) is in the*
6484	* non-linear part of skb
6485	*/
6486	static int pskb_carve_inside_nonlinear(struct sk_buff skb, const* u32 off,
6487	int pos, gfp_t gfp_mask)
6488	{
6489	int i, k = `0`;
6490	unsigned int size = skb_end_offset(skb);
6491	u8 *data;
6492	const int nfrags = skb_shinfo(skb)->nr_frags;
6493	struct skb_shared_info *shinfo;
6494
6495	if (skb_pfmemalloc(skb))
6496	gfp_mask \|= __GFP_MEMALLOC;
6497
6498	data = kmalloc_reserve(size: &size, flags: gfp_mask, NUMA_NO_NODE, NULL);
6499	if (!data)
6500	return -ENOMEM;
6501	size = SKB_WITH_OVERHEAD(size);
6502
6503	memcpy((struct skb_shared_info *)(data + size),
6504	skb_shinfo(skb), offsetof(struct skb_shared_info, frags[`0`]));
6505	if (skb_orphan_frags(skb, gfp_mask)) {
6506	skb_kfree_head(head: data, end_offset: size);
6507	return -ENOMEM;
6508	}
6509	shinfo = (struct skb_shared_info *)(data + size);
6510	for (i = `0`; i < nfrags; i++) {
6511	int fsize = skb_frag_size(frag: &skb_shinfo(skb)->frags[i]);
6512
6513	if (pos + fsize > off) {
6514	shinfo->frags[k] = skb_shinfo(skb)->frags[i];
6515
6516	if (pos < off) {
6517	/ Split frag.*
6518	* We have two variants in this case:
6519	* 1. Move all the frag to the second
6520	* part, if it is possible. F.e.
6521	* this approach is mandatory for TUX,
6522	* where splitting is expensive.
6523	* 2. Split is accurately. We make this.
6524	*/
6525	skb_frag_off_add(frag: &shinfo->frags[`0`], delta: off - pos);
6526	skb_frag_size_sub(frag: &shinfo->frags[`0`], delta: off - pos);
6527	}
6528	skb_frag_ref(skb, f: i);
6529	k++;
6530	}
6531	pos += fsize;
6532	}
6533	shinfo->nr_frags = k;
6534	if (skb_has_frag_list(skb))
6535	skb_clone_fraglist(skb);
6536
6537	/ split line is in frag list /
6538	if (k == `0` && pskb_carve_frag_list(skb, shinfo, eat: off - pos, gfp_mask)) {
6539	/ skb_frag_unref() is not needed here as shinfo->nr_frags = 0. /
6540	if (skb_has_frag_list(skb))
6541	kfree_skb_list(skb_shinfo(skb)->frag_list);
6542	skb_kfree_head(head: data, end_offset: size);
6543	return -ENOMEM;
6544	}
6545	skb_release_data(skb, reason: SKB_CONSUMED, napi_safe: false);
6546
6547	skb->head = data;
6548	skb->head_frag = `0`;
6549	skb->data = data;
6550	skb_set_end_offset(skb, offset: size);
6551	skb_reset_tail_pointer(skb);
6552	skb_headers_offset_update(skb, `0`);
6553	skb->cloned = `0`;
6554	skb->hdr_len = `0`;
6555	skb->nohdr = `0`;
6556	skb->len -= off;
6557	skb->data_len = skb->len;
6558	atomic_set(v: &skb_shinfo(skb)->dataref, i: `1`);
6559	return `0`;
6560	}
6561
6562	/ remove len bytes from the beginning of the skb /
6563	static int pskb_carve(struct sk_buff skb, const* u32 len, gfp_t gfp)
6564	{
6565	int headlen = skb_headlen(skb);
6566
6567	if (len < headlen)
6568	return pskb_carve_inside_header(skb, off: len, headlen, gfp_mask: gfp);
6569	else
6570	return pskb_carve_inside_nonlinear(skb, off: len, pos: headlen, gfp_mask: gfp);
6571	}
6572
6573	/ Extract to_copy bytes starting at off from skb, and return this in*
6574	* a new skb
6575	*/
6576	struct sk_buff pskb_extract(struct* sk_buff skb, int* off,
6577	int to_copy, gfp_t gfp)
6578	{
6579	struct sk_buff *clone = skb_clone(skb, gfp);
6580
6581	if (!clone)
6582	return NULL;
6583
6584	if (pskb_carve(skb: clone, len: off, gfp) < `0` \|\|
6585	pskb_trim(skb: clone, len: to_copy)) {
6586	kfree_skb(skb: clone);
6587	return NULL;
6588	}
6589	return clone;
6590	}
6591	EXPORT_SYMBOL(pskb_extract);
6592
6593	/**
6594	* skb_condense - try to get rid of fragments/frag_list if possible
6595	* @skb: buffer
6596	*
6597	* Can be used to save memory before skb is added to a busy queue.
6598	* If packet has bytes in frags and enough tail room in skb->head,
6599	* pull all of them, so that we can free the frags right now and adjust
6600	* truesize.
6601	* Notes:
6602	* We do not reallocate skb->head thus can not fail.
6603	* Caller must re-evaluate skb->truesize if needed.
6604	*/
6605	void skb_condense(struct sk_buff *skb)
6606	{
6607	if (skb->data_len) {
6608	if (skb->data_len > skb->end - skb->tail \|\|
6609	skb_cloned(skb))
6610	return;
6611
6612	/ Nice, we can free page frag(s) right now /
6613	__pskb_pull_tail(skb, skb->data_len);
6614	}
6615	/ At this point, skb->truesize might be over estimated,*
6616	* because skb had a fragment, and fragments do not tell
6617	* their truesize.
6618	* When we pulled its content into skb->head, fragment
6619	* was freed, but __pskb_pull_tail() could not possibly
6620	* adjust skb->truesize, not knowing the frag truesize.
6621	*/
6622	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
6623	}
6624	EXPORT_SYMBOL(skb_condense);
6625
6626	#ifdef CONFIG_SKB_EXTENSIONS
6627	static void skb_ext_get_ptr(struct* skb_ext ext, enum* skb_ext_id id)
6628	{
6629	return (void )ext + (ext->offset[id] SKB_EXT_ALIGN_VALUE);
6630	}
6631
6632	/**
6633	* __skb_ext_alloc - allocate a new skb extensions storage
6634	*
6635	* @flags: See kmalloc().
6636	*
6637	* Returns the newly allocated pointer. The pointer can later attached to a
6638	* skb via __skb_ext_set().
6639	* Note: caller must handle the skb_ext as an opaque data.
6640	*/
6641	struct skb_ext *__skb_ext_alloc(gfp_t flags)
6642	{
6643	struct skb_ext *new = kmem_cache_alloc(cachep: skbuff_ext_cache, flags);
6644
6645	if (new) {
6646	memset(new->offset, `0`, sizeof(new->offset));
6647	refcount_set(r: &new->refcnt, n: `1`);
6648	}
6649
6650	return new;
6651	}
6652
6653	static struct skb_ext skb_ext_maybe_cow(struct* skb_ext *old,
6654	unsigned int old_active)
6655	{
6656	struct skb_ext *new;
6657
6658	if (refcount_read(r: &old->refcnt) == `1`)
6659	return old;
6660
6661	new = kmem_cache_alloc(cachep: skbuff_ext_cache, GFP_ATOMIC);
6662	if (!new)
6663	return NULL;
6664
6665	memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE);
6666	refcount_set(r: &new->refcnt, n: `1`);
6667
6668	#ifdef CONFIG_XFRM
6669	if (old_active & (`1` << SKB_EXT_SEC_PATH)) {
6670	struct sec_path *sp = skb_ext_get_ptr(ext: old, id: SKB_EXT_SEC_PATH);
6671	unsigned int i;
6672
6673	for (i = `0`; i < sp->len; i++)
6674	xfrm_state_hold(x: sp->xvec[i]);
6675	}
6676	#endif
6677	__skb_ext_put(ext: old);
6678	return new;
6679	}
6680
6681	/**
6682	* __skb_ext_set - attach the specified extension storage to this skb
6683	* @skb: buffer
6684	* @id: extension id
6685	* @ext: extension storage previously allocated via __skb_ext_alloc()
6686	*
6687	* Existing extensions, if any, are cleared.
6688	*
6689	* Returns the pointer to the extension.
6690	*/
6691	void __skb_ext_set(struct* sk_buff skb, enum* skb_ext_id id,
6692	struct skb_ext *ext)
6693	{
6694	unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext);
6695
6696	skb_ext_put(skb);
6697	newlen = newoff + skb_ext_type_len[id];
6698	ext->chunks = newlen;
6699	ext->offset[id] = newoff;
6700	skb->extensions = ext;
6701	skb->active_extensions = `1` << id;
6702	return skb_ext_get_ptr(ext, id);
6703	}
6704
6705	/**
6706	* skb_ext_add - allocate space for given extension, COW if needed
6707	* @skb: buffer
6708	* @id: extension to allocate space for
6709	*
6710	* Allocates enough space for the given extension.
6711	* If the extension is already present, a pointer to that extension
6712	* is returned.
6713	*
6714	* If the skb was cloned, COW applies and the returned memory can be
6715	* modified without changing the extension space of clones buffers.
6716	*
6717	* Returns pointer to the extension or NULL on allocation failure.
6718	*/
6719	void skb_ext_add(struct* sk_buff skb, enum* skb_ext_id id)
6720	{
6721	struct skb_ext new, old = NULL;
6722	unsigned int newlen, newoff;
6723
6724	if (skb->active_extensions) {
6725	old = skb->extensions;
6726
6727	new = skb_ext_maybe_cow(old, old_active: skb->active_extensions);
6728	if (!new)
6729	return NULL;
6730
6731	if (__skb_ext_exist(ext: new, i: id))
6732	goto set_active;
6733
6734	newoff = new->chunks;
6735	} else {
6736	newoff = SKB_EXT_CHUNKSIZEOF(*new);
6737
6738	new = __skb_ext_alloc(GFP_ATOMIC);
6739	if (!new)
6740	return NULL;
6741	}
6742
6743	newlen = newoff + skb_ext_type_len[id];
6744	new->chunks = newlen;
6745	new->offset[id] = newoff;
6746	set_active:
6747	skb->slow_gro = `1`;
6748	skb->extensions = new;
6749	skb->active_extensions \|= `1` << id;
6750	return skb_ext_get_ptr(ext: new, id);
6751	}
6752	EXPORT_SYMBOL(skb_ext_add);
6753
6754	#ifdef CONFIG_XFRM
6755	static void skb_ext_put_sp(struct sec_path *sp)
6756	{
6757	unsigned int i;
6758
6759	for (i = `0`; i < sp->len; i++)
6760	xfrm_state_put(x: sp->xvec[i]);
6761	}
6762	#endif
6763
6764	#ifdef CONFIG_MCTP_FLOWS
6765	static void skb_ext_put_mctp(struct mctp_flow *flow)
6766	{
6767	if (flow->key)
6768	mctp_key_unref(key: flow->key);
6769	}
6770	#endif
6771
6772	void __skb_ext_del(struct sk_buff skb, enum* skb_ext_id id)
6773	{
6774	struct skb_ext *ext = skb->extensions;
6775
6776	skb->active_extensions &= ~(`1` << id);
6777	if (skb->active_extensions == `0`) {
6778	skb->extensions = NULL;
6779	__skb_ext_put(ext);
6780	#ifdef CONFIG_XFRM
6781	} else if (id == SKB_EXT_SEC_PATH &&
6782	refcount_read(r: &ext->refcnt) == `1`) {
6783	struct sec_path *sp = skb_ext_get_ptr(ext, id: SKB_EXT_SEC_PATH);
6784
6785	skb_ext_put_sp(sp);
6786	sp->len = `0`;
6787	#endif
6788	}
6789	}
6790	EXPORT_SYMBOL(__skb_ext_del);
6791
6792	void __skb_ext_put(struct skb_ext *ext)
6793	{
6794	/ If this is last clone, nothing can increment*
6795	* it after check passes. Avoids one atomic op.
6796	*/
6797	if (refcount_read(r: &ext->refcnt) == `1`)
6798	goto free_now;
6799
6800	if (!refcount_dec_and_test(r: &ext->refcnt))
6801	return;
6802	free_now:
6803	#ifdef CONFIG_XFRM
6804	if (__skb_ext_exist(ext, i: SKB_EXT_SEC_PATH))
6805	skb_ext_put_sp(sp: skb_ext_get_ptr(ext, id: SKB_EXT_SEC_PATH));
6806	#endif
6807	#ifdef CONFIG_MCTP_FLOWS
6808	if (__skb_ext_exist(ext, i: SKB_EXT_MCTP))
6809	skb_ext_put_mctp(flow: skb_ext_get_ptr(ext, id: SKB_EXT_MCTP));
6810	#endif
6811
6812	kmem_cache_free(s: skbuff_ext_cache, objp: ext);
6813	}
6814	EXPORT_SYMBOL(__skb_ext_put);
6815	#endif /* CONFIG_SKB_EXTENSIONS */
6816
6817	/**
6818	* skb_attempt_defer_free - queue skb for remote freeing
6819	* @skb: buffer
6820	*
6821	* Put @skb in a per-cpu list, using the cpu which
6822	* allocated the skb/pages to reduce false sharing
6823	* and memory zone spinlock contention.
6824	*/
6825	void skb_attempt_defer_free(struct sk_buff *skb)
6826	{
6827	int cpu = skb->alloc_cpu;
6828	struct softnet_data *sd;
6829	unsigned int defer_max;
6830	bool kick;
6831
6832	if (WARN_ON_ONCE(cpu >= nr_cpu_ids) \|\|
6833	!cpu_online(cpu) \|\|
6834	cpu == raw_smp_processor_id()) {
6835	nodefer: __kfree_skb(skb);
6836	return;
6837	}
6838
6839	DEBUG_NET_WARN_ON_ONCE(skb_dst(skb));
6840	DEBUG_NET_WARN_ON_ONCE(skb->destructor);
6841
6842	sd = &per_cpu(softnet_data, cpu);
6843	defer_max = READ_ONCE(sysctl_skb_defer_max);
6844	if (READ_ONCE(sd->defer_count) >= defer_max)
6845	goto nodefer;
6846
6847	spin_lock_bh(lock: &sd->defer_lock);
6848	/ Send an IPI every time queue reaches half capacity. /
6849	kick = sd->defer_count == (defer_max >> `1`);
6850	/ Paired with the READ_ONCE() few lines above /
6851	WRITE_ONCE(sd->defer_count, sd->defer_count + `1`);
6852
6853	skb->next = sd->defer_list;
6854	/ Paired with READ_ONCE() in skb_defer_free_flush() /
6855	WRITE_ONCE(sd->defer_list, skb);
6856	spin_unlock_bh(lock: &sd->defer_lock);
6857
6858	/ Make sure to trigger NET_RX_SOFTIRQ on the remote CPU*
6859	* if we are unlucky enough (this seems very unlikely).
6860	*/
6861	if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, `0`, `1`))
6862	smp_call_function_single_async(cpu, csd: &sd->defer_csd);
6863	}
6864
6865	static void skb_splice_csum_page(struct sk_buff skb, struct* page *page,
6866	size_t offset, size_t len)
6867	{
6868	const char *kaddr;
6869	__wsum csum;
6870
6871	kaddr = kmap_local_page(page);
6872	csum = csum_partial(buff: kaddr + offset, len, sum: `0`);
6873	kunmap_local(kaddr);
6874	skb->csum = csum_block_add(csum: skb->csum, csum2: csum, offset: skb->len);
6875	}
6876
6877	/**
6878	* skb_splice_from_iter - Splice (or copy) pages to skbuff
6879	* @skb: The buffer to add pages to
6880	* @iter: Iterator representing the pages to be added
6881	* @maxsize: Maximum amount of pages to be added
6882	* @gfp: Allocation flags
6883	*
6884	* This is a common helper function for supporting MSG_SPLICE_PAGES. It
6885	* extracts pages from an iterator and adds them to the socket buffer if
6886	* possible, copying them to fragments if not possible (such as if they're slab
6887	* pages).
6888	*
6889	* Returns the amount of data spliced/copied or -EMSGSIZE if there's
6890	* insufficient space in the buffer to transfer anything.
6891	*/
6892	ssize_t skb_splice_from_iter(struct sk_buff skb, struct* iov_iter *iter,
6893	ssize_t maxsize, gfp_t gfp)
6894	{
6895	size_t frag_limit = READ_ONCE(sysctl_max_skb_frags);
6896	struct page pages[`8`], *ppages = pages;
6897	ssize_t spliced = `0`, ret = `0`;
6898	unsigned int i;
6899
6900	while (iter->count > `0`) {
6901	ssize_t space, nr, len;
6902	size_t off;
6903
6904	ret = -EMSGSIZE;
6905	space = frag_limit - skb_shinfo(skb)->nr_frags;
6906	if (space < `0`)
6907	break;
6908
6909	/ We might be able to coalesce without increasing nr_frags /
6910	nr = clamp_t(size_t, space, `1`, ARRAY_SIZE(pages));
6911
6912	len = iov_iter_extract_pages(i: iter, pages: &ppages, maxsize, maxpages: nr, extraction_flags: `0`, offset0: &off);
6913	if (len <= `0`) {
6914	ret = len ?: -EIO;
6915	break;
6916	}
6917
6918	i = `0`;
6919	do {
6920	struct page *page = pages[i++];
6921	size_t part = min_t(size_t, PAGE_SIZE - off, len);
6922
6923	ret = -EIO;
6924	if (WARN_ON_ONCE(!sendpage_ok(page)))
6925	goto out;
6926
6927	ret = skb_append_pagefrags(skb, page, off, part,
6928	frag_limit);
6929	if (ret < `0`) {
6930	iov_iter_revert(i: iter, bytes: len);
6931	goto out;
6932	}
6933
6934	if (skb->ip_summed == CHECKSUM_NONE)
6935	skb_splice_csum_page(skb, page, offset: off, len: part);
6936
6937	off = `0`;
6938	spliced += part;
6939	maxsize -= part;
6940	len -= part;
6941	} while (len > `0`);
6942
6943	if (maxsize <= `0`)
6944	break;
6945	}
6946
6947	out:
6948	skb_len_add(skb, delta: spliced);
6949	return spliced ?: ret;
6950	}
6951	EXPORT_SYMBOL(skb_splice_from_iter);
6952
6953	static __always_inline
6954	size_t memcpy_from_iter_csum(void *iter_from, size_t progress,
6955	size_t len, void to, void* *priv2)
6956	{
6957	__wsum *csum = priv2;
6958	__wsum next = csum_partial_copy_nocheck(src: iter_from, dst: to + progress, len);
6959
6960	csum = csum_block_add(csum: csum, csum2: next, offset: progress);
6961	return `0`;
6962	}
6963
6964	static __always_inline
6965	size_t copy_from_user_iter_csum(void __user *iter_from, size_t progress,
6966	size_t len, void to, void* *priv2)
6967	{
6968	__wsum next, *csum = priv2;
6969
6970	next = csum_and_copy_from_user(src: iter_from, dst: to + progress, len);
6971	csum = csum_block_add(csum: csum, csum2: next, offset: progress);
6972	return next ? `0` : len;
6973	}
6974
6975	bool csum_and_copy_from_iter_full(void *addr, size_t bytes,
6976	__wsum csum, struct* iov_iter *i)
6977	{
6978	size_t copied;
6979
6980	if (WARN_ON_ONCE(!i->data_source))
6981	return false;
6982	copied = iterate_and_advance2(iter: i, len: bytes, priv: addr, priv2: csum,
6983	ustep: copy_from_user_iter_csum,
6984	step: memcpy_from_iter_csum);
6985	if (likely(copied == bytes))
6986	return true;
6987	iov_iter_revert(i, bytes: copied);
6988	return false;
6989	}
6990	EXPORT_SYMBOL(csum_and_copy_from_iter_full);
6991

source code of linux/net/core/skbuff.c