netback.c source code [linux/drivers/net/xen-netback/netback.c]

1	/*
2	* Back-end of the driver for virtual network devices. This portion of the
3	* driver exports a 'unified' network-device interface that can be accessed
4	* by any operating system that implements a compatible front end. A
5	* reference front-end implementation can be found in:
6	* drivers/net/xen-netfront.c
7	*
8	* Copyright (c) 2002-2005, K A Fraser
9	*
10	* This program is free software; you can redistribute it and/or
11	* modify it under the terms of the GNU General Public License version 2
12	* as published by the Free Software Foundation; or, when distributed
13	* separately from the Linux kernel or incorporated into other
14	* software packages, subject to the following license:
15	*
16	* Permission is hereby granted, free of charge, to any person obtaining a copy
17	* of this source file (the "Software"), to deal in the Software without
18	* restriction, including without limitation the rights to use, copy, modify,
19	* merge, publish, distribute, sublicense, and/or sell copies of the Software,
20	* and to permit persons to whom the Software is furnished to do so, subject to
21	* the following conditions:
22	*
23	* The above copyright notice and this permission notice shall be included in
24	* all copies or substantial portions of the Software.
25	*
26	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32	* IN THE SOFTWARE.
33	*/
34
35	#include "common.h"
36
37	#include <linux/kthread.h>
38	#include <linux/if_vlan.h>
39	#include <linux/udp.h>
40	#include <linux/highmem.h>
41
42	#include <net/tcp.h>
43
44	#include <xen/xen.h>
45	#include <xen/events.h>
46	#include <xen/interface/memory.h>
47	#include <xen/page.h>
48
49	#include <asm/xen/hypercall.h>
50
51	/ Provide an option to disable split event channels at load time as*
52	* event channels are limited resource. Split event channels are
53	* enabled by default.
54	*/
55	bool separate_tx_rx_irq = true;
56	module_param(separate_tx_rx_irq, bool, `0644`);
57
58	/ The time that packets can stay on the guest Rx internal queue*
59	* before they are dropped.
60	*/
61	unsigned int rx_drain_timeout_msecs = `10000`;
62	module_param(rx_drain_timeout_msecs, uint, `0444`);
63
64	/ The length of time before the frontend is considered unresponsive*
65	* because it isn't providing Rx slots.
66	*/
67	unsigned int rx_stall_timeout_msecs = `60000`;
68	module_param(rx_stall_timeout_msecs, uint, `0444`);
69
70	#define MAX_QUEUES_DEFAULT 8
71	unsigned int xenvif_max_queues;
72	module_param_named(max_queues, xenvif_max_queues, uint, `0644`);
73	MODULE_PARM_DESC(max_queues,
74	"Maximum number of queues per virtual interface");
75
76	/*
77	* This is the maximum slots a skb can have. If a guest sends a skb
78	* which exceeds this limit it is considered malicious.
79	*/
80	#define FATAL_SKB_SLOTS_DEFAULT 20
81	static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
82	module_param(fatal_skb_slots, uint, `0444`);
83
/* The amount to copy out of the first guest Tx slot into the skb's
 * linear area.  If the first slot has more data, it will be mapped
 * and put into the first frag.
 *
 * This is sized to avoid pulling headers from the frags for most
 * TCP/IP packets.
 */
#define XEN_NETBACK_TX_COPY_LEN 128
92
93	/ This is the maximum number of flows in the hash cache. /
94	#define XENVIF_HASH_CACHE_SIZE_DEFAULT 64
95	unsigned int xenvif_hash_cache_size = XENVIF_HASH_CACHE_SIZE_DEFAULT;
96	module_param_named(hash_cache_size, xenvif_hash_cache_size, uint, `0644`);
97	MODULE_PARM_DESC(hash_cache_size, "Number of flows in the hash cache");
98
99	/ The module parameter tells that we have to put data*
100	* for xen-netfront with the XDP_PACKET_HEADROOM offset
101	* needed for XDP processing
102	*/
103	bool provides_xdp_headroom = true;
104	module_param(provides_xdp_headroom, bool, `0644`);
105
106	static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
107	u8 status);
108
109	static void make_tx_response(struct xenvif_queue *queue,
110	struct xen_netif_tx_request *txp,
111	unsigned int extra_count,
112	s8 st);
113	static void push_tx_responses(struct xenvif_queue *queue);
114
115	static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx);
116
117	static inline int tx_work_todo(struct xenvif_queue *queue);
118
119	static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
120	u16 idx)
121	{
122	return page_to_pfn(queue->mmap_pages[idx]);
123	}
124
125	static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
126	u16 idx)
127	{
128	return (unsigned long)pfn_to_kaddr(idx_to_pfn(queue, idx));
129	}
130
/* The zerocopy callback structure embedded in a pending_tx_info slot.
 * Expands to an lvalue, so callers may take its address or assign to
 * its members.  Arguments are parenthesised so that arbitrary
 * expressions can be passed safely.
 */
#define callback_param(vif, pending_idx) \
	((vif)->pending_tx_info[(pending_idx)].callback_struct)
133
134	/ Find the containing VIF's structure from a pointer in pending_tx_info array*
135	*/
136	static inline struct xenvif_queue ubuf_to_queue(const* struct ubuf_info_msgzc *ubuf)
137	{
138	u16 pending_idx = ubuf->desc;
139	struct pending_tx_info *temp =
140	container_of(ubuf, struct pending_tx_info, callback_struct);
141	return container_of(temp - pending_idx,
142	struct xenvif_queue,
143	pending_tx_info[`0`]);
144	}
145
146	static u16 frag_get_pending_idx(skb_frag_t *frag)
147	{
148	return (u16)skb_frag_off(frag);
149	}
150
151	static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
152	{
153	skb_frag_off_set(frag, offset: pending_idx);
154	}
155
156	static inline pending_ring_idx_t pending_index(unsigned i)
157	{
158	return i & (MAX_PENDING_REQS-`1`);
159	}
160
161	void xenvif_kick_thread(struct xenvif_queue *queue)
162	{
163	wake_up(&queue->wq);
164	}
165
166	void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
167	{
168	int more_to_do;
169
170	RING_FINAL_CHECK_FOR_REQUESTS(&queue->tx, more_to_do);
171
172	if (more_to_do)
173	napi_schedule(n: &queue->napi);
174	else if (atomic_fetch_andnot(NETBK_TX_EOI \| NETBK_COMMON_EOI,
175	v: &queue->eoi_pending) &
176	(NETBK_TX_EOI \| NETBK_COMMON_EOI))
177	xen_irq_lateeoi(irq: queue->tx_irq, eoi_flags: `0`);
178	}
179
180	static void tx_add_credit(struct xenvif_queue *queue)
181	{
182	unsigned long max_burst, max_credit;
183
184	/*
185	* Allow a burst big enough to transmit a jumbo packet of up to 128kB.
186	* Otherwise the interface can seize up due to insufficient credit.
187	*/
188	max_burst = max(`131072UL`, queue->credit_bytes);
189
190	/ Take care that adding a new chunk of credit doesn't wrap to zero. /
191	max_credit = queue->remaining_credit + queue->credit_bytes;
192	if (max_credit < queue->remaining_credit)
193	max_credit = ULONG_MAX; / wrapped: clamp to ULONG_MAX /
194
195	queue->remaining_credit = min(max_credit, max_burst);
196	queue->rate_limited = false;
197	}
198
199	void xenvif_tx_credit_callback(struct timer_list *t)
200	{
201	struct xenvif_queue *queue = from_timer(queue, t, credit_timeout);
202	tx_add_credit(queue);
203	xenvif_napi_schedule_or_enable_events(queue);
204	}
205
206	static void xenvif_tx_err(struct xenvif_queue *queue,
207	struct xen_netif_tx_request *txp,
208	unsigned int extra_count, RING_IDX end)
209	{
210	RING_IDX cons = queue->tx.req_cons;
211	unsigned long flags;
212
213	do {
214	spin_lock_irqsave(&queue->response_lock, flags);
215	make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR);
216	push_tx_responses(queue);
217	spin_unlock_irqrestore(lock: &queue->response_lock, flags);
218	if (cons == end)
219	break;
220	RING_COPY_REQUEST(&queue->tx, cons++, txp);
221	extra_count = `0`; / only the first frag can have extras /
222	} while (`1`);
223	queue->tx.req_cons = cons;
224	}
225
226	static void xenvif_fatal_tx_err(struct xenvif *vif)
227	{
228	netdev_err(dev: vif->dev, format: "fatal error; disabling device\n");
229	vif->disabled = true;
230	/ Disable the vif from queue 0's kthread /
231	if (vif->num_queues)
232	xenvif_kick_thread(queue: &vif->queues[`0`]);
233	}
234
235	static int xenvif_count_requests(struct xenvif_queue *queue,
236	struct xen_netif_tx_request *first,
237	unsigned int extra_count,
238	struct xen_netif_tx_request *txp,
239	int work_to_do)
240	{
241	RING_IDX cons = queue->tx.req_cons;
242	int slots = `0`;
243	int drop_err = `0`;
244	int more_data;
245
246	if (!(first->flags & XEN_NETTXF_more_data))
247	return `0`;
248
249	do {
250	struct xen_netif_tx_request dropped_tx = { `0` };
251
252	if (slots >= work_to_do) {
253	netdev_err(dev: queue->vif->dev,
254	format: "Asked for %d slots but exceeds this limit\n",
255	work_to_do);
256	xenvif_fatal_tx_err(vif: queue->vif);
257	return -ENODATA;
258	}
259
260	/ This guest is really using too many slots and*
261	* considered malicious.
262	*/
263	if (unlikely(slots >= fatal_skb_slots)) {
264	netdev_err(dev: queue->vif->dev,
265	format: "Malicious frontend using %d slots, threshold %u\n",
266	slots, fatal_skb_slots);
267	xenvif_fatal_tx_err(vif: queue->vif);
268	return -E2BIG;
269	}
270
271	/ Xen network protocol had implicit dependency on*
272	* MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
273	* the historical MAX_SKB_FRAGS value 18 to honor the
274	* same behavior as before. Any packet using more than
275	* 18 slots but less than fatal_skb_slots slots is
276	* dropped
277	*/
278	if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
279	if (net_ratelimit())
280	netdev_dbg(queue->vif->dev,
281	"Too many slots (%d) exceeding limit (%d), dropping packet\n",
282	slots, XEN_NETBK_LEGACY_SLOTS_MAX);
283	drop_err = -E2BIG;
284	}
285
286	if (drop_err)
287	txp = &dropped_tx;
288
289	RING_COPY_REQUEST(&queue->tx, cons + slots, txp);
290
291	/ If the guest submitted a frame >= 64 KiB then*
292	* first->size overflowed and following slots will
293	* appear to be larger than the frame.
294	*
295	* This cannot be fatal error as there are buggy
296	* frontends that do this.
297	*
298	* Consume all slots and drop the packet.
299	*/
300	if (!drop_err && txp->size > first->size) {
301	if (net_ratelimit())
302	netdev_dbg(queue->vif->dev,
303	"Invalid tx request, slot size %u > remaining size %u\n",
304	txp->size, first->size);
305	drop_err = -EIO;
306	}
307
308	first->size -= txp->size;
309	slots++;
310
311	if (unlikely((txp->offset + txp->size) > XEN_PAGE_SIZE)) {
312	netdev_err(dev: queue->vif->dev, format: "Cross page boundary, txp->offset: %u, size: %u\n",
313	txp->offset, txp->size);
314	xenvif_fatal_tx_err(vif: queue->vif);
315	return -EINVAL;
316	}
317
318	more_data = txp->flags & XEN_NETTXF_more_data;
319
320	if (!drop_err)
321	txp++;
322
323	} while (more_data);
324
325	if (drop_err) {
326	xenvif_tx_err(queue, txp: first, extra_count, end: cons + slots);
327	return drop_err;
328	}
329
330	return slots;
331	}
332
333
334	struct xenvif_tx_cb {
335	u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + `1`];
336	u8 copy_count;
337	u32 split_mask;
338	};
339
340	#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
341	#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i])
342	#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count)
343
344	static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
345	u16 pending_idx,
346	struct xen_netif_tx_request *txp,
347	unsigned int extra_count,
348	struct gnttab_map_grant_ref *mop)
349	{
350	queue->pages_to_map[mop-queue->tx_map_ops] = queue->mmap_pages[pending_idx];
351	gnttab_set_map_op(map: mop, addr: idx_to_kaddr(queue, idx: pending_idx),
352	GNTMAP_host_map \| GNTMAP_readonly,
353	ref: txp->gref, domid: queue->vif->domid);
354
355	memcpy(&queue->pending_tx_info[pending_idx].req, txp,
356	sizeof(*txp));
357	queue->pending_tx_info[pending_idx].extra_count = extra_count;
358	}
359
360	static inline struct sk_buff xenvif_alloc_skb(unsigned* int size)
361	{
362	struct sk_buff *skb =
363	alloc_skb(size: size + NET_SKB_PAD + NET_IP_ALIGN,
364	GFP_ATOMIC \| __GFP_NOWARN);
365
366	BUILD_BUG_ON(sizeof(XENVIF_TX_CB(skb)) > sizeof*(skb->cb));
367	if (unlikely(skb == NULL))
368	return NULL;
369
370	/ Packets passed to netif_rx() must have some headroom. /
371	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
372
373	/ Initialize it here to avoid later surprises /
374	skb_shinfo(skb)->destructor_arg = NULL;
375
376	return skb;
377	}
378
379	static void xenvif_get_requests(struct xenvif_queue *queue,
380	struct sk_buff *skb,
381	struct xen_netif_tx_request *first,
382	struct xen_netif_tx_request *txfrags,
383	unsigned *copy_ops,
384	unsigned *map_ops,
385	unsigned int frag_overflow,
386	struct sk_buff *nskb,
387	unsigned int extra_count,
388	unsigned int data_len)
389	{
390	struct skb_shared_info *shinfo = skb_shinfo(skb);
391	skb_frag_t *frags = shinfo->frags;
392	u16 pending_idx;
393	pending_ring_idx_t index;
394	unsigned int nr_slots;
395	struct gnttab_copy cop = queue->tx_copy_ops + copy_ops;
396	struct gnttab_map_grant_ref gop = queue->tx_map_ops + map_ops;
397	struct xen_netif_tx_request *txp = first;
398
399	nr_slots = shinfo->nr_frags + frag_overflow + `1`;
400
401	copy_count(skb) = `0`;
402	XENVIF_TX_CB(skb)->split_mask = `0`;
403
404	/ Create copy ops for exactly data_len bytes into the skb head. /
405	__skb_put(skb, len: data_len);
406	while (data_len > `0`) {
407	int amount = data_len > txp->size ? txp->size : data_len;
408	bool split = false;
409
410	cop->source.u.ref = txp->gref;
411	cop->source.domid = queue->vif->domid;
412	cop->source.offset = txp->offset;
413
414	cop->dest.domid = DOMID_SELF;
415	cop->dest.offset = (offset_in_page(skb->data +
416	skb_headlen(skb) -
417	data_len)) & ~XEN_PAGE_MASK;
418	cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb)
419	- data_len);
420
421	/ Don't cross local page boundary! /
422	if (cop->dest.offset + amount > XEN_PAGE_SIZE) {
423	amount = XEN_PAGE_SIZE - cop->dest.offset;
424	XENVIF_TX_CB(skb)->split_mask \|= `1U` << copy_count(skb);
425	split = true;
426	}
427
428	cop->len = amount;
429	cop->flags = GNTCOPY_source_gref;
430
431	index = pending_index(i: queue->pending_cons);
432	pending_idx = queue->pending_ring[index];
433	callback_param(queue, pending_idx).ctx = NULL;
434	copy_pending_idx(skb, copy_count(skb)) = pending_idx;
435	if (!split)
436	copy_count(skb)++;
437
438	cop++;
439	data_len -= amount;
440
441	if (amount == txp->size) {
442	/ The copy op covered the full tx_request /
443
444	memcpy(&queue->pending_tx_info[pending_idx].req,
445	txp, sizeof(*txp));
446	queue->pending_tx_info[pending_idx].extra_count =
447	(txp == first) ? extra_count : `0`;
448
449	if (txp == first)
450	txp = txfrags;
451	else
452	txp++;
453	queue->pending_cons++;
454	nr_slots--;
455	} else {
456	/ The copy op partially covered the tx_request.*
457	* The remainder will be mapped or copied in the next
458	* iteration.
459	*/
460	txp->offset += amount;
461	txp->size -= amount;
462	}
463	}
464
465	for (shinfo->nr_frags = `0`; nr_slots > `0` && shinfo->nr_frags < MAX_SKB_FRAGS;
466	shinfo->nr_frags++, gop++, nr_slots--) {
467	index = pending_index(i: queue->pending_cons++);
468	pending_idx = queue->pending_ring[index];
469	xenvif_tx_create_map_op(queue, pending_idx, txp,
470	extra_count: txp == first ? extra_count : `0`, mop: gop);
471	frag_set_pending_idx(frag: &frags[shinfo->nr_frags], pending_idx);
472
473	if (txp == first)
474	txp = txfrags;
475	else
476	txp++;
477	}
478
479	if (nr_slots > `0`) {
480
481	shinfo = skb_shinfo(nskb);
482	frags = shinfo->frags;
483
484	for (shinfo->nr_frags = `0`; shinfo->nr_frags < nr_slots;
485	shinfo->nr_frags++, txp++, gop++) {
486	index = pending_index(i: queue->pending_cons++);
487	pending_idx = queue->pending_ring[index];
488	xenvif_tx_create_map_op(queue, pending_idx, txp, extra_count: `0`,
489	mop: gop);
490	frag_set_pending_idx(frag: &frags[shinfo->nr_frags],
491	pending_idx);
492	}
493
494	skb_shinfo(skb)->frag_list = nskb;
495	} else if (nskb) {
496	/ A frag_list skb was allocated but it is no longer needed*
497	* because enough slots were converted to copy ops above.
498	*/
499	kfree_skb(skb: nskb);
500	}
501
502	(*copy_ops) = cop - queue->tx_copy_ops;
503	(*map_ops) = gop - queue->tx_map_ops;
504	}
505
506	static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
507	u16 pending_idx,
508	grant_handle_t handle)
509	{
510	if (unlikely(queue->grant_tx_handle[pending_idx] !=
511	NETBACK_INVALID_HANDLE)) {
512	netdev_err(dev: queue->vif->dev,
513	format: "Trying to overwrite active handle! pending_idx: 0x%x\n",
514	pending_idx);
515	BUG();
516	}
517	queue->grant_tx_handle[pending_idx] = handle;
518	}
519
520	static inline void xenvif_grant_handle_reset(struct xenvif_queue *queue,
521	u16 pending_idx)
522	{
523	if (unlikely(queue->grant_tx_handle[pending_idx] ==
524	NETBACK_INVALID_HANDLE)) {
525	netdev_err(dev: queue->vif->dev,
526	format: "Trying to unmap invalid handle! pending_idx: 0x%x\n",
527	pending_idx);
528	BUG();
529	}
530	queue->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
531	}
532
533	static int xenvif_tx_check_gop(struct xenvif_queue *queue,
534	struct sk_buff *skb,
535	struct gnttab_map_grant_ref **gopp_map,
536	struct gnttab_copy **gopp_copy)
537	{
538	struct gnttab_map_grant_ref gop_map = gopp_map;
539	u16 pending_idx;
540	/ This always points to the shinfo of the skb being checked, which*
541	* could be either the first or the one on the frag_list
542	*/
543	struct skb_shared_info *shinfo = skb_shinfo(skb);
544	/ If this is non-NULL, we are currently checking the frag_list skb, and*
545	* this points to the shinfo of the first one
546	*/
547	struct skb_shared_info *first_shinfo = NULL;
548	int nr_frags = shinfo->nr_frags;
549	const bool sharedslot = nr_frags &&
550	frag_get_pending_idx(frag: &shinfo->frags[`0`]) ==
551	copy_pending_idx(skb, copy_count(skb) - `1`);
552	int i, err = `0`;
553
554	for (i = `0`; i < copy_count(skb); i++) {
555	int newerr;
556
557	/ Check status of header. /
558	pending_idx = copy_pending_idx(skb, i);
559
560	newerr = (*gopp_copy)->status;
561
562	/ Split copies need to be handled together. /
563	if (XENVIF_TX_CB(skb)->split_mask & (`1U` << i)) {
564	(*gopp_copy)++;
565	if (!newerr)
566	newerr = (*gopp_copy)->status;
567	}
568	if (likely(!newerr)) {
569	/ The first frag might still have this slot mapped /
570	if (i < copy_count(skb) - `1` \|\| !sharedslot)
571	xenvif_idx_release(queue, pending_idx,
572	XEN_NETIF_RSP_OKAY);
573	} else {
574	err = newerr;
575	if (net_ratelimit())
576	netdev_dbg(queue->vif->dev,
577	"Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
578	(*gopp_copy)->status,
579	pending_idx,
580	(*gopp_copy)->source.u.ref);
581	/ The first frag might still have this slot mapped /
582	if (i < copy_count(skb) - `1` \|\| !sharedslot)
583	xenvif_idx_release(queue, pending_idx,
584	XEN_NETIF_RSP_ERROR);
585	}
586	(*gopp_copy)++;
587	}
588
589	check_frags:
590	for (i = `0`; i < nr_frags; i++, gop_map++) {
591	int j, newerr;
592
593	pending_idx = frag_get_pending_idx(frag: &shinfo->frags[i]);
594
595	/ Check error status: if okay then remember grant handle. /
596	newerr = gop_map->status;
597
598	if (likely(!newerr)) {
599	xenvif_grant_handle_set(queue,
600	pending_idx,
601	handle: gop_map->handle);
602	/ Had a previous error? Invalidate this fragment. /
603	if (unlikely(err)) {
604	xenvif_idx_unmap(queue, pending_idx);
605	/ If the mapping of the first frag was OK, but*
606	* the header's copy failed, and they are
607	* sharing a slot, send an error
608	*/
609	if (i == `0` && !first_shinfo && sharedslot)
610	xenvif_idx_release(queue, pending_idx,
611	XEN_NETIF_RSP_ERROR);
612	else
613	xenvif_idx_release(queue, pending_idx,
614	XEN_NETIF_RSP_OKAY);
615	}
616	continue;
617	}
618
619	/ Error on this fragment: respond to client with an error. /
620	if (net_ratelimit())
621	netdev_dbg(queue->vif->dev,
622	"Grant map of %d. frag failed! status: %d pending_idx: %u ref: %u\n",
623	i,
624	gop_map->status,
625	pending_idx,
626	gop_map->ref);
627
628	xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR);
629
630	/ Not the first error? Preceding frags already invalidated. /
631	if (err)
632	continue;
633
634	/ Invalidate preceding fragments of this skb. /
635	for (j = `0`; j < i; j++) {
636	pending_idx = frag_get_pending_idx(frag: &shinfo->frags[j]);
637	xenvif_idx_unmap(queue, pending_idx);
638	xenvif_idx_release(queue, pending_idx,
639	XEN_NETIF_RSP_OKAY);
640	}
641
642	/ And if we found the error while checking the frag_list, unmap*
643	* the first skb's frags
644	*/
645	if (first_shinfo) {
646	for (j = `0`; j < first_shinfo->nr_frags; j++) {
647	pending_idx = frag_get_pending_idx(frag: &first_shinfo->frags[j]);
648	xenvif_idx_unmap(queue, pending_idx);
649	xenvif_idx_release(queue, pending_idx,
650	XEN_NETIF_RSP_OKAY);
651	}
652	}
653
654	/ Remember the error: invalidate all subsequent fragments. /
655	err = newerr;
656	}
657
658	if (skb_has_frag_list(skb) && !first_shinfo) {
659	first_shinfo = shinfo;
660	shinfo = skb_shinfo(shinfo->frag_list);
661	nr_frags = shinfo->nr_frags;
662
663	goto check_frags;
664	}
665
666	*gopp_map = gop_map;
667	return err;
668	}
669
670	static void xenvif_fill_frags(struct xenvif_queue queue, struct* sk_buff *skb)
671	{
672	struct skb_shared_info *shinfo = skb_shinfo(skb);
673	int nr_frags = shinfo->nr_frags;
674	int i;
675	u16 prev_pending_idx = INVALID_PENDING_IDX;
676
677	for (i = `0`; i < nr_frags; i++) {
678	skb_frag_t *frag = shinfo->frags + i;
679	struct xen_netif_tx_request *txp;
680	struct page *page;
681	u16 pending_idx;
682
683	pending_idx = frag_get_pending_idx(frag);
684
685	/ If this is not the first frag, chain it to the previous/
686	if (prev_pending_idx == INVALID_PENDING_IDX)
687	skb_shinfo(skb)->destructor_arg =
688	&callback_param(queue, pending_idx);
689	else
690	callback_param(queue, prev_pending_idx).ctx =
691	&callback_param(queue, pending_idx);
692
693	callback_param(queue, pending_idx).ctx = NULL;
694	prev_pending_idx = pending_idx;
695
696	txp = &queue->pending_tx_info[pending_idx].req;
697	page = virt_to_page((void *)idx_to_kaddr(queue, pending_idx));
698	__skb_fill_page_desc(skb, i, page, off: txp->offset, size: txp->size);
699	skb->len += txp->size;
700	skb->data_len += txp->size;
701	skb->truesize += txp->size;
702
703	/ Take an extra reference to offset network stack's put_page /
704	get_page(page: queue->mmap_pages[pending_idx]);
705	}
706	}
707
708	static int xenvif_get_extras(struct xenvif_queue *queue,
709	struct xen_netif_extra_info *extras,
710	unsigned int *extra_count,
711	int work_to_do)
712	{
713	struct xen_netif_extra_info extra;
714	RING_IDX cons = queue->tx.req_cons;
715
716	do {
717	if (unlikely(work_to_do-- <= `0`)) {
718	netdev_err(dev: queue->vif->dev, format: "Missing extra info\n");
719	xenvif_fatal_tx_err(vif: queue->vif);
720	return -EBADR;
721	}
722
723	RING_COPY_REQUEST(&queue->tx, cons, &extra);
724
725	queue->tx.req_cons = ++cons;
726	(*extra_count)++;
727
728	if (unlikely(!extra.type \|\|
729	extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
730	netdev_err(dev: queue->vif->dev,
731	format: "Invalid extra type: %d\n", extra.type);
732	xenvif_fatal_tx_err(vif: queue->vif);
733	return -EINVAL;
734	}
735
736	memcpy(&extras[extra.type - `1`], &extra, sizeof(extra));
737	} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
738
739	return work_to_do;
740	}
741
742	static int xenvif_set_skb_gso(struct xenvif *vif,
743	struct sk_buff *skb,
744	struct xen_netif_extra_info *gso)
745	{
746	if (!gso->u.gso.size) {
747	netdev_err(dev: vif->dev, format: "GSO size must not be zero.\n");
748	xenvif_fatal_tx_err(vif);
749	return -EINVAL;
750	}
751
752	switch (gso->u.gso.type) {
753	case XEN_NETIF_GSO_TYPE_TCPV4:
754	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
755	break;
756	case XEN_NETIF_GSO_TYPE_TCPV6:
757	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
758	break;
759	default:
760	netdev_err(dev: vif->dev, format: "Bad GSO type %d.\n", gso->u.gso.type);
761	xenvif_fatal_tx_err(vif);
762	return -EINVAL;
763	}
764
765	skb_shinfo(skb)->gso_size = gso->u.gso.size;
766	/ gso_segs will be calculated later /
767
768	return `0`;
769	}
770
771	static int checksum_setup(struct xenvif_queue queue, struct* sk_buff *skb)
772	{
773	bool recalculate_partial_csum = false;
774
775	/ A GSO SKB must be CHECKSUM_PARTIAL. However some buggy*
776	* peers can fail to set NETRXF_csum_blank when sending a GSO
777	* frame. In this case force the SKB to CHECKSUM_PARTIAL and
778	* recalculate the partial checksum.
779	*/
780	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
781	queue->stats.rx_gso_checksum_fixup++;
782	skb->ip_summed = CHECKSUM_PARTIAL;
783	recalculate_partial_csum = true;
784	}
785
786	/ A non-CHECKSUM_PARTIAL SKB does not require setup. /
787	if (skb->ip_summed != CHECKSUM_PARTIAL)
788	return `0`;
789
790	return skb_checksum_setup(skb, recalculate: recalculate_partial_csum);
791	}
792
793	static bool tx_credit_exceeded(struct xenvif_queue queue, unsigned* size)
794	{
795	u64 now = get_jiffies_64();
796	u64 next_credit = queue->credit_window_start +
797	msecs_to_jiffies(m: queue->credit_usec / `1000`);
798
799	/ Timer could already be pending in rare cases. /
800	if (timer_pending(timer: &queue->credit_timeout)) {
801	queue->rate_limited = true;
802	return true;
803	}
804
805	/ Passed the point where we can replenish credit? /
806	if (time_after_eq64(now, next_credit)) {
807	queue->credit_window_start = now;
808	tx_add_credit(queue);
809	}
810
811	/ Still too big to send right now? Set a callback. /
812	if (size > queue->remaining_credit) {
813	mod_timer(timer: &queue->credit_timeout,
814	expires: next_credit);
815	queue->credit_window_start = next_credit;
816	queue->rate_limited = true;
817
818	return true;
819	}
820
821	return false;
822	}
823
824	/ No locking is required in xenvif_mcast_add/del() as they are*
825	* only ever invoked from NAPI poll. An RCU list is used because
826	* xenvif_mcast_match() is called asynchronously, during start_xmit.
827	*/
828
829	static int xenvif_mcast_add(struct xenvif vif, const* u8 *addr)
830	{
831	struct xenvif_mcast_addr *mcast;
832
833	if (vif->fe_mcast_count == XEN_NETBK_MCAST_MAX) {
834	if (net_ratelimit())
835	netdev_err(dev: vif->dev,
836	format: "Too many multicast addresses\n");
837	return -ENOSPC;
838	}
839
840	mcast = kzalloc(size: sizeof(*mcast), GFP_ATOMIC);
841	if (!mcast)
842	return -ENOMEM;
843
844	ether_addr_copy(dst: mcast->addr, src: addr);
845	list_add_tail_rcu(new: &mcast->entry, head: &vif->fe_mcast_addr);
846	vif->fe_mcast_count++;
847
848	return `0`;
849	}
850
851	static void xenvif_mcast_del(struct xenvif vif, const* u8 *addr)
852	{
853	struct xenvif_mcast_addr *mcast;
854
855	list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) {
856	if (ether_addr_equal(addr1: addr, addr2: mcast->addr)) {
857	--vif->fe_mcast_count;
858	list_del_rcu(entry: &mcast->entry);
859	kfree_rcu(mcast, rcu);
860	break;
861	}
862	}
863	}
864
865	bool xenvif_mcast_match(struct xenvif vif, const* u8 *addr)
866	{
867	struct xenvif_mcast_addr *mcast;
868
869	rcu_read_lock();
870	list_for_each_entry_rcu(mcast, &vif->fe_mcast_addr, entry) {
871	if (ether_addr_equal(addr1: addr, addr2: mcast->addr)) {
872	rcu_read_unlock();
873	return true;
874	}
875	}
876	rcu_read_unlock();
877
878	return false;
879	}
880
881	void xenvif_mcast_addr_list_free(struct xenvif *vif)
882	{
883	/ No need for locking or RCU here. NAPI poll and TX queue*
884	* are stopped.
885	*/
886	while (!list_empty(head: &vif->fe_mcast_addr)) {
887	struct xenvif_mcast_addr *mcast;
888
889	mcast = list_first_entry(&vif->fe_mcast_addr,
890	struct xenvif_mcast_addr,
891	entry);
892	--vif->fe_mcast_count;
893	list_del(entry: &mcast->entry);
894	kfree(objp: mcast);
895	}
896	}
897
898	static void xenvif_tx_build_gops(struct xenvif_queue *queue,
899	int budget,
900	unsigned *copy_ops,
901	unsigned *map_ops)
902	{
903	struct sk_buff skb, nskb;
904	int ret;
905	unsigned int frag_overflow;
906
907	while (skb_queue_len(list_: &queue->tx_queue) < budget) {
908	struct xen_netif_tx_request txreq;
909	struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
910	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-`1`];
911	unsigned int extra_count;
912	RING_IDX idx;
913	int work_to_do;
914	unsigned int data_len;
915
916	if (queue->tx.sring->req_prod - queue->tx.req_cons >
917	XEN_NETIF_TX_RING_SIZE) {
918	netdev_err(dev: queue->vif->dev,
919	format: "Impossible number of requests. "
920	"req_prod %d, req_cons %d, size %ld\n",
921	queue->tx.sring->req_prod, queue->tx.req_cons,
922	XEN_NETIF_TX_RING_SIZE);
923	xenvif_fatal_tx_err(vif: queue->vif);
924	break;
925	}
926
927	work_to_do = XEN_RING_NR_UNCONSUMED_REQUESTS(&queue->tx);
928	if (!work_to_do)
929	break;
930
931	idx = queue->tx.req_cons;
932	rmb(); / Ensure that we see the request before we copy it. /
933	RING_COPY_REQUEST(&queue->tx, idx, &txreq);
934
935	/ Credit-based scheduling. /
936	if (txreq.size > queue->remaining_credit &&
937	tx_credit_exceeded(queue, size: txreq.size))
938	break;
939
940	queue->remaining_credit -= txreq.size;
941
942	work_to_do--;
943	queue->tx.req_cons = ++idx;
944
945	memset(extras, `0`, sizeof(extras));
946	extra_count = `0`;
947	if (txreq.flags & XEN_NETTXF_extra_info) {
948	work_to_do = xenvif_get_extras(queue, extras,
949	extra_count: &extra_count,
950	work_to_do);
951	idx = queue->tx.req_cons;
952	if (unlikely(work_to_do < `0`))
953	break;
954	}
955
956	if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - `1`].type) {
957	struct xen_netif_extra_info *extra;
958
959	extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_ADD - `1`];
960	ret = xenvif_mcast_add(vif: queue->vif, addr: extra->u.mcast.addr);
961
962	make_tx_response(queue, txp: &txreq, extra_count,
963	st: (ret == `0`) ?
964	XEN_NETIF_RSP_OKAY :
965	XEN_NETIF_RSP_ERROR);
966	push_tx_responses(queue);
967	continue;
968	}
969
970	if (extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - `1`].type) {
971	struct xen_netif_extra_info *extra;
972
973	extra = &extras[XEN_NETIF_EXTRA_TYPE_MCAST_DEL - `1`];
974	xenvif_mcast_del(vif: queue->vif, addr: extra->u.mcast.addr);
975
976	make_tx_response(queue, txp: &txreq, extra_count,
977	XEN_NETIF_RSP_OKAY);
978	push_tx_responses(queue);
979	continue;
980	}
981
982	data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ?
983	XEN_NETBACK_TX_COPY_LEN : txreq.size;
984
985	ret = xenvif_count_requests(queue, first: &txreq, extra_count,
986	txp: txfrags, work_to_do);
987
988	if (unlikely(ret < `0`))
989	break;
990
991	idx += ret;
992
993	if (unlikely(txreq.size < ETH_HLEN)) {
994	netdev_dbg(queue->vif->dev,
995	"Bad packet size: %d\n", txreq.size);
996	xenvif_tx_err(queue, txp: &txreq, extra_count, end: idx);
997	break;
998	}
999
1000	/ No crossing a page as the payload mustn't fragment. /
1001	if (unlikely((txreq.offset + txreq.size) > XEN_PAGE_SIZE)) {
1002	netdev_err(dev: queue->vif->dev, format: "Cross page boundary, txreq.offset: %u, size: %u\n",
1003	txreq.offset, txreq.size);
1004	xenvif_fatal_tx_err(vif: queue->vif);
1005	break;
1006	}
1007
1008	if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - `1` && data_len < txreq.size)
1009	data_len = txreq.size;
1010
1011	skb = xenvif_alloc_skb(size: data_len);
1012	if (unlikely(skb == NULL)) {
1013	netdev_dbg(queue->vif->dev,
1014	"Can't allocate a skb in start_xmit.\n");
1015	xenvif_tx_err(queue, txp: &txreq, extra_count, end: idx);
1016	break;
1017	}
1018
1019	skb_shinfo(skb)->nr_frags = ret;
1020	/ At this point shinfo->nr_frags is in fact the number of*
1021	* slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
1022	*/
1023	frag_overflow = `0`;
1024	nskb = NULL;
1025	if (skb_shinfo(skb)->nr_frags > MAX_SKB_FRAGS) {
1026	frag_overflow = skb_shinfo(skb)->nr_frags - MAX_SKB_FRAGS;
1027	BUG_ON(frag_overflow > MAX_SKB_FRAGS);
1028	skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS;
1029	nskb = xenvif_alloc_skb(size: `0`);
1030	if (unlikely(nskb == NULL)) {
1031	skb_shinfo(skb)->nr_frags = `0`;
1032	kfree_skb(skb);
1033	xenvif_tx_err(queue, txp: &txreq, extra_count, end: idx);
1034	if (net_ratelimit())
1035	netdev_err(dev: queue->vif->dev,
1036	format: "Can't allocate the frag_list skb.\n");
1037	break;
1038	}
1039	}
1040
1041	if (extras[XEN_NETIF_EXTRA_TYPE_GSO - `1`].type) {
1042	struct xen_netif_extra_info *gso;
1043	gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - `1`];
1044
1045	if (xenvif_set_skb_gso(vif: queue->vif, skb, gso)) {
1046	/ Failure in xenvif_set_skb_gso is fatal. /
1047	skb_shinfo(skb)->nr_frags = `0`;
1048	kfree_skb(skb);
1049	kfree_skb(skb: nskb);
1050	break;
1051	}
1052	}
1053
1054	if (extras[XEN_NETIF_EXTRA_TYPE_HASH - `1`].type) {
1055	struct xen_netif_extra_info *extra;
1056	enum pkt_hash_types type = PKT_HASH_TYPE_NONE;
1057
1058	extra = &extras[XEN_NETIF_EXTRA_TYPE_HASH - `1`];
1059
1060	switch (extra->u.hash.type) {
1061	case _XEN_NETIF_CTRL_HASH_TYPE_IPV4:
1062	case _XEN_NETIF_CTRL_HASH_TYPE_IPV6:
1063	type = PKT_HASH_TYPE_L3;
1064	break;
1065
1066	case _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP:
1067	case _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP:
1068	type = PKT_HASH_TYPE_L4;
1069	break;
1070
1071	default:
1072	break;
1073	}
1074
1075	if (type != PKT_HASH_TYPE_NONE)
1076	skb_set_hash(skb,
1077	hash: (u32 )extra->u.hash.value,
1078	type);
1079	}
1080
1081	xenvif_get_requests(queue, skb, first: &txreq, txfrags, copy_ops,
1082	map_ops, frag_overflow, nskb, extra_count,
1083	data_len);
1084
1085	__skb_queue_tail(list: &queue->tx_queue, newsk: skb);
1086
1087	queue->tx.req_cons = idx;
1088	}
1089
1090	return;
1091	}
1092
1093	/ Consolidate skb with a frag_list into a brand new one with local pages on*
1094	* frags. Returns 0 or -ENOMEM if can't allocate new pages.
1095	*/
1096	static int xenvif_handle_frag_list(struct xenvif_queue queue, struct* sk_buff *skb)
1097	{
1098	unsigned int offset = skb_headlen(skb);
1099	skb_frag_t frags[MAX_SKB_FRAGS];
1100	int i, f;
1101	struct ubuf_info *uarg;
1102	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
1103
1104	queue->stats.tx_zerocopy_sent += `2`;
1105	queue->stats.tx_frag_overflow++;
1106
1107	xenvif_fill_frags(queue, skb: nskb);
1108	/ Subtract frags size, we will correct it later /
1109	skb->truesize -= skb->data_len;
1110	skb->len += nskb->len;
1111	skb->data_len += nskb->len;
1112
1113	/ create a brand new frags array and coalesce there /
1114	for (i = `0`; offset < skb->len; i++) {
1115	struct page *page;
1116	unsigned int len;
1117
1118	BUG_ON(i >= MAX_SKB_FRAGS);
1119	page = alloc_page(GFP_ATOMIC);
1120	if (!page) {
1121	int j;
1122	skb->truesize += skb->data_len;
1123	for (j = `0`; j < i; j++)
1124	put_page(page: skb_frag_page(frag: &frags[j]));
1125	return -ENOMEM;
1126	}
1127
1128	if (offset + PAGE_SIZE < skb->len)
1129	len = PAGE_SIZE;
1130	else
1131	len = skb->len - offset;
1132	if (skb_copy_bits(skb, offset, page_address(page), len))
1133	BUG();
1134
1135	offset += len;
1136	skb_frag_fill_page_desc(frag: &frags[i], page, off: `0`, size: len);
1137	}
1138
1139	/ Release all the original (foreign) frags. /
1140	for (f = `0`; f < skb_shinfo(skb)->nr_frags; f++)
1141	skb_frag_unref(skb, f);
1142	uarg = skb_shinfo(skb)->destructor_arg;
1143	/ increase inflight counter to offset decrement in callback /
1144	atomic_inc(v: &queue->inflight_packets);
1145	uarg->callback(NULL, uarg, true);
1146	skb_shinfo(skb)->destructor_arg = NULL;
1147
1148	/ Fill the skb with the new (local) frags. /
1149	memcpy(skb_shinfo(skb)->frags, frags, i * sizeof(skb_frag_t));
1150	skb_shinfo(skb)->nr_frags = i;
1151	skb->truesize += i * PAGE_SIZE;
1152
1153	return `0`;
1154	}
1155
1156	static int xenvif_tx_submit(struct xenvif_queue *queue)
1157	{
1158	struct gnttab_map_grant_ref *gop_map = queue->tx_map_ops;
1159	struct gnttab_copy *gop_copy = queue->tx_copy_ops;
1160	struct sk_buff *skb;
1161	int work_done = `0`;
1162
1163	while ((skb = __skb_dequeue(list: &queue->tx_queue)) != NULL) {
1164	struct xen_netif_tx_request *txp;
1165	u16 pending_idx;
1166
1167	pending_idx = copy_pending_idx(skb, `0`);
1168	txp = &queue->pending_tx_info[pending_idx].req;
1169
1170	/ Check the remap error code. /
1171	if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) {
1172	/ If there was an error, xenvif_tx_check_gop is*
1173	* expected to release all the frags which were mapped,
1174	* so kfree_skb shouldn't do it again
1175	*/
1176	skb_shinfo(skb)->nr_frags = `0`;
1177	if (skb_has_frag_list(skb)) {
1178	struct sk_buff *nskb =
1179	skb_shinfo(skb)->frag_list;
1180	skb_shinfo(nskb)->nr_frags = `0`;
1181	}
1182	kfree_skb(skb);
1183	continue;
1184	}
1185
1186	if (txp->flags & XEN_NETTXF_csum_blank)
1187	skb->ip_summed = CHECKSUM_PARTIAL;
1188	else if (txp->flags & XEN_NETTXF_data_validated)
1189	skb->ip_summed = CHECKSUM_UNNECESSARY;
1190
1191	xenvif_fill_frags(queue, skb);
1192
1193	if (unlikely(skb_has_frag_list(skb))) {
1194	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
1195	xenvif_skb_zerocopy_prepare(queue, skb: nskb);
1196	if (xenvif_handle_frag_list(queue, skb)) {
1197	if (net_ratelimit())
1198	netdev_err(dev: queue->vif->dev,
1199	format: "Not enough memory to consolidate frag_list!\n");
1200	xenvif_skb_zerocopy_prepare(queue, skb);
1201	kfree_skb(skb);
1202	continue;
1203	}
1204	/ Copied all the bits from the frag list -- free it. /
1205	skb_frag_list_init(skb);
1206	kfree_skb(skb: nskb);
1207	}
1208
1209	skb->dev = queue->vif->dev;
1210	skb->protocol = eth_type_trans(skb, dev: skb->dev);
1211	skb_reset_network_header(skb);
1212
1213	if (checksum_setup(queue, skb)) {
1214	netdev_dbg(queue->vif->dev,
1215	"Can't setup checksum in net_tx_action\n");
1216	/ We have to set this flag to trigger the callback /
1217	if (skb_shinfo(skb)->destructor_arg)
1218	xenvif_skb_zerocopy_prepare(queue, skb);
1219	kfree_skb(skb);
1220	continue;
1221	}
1222
1223	skb_probe_transport_header(skb);
1224
1225	/ If the packet is GSO then we will have just set up the*
1226	* transport header offset in checksum_setup so it's now
1227	* straightforward to calculate gso_segs.
1228	*/
1229	if (skb_is_gso(skb)) {
1230	int mss, hdrlen;
1231
1232	/ GSO implies having the L4 header. /
1233	WARN_ON_ONCE(!skb_transport_header_was_set(skb));
1234	if (unlikely(!skb_transport_header_was_set(skb))) {
1235	kfree_skb(skb);
1236	continue;
1237	}
1238
1239	mss = skb_shinfo(skb)->gso_size;
1240	hdrlen = skb_tcp_all_headers(skb);
1241
1242	skb_shinfo(skb)->gso_segs =
1243	DIV_ROUND_UP(skb->len - hdrlen, mss);
1244	}
1245
1246	queue->stats.rx_bytes += skb->len;
1247	queue->stats.rx_packets++;
1248
1249	work_done++;
1250
1251	/ Set this flag right before netif_receive_skb, otherwise*
1252	* someone might think this packet already left netback, and
1253	* do a skb_copy_ubufs while we are still in control of the
1254	* skb. E.g. the __pskb_pull_tail earlier can do such thing.
1255	*/
1256	if (skb_shinfo(skb)->destructor_arg) {
1257	xenvif_skb_zerocopy_prepare(queue, skb);
1258	queue->stats.tx_zerocopy_sent++;
1259	}
1260
1261	netif_receive_skb(skb);
1262	}
1263
1264	return work_done;
1265	}
1266
1267	void xenvif_zerocopy_callback(struct sk_buff skb, struct* ubuf_info *ubuf_base,
1268	bool zerocopy_success)
1269	{
1270	unsigned long flags;
1271	pending_ring_idx_t index;
1272	struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base);
1273	struct xenvif_queue *queue = ubuf_to_queue(ubuf);
1274
1275	/ This is the only place where we grab this lock, to protect callbacks*
1276	* from each other.
1277	*/
1278	spin_lock_irqsave(&queue->callback_lock, flags);
1279	do {
1280	u16 pending_idx = ubuf->desc;
1281	ubuf = (struct ubuf_info_msgzc *) ubuf->ctx;
1282	BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
1283	MAX_PENDING_REQS);
1284	index = pending_index(i: queue->dealloc_prod);
1285	queue->dealloc_ring[index] = pending_idx;
1286	/ Sync with xenvif_tx_dealloc_action:*
1287	* insert idx then incr producer.
1288	*/
1289	smp_wmb();
1290	queue->dealloc_prod++;
1291	} while (ubuf);
1292	spin_unlock_irqrestore(lock: &queue->callback_lock, flags);
1293
1294	if (likely(zerocopy_success))
1295	queue->stats.tx_zerocopy_success++;
1296	else
1297	queue->stats.tx_zerocopy_fail++;
1298	xenvif_skb_zerocopy_complete(queue);
1299	}
1300
1301	static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
1302	{
1303	struct gnttab_unmap_grant_ref *gop;
1304	pending_ring_idx_t dc, dp;
1305	u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
1306	unsigned int i = `0`;
1307
1308	dc = queue->dealloc_cons;
1309	gop = queue->tx_unmap_ops;
1310
1311	/ Free up any grants we have finished using /
1312	do {
1313	dp = queue->dealloc_prod;
1314
1315	/ Ensure we see all indices enqueued by all*
1316	* xenvif_zerocopy_callback().
1317	*/
1318	smp_rmb();
1319
1320	while (dc != dp) {
1321	BUG_ON(gop - queue->tx_unmap_ops >= MAX_PENDING_REQS);
1322	pending_idx =
1323	queue->dealloc_ring[pending_index(i: dc++)];
1324
1325	pending_idx_release[gop - queue->tx_unmap_ops] =
1326	pending_idx;
1327	queue->pages_to_unmap[gop - queue->tx_unmap_ops] =
1328	queue->mmap_pages[pending_idx];
1329	gnttab_set_unmap_op(unmap: gop,
1330	addr: idx_to_kaddr(queue, idx: pending_idx),
1331	GNTMAP_host_map,
1332	handle: queue->grant_tx_handle[pending_idx]);
1333	xenvif_grant_handle_reset(queue, pending_idx);
1334	++gop;
1335	}
1336
1337	} while (dp != queue->dealloc_prod);
1338
1339	queue->dealloc_cons = dc;
1340
1341	if (gop - queue->tx_unmap_ops > `0`) {
1342	int ret;
1343	ret = gnttab_unmap_refs(unmap_ops: queue->tx_unmap_ops,
1344	NULL,
1345	pages: queue->pages_to_unmap,
1346	count: gop - queue->tx_unmap_ops);
1347	if (ret) {
1348	netdev_err(dev: queue->vif->dev, format: "Unmap fail: nr_ops %tu ret %d\n",
1349	gop - queue->tx_unmap_ops, ret);
1350	for (i = `0`; i < gop - queue->tx_unmap_ops; ++i) {
1351	if (gop[i].status != GNTST_okay)
1352	netdev_err(dev: queue->vif->dev,
1353	format: " host_addr: 0x%llx handle: 0x%x status: %d\n",
1354	gop[i].host_addr,
1355	gop[i].handle,
1356	gop[i].status);
1357	}
1358	BUG();
1359	}
1360	}
1361
1362	for (i = `0`; i < gop - queue->tx_unmap_ops; ++i)
1363	xenvif_idx_release(queue, pending_idx: pending_idx_release[i],
1364	XEN_NETIF_RSP_OKAY);
1365	}
1366
1367
1368	/ Called after netfront has transmitted /
1369	int xenvif_tx_action(struct xenvif_queue queue, int* budget)
1370	{
1371	unsigned nr_mops = `0`, nr_cops = `0`;
1372	int work_done, ret;
1373
1374	if (unlikely(!tx_work_todo(queue)))
1375	return `0`;
1376
1377	xenvif_tx_build_gops(queue, budget, copy_ops: &nr_cops, map_ops: &nr_mops);
1378
1379	if (nr_cops == `0`)
1380	return `0`;
1381
1382	gnttab_batch_copy(batch: queue->tx_copy_ops, count: nr_cops);
1383	if (nr_mops != `0`) {
1384	ret = gnttab_map_refs(map_ops: queue->tx_map_ops,
1385	NULL,
1386	pages: queue->pages_to_map,
1387	count: nr_mops);
1388	if (ret) {
1389	unsigned int i;
1390
1391	netdev_err(dev: queue->vif->dev, format: "Map fail: nr %u ret %d\n",
1392	nr_mops, ret);
1393	for (i = `0`; i < nr_mops; ++i)
1394	WARN_ON_ONCE(queue->tx_map_ops[i].status ==
1395	GNTST_okay);
1396	}
1397	}
1398
1399	work_done = xenvif_tx_submit(queue);
1400
1401	return work_done;
1402	}
1403
1404	static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
1405	u8 status)
1406	{
1407	struct pending_tx_info *pending_tx_info;
1408	pending_ring_idx_t index;
1409	unsigned long flags;
1410
1411	pending_tx_info = &queue->pending_tx_info[pending_idx];
1412
1413	spin_lock_irqsave(&queue->response_lock, flags);
1414
1415	make_tx_response(queue, txp: &pending_tx_info->req,
1416	extra_count: pending_tx_info->extra_count, st: status);
1417
1418	/ Release the pending index before pusing the Tx response so*
1419	* its available before a new Tx request is pushed by the
1420	* frontend.
1421	*/
1422	index = pending_index(i: queue->pending_prod++);
1423	queue->pending_ring[index] = pending_idx;
1424
1425	push_tx_responses(queue);
1426
1427	spin_unlock_irqrestore(lock: &queue->response_lock, flags);
1428	}
1429
1430
1431	static void make_tx_response(struct xenvif_queue *queue,
1432	struct xen_netif_tx_request *txp,
1433	unsigned int extra_count,
1434	s8 st)
1435	{
1436	RING_IDX i = queue->tx.rsp_prod_pvt;
1437	struct xen_netif_tx_response *resp;
1438
1439	resp = RING_GET_RESPONSE(&queue->tx, i);
1440	resp->id = txp->id;
1441	resp->status = st;
1442
1443	while (extra_count-- != `0`)
1444	RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL;
1445
1446	queue->tx.rsp_prod_pvt = ++i;
1447	}
1448
1449	static void push_tx_responses(struct xenvif_queue *queue)
1450	{
1451	int notify;
1452
1453	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
1454	if (notify)
1455	notify_remote_via_irq(irq: queue->tx_irq);
1456	}
1457
1458	static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
1459	{
1460	int ret;
1461	struct gnttab_unmap_grant_ref tx_unmap_op;
1462
1463	gnttab_set_unmap_op(unmap: &tx_unmap_op,
1464	addr: idx_to_kaddr(queue, idx: pending_idx),
1465	GNTMAP_host_map,
1466	handle: queue->grant_tx_handle[pending_idx]);
1467	xenvif_grant_handle_reset(queue, pending_idx);
1468
1469	ret = gnttab_unmap_refs(unmap_ops: &tx_unmap_op, NULL,
1470	pages: &queue->mmap_pages[pending_idx], count: `1`);
1471	if (ret) {
1472	netdev_err(dev: queue->vif->dev,
1473	format: "Unmap fail: ret: %d pending_idx: %d host_addr: %llx handle: 0x%x status: %d\n",
1474	ret,
1475	pending_idx,
1476	tx_unmap_op.host_addr,
1477	tx_unmap_op.handle,
1478	tx_unmap_op.status);
1479	BUG();
1480	}
1481	}
1482
1483	static inline int tx_work_todo(struct xenvif_queue *queue)
1484	{
1485	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
1486	return `1`;
1487
1488	return `0`;
1489	}
1490
1491	static inline bool tx_dealloc_work_todo(struct xenvif_queue *queue)
1492	{
1493	return queue->dealloc_cons != queue->dealloc_prod;
1494	}
1495
1496	void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue)
1497	{
1498	if (queue->tx.sring)
1499	xenbus_unmap_ring_vfree(dev: xenvif_to_xenbus_device(vif: queue->vif),
1500	vaddr: queue->tx.sring);
1501	if (queue->rx.sring)
1502	xenbus_unmap_ring_vfree(dev: xenvif_to_xenbus_device(vif: queue->vif),
1503	vaddr: queue->rx.sring);
1504	}
1505
1506	int xenvif_map_frontend_data_rings(struct xenvif_queue *queue,
1507	grant_ref_t tx_ring_ref,
1508	grant_ref_t rx_ring_ref)
1509	{
1510	void *addr;
1511	struct xen_netif_tx_sring *txs;
1512	struct xen_netif_rx_sring *rxs;
1513	RING_IDX rsp_prod, req_prod;
1514	int err;
1515
1516	err = xenbus_map_ring_valloc(dev: xenvif_to_xenbus_device(vif: queue->vif),
1517	gnt_refs: &tx_ring_ref, nr_grefs: `1`, vaddr: &addr);
1518	if (err)
1519	goto err;
1520
1521	txs = (struct xen_netif_tx_sring *)addr;
1522	rsp_prod = READ_ONCE(txs->rsp_prod);
1523	req_prod = READ_ONCE(txs->req_prod);
1524
1525	BACK_RING_ATTACH(&queue->tx, txs, rsp_prod, XEN_PAGE_SIZE);
1526
1527	err = -EIO;
1528	if (req_prod - rsp_prod > RING_SIZE(&queue->tx))
1529	goto err;
1530
1531	err = xenbus_map_ring_valloc(dev: xenvif_to_xenbus_device(vif: queue->vif),
1532	gnt_refs: &rx_ring_ref, nr_grefs: `1`, vaddr: &addr);
1533	if (err)
1534	goto err;
1535
1536	rxs = (struct xen_netif_rx_sring *)addr;
1537	rsp_prod = READ_ONCE(rxs->rsp_prod);
1538	req_prod = READ_ONCE(rxs->req_prod);
1539
1540	BACK_RING_ATTACH(&queue->rx, rxs, rsp_prod, XEN_PAGE_SIZE);
1541
1542	err = -EIO;
1543	if (req_prod - rsp_prod > RING_SIZE(&queue->rx))
1544	goto err;
1545
1546	return `0`;
1547
1548	err:
1549	xenvif_unmap_frontend_data_rings(queue);
1550	return err;
1551	}
1552
1553	static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
1554	{
1555	/ Dealloc thread must remain running until all inflight*
1556	* packets complete.
1557	*/
1558	return kthread_should_stop() &&
1559	!atomic_read(v: &queue->inflight_packets);
1560	}
1561
1562	int xenvif_dealloc_kthread(void *data)
1563	{
1564	struct xenvif_queue *queue = data;
1565
1566	for (;;) {
1567	wait_event_interruptible(queue->dealloc_wq,
1568	tx_dealloc_work_todo(queue) \|\|
1569	xenvif_dealloc_kthread_should_stop(queue));
1570	if (xenvif_dealloc_kthread_should_stop(queue))
1571	break;
1572
1573	xenvif_tx_dealloc_action(queue);
1574	cond_resched();
1575	}
1576
1577	/ Unmap anything remaining/
1578	if (tx_dealloc_work_todo(queue))
1579	xenvif_tx_dealloc_action(queue);
1580
1581	return `0`;
1582	}
1583
1584	static void make_ctrl_response(struct xenvif *vif,
1585	const struct xen_netif_ctrl_request *req,
1586	u32 status, u32 data)
1587	{
1588	RING_IDX idx = vif->ctrl.rsp_prod_pvt;
1589	struct xen_netif_ctrl_response rsp = {
1590	.id = req->id,
1591	.type = req->type,
1592	.status = status,
1593	.data = data,
1594	};
1595
1596	*RING_GET_RESPONSE(&vif->ctrl, idx) = rsp;
1597	vif->ctrl.rsp_prod_pvt = ++idx;
1598	}
1599
1600	static void push_ctrl_response(struct xenvif *vif)
1601	{
1602	int notify;
1603
1604	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->ctrl, notify);
1605	if (notify)
1606	notify_remote_via_irq(irq: vif->ctrl_irq);
1607	}
1608
1609	static void process_ctrl_request(struct xenvif *vif,
1610	const struct xen_netif_ctrl_request *req)
1611	{
1612	u32 status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED;
1613	u32 data = `0`;
1614
1615	switch (req->type) {
1616	case XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM:
1617	status = xenvif_set_hash_alg(vif, alg: req->data[`0`]);
1618	break;
1619
1620	case XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS:
1621	status = xenvif_get_hash_flags(vif, flags: &data);
1622	break;
1623
1624	case XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS:
1625	status = xenvif_set_hash_flags(vif, flags: req->data[`0`]);
1626	break;
1627
1628	case XEN_NETIF_CTRL_TYPE_SET_HASH_KEY:
1629	status = xenvif_set_hash_key(vif, gref: req->data[`0`],
1630	len: req->data[`1`]);
1631	break;
1632
1633	case XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE:
1634	status = XEN_NETIF_CTRL_STATUS_SUCCESS;
1635	data = XEN_NETBK_MAX_HASH_MAPPING_SIZE;
1636	break;
1637
1638	case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE:
1639	status = xenvif_set_hash_mapping_size(vif,
1640	size: req->data[`0`]);
1641	break;
1642
1643	case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING:
1644	status = xenvif_set_hash_mapping(vif, gref: req->data[`0`],
1645	len: req->data[`1`],
1646	off: req->data[`2`]);
1647	break;
1648
1649	default:
1650	break;
1651	}
1652
1653	make_ctrl_response(vif, req, status, data);
1654	push_ctrl_response(vif);
1655	}
1656
1657	static void xenvif_ctrl_action(struct xenvif *vif)
1658	{
1659	for (;;) {
1660	RING_IDX req_prod, req_cons;
1661
1662	req_prod = vif->ctrl.sring->req_prod;
1663	req_cons = vif->ctrl.req_cons;
1664
1665	/ Make sure we can see requests before we process them. /
1666	rmb();
1667
1668	if (req_cons == req_prod)
1669	break;
1670
1671	while (req_cons != req_prod) {
1672	struct xen_netif_ctrl_request req;
1673
1674	RING_COPY_REQUEST(&vif->ctrl, req_cons, &req);
1675	req_cons++;
1676
1677	process_ctrl_request(vif, req: &req);
1678	}
1679
1680	vif->ctrl.req_cons = req_cons;
1681	vif->ctrl.sring->req_event = req_cons + `1`;
1682	}
1683	}
1684
1685	static bool xenvif_ctrl_work_todo(struct xenvif *vif)
1686	{
1687	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->ctrl)))
1688	return true;
1689
1690	return false;
1691	}
1692
1693	irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data)
1694	{
1695	struct xenvif *vif = data;
1696	unsigned int eoi_flag = XEN_EOI_FLAG_SPURIOUS;
1697
1698	while (xenvif_ctrl_work_todo(vif)) {
1699	xenvif_ctrl_action(vif);
1700	eoi_flag = `0`;
1701	}
1702
1703	xen_irq_lateeoi(irq, eoi_flags: eoi_flag);
1704
1705	return IRQ_HANDLED;
1706	}
1707
1708	static int __init netback_init(void)
1709	{
1710	int rc = `0`;
1711
1712	if (!xen_domain())
1713	return -ENODEV;
1714
1715	/ Allow as many queues as there are CPUs but max. 8 if user has not*
1716	* specified a value.
1717	*/
1718	if (xenvif_max_queues == `0`)
1719	xenvif_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,
1720	num_online_cpus());
1721
1722	if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
1723	pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
1724	fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
1725	fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
1726	}
1727
1728	rc = xenvif_xenbus_init();
1729	if (rc)
1730	goto failed_init;
1731
1732	#ifdef CONFIG_DEBUG_FS
1733	xen_netback_dbg_root = debugfs_create_dir(name: "xen-netback", NULL);
1734	#endif /* CONFIG_DEBUG_FS */
1735
1736	return `0`;
1737
1738	failed_init:
1739	return rc;
1740	}
1741
1742	module_init(netback_init);
1743
1744	static void __exit netback_fini(void)
1745	{
1746	#ifdef CONFIG_DEBUG_FS
1747	debugfs_remove_recursive(dentry: xen_netback_dbg_root);
1748	#endif /* CONFIG_DEBUG_FS */
1749	xenvif_xenbus_fini();
1750	}
1751	module_exit(netback_fini);
1752
1753	MODULE_LICENSE("Dual BSD/GPL");
1754	MODULE_ALIAS("xen-backend:vif");
1755

source code of linux/drivers/net/xen-netback/netback.c