virtio_ring.c source code [linux/drivers/virtio/virtio_ring.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
/* Virtio ring implementation.
 *
 *  Copyright 2007 Rusty Russell IBM Corporation
 */
6	#include <linux/virtio.h>
7	#include <linux/virtio_ring.h>
8	#include <linux/virtio_config.h>
9	#include <linux/device.h>
10	#include <linux/slab.h>
11	#include <linux/module.h>
12	#include <linux/hrtimer.h>
13	#include <linux/dma-mapping.h>
14	#include <linux/kmsan.h>
15	#include <linux/spinlock.h>
16	#include <xen/xen.h>
17
/*
 * Ring-debugging helpers.
 *
 * With DEBUG defined, a bad ring is fatal (BUG()), re-entry into a queue
 * is detected via vq->in_use, and a warning is raised when more than
 * 100ms elapse between an add and the following kick/get.
 *
 * Without DEBUG, BAD_RING() logs the problem and marks the queue broken;
 * all other helpers compile to nothing.
 */
#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...) \
	do { \
		dev_err(&(_vq)->vq.vdev->dev, \
			"%s:"fmt, (_vq)->vq.name, ##args); \
		BUG(); \
	} while (0)
/* Caller is supposed to guarantee no reentry. */
#define START_USE(_vq) \
	do { \
		if ((_vq)->in_use) \
			panic("%s:in_use = %i\n", \
			      (_vq)->vq.name, (_vq)->in_use); \
		(_vq)->in_use = __LINE__; \
	} while (0)
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
#define LAST_ADD_TIME_UPDATE(_vq) \
	do { \
		ktime_t now = ktime_get(); \
		\
		/* No kick or get, with .1 second between?  Warn. */ \
		if ((_vq)->last_add_time_valid) \
			WARN_ON(ktime_to_ms(ktime_sub(now, \
				(_vq)->last_add_time)) > 100); \
		(_vq)->last_add_time = now; \
		(_vq)->last_add_time_valid = true; \
	} while (0)
#define LAST_ADD_TIME_CHECK(_vq) \
	do { \
		if ((_vq)->last_add_time_valid) { \
			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
				      (_vq)->last_add_time)) > 100); \
		} \
	} while (0)
#define LAST_ADD_TIME_INVALID(_vq) \
	((_vq)->last_add_time_valid = false)
#else
/*
 * Production variant: report the problem and flag the queue as broken
 * so later operations on it fail instead of crashing the kernel.
 * The macro argument is parenthesized so any pointer expression works.
 */
#define BAD_RING(_vq, fmt, args...) \
	do { \
		dev_err(&(_vq)->vq.vdev->dev, \
			"%s:"fmt, (_vq)->vq.name, ##args); \
		(_vq)->broken = true; \
	} while (0)
#define START_USE(vq)
#define END_USE(vq)
#define LAST_ADD_TIME_UPDATE(vq)
#define LAST_ADD_TIME_CHECK(vq)
#define LAST_ADD_TIME_INVALID(vq)
#endif
69
/*
 * Per-head bookkeeping for the split ring.
 *
 * @indir_desc does double duty: virtqueue_add_split() stores either the
 * allocated indirect descriptor table or, when no indirect table is used,
 * the caller supplied context pointer.
 */
struct vring_desc_state_split {
	void *data;			/* Data for callback. */
	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
};
74
75	struct vring_desc_state_packed {
76	void data; /* Data for callback. /
77	struct vring_packed_desc indir_desc; /* Indirect descriptor, if any. /
78	u16 num; / Descriptor list length. /
79	u16 last; / The last desc state in a list. /
80	};
81
82	struct vring_desc_extra {
83	dma_addr_t addr; / Descriptor DMA addr. /
84	u32 len; / Descriptor length. /
85	u16 flags; / Descriptor flags. /
86	u16 next; / The next desc state in a list. /
87	};
88
89	struct vring_virtqueue_split {
90	/ Actual memory layout for this queue. /
91	struct vring vring;
92
93	/ Last written value to avail->flags /
94	u16 avail_flags_shadow;
95
96	/*
97	* Last written value to avail->idx in
98	* guest byte order.
99	*/
100	u16 avail_idx_shadow;
101
102	/ Per-descriptor state. /
103	struct vring_desc_state_split *desc_state;
104	struct vring_desc_extra *desc_extra;
105
106	/ DMA address and size information /
107	dma_addr_t queue_dma_addr;
108	size_t queue_size_in_bytes;
109
110	/*
111	* The parameters for creating vrings are reserved for creating new
112	* vring.
113	*/
114	u32 vring_align;
115	bool may_reduce_num;
116	};
117
118	struct vring_virtqueue_packed {
119	/ Actual memory layout for this queue. /
120	struct {
121	unsigned int num;
122	struct vring_packed_desc *desc;
123	struct vring_packed_desc_event *driver;
124	struct vring_packed_desc_event *device;
125	} vring;
126
127	/ Driver ring wrap counter. /
128	bool avail_wrap_counter;
129
130	/ Avail used flags. /
131	u16 avail_used_flags;
132
133	/ Index of the next avail descriptor. /
134	u16 next_avail_idx;
135
136	/*
137	* Last written value to driver->flags in
138	* guest byte order.
139	*/
140	u16 event_flags_shadow;
141
142	/ Per-descriptor state. /
143	struct vring_desc_state_packed *desc_state;
144	struct vring_desc_extra *desc_extra;
145
146	/ DMA address and size information /
147	dma_addr_t ring_dma_addr;
148	dma_addr_t driver_event_dma_addr;
149	dma_addr_t device_event_dma_addr;
150	size_t ring_size_in_bytes;
151	size_t event_size_in_bytes;
152	};
153
154	struct vring_virtqueue {
155	struct virtqueue vq;
156
157	/ Is this a packed ring? /
158	bool packed_ring;
159
160	/ Is DMA API used? /
161	bool use_dma_api;
162
163	/ Can we use weak barriers? /
164	bool weak_barriers;
165
166	/ Other side has made a mess, don't try any more. /
167	bool broken;
168
169	/ Host supports indirect buffers /
170	bool indirect;
171
172	/ Host publishes avail event idx /
173	bool event;
174
175	/ Do DMA mapping by driver /
176	bool premapped;
177
178	/ Do unmap or not for desc. Just when premapped is False and*
179	* use_dma_api is true, this is true.
180	*/
181	bool do_unmap;
182
183	/ Head of free buffer list. /
184	unsigned int free_head;
185	/ Number we've added since last sync. /
186	unsigned int num_added;
187
188	/ Last used index we've seen.*
189	* for split ring, it just contains last used index
190	* for packed ring:
191	* bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
192	* bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter.
193	*/
194	u16 last_used_idx;
195
196	/ Hint for event idx: already triggered no need to disable. /
197	bool event_triggered;
198
199	union {
200	/ Available for split ring /
201	struct vring_virtqueue_split split;
202
203	/ Available for packed ring /
204	struct vring_virtqueue_packed packed;
205	};
206
207	/ How to notify other side. FIXME: commonalize hcalls! /
208	bool (notify)(struct* virtqueue *vq);
209
210	/ DMA, allocation, and size information /
211	bool we_own_ring;
212
213	/ Device used for doing DMA /
214	struct device *dma_dev;
215
216	#ifdef DEBUG
217	/ They're supposed to lock for us. /
218	unsigned int in_use;
219
220	/ Figure out if their kicks are too delayed. /
221	bool last_add_time_valid;
222	ktime_t last_add_time;
223	#endif
224	};
225
226	static struct virtqueue __vring_new_virtqueue(unsigned* int index,
227	struct vring_virtqueue_split *vring_split,
228	struct virtio_device *vdev,
229	bool weak_barriers,
230	bool context,
231	bool (notify)(struct* virtqueue *),
232	void (callback)(struct* virtqueue *),
233	const char *name,
234	struct device *dma_dev);
235	static struct vring_desc_extra vring_alloc_desc_extra(unsigned* int num);
236	static void vring_free(struct virtqueue *_vq);
237
238	/*
239	* Helpers.
240	*/
241
/* Map a struct virtqueue pointer back to its enclosing vring_virtqueue. */
#define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq)
243
244	static bool virtqueue_use_indirect(const struct vring_virtqueue *vq,
245	unsigned int total_sg)
246	{
247	/*
248	* If the host supports indirect descriptor tables, and we have multiple
249	* buffers, then go indirect. FIXME: tune this threshold
250	*/
251	return (vq->indirect && total_sg > `1` && vq->vq.num_free);
252	}
253
254	/*
255	* Modern virtio devices have feature bits to specify whether they need a
256	* quirk and bypass the IOMMU. If not there, just use the DMA API.
257	*
258	* If there, the interaction between virtio and DMA API is messy.
259	*
260	* On most systems with virtio, physical addresses match bus addresses,
261	* and it doesn't particularly matter whether we use the DMA API.
262	*
263	* On some systems, including Xen and any system with a physical device
264	* that speaks virtio behind a physical IOMMU, we must use the DMA API
265	* for virtio DMA to work at all.
266	*
267	* On other systems, including SPARC and PPC64, virtio-pci devices are
268	* enumerated as though they are behind an IOMMU, but the virtio host
269	* ignores the IOMMU, so we must either pretend that the IOMMU isn't
270	* there or somehow map everything as the identity.
271	*
272	* For the time being, we preserve historic behavior and bypass the DMA
273	* API.
274	*
275	* TODO: install a per-device DMA ops structure that does the right thing
276	* taking into account all the above quirks, and use the DMA API
277	* unconditionally on data path.
278	*/
279
280	static bool vring_use_dma_api(const struct virtio_device *vdev)
281	{
282	if (!virtio_has_dma_quirk(vdev))
283	return true;
284
285	/ Otherwise, we are left to guess. /
286	/*
287	* In theory, it's possible to have a buggy QEMU-supposed
288	* emulated Q35 IOMMU and Xen enabled at the same time. On
289	* such a configuration, virtio has never worked and will
290	* not work without an even larger kludge. Instead, enable
291	* the DMA API if we're a Xen guest, which at least allows
292	* all of the sensible Xen configurations to work correctly.
293	*/
294	if (xen_domain())
295	return true;
296
297	return false;
298	}
299
300	size_t virtio_max_dma_size(const struct virtio_device *vdev)
301	{
302	size_t max_segment_size = SIZE_MAX;
303
304	if (vring_use_dma_api(vdev))
305	max_segment_size = dma_max_mapping_size(dev: vdev->dev.parent);
306
307	return max_segment_size;
308	}
309	EXPORT_SYMBOL_GPL(virtio_max_dma_size);
310
311	static void vring_alloc_queue(struct* virtio_device *vdev, size_t size,
312	dma_addr_t *dma_handle, gfp_t flag,
313	struct device *dma_dev)
314	{
315	if (vring_use_dma_api(vdev)) {
316	return dma_alloc_coherent(dev: dma_dev, size,
317	dma_handle, gfp: flag);
318	} else {
319	void *queue = alloc_pages_exact(PAGE_ALIGN(size), gfp_mask: flag);
320
321	if (queue) {
322	phys_addr_t phys_addr = virt_to_phys(address: queue);
323	*dma_handle = (dma_addr_t)phys_addr;
324
325	/*
326	* Sanity check: make sure we dind't truncate
327	* the address. The only arches I can find that
328	* have 64-bit phys_addr_t but 32-bit dma_addr_t
329	* are certain non-highmem MIPS and x86
330	* configurations, but these configurations
331	* should never allocate physical pages above 32
332	* bits, so this is fine. Just in case, throw a
333	* warning and abort if we end up with an
334	* unrepresentable address.
335	*/
336	if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
337	free_pages_exact(virt: queue, PAGE_ALIGN(size));
338	return NULL;
339	}
340	}
341	return queue;
342	}
343	}
344
345	static void vring_free_queue(struct virtio_device *vdev, size_t size,
346	void *queue, dma_addr_t dma_handle,
347	struct device *dma_dev)
348	{
349	if (vring_use_dma_api(vdev))
350	dma_free_coherent(dev: dma_dev, size, cpu_addr: queue, dma_handle);
351	else
352	free_pages_exact(virt: queue, PAGE_ALIGN(size));
353	}
354
355	/*
356	* The DMA ops on various arches are rather gnarly right now, and
357	* making all of the arch DMA ops work on the vring device itself
358	* is a mess.
359	*/
360	static struct device vring_dma_dev(const* struct vring_virtqueue *vq)
361	{
362	return vq->dma_dev;
363	}
364
365	/ Map one sg entry. /
366	static int vring_map_one_sg(const struct vring_virtqueue vq, struct* scatterlist *sg,
367	enum dma_data_direction direction, dma_addr_t *addr)
368	{
369	if (vq->premapped) {
370	*addr = sg_dma_address(sg);
371	return `0`;
372	}
373
374	if (!vq->use_dma_api) {
375	/*
376	* If DMA is not used, KMSAN doesn't know that the scatterlist
377	* is initialized by the hardware. Explicitly check/unpoison it
378	* depending on the direction.
379	*/
380	kmsan_handle_dma(page: sg_page(sg), offset: sg->offset, size: sg->length, dir: direction);
381	*addr = (dma_addr_t)sg_phys(sg);
382	return `0`;
383	}
384
385	/*
386	* We can't use dma_map_sg, because we don't use scatterlists in
387	* the way it expects (we don't guarantee that the scatterlist
388	* will exist for the lifetime of the mapping).
389	*/
390	*addr = dma_map_page(vring_dma_dev(vq),
391	sg_page(sg), sg->offset, sg->length,
392	direction);
393
394	if (dma_mapping_error(dev: vring_dma_dev(vq), dma_addr: *addr))
395	return -ENOMEM;
396
397	return `0`;
398	}
399
400	static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
401	void *cpu_addr, size_t size,
402	enum dma_data_direction direction)
403	{
404	if (!vq->use_dma_api)
405	return (dma_addr_t)virt_to_phys(address: cpu_addr);
406
407	return dma_map_single(vring_dma_dev(vq),
408	cpu_addr, size, direction);
409	}
410
411	static int vring_mapping_error(const struct vring_virtqueue *vq,
412	dma_addr_t addr)
413	{
414	if (!vq->use_dma_api)
415	return `0`;
416
417	return dma_mapping_error(dev: vring_dma_dev(vq), dma_addr: addr);
418	}
419
420	static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
421	{
422	vq->vq.num_free = num;
423
424	if (vq->packed_ring)
425	vq->last_used_idx = `0` \| (`1` << VRING_PACKED_EVENT_F_WRAP_CTR);
426	else
427	vq->last_used_idx = `0`;
428
429	vq->event_triggered = false;
430	vq->num_added = `0`;
431
432	#ifdef DEBUG
433	vq->in_use = false;
434	vq->last_add_time_valid = false;
435	#endif
436	}
437
438
439	/*
440	* Split ring specific functions - *_split().
441	*/
442
443	static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
444	const struct vring_desc *desc)
445	{
446	u16 flags;
447
448	if (!vq->do_unmap)
449	return;
450
451	flags = virtio16_to_cpu(vdev: vq->vq.vdev, val: desc->flags);
452
453	dma_unmap_page(vring_dma_dev(vq),
454	virtio64_to_cpu(vq->vq.vdev, desc->addr),
455	virtio32_to_cpu(vq->vq.vdev, desc->len),
456	(flags & VRING_DESC_F_WRITE) ?
457	DMA_FROM_DEVICE : DMA_TO_DEVICE);
458	}
459
460	static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
461	unsigned int i)
462	{
463	struct vring_desc_extra *extra = vq->split.desc_extra;
464	u16 flags;
465
466	flags = extra[i].flags;
467
468	if (flags & VRING_DESC_F_INDIRECT) {
469	if (!vq->use_dma_api)
470	goto out;
471
472	dma_unmap_single(vring_dma_dev(vq),
473	extra[i].addr,
474	extra[i].len,
475	(flags & VRING_DESC_F_WRITE) ?
476	DMA_FROM_DEVICE : DMA_TO_DEVICE);
477	} else {
478	if (!vq->do_unmap)
479	goto out;
480
481	dma_unmap_page(vring_dma_dev(vq),
482	extra[i].addr,
483	extra[i].len,
484	(flags & VRING_DESC_F_WRITE) ?
485	DMA_FROM_DEVICE : DMA_TO_DEVICE);
486	}
487
488	out:
489	return extra[i].next;
490	}
491
492	static struct vring_desc alloc_indirect_split(struct* virtqueue *_vq,
493	unsigned int total_sg,
494	gfp_t gfp)
495	{
496	struct vring_desc *desc;
497	unsigned int i;
498
499	/*
500	* We require lowmem mappings for the descriptors because
501	* otherwise virt_to_phys will give us bogus addresses in the
502	* virtqueue.
503	*/
504	gfp &= ~__GFP_HIGHMEM;
505
506	desc = kmalloc_array(n: total_sg, size: sizeof(struct vring_desc), flags: gfp);
507	if (!desc)
508	return NULL;
509
510	for (i = `0`; i < total_sg; i++)
511	desc[i].next = cpu_to_virtio16(vdev: _vq->vdev, val: i + `1`);
512	return desc;
513	}
514
515	static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
516	struct vring_desc *desc,
517	unsigned int i,
518	dma_addr_t addr,
519	unsigned int len,
520	u16 flags,
521	bool indirect)
522	{
523	struct vring_virtqueue *vring = to_vvq(vq);
524	struct vring_desc_extra *extra = vring->split.desc_extra;
525	u16 next;
526
527	desc[i].flags = cpu_to_virtio16(vdev: vq->vdev, val: flags);
528	desc[i].addr = cpu_to_virtio64(vdev: vq->vdev, val: addr);
529	desc[i].len = cpu_to_virtio32(vdev: vq->vdev, val: len);
530
531	if (!indirect) {
532	next = extra[i].next;
533	desc[i].next = cpu_to_virtio16(vdev: vq->vdev, val: next);
534
535	extra[i].addr = addr;
536	extra[i].len = len;
537	extra[i].flags = flags;
538	} else
539	next = virtio16_to_cpu(vdev: vq->vdev, val: desc[i].next);
540
541	return next;
542	}
543
544	static inline int virtqueue_add_split(struct virtqueue *_vq,
545	struct scatterlist *sgs[],
546	unsigned int total_sg,
547	unsigned int out_sgs,
548	unsigned int in_sgs,
549	void *data,
550	void *ctx,
551	gfp_t gfp)
552	{
553	struct vring_virtqueue *vq = to_vvq(_vq);
554	struct scatterlist *sg;
555	struct vring_desc *desc;
556	unsigned int i, n, avail, descs_used, prev, err_idx;
557	int head;
558	bool indirect;
559
560	START_USE(vq);
561
562	BUG_ON(data == NULL);
563	BUG_ON(ctx && vq->indirect);
564
565	if (unlikely(vq->broken)) {
566	END_USE(vq);
567	return -EIO;
568	}
569
570	LAST_ADD_TIME_UPDATE(vq);
571
572	BUG_ON(total_sg == `0`);
573
574	head = vq->free_head;
575
576	if (virtqueue_use_indirect(vq, total_sg))
577	desc = alloc_indirect_split(_vq, total_sg, gfp);
578	else {
579	desc = NULL;
580	WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
581	}
582
583	if (desc) {
584	/ Use a single buffer which doesn't continue /
585	indirect = true;
586	/ Set up rest to use this indirect table. /
587	i = `0`;
588	descs_used = `1`;
589	} else {
590	indirect = false;
591	desc = vq->split.vring.desc;
592	i = head;
593	descs_used = total_sg;
594	}
595
596	if (unlikely(vq->vq.num_free < descs_used)) {
597	pr_debug("Can't add buf len %i - avail = %i\n",
598	descs_used, vq->vq.num_free);
599	/ FIXME: for historical reasons, we force a notify here if*
600	* there are outgoing parts to the buffer. Presumably the
601	* host should service the ring ASAP. */
602	if (out_sgs)
603	vq->notify(&vq->vq);
604	if (indirect)
605	kfree(objp: desc);
606	END_USE(vq);
607	return -ENOSPC;
608	}
609
610	for (n = `0`; n < out_sgs; n++) {
611	for (sg = sgs[n]; sg; sg = sg_next(sg)) {
612	dma_addr_t addr;
613
614	if (vring_map_one_sg(vq, sg, direction: DMA_TO_DEVICE, addr: &addr))
615	goto unmap_release;
616
617	prev = i;
618	/ Note that we trust indirect descriptor*
619	* table since it use stream DMA mapping.
620	*/
621	i = virtqueue_add_desc_split(vq: _vq, desc, i, addr, len: sg->length,
622	VRING_DESC_F_NEXT,
623	indirect);
624	}
625	}
626	for (; n < (out_sgs + in_sgs); n++) {
627	for (sg = sgs[n]; sg; sg = sg_next(sg)) {
628	dma_addr_t addr;
629
630	if (vring_map_one_sg(vq, sg, direction: DMA_FROM_DEVICE, addr: &addr))
631	goto unmap_release;
632
633	prev = i;
634	/ Note that we trust indirect descriptor*
635	* table since it use stream DMA mapping.
636	*/
637	i = virtqueue_add_desc_split(vq: _vq, desc, i, addr,
638	len: sg->length,
639	VRING_DESC_F_NEXT \|
640	VRING_DESC_F_WRITE,
641	indirect);
642	}
643	}
644	/ Last one doesn't continue. /
645	desc[prev].flags &= cpu_to_virtio16(vdev: _vq->vdev, val: ~VRING_DESC_F_NEXT);
646	if (!indirect && vq->do_unmap)
647	vq->split.desc_extra[prev & (vq->split.vring.num - `1`)].flags &=
648	~VRING_DESC_F_NEXT;
649
650	if (indirect) {
651	/ Now that the indirect table is filled in, map it. /
652	dma_addr_t addr = vring_map_single(
653	vq, cpu_addr: desc, size: total_sg * sizeof(struct vring_desc),
654	direction: DMA_TO_DEVICE);
655	if (vring_mapping_error(vq, addr)) {
656	if (vq->premapped)
657	goto free_indirect;
658
659	goto unmap_release;
660	}
661
662	virtqueue_add_desc_split(vq: _vq, desc: vq->split.vring.desc,
663	i: head, addr,
664	len: total_sg * sizeof(struct vring_desc),
665	VRING_DESC_F_INDIRECT,
666	indirect: false);
667	}
668
669	/ We're using some buffers from the free list. /
670	vq->vq.num_free -= descs_used;
671
672	/ Update free pointer /
673	if (indirect)
674	vq->free_head = vq->split.desc_extra[head].next;
675	else
676	vq->free_head = i;
677
678	/ Store token and indirect buffer state. /
679	vq->split.desc_state[head].data = data;
680	if (indirect)
681	vq->split.desc_state[head].indir_desc = desc;
682	else
683	vq->split.desc_state[head].indir_desc = ctx;
684
685	/ Put entry in available array (but don't update avail->idx until they*
686	* do sync). */
687	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - `1`);
688	vq->split.vring.avail->ring[avail] = cpu_to_virtio16(vdev: _vq->vdev, val: head);
689
690	/ Descriptors and available array need to be set before we expose the*
691	* new available array entries. */
692	virtio_wmb(weak_barriers: vq->weak_barriers);
693	vq->split.avail_idx_shadow++;
694	vq->split.vring.avail->idx = cpu_to_virtio16(vdev: _vq->vdev,
695	val: vq->split.avail_idx_shadow);
696	vq->num_added++;
697
698	pr_debug("Added buffer head %i to %p\n", head, vq);
699	END_USE(vq);
700
701	/ This is very unlikely, but theoretically possible. Kick*
702	* just in case. */
703	if (unlikely(vq->num_added == (`1` << `16`) - `1`))
704	virtqueue_kick(vq: _vq);
705
706	return `0`;
707
708	unmap_release:
709	err_idx = i;
710
711	if (indirect)
712	i = `0`;
713	else
714	i = head;
715
716	for (n = `0`; n < total_sg; n++) {
717	if (i == err_idx)
718	break;
719	if (indirect) {
720	vring_unmap_one_split_indirect(vq, desc: &desc[i]);
721	i = virtio16_to_cpu(vdev: _vq->vdev, val: desc[i].next);
722	} else
723	i = vring_unmap_one_split(vq, i);
724	}
725
726	free_indirect:
727	if (indirect)
728	kfree(objp: desc);
729
730	END_USE(vq);
731	return -ENOMEM;
732	}
733
734	static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
735	{
736	struct vring_virtqueue *vq = to_vvq(_vq);
737	u16 new, old;
738	bool needs_kick;
739
740	START_USE(vq);
741	/ We need to expose available array entries before checking avail*
742	* event. */
743	virtio_mb(weak_barriers: vq->weak_barriers);
744
745	old = vq->split.avail_idx_shadow - vq->num_added;
746	new = vq->split.avail_idx_shadow;
747	vq->num_added = `0`;
748
749	LAST_ADD_TIME_CHECK(vq);
750	LAST_ADD_TIME_INVALID(vq);
751
752	if (vq->event) {
753	needs_kick = vring_need_event(event_idx: virtio16_to_cpu(vdev: _vq->vdev,
754	vring_avail_event(&vq->split.vring)),
755	new_idx: new, old);
756	} else {
757	needs_kick = !(vq->split.vring.used->flags &
758	cpu_to_virtio16(vdev: _vq->vdev,
759	VRING_USED_F_NO_NOTIFY));
760	}
761	END_USE(vq);
762	return needs_kick;
763	}
764
765	static void detach_buf_split(struct vring_virtqueue vq, unsigned* int head,
766	void **ctx)
767	{
768	unsigned int i, j;
769	__virtio16 nextflag = cpu_to_virtio16(vdev: vq->vq.vdev, VRING_DESC_F_NEXT);
770
771	/ Clear data ptr. /
772	vq->split.desc_state[head].data = NULL;
773
774	/ Put back on free list: unmap first-level descriptors and find end /
775	i = head;
776
777	while (vq->split.vring.desc[i].flags & nextflag) {
778	vring_unmap_one_split(vq, i);
779	i = vq->split.desc_extra[i].next;
780	vq->vq.num_free++;
781	}
782
783	vring_unmap_one_split(vq, i);
784	vq->split.desc_extra[i].next = vq->free_head;
785	vq->free_head = head;
786
787	/ Plus final descriptor /
788	vq->vq.num_free++;
789
790	if (vq->indirect) {
791	struct vring_desc *indir_desc =
792	vq->split.desc_state[head].indir_desc;
793	u32 len;
794
795	/ Free the indirect table, if any, now that it's unmapped. /
796	if (!indir_desc)
797	return;
798
799	len = vq->split.desc_extra[head].len;
800
801	BUG_ON(!(vq->split.desc_extra[head].flags &
802	VRING_DESC_F_INDIRECT));
803	BUG_ON(len == `0` \|\| len % sizeof(struct vring_desc));
804
805	if (vq->do_unmap) {
806	for (j = `0`; j < len / sizeof(struct vring_desc); j++)
807	vring_unmap_one_split_indirect(vq, desc: &indir_desc[j]);
808	}
809
810	kfree(objp: indir_desc);
811	vq->split.desc_state[head].indir_desc = NULL;
812	} else if (ctx) {
813	*ctx = vq->split.desc_state[head].indir_desc;
814	}
815	}
816
817	static bool more_used_split(const struct vring_virtqueue *vq)
818	{
819	return vq->last_used_idx != virtio16_to_cpu(vdev: vq->vq.vdev,
820	val: vq->split.vring.used->idx);
821	}
822
823	static void virtqueue_get_buf_ctx_split(struct* virtqueue *_vq,
824	unsigned int *len,
825	void **ctx)
826	{
827	struct vring_virtqueue *vq = to_vvq(_vq);
828	void *ret;
829	unsigned int i;
830	u16 last_used;
831
832	START_USE(vq);
833
834	if (unlikely(vq->broken)) {
835	END_USE(vq);
836	return NULL;
837	}
838
839	if (!more_used_split(vq)) {
840	pr_debug("No more buffers in queue\n");
841	END_USE(vq);
842	return NULL;
843	}
844
845	/ Only get used array entries after they have been exposed by host. /
846	virtio_rmb(weak_barriers: vq->weak_barriers);
847
848	last_used = (vq->last_used_idx & (vq->split.vring.num - `1`));
849	i = virtio32_to_cpu(vdev: _vq->vdev,
850	val: vq->split.vring.used->ring[last_used].id);
851	*len = virtio32_to_cpu(vdev: _vq->vdev,
852	val: vq->split.vring.used->ring[last_used].len);
853
854	if (unlikely(i >= vq->split.vring.num)) {
855	BAD_RING(vq, "id %u out of range\n", i);
856	return NULL;
857	}
858	if (unlikely(!vq->split.desc_state[i].data)) {
859	BAD_RING(vq, "id %u is not a head!\n", i);
860	return NULL;
861	}
862
863	/ detach_buf_split clears data, so grab it now. /
864	ret = vq->split.desc_state[i].data;
865	detach_buf_split(vq, head: i, ctx);
866	vq->last_used_idx++;
867	/ If we expect an interrupt for the next entry, tell host*
868	* by writing event index and flush out the write before
869	* the read in the next get_buf call. */
870	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
871	virtio_store_mb(vq->weak_barriers,
872	&vring_used_event(&vq->split.vring),
873	cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
874
875	LAST_ADD_TIME_INVALID(vq);
876
877	END_USE(vq);
878	return ret;
879	}
880
881	static void virtqueue_disable_cb_split(struct virtqueue *_vq)
882	{
883	struct vring_virtqueue *vq = to_vvq(_vq);
884
885	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
886	vq->split.avail_flags_shadow \|= VRING_AVAIL_F_NO_INTERRUPT;
887
888	/*
889	* If device triggered an event already it won't trigger one again:
890	* no need to disable.
891	*/
892	if (vq->event_triggered)
893	return;
894
895	if (vq->event)
896	/ TODO: this is a hack. Figure out a cleaner value to write. /
897	vring_used_event(&vq->split.vring) = `0x0`;
898	else
899	vq->split.vring.avail->flags =
900	cpu_to_virtio16(vdev: _vq->vdev,
901	val: vq->split.avail_flags_shadow);
902	}
903	}
904
905	static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
906	{
907	struct vring_virtqueue *vq = to_vvq(_vq);
908	u16 last_used_idx;
909
910	START_USE(vq);
911
912	/ We optimistically turn back on interrupts, then check if there was*
913	* more to do. */
914	/ Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to*
915	* either clear the flags bit or point the event index at the next
916	* entry. Always do both to keep code simple. */
917	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
918	vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
919	if (!vq->event)
920	vq->split.vring.avail->flags =
921	cpu_to_virtio16(vdev: _vq->vdev,
922	val: vq->split.avail_flags_shadow);
923	}
924	vring_used_event(&vq->split.vring) = cpu_to_virtio16(vdev: _vq->vdev,
925	val: last_used_idx = vq->last_used_idx);
926	END_USE(vq);
927	return last_used_idx;
928	}
929
930	static bool virtqueue_poll_split(struct virtqueue _vq, unsigned* int last_used_idx)
931	{
932	struct vring_virtqueue *vq = to_vvq(_vq);
933
934	return (u16)last_used_idx != virtio16_to_cpu(vdev: _vq->vdev,
935	val: vq->split.vring.used->idx);
936	}
937
938	static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
939	{
940	struct vring_virtqueue *vq = to_vvq(_vq);
941	u16 bufs;
942
943	START_USE(vq);
944
945	/ We optimistically turn back on interrupts, then check if there was*
946	* more to do. */
947	/ Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to*
948	* either clear the flags bit or point the event index at the next
949	* entry. Always update the event index to keep code simple. */
950	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
951	vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
952	if (!vq->event)
953	vq->split.vring.avail->flags =
954	cpu_to_virtio16(vdev: _vq->vdev,
955	val: vq->split.avail_flags_shadow);
956	}
957	/ TODO: tune this threshold /
958	bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * `3` / `4`;
959
960	virtio_store_mb(vq->weak_barriers,
961	&vring_used_event(&vq->split.vring),
962	cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
963
964	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
965	- vq->last_used_idx) > bufs)) {
966	END_USE(vq);
967	return false;
968	}
969
970	END_USE(vq);
971	return true;
972	}
973
974	static void virtqueue_detach_unused_buf_split(struct* virtqueue *_vq)
975	{
976	struct vring_virtqueue *vq = to_vvq(_vq);
977	unsigned int i;
978	void *buf;
979
980	START_USE(vq);
981
982	for (i = `0`; i < vq->split.vring.num; i++) {
983	if (!vq->split.desc_state[i].data)
984	continue;
985	/ detach_buf_split clears data, so grab it now. /
986	buf = vq->split.desc_state[i].data;
987	detach_buf_split(vq, head: i, NULL);
988	vq->split.avail_idx_shadow--;
989	vq->split.vring.avail->idx = cpu_to_virtio16(vdev: _vq->vdev,
990	val: vq->split.avail_idx_shadow);
991	END_USE(vq);
992	return buf;
993	}
994	/ That should have freed everything. /
995	BUG_ON(vq->vq.num_free != vq->split.vring.num);
996
997	END_USE(vq);
998	return NULL;
999	}
1000
1001	static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
1002	struct vring_virtqueue *vq)
1003	{
1004	struct virtio_device *vdev;
1005
1006	vdev = vq->vq.vdev;
1007
1008	vring_split->avail_flags_shadow = `0`;
1009	vring_split->avail_idx_shadow = `0`;
1010
1011	/ No callback? Tell other side not to bother us. /
1012	if (!vq->vq.callback) {
1013	vring_split->avail_flags_shadow \|= VRING_AVAIL_F_NO_INTERRUPT;
1014	if (!vq->event)
1015	vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
1016	val: vring_split->avail_flags_shadow);
1017	}
1018	}
1019
1020	static void virtqueue_reinit_split(struct vring_virtqueue *vq)
1021	{
1022	int num;
1023
1024	num = vq->split.vring.num;
1025
1026	vq->split.vring.avail->flags = `0`;
1027	vq->split.vring.avail->idx = `0`;
1028
1029	/ reset avail event /
1030	vq->split.vring.avail->ring[num] = `0`;
1031
1032	vq->split.vring.used->flags = `0`;
1033	vq->split.vring.used->idx = `0`;
1034
1035	/ reset used event /
1036	(__virtio16 )&(vq->split.vring.used->ring[num]) = `0`;
1037
1038	virtqueue_init(vq, num);
1039
1040	virtqueue_vring_init_split(vring_split: &vq->split, vq);
1041	}
1042
1043	static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
1044	struct vring_virtqueue_split *vring_split)
1045	{
1046	vq->split = *vring_split;
1047
1048	/ Put everything in free lists. /
1049	vq->free_head = `0`;
1050	}
1051
1052	static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
1053	{
1054	struct vring_desc_state_split *state;
1055	struct vring_desc_extra *extra;
1056	u32 num = vring_split->vring.num;
1057
1058	state = kmalloc_array(n: num, size: sizeof(struct vring_desc_state_split), GFP_KERNEL);
1059	if (!state)
1060	goto err_state;
1061
1062	extra = vring_alloc_desc_extra(num);
1063	if (!extra)
1064	goto err_extra;
1065
1066	memset(state, `0`, num * sizeof(struct vring_desc_state_split));
1067
1068	vring_split->desc_state = state;
1069	vring_split->desc_extra = extra;
1070	return `0`;
1071
1072	err_extra:
1073	kfree(objp: state);
1074	err_state:
1075	return -ENOMEM;
1076	}
1077
1078	static void vring_free_split(struct vring_virtqueue_split *vring_split,
1079	struct virtio_device vdev, struct* device *dma_dev)
1080	{
1081	vring_free_queue(vdev, size: vring_split->queue_size_in_bytes,
1082	queue: vring_split->vring.desc,
1083	dma_handle: vring_split->queue_dma_addr,
1084	dma_dev);
1085
1086	kfree(objp: vring_split->desc_state);
1087	kfree(objp: vring_split->desc_extra);
1088	}
1089
1090	static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
1091	struct virtio_device *vdev,
1092	u32 num,
1093	unsigned int vring_align,
1094	bool may_reduce_num,
1095	struct device *dma_dev)
1096	{
1097	void *queue = NULL;
1098	dma_addr_t dma_addr;
1099
1100	/ We assume num is a power of 2. /
1101	if (!is_power_of_2(n: num)) {
1102	dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
1103	return -EINVAL;
1104	}
1105
1106	/ TODO: allocate each queue chunk individually /
1107	for (; num && vring_size(num, align: vring_align) > PAGE_SIZE; num /= `2`) {
1108	queue = vring_alloc_queue(vdev, size: vring_size(num, align: vring_align),
1109	dma_handle: &dma_addr,
1110	GFP_KERNEL \| __GFP_NOWARN \| __GFP_ZERO,
1111	dma_dev);
1112	if (queue)
1113	break;
1114	if (!may_reduce_num)
1115	return -ENOMEM;
1116	}
1117
1118	if (!num)
1119	return -ENOMEM;
1120
1121	if (!queue) {
1122	/ Try to get a single page. You are my only hope! /
1123	queue = vring_alloc_queue(vdev, size: vring_size(num, align: vring_align),
1124	dma_handle: &dma_addr, GFP_KERNEL \| __GFP_ZERO,
1125	dma_dev);
1126	}
1127	if (!queue)
1128	return -ENOMEM;
1129
1130	vring_init(vr: &vring_split->vring, num, p: queue, align: vring_align);
1131
1132	vring_split->queue_dma_addr = dma_addr;
1133	vring_split->queue_size_in_bytes = vring_size(num, align: vring_align);
1134
1135	vring_split->vring_align = vring_align;
1136	vring_split->may_reduce_num = may_reduce_num;
1137
1138	return `0`;
1139	}
1140
1141	static struct virtqueue *vring_create_virtqueue_split(
1142	unsigned int index,
1143	unsigned int num,
1144	unsigned int vring_align,
1145	struct virtio_device *vdev,
1146	bool weak_barriers,
1147	bool may_reduce_num,
1148	bool context,
1149	bool (notify)(struct* virtqueue *),
1150	void (callback)(struct* virtqueue *),
1151	const char *name,
1152	struct device *dma_dev)
1153	{
1154	struct vring_virtqueue_split vring_split = {};
1155	struct virtqueue *vq;
1156	int err;
1157
1158	err = vring_alloc_queue_split(vring_split: &vring_split, vdev, num, vring_align,
1159	may_reduce_num, dma_dev);
1160	if (err)
1161	return NULL;
1162
1163	vq = __vring_new_virtqueue(index, vring_split: &vring_split, vdev, weak_barriers,
1164	context, notify, callback, name, dma_dev);
1165	if (!vq) {
1166	vring_free_split(vring_split: &vring_split, vdev, dma_dev);
1167	return NULL;
1168	}
1169
1170	to_vvq(vq)->we_own_ring = true;
1171
1172	return vq;
1173	}
1174
1175	static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
1176	{
1177	struct vring_virtqueue_split vring_split = {};
1178	struct vring_virtqueue *vq = to_vvq(_vq);
1179	struct virtio_device *vdev = _vq->vdev;
1180	int err;
1181
1182	err = vring_alloc_queue_split(vring_split: &vring_split, vdev, num,
1183	vring_align: vq->split.vring_align,
1184	may_reduce_num: vq->split.may_reduce_num,
1185	dma_dev: vring_dma_dev(vq));
1186	if (err)
1187	goto err;
1188
1189	err = vring_alloc_state_extra_split(vring_split: &vring_split);
1190	if (err)
1191	goto err_state_extra;
1192
1193	vring_free(vq: &vq->vq);
1194
1195	virtqueue_vring_init_split(vring_split: &vring_split, vq);
1196
1197	virtqueue_init(vq, num: vring_split.vring.num);
1198	virtqueue_vring_attach_split(vq, vring_split: &vring_split);
1199
1200	return `0`;
1201
1202	err_state_extra:
1203	vring_free_split(vring_split: &vring_split, vdev, dma_dev: vring_dma_dev(vq));
1204	err:
1205	virtqueue_reinit_split(vq);
1206	return -ENOMEM;
1207	}
1208
1209
1210	/*
1211	* Packed ring specific functions - *_packed().
1212	*/
1213	static bool packed_used_wrap_counter(u16 last_used_idx)
1214	{
1215	return !!(last_used_idx & (`1` << VRING_PACKED_EVENT_F_WRAP_CTR));
1216	}
1217
1218	static u16 packed_last_used(u16 last_used_idx)
1219	{
1220	return last_used_idx & ~(-(`1` << VRING_PACKED_EVENT_F_WRAP_CTR));
1221	}
1222
1223	static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
1224	const struct vring_desc_extra *extra)
1225	{
1226	u16 flags;
1227
1228	flags = extra->flags;
1229
1230	if (flags & VRING_DESC_F_INDIRECT) {
1231	if (!vq->use_dma_api)
1232	return;
1233
1234	dma_unmap_single(vring_dma_dev(vq),
1235	extra->addr, extra->len,
1236	(flags & VRING_DESC_F_WRITE) ?
1237	DMA_FROM_DEVICE : DMA_TO_DEVICE);
1238	} else {
1239	if (!vq->do_unmap)
1240	return;
1241
1242	dma_unmap_page(vring_dma_dev(vq),
1243	extra->addr, extra->len,
1244	(flags & VRING_DESC_F_WRITE) ?
1245	DMA_FROM_DEVICE : DMA_TO_DEVICE);
1246	}
1247	}
1248
1249	static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1250	const struct vring_packed_desc *desc)
1251	{
1252	u16 flags;
1253
1254	if (!vq->do_unmap)
1255	return;
1256
1257	flags = le16_to_cpu(desc->flags);
1258
1259	dma_unmap_page(vring_dma_dev(vq),
1260	le64_to_cpu(desc->addr),
1261	le32_to_cpu(desc->len),
1262	(flags & VRING_DESC_F_WRITE) ?
1263	DMA_FROM_DEVICE : DMA_TO_DEVICE);
1264	}
1265
1266	static struct vring_packed_desc alloc_indirect_packed(unsigned* int total_sg,
1267	gfp_t gfp)
1268	{
1269	struct vring_packed_desc *desc;
1270
1271	/*
1272	* We require lowmem mappings for the descriptors because
1273	* otherwise virt_to_phys will give us bogus addresses in the
1274	* virtqueue.
1275	*/
1276	gfp &= ~__GFP_HIGHMEM;
1277
1278	desc = kmalloc_array(n: total_sg, size: sizeof(struct vring_packed_desc), flags: gfp);
1279
1280	return desc;
1281	}
1282
1283	static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1284	struct scatterlist *sgs[],
1285	unsigned int total_sg,
1286	unsigned int out_sgs,
1287	unsigned int in_sgs,
1288	void *data,
1289	gfp_t gfp)
1290	{
1291	struct vring_packed_desc *desc;
1292	struct scatterlist *sg;
1293	unsigned int i, n, err_idx;
1294	u16 head, id;
1295	dma_addr_t addr;
1296
1297	head = vq->packed.next_avail_idx;
1298	desc = alloc_indirect_packed(total_sg, gfp);
1299	if (!desc)
1300	return -ENOMEM;
1301
1302	if (unlikely(vq->vq.num_free < `1`)) {
1303	pr_debug("Can't add buf len 1 - avail = 0\n");
1304	kfree(objp: desc);
1305	END_USE(vq);
1306	return -ENOSPC;
1307	}
1308
1309	i = `0`;
1310	id = vq->free_head;
1311	BUG_ON(id == vq->packed.vring.num);
1312
1313	for (n = `0`; n < out_sgs + in_sgs; n++) {
1314	for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1315	if (vring_map_one_sg(vq, sg, direction: n < out_sgs ?
1316	DMA_TO_DEVICE : DMA_FROM_DEVICE, addr: &addr))
1317	goto unmap_release;
1318
1319	desc[i].flags = cpu_to_le16(n < out_sgs ?
1320	`0` : VRING_DESC_F_WRITE);
1321	desc[i].addr = cpu_to_le64(addr);
1322	desc[i].len = cpu_to_le32(sg->length);
1323	i++;
1324	}
1325	}
1326
1327	/ Now that the indirect table is filled in, map it. /
1328	addr = vring_map_single(vq, cpu_addr: desc,
1329	size: total_sg * sizeof(struct vring_packed_desc),
1330	direction: DMA_TO_DEVICE);
1331	if (vring_mapping_error(vq, addr)) {
1332	if (vq->premapped)
1333	goto free_desc;
1334
1335	goto unmap_release;
1336	}
1337
1338	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1339	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1340	sizeof(struct vring_packed_desc));
1341	vq->packed.vring.desc[head].id = cpu_to_le16(id);
1342
1343	if (vq->do_unmap) {
1344	vq->packed.desc_extra[id].addr = addr;
1345	vq->packed.desc_extra[id].len = total_sg *
1346	sizeof(struct vring_packed_desc);
1347	vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT \|
1348	vq->packed.avail_used_flags;
1349	}
1350
1351	/*
1352	* A driver MUST NOT make the first descriptor in the list
1353	* available before all subsequent descriptors comprising
1354	* the list are made available.
1355	*/
1356	virtio_wmb(weak_barriers: vq->weak_barriers);
1357	vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT \|
1358	vq->packed.avail_used_flags);
1359
1360	/ We're using some buffers from the free list. /
1361	vq->vq.num_free -= `1`;
1362
1363	/ Update free pointer /
1364	n = head + `1`;
1365	if (n >= vq->packed.vring.num) {
1366	n = `0`;
1367	vq->packed.avail_wrap_counter ^= `1`;
1368	vq->packed.avail_used_flags ^=
1369	`1` << VRING_PACKED_DESC_F_AVAIL \|
1370	`1` << VRING_PACKED_DESC_F_USED;
1371	}
1372	vq->packed.next_avail_idx = n;
1373	vq->free_head = vq->packed.desc_extra[id].next;
1374
1375	/ Store token and indirect buffer state. /
1376	vq->packed.desc_state[id].num = `1`;
1377	vq->packed.desc_state[id].data = data;
1378	vq->packed.desc_state[id].indir_desc = desc;
1379	vq->packed.desc_state[id].last = id;
1380
1381	vq->num_added += `1`;
1382
1383	pr_debug("Added buffer head %i to %p\n", head, vq);
1384	END_USE(vq);
1385
1386	return `0`;
1387
1388	unmap_release:
1389	err_idx = i;
1390
1391	for (i = `0`; i < err_idx; i++)
1392	vring_unmap_desc_packed(vq, desc: &desc[i]);
1393
1394	free_desc:
1395	kfree(objp: desc);
1396
1397	END_USE(vq);
1398	return -ENOMEM;
1399	}
1400
1401	static inline int virtqueue_add_packed(struct virtqueue *_vq,
1402	struct scatterlist *sgs[],
1403	unsigned int total_sg,
1404	unsigned int out_sgs,
1405	unsigned int in_sgs,
1406	void *data,
1407	void *ctx,
1408	gfp_t gfp)
1409	{
1410	struct vring_virtqueue *vq = to_vvq(_vq);
1411	struct vring_packed_desc *desc;
1412	struct scatterlist *sg;
1413	unsigned int i, n, c, descs_used, err_idx;
1414	__le16 head_flags, flags;
1415	u16 head, id, prev, curr, avail_used_flags;
1416	int err;
1417
1418	START_USE(vq);
1419
1420	BUG_ON(data == NULL);
1421	BUG_ON(ctx && vq->indirect);
1422
1423	if (unlikely(vq->broken)) {
1424	END_USE(vq);
1425	return -EIO;
1426	}
1427
1428	LAST_ADD_TIME_UPDATE(vq);
1429
1430	BUG_ON(total_sg == `0`);
1431
1432	if (virtqueue_use_indirect(vq, total_sg)) {
1433	err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1434	in_sgs, data, gfp);
1435	if (err != -ENOMEM) {
1436	END_USE(vq);
1437	return err;
1438	}
1439
1440	/ fall back on direct /
1441	}
1442
1443	head = vq->packed.next_avail_idx;
1444	avail_used_flags = vq->packed.avail_used_flags;
1445
1446	WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1447
1448	desc = vq->packed.vring.desc;
1449	i = head;
1450	descs_used = total_sg;
1451
1452	if (unlikely(vq->vq.num_free < descs_used)) {
1453	pr_debug("Can't add buf len %i - avail = %i\n",
1454	descs_used, vq->vq.num_free);
1455	END_USE(vq);
1456	return -ENOSPC;
1457	}
1458
1459	id = vq->free_head;
1460	BUG_ON(id == vq->packed.vring.num);
1461
1462	curr = id;
1463	c = `0`;
1464	for (n = `0`; n < out_sgs + in_sgs; n++) {
1465	for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1466	dma_addr_t addr;
1467
1468	if (vring_map_one_sg(vq, sg, direction: n < out_sgs ?
1469	DMA_TO_DEVICE : DMA_FROM_DEVICE, addr: &addr))
1470	goto unmap_release;
1471
1472	flags = cpu_to_le16(vq->packed.avail_used_flags \|
1473	(++c == total_sg ? `0` : VRING_DESC_F_NEXT) \|
1474	(n < out_sgs ? `0` : VRING_DESC_F_WRITE));
1475	if (i == head)
1476	head_flags = flags;
1477	else
1478	desc[i].flags = flags;
1479
1480	desc[i].addr = cpu_to_le64(addr);
1481	desc[i].len = cpu_to_le32(sg->length);
1482	desc[i].id = cpu_to_le16(id);
1483
1484	if (unlikely(vq->do_unmap)) {
1485	vq->packed.desc_extra[curr].addr = addr;
1486	vq->packed.desc_extra[curr].len = sg->length;
1487	vq->packed.desc_extra[curr].flags =
1488	le16_to_cpu(flags);
1489	}
1490	prev = curr;
1491	curr = vq->packed.desc_extra[curr].next;
1492
1493	if ((unlikely(++i >= vq->packed.vring.num))) {
1494	i = `0`;
1495	vq->packed.avail_used_flags ^=
1496	`1` << VRING_PACKED_DESC_F_AVAIL \|
1497	`1` << VRING_PACKED_DESC_F_USED;
1498	}
1499	}
1500	}
1501
1502	if (i <= head)
1503	vq->packed.avail_wrap_counter ^= `1`;
1504
1505	/ We're using some buffers from the free list. /
1506	vq->vq.num_free -= descs_used;
1507
1508	/ Update free pointer /
1509	vq->packed.next_avail_idx = i;
1510	vq->free_head = curr;
1511
1512	/ Store token. /
1513	vq->packed.desc_state[id].num = descs_used;
1514	vq->packed.desc_state[id].data = data;
1515	vq->packed.desc_state[id].indir_desc = ctx;
1516	vq->packed.desc_state[id].last = prev;
1517
1518	/*
1519	* A driver MUST NOT make the first descriptor in the list
1520	* available before all subsequent descriptors comprising
1521	* the list are made available.
1522	*/
1523	virtio_wmb(weak_barriers: vq->weak_barriers);
1524	vq->packed.vring.desc[head].flags = head_flags;
1525	vq->num_added += descs_used;
1526
1527	pr_debug("Added buffer head %i to %p\n", head, vq);
1528	END_USE(vq);
1529
1530	return `0`;
1531
1532	unmap_release:
1533	err_idx = i;
1534	i = head;
1535	curr = vq->free_head;
1536
1537	vq->packed.avail_used_flags = avail_used_flags;
1538
1539	for (n = `0`; n < total_sg; n++) {
1540	if (i == err_idx)
1541	break;
1542	vring_unmap_extra_packed(vq, extra: &vq->packed.desc_extra[curr]);
1543	curr = vq->packed.desc_extra[curr].next;
1544	i++;
1545	if (i >= vq->packed.vring.num)
1546	i = `0`;
1547	}
1548
1549	END_USE(vq);
1550	return -EIO;
1551	}
1552
1553	static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1554	{
1555	struct vring_virtqueue *vq = to_vvq(_vq);
1556	u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1557	bool needs_kick;
1558	union {
1559	struct {
1560	__le16 off_wrap;
1561	__le16 flags;
1562	};
1563	u32 u32;
1564	} snapshot;
1565
1566	START_USE(vq);
1567
1568	/*
1569	* We need to expose the new flags value before checking notification
1570	* suppressions.
1571	*/
1572	virtio_mb(weak_barriers: vq->weak_barriers);
1573
1574	old = vq->packed.next_avail_idx - vq->num_added;
1575	new = vq->packed.next_avail_idx;
1576	vq->num_added = `0`;
1577
1578	snapshot.u32 = (u32 )vq->packed.vring.device;
1579	flags = le16_to_cpu(snapshot.flags);
1580
1581	LAST_ADD_TIME_CHECK(vq);
1582	LAST_ADD_TIME_INVALID(vq);
1583
1584	if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1585	needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1586	goto out;
1587	}
1588
1589	off_wrap = le16_to_cpu(snapshot.off_wrap);
1590
1591	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1592	event_idx = off_wrap & ~(`1` << VRING_PACKED_EVENT_F_WRAP_CTR);
1593	if (wrap_counter != vq->packed.avail_wrap_counter)
1594	event_idx -= vq->packed.vring.num;
1595
1596	needs_kick = vring_need_event(event_idx, new_idx: new, old);
1597	out:
1598	END_USE(vq);
1599	return needs_kick;
1600	}
1601
1602	static void detach_buf_packed(struct vring_virtqueue *vq,
1603	unsigned int id, void **ctx)
1604	{
1605	struct vring_desc_state_packed *state = NULL;
1606	struct vring_packed_desc *desc;
1607	unsigned int i, curr;
1608
1609	state = &vq->packed.desc_state[id];
1610
1611	/ Clear data ptr. /
1612	state->data = NULL;
1613
1614	vq->packed.desc_extra[state->last].next = vq->free_head;
1615	vq->free_head = id;
1616	vq->vq.num_free += state->num;
1617
1618	if (unlikely(vq->do_unmap)) {
1619	curr = id;
1620	for (i = `0`; i < state->num; i++) {
1621	vring_unmap_extra_packed(vq,
1622	extra: &vq->packed.desc_extra[curr]);
1623	curr = vq->packed.desc_extra[curr].next;
1624	}
1625	}
1626
1627	if (vq->indirect) {
1628	u32 len;
1629
1630	/ Free the indirect table, if any, now that it's unmapped. /
1631	desc = state->indir_desc;
1632	if (!desc)
1633	return;
1634
1635	if (vq->do_unmap) {
1636	len = vq->packed.desc_extra[id].len;
1637	for (i = `0`; i < len / sizeof(struct vring_packed_desc);
1638	i++)
1639	vring_unmap_desc_packed(vq, desc: &desc[i]);
1640	}
1641	kfree(objp: desc);
1642	state->indir_desc = NULL;
1643	} else if (ctx) {
1644	*ctx = state->indir_desc;
1645	}
1646	}
1647
1648	static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1649	u16 idx, bool used_wrap_counter)
1650	{
1651	bool avail, used;
1652	u16 flags;
1653
1654	flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1655	avail = !!(flags & (`1` << VRING_PACKED_DESC_F_AVAIL));
1656	used = !!(flags & (`1` << VRING_PACKED_DESC_F_USED));
1657
1658	return avail == used && used == used_wrap_counter;
1659	}
1660
1661	static bool more_used_packed(const struct vring_virtqueue *vq)
1662	{
1663	u16 last_used;
1664	u16 last_used_idx;
1665	bool used_wrap_counter;
1666
1667	last_used_idx = READ_ONCE(vq->last_used_idx);
1668	last_used = packed_last_used(last_used_idx);
1669	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1670	return is_used_desc_packed(vq, idx: last_used, used_wrap_counter);
1671	}
1672
1673	static void virtqueue_get_buf_ctx_packed(struct* virtqueue *_vq,
1674	unsigned int *len,
1675	void **ctx)
1676	{
1677	struct vring_virtqueue *vq = to_vvq(_vq);
1678	u16 last_used, id, last_used_idx;
1679	bool used_wrap_counter;
1680	void *ret;
1681
1682	START_USE(vq);
1683
1684	if (unlikely(vq->broken)) {
1685	END_USE(vq);
1686	return NULL;
1687	}
1688
1689	if (!more_used_packed(vq)) {
1690	pr_debug("No more buffers in queue\n");
1691	END_USE(vq);
1692	return NULL;
1693	}
1694
1695	/ Only get used elements after they have been exposed by host. /
1696	virtio_rmb(weak_barriers: vq->weak_barriers);
1697
1698	last_used_idx = READ_ONCE(vq->last_used_idx);
1699	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1700	last_used = packed_last_used(last_used_idx);
1701	id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1702	*len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1703
1704	if (unlikely(id >= vq->packed.vring.num)) {
1705	BAD_RING(vq, "id %u out of range\n", id);
1706	return NULL;
1707	}
1708	if (unlikely(!vq->packed.desc_state[id].data)) {
1709	BAD_RING(vq, "id %u is not a head!\n", id);
1710	return NULL;
1711	}
1712
1713	/ detach_buf_packed clears data, so grab it now. /
1714	ret = vq->packed.desc_state[id].data;
1715	detach_buf_packed(vq, id, ctx);
1716
1717	last_used += vq->packed.desc_state[id].num;
1718	if (unlikely(last_used >= vq->packed.vring.num)) {
1719	last_used -= vq->packed.vring.num;
1720	used_wrap_counter ^= `1`;
1721	}
1722
1723	last_used = (last_used \| (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1724	WRITE_ONCE(vq->last_used_idx, last_used);
1725
1726	/*
1727	* If we expect an interrupt for the next entry, tell host
1728	* by writing event index and flush out the write before
1729	* the read in the next get_buf call.
1730	*/
1731	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1732	virtio_store_mb(vq->weak_barriers,
1733	&vq->packed.vring.driver->off_wrap,
1734	cpu_to_le16(vq->last_used_idx));
1735
1736	LAST_ADD_TIME_INVALID(vq);
1737
1738	END_USE(vq);
1739	return ret;
1740	}
1741
1742	static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1743	{
1744	struct vring_virtqueue *vq = to_vvq(_vq);
1745
1746	if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1747	vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1748
1749	/*
1750	* If device triggered an event already it won't trigger one again:
1751	* no need to disable.
1752	*/
1753	if (vq->event_triggered)
1754	return;
1755
1756	vq->packed.vring.driver->flags =
1757	cpu_to_le16(vq->packed.event_flags_shadow);
1758	}
1759	}
1760
1761	static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1762	{
1763	struct vring_virtqueue *vq = to_vvq(_vq);
1764
1765	START_USE(vq);
1766
1767	/*
1768	* We optimistically turn back on interrupts, then check if there was
1769	* more to do.
1770	*/
1771
1772	if (vq->event) {
1773	vq->packed.vring.driver->off_wrap =
1774	cpu_to_le16(vq->last_used_idx);
1775	/*
1776	* We need to update event offset and event wrap
1777	* counter first before updating event flags.
1778	*/
1779	virtio_wmb(weak_barriers: vq->weak_barriers);
1780	}
1781
1782	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1783	vq->packed.event_flags_shadow = vq->event ?
1784	VRING_PACKED_EVENT_FLAG_DESC :
1785	VRING_PACKED_EVENT_FLAG_ENABLE;
1786	vq->packed.vring.driver->flags =
1787	cpu_to_le16(vq->packed.event_flags_shadow);
1788	}
1789
1790	END_USE(vq);
1791	return vq->last_used_idx;
1792	}
1793
1794	static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1795	{
1796	struct vring_virtqueue *vq = to_vvq(_vq);
1797	bool wrap_counter;
1798	u16 used_idx;
1799
1800	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1801	used_idx = off_wrap & ~(`1` << VRING_PACKED_EVENT_F_WRAP_CTR);
1802
1803	return is_used_desc_packed(vq, idx: used_idx, used_wrap_counter: wrap_counter);
1804	}
1805
1806	static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1807	{
1808	struct vring_virtqueue *vq = to_vvq(_vq);
1809	u16 used_idx, wrap_counter, last_used_idx;
1810	u16 bufs;
1811
1812	START_USE(vq);
1813
1814	/*
1815	* We optimistically turn back on interrupts, then check if there was
1816	* more to do.
1817	*/
1818
1819	if (vq->event) {
1820	/ TODO: tune this threshold /
1821	bufs = (vq->packed.vring.num - vq->vq.num_free) * `3` / `4`;
1822	last_used_idx = READ_ONCE(vq->last_used_idx);
1823	wrap_counter = packed_used_wrap_counter(last_used_idx);
1824
1825	used_idx = packed_last_used(last_used_idx) + bufs;
1826	if (used_idx >= vq->packed.vring.num) {
1827	used_idx -= vq->packed.vring.num;
1828	wrap_counter ^= `1`;
1829	}
1830
1831	vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx \|
1832	(wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1833
1834	/*
1835	* We need to update event offset and event wrap
1836	* counter first before updating event flags.
1837	*/
1838	virtio_wmb(weak_barriers: vq->weak_barriers);
1839	}
1840
1841	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1842	vq->packed.event_flags_shadow = vq->event ?
1843	VRING_PACKED_EVENT_FLAG_DESC :
1844	VRING_PACKED_EVENT_FLAG_ENABLE;
1845	vq->packed.vring.driver->flags =
1846	cpu_to_le16(vq->packed.event_flags_shadow);
1847	}
1848
1849	/*
1850	* We need to update event suppression structure first
1851	* before re-checking for more used buffers.
1852	*/
1853	virtio_mb(weak_barriers: vq->weak_barriers);
1854
1855	last_used_idx = READ_ONCE(vq->last_used_idx);
1856	wrap_counter = packed_used_wrap_counter(last_used_idx);
1857	used_idx = packed_last_used(last_used_idx);
1858	if (is_used_desc_packed(vq, idx: used_idx, used_wrap_counter: wrap_counter)) {
1859	END_USE(vq);
1860	return false;
1861	}
1862
1863	END_USE(vq);
1864	return true;
1865	}
1866
1867	static void virtqueue_detach_unused_buf_packed(struct* virtqueue *_vq)
1868	{
1869	struct vring_virtqueue *vq = to_vvq(_vq);
1870	unsigned int i;
1871	void *buf;
1872
1873	START_USE(vq);
1874
1875	for (i = `0`; i < vq->packed.vring.num; i++) {
1876	if (!vq->packed.desc_state[i].data)
1877	continue;
1878	/ detach_buf clears data, so grab it now. /
1879	buf = vq->packed.desc_state[i].data;
1880	detach_buf_packed(vq, id: i, NULL);
1881	END_USE(vq);
1882	return buf;
1883	}
1884	/ That should have freed everything. /
1885	BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1886
1887	END_USE(vq);
1888	return NULL;
1889	}
1890
1891	static struct vring_desc_extra vring_alloc_desc_extra(unsigned* int num)
1892	{
1893	struct vring_desc_extra *desc_extra;
1894	unsigned int i;
1895
1896	desc_extra = kmalloc_array(n: num, size: sizeof(struct vring_desc_extra),
1897	GFP_KERNEL);
1898	if (!desc_extra)
1899	return NULL;
1900
1901	memset(desc_extra, `0`, num * sizeof(struct vring_desc_extra));
1902
1903	for (i = `0`; i < num - `1`; i++)
1904	desc_extra[i].next = i + `1`;
1905
1906	return desc_extra;
1907	}
1908
1909	static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
1910	struct virtio_device *vdev,
1911	struct device *dma_dev)
1912	{
1913	if (vring_packed->vring.desc)
1914	vring_free_queue(vdev, size: vring_packed->ring_size_in_bytes,
1915	queue: vring_packed->vring.desc,
1916	dma_handle: vring_packed->ring_dma_addr,
1917	dma_dev);
1918
1919	if (vring_packed->vring.driver)
1920	vring_free_queue(vdev, size: vring_packed->event_size_in_bytes,
1921	queue: vring_packed->vring.driver,
1922	dma_handle: vring_packed->driver_event_dma_addr,
1923	dma_dev);
1924
1925	if (vring_packed->vring.device)
1926	vring_free_queue(vdev, size: vring_packed->event_size_in_bytes,
1927	queue: vring_packed->vring.device,
1928	dma_handle: vring_packed->device_event_dma_addr,
1929	dma_dev);
1930
1931	kfree(objp: vring_packed->desc_state);
1932	kfree(objp: vring_packed->desc_extra);
1933	}
1934
1935	static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
1936	struct virtio_device *vdev,
1937	u32 num, struct device *dma_dev)
1938	{
1939	struct vring_packed_desc *ring;
1940	struct vring_packed_desc_event driver, device;
1941	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1942	size_t ring_size_in_bytes, event_size_in_bytes;
1943
1944	ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1945
1946	ring = vring_alloc_queue(vdev, size: ring_size_in_bytes,
1947	dma_handle: &ring_dma_addr,
1948	GFP_KERNEL \| __GFP_NOWARN \| __GFP_ZERO,
1949	dma_dev);
1950	if (!ring)
1951	goto err;
1952
1953	vring_packed->vring.desc = ring;
1954	vring_packed->ring_dma_addr = ring_dma_addr;
1955	vring_packed->ring_size_in_bytes = ring_size_in_bytes;
1956
1957	event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1958
1959	driver = vring_alloc_queue(vdev, size: event_size_in_bytes,
1960	dma_handle: &driver_event_dma_addr,
1961	GFP_KERNEL \| __GFP_NOWARN \| __GFP_ZERO,
1962	dma_dev);
1963	if (!driver)
1964	goto err;
1965
1966	vring_packed->vring.driver = driver;
1967	vring_packed->event_size_in_bytes = event_size_in_bytes;
1968	vring_packed->driver_event_dma_addr = driver_event_dma_addr;
1969
1970	device = vring_alloc_queue(vdev, size: event_size_in_bytes,
1971	dma_handle: &device_event_dma_addr,
1972	GFP_KERNEL \| __GFP_NOWARN \| __GFP_ZERO,
1973	dma_dev);
1974	if (!device)
1975	goto err;
1976
1977	vring_packed->vring.device = device;
1978	vring_packed->device_event_dma_addr = device_event_dma_addr;
1979
1980	vring_packed->vring.num = num;
1981
1982	return `0`;
1983
1984	err:
1985	vring_free_packed(vring_packed, vdev, dma_dev);
1986	return -ENOMEM;
1987	}
1988
1989	static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
1990	{
1991	struct vring_desc_state_packed *state;
1992	struct vring_desc_extra *extra;
1993	u32 num = vring_packed->vring.num;
1994
1995	state = kmalloc_array(n: num, size: sizeof(struct vring_desc_state_packed), GFP_KERNEL);
1996	if (!state)
1997	goto err_desc_state;
1998
1999	memset(state, `0`, num * sizeof(struct vring_desc_state_packed));
2000
2001	extra = vring_alloc_desc_extra(num);
2002	if (!extra)
2003	goto err_desc_extra;
2004
2005	vring_packed->desc_state = state;
2006	vring_packed->desc_extra = extra;
2007
2008	return `0`;
2009
2010	err_desc_extra:
2011	kfree(objp: state);
2012	err_desc_state:
2013	return -ENOMEM;
2014	}
2015
2016	static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
2017	bool callback)
2018	{
2019	vring_packed->next_avail_idx = `0`;
2020	vring_packed->avail_wrap_counter = `1`;
2021	vring_packed->event_flags_shadow = `0`;
2022	vring_packed->avail_used_flags = `1` << VRING_PACKED_DESC_F_AVAIL;
2023
2024	/ No callback? Tell other side not to bother us. /
2025	if (!callback) {
2026	vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
2027	vring_packed->vring.driver->flags =
2028	cpu_to_le16(vring_packed->event_flags_shadow);
2029	}
2030	}
2031
2032	static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
2033	struct vring_virtqueue_packed *vring_packed)
2034	{
2035	vq->packed = *vring_packed;
2036
2037	/ Put everything in free lists. /
2038	vq->free_head = `0`;
2039	}
2040
2041	static void virtqueue_reinit_packed(struct vring_virtqueue *vq)
2042	{
2043	memset(vq->packed.vring.device, `0`, vq->packed.event_size_in_bytes);
2044	memset(vq->packed.vring.driver, `0`, vq->packed.event_size_in_bytes);
2045
2046	/ we need to reset the desc.flags. For more, see is_used_desc_packed() /
2047	memset(vq->packed.vring.desc, `0`, vq->packed.ring_size_in_bytes);
2048
2049	virtqueue_init(vq, num: vq->packed.vring.num);
2050	virtqueue_vring_init_packed(vring_packed: &vq->packed, callback: !!vq->vq.callback);
2051	}
2052
2053	static struct virtqueue *vring_create_virtqueue_packed(
2054	unsigned int index,
2055	unsigned int num,
2056	unsigned int vring_align,
2057	struct virtio_device *vdev,
2058	bool weak_barriers,
2059	bool may_reduce_num,
2060	bool context,
2061	bool (notify)(struct* virtqueue *),
2062	void (callback)(struct* virtqueue *),
2063	const char *name,
2064	struct device *dma_dev)
2065	{
2066	struct vring_virtqueue_packed vring_packed = {};
2067	struct vring_virtqueue *vq;
2068	int err;
2069
2070	if (vring_alloc_queue_packed(vring_packed: &vring_packed, vdev, num, dma_dev))
2071	goto err_ring;
2072
2073	vq = kmalloc(size: sizeof(*vq), GFP_KERNEL);
2074	if (!vq)
2075	goto err_vq;
2076
2077	vq->vq.callback = callback;
2078	vq->vq.vdev = vdev;
2079	vq->vq.name = name;
2080	vq->vq.index = index;
2081	vq->vq.reset = false;
2082	vq->we_own_ring = true;
2083	vq->notify = notify;
2084	vq->weak_barriers = weak_barriers;
2085	#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2086	vq->broken = true;
2087	#else
2088	vq->broken = false;
2089	#endif
2090	vq->packed_ring = true;
2091	vq->dma_dev = dma_dev;
2092	vq->use_dma_api = vring_use_dma_api(vdev);
2093	vq->premapped = false;
2094	vq->do_unmap = vq->use_dma_api;
2095
2096	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2097	!context;
2098	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2099
2100	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2101	vq->weak_barriers = false;
2102
2103	err = vring_alloc_state_extra_packed(vring_packed: &vring_packed);
2104	if (err)
2105	goto err_state_extra;
2106
2107	virtqueue_vring_init_packed(vring_packed: &vring_packed, callback: !!callback);
2108
2109	virtqueue_init(vq, num);
2110	virtqueue_vring_attach_packed(vq, vring_packed: &vring_packed);
2111
2112	spin_lock(lock: &vdev->vqs_list_lock);
2113	list_add_tail(new: &vq->vq.list, head: &vdev->vqs);
2114	spin_unlock(lock: &vdev->vqs_list_lock);
2115	return &vq->vq;
2116
2117	err_state_extra:
2118	kfree(objp: vq);
2119	err_vq:
2120	vring_free_packed(vring_packed: &vring_packed, vdev, dma_dev);
2121	err_ring:
2122	return NULL;
2123	}
2124
2125	static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
2126	{
2127	struct vring_virtqueue_packed vring_packed = {};
2128	struct vring_virtqueue *vq = to_vvq(_vq);
2129	struct virtio_device *vdev = _vq->vdev;
2130	int err;
2131
2132	if (vring_alloc_queue_packed(vring_packed: &vring_packed, vdev, num, dma_dev: vring_dma_dev(vq)))
2133	goto err_ring;
2134
2135	err = vring_alloc_state_extra_packed(vring_packed: &vring_packed);
2136	if (err)
2137	goto err_state_extra;
2138
2139	vring_free(vq: &vq->vq);
2140
2141	virtqueue_vring_init_packed(vring_packed: &vring_packed, callback: !!vq->vq.callback);
2142
2143	virtqueue_init(vq, num: vring_packed.vring.num);
2144	virtqueue_vring_attach_packed(vq, vring_packed: &vring_packed);
2145
2146	return `0`;
2147
2148	err_state_extra:
2149	vring_free_packed(vring_packed: &vring_packed, vdev, dma_dev: vring_dma_dev(vq));
2150	err_ring:
2151	virtqueue_reinit_packed(vq);
2152	return -ENOMEM;
2153	}
2154
2155	static int virtqueue_disable_and_recycle(struct virtqueue *_vq,
2156	void (recycle)(struct* virtqueue vq, void* *buf))
2157	{
2158	struct vring_virtqueue *vq = to_vvq(_vq);
2159	struct virtio_device *vdev = vq->vq.vdev;
2160	void *buf;
2161	int err;
2162
2163	if (!vq->we_own_ring)
2164	return -EPERM;
2165
2166	if (!vdev->config->disable_vq_and_reset)
2167	return -ENOENT;
2168
2169	if (!vdev->config->enable_vq_after_reset)
2170	return -ENOENT;
2171
2172	err = vdev->config->disable_vq_and_reset(_vq);
2173	if (err)
2174	return err;
2175
2176	while ((buf = virtqueue_detach_unused_buf(vq: _vq)) != NULL)
2177	recycle(_vq, buf);
2178
2179	return `0`;
2180	}
2181
2182	static int virtqueue_enable_after_reset(struct virtqueue *_vq)
2183	{
2184	struct vring_virtqueue *vq = to_vvq(_vq);
2185	struct virtio_device *vdev = vq->vq.vdev;
2186
2187	if (vdev->config->enable_vq_after_reset(_vq))
2188	return -EBUSY;
2189
2190	return `0`;
2191	}
2192
2193	/*
2194	* Generic functions and exported symbols.
2195	*/
2196
2197	static inline int virtqueue_add(struct virtqueue *_vq,
2198	struct scatterlist *sgs[],
2199	unsigned int total_sg,
2200	unsigned int out_sgs,
2201	unsigned int in_sgs,
2202	void *data,
2203	void *ctx,
2204	gfp_t gfp)
2205	{
2206	struct vring_virtqueue *vq = to_vvq(_vq);
2207
2208	return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
2209	out_sgs, in_sgs, data, ctx, gfp) :
2210	virtqueue_add_split(_vq, sgs, total_sg,
2211	out_sgs, in_sgs, data, ctx, gfp);
2212	}
2213
2214	/**
2215	* virtqueue_add_sgs - expose buffers to other end
2216	* @_vq: the struct virtqueue we're talking about.
2217	* @sgs: array of terminated scatterlists.
2218	* @out_sgs: the number of scatterlists readable by other side
2219	* @in_sgs: the number of scatterlists which are writable (after readable ones)
2220	* @data: the token identifying the buffer.
2221	* @gfp: how to do memory allocations (if necessary).
2222	*
2223	* Caller must ensure we don't call this with other virtqueue operations
2224	* at the same time (except where noted).
2225	*
2226	* Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2227	*/
2228	int virtqueue_add_sgs(struct virtqueue *_vq,
2229	struct scatterlist *sgs[],
2230	unsigned int out_sgs,
2231	unsigned int in_sgs,
2232	void *data,
2233	gfp_t gfp)
2234	{
2235	unsigned int i, total_sg = `0`;
2236
2237	/ Count them first. /
2238	for (i = `0`; i < out_sgs + in_sgs; i++) {
2239	struct scatterlist *sg;
2240
2241	for (sg = sgs[i]; sg; sg = sg_next(sg))
2242	total_sg++;
2243	}
2244	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
2245	data, NULL, gfp);
2246	}
2247	EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
2248
2249	/**
2250	* virtqueue_add_outbuf - expose output buffers to other end
2251	* @vq: the struct virtqueue we're talking about.
2252	* @sg: scatterlist (must be well-formed and terminated!)
2253	* @num: the number of entries in @sg readable by other side
2254	* @data: the token identifying the buffer.
2255	* @gfp: how to do memory allocations (if necessary).
2256	*
2257	* Caller must ensure we don't call this with other virtqueue operations
2258	* at the same time (except where noted).
2259	*
2260	* Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2261	*/
2262	int virtqueue_add_outbuf(struct virtqueue *vq,
2263	struct scatterlist sg, unsigned* int num,
2264	void *data,
2265	gfp_t gfp)
2266	{
2267	return virtqueue_add(vq: vq, sgs: &sg, total_sg: num, out_sgs: `1`, in_sgs: `0`, data, NULL, gfp);
2268	}
2269	EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
2270
2271	/**
2272	* virtqueue_add_inbuf - expose input buffers to other end
2273	* @vq: the struct virtqueue we're talking about.
2274	* @sg: scatterlist (must be well-formed and terminated!)
2275	* @num: the number of entries in @sg writable by other side
2276	* @data: the token identifying the buffer.
2277	* @gfp: how to do memory allocations (if necessary).
2278	*
2279	* Caller must ensure we don't call this with other virtqueue operations
2280	* at the same time (except where noted).
2281	*
2282	* Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2283	*/
2284	int virtqueue_add_inbuf(struct virtqueue *vq,
2285	struct scatterlist sg, unsigned* int num,
2286	void *data,
2287	gfp_t gfp)
2288	{
2289	return virtqueue_add(vq: vq, sgs: &sg, total_sg: num, out_sgs: `0`, in_sgs: `1`, data, NULL, gfp);
2290	}
2291	EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
2292
2293	/**
2294	* virtqueue_add_inbuf_ctx - expose input buffers to other end
2295	* @vq: the struct virtqueue we're talking about.
2296	* @sg: scatterlist (must be well-formed and terminated!)
2297	* @num: the number of entries in @sg writable by other side
2298	* @data: the token identifying the buffer.
2299	* @ctx: extra context for the token
2300	* @gfp: how to do memory allocations (if necessary).
2301	*
2302	* Caller must ensure we don't call this with other virtqueue operations
2303	* at the same time (except where noted).
2304	*
2305	* Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
2306	*/
2307	int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
2308	struct scatterlist sg, unsigned* int num,
2309	void *data,
2310	void *ctx,
2311	gfp_t gfp)
2312	{
2313	return virtqueue_add(vq: vq, sgs: &sg, total_sg: num, out_sgs: `0`, in_sgs: `1`, data, ctx, gfp);
2314	}
2315	EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
2316
2317	/**
2318	* virtqueue_dma_dev - get the dma dev
2319	* @_vq: the struct virtqueue we're talking about.
2320	*
2321	* Returns the dma dev. That can been used for dma api.
2322	*/
2323	struct device virtqueue_dma_dev(struct* virtqueue *_vq)
2324	{
2325	struct vring_virtqueue *vq = to_vvq(_vq);
2326
2327	if (vq->use_dma_api)
2328	return vring_dma_dev(vq);
2329	else
2330	return NULL;
2331	}
2332	EXPORT_SYMBOL_GPL(virtqueue_dma_dev);
2333
2334	/**
2335	* virtqueue_kick_prepare - first half of split virtqueue_kick call.
2336	* @_vq: the struct virtqueue
2337	*
2338	* Instead of virtqueue_kick(), you can do:
2339	* if (virtqueue_kick_prepare(vq))
2340	* virtqueue_notify(vq);
2341	*
2342	* This is sometimes useful because the virtqueue_kick_prepare() needs
2343	* to be serialized, but the actual virtqueue_notify() call does not.
2344	*/
2345	bool virtqueue_kick_prepare(struct virtqueue *_vq)
2346	{
2347	struct vring_virtqueue *vq = to_vvq(_vq);
2348
2349	return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
2350	virtqueue_kick_prepare_split(_vq);
2351	}
2352	EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
2353
2354	/**
2355	* virtqueue_notify - second half of split virtqueue_kick call.
2356	* @_vq: the struct virtqueue
2357	*
2358	* This does not need to be serialized.
2359	*
2360	* Returns false if host notify failed or queue is broken, otherwise true.
2361	*/
2362	bool virtqueue_notify(struct virtqueue *_vq)
2363	{
2364	struct vring_virtqueue *vq = to_vvq(_vq);
2365
2366	if (unlikely(vq->broken))
2367	return false;
2368
2369	/ Prod other side to tell it about changes. /
2370	if (!vq->notify(_vq)) {
2371	vq->broken = true;
2372	return false;
2373	}
2374	return true;
2375	}
2376	EXPORT_SYMBOL_GPL(virtqueue_notify);
2377
2378	/**
2379	* virtqueue_kick - update after add_buf
2380	* @vq: the struct virtqueue
2381	*
2382	* After one or more virtqueue_add_* calls, invoke this to kick
2383	* the other side.
2384	*
2385	* Caller must ensure we don't call this with other virtqueue
2386	* operations at the same time (except where noted).
2387	*
2388	* Returns false if kick failed, otherwise true.
2389	*/
2390	bool virtqueue_kick(struct virtqueue *vq)
2391	{
2392	if (virtqueue_kick_prepare(vq))
2393	return virtqueue_notify(vq);
2394	return true;
2395	}
2396	EXPORT_SYMBOL_GPL(virtqueue_kick);
2397
2398	/**
2399	* virtqueue_get_buf_ctx - get the next used buffer
2400	* @_vq: the struct virtqueue we're talking about.
2401	* @len: the length written into the buffer
2402	* @ctx: extra context for the token
2403	*
2404	* If the device wrote data into the buffer, @len will be set to the
2405	* amount written. This means you don't need to clear the buffer
2406	* beforehand to ensure there's no data leakage in the case of short
2407	* writes.
2408	*
2409	* Caller must ensure we don't call this with other virtqueue
2410	* operations at the same time (except where noted).
2411	*
2412	* Returns NULL if there are no used buffers, or the "data" token
2413	* handed to virtqueue_add_*().
2414	*/
2415	void virtqueue_get_buf_ctx(struct* virtqueue _vq, unsigned* int *len,
2416	void **ctx)
2417	{
2418	struct vring_virtqueue *vq = to_vvq(_vq);
2419
2420	return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2421	virtqueue_get_buf_ctx_split(_vq, len, ctx);
2422	}
2423	EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2424
2425	void virtqueue_get_buf(struct* virtqueue _vq, unsigned* int *len)
2426	{
2427	return virtqueue_get_buf_ctx(_vq, len, NULL);
2428	}
2429	EXPORT_SYMBOL_GPL(virtqueue_get_buf);
2430	/**
2431	* virtqueue_disable_cb - disable callbacks
2432	* @_vq: the struct virtqueue we're talking about.
2433	*
2434	* Note that this is not necessarily synchronous, hence unreliable and only
2435	* useful as an optimization.
2436	*
2437	* Unlike other operations, this need not be serialized.
2438	*/
2439	void virtqueue_disable_cb(struct virtqueue *_vq)
2440	{
2441	struct vring_virtqueue *vq = to_vvq(_vq);
2442
2443	if (vq->packed_ring)
2444	virtqueue_disable_cb_packed(_vq);
2445	else
2446	virtqueue_disable_cb_split(_vq);
2447	}
2448	EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2449
2450	/**
2451	* virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2452	* @_vq: the struct virtqueue we're talking about.
2453	*
2454	* This re-enables callbacks; it returns current queue state
2455	* in an opaque unsigned value. This value should be later tested by
2456	* virtqueue_poll, to detect a possible race between the driver checking for
2457	* more work, and enabling callbacks.
2458	*
2459	* Caller must ensure we don't call this with other virtqueue
2460	* operations at the same time (except where noted).
2461	*/
2462	unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2463	{
2464	struct vring_virtqueue *vq = to_vvq(_vq);
2465
2466	if (vq->event_triggered)
2467	vq->event_triggered = false;
2468
2469	return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2470	virtqueue_enable_cb_prepare_split(_vq);
2471	}
2472	EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2473
2474	/**
2475	* virtqueue_poll - query pending used buffers
2476	* @_vq: the struct virtqueue we're talking about.
2477	* @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2478	*
2479	* Returns "true" if there are pending used buffers in the queue.
2480	*
2481	* This does not need to be serialized.
2482	*/
2483	bool virtqueue_poll(struct virtqueue _vq, unsigned* int last_used_idx)
2484	{
2485	struct vring_virtqueue *vq = to_vvq(_vq);
2486
2487	if (unlikely(vq->broken))
2488	return false;
2489
2490	virtio_mb(weak_barriers: vq->weak_barriers);
2491	return vq->packed_ring ? virtqueue_poll_packed(_vq, off_wrap: last_used_idx) :
2492	virtqueue_poll_split(_vq, last_used_idx);
2493	}
2494	EXPORT_SYMBOL_GPL(virtqueue_poll);
2495
2496	/**
2497	* virtqueue_enable_cb - restart callbacks after disable_cb.
2498	* @_vq: the struct virtqueue we're talking about.
2499	*
2500	* This re-enables callbacks; it returns "false" if there are pending
2501	* buffers in the queue, to detect a possible race between the driver
2502	* checking for more work, and enabling callbacks.
2503	*
2504	* Caller must ensure we don't call this with other virtqueue
2505	* operations at the same time (except where noted).
2506	*/
2507	bool virtqueue_enable_cb(struct virtqueue *_vq)
2508	{
2509	unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
2510
2511	return !virtqueue_poll(_vq, last_used_idx);
2512	}
2513	EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
2514
2515	/**
2516	* virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2517	* @_vq: the struct virtqueue we're talking about.
2518	*
2519	* This re-enables callbacks but hints to the other side to delay
2520	* interrupts until most of the available buffers have been processed;
2521	* it returns "false" if there are many pending buffers in the queue,
2522	* to detect a possible race between the driver checking for more work,
2523	* and enabling callbacks.
2524	*
2525	* Caller must ensure we don't call this with other virtqueue
2526	* operations at the same time (except where noted).
2527	*/
2528	bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2529	{
2530	struct vring_virtqueue *vq = to_vvq(_vq);
2531
2532	if (vq->event_triggered)
2533	vq->event_triggered = false;
2534
2535	return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2536	virtqueue_enable_cb_delayed_split(_vq);
2537	}
2538	EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
2539
2540	/**
2541	* virtqueue_detach_unused_buf - detach first unused buffer
2542	* @_vq: the struct virtqueue we're talking about.
2543	*
2544	* Returns NULL or the "data" token handed to virtqueue_add_*().
2545	* This is not valid on an active queue; it is useful for device
2546	* shutdown or the reset queue.
2547	*/
2548	void virtqueue_detach_unused_buf(struct* virtqueue *_vq)
2549	{
2550	struct vring_virtqueue *vq = to_vvq(_vq);
2551
2552	return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2553	virtqueue_detach_unused_buf_split(_vq);
2554	}
2555	EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
2556
2557	static inline bool more_used(const struct vring_virtqueue *vq)
2558	{
2559	return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2560	}
2561
2562	/**
2563	* vring_interrupt - notify a virtqueue on an interrupt
2564	* @irq: the IRQ number (ignored)
2565	* @_vq: the struct virtqueue to notify
2566	*
2567	* Calls the callback function of @_vq to process the virtqueue
2568	* notification.
2569	*/
2570	irqreturn_t vring_interrupt(int irq, void *_vq)
2571	{
2572	struct vring_virtqueue *vq = to_vvq(_vq);
2573
2574	if (!more_used(vq)) {
2575	pr_debug("virtqueue interrupt with no work for %p\n", vq);
2576	return IRQ_NONE;
2577	}
2578
2579	if (unlikely(vq->broken)) {
2580	#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2581	dev_warn_once(&vq->vq.vdev->dev,
2582	"virtio vring IRQ raised before DRIVER_OK");
2583	return IRQ_NONE;
2584	#else
2585	return IRQ_HANDLED;
2586	#endif
2587	}
2588
2589	/ Just a hint for performance: so it's ok that this can be racy! /
2590	if (vq->event)
2591	vq->event_triggered = true;
2592
2593	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2594	if (vq->vq.callback)
2595	vq->vq.callback(&vq->vq);
2596
2597	return IRQ_HANDLED;
2598	}
2599	EXPORT_SYMBOL_GPL(vring_interrupt);
2600
2601	/ Only available for split ring /
2602	static struct virtqueue __vring_new_virtqueue(unsigned* int index,
2603	struct vring_virtqueue_split *vring_split,
2604	struct virtio_device *vdev,
2605	bool weak_barriers,
2606	bool context,
2607	bool (notify)(struct* virtqueue *),
2608	void (callback)(struct* virtqueue *),
2609	const char *name,
2610	struct device *dma_dev)
2611	{
2612	struct vring_virtqueue *vq;
2613	int err;
2614
2615	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2616	return NULL;
2617
2618	vq = kmalloc(size: sizeof(*vq), GFP_KERNEL);
2619	if (!vq)
2620	return NULL;
2621
2622	vq->packed_ring = false;
2623	vq->vq.callback = callback;
2624	vq->vq.vdev = vdev;
2625	vq->vq.name = name;
2626	vq->vq.index = index;
2627	vq->vq.reset = false;
2628	vq->we_own_ring = false;
2629	vq->notify = notify;
2630	vq->weak_barriers = weak_barriers;
2631	#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2632	vq->broken = true;
2633	#else
2634	vq->broken = false;
2635	#endif
2636	vq->dma_dev = dma_dev;
2637	vq->use_dma_api = vring_use_dma_api(vdev);
2638	vq->premapped = false;
2639	vq->do_unmap = vq->use_dma_api;
2640
2641	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2642	!context;
2643	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2644
2645	if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2646	vq->weak_barriers = false;
2647
2648	err = vring_alloc_state_extra_split(vring_split);
2649	if (err) {
2650	kfree(objp: vq);
2651	return NULL;
2652	}
2653
2654	virtqueue_vring_init_split(vring_split, vq);
2655
2656	virtqueue_init(vq, num: vring_split->vring.num);
2657	virtqueue_vring_attach_split(vq, vring_split);
2658
2659	spin_lock(lock: &vdev->vqs_list_lock);
2660	list_add_tail(new: &vq->vq.list, head: &vdev->vqs);
2661	spin_unlock(lock: &vdev->vqs_list_lock);
2662	return &vq->vq;
2663	}
2664
2665	struct virtqueue *vring_create_virtqueue(
2666	unsigned int index,
2667	unsigned int num,
2668	unsigned int vring_align,
2669	struct virtio_device *vdev,
2670	bool weak_barriers,
2671	bool may_reduce_num,
2672	bool context,
2673	bool (notify)(struct* virtqueue *),
2674	void (callback)(struct* virtqueue *),
2675	const char *name)
2676	{
2677
2678	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2679	return vring_create_virtqueue_packed(index, num, vring_align,
2680	vdev, weak_barriers, may_reduce_num,
2681	context, notify, callback, name, dma_dev: vdev->dev.parent);
2682
2683	return vring_create_virtqueue_split(index, num, vring_align,
2684	vdev, weak_barriers, may_reduce_num,
2685	context, notify, callback, name, dma_dev: vdev->dev.parent);
2686	}
2687	EXPORT_SYMBOL_GPL(vring_create_virtqueue);
2688
2689	struct virtqueue *vring_create_virtqueue_dma(
2690	unsigned int index,
2691	unsigned int num,
2692	unsigned int vring_align,
2693	struct virtio_device *vdev,
2694	bool weak_barriers,
2695	bool may_reduce_num,
2696	bool context,
2697	bool (notify)(struct* virtqueue *),
2698	void (callback)(struct* virtqueue *),
2699	const char *name,
2700	struct device *dma_dev)
2701	{
2702
2703	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2704	return vring_create_virtqueue_packed(index, num, vring_align,
2705	vdev, weak_barriers, may_reduce_num,
2706	context, notify, callback, name, dma_dev);
2707
2708	return vring_create_virtqueue_split(index, num, vring_align,
2709	vdev, weak_barriers, may_reduce_num,
2710	context, notify, callback, name, dma_dev);
2711	}
2712	EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma);
2713
2714	/**
2715	* virtqueue_resize - resize the vring of vq
2716	* @_vq: the struct virtqueue we're talking about.
2717	* @num: new ring num
2718	* @recycle: callback to recycle unused buffers
2719	*
2720	* When it is really necessary to create a new vring, it will set the current vq
2721	* into the reset state. Then call the passed callback to recycle the buffer
2722	* that is no longer used. Only after the new vring is successfully created, the
2723	* old vring will be released.
2724	*
2725	* Caller must ensure we don't call this with other virtqueue operations
2726	* at the same time (except where noted).
2727	*
2728	* Returns zero or a negative error.
2729	* 0: success.
2730	* -ENOMEM: Failed to allocate a new ring, fall back to the original ring size.
2731	* vq can still work normally
2732	* -EBUSY: Failed to sync with device, vq may not work properly
2733	* -ENOENT: Transport or device not supported
2734	* -E2BIG/-EINVAL: num error
2735	* -EPERM: Operation not permitted
2736	*
2737	*/
2738	int virtqueue_resize(struct virtqueue *_vq, u32 num,
2739	void (recycle)(struct* virtqueue vq, void* *buf))
2740	{
2741	struct vring_virtqueue *vq = to_vvq(_vq);
2742	int err;
2743
2744	if (num > vq->vq.num_max)
2745	return -E2BIG;
2746
2747	if (!num)
2748	return -EINVAL;
2749
2750	if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
2751	return `0`;
2752
2753	err = virtqueue_disable_and_recycle(_vq, recycle);
2754	if (err)
2755	return err;
2756
2757	if (vq->packed_ring)
2758	err = virtqueue_resize_packed(_vq, num);
2759	else
2760	err = virtqueue_resize_split(_vq, num);
2761
2762	return virtqueue_enable_after_reset(_vq);
2763	}
2764	EXPORT_SYMBOL_GPL(virtqueue_resize);
2765
2766	/**
2767	* virtqueue_set_dma_premapped - set the vring premapped mode
2768	* @_vq: the struct virtqueue we're talking about.
2769	*
2770	* Enable the premapped mode of the vq.
2771	*
2772	* The vring in premapped mode does not do dma internally, so the driver must
2773	* do dma mapping in advance. The driver must pass the dma_address through
2774	* dma_address of scatterlist. When the driver got a used buffer from
2775	* the vring, it has to unmap the dma address.
2776	*
2777	* This function must be called immediately after creating the vq, or after vq
2778	* reset, and before adding any buffers to it.
2779	*
2780	* Caller must ensure we don't call this with other virtqueue operations
2781	* at the same time (except where noted).
2782	*
2783	* Returns zero or a negative error.
2784	* 0: success.
2785	* -EINVAL: vring does not use the dma api, so we can not enable premapped mode.
2786	*/
2787	int virtqueue_set_dma_premapped(struct virtqueue *_vq)
2788	{
2789	struct vring_virtqueue *vq = to_vvq(_vq);
2790	u32 num;
2791
2792	START_USE(vq);
2793
2794	num = vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2795
2796	if (num != vq->vq.num_free) {
2797	END_USE(vq);
2798	return -EINVAL;
2799	}
2800
2801	if (!vq->use_dma_api) {
2802	END_USE(vq);
2803	return -EINVAL;
2804	}
2805
2806	vq->premapped = true;
2807	vq->do_unmap = false;
2808
2809	END_USE(vq);
2810
2811	return `0`;
2812	}
2813	EXPORT_SYMBOL_GPL(virtqueue_set_dma_premapped);
2814
2815	/**
2816	* virtqueue_reset - detach and recycle all unused buffers
2817	* @_vq: the struct virtqueue we're talking about.
2818	* @recycle: callback to recycle unused buffers
2819	*
2820	* Caller must ensure we don't call this with other virtqueue operations
2821	* at the same time (except where noted).
2822	*
2823	* Returns zero or a negative error.
2824	* 0: success.
2825	* -EBUSY: Failed to sync with device, vq may not work properly
2826	* -ENOENT: Transport or device not supported
2827	* -EPERM: Operation not permitted
2828	*/
2829	int virtqueue_reset(struct virtqueue *_vq,
2830	void (recycle)(struct* virtqueue vq, void* *buf))
2831	{
2832	struct vring_virtqueue *vq = to_vvq(_vq);
2833	int err;
2834
2835	err = virtqueue_disable_and_recycle(_vq, recycle);
2836	if (err)
2837	return err;
2838
2839	if (vq->packed_ring)
2840	virtqueue_reinit_packed(vq);
2841	else
2842	virtqueue_reinit_split(vq);
2843
2844	return virtqueue_enable_after_reset(_vq);
2845	}
2846	EXPORT_SYMBOL_GPL(virtqueue_reset);
2847
2848	/ Only available for split ring /
2849	struct virtqueue vring_new_virtqueue(unsigned* int index,
2850	unsigned int num,
2851	unsigned int vring_align,
2852	struct virtio_device *vdev,
2853	bool weak_barriers,
2854	bool context,
2855	void *pages,
2856	bool (notify)(struct* virtqueue *vq),
2857	void (callback)(struct* virtqueue *vq),
2858	const char *name)
2859	{
2860	struct vring_virtqueue_split vring_split = {};
2861
2862	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2863	return NULL;
2864
2865	vring_init(vr: &vring_split.vring, num, p: pages, align: vring_align);
2866	return __vring_new_virtqueue(index, vring_split: &vring_split, vdev, weak_barriers,
2867	context, notify, callback, name,
2868	dma_dev: vdev->dev.parent);
2869	}
2870	EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2871
2872	static void vring_free(struct virtqueue *_vq)
2873	{
2874	struct vring_virtqueue *vq = to_vvq(_vq);
2875
2876	if (vq->we_own_ring) {
2877	if (vq->packed_ring) {
2878	vring_free_queue(vdev: vq->vq.vdev,
2879	size: vq->packed.ring_size_in_bytes,
2880	queue: vq->packed.vring.desc,
2881	dma_handle: vq->packed.ring_dma_addr,
2882	dma_dev: vring_dma_dev(vq));
2883
2884	vring_free_queue(vdev: vq->vq.vdev,
2885	size: vq->packed.event_size_in_bytes,
2886	queue: vq->packed.vring.driver,
2887	dma_handle: vq->packed.driver_event_dma_addr,
2888	dma_dev: vring_dma_dev(vq));
2889
2890	vring_free_queue(vdev: vq->vq.vdev,
2891	size: vq->packed.event_size_in_bytes,
2892	queue: vq->packed.vring.device,
2893	dma_handle: vq->packed.device_event_dma_addr,
2894	dma_dev: vring_dma_dev(vq));
2895
2896	kfree(objp: vq->packed.desc_state);
2897	kfree(objp: vq->packed.desc_extra);
2898	} else {
2899	vring_free_queue(vdev: vq->vq.vdev,
2900	size: vq->split.queue_size_in_bytes,
2901	queue: vq->split.vring.desc,
2902	dma_handle: vq->split.queue_dma_addr,
2903	dma_dev: vring_dma_dev(vq));
2904	}
2905	}
2906	if (!vq->packed_ring) {
2907	kfree(objp: vq->split.desc_state);
2908	kfree(objp: vq->split.desc_extra);
2909	}
2910	}
2911
2912	void vring_del_virtqueue(struct virtqueue *_vq)
2913	{
2914	struct vring_virtqueue *vq = to_vvq(_vq);
2915
2916	spin_lock(lock: &vq->vq.vdev->vqs_list_lock);
2917	list_del(entry: &_vq->list);
2918	spin_unlock(lock: &vq->vq.vdev->vqs_list_lock);
2919
2920	vring_free(_vq);
2921
2922	kfree(objp: vq);
2923	}
2924	EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2925
2926	u32 vring_notification_data(struct virtqueue *_vq)
2927	{
2928	struct vring_virtqueue *vq = to_vvq(_vq);
2929	u16 next;
2930
2931	if (vq->packed_ring)
2932	next = (vq->packed.next_avail_idx &
2933	~(-(`1` << VRING_PACKED_EVENT_F_WRAP_CTR))) \|
2934	vq->packed.avail_wrap_counter <<
2935	VRING_PACKED_EVENT_F_WRAP_CTR;
2936	else
2937	next = vq->split.avail_idx_shadow;
2938
2939	return next << `16` \| _vq->index;
2940	}
2941	EXPORT_SYMBOL_GPL(vring_notification_data);
2942
2943	/ Manipulates transport-specific feature bits. /
2944	void vring_transport_features(struct virtio_device *vdev)
2945	{
2946	unsigned int i;
2947
2948	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2949	switch (i) {
2950	case VIRTIO_RING_F_INDIRECT_DESC:
2951	break;
2952	case VIRTIO_RING_F_EVENT_IDX:
2953	break;
2954	case VIRTIO_F_VERSION_1:
2955	break;
2956	case VIRTIO_F_ACCESS_PLATFORM:
2957	break;
2958	case VIRTIO_F_RING_PACKED:
2959	break;
2960	case VIRTIO_F_ORDER_PLATFORM:
2961	break;
2962	case VIRTIO_F_NOTIFICATION_DATA:
2963	break;
2964	default:
2965	/ We don't understand this bit. /
2966	__virtio_clear_bit(vdev, fbit: i);
2967	}
2968	}
2969	}
2970	EXPORT_SYMBOL_GPL(vring_transport_features);
2971
2972	/**
2973	* virtqueue_get_vring_size - return the size of the virtqueue's vring
2974	* @_vq: the struct virtqueue containing the vring of interest.
2975	*
2976	* Returns the size of the vring. This is mainly used for boasting to
2977	* userspace. Unlike other operations, this need not be serialized.
2978	*/
2979	unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq)
2980	{
2981
2982	const struct vring_virtqueue *vq = to_vvq(_vq);
2983
2984	return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2985	}
2986	EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2987
2988	/*
2989	* This function should only be called by the core, not directly by the driver.
2990	*/
2991	void __virtqueue_break(struct virtqueue *_vq)
2992	{
2993	struct vring_virtqueue *vq = to_vvq(_vq);
2994
2995	/ Pairs with READ_ONCE() in virtqueue_is_broken(). /
2996	WRITE_ONCE(vq->broken, true);
2997	}
2998	EXPORT_SYMBOL_GPL(__virtqueue_break);
2999
3000	/*
3001	* This function should only be called by the core, not directly by the driver.
3002	*/
3003	void __virtqueue_unbreak(struct virtqueue *_vq)
3004	{
3005	struct vring_virtqueue *vq = to_vvq(_vq);
3006
3007	/ Pairs with READ_ONCE() in virtqueue_is_broken(). /
3008	WRITE_ONCE(vq->broken, false);
3009	}
3010	EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
3011
3012	bool virtqueue_is_broken(const struct virtqueue *_vq)
3013	{
3014	const struct vring_virtqueue *vq = to_vvq(_vq);
3015
3016	return READ_ONCE(vq->broken);
3017	}
3018	EXPORT_SYMBOL_GPL(virtqueue_is_broken);
3019
3020	/*
3021	* This should prevent the device from being used, allowing drivers to
3022	* recover. You may need to grab appropriate locks to flush.
3023	*/
3024	void virtio_break_device(struct virtio_device *dev)
3025	{
3026	struct virtqueue *_vq;
3027
3028	spin_lock(lock: &dev->vqs_list_lock);
3029	list_for_each_entry(_vq, &dev->vqs, list) {
3030	struct vring_virtqueue *vq = to_vvq(_vq);
3031
3032	/ Pairs with READ_ONCE() in virtqueue_is_broken(). /
3033	WRITE_ONCE(vq->broken, true);
3034	}
3035	spin_unlock(lock: &dev->vqs_list_lock);
3036	}
3037	EXPORT_SYMBOL_GPL(virtio_break_device);
3038
3039	/*
3040	* This should allow the device to be used by the driver. You may
3041	* need to grab appropriate locks to flush the write to
3042	* vq->broken. This should only be used in some specific case e.g
3043	* (probing and restoring). This function should only be called by the
3044	* core, not directly by the driver.
3045	*/
3046	void __virtio_unbreak_device(struct virtio_device *dev)
3047	{
3048	struct virtqueue *_vq;
3049
3050	spin_lock(lock: &dev->vqs_list_lock);
3051	list_for_each_entry(_vq, &dev->vqs, list) {
3052	struct vring_virtqueue *vq = to_vvq(_vq);
3053
3054	/ Pairs with READ_ONCE() in virtqueue_is_broken(). /
3055	WRITE_ONCE(vq->broken, false);
3056	}
3057	spin_unlock(lock: &dev->vqs_list_lock);
3058	}
3059	EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
3060
3061	dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq)
3062	{
3063	const struct vring_virtqueue *vq = to_vvq(_vq);
3064
3065	BUG_ON(!vq->we_own_ring);
3066
3067	if (vq->packed_ring)
3068	return vq->packed.ring_dma_addr;
3069
3070	return vq->split.queue_dma_addr;
3071	}
3072	EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
3073
3074	dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq)
3075	{
3076	const struct vring_virtqueue *vq = to_vvq(_vq);
3077
3078	BUG_ON(!vq->we_own_ring);
3079
3080	if (vq->packed_ring)
3081	return vq->packed.driver_event_dma_addr;
3082
3083	return vq->split.queue_dma_addr +
3084	((char )vq->split.vring.avail - (char* *)vq->split.vring.desc);
3085	}
3086	EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
3087
3088	dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq)
3089	{
3090	const struct vring_virtqueue *vq = to_vvq(_vq);
3091
3092	BUG_ON(!vq->we_own_ring);
3093
3094	if (vq->packed_ring)
3095	return vq->packed.device_event_dma_addr;
3096
3097	return vq->split.queue_dma_addr +
3098	((char )vq->split.vring.used - (char* *)vq->split.vring.desc);
3099	}
3100	EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
3101
3102	/ Only available for split ring /
3103	const struct vring virtqueue_get_vring(const* struct virtqueue *vq)
3104	{
3105	return &to_vvq(vq)->split.vring;
3106	}
3107	EXPORT_SYMBOL_GPL(virtqueue_get_vring);
3108
3109	/**
3110	* virtqueue_dma_map_single_attrs - map DMA for _vq
3111	* @_vq: the struct virtqueue we're talking about.
3112	* @ptr: the pointer of the buffer to do dma
3113	* @size: the size of the buffer to do dma
3114	* @dir: DMA direction
3115	* @attrs: DMA Attrs
3116	*
3117	* The caller calls this to do dma mapping in advance. The DMA address can be
3118	* passed to this _vq when it is in pre-mapped mode.
3119	*
3120	* return DMA address. Caller should check that by virtqueue_dma_mapping_error().
3121	*/
3122	dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue _vq, void* *ptr,
3123	size_t size,
3124	enum dma_data_direction dir,
3125	unsigned long attrs)
3126	{
3127	struct vring_virtqueue *vq = to_vvq(_vq);
3128
3129	if (!vq->use_dma_api)
3130	return (dma_addr_t)virt_to_phys(address: ptr);
3131
3132	return dma_map_single_attrs(dev: vring_dma_dev(vq), ptr, size, dir, attrs);
3133	}
3134	EXPORT_SYMBOL_GPL(virtqueue_dma_map_single_attrs);
3135
3136	/**
3137	* virtqueue_dma_unmap_single_attrs - unmap DMA for _vq
3138	* @_vq: the struct virtqueue we're talking about.
3139	* @addr: the dma address to unmap
3140	* @size: the size of the buffer
3141	* @dir: DMA direction
3142	* @attrs: DMA Attrs
3143	*
3144	* Unmap the address that is mapped by the virtqueue_dma_map_* APIs.
3145	*
3146	*/
3147	void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr,
3148	size_t size, enum dma_data_direction dir,
3149	unsigned long attrs)
3150	{
3151	struct vring_virtqueue *vq = to_vvq(_vq);
3152
3153	if (!vq->use_dma_api)
3154	return;
3155
3156	dma_unmap_single_attrs(dev: vring_dma_dev(vq), addr, size, dir, attrs);
3157	}
3158	EXPORT_SYMBOL_GPL(virtqueue_dma_unmap_single_attrs);
3159
3160	/**
3161	* virtqueue_dma_mapping_error - check dma address
3162	* @_vq: the struct virtqueue we're talking about.
3163	* @addr: DMA address
3164	*
3165	* Returns 0 means dma valid. Other means invalid dma address.
3166	*/
3167	int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr)
3168	{
3169	struct vring_virtqueue *vq = to_vvq(_vq);
3170
3171	if (!vq->use_dma_api)
3172	return `0`;
3173
3174	return dma_mapping_error(dev: vring_dma_dev(vq), dma_addr: addr);
3175	}
3176	EXPORT_SYMBOL_GPL(virtqueue_dma_mapping_error);
3177
3178	/**
3179	* virtqueue_dma_need_sync - check a dma address needs sync
3180	* @_vq: the struct virtqueue we're talking about.
3181	* @addr: DMA address
3182	*
3183	* Check if the dma address mapped by the virtqueue_dma_map_* APIs needs to be
3184	* synchronized
3185	*
3186	* return bool
3187	*/
3188	bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr)
3189	{
3190	struct vring_virtqueue *vq = to_vvq(_vq);
3191
3192	if (!vq->use_dma_api)
3193	return false;
3194
3195	return dma_need_sync(dev: vring_dma_dev(vq), dma_addr: addr);
3196	}
3197	EXPORT_SYMBOL_GPL(virtqueue_dma_need_sync);
3198
3199	/**
3200	* virtqueue_dma_sync_single_range_for_cpu - dma sync for cpu
3201	* @_vq: the struct virtqueue we're talking about.
3202	* @addr: DMA address
3203	* @offset: DMA address offset
3204	* @size: buf size for sync
3205	* @dir: DMA direction
3206	*
3207	* Before calling this function, use virtqueue_dma_need_sync() to confirm that
3208	* the DMA address really needs to be synchronized
3209	*
3210	*/
3211	void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq,
3212	dma_addr_t addr,
3213	unsigned long offset, size_t size,
3214	enum dma_data_direction dir)
3215	{
3216	struct vring_virtqueue *vq = to_vvq(_vq);
3217	struct device *dev = vring_dma_dev(vq);
3218
3219	if (!vq->use_dma_api)
3220	return;
3221
3222	dma_sync_single_range_for_cpu(dev, addr, offset, size,
3223	dir: DMA_BIDIRECTIONAL);
3224	}
3225	EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_cpu);
3226
3227	/**
3228	* virtqueue_dma_sync_single_range_for_device - dma sync for device
3229	* @_vq: the struct virtqueue we're talking about.
3230	* @addr: DMA address
3231	* @offset: DMA address offset
3232	* @size: buf size for sync
3233	* @dir: DMA direction
3234	*
3235	* Before calling this function, use virtqueue_dma_need_sync() to confirm that
3236	* the DMA address really needs to be synchronized
3237	*/
3238	void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq,
3239	dma_addr_t addr,
3240	unsigned long offset, size_t size,
3241	enum dma_data_direction dir)
3242	{
3243	struct vring_virtqueue *vq = to_vvq(_vq);
3244	struct device *dev = vring_dma_dev(vq);
3245
3246	if (!vq->use_dma_api)
3247	return;
3248
3249	dma_sync_single_range_for_device(dev, addr, offset, size,
3250	dir: DMA_BIDIRECTIONAL);
3251	}
3252	EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_device);
3253
3254	MODULE_LICENSE("GPL");
3255

/* Source: linux/drivers/virtio/virtio_ring.c */