// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

/* Next - Where next entry will be written.
 * Prev - "Next" value when event triggered previously.
 * Event - Peer requested event after writing this entry.
 */
static inline bool need_event(unsigned short event,
			      unsigned short next,
			      unsigned short prev)
{
	return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}
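
/* Worked example: event = 5, prev = 3, next = 7.  Entries 3..6 were
 * written since the last notification, and the peer asked to be notified
 * after entry 5: (7 - 5 - 1) = 1 < (7 - 3) = 4, so an event is due.
 * The unsigned short arithmetic keeps the comparison correct across
 * index wrap-around.
 */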

/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
#define DESC_HW 0x1
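
/* Lifecycle of one descriptor, as implemented below (a reading aid):
 *   guest, add_inbuf():  fill addr/len, then set flags = DESC_HW
 *   host,  use_buf():    see DESC_HW, consume, write len, clear flags
 *   guest, get_buf():    see DESC_HW clear, reclaim buffer and data
 */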

struct desc {
	unsigned short flags;
	unsigned short index;
	unsigned len;
	unsigned long long addr;
};
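
/* On typical 64-bit targets this struct is 16 bytes, i.e. four
 * descriptors per 64-byte cache line (an observation about common ABIs,
 * not a guarantee of the C standard).
 */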

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80

/* Mostly read */
struct event {
	unsigned short kick_index;
	unsigned char reserved0[HOST_GUEST_PADDING - 2];
	unsigned short call_index;
	unsigned char reserved1[HOST_GUEST_PADDING - 2];
};
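
/* The padding keeps kick_index and call_index on separate cache lines:
 * the host writes kick_index while the guest writes call_index, so
 * sharing a line would bounce it between the two sides.
 */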

struct data {
	void *buf; /* descriptor is writeable, we can't get buf from there */
	void *data;
} *data;

struct desc *ring;
struct event *event;

struct guest {
	unsigned avail_idx;
	unsigned last_used_idx;
	unsigned num_free;
	unsigned kicked_avail_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 16];
} guest;

struct host {
	/* we do not need to track last avail index
	 * unless we have more than one in flight.
	 */
	unsigned used_idx;
	unsigned called_used_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 8];
} host;

/* implemented by ring */
void alloc_ring(void)
{
	int ret;
	int i;

	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
	if (ret) {
		/* posix_memalign reports failure via its return value,
		 * not errno, so set errno before calling perror.
		 */
		errno = ret;
		perror("Unable to allocate ring buffer");
		exit(3);
	}
	event = calloc(1, sizeof(*event));
	if (!event) {
		perror("Unable to allocate event buffer");
		exit(3);
	}
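
	/* Prime kicked_avail_idx/called_used_idx with -1 so that the very
	 * first kick_available()/call_used() sees a large (next - prev) in
	 * need_event() and always signals.
	 */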
	guest.avail_idx = 0;
	guest.kicked_avail_idx = -1;
	guest.last_used_idx = 0;
	host.used_idx = 0;
	host.called_used_idx = -1;
	for (i = 0; i < ring_size; ++i) {
		struct desc desc = {
			.index = i,
		};
		ring[i] = desc;
	}
	guest.num_free = ring_size;
	data = calloc(ring_size, sizeof(*data));
	if (!data) {
		perror("Unable to allocate data buffer");
		exit(3);
	}
}

/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
	unsigned head, index;

	if (!guest.num_free)
		return -1;

	guest.num_free--;
	head = (ring_size - 1) & (guest.avail_idx++);

	/* Start with a write. On MESI architectures this helps
	 * avoid a shared state with the consumer that is polling this
	 * descriptor.
	 */
	ring[head].addr = (unsigned long)buf;
	ring[head].len = len;
	/* The read below might bypass the write above. That is OK because
	 * it's just an optimization. If this happens, we will get the cache
	 * line in a shared state which is unfortunate, but probably not
	 * worth an explicit full barrier to avoid.
	 */
	barrier();
	index = ring[head].index;
	data[index].buf = buf;
	data[index].data = datap;
	/* Barrier A (for pairing) */
	smp_release();
	ring[head].flags = DESC_HW;

	return 0;
}
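
/* Typical producer usage (a sketch based on how this harness drives the
 * API; the batching policy and the buffer come from the caller):
 *
 *	while (add_inbuf(buf_len, buf, buf) == 0)
 *		;			// queue until the ring fills up
 *	kick_available();		// then notify the host once
 */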

void *get_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;
	unsigned index;
	void *datap;

	if (ring[head].flags & DESC_HW)
		return NULL;
	/* Barrier B (for pairing) */
	smp_acquire();
	*lenp = ring[head].len;
	index = ring[head].index & (ring_size - 1);
	datap = data[index].data;
	*bufp = data[index].buf;
	data[index].buf = NULL;
	data[index].data = NULL;
	guest.num_free++;
	guest.last_used_idx++;
	return datap;
}
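
/* Typical guest-side completion loop (a sketch; process() is a
 * hypothetical callback, not part of this harness):
 *
 *	unsigned len;
 *	void *buf, *data;
 *
 *	while ((data = get_buf(&len, &buf)))
 *		process(data, buf, len);
 */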

bool used_empty(void)
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;

	return (ring[head].flags & DESC_HW);
}

void disable_call(void)
{
	/* Doing nothing to disable calls might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

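/* Re-arm the call (interrupt) signal, then report whether the used ring
 * is still empty.  A false return means the host completed a buffer
 * while we were arming, so the caller must re-check rather than sleep,
 * to avoid a lost wakeup.
 */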
bool enable_call(void)
{
	event->call_index = guest.last_used_idx;
	/* Flush call index write */
	/* Barrier D (for pairing) */
	smp_mb();
	return used_empty();
}

void kick_available(void)
{
	bool need;

	/* Flush out the previous flags write */
	/* Barrier C (for pairing) */
	smp_mb();
	need = need_event(event->kick_index,
			  guest.avail_idx,
			  guest.kicked_avail_idx);

	guest.kicked_avail_idx = guest.avail_idx;
	if (need)
		kick();
}

/* host side */
void disable_kick(void)
{
	/* Doing nothing to disable kicks might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

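/* Re-arm the kick signal, then report whether the avail ring is still
 * empty.  A false return means the guest made a descriptor available
 * while we were arming, so keep processing instead of sleeping.
 */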
bool enable_kick(void)
{
	event->kick_index = host.used_idx;
	/* Barrier C (for pairing) */
	smp_mb();
	return avail_empty();
}

bool avail_empty(void)
{
	unsigned head = (ring_size - 1) & host.used_idx;

	return !(ring[head].flags & DESC_HW);
}

bool use_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & host.used_idx;

	if (!(ring[head].flags & DESC_HW))
		return false;

	/* make sure the len access below is not speculated
	 * ahead of the flags check above
	 */
	/* Barrier A (for pairing) */
	smp_acquire();

	/* simple in-order completion: we don't need
	 * to touch index at all. This also means we
	 * can just modify the descriptor in-place.
	 */
	ring[head].len--;
	/* Make sure len is valid before flags.
	 * Note: the alternative is to write len and flags in one access -
	 * possible on 64 bit architectures, but wmb is free on Intel anyway,
	 * so there is no easy way to test whether it's a gain.
	 */
	/* Barrier B (for pairing) */
	smp_release();
	ring[head].flags = 0;
	host.used_idx++;
	return true;
}

void call_used(void)
{
	bool need;

	/* Flush out the previous flags write */
	/* Barrier D (for pairing) */
	smp_mb();

	need = need_event(event->call_index,
			  host.used_idx,
			  host.called_used_idx);

	host.called_used_idx = host.used_idx;

	if (need)
		call();
}
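
/* Barrier pairing summary (a reading aid derived from the labels above):
 *   A: smp_release() in add_inbuf()  <->  smp_acquire() in use_buf()
 *   B: smp_release() in use_buf()    <->  smp_acquire() in get_buf()
 *   C: smp_mb() in kick_available()  <->  smp_mb() in enable_kick()
 *   D: smp_mb() in call_used()       <->  smp_mb() in enable_call()
 */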