// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/prefetch.h>
#include <linux/filter.h>

#include <asm/sync_bitops.h>
#include <asm/mshyperv.h>

#include "hyperv_net.h"
#include "netvsc_trace.h"

/*
 * Switch the data path from the synthetic interface to the VF
 * interface.
 */
int netvsc_switch_datapath(struct net_device *ndev, bool vf)
{
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	struct hv_device *dev = net_device_ctx->device_ctx;
	struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
	struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
	int ret, retry = 0;

	/* Block sending traffic to VF if it's about to be gone */
	if (!vf)
		net_device_ctx->data_path_is_vf = vf;

	memset(init_pkt, 0, sizeof(struct nvsp_message));
	init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
	if (vf)
		init_pkt->msg.v4_msg.active_dp.active_datapath =
			NVSP_DATAPATH_VF;
	else
		init_pkt->msg.v4_msg.active_dp.active_datapath =
			NVSP_DATAPATH_SYNTHETIC;

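	/* vmbus_sendpacket() may fail transiently with -EAGAIN; retry the
	 * switch message up to RETRY_MAX times, sleeping briefly between
	 * attempts, before giving up.
	 */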
again:
	trace_nvsp_send(ndev, init_pkt);

	ret = vmbus_sendpacket(dev->channel, init_pkt,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_pkt, VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	/* If we failed to switch to/from the VF, let data_path_is_vf stay
	 * false, so we use the synthetic path to send data.
	 */
	if (ret) {
		if (ret != -EAGAIN) {
			netdev_err(ndev,
				   "Unable to send sw datapath msg, err: %d\n",
				   ret);
			return ret;
		}

		if (retry++ < RETRY_MAX) {
			usleep_range(RETRY_US_LO, RETRY_US_HI);
			goto again;
		} else {
			netdev_err(ndev,
				   "Retry failed to send sw datapath msg, err: %d\n",
				   ret);
			return ret;
		}
	}

	wait_for_completion(&nv_dev->channel_init_wait);
	net_device_ctx->data_path_is_vf = vf;

	return 0;
}

/* Worker to set up sub-channels on initial setup.
 * The initial hotplug event occurs in softirq context
 * and can't wait for the channels.
 */
static void netvsc_subchan_work(struct work_struct *w)
{
	struct netvsc_device *nvdev =
		container_of(w, struct netvsc_device, subchan_work);
	struct rndis_device *rdev;
	int i, ret;

	/* Avoid deadlock with device removal already under RTNL */
	if (!rtnl_trylock()) {
		schedule_work(w);
		return;
	}

	rdev = nvdev->extension;
	if (rdev) {
		ret = rndis_set_subchannel(rdev->ndev, nvdev, NULL);
		if (ret == 0) {
			netif_device_attach(rdev->ndev);
		} else {
			/* fall back to the primary channel only */
			for (i = 1; i < nvdev->num_chn; i++)
				netif_napi_del(&nvdev->chan_table[i].napi);

			nvdev->max_chn = 1;
			nvdev->num_chn = 1;
		}
	}

	rtnl_unlock();
}

static struct netvsc_device *alloc_net_device(void)
{
	struct netvsc_device *net_device;

	net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
	if (!net_device)
		return NULL;

	init_waitqueue_head(&net_device->wait_drain);
	net_device->destroy = false;
	net_device->tx_disable = true;

	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;

	init_completion(&net_device->channel_init_wait);
	init_waitqueue_head(&net_device->subchan_open);
	INIT_WORK(&net_device->subchan_work, netvsc_subchan_work);

	return net_device;
}

static void free_netvsc_device(struct rcu_head *head)
{
	struct netvsc_device *nvdev
		= container_of(head, struct netvsc_device, rcu);
	int i;

	kfree(nvdev->extension);
	vfree(nvdev->recv_buf);
	vfree(nvdev->send_buf);
	bitmap_free(nvdev->send_section_map);

	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
		xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq);
		kfree(nvdev->chan_table[i].recv_buf);
		vfree(nvdev->chan_table[i].mrc.slots);
	}

	kfree(nvdev);
}

static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
{
	call_rcu(&nvdev->rcu, free_netvsc_device);
}

static void netvsc_revoke_recv_buf(struct hv_device *device,
				   struct netvsc_device *net_device,
				   struct net_device *ndev)
{
	struct nvsp_message *revoke_packet;
	int ret;

	/*
	 * If we got a section count, it means we received a
	 * SendReceiveBufferComplete msg (i.e. we sent a
	 * NvspMessage1TypeSendReceiveBuffer msg), so we need
	 * to send a revoke msg here.
	 */
	if (net_device->recv_section_cnt) {
		/* Send the revoke receive buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
		revoke_packet->msg.v1_msg.revoke_recv_buf.id =
			NETVSC_RECEIVE_BUFFER_ID;

		trace_nvsp_send(ndev, revoke_packet);

		ret = vmbus_sendpacket(device->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       VMBUS_RQST_ID_NO_RESPONSE,
				       VM_PKT_DATA_INBAND, 0);
		/* If the failure is because the channel is rescinded,
		 * ignore it: we cannot send on a rescinded channel.
		 * This allows us to clean up properly even when the
		 * channel is rescinded.
		 */
		if (device->channel->rescind)
			ret = 0;
		/*
		 * If we failed here, we might as well return and
		 * have a leak rather than continue and risk a bugcheck.
		 */
		if (ret != 0) {
			netdev_err(ndev, "unable to send revoke receive buffer to netvsp\n");
			return;
		}
		net_device->recv_section_cnt = 0;
	}
}

static void netvsc_revoke_send_buf(struct hv_device *device,
				   struct netvsc_device *net_device,
				   struct net_device *ndev)
{
	struct nvsp_message *revoke_packet;
	int ret;

	/* Deal with the send buffer we may have set up.
	 * If we got a send section size, it means we received a
	 * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (i.e. we sent a
	 * NVSP_MSG1_TYPE_SEND_SEND_BUF msg), so we need to send
	 * a revoke msg here.
	 */
	if (net_device->send_section_cnt) {
		/* Send the revoke send buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
		revoke_packet->msg.v1_msg.revoke_send_buf.id =
			NETVSC_SEND_BUFFER_ID;

		trace_nvsp_send(ndev, revoke_packet);

		ret = vmbus_sendpacket(device->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       VMBUS_RQST_ID_NO_RESPONSE,
				       VM_PKT_DATA_INBAND, 0);

		/* If the failure is because the channel is rescinded,
		 * ignore it: we cannot send on a rescinded channel.
		 * This allows us to clean up properly even when the
		 * channel is rescinded.
		 */
		if (device->channel->rescind)
			ret = 0;

		/* If we failed here, we might as well return and
		 * have a leak rather than continue and risk a bugcheck.
		 */
		if (ret != 0) {
			netdev_err(ndev, "unable to send revoke send buffer to netvsp\n");
			return;
		}
		net_device->send_section_cnt = 0;
	}
}

static void netvsc_teardown_recv_gpadl(struct hv_device *device,
				       struct netvsc_device *net_device,
				       struct net_device *ndev)
{
	int ret;

	if (net_device->recv_buf_gpadl_handle.gpadl_handle) {
		ret = vmbus_teardown_gpadl(device->channel,
					   &net_device->recv_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and risk a bugcheck.
		 */
		if (ret != 0) {
			netdev_err(ndev,
				   "unable to teardown receive buffer's gpadl\n");
			return;
		}
	}
}

static void netvsc_teardown_send_gpadl(struct hv_device *device,
				       struct netvsc_device *net_device,
				       struct net_device *ndev)
{
	int ret;

	if (net_device->send_buf_gpadl_handle.gpadl_handle) {
		ret = vmbus_teardown_gpadl(device->channel,
					   &net_device->send_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and risk a bugcheck.
		 */
		if (ret != 0) {
			netdev_err(ndev,
				   "unable to teardown send buffer's gpadl\n");
			return;
		}
	}
}

int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
{
	struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
	int node = cpu_to_node(nvchan->channel->target_cpu);
	size_t size;

	size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data);
	nvchan->mrc.slots = vzalloc_node(size, node);
	/* Fall back to an allocation from any node if the NUMA-local one fails */
	if (!nvchan->mrc.slots)
		nvchan->mrc.slots = vzalloc(size);

	return nvchan->mrc.slots ? 0 : -ENOMEM;
}

static int netvsc_init_buf(struct hv_device *device,
			   struct netvsc_device *net_device,
			   const struct netvsc_device_info *device_info)
{
	struct nvsp_1_message_send_receive_buffer_complete *resp;
	struct net_device *ndev = hv_get_drvdata(device);
	struct nvsp_message *init_packet;
	unsigned int buf_size;
	int i, ret = 0;

	/* Get receive buffer area. */
	buf_size = device_info->recv_sections * device_info->recv_section_size;
	buf_size = roundup(buf_size, PAGE_SIZE);

	/* Legacy hosts only allow smaller receive buffer */
	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
		buf_size = min_t(unsigned int, buf_size,
				 NETVSC_RECEIVE_BUFFER_SIZE_LEGACY);

	net_device->recv_buf = vzalloc(buf_size);
	if (!net_device->recv_buf) {
		netdev_err(ndev,
			   "unable to allocate receive buffer of size %u\n",
			   buf_size);
		ret = -ENOMEM;
		goto cleanup;
	}

	net_device->recv_buf_size = buf_size;

	/*
	 * Establish the gpadl handle for this buffer on this
	 * channel. Note: This call uses the vmbus connection rather
	 * than the channel to establish the gpadl handle.
	 */
	ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
				    buf_size,
				    &net_device->recv_buf_gpadl_handle);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to establish receive buffer's gpadl\n");
		goto cleanup;
	}

	/* Notify the NetVsp of the gpadl handle */
	init_packet = &net_device->channel_init_pkt;
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
	init_packet->msg.v1_msg.send_recv_buf.gpadl_handle =
		net_device->recv_buf_gpadl_handle.gpadl_handle;
	init_packet->msg.v1_msg.send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;

	trace_nvsp_send(ndev, init_packet);

	/* Send the gpadl notification request */
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to send receive buffer's gpadl to netvsp\n");
		goto cleanup;
	}

	wait_for_completion(&net_device->channel_init_wait);

	/* Check the response */
	resp = &init_packet->msg.v1_msg.send_recv_buf_complete;
	if (resp->status != NVSP_STAT_SUCCESS) {
		netdev_err(ndev,
			   "Unable to complete receive buffer initialization with NetVsp - status %d\n",
			   resp->status);
		ret = -EINVAL;
		goto cleanup;
	}

	/* Parse the response */
	netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
		   resp->num_sections, resp->sections[0].sub_alloc_size,
		   resp->sections[0].num_sub_allocs);

	/* There should only be one section for the entire receive buffer */
	if (resp->num_sections != 1 || resp->sections[0].offset != 0) {
		ret = -EINVAL;
		goto cleanup;
	}

	net_device->recv_section_size = resp->sections[0].sub_alloc_size;
	net_device->recv_section_cnt = resp->sections[0].num_sub_allocs;

	/* Ensure the buffer will not overflow */
	if (net_device->recv_section_size < NETVSC_MTU_MIN ||
	    (u64)net_device->recv_section_size *
	    (u64)net_device->recv_section_cnt > (u64)buf_size) {
		netdev_err(ndev, "invalid recv_section_size %u\n",
			   net_device->recv_section_size);
		ret = -EINVAL;
		goto cleanup;
	}

	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
		struct netvsc_channel *nvchan = &net_device->chan_table[i];

		nvchan->recv_buf = kzalloc(net_device->recv_section_size, GFP_KERNEL);
		if (nvchan->recv_buf == NULL) {
			ret = -ENOMEM;
			goto cleanup;
		}
	}

	/* Set up the receive completion ring.
	 * Add 1 to recv_section_cnt because at least one entry in a
	 * ring buffer has to be empty.
	 */
	net_device->recv_completion_cnt = net_device->recv_section_cnt + 1;
	ret = netvsc_alloc_recv_comp_ring(net_device, 0);
	if (ret)
		goto cleanup;

	/* Now set up the send buffer. */
	buf_size = device_info->send_sections * device_info->send_section_size;
	buf_size = round_up(buf_size, PAGE_SIZE);

	net_device->send_buf = vzalloc(buf_size);
	if (!net_device->send_buf) {
		netdev_err(ndev, "unable to allocate send buffer of size %u\n",
			   buf_size);
		ret = -ENOMEM;
		goto cleanup;
	}
	net_device->send_buf_size = buf_size;

	/* Establish the gpadl handle for this buffer on this
	 * channel. Note: This call uses the vmbus connection rather
	 * than the channel to establish the gpadl handle.
	 */
	ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
				    buf_size,
				    &net_device->send_buf_gpadl_handle);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to establish send buffer's gpadl\n");
		goto cleanup;
	}

	/* Notify the NetVsp of the gpadl handle */
	init_packet = &net_device->channel_init_pkt;
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
	init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
		net_device->send_buf_gpadl_handle.gpadl_handle;
	init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;

	trace_nvsp_send(ndev, init_packet);

	/* Send the gpadl notification request */
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to send send buffer's gpadl to netvsp\n");
		goto cleanup;
	}

	wait_for_completion(&net_device->channel_init_wait);

	/* Check the response */
	if (init_packet->msg.v1_msg.send_send_buf_complete.status !=
	    NVSP_STAT_SUCCESS) {
		netdev_err(ndev,
			   "Unable to complete send buffer initialization with NetVsp - status %d\n",
			   init_packet->msg.v1_msg.send_send_buf_complete.status);
		ret = -EINVAL;
		goto cleanup;
	}

	/* Parse the response */
	net_device->send_section_size =
		init_packet->msg.v1_msg.send_send_buf_complete.section_size;
	if (net_device->send_section_size < NETVSC_MTU_MIN) {
		netdev_err(ndev, "invalid send_section_size %u\n",
			   net_device->send_section_size);
		ret = -EINVAL;
		goto cleanup;
	}

	/* The section count is simply the buffer size divided by the section size. */
	net_device->send_section_cnt = buf_size / net_device->send_section_size;

	netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
		   net_device->send_section_size, net_device->send_section_cnt);

	/* Set up state for managing the send buffer. */
	net_device->send_section_map = bitmap_zalloc(net_device->send_section_cnt,
						     GFP_KERNEL);
	if (!net_device->send_section_map) {
		ret = -ENOMEM;
		goto cleanup;
	}

	goto exit;

cleanup:
	netvsc_revoke_recv_buf(device, net_device, ndev);
	netvsc_revoke_send_buf(device, net_device, ndev);
	netvsc_teardown_recv_gpadl(device, net_device, ndev);
	netvsc_teardown_send_gpadl(device, net_device, ndev);

exit:
	return ret;
}

/* Negotiate NVSP protocol version */
static int negotiate_nvsp_ver(struct hv_device *device,
			      struct netvsc_device *net_device,
			      struct nvsp_message *init_packet,
			      u32 nvsp_ver)
{
	struct net_device *ndev = hv_get_drvdata(device);
	int ret;

	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
	init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
	init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
	trace_nvsp_send(ndev, init_packet);

	/* Send the init request */
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if (ret != 0)
		return ret;

	wait_for_completion(&net_device->channel_init_wait);

	if (init_packet->msg.init_msg.init_complete.status !=
	    NVSP_STAT_SUCCESS)
		return -EINVAL;

	if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
		return 0;

	/* NVSPv2 or later: Send NDIS config */
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
	init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN;
	init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;

	if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) {
		if (hv_is_isolation_supported())
			netdev_info(ndev, "SR-IOV not advertised by guests on the host supporting isolation\n");
		else
			init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;

		/* Teaming bit is needed to receive link speed updates */
		init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
	}

	if (nvsp_ver >= NVSP_PROTOCOL_VERSION_61)
		init_packet->msg.v2_msg.send_ndis_config.capability.rsc = 1;

	trace_nvsp_send(ndev, init_packet);

	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       VMBUS_RQST_ID_NO_RESPONSE,
			       VM_PKT_DATA_INBAND, 0);

	return ret;
}

static int netvsc_connect_vsp(struct hv_device *device,
			      struct netvsc_device *net_device,
			      const struct netvsc_device_info *device_info)
{
	struct net_device *ndev = hv_get_drvdata(device);
	static const u32 ver_list[] = {
		NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
		NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5,
		NVSP_PROTOCOL_VERSION_6, NVSP_PROTOCOL_VERSION_61
	};
	struct nvsp_message *init_packet;
	int ndis_version, i, ret;

	init_packet = &net_device->channel_init_pkt;

	/* Negotiate the latest NVSP protocol supported */
	for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--)
		if (negotiate_nvsp_ver(device, net_device, init_packet,
				       ver_list[i]) == 0) {
			net_device->nvsp_version = ver_list[i];
			break;
		}

	if (i < 0) {
		ret = -EPROTO;
		goto cleanup;
	}

	if (hv_is_isolation_supported() && net_device->nvsp_version < NVSP_PROTOCOL_VERSION_61) {
		netdev_err(ndev, "Invalid NVSP version 0x%x (expected >= 0x%x) from the host supporting isolation\n",
			   net_device->nvsp_version, NVSP_PROTOCOL_VERSION_61);
		ret = -EPROTO;
		goto cleanup;
	}

	pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);

	/* Send the ndis version */
	memset(init_packet, 0, sizeof(struct nvsp_message));

	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
		ndis_version = 0x00060001;
	else
		ndis_version = 0x0006001e;

	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
	init_packet->msg.v1_msg.send_ndis_ver.ndis_major_ver =
		(ndis_version & 0xFFFF0000) >> 16;
	init_packet->msg.v1_msg.send_ndis_ver.ndis_minor_ver =
		ndis_version & 0xFFFF;

	trace_nvsp_send(ndev, init_packet);

	/* Send the init request */
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       VMBUS_RQST_ID_NO_RESPONSE,
			       VM_PKT_DATA_INBAND, 0);
	if (ret != 0)
		goto cleanup;

	ret = netvsc_init_buf(device, net_device, device_info);

cleanup:
	return ret;
}

/*
 * netvsc_device_remove - Callback when the root bus device is removed
 */
void netvsc_device_remove(struct hv_device *device)
{
	struct net_device *ndev = hv_get_drvdata(device);
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	struct netvsc_device *net_device
		= rtnl_dereference(net_device_ctx->nvdev);
	int i;

	/*
	 * Revoke the receive buffer. If the host is pre-Win2016 then also
	 * tear down the receive buffer GPADL. Do the same for the send buffer.
	 */
	netvsc_revoke_recv_buf(device, net_device, ndev);
	if (vmbus_proto_version < VERSION_WIN10)
		netvsc_teardown_recv_gpadl(device, net_device, ndev);

	netvsc_revoke_send_buf(device, net_device, ndev);
	if (vmbus_proto_version < VERSION_WIN10)
		netvsc_teardown_send_gpadl(device, net_device, ndev);

	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);

	/* Disable NAPI and disassociate its context from the device. */
	for (i = 0; i < net_device->num_chn; i++) {
		/* See also vmbus_reset_channel_cb(). */
		napi_disable(&net_device->chan_table[i].napi);
		netif_napi_del(&net_device->chan_table[i].napi);
	}

	/*
	 * At this point, no one should be accessing net_device
	 * except in here.
	 */
	netdev_dbg(ndev, "net device safe to remove\n");

	/* Now we can close the channel safely */
	vmbus_close(device->channel);

	/*
	 * If the host is Win2016 or higher then we do the GPADL teardown
	 * here after VMBus is closed.
	 */
	if (vmbus_proto_version >= VERSION_WIN10) {
		netvsc_teardown_recv_gpadl(device, net_device, ndev);
		netvsc_teardown_send_gpadl(device, net_device, ndev);
	}

	/* Release all resources */
	free_netvsc_device_rcu(net_device);
}

#define RING_AVAIL_PERCENT_HIWATER 20
#define RING_AVAIL_PERCENT_LOWATER 10
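/* Flow-control hysteresis for the transmit queue: netvsc_send_pkt() stops
 * the queue when the available ring-buffer space drops below
 * RING_AVAIL_PERCENT_LOWATER, and netvsc_send_tx_complete() wakes it again
 * once the available space rises above RING_AVAIL_PERCENT_HIWATER. The gap
 * between the two watermarks keeps the queue from thrashing on and off.
 */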

static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
					 u32 index)
{
	sync_change_bit(index, net_device->send_section_map);
}

static void netvsc_send_tx_complete(struct net_device *ndev,
				    struct netvsc_device *net_device,
				    struct vmbus_channel *channel,
				    const struct vmpacket_descriptor *desc,
				    int budget)
{
	struct net_device_context *ndev_ctx = netdev_priv(ndev);
	struct sk_buff *skb;
	u16 q_idx = 0;
	int queue_sends;
	u64 cmd_rqst;

	cmd_rqst = channel->request_addr_callback(channel, desc->trans_id);
	if (cmd_rqst == VMBUS_RQST_ERROR) {
		netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id);
		return;
	}

	skb = (struct sk_buff *)(unsigned long)cmd_rqst;

	/* Notify the layer above us */
	if (likely(skb)) {
		struct hv_netvsc_packet *packet
			= (struct hv_netvsc_packet *)skb->cb;
		u32 send_index = packet->send_buf_index;
		struct netvsc_stats_tx *tx_stats;

		if (send_index != NETVSC_INVALID_INDEX)
			netvsc_free_send_slot(net_device, send_index);
		q_idx = packet->q_idx;

		tx_stats = &net_device->chan_table[q_idx].tx_stats;

		u64_stats_update_begin(&tx_stats->syncp);
		tx_stats->packets += packet->total_packets;
		tx_stats->bytes += packet->total_bytes;
		u64_stats_update_end(&tx_stats->syncp);

		netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
		napi_consume_skb(skb, budget);
	}

	queue_sends =
		atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);

	if (unlikely(net_device->destroy)) {
		if (queue_sends == 0)
			wake_up(&net_device->wait_drain);
	} else {
		struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);

		if (netif_tx_queue_stopped(txq) && !net_device->tx_disable &&
		    (hv_get_avail_to_write_percent(&channel->outbound) >
		     RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) {
			netif_tx_wake_queue(txq);
			ndev_ctx->eth_stats.wake_queue++;
		}
	}
}

static void netvsc_send_completion(struct net_device *ndev,
				   struct netvsc_device *net_device,
				   struct vmbus_channel *incoming_channel,
				   const struct vmpacket_descriptor *desc,
				   int budget)
{
	const struct nvsp_message *nvsp_packet;
	u32 msglen = hv_pkt_datalen(desc);
	struct nvsp_message *pkt_rqst;
	u64 cmd_rqst;
	u32 status;

	/* First check if this is a VMBUS completion without data payload */
	if (!msglen) {
		cmd_rqst = incoming_channel->request_addr_callback(incoming_channel,
								   desc->trans_id);
		if (cmd_rqst == VMBUS_RQST_ERROR) {
			netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id);
			return;
		}

		pkt_rqst = (struct nvsp_message *)(uintptr_t)cmd_rqst;
		switch (pkt_rqst->hdr.msg_type) {
		case NVSP_MSG4_TYPE_SWITCH_DATA_PATH:
			complete(&net_device->channel_init_wait);
			break;

		default:
			netdev_err(ndev, "Unexpected VMBUS completion!!\n");
		}
		return;
	}

	/* Ensure packet is big enough to read header fields */
	if (msglen < sizeof(struct nvsp_message_header)) {
		netdev_err(ndev, "nvsp_message length too small: %u\n", msglen);
		return;
	}

	nvsp_packet = hv_pkt_data(desc);
	switch (nvsp_packet->hdr.msg_type) {
	case NVSP_MSG_TYPE_INIT_COMPLETE:
		if (msglen < sizeof(struct nvsp_message_header) +
		    sizeof(struct nvsp_message_init_complete)) {
			netdev_err(ndev, "nvsp_msg length too small: %u\n",
				   msglen);
			return;
		}
		break;

	case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
		if (msglen < sizeof(struct nvsp_message_header) +
		    sizeof(struct nvsp_1_message_send_receive_buffer_complete)) {
			netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
				   msglen);
			return;
		}
		break;

	case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE:
		if (msglen < sizeof(struct nvsp_message_header) +
		    sizeof(struct nvsp_1_message_send_send_buffer_complete)) {
			netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
				   msglen);
			return;
		}
		break;

	case NVSP_MSG5_TYPE_SUBCHANNEL:
		if (msglen < sizeof(struct nvsp_message_header) +
		    sizeof(struct nvsp_5_subchannel_complete)) {
			netdev_err(ndev, "nvsp_msg5 length too small: %u\n",
				   msglen);
			return;
		}
		break;

	case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
		if (msglen < sizeof(struct nvsp_message_header) +
		    sizeof(struct nvsp_1_message_send_rndis_packet_complete)) {
			if (net_ratelimit())
				netdev_err(ndev, "nvsp_rndis_pkt_complete length too small: %u\n",
					   msglen);
			return;
		}

		/* If status indicates an error, output a message so we know
		 * there's a problem. But process the completion anyway so the
		 * resources are released.
		 */
		status = nvsp_packet->msg.v1_msg.send_rndis_pkt_complete.status;
		if (status != NVSP_STAT_SUCCESS && net_ratelimit())
			netdev_err(ndev, "nvsp_rndis_pkt_complete error status: %x\n",
				   status);

		netvsc_send_tx_complete(ndev, net_device, incoming_channel,
					desc, budget);
		return;

	default:
		netdev_err(ndev,
			   "Unknown send completion type %d received!!\n",
			   nvsp_packet->hdr.msg_type);
		return;
	}

	/* Copy the response back */
	memcpy(&net_device->channel_init_pkt, nvsp_packet,
	       sizeof(struct nvsp_message));
	complete(&net_device->channel_init_wait);
}

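/* Find a free section in the send buffer and atomically claim it.
 * for_each_clear_bit() alone is not atomic, so each candidate bit is
 * re-checked with sync_test_and_set_bit(); if another CPU claimed the
 * section first, the scan simply continues.
 */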
static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
{
	unsigned long *map_addr = net_device->send_section_map;
	unsigned int i;

	for_each_clear_bit(i, map_addr, net_device->send_section_cnt) {
		if (sync_test_and_set_bit(i, map_addr) == 0)
			return i;
	}

	return NETVSC_INVALID_INDEX;
}

static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
				    unsigned int section_index,
				    u32 pend_size,
				    struct hv_netvsc_packet *packet,
				    struct rndis_message *rndis_msg,
				    struct hv_page_buffer *pb,
				    bool xmit_more)
{
	char *start = net_device->send_buf;
	char *dest = start + (section_index * net_device->send_section_size)
		     + pend_size;
	int i;
	u32 padding = 0;
	u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
		packet->page_buf_cnt;
	u32 remain;

	/* Add padding so that the next packet batched into this section
	 * starts on a pkt_align boundary. Padding is only needed when more
	 * packets will follow (xmit_more).
	 */
	remain = packet->total_data_buflen & (net_device->pkt_align - 1);
	if (xmit_more && remain) {
		padding = net_device->pkt_align - remain;
		rndis_msg->msg_len += padding;
		packet->total_data_buflen += padding;
	}

	for (i = 0; i < page_count; i++) {
		char *src = phys_to_virt(pb[i].pfn << HV_HYP_PAGE_SHIFT);
		u32 offset = pb[i].offset;
		u32 len = pb[i].len;

		memcpy(dest, (src + offset), len);
		dest += len;
	}

	if (padding)
		memset(dest, 0, padding);
}

void netvsc_dma_unmap(struct hv_device *hv_dev,
		      struct hv_netvsc_packet *packet)
{
	int i;

	if (!hv_is_isolation_supported())
		return;

	if (!packet->dma_range)
		return;

	for (i = 0; i < packet->page_buf_cnt; i++)
		dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
				 packet->dma_range[i].mapping_size,
				 DMA_TO_DEVICE);

	kfree(packet->dma_range);
}

/* netvsc_dma_map - Map swiotlb bounce buffer with data page of
 * packet sent by vmbus_sendpacket_pagebuffer() in the Isolation
 * VM.
 *
 * In an isolation VM, the netvsc send buffer has been marked visible
 * to the host, so data copied into the send buffer doesn't need a
 * bounce buffer. The data pages handled by vmbus_sendpacket_pagebuffer()
 * may not have been copied to the send buffer, so those pages need to
 * be mapped with the swiotlb bounce buffer; netvsc_dma_map() does
 * that. The pfns in the struct hv_page_buffer need to be converted
 * to the bounce buffer's pfns. The loop here is necessary because the
 * entries in the page buffer array are not necessarily full
 * pages of data. Each entry in the array has a separate offset and
 * len that may be non-zero, even for entries in the middle of the
 * array. And the entries are not physically contiguous. So each
 * entry must be individually mapped rather than as a contiguous unit,
 * which is why we do not use dma_map_sg() here.
 */
static int netvsc_dma_map(struct hv_device *hv_dev,
			  struct hv_netvsc_packet *packet,
			  struct hv_page_buffer *pb)
{
	u32 page_count = packet->page_buf_cnt;
	dma_addr_t dma;
	int i;

	if (!hv_is_isolation_supported())
		return 0;

	packet->dma_range = kcalloc(page_count,
				    sizeof(*packet->dma_range),
				    GFP_ATOMIC);
	if (!packet->dma_range)
		return -ENOMEM;

	for (i = 0; i < page_count; i++) {
		char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
					 + pb[i].offset);
		u32 len = pb[i].len;

		dma = dma_map_single(&hv_dev->device, src, len,
				     DMA_TO_DEVICE);
		if (dma_mapping_error(&hv_dev->device, dma)) {
			kfree(packet->dma_range);
			return -ENOMEM;
		}

		/* pb[].offset and pb[].len are not changed by the DMA mapping,
		 * so they are not reassigned.
		 */
		packet->dma_range[i].dma = dma;
		packet->dma_range[i].mapping_size = len;
		pb[i].pfn = dma >> HV_HYP_PAGE_SHIFT;
	}

	return 0;
}

static inline int netvsc_send_pkt(
	struct hv_device *device,
	struct hv_netvsc_packet *packet,
	struct netvsc_device *net_device,
	struct hv_page_buffer *pb,
	struct sk_buff *skb)
{
	struct nvsp_message nvmsg;
	struct nvsp_1_message_send_rndis_packet *rpkt =
		&nvmsg.msg.v1_msg.send_rndis_pkt;
	struct netvsc_channel * const nvchan =
		&net_device->chan_table[packet->q_idx];
	struct vmbus_channel *out_channel = nvchan->channel;
	struct net_device *ndev = hv_get_drvdata(device);
	struct net_device_context *ndev_ctx = netdev_priv(ndev);
	struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
	u64 req_id;
	int ret;
	u32 ring_avail = hv_get_avail_to_write_percent(&out_channel->outbound);

	memset(&nvmsg, 0, sizeof(struct nvsp_message));
	nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
	if (skb)
		rpkt->channel_type = 0;		/* 0 is RMC_DATA */
	else
		rpkt->channel_type = 1;		/* 1 is RMC_CONTROL */

	rpkt->send_buf_section_index = packet->send_buf_index;
	if (packet->send_buf_index == NETVSC_INVALID_INDEX)
		rpkt->send_buf_section_size = 0;
	else
		rpkt->send_buf_section_size = packet->total_data_buflen;

	req_id = (ulong)skb;

	if (out_channel->rescind)
		return -ENODEV;

	trace_nvsp_send_pkt(ndev, out_channel, rpkt);

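	/* Packets whose data still lives in external pages (page_buf_cnt != 0)
	 * are sent with vmbus_sendpacket_pagebuffer() and, in isolation VMs,
	 * must first be remapped through the swiotlb bounce buffer by
	 * netvsc_dma_map(). Packets copied entirely into the pre-registered
	 * send buffer go out as plain inband messages.
	 */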
	packet->dma_range = NULL;
	if (packet->page_buf_cnt) {
		if (packet->cp_partial)
			pb += packet->rmsg_pgcnt;

		ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
		if (ret) {
			ret = -EAGAIN;
			goto exit;
		}

		ret = vmbus_sendpacket_pagebuffer(out_channel,
						  pb, packet->page_buf_cnt,
						  &nvmsg, sizeof(nvmsg),
						  req_id);

		if (ret)
			netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
	} else {
		ret = vmbus_sendpacket(out_channel,
				       &nvmsg, sizeof(nvmsg),
				       req_id, VM_PKT_DATA_INBAND,
				       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	}

exit:
	if (ret == 0) {
		atomic_inc_return(&nvchan->queue_sends);

		if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
			netif_tx_stop_queue(txq);
			ndev_ctx->eth_stats.stop_queue++;
		}
	} else if (ret == -EAGAIN) {
		netif_tx_stop_queue(txq);
		ndev_ctx->eth_stats.stop_queue++;
	} else {
		netdev_err(ndev,
			   "Unable to send packet pages %u len %u, ret %d\n",
			   packet->page_buf_cnt, packet->total_data_buflen,
			   ret);
	}

	if (netif_tx_queue_stopped(txq) &&
	    atomic_read(&nvchan->queue_sends) < 1 &&
	    !net_device->tx_disable) {
		netif_tx_wake_queue(txq);
		ndev_ctx->eth_stats.wake_queue++;
		if (ret == -EAGAIN)
			ret = -ENOSPC;
	}

	return ret;
}

/* Move packet out of multi send data (msd), and clear msd */
static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
				struct sk_buff **msd_skb,
				struct multi_send_data *msdp)
{
	*msd_skb = msdp->skb;
	*msd_send = msdp->pkt;
	msdp->skb = NULL;
	msdp->pkt = NULL;
	msdp->count = 0;
}

/* RCU already held by caller.
 *
 * The batching/bouncing logic is designed to optimize performance.
 *
 * For small, non-LSO packets we copy the packet to a send buffer
 * which is pre-registered with the Hyper-V side. This enables the
 * hypervisor to avoid remapping the aperture to access the packet
 * descriptor and data.
 *
 * If we already started using a buffer and the netdev is transmitting
 * a burst of packets, keep on copying into the buffer until it is
 * full or we are done collecting a burst. If there is an existing
 * buffer with space for the RNDIS descriptor but not the packet, copy
 * the RNDIS descriptor to the buffer, keeping the packet in place.
 *
 * If we do batching and send more than one packet using a single
 * NetVSC message, free the SKBs of the packets copied, except for the
 * last packet. This is done to streamline the handling of the case
 * where the last packet only had the RNDIS descriptor copied to the
 * send buffer, with the data pointers included in the NetVSC message.
 */
int netvsc_send(struct net_device *ndev,
		struct hv_netvsc_packet *packet,
		struct rndis_message *rndis_msg,
		struct hv_page_buffer *pb,
		struct sk_buff *skb,
		bool xdp_tx)
{
	struct net_device_context *ndev_ctx = netdev_priv(ndev);
	struct netvsc_device *net_device
		= rcu_dereference_bh(ndev_ctx->nvdev);
	struct hv_device *device = ndev_ctx->device_ctx;
	int ret = 0;
	struct netvsc_channel *nvchan;
	u32 pktlen = packet->total_data_buflen, msd_len = 0;
	unsigned int section_index = NETVSC_INVALID_INDEX;
	struct multi_send_data *msdp;
	struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
	struct sk_buff *msd_skb = NULL;
	bool try_batch, xmit_more;

	/* If the device is rescinded, return an error and the packet will get
	 * dropped.
	 */
	if (unlikely(!net_device || net_device->destroy))
		return -ENODEV;

	nvchan = &net_device->chan_table[packet->q_idx];
	packet->send_buf_index = NETVSC_INVALID_INDEX;
	packet->cp_partial = false;

	/* Send a control message or XDP packet directly without accessing
	 * the msd (Multi-Send Data) field, which may be changed during data
	 * packet processing.
	 */
	if (!skb || xdp_tx)
		return netvsc_send_pkt(device, packet, net_device, pb, skb);

	/* Batch packets in the send buffer if possible */
	msdp = &nvchan->msd;
	if (msdp->pkt)
		msd_len = msdp->pkt->total_data_buflen;

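	/* Three cases follow: (1) the whole packet fits after the pending
	 * data in the current batch section; (2) only the RNDIS descriptor
	 * fits, so copy just that and keep the data pages in place
	 * (cp_partial); (3) no existing batch is usable, so claim a fresh
	 * section and flush any pending batch.
	 */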
	try_batch = msd_len > 0 && msdp->count < net_device->max_pkt;
	if (try_batch && msd_len + pktlen + net_device->pkt_align <
	    net_device->send_section_size) {
		section_index = msdp->pkt->send_buf_index;

	} else if (try_batch && msd_len + packet->rmsg_size <
		   net_device->send_section_size) {
		section_index = msdp->pkt->send_buf_index;
		packet->cp_partial = true;

	} else if (pktlen + net_device->pkt_align <
		   net_device->send_section_size) {
		section_index = netvsc_get_next_send_section(net_device);
		if (unlikely(section_index == NETVSC_INVALID_INDEX)) {
			++ndev_ctx->eth_stats.tx_send_full;
		} else {
			move_pkt_msd(&msd_send, &msd_skb, msdp);
			msd_len = 0;
		}
	}

	/* Keep aggregating only if the stack says more data is coming,
	 * we are not doing mixed-mode sends, and flow is not blocked.
	 */
	xmit_more = netdev_xmit_more() &&
		!packet->cp_partial &&
		!netif_xmit_stopped(netdev_get_tx_queue(ndev, packet->q_idx));

	if (section_index != NETVSC_INVALID_INDEX) {
		netvsc_copy_to_send_buf(net_device,
					section_index, msd_len,
					packet, rndis_msg, pb, xmit_more);

		packet->send_buf_index = section_index;

		if (packet->cp_partial) {
			packet->page_buf_cnt -= packet->rmsg_pgcnt;
			packet->total_data_buflen = msd_len + packet->rmsg_size;
		} else {
			packet->page_buf_cnt = 0;
			packet->total_data_buflen += msd_len;
		}

		if (msdp->pkt) {
			packet->total_packets += msdp->pkt->total_packets;
			packet->total_bytes += msdp->pkt->total_bytes;
		}

		if (msdp->skb)
			dev_consume_skb_any(msdp->skb);

		if (xmit_more) {
			msdp->skb = skb;
			msdp->pkt = packet;
			msdp->count++;
		} else {
			cur_send = packet;
			msdp->skb = NULL;
			msdp->pkt = NULL;
			msdp->count = 0;
		}
	} else {
		move_pkt_msd(&msd_send, &msd_skb, msdp);
		cur_send = packet;
	}

	if (msd_send) {
		int m_ret = netvsc_send_pkt(device, msd_send, net_device,
					    NULL, msd_skb);

		if (m_ret != 0) {
			netvsc_free_send_slot(net_device,
					      msd_send->send_buf_index);
			dev_kfree_skb_any(msd_skb);
		}
	}

	if (cur_send)
		ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);

	if (ret != 0 && section_index != NETVSC_INVALID_INDEX)
		netvsc_free_send_slot(net_device, section_index);

	return ret;
}

/* Send pending recv completions */
static int send_recv_completions(struct net_device *ndev,
				 struct netvsc_device *nvdev,
				 struct netvsc_channel *nvchan)
{
	struct multi_recv_comp *mrc = &nvchan->mrc;
	struct recv_comp_msg {
		struct nvsp_message_header hdr;
		u32 status;
	} __packed;
	struct recv_comp_msg msg = {
		.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
	};
	int ret;

	while (mrc->first != mrc->next) {
		const struct recv_comp_data *rcd
			= mrc->slots + mrc->first;

		msg.status = rcd->status;
		ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
				       rcd->tid, VM_PKT_COMP, 0);
		if (unlikely(ret)) {
			struct net_device_context *ndev_ctx = netdev_priv(ndev);

			++ndev_ctx->eth_stats.rx_comp_busy;
			return ret;
		}

		if (++mrc->first == nvdev->recv_completion_cnt)
			mrc->first = 0;
	}

	/* receive completion ring has been emptied */
	if (unlikely(nvdev->destroy))
		wake_up(&nvdev->wait_drain);

	return 0;
}
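
/* The mrc ring buffers receive completions that could not be sent to the
 * host immediately: mrc->first indexes the oldest pending entry and
 * mrc->next the insertion slot. One slot is always left empty
 * (recv_completion_cnt is recv_section_cnt + 1), so first == next
 * unambiguously means the ring is empty.
 */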
/* Count how many receive completions are outstanding */
static void recv_comp_slot_avail(const struct netvsc_device *nvdev,
				 const struct multi_recv_comp *mrc,
				 u32 *filled, u32 *avail)
{
	u32 count = nvdev->recv_completion_cnt;

	if (mrc->next >= mrc->first)
		*filled = mrc->next - mrc->first;
	else
		*filled = (count - mrc->first) + mrc->next;

	*avail = count - *filled - 1;
}

/* Add a receive completion to the ring to send to the host. */
static void enq_receive_complete(struct net_device *ndev,
				 struct netvsc_device *nvdev, u16 q_idx,
				 u64 tid, u32 status)
{
	struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx];
	struct multi_recv_comp *mrc = &nvchan->mrc;
	struct recv_comp_data *rcd;
	u32 filled, avail;

	recv_comp_slot_avail(nvdev, mrc, &filled, &avail);

	if (unlikely(filled > NAPI_POLL_WEIGHT)) {
		send_recv_completions(ndev, nvdev, nvchan);
		recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
	}

	if (unlikely(!avail)) {
		netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
			   q_idx, tid);
		return;
	}

	rcd = mrc->slots + mrc->next;
	rcd->tid = tid;
	rcd->status = status;

	if (++mrc->next == nvdev->recv_completion_cnt)
		mrc->next = 0;
}

static int netvsc_receive(struct net_device *ndev,
			  struct netvsc_device *net_device,
			  struct netvsc_channel *nvchan,
			  const struct vmpacket_descriptor *desc)
{
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	struct vmbus_channel *channel = nvchan->channel;
	const struct vmtransfer_page_packet_header *vmxferpage_packet
		= container_of(desc, const struct vmtransfer_page_packet_header, d);
	const struct nvsp_message *nvsp = hv_pkt_data(desc);
	u32 msglen = hv_pkt_datalen(desc);
	u16 q_idx = channel->offermsg.offer.sub_channel_index;
	char *recv_buf = net_device->recv_buf;
	u32 status = NVSP_STAT_SUCCESS;
	int i;
	int count = 0;

	/* Ensure packet is big enough to read header fields */
	if (msglen < sizeof(struct nvsp_message_header)) {
		netif_err(net_device_ctx, rx_err, ndev,
			  "invalid nvsp header, length too small: %u\n",
			  msglen);
		return 0;
	}

	/* Make sure this is a valid nvsp packet */
	if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
		netif_err(net_device_ctx, rx_err, ndev,
			  "Unknown nvsp packet type received %u\n",
			  nvsp->hdr.msg_type);
		return 0;
	}

	/* Validate xfer page pkt header */
	if ((desc->offset8 << 3) < sizeof(struct vmtransfer_page_packet_header)) {
		netif_err(net_device_ctx, rx_err, ndev,
			  "Invalid xfer page pkt, offset too small: %u\n",
			  desc->offset8 << 3);
		return 0;
	}

	if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
		netif_err(net_device_ctx, rx_err, ndev,
			  "Invalid xfer page set id - expecting %x got %x\n",
			  NETVSC_RECEIVE_BUFFER_ID,
			  vmxferpage_packet->xfer_pageset_id);
		return 0;
	}

	count = vmxferpage_packet->range_cnt;

	/* Check count for a valid value */
	if (NETVSC_XFER_HEADER_SIZE(count) > desc->offset8 << 3) {
		netif_err(net_device_ctx, rx_err, ndev,
			  "Range count is not valid: %d\n",
			  count);
		return 0;
	}

	/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
	for (i = 0; i < count; i++) {
		u32 offset = vmxferpage_packet->ranges[i].byte_offset;
		u32 buflen = vmxferpage_packet->ranges[i].byte_count;
		void *data;
		int ret;

		if (unlikely(offset > net_device->recv_buf_size ||
			     buflen > net_device->recv_buf_size - offset)) {
			nvchan->rsc.cnt = 0;
			status = NVSP_STAT_FAIL;
			netif_err(net_device_ctx, rx_err, ndev,
				  "Packet offset:%u + len:%u too big\n",
				  offset, buflen);

			continue;
		}

		/* We're going to copy (sections of) the packet into nvchan->recv_buf;
		 * make sure that nvchan->recv_buf is large enough to hold the packet.
		 */
		if (unlikely(buflen > net_device->recv_section_size)) {
			nvchan->rsc.cnt = 0;
			status = NVSP_STAT_FAIL;
			netif_err(net_device_ctx, rx_err, ndev,
				  "Packet too big: buflen=%u recv_section_size=%u\n",
				  buflen, net_device->recv_section_size);

			continue;
		}

		data = recv_buf + offset;

		nvchan->rsc.is_last = (i == count - 1);

		trace_rndis_recv(ndev, q_idx, data);

		/* Pass it to the upper layer */
		ret = rndis_filter_receive(ndev, net_device,
					   nvchan, data, buflen);

		if (unlikely(ret != NVSP_STAT_SUCCESS)) {
			/* Drop incomplete packet */
			nvchan->rsc.cnt = 0;
			status = NVSP_STAT_FAIL;
		}
	}

	enq_receive_complete(ndev, net_device, q_idx,
			     vmxferpage_packet->d.trans_id, status);

	return count;
}

static void netvsc_send_table(struct net_device *ndev,
			      struct netvsc_device *nvscdev,
			      const struct nvsp_message *nvmsg,
			      u32 msglen)
{
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	u32 count, offset, *tab;
	int i;

	/* Ensure packet is big enough to read send_table fields */
	if (msglen < sizeof(struct nvsp_message_header) +
	    sizeof(struct nvsp_5_send_indirect_table)) {
		netdev_err(ndev, "nvsp_v5_msg length too small: %u\n", msglen);
		return;
	}

	count = nvmsg->msg.v5_msg.send_table.count;
	offset = nvmsg->msg.v5_msg.send_table.offset;

	if (count != VRSS_SEND_TAB_SIZE) {
		netdev_err(ndev, "Received wrong send-table size:%u\n", count);
		return;
	}

	/* If the negotiated version <= NVSP_PROTOCOL_VERSION_6, the offset may
	 * be wrong due to a host bug. So fix the offset here.
	 */
	if (nvscdev->nvsp_version <= NVSP_PROTOCOL_VERSION_6 &&
	    msglen >= sizeof(struct nvsp_message_header) +
	    sizeof(union nvsp_6_message_uber) + count * sizeof(u32))
		offset = sizeof(struct nvsp_message_header) +
			 sizeof(union nvsp_6_message_uber);

	/* Boundary check for all versions */
	if (msglen < count * sizeof(u32) || offset > msglen - count * sizeof(u32)) {
		netdev_err(ndev, "Received send-table offset too big:%u\n",
			   offset);
		return;
	}

	tab = (void *)nvmsg + offset;

	for (i = 0; i < count; i++)
		net_device_ctx->tx_table[i] = tab[i];
}

static void netvsc_send_vf(struct net_device *ndev,
			   const struct nvsp_message *nvmsg,
			   u32 msglen)
{
	struct net_device_context *net_device_ctx = netdev_priv(ndev);

	/* Ensure packet is big enough to read its fields */
	if (msglen < sizeof(struct nvsp_message_header) +
	    sizeof(struct nvsp_4_send_vf_association)) {
		netdev_err(ndev, "nvsp_v4_msg length too small: %u\n", msglen);
		return;
	}

	net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
	net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;

	if (net_device_ctx->vf_alloc)
		complete(&net_device_ctx->vf_add);

	netdev_info(ndev, "VF slot %u %s\n",
		    net_device_ctx->vf_serial,
		    net_device_ctx->vf_alloc ? "added" : "removed");
}

static void netvsc_receive_inband(struct net_device *ndev,
				  struct netvsc_device *nvscdev,
				  const struct vmpacket_descriptor *desc)
{
	const struct nvsp_message *nvmsg = hv_pkt_data(desc);
	u32 msglen = hv_pkt_datalen(desc);

	/* Ensure packet is big enough to read header fields */
	if (msglen < sizeof(struct nvsp_message_header)) {
		netdev_err(ndev, "inband nvsp_message length too small: %u\n", msglen);
		return;
	}

	switch (nvmsg->hdr.msg_type) {
	case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
		netvsc_send_table(ndev, nvscdev, nvmsg, msglen);
		break;

	case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
		if (hv_is_isolation_supported())
			netdev_err(ndev, "Ignore VF_ASSOCIATION msg from the host supporting isolation\n");
		else
			netvsc_send_vf(ndev, nvmsg, msglen);
		break;
	}
}

1596 | static int netvsc_process_raw_pkt(struct hv_device *device, |
1597 | struct netvsc_channel *nvchan, |
1598 | struct netvsc_device *net_device, |
1599 | struct net_device *ndev, |
1600 | const struct vmpacket_descriptor *desc, |
1601 | int budget) |
1602 | { |
1603 | struct vmbus_channel *channel = nvchan->channel; |
1604 | const struct nvsp_message *nvmsg = hv_pkt_data(desc); |
1605 | |
1606 | trace_nvsp_recv(ndev, chan: channel, msg: nvmsg); |
1607 | |
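	/* Only data packets (VM_PKT_DATA_USING_XFER_PAGES) contribute to the
	 * caller's NAPI work counter; completions and inband control
	 * messages are handled here but count as zero work.
	 */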
	switch (desc->type) {
	case VM_PKT_COMP:
		netvsc_send_completion(ndev, net_device, channel, desc, budget);
		break;

	case VM_PKT_DATA_USING_XFER_PAGES:
		return netvsc_receive(ndev, net_device, nvchan, desc);

	case VM_PKT_DATA_INBAND:
		netvsc_receive_inband(ndev, net_device, desc);
		break;

	default:
		netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
			   desc->type, desc->trans_id);
		break;
	}

	return 0;
}

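/* Sub-channels have no device object of their own; map any channel back to
 * the hv_device of its primary channel.
 */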
static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
{
	struct vmbus_channel *primary = channel->primary_channel;

	return primary ? primary->device_obj : channel->device_obj;
}

/* Network processing softirq
 * Process data in incoming ring buffer from host.
 * Stops when the ring is empty or the budget is met or exceeded.
 */
int netvsc_poll(struct napi_struct *napi, int budget)
{
	struct netvsc_channel *nvchan
		= container_of(napi, struct netvsc_channel, napi);
	struct netvsc_device *net_device = nvchan->net_device;
	struct vmbus_channel *channel = nvchan->channel;
	struct hv_device *device = netvsc_channel_to_device(channel);
	struct net_device *ndev = hv_get_drvdata(device);
	int work_done = 0;
	int ret;

	/* If starting a new interval */
	if (!nvchan->desc)
		nvchan->desc = hv_pkt_iter_first(channel);

	nvchan->xdp_flush = false;

	while (nvchan->desc && work_done < budget) {
		work_done += netvsc_process_raw_pkt(device, nvchan, net_device,
						    ndev, nvchan->desc, budget);
		nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
	}

	if (nvchan->xdp_flush)
		xdp_do_flush();

	/* Send any pending receive completions */
	ret = send_recv_completions(ndev, net_device, nvchan);

	/* If this poll did not exhaust the NAPI budget and we are not busy
	 * polling, complete NAPI and re-enable host interrupts; reschedule
	 * if more data arrived meanwhile (hv_end_read() returns nonzero) or
	 * if sending a receive completion failed.
	 */
	if (work_done < budget &&
	    napi_complete_done(napi, work_done) &&
	    (ret || hv_end_read(&channel->inbound)) &&
	    napi_schedule_prep(napi)) {
		hv_begin_read(&channel->inbound);
		__napi_schedule(napi);
	}

	/* Driver may overshoot since multiple packets per descriptor */
	return min(work_done, budget);
}

/* Callback when data is available in the host ring buffer.
 * Processing is deferred until the network softirq (NAPI).
 */
void netvsc_channel_cb(void *context)
{
	struct netvsc_channel *nvchan = context;
	struct vmbus_channel *channel = nvchan->channel;
	struct hv_ring_buffer_info *rbi = &channel->inbound;

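	/* Prefetching here, in interrupt context, is a best-effort cache
	 * warm-up for the NAPI poll that is about to run on this CPU.
	 */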
	/* preload first vmpacket descriptor */
	prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);

	if (napi_schedule_prep(&nvchan->napi)) {
		/* disable interrupts from host */
		hv_begin_read(rbi);

		__napi_schedule_irqoff(&nvchan->napi);
	}
}

/*
 * netvsc_device_add - Callback when the device belonging to this
 * driver is added
 */
struct netvsc_device *netvsc_device_add(struct hv_device *device,
				const struct netvsc_device_info *device_info)
{
	int i, ret = 0;
	struct netvsc_device *net_device;
	struct net_device *ndev = hv_get_drvdata(device);
	struct net_device_context *net_device_ctx = netdev_priv(ndev);

	net_device = alloc_net_device();
	if (!net_device)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
		net_device_ctx->tx_table[i] = 0;

	/* Because the device uses NAPI, all the interrupt batching and
	 * control is done via Net softirq, not the channel handling
	 */
	set_channel_read_mode(device->channel, HV_CALL_ISR);

	/* If we're reopening the device we may have multiple queues, fill the
	 * chn_table with the default channel to use it before subchannels are
	 * opened.
	 * Initialize the channel state before we open;
	 * we can be interrupted as soon as we open the channel.
	 */

	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
		struct netvsc_channel *nvchan = &net_device->chan_table[i];

		nvchan->channel = device->channel;
		nvchan->net_device = net_device;
		u64_stats_init(&nvchan->tx_stats.syncp);
		u64_stats_init(&nvchan->rx_stats.syncp);

		ret = xdp_rxq_info_reg(&nvchan->xdp_rxq, ndev, i, 0);

		if (ret) {
			netdev_err(ndev, "xdp_rxq_info_reg fail: %d\n", ret);
			goto cleanup2;
		}

		ret = xdp_rxq_info_reg_mem_model(&nvchan->xdp_rxq,
						 MEM_TYPE_PAGE_SHARED, NULL);

		if (ret) {
			netdev_err(ndev, "xdp reg_mem_model fail: %d\n", ret);
			goto cleanup2;
		}
	}

	/* Enable NAPI handler before init callbacks */
	netif_napi_add(ndev, &net_device->chan_table[0].napi, netvsc_poll);

	/* Open the channel */
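	/* The request-id callbacks route transaction IDs through the
	 * channel's request allocator (rqstor), so a host completion is
	 * translated back to a guest address only for an ID we issued.
	 */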
	device->channel->next_request_id_callback = vmbus_next_request_id;
	device->channel->request_addr_callback = vmbus_request_addr;
	device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
	device->channel->max_pkt_size = NETVSC_MAX_PKT_SIZE;

	ret = vmbus_open(device->channel, netvsc_ring_bytes,
			 netvsc_ring_bytes, NULL, 0,
			 netvsc_channel_cb, net_device->chan_table);

	if (ret != 0) {
		netdev_err(ndev, "unable to open channel: %d\n", ret);
		goto cleanup;
	}

	/* Channel is opened */
	netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");

	napi_enable(&net_device->chan_table[0].napi);

	/* Connect with the NetVsp */
	ret = netvsc_connect_vsp(device, net_device, device_info);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to connect to NetVSP - %d\n", ret);
		goto close;
	}

	/* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
	 * populated.
	 */
	rcu_assign_pointer(net_device_ctx->nvdev, net_device);

	return net_device;

close:
	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
	napi_disable(&net_device->chan_table[0].napi);

	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

cleanup:
	netif_napi_del(&net_device->chan_table[0].napi);

cleanup2:
	free_netvsc_device(&net_device->rcu);

	return ERR_PTR(ret);
}
