1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (c) 2009, Microsoft Corporation. |
4 | * |
5 | * Authors: |
6 | * Haiyang Zhang <haiyangz@microsoft.com> |
7 | * Hank Janssen <hjanssen@microsoft.com> |
8 | */ |
9 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
10 | |
11 | #include <linux/kernel.h> |
12 | #include <linux/sched.h> |
13 | #include <linux/wait.h> |
14 | #include <linux/mm.h> |
15 | #include <linux/delay.h> |
16 | #include <linux/io.h> |
17 | #include <linux/slab.h> |
18 | #include <linux/netdevice.h> |
19 | #include <linux/if_ether.h> |
20 | #include <linux/vmalloc.h> |
21 | #include <linux/rtnetlink.h> |
22 | #include <linux/prefetch.h> |
23 | #include <linux/filter.h> |
24 | |
25 | #include <asm/sync_bitops.h> |
26 | #include <asm/mshyperv.h> |
27 | |
28 | #include "hyperv_net.h" |
29 | #include "netvsc_trace.h" |
30 | |
31 | /* |
32 | * Switch the data path from the synthetic interface to the VF |
33 | * interface. |
34 | */ |
35 | int netvsc_switch_datapath(struct net_device *ndev, bool vf) |
36 | { |
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
38 | struct hv_device *dev = net_device_ctx->device_ctx; |
39 | struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev); |
40 | struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt; |
41 | int ret, retry = 0; |
42 | |
	/* Block sending traffic to the VF if it's about to be gone. When
	 * switching to the VF, data_path_is_vf is set only after the host
	 * confirms the switch, at the end of this function.
	 */
44 | if (!vf) |
45 | net_device_ctx->data_path_is_vf = vf; |
46 | |
47 | memset(init_pkt, 0, sizeof(struct nvsp_message)); |
48 | init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH; |
49 | if (vf) |
50 | init_pkt->msg.v4_msg.active_dp.active_datapath = |
51 | NVSP_DATAPATH_VF; |
52 | else |
53 | init_pkt->msg.v4_msg.active_dp.active_datapath = |
54 | NVSP_DATAPATH_SYNTHETIC; |
55 | |
56 | again: |
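	/* vmbus_sendpacket() may transiently fail with -EAGAIN when the
	 * ring is full; retry up to RETRY_MAX times, sleeping briefly
	 * between attempts.
	 */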
	trace_nvsp_send(ndev, init_pkt);
58 | |
	ret = vmbus_sendpacket(dev->channel, init_pkt,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_pkt, VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
63 | |
	/* If we failed to switch to/from the VF, let data_path_is_vf stay
	 * false, so the synthetic path is used to send data.
	 */
67 | if (ret) { |
68 | if (ret != -EAGAIN) { |
			netdev_err(ndev,
				   "Unable to send sw datapath msg, err: %d\n",
				   ret);
72 | return ret; |
73 | } |
74 | |
75 | if (retry++ < RETRY_MAX) { |
76 | usleep_range(RETRY_US_LO, RETRY_US_HI); |
77 | goto again; |
78 | } else { |
			netdev_err(ndev,
				   "Retry failed to send sw datapath msg, err: %d\n",
				   ret);
83 | return ret; |
84 | } |
85 | } |
86 | |
87 | wait_for_completion(&nv_dev->channel_init_wait); |
88 | net_device_ctx->data_path_is_vf = vf; |
89 | |
90 | return 0; |
91 | } |
92 | |
/* Worker to set up sub-channels during initial setup.
 * The initial hotplug event occurs in softirq context
 * and cannot wait for the channels.
 */
97 | static void netvsc_subchan_work(struct work_struct *w) |
98 | { |
99 | struct netvsc_device *nvdev = |
100 | container_of(w, struct netvsc_device, subchan_work); |
101 | struct rndis_device *rdev; |
102 | int i, ret; |
103 | |
104 | /* Avoid deadlock with device removal already under RTNL */ |
105 | if (!rtnl_trylock()) { |
		schedule_work(w);
107 | return; |
108 | } |
109 | |
110 | rdev = nvdev->extension; |
111 | if (rdev) { |
		ret = rndis_set_subchannel(rdev->ndev, nvdev, NULL);
113 | if (ret == 0) { |
			netif_device_attach(rdev->ndev);
115 | } else { |
			/* fall back to using only the primary channel */
117 | for (i = 1; i < nvdev->num_chn; i++) |
				netif_napi_del(&nvdev->chan_table[i].napi);
119 | |
120 | nvdev->max_chn = 1; |
121 | nvdev->num_chn = 1; |
122 | } |
123 | } |
124 | |
125 | rtnl_unlock(); |
126 | } |
127 | |
128 | static struct netvsc_device *alloc_net_device(void) |
129 | { |
130 | struct netvsc_device *net_device; |
131 | |
	net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
133 | if (!net_device) |
134 | return NULL; |
135 | |
136 | init_waitqueue_head(&net_device->wait_drain); |
137 | net_device->destroy = false; |
138 | net_device->tx_disable = true; |
139 | |
140 | net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; |
141 | net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; |
142 | |
	init_completion(&net_device->channel_init_wait);
144 | init_waitqueue_head(&net_device->subchan_open); |
145 | INIT_WORK(&net_device->subchan_work, netvsc_subchan_work); |
146 | |
147 | return net_device; |
148 | } |
149 | |
150 | static void free_netvsc_device(struct rcu_head *head) |
151 | { |
152 | struct netvsc_device *nvdev |
153 | = container_of(head, struct netvsc_device, rcu); |
154 | int i; |
155 | |
	kfree(nvdev->extension);
157 | |
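	/* If a buffer could not be re-encrypted after GPADL teardown, it is
	 * still visible to the host, so leak it rather than free it.
	 */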
	if (!nvdev->recv_buf_gpadl_handle.decrypted)
		vfree(nvdev->recv_buf);
	if (!nvdev->send_buf_gpadl_handle.decrypted)
		vfree(nvdev->send_buf);
	bitmap_free(nvdev->send_section_map);
163 | |
164 | for (i = 0; i < VRSS_CHANNEL_MAX; i++) { |
		xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq);
		kfree(nvdev->chan_table[i].recv_buf);
		vfree(nvdev->chan_table[i].mrc.slots);
168 | } |
169 | |
	kfree(nvdev);
171 | } |
172 | |
173 | static void free_netvsc_device_rcu(struct netvsc_device *nvdev) |
174 | { |
	call_rcu(&nvdev->rcu, free_netvsc_device);
176 | } |
177 | |
178 | static void netvsc_revoke_recv_buf(struct hv_device *device, |
179 | struct netvsc_device *net_device, |
180 | struct net_device *ndev) |
181 | { |
182 | struct nvsp_message *revoke_packet; |
183 | int ret; |
184 | |
	/*
	 * If we got a section count, it means we received a
	 * SendReceiveBufferComplete msg (i.e. we sent a
	 * NvspMessage1TypeSendReceiveBuffer msg) and therefore need
	 * to send a revoke msg here.
	 */
191 | if (net_device->recv_section_cnt) { |
192 | /* Send the revoke receive buffer */ |
193 | revoke_packet = &net_device->revoke_packet; |
194 | memset(revoke_packet, 0, sizeof(struct nvsp_message)); |
195 | |
196 | revoke_packet->hdr.msg_type = |
197 | NVSP_MSG1_TYPE_REVOKE_RECV_BUF; |
198 | revoke_packet->msg.v1_msg. |
199 | revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID; |
200 | |
		trace_nvsp_send(ndev, revoke_packet);
202 | |
		ret = vmbus_sendpacket(device->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       VMBUS_RQST_ID_NO_RESPONSE,
				       VM_PKT_DATA_INBAND, 0);
		/* If the failure is because the channel is rescinded,
		 * ignore it, since we cannot send on a rescinded
		 * channel. This allows us to clean up properly
		 * even when the channel is rescinded.
		 */
213 | if (device->channel->rescind) |
214 | ret = 0; |
		/*
		 * If we failed here, we might as well return and
		 * have a leak rather than continue and hit a bugcheck.
		 */
219 | if (ret != 0) { |
			netdev_err(ndev, "unable to send "
				   "revoke receive buffer to netvsp\n");
222 | return; |
223 | } |
224 | net_device->recv_section_cnt = 0; |
225 | } |
226 | } |
227 | |
228 | static void netvsc_revoke_send_buf(struct hv_device *device, |
229 | struct netvsc_device *net_device, |
230 | struct net_device *ndev) |
231 | { |
232 | struct nvsp_message *revoke_packet; |
233 | int ret; |
234 | |
	/* Deal with the send buffer we may have set up.
	 * If we got a send section size, it means we received a
	 * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (i.e. we sent a
	 * NVSP_MSG1_TYPE_SEND_SEND_BUF msg) and therefore need
	 * to send a revoke msg here.
	 */
241 | if (net_device->send_section_cnt) { |
		/* Send the revoke send buffer */
243 | revoke_packet = &net_device->revoke_packet; |
244 | memset(revoke_packet, 0, sizeof(struct nvsp_message)); |
245 | |
246 | revoke_packet->hdr.msg_type = |
247 | NVSP_MSG1_TYPE_REVOKE_SEND_BUF; |
248 | revoke_packet->msg.v1_msg.revoke_send_buf.id = |
249 | NETVSC_SEND_BUFFER_ID; |
250 | |
		trace_nvsp_send(ndev, revoke_packet);
252 | |
		ret = vmbus_sendpacket(device->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       VMBUS_RQST_ID_NO_RESPONSE,
				       VM_PKT_DATA_INBAND, 0);
258 | |
		/* If the failure is because the channel is rescinded,
		 * ignore it, since we cannot send on a rescinded
		 * channel. This allows us to clean up properly
		 * even when the channel is rescinded.
		 */
264 | if (device->channel->rescind) |
265 | ret = 0; |
266 | |
		/* If we failed here, we might as well return and
		 * have a leak rather than continue and hit a bugcheck.
		 */
270 | if (ret != 0) { |
			netdev_err(ndev, "unable to send "
				   "revoke send buffer to netvsp\n");
273 | return; |
274 | } |
275 | net_device->send_section_cnt = 0; |
276 | } |
277 | } |
278 | |
279 | static void netvsc_teardown_recv_gpadl(struct hv_device *device, |
280 | struct netvsc_device *net_device, |
281 | struct net_device *ndev) |
282 | { |
283 | int ret; |
284 | |
285 | if (net_device->recv_buf_gpadl_handle.gpadl_handle) { |
		ret = vmbus_teardown_gpadl(device->channel,
					   &net_device->recv_buf_gpadl_handle);
288 | |
		/* If we failed here, we might as well return and have a leak
		 * rather than continue and hit a bugcheck.
		 */
292 | if (ret != 0) { |
			netdev_err(ndev,
				   "unable to teardown receive buffer's gpadl\n");
295 | return; |
296 | } |
297 | } |
298 | } |
299 | |
300 | static void netvsc_teardown_send_gpadl(struct hv_device *device, |
301 | struct netvsc_device *net_device, |
302 | struct net_device *ndev) |
303 | { |
304 | int ret; |
305 | |
306 | if (net_device->send_buf_gpadl_handle.gpadl_handle) { |
		ret = vmbus_teardown_gpadl(device->channel,
					   &net_device->send_buf_gpadl_handle);
309 | |
		/* If we failed here, we might as well return and have a leak
		 * rather than continue and hit a bugcheck.
		 */
313 | if (ret != 0) { |
			netdev_err(ndev,
				   "unable to teardown send buffer's gpadl\n");
316 | return; |
317 | } |
318 | } |
319 | } |
320 | |
321 | int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx) |
322 | { |
323 | struct netvsc_channel *nvchan = &net_device->chan_table[q_idx]; |
	int node = cpu_to_node(nvchan->channel->target_cpu);
325 | size_t size; |
326 | |
327 | size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data); |
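	/* Prefer memory on the node of the channel's target CPU; fall back
	 * to an allocation on any node if that fails.
	 */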
328 | nvchan->mrc.slots = vzalloc_node(size, node); |
329 | if (!nvchan->mrc.slots) |
330 | nvchan->mrc.slots = vzalloc(size); |
331 | |
332 | return nvchan->mrc.slots ? 0 : -ENOMEM; |
333 | } |
334 | |
335 | static int netvsc_init_buf(struct hv_device *device, |
336 | struct netvsc_device *net_device, |
337 | const struct netvsc_device_info *device_info) |
338 | { |
339 | struct nvsp_1_message_send_receive_buffer_complete *resp; |
	struct net_device *ndev = hv_get_drvdata(device);
341 | struct nvsp_message *init_packet; |
342 | unsigned int buf_size; |
343 | int i, ret = 0; |
344 | |
345 | /* Get receive buffer area. */ |
346 | buf_size = device_info->recv_sections * device_info->recv_section_size; |
347 | buf_size = roundup(buf_size, PAGE_SIZE); |
348 | |
	/* Legacy hosts only allow a smaller receive buffer */
350 | if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2) |
351 | buf_size = min_t(unsigned int, buf_size, |
352 | NETVSC_RECEIVE_BUFFER_SIZE_LEGACY); |
353 | |
	net_device->recv_buf = vzalloc(buf_size);
355 | if (!net_device->recv_buf) { |
		netdev_err(ndev,
			   "unable to allocate receive buffer of size %u\n",
			   buf_size);
359 | ret = -ENOMEM; |
360 | goto cleanup; |
361 | } |
362 | |
363 | net_device->recv_buf_size = buf_size; |
364 | |
365 | /* |
366 | * Establish the gpadl handle for this buffer on this |
367 | * channel. Note: This call uses the vmbus connection rather |
368 | * than the channel to establish the gpadl handle. |
369 | */ |
	ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
				    buf_size,
				    &net_device->recv_buf_gpadl_handle);
373 | if (ret != 0) { |
		netdev_err(ndev,
			   "unable to establish receive buffer's gpadl\n");
376 | goto cleanup; |
377 | } |
378 | |
379 | /* Notify the NetVsp of the gpadl handle */ |
380 | init_packet = &net_device->channel_init_pkt; |
381 | memset(init_packet, 0, sizeof(struct nvsp_message)); |
382 | init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF; |
383 | init_packet->msg.v1_msg.send_recv_buf. |
384 | gpadl_handle = net_device->recv_buf_gpadl_handle.gpadl_handle; |
385 | init_packet->msg.v1_msg. |
386 | send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID; |
387 | |
	trace_nvsp_send(ndev, init_packet);
389 | |
390 | /* Send the gpadl notification request */ |
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
396 | if (ret != 0) { |
		netdev_err(ndev,
			   "unable to send receive buffer's gpadl to netvsp\n");
399 | goto cleanup; |
400 | } |
401 | |
402 | wait_for_completion(&net_device->channel_init_wait); |
403 | |
404 | /* Check the response */ |
405 | resp = &init_packet->msg.v1_msg.send_recv_buf_complete; |
406 | if (resp->status != NVSP_STAT_SUCCESS) { |
		netdev_err(ndev,
			   "Unable to complete receive buffer initialization with NetVsp - status %d\n",
			   resp->status);
410 | ret = -EINVAL; |
411 | goto cleanup; |
412 | } |
413 | |
414 | /* Parse the response */ |
	netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
416 | resp->num_sections, resp->sections[0].sub_alloc_size, |
417 | resp->sections[0].num_sub_allocs); |
418 | |
419 | /* There should only be one section for the entire receive buffer */ |
420 | if (resp->num_sections != 1 || resp->sections[0].offset != 0) { |
421 | ret = -EINVAL; |
422 | goto cleanup; |
423 | } |
424 | |
425 | net_device->recv_section_size = resp->sections[0].sub_alloc_size; |
426 | net_device->recv_section_cnt = resp->sections[0].num_sub_allocs; |
427 | |
428 | /* Ensure buffer will not overflow */ |
429 | if (net_device->recv_section_size < NETVSC_MTU_MIN || (u64)net_device->recv_section_size * |
430 | (u64)net_device->recv_section_cnt > (u64)buf_size) { |
		netdev_err(ndev, "invalid recv_section_size %u\n",
			   net_device->recv_section_size);
433 | ret = -EINVAL; |
434 | goto cleanup; |
435 | } |
436 | |
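	/* Pre-allocate a per-channel staging buffer, one receive section in
	 * size; the receive path copies packet data into it before handing
	 * the packet up the stack.
	 */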
437 | for (i = 0; i < VRSS_CHANNEL_MAX; i++) { |
438 | struct netvsc_channel *nvchan = &net_device->chan_table[i]; |
439 | |
		nvchan->recv_buf = kzalloc(net_device->recv_section_size, GFP_KERNEL);
441 | if (nvchan->recv_buf == NULL) { |
442 | ret = -ENOMEM; |
443 | goto cleanup; |
444 | } |
445 | } |
446 | |
447 | /* Setup receive completion ring. |
448 | * Add 1 to the recv_section_cnt because at least one entry in a |
449 | * ring buffer has to be empty. |
450 | */ |
451 | net_device->recv_completion_cnt = net_device->recv_section_cnt + 1; |
	ret = netvsc_alloc_recv_comp_ring(net_device, 0);
453 | if (ret) |
454 | goto cleanup; |
455 | |
456 | /* Now setup the send buffer. */ |
457 | buf_size = device_info->send_sections * device_info->send_section_size; |
458 | buf_size = round_up(buf_size, PAGE_SIZE); |
459 | |
	net_device->send_buf = vzalloc(buf_size);
461 | if (!net_device->send_buf) { |
		netdev_err(ndev, "unable to allocate send buffer of size %u\n",
			   buf_size);
464 | ret = -ENOMEM; |
465 | goto cleanup; |
466 | } |
467 | net_device->send_buf_size = buf_size; |
468 | |
469 | /* Establish the gpadl handle for this buffer on this |
470 | * channel. Note: This call uses the vmbus connection rather |
471 | * than the channel to establish the gpadl handle. |
472 | */ |
	ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
				    buf_size,
				    &net_device->send_buf_gpadl_handle);
476 | if (ret != 0) { |
		netdev_err(ndev,
			   "unable to establish send buffer's gpadl\n");
479 | goto cleanup; |
480 | } |
481 | |
482 | /* Notify the NetVsp of the gpadl handle */ |
483 | init_packet = &net_device->channel_init_pkt; |
484 | memset(init_packet, 0, sizeof(struct nvsp_message)); |
485 | init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF; |
486 | init_packet->msg.v1_msg.send_send_buf.gpadl_handle = |
487 | net_device->send_buf_gpadl_handle.gpadl_handle; |
488 | init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID; |
489 | |
	trace_nvsp_send(ndev, init_packet);
491 | |
492 | /* Send the gpadl notification request */ |
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
498 | if (ret != 0) { |
		netdev_err(ndev,
			   "unable to send send buffer's gpadl to netvsp\n");
501 | goto cleanup; |
502 | } |
503 | |
504 | wait_for_completion(&net_device->channel_init_wait); |
505 | |
506 | /* Check the response */ |
507 | if (init_packet->msg.v1_msg. |
508 | send_send_buf_complete.status != NVSP_STAT_SUCCESS) { |
		netdev_err(ndev, "Unable to complete send buffer "
			   "initialization with NetVsp - status %d\n",
			   init_packet->msg.v1_msg.
			   send_send_buf_complete.status);
513 | ret = -EINVAL; |
514 | goto cleanup; |
515 | } |
516 | |
517 | /* Parse the response */ |
518 | net_device->send_section_size = init_packet->msg. |
519 | v1_msg.send_send_buf_complete.section_size; |
520 | if (net_device->send_section_size < NETVSC_MTU_MIN) { |
		netdev_err(ndev, "invalid send_section_size %u\n",
			   net_device->send_section_size);
523 | ret = -EINVAL; |
524 | goto cleanup; |
525 | } |
526 | |
527 | /* Section count is simply the size divided by the section size. */ |
528 | net_device->send_section_cnt = buf_size / net_device->send_section_size; |
529 | |
	netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
531 | net_device->send_section_size, net_device->send_section_cnt); |
532 | |
533 | /* Setup state for managing the send buffer. */ |
	net_device->send_section_map = bitmap_zalloc(net_device->send_section_cnt,
						     GFP_KERNEL);
536 | if (!net_device->send_section_map) { |
537 | ret = -ENOMEM; |
538 | goto cleanup; |
539 | } |
540 | |
541 | goto exit; |
542 | |
543 | cleanup: |
544 | netvsc_revoke_recv_buf(device, net_device, ndev); |
545 | netvsc_revoke_send_buf(device, net_device, ndev); |
546 | netvsc_teardown_recv_gpadl(device, net_device, ndev); |
547 | netvsc_teardown_send_gpadl(device, net_device, ndev); |
548 | |
549 | exit: |
550 | return ret; |
551 | } |
552 | |
553 | /* Negotiate NVSP protocol version */ |
554 | static int negotiate_nvsp_ver(struct hv_device *device, |
555 | struct netvsc_device *net_device, |
556 | struct nvsp_message *init_packet, |
557 | u32 nvsp_ver) |
558 | { |
	struct net_device *ndev = hv_get_drvdata(device);
560 | int ret; |
561 | |
562 | memset(init_packet, 0, sizeof(struct nvsp_message)); |
563 | init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT; |
564 | init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver; |
565 | init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver; |
	trace_nvsp_send(ndev, init_packet);
567 | |
568 | /* Send the init request */ |
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
574 | |
575 | if (ret != 0) |
576 | return ret; |
577 | |
578 | wait_for_completion(&net_device->channel_init_wait); |
579 | |
580 | if (init_packet->msg.init_msg.init_complete.status != |
581 | NVSP_STAT_SUCCESS) |
582 | return -EINVAL; |
583 | |
584 | if (nvsp_ver == NVSP_PROTOCOL_VERSION_1) |
585 | return 0; |
586 | |
587 | /* NVSPv2 or later: Send NDIS config */ |
588 | memset(init_packet, 0, sizeof(struct nvsp_message)); |
589 | init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG; |
590 | init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN; |
591 | init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1; |
592 | |
593 | if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) { |
594 | if (hv_is_isolation_supported()) |
			netdev_info(ndev, "SR-IOV not advertised by guests on the host supporting isolation\n");
596 | else |
597 | init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1; |
598 | |
599 | /* Teaming bit is needed to receive link speed updates */ |
600 | init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1; |
601 | } |
602 | |
603 | if (nvsp_ver >= NVSP_PROTOCOL_VERSION_61) |
604 | init_packet->msg.v2_msg.send_ndis_config.capability.rsc = 1; |
605 | |
	trace_nvsp_send(ndev, init_packet);
607 | |
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       VMBUS_RQST_ID_NO_RESPONSE,
			       VM_PKT_DATA_INBAND, 0);
612 | |
613 | return ret; |
614 | } |
615 | |
616 | static int netvsc_connect_vsp(struct hv_device *device, |
617 | struct netvsc_device *net_device, |
618 | const struct netvsc_device_info *device_info) |
619 | { |
	struct net_device *ndev = hv_get_drvdata(device);
621 | static const u32 ver_list[] = { |
622 | NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, |
623 | NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5, |
624 | NVSP_PROTOCOL_VERSION_6, NVSP_PROTOCOL_VERSION_61 |
625 | }; |
626 | struct nvsp_message *init_packet; |
627 | int ndis_version, i, ret; |
628 | |
629 | init_packet = &net_device->channel_init_pkt; |
630 | |
631 | /* Negotiate the latest NVSP protocol supported */ |
632 | for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--) |
		if (negotiate_nvsp_ver(device, net_device, init_packet,
				       ver_list[i]) == 0) {
635 | net_device->nvsp_version = ver_list[i]; |
636 | break; |
637 | } |
638 | |
639 | if (i < 0) { |
640 | ret = -EPROTO; |
641 | goto cleanup; |
642 | } |
643 | |
644 | if (hv_is_isolation_supported() && net_device->nvsp_version < NVSP_PROTOCOL_VERSION_61) { |
		netdev_err(ndev, "Invalid NVSP version 0x%x (expected >= 0x%x) from the host supporting isolation\n",
			   net_device->nvsp_version, NVSP_PROTOCOL_VERSION_61);
647 | ret = -EPROTO; |
648 | goto cleanup; |
649 | } |
650 | |
	pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);
652 | |
653 | /* Send the ndis version */ |
654 | memset(init_packet, 0, sizeof(struct nvsp_message)); |
655 | |
656 | if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4) |
657 | ndis_version = 0x00060001; |
658 | else |
659 | ndis_version = 0x0006001e; |
660 | |
661 | init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER; |
662 | init_packet->msg.v1_msg. |
663 | send_ndis_ver.ndis_major_ver = |
664 | (ndis_version & 0xFFFF0000) >> 16; |
665 | init_packet->msg.v1_msg. |
666 | send_ndis_ver.ndis_minor_ver = |
667 | ndis_version & 0xFFFF; |
668 | |
	trace_nvsp_send(ndev, init_packet);
670 | |
671 | /* Send the init request */ |
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       VMBUS_RQST_ID_NO_RESPONSE,
			       VM_PKT_DATA_INBAND, 0);
676 | if (ret != 0) |
677 | goto cleanup; |
679 | |
680 | ret = netvsc_init_buf(device, net_device, device_info); |
681 | |
682 | cleanup: |
683 | return ret; |
684 | } |
685 | |
686 | /* |
687 | * netvsc_device_remove - Callback when the root bus device is removed |
688 | */ |
689 | void netvsc_device_remove(struct hv_device *device) |
690 | { |
	struct net_device *ndev = hv_get_drvdata(device);
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
693 | struct netvsc_device *net_device |
694 | = rtnl_dereference(net_device_ctx->nvdev); |
695 | int i; |
696 | |
697 | /* |
698 | * Revoke receive buffer. If host is pre-Win2016 then tear down |
699 | * receive buffer GPADL. Do the same for send buffer. |
700 | */ |
701 | netvsc_revoke_recv_buf(device, net_device, ndev); |
702 | if (vmbus_proto_version < VERSION_WIN10) |
703 | netvsc_teardown_recv_gpadl(device, net_device, ndev); |
704 | |
705 | netvsc_revoke_send_buf(device, net_device, ndev); |
706 | if (vmbus_proto_version < VERSION_WIN10) |
707 | netvsc_teardown_send_gpadl(device, net_device, ndev); |
708 | |
709 | RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); |
710 | |
711 | /* Disable NAPI and disassociate its context from the device. */ |
712 | for (i = 0; i < net_device->num_chn; i++) { |
713 | /* See also vmbus_reset_channel_cb(). */ |
714 | /* only disable enabled NAPI channel */ |
715 | if (i < ndev->real_num_rx_queues) |
			napi_disable(&net_device->chan_table[i].napi);
717 | |
		netif_napi_del(&net_device->chan_table[i].napi);
719 | } |
720 | |
721 | /* |
722 | * At this point, no one should be accessing net_device |
723 | * except in here |
724 | */ |
	netdev_dbg(ndev, "net device safe to remove\n");
726 | |
727 | /* Now, we can close the channel safely */ |
	vmbus_close(device->channel);
729 | |
730 | /* |
731 | * If host is Win2016 or higher then we do the GPADL tear down |
732 | * here after VMBus is closed. |
733 | */ |
734 | if (vmbus_proto_version >= VERSION_WIN10) { |
735 | netvsc_teardown_recv_gpadl(device, net_device, ndev); |
736 | netvsc_teardown_send_gpadl(device, net_device, ndev); |
737 | } |
738 | |
739 | /* Release all resources */ |
	free_netvsc_device_rcu(net_device);
741 | } |
742 | |
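/* Flow-control watermarks, as a percentage of free ring space: the TX queue
 * is stopped when free space drops below LOWATER and woken again once it
 * rises above HIWATER.
 */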
743 | #define RING_AVAIL_PERCENT_HIWATER 20 |
744 | #define RING_AVAIL_PERCENT_LOWATER 10 |
745 | |
746 | static inline void netvsc_free_send_slot(struct netvsc_device *net_device, |
747 | u32 index) |
748 | { |
	sync_change_bit(index, net_device->send_section_map);
750 | } |
751 | |
752 | static void netvsc_send_tx_complete(struct net_device *ndev, |
753 | struct netvsc_device *net_device, |
754 | struct vmbus_channel *channel, |
755 | const struct vmpacket_descriptor *desc, |
756 | int budget) |
757 | { |
	struct net_device_context *ndev_ctx = netdev_priv(ndev);
759 | struct sk_buff *skb; |
760 | u16 q_idx = 0; |
761 | int queue_sends; |
762 | u64 cmd_rqst; |
763 | |
764 | cmd_rqst = channel->request_addr_callback(channel, desc->trans_id); |
765 | if (cmd_rqst == VMBUS_RQST_ERROR) { |
		netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id);
767 | return; |
768 | } |
769 | |
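	/* The request ID recorded at send time is the skb pointer, or NULL
	 * for control messages sent without an skb.
	 */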
770 | skb = (struct sk_buff *)(unsigned long)cmd_rqst; |
771 | |
772 | /* Notify the layer above us */ |
773 | if (likely(skb)) { |
774 | struct hv_netvsc_packet *packet |
775 | = (struct hv_netvsc_packet *)skb->cb; |
776 | u32 send_index = packet->send_buf_index; |
777 | struct netvsc_stats_tx *tx_stats; |
778 | |
779 | if (send_index != NETVSC_INVALID_INDEX) |
			netvsc_free_send_slot(net_device, send_index);
781 | q_idx = packet->q_idx; |
782 | |
783 | tx_stats = &net_device->chan_table[q_idx].tx_stats; |
784 | |
		u64_stats_update_begin(&tx_stats->syncp);
		tx_stats->packets += packet->total_packets;
		tx_stats->bytes += packet->total_bytes;
		u64_stats_update_end(&tx_stats->syncp);
789 | |
		netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
791 | napi_consume_skb(skb, budget); |
792 | } |
793 | |
794 | queue_sends = |
		atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);
796 | |
797 | if (unlikely(net_device->destroy)) { |
798 | if (queue_sends == 0) |
799 | wake_up(&net_device->wait_drain); |
800 | } else { |
		struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);
802 | |
		if (netif_tx_queue_stopped(txq) && !net_device->tx_disable &&
		    (hv_get_avail_to_write_percent(&channel->outbound) >
		     RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) {
			netif_tx_wake_queue(txq);
807 | ndev_ctx->eth_stats.wake_queue++; |
808 | } |
809 | } |
810 | } |
811 | |
812 | static void netvsc_send_completion(struct net_device *ndev, |
813 | struct netvsc_device *net_device, |
814 | struct vmbus_channel *incoming_channel, |
815 | const struct vmpacket_descriptor *desc, |
816 | int budget) |
817 | { |
818 | const struct nvsp_message *nvsp_packet; |
819 | u32 msglen = hv_pkt_datalen(desc); |
820 | struct nvsp_message *pkt_rqst; |
821 | u64 cmd_rqst; |
822 | u32 status; |
823 | |
824 | /* First check if this is a VMBUS completion without data payload */ |
825 | if (!msglen) { |
826 | cmd_rqst = incoming_channel->request_addr_callback(incoming_channel, |
827 | desc->trans_id); |
828 | if (cmd_rqst == VMBUS_RQST_ERROR) { |
			netdev_err(ndev, "Invalid transaction ID %llx\n", desc->trans_id);
830 | return; |
831 | } |
832 | |
833 | pkt_rqst = (struct nvsp_message *)(uintptr_t)cmd_rqst; |
834 | switch (pkt_rqst->hdr.msg_type) { |
835 | case NVSP_MSG4_TYPE_SWITCH_DATA_PATH: |
836 | complete(&net_device->channel_init_wait); |
837 | break; |
838 | |
839 | default: |
			netdev_err(ndev, "Unexpected VMBUS completion!!\n");
841 | } |
842 | return; |
843 | } |
844 | |
845 | /* Ensure packet is big enough to read header fields */ |
846 | if (msglen < sizeof(struct nvsp_message_header)) { |
		netdev_err(ndev, "nvsp_message length too small: %u\n", msglen);
848 | return; |
849 | } |
850 | |
851 | nvsp_packet = hv_pkt_data(desc); |
852 | switch (nvsp_packet->hdr.msg_type) { |
853 | case NVSP_MSG_TYPE_INIT_COMPLETE: |
854 | if (msglen < sizeof(struct nvsp_message_header) + |
855 | sizeof(struct nvsp_message_init_complete)) { |
			netdev_err(ndev, "nvsp_msg length too small: %u\n",
				   msglen);
858 | return; |
859 | } |
860 | break; |
861 | |
862 | case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE: |
863 | if (msglen < sizeof(struct nvsp_message_header) + |
864 | sizeof(struct nvsp_1_message_send_receive_buffer_complete)) { |
			netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
				   msglen);
867 | return; |
868 | } |
869 | break; |
870 | |
871 | case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE: |
872 | if (msglen < sizeof(struct nvsp_message_header) + |
873 | sizeof(struct nvsp_1_message_send_send_buffer_complete)) { |
			netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
				   msglen);
876 | return; |
877 | } |
878 | break; |
879 | |
880 | case NVSP_MSG5_TYPE_SUBCHANNEL: |
881 | if (msglen < sizeof(struct nvsp_message_header) + |
882 | sizeof(struct nvsp_5_subchannel_complete)) { |
			netdev_err(ndev, "nvsp_msg5 length too small: %u\n",
				   msglen);
885 | return; |
886 | } |
887 | break; |
888 | |
889 | case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE: |
890 | if (msglen < sizeof(struct nvsp_message_header) + |
891 | sizeof(struct nvsp_1_message_send_rndis_packet_complete)) { |
892 | if (net_ratelimit()) |
				netdev_err(ndev, "nvsp_rndis_pkt_complete length too small: %u\n",
					   msglen);
895 | return; |
896 | } |
897 | |
898 | /* If status indicates an error, output a message so we know |
899 | * there's a problem. But process the completion anyway so the |
900 | * resources are released. |
901 | */ |
902 | status = nvsp_packet->msg.v1_msg.send_rndis_pkt_complete.status; |
903 | if (status != NVSP_STAT_SUCCESS && net_ratelimit()) |
			netdev_err(ndev, "nvsp_rndis_pkt_complete error status: %x\n",
				   status);
906 | |
		netvsc_send_tx_complete(ndev, net_device, incoming_channel,
					desc, budget);
909 | return; |
910 | |
911 | default: |
		netdev_err(ndev,
			   "Unknown send completion type %d received!!\n",
			   nvsp_packet->hdr.msg_type);
915 | return; |
916 | } |
917 | |
918 | /* Copy the response back */ |
919 | memcpy(&net_device->channel_init_pkt, nvsp_packet, |
920 | sizeof(struct nvsp_message)); |
921 | complete(&net_device->channel_init_wait); |
922 | } |
923 | |
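/* Find a free send-buffer section and claim it. The section map is shared
 * across queues, so sync_test_and_set_bit() is used to guard against two
 * senders claiming the same clear bit concurrently.
 */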
924 | static u32 netvsc_get_next_send_section(struct netvsc_device *net_device) |
925 | { |
926 | unsigned long *map_addr = net_device->send_section_map; |
927 | unsigned int i; |
928 | |
929 | for_each_clear_bit(i, map_addr, net_device->send_section_cnt) { |
		if (sync_test_and_set_bit(i, map_addr) == 0)
931 | return i; |
932 | } |
933 | |
934 | return NETVSC_INVALID_INDEX; |
935 | } |
936 | |
937 | static void netvsc_copy_to_send_buf(struct netvsc_device *net_device, |
938 | unsigned int section_index, |
939 | u32 pend_size, |
940 | struct hv_netvsc_packet *packet, |
941 | struct rndis_message *rndis_msg, |
942 | struct hv_page_buffer *pb, |
943 | bool xmit_more) |
944 | { |
945 | char *start = net_device->send_buf; |
946 | char *dest = start + (section_index * net_device->send_section_size) |
947 | + pend_size; |
948 | int i; |
949 | u32 padding = 0; |
950 | u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt : |
951 | packet->page_buf_cnt; |
952 | u32 remain; |
953 | |
	/* Pad the message so the next RNDIS message batched into this send
	 * section stays aligned to pkt_align; padding is only needed when
	 * more packets are coming (xmit_more).
	 */
955 | remain = packet->total_data_buflen & (net_device->pkt_align - 1); |
956 | if (xmit_more && remain) { |
957 | padding = net_device->pkt_align - remain; |
958 | rndis_msg->msg_len += padding; |
959 | packet->total_data_buflen += padding; |
960 | } |
961 | |
962 | for (i = 0; i < page_count; i++) { |
		char *src = phys_to_virt(pb[i].pfn << HV_HYP_PAGE_SHIFT);
964 | u32 offset = pb[i].offset; |
965 | u32 len = pb[i].len; |
966 | |
967 | memcpy(dest, (src + offset), len); |
968 | dest += len; |
969 | } |
970 | |
971 | if (padding) |
972 | memset(dest, 0, padding); |
973 | } |
974 | |
975 | void netvsc_dma_unmap(struct hv_device *hv_dev, |
976 | struct hv_netvsc_packet *packet) |
977 | { |
978 | int i; |
979 | |
980 | if (!hv_is_isolation_supported()) |
981 | return; |
982 | |
983 | if (!packet->dma_range) |
984 | return; |
985 | |
986 | for (i = 0; i < packet->page_buf_cnt; i++) |
987 | dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma, |
988 | packet->dma_range[i].mapping_size, |
989 | DMA_TO_DEVICE); |
990 | |
	kfree(packet->dma_range);
992 | } |
993 | |
994 | /* netvsc_dma_map - Map swiotlb bounce buffer with data page of |
995 | * packet sent by vmbus_sendpacket_pagebuffer() in the Isolation |
996 | * VM. |
997 | * |
998 | * In isolation VM, netvsc send buffer has been marked visible to |
999 | * host and so the data copied to send buffer doesn't need to use |
1000 | * bounce buffer. The data pages handled by vmbus_sendpacket_pagebuffer() |
1001 | * may not be copied to send buffer and so these pages need to be |
1002 | * mapped with swiotlb bounce buffer. netvsc_dma_map() is to do |
1003 | * that. The pfns in the struct hv_page_buffer need to be converted |
1004 | * to bounce buffer's pfn. The loop here is necessary because the |
1005 | * entries in the page buffer array are not necessarily full |
1006 | * pages of data. Each entry in the array has a separate offset and |
1007 | * len that may be non-zero, even for entries in the middle of the |
 * array. And the entries are not physically contiguous, so each
 * entry must be individually mapped rather than as a contiguous unit.
 * Hence dma_map_sg() is not used here.
1011 | */ |
1012 | static int netvsc_dma_map(struct hv_device *hv_dev, |
1013 | struct hv_netvsc_packet *packet, |
1014 | struct hv_page_buffer *pb) |
1015 | { |
1016 | u32 page_count = packet->page_buf_cnt; |
1017 | dma_addr_t dma; |
1018 | int i; |
1019 | |
1020 | if (!hv_is_isolation_supported()) |
1021 | return 0; |
1022 | |
	packet->dma_range = kcalloc(page_count,
				    sizeof(*packet->dma_range),
				    GFP_ATOMIC);
1026 | if (!packet->dma_range) |
1027 | return -ENOMEM; |
1028 | |
1029 | for (i = 0; i < page_count; i++) { |
		char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
					 + pb[i].offset);
1032 | u32 len = pb[i].len; |
1033 | |
1034 | dma = dma_map_single(&hv_dev->device, src, len, |
1035 | DMA_TO_DEVICE); |
		if (dma_mapping_error(&hv_dev->device, dma)) {
			kfree(packet->dma_range);
1038 | return -ENOMEM; |
1039 | } |
1040 | |
		/* pb[].offset and pb[].len are not changed during dma mapping
		 * and so are not reassigned.
		 */
1044 | packet->dma_range[i].dma = dma; |
1045 | packet->dma_range[i].mapping_size = len; |
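		/* Redirect the page buffer's pfn to the bounce buffer so the
		 * host reads from the swiotlb copy.
		 */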
1046 | pb[i].pfn = dma >> HV_HYP_PAGE_SHIFT; |
1047 | } |
1048 | |
1049 | return 0; |
1050 | } |
1051 | |
1052 | static inline int netvsc_send_pkt( |
1053 | struct hv_device *device, |
1054 | struct hv_netvsc_packet *packet, |
1055 | struct netvsc_device *net_device, |
1056 | struct hv_page_buffer *pb, |
1057 | struct sk_buff *skb) |
1058 | { |
1059 | struct nvsp_message nvmsg; |
1060 | struct nvsp_1_message_send_rndis_packet *rpkt = |
1061 | &nvmsg.msg.v1_msg.send_rndis_pkt; |
1062 | struct netvsc_channel * const nvchan = |
1063 | &net_device->chan_table[packet->q_idx]; |
1064 | struct vmbus_channel *out_channel = nvchan->channel; |
	struct net_device *ndev = hv_get_drvdata(device);
	struct net_device_context *ndev_ctx = netdev_priv(ndev);
	struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
1068 | u64 req_id; |
1069 | int ret; |
	u32 ring_avail = hv_get_avail_to_write_percent(&out_channel->outbound);
1071 | |
1072 | memset(&nvmsg, 0, sizeof(struct nvsp_message)); |
1073 | nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT; |
1074 | if (skb) |
1075 | rpkt->channel_type = 0; /* 0 is RMC_DATA */ |
1076 | else |
1077 | rpkt->channel_type = 1; /* 1 is RMC_CONTROL */ |
1078 | |
1079 | rpkt->send_buf_section_index = packet->send_buf_index; |
1080 | if (packet->send_buf_index == NETVSC_INVALID_INDEX) |
1081 | rpkt->send_buf_section_size = 0; |
1082 | else |
1083 | rpkt->send_buf_section_size = packet->total_data_buflen; |
1084 | |
1085 | req_id = (ulong)skb; |
1086 | |
1087 | if (out_channel->rescind) |
1088 | return -ENODEV; |
1089 | |
	trace_nvsp_send_pkt(ndev, out_channel, rpkt);
1091 | |
1092 | packet->dma_range = NULL; |
1093 | if (packet->page_buf_cnt) { |
1094 | if (packet->cp_partial) |
1095 | pb += packet->rmsg_pgcnt; |
1096 | |
		ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
1098 | if (ret) { |
1099 | ret = -EAGAIN; |
1100 | goto exit; |
1101 | } |
1102 | |
		ret = vmbus_sendpacket_pagebuffer(out_channel,
						  pb, packet->page_buf_cnt,
						  &nvmsg, sizeof(nvmsg),
						  req_id);
1107 | |
1108 | if (ret) |
			netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
1110 | } else { |
		ret = vmbus_sendpacket(out_channel,
				       &nvmsg, sizeof(nvmsg),
				       req_id, VM_PKT_DATA_INBAND,
				       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
1115 | } |
1116 | |
1117 | exit: |
1118 | if (ret == 0) { |
		atomic_inc_return(&nvchan->queue_sends);
1120 | |
1121 | if (ring_avail < RING_AVAIL_PERCENT_LOWATER) { |
			netif_tx_stop_queue(txq);
1123 | ndev_ctx->eth_stats.stop_queue++; |
1124 | } |
1125 | } else if (ret == -EAGAIN) { |
		netif_tx_stop_queue(txq);
1127 | ndev_ctx->eth_stats.stop_queue++; |
1128 | } else { |
		netdev_err(ndev,
			   "Unable to send packet pages %u len %u, ret %d\n",
			   packet->page_buf_cnt, packet->total_data_buflen,
			   ret);
1133 | } |
1134 | |
	if (netif_tx_queue_stopped(txq) &&
	    atomic_read(&nvchan->queue_sends) < 1 &&
	    !net_device->tx_disable) {
		netif_tx_wake_queue(txq);
1139 | ndev_ctx->eth_stats.wake_queue++; |
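		/* The queue was just woken; report -ENOSPC instead of
		 * -EAGAIN so the caller does not immediately retry against
		 * a ring that was full a moment ago.
		 */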
1140 | if (ret == -EAGAIN) |
1141 | ret = -ENOSPC; |
1142 | } |
1143 | |
1144 | return ret; |
1145 | } |
1146 | |
1147 | /* Move packet out of multi send data (msd), and clear msd */ |
1148 | static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send, |
1149 | struct sk_buff **msd_skb, |
1150 | struct multi_send_data *msdp) |
1151 | { |
1152 | *msd_skb = msdp->skb; |
1153 | *msd_send = msdp->pkt; |
1154 | msdp->skb = NULL; |
1155 | msdp->pkt = NULL; |
1156 | msdp->count = 0; |
1157 | } |
1158 | |
1159 | /* RCU already held by caller */ |
1160 | /* Batching/bouncing logic is designed to attempt to optimize |
1161 | * performance. |
1162 | * |
1163 | * For small, non-LSO packets we copy the packet to a send buffer |
1164 | * which is pre-registered with the Hyper-V side. This enables the |
1165 | * hypervisor to avoid remapping the aperture to access the packet |
1166 | * descriptor and data. |
1167 | * |
1168 | * If we already started using a buffer and the netdev is transmitting |
1169 | * a burst of packets, keep on copying into the buffer until it is |
1170 | * full or we are done collecting a burst. If there is an existing |
1171 | * buffer with space for the RNDIS descriptor but not the packet, copy |
1172 | * the RNDIS descriptor to the buffer, keeping the packet in place. |
1173 | * |
1174 | * If we do batching and send more than one packet using a single |
1175 | * NetVSC message, free the SKBs of the packets copied, except for the |
1176 | * last packet. This is done to streamline the handling of the case |
1177 | * where the last packet only had the RNDIS descriptor copied to the |
1178 | * send buffer, with the data pointers included in the NetVSC message. |
1179 | */ |
1180 | int netvsc_send(struct net_device *ndev, |
1181 | struct hv_netvsc_packet *packet, |
1182 | struct rndis_message *rndis_msg, |
1183 | struct hv_page_buffer *pb, |
1184 | struct sk_buff *skb, |
1185 | bool xdp_tx) |
1186 | { |
	struct net_device_context *ndev_ctx = netdev_priv(ndev);
1188 | struct netvsc_device *net_device |
1189 | = rcu_dereference_bh(ndev_ctx->nvdev); |
1190 | struct hv_device *device = ndev_ctx->device_ctx; |
1191 | int ret = 0; |
1192 | struct netvsc_channel *nvchan; |
1193 | u32 pktlen = packet->total_data_buflen, msd_len = 0; |
1194 | unsigned int section_index = NETVSC_INVALID_INDEX; |
1195 | struct multi_send_data *msdp; |
1196 | struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL; |
1197 | struct sk_buff *msd_skb = NULL; |
1198 | bool try_batch, xmit_more; |
1199 | |
1200 | /* If device is rescinded, return error and packet will get dropped. */ |
1201 | if (unlikely(!net_device || net_device->destroy)) |
1202 | return -ENODEV; |
1203 | |
1204 | nvchan = &net_device->chan_table[packet->q_idx]; |
1205 | packet->send_buf_index = NETVSC_INVALID_INDEX; |
1206 | packet->cp_partial = false; |
1207 | |
1208 | /* Send a control message or XDP packet directly without accessing |
1209 | * msd (Multi-Send Data) field which may be changed during data packet |
1210 | * processing. |
1211 | */ |
1212 | if (!skb || xdp_tx) |
1213 | return netvsc_send_pkt(device, packet, net_device, pb, skb); |
1214 | |
1215 | /* batch packets in send buffer if possible */ |
1216 | msdp = &nvchan->msd; |
1217 | if (msdp->pkt) |
1218 | msd_len = msdp->pkt->total_data_buflen; |
1219 | |
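	/* Three batching cases: (1) the whole packet fits behind the data
	 * already pending in the current send section; (2) only the RNDIS
	 * descriptor fits, so copy it and send the data pages by reference
	 * (cp_partial); (3) otherwise claim a new section for this packet.
	 */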
1220 | try_batch = msd_len > 0 && msdp->count < net_device->max_pkt; |
1221 | if (try_batch && msd_len + pktlen + net_device->pkt_align < |
1222 | net_device->send_section_size) { |
1223 | section_index = msdp->pkt->send_buf_index; |
1224 | |
1225 | } else if (try_batch && msd_len + packet->rmsg_size < |
1226 | net_device->send_section_size) { |
1227 | section_index = msdp->pkt->send_buf_index; |
1228 | packet->cp_partial = true; |
1229 | |
1230 | } else if (pktlen + net_device->pkt_align < |
1231 | net_device->send_section_size) { |
1232 | section_index = netvsc_get_next_send_section(net_device); |
1233 | if (unlikely(section_index == NETVSC_INVALID_INDEX)) { |
1234 | ++ndev_ctx->eth_stats.tx_send_full; |
1235 | } else { |
			move_pkt_msd(&msd_send, &msd_skb, msdp);
1237 | msd_len = 0; |
1238 | } |
1239 | } |
1240 | |
	/* Keep aggregating only if the stack says more data is coming,
	 * we are not doing a mixed-mode send, and the queue is not
	 * flow-blocked.
	 */
1244 | xmit_more = netdev_xmit_more() && |
1245 | !packet->cp_partial && |
		    !netif_xmit_stopped(netdev_get_tx_queue(ndev, packet->q_idx));
1247 | |
1248 | if (section_index != NETVSC_INVALID_INDEX) { |
		netvsc_copy_to_send_buf(net_device,
					section_index, msd_len,
					packet, rndis_msg, pb, xmit_more);
1252 | |
1253 | packet->send_buf_index = section_index; |
1254 | |
1255 | if (packet->cp_partial) { |
1256 | packet->page_buf_cnt -= packet->rmsg_pgcnt; |
1257 | packet->total_data_buflen = msd_len + packet->rmsg_size; |
1258 | } else { |
1259 | packet->page_buf_cnt = 0; |
1260 | packet->total_data_buflen += msd_len; |
1261 | } |
1262 | |
1263 | if (msdp->pkt) { |
1264 | packet->total_packets += msdp->pkt->total_packets; |
1265 | packet->total_bytes += msdp->pkt->total_bytes; |
1266 | } |
1267 | |
1268 | if (msdp->skb) |
			dev_consume_skb_any(msdp->skb);
1270 | |
1271 | if (xmit_more) { |
1272 | msdp->skb = skb; |
1273 | msdp->pkt = packet; |
1274 | msdp->count++; |
1275 | } else { |
1276 | cur_send = packet; |
1277 | msdp->skb = NULL; |
1278 | msdp->pkt = NULL; |
1279 | msdp->count = 0; |
1280 | } |
1281 | } else { |
		move_pkt_msd(&msd_send, &msd_skb, msdp);
1283 | cur_send = packet; |
1284 | } |
1285 | |
1286 | if (msd_send) { |
		int m_ret = netvsc_send_pkt(device, msd_send, net_device,
					    NULL, msd_skb);
1289 | |
1290 | if (m_ret != 0) { |
			netvsc_free_send_slot(net_device,
					      msd_send->send_buf_index);
			dev_kfree_skb_any(msd_skb);
1294 | } |
1295 | } |
1296 | |
1297 | if (cur_send) |
		ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);
1299 | |
1300 | if (ret != 0 && section_index != NETVSC_INVALID_INDEX) |
		netvsc_free_send_slot(net_device, section_index);
1302 | |
1303 | return ret; |
1304 | } |
1305 | |
1306 | /* Send pending recv completions */ |
1307 | static int send_recv_completions(struct net_device *ndev, |
1308 | struct netvsc_device *nvdev, |
1309 | struct netvsc_channel *nvchan) |
1310 | { |
1311 | struct multi_recv_comp *mrc = &nvchan->mrc; |
1312 | struct recv_comp_msg { |
1313 | struct nvsp_message_header hdr; |
1314 | u32 status; |
1315 | } __packed; |
1316 | struct recv_comp_msg msg = { |
1317 | .hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE, |
1318 | }; |
1319 | int ret; |
1320 | |
1321 | while (mrc->first != mrc->next) { |
1322 | const struct recv_comp_data *rcd |
1323 | = mrc->slots + mrc->first; |
1324 | |
1325 | msg.status = rcd->status; |
		ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
				       rcd->tid, VM_PKT_COMP, 0);
1328 | if (unlikely(ret)) { |
			struct net_device_context *ndev_ctx = netdev_priv(ndev);
1330 | |
1331 | ++ndev_ctx->eth_stats.rx_comp_busy; |
1332 | return ret; |
1333 | } |
1334 | |
1335 | if (++mrc->first == nvdev->recv_completion_cnt) |
1336 | mrc->first = 0; |
1337 | } |
1338 | |
1339 | /* receive completion ring has been emptied */ |
1340 | if (unlikely(nvdev->destroy)) |
1341 | wake_up(&nvdev->wait_drain); |
1342 | |
1343 | return 0; |
1344 | } |
1345 | |
1346 | /* Count how many receive completions are outstanding */ |
1347 | static void recv_comp_slot_avail(const struct netvsc_device *nvdev, |
1348 | const struct multi_recv_comp *mrc, |
1349 | u32 *filled, u32 *avail) |
1350 | { |
1351 | u32 count = nvdev->recv_completion_cnt; |
1352 | |
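	/* The completion ring is circular: first is the oldest pending
	 * entry and next is the insertion point. One slot is always left
	 * empty so that first == next unambiguously means "ring empty".
	 */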
1353 | if (mrc->next >= mrc->first) |
1354 | *filled = mrc->next - mrc->first; |
1355 | else |
1356 | *filled = (count - mrc->first) + mrc->next; |
1357 | |
1358 | *avail = count - *filled - 1; |
1359 | } |
1360 | |
1361 | /* Add receive complete to ring to send to host. */ |
1362 | static void enq_receive_complete(struct net_device *ndev, |
1363 | struct netvsc_device *nvdev, u16 q_idx, |
1364 | u64 tid, u32 status) |
1365 | { |
1366 | struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx]; |
1367 | struct multi_recv_comp *mrc = &nvchan->mrc; |
1368 | struct recv_comp_data *rcd; |
1369 | u32 filled, avail; |
1370 | |
	recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
1372 | |
1373 | if (unlikely(filled > NAPI_POLL_WEIGHT)) { |
1374 | send_recv_completions(ndev, nvdev, nvchan); |
		recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
1376 | } |
1377 | |
1378 | if (unlikely(!avail)) { |
		netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
			   q_idx, tid);
1381 | return; |
1382 | } |
1383 | |
1384 | rcd = mrc->slots + mrc->next; |
1385 | rcd->tid = tid; |
1386 | rcd->status = status; |
1387 | |
1388 | if (++mrc->next == nvdev->recv_completion_cnt) |
1389 | mrc->next = 0; |
1390 | } |
1391 | |
1392 | static int netvsc_receive(struct net_device *ndev, |
1393 | struct netvsc_device *net_device, |
1394 | struct netvsc_channel *nvchan, |
1395 | const struct vmpacket_descriptor *desc) |
1396 | { |
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
1398 | struct vmbus_channel *channel = nvchan->channel; |
1399 | const struct vmtransfer_page_packet_header *vmxferpage_packet |
1400 | = container_of(desc, const struct vmtransfer_page_packet_header, d); |
1401 | const struct nvsp_message *nvsp = hv_pkt_data(desc); |
1402 | u32 msglen = hv_pkt_datalen(desc); |
1403 | u16 q_idx = channel->offermsg.offer.sub_channel_index; |
1404 | char *recv_buf = net_device->recv_buf; |
1405 | u32 status = NVSP_STAT_SUCCESS; |
1406 | int i; |
1407 | int count = 0; |
1408 | |
1409 | /* Ensure packet is big enough to read header fields */ |
1410 | if (msglen < sizeof(struct nvsp_message_header)) { |
1411 | netif_err(net_device_ctx, rx_err, ndev, |
1412 | "invalid nvsp header, length too small: %u\n" , |
1413 | msglen); |
1414 | return 0; |
1415 | } |
1416 | |
1417 | /* Make sure this is a valid nvsp packet */ |
1418 | if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) { |
1419 | netif_err(net_device_ctx, rx_err, ndev, |
1420 | "Unknown nvsp packet type received %u\n" , |
1421 | nvsp->hdr.msg_type); |
1422 | return 0; |
1423 | } |
1424 | |
1425 | /* Validate xfer page pkt header */ |
1426 | if ((desc->offset8 << 3) < sizeof(struct vmtransfer_page_packet_header)) { |
1427 | netif_err(net_device_ctx, rx_err, ndev, |
1428 | "Invalid xfer page pkt, offset too small: %u\n" , |
1429 | desc->offset8 << 3); |
1430 | return 0; |
1431 | } |
1432 | |
1433 | if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) { |
1434 | netif_err(net_device_ctx, rx_err, ndev, |
1435 | "Invalid xfer page set id - expecting %x got %x\n" , |
1436 | NETVSC_RECEIVE_BUFFER_ID, |
1437 | vmxferpage_packet->xfer_pageset_id); |
1438 | return 0; |
1439 | } |
1440 | |
1441 | count = vmxferpage_packet->range_cnt; |
1442 | |
1443 | /* Check count for a valid value */ |
1444 | if (NETVSC_XFER_HEADER_SIZE(count) > desc->offset8 << 3) { |
1445 | netif_err(net_device_ctx, rx_err, ndev, |
1446 | "Range count is not valid: %d\n" , |
1447 | count); |
1448 | return 0; |
1449 | } |
1450 | |
1451 | /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */ |
1452 | for (i = 0; i < count; i++) { |
1453 | u32 offset = vmxferpage_packet->ranges[i].byte_offset; |
1454 | u32 buflen = vmxferpage_packet->ranges[i].byte_count; |
1455 | void *data; |
1456 | int ret; |
1457 | |
1458 | if (unlikely(offset > net_device->recv_buf_size || |
1459 | buflen > net_device->recv_buf_size - offset)) { |
1460 | nvchan->rsc.cnt = 0; |
1461 | status = NVSP_STAT_FAIL; |
1462 | netif_err(net_device_ctx, rx_err, ndev, |
1463 | "Packet offset:%u + len:%u too big\n" , |
1464 | offset, buflen); |
1465 | |
1466 | continue; |
1467 | } |
1468 | |
1469 | /* We're going to copy (sections of) the packet into nvchan->recv_buf; |
1470 | * make sure that nvchan->recv_buf is large enough to hold the packet. |
1471 | */ |
1472 | if (unlikely(buflen > net_device->recv_section_size)) { |
1473 | nvchan->rsc.cnt = 0; |
1474 | status = NVSP_STAT_FAIL; |
1475 | netif_err(net_device_ctx, rx_err, ndev, |
1476 | "Packet too big: buflen=%u recv_section_size=%u\n" , |
1477 | buflen, net_device->recv_section_size); |
1478 | |
1479 | continue; |
1480 | } |
1481 | |
1482 | data = recv_buf + offset; |
1483 | |
1484 | nvchan->rsc.is_last = (i == count - 1); |
1485 | |
		trace_rndis_recv(ndev, q_idx, data);
1487 | |
1488 | /* Pass it to the upper layer */ |
		ret = rndis_filter_receive(ndev, net_device,
					   nvchan, data, buflen);
1491 | |
1492 | if (unlikely(ret != NVSP_STAT_SUCCESS)) { |
1493 | /* Drop incomplete packet */ |
1494 | nvchan->rsc.cnt = 0; |
1495 | status = NVSP_STAT_FAIL; |
1496 | } |
1497 | } |
1498 | |
	enq_receive_complete(ndev, net_device, q_idx,
			     vmxferpage_packet->d.trans_id, status);
1501 | |
1502 | return count; |
1503 | } |
1504 | |
1505 | static void netvsc_send_table(struct net_device *ndev, |
1506 | struct netvsc_device *nvscdev, |
1507 | const struct nvsp_message *nvmsg, |
1508 | u32 msglen) |
1509 | { |
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
1511 | u32 count, offset, *tab; |
1512 | int i; |
1513 | |
1514 | /* Ensure packet is big enough to read send_table fields */ |
1515 | if (msglen < sizeof(struct nvsp_message_header) + |
1516 | sizeof(struct nvsp_5_send_indirect_table)) { |
		netdev_err(ndev, "nvsp_v5_msg length too small: %u\n", msglen);
1518 | return; |
1519 | } |
1520 | |
1521 | count = nvmsg->msg.v5_msg.send_table.count; |
1522 | offset = nvmsg->msg.v5_msg.send_table.offset; |
1523 | |
1524 | if (count != VRSS_SEND_TAB_SIZE) { |
		netdev_err(ndev, "Received wrong send-table size:%u\n", count);
1526 | return; |
1527 | } |
1528 | |
1529 | /* If negotiated version <= NVSP_PROTOCOL_VERSION_6, the offset may be |
1530 | * wrong due to a host bug. So fix the offset here. |
1531 | */ |
1532 | if (nvscdev->nvsp_version <= NVSP_PROTOCOL_VERSION_6 && |
1533 | msglen >= sizeof(struct nvsp_message_header) + |
1534 | sizeof(union nvsp_6_message_uber) + count * sizeof(u32)) |
1535 | offset = sizeof(struct nvsp_message_header) + |
1536 | sizeof(union nvsp_6_message_uber); |
1537 | |
1538 | /* Boundary check for all versions */ |
1539 | if (msglen < count * sizeof(u32) || offset > msglen - count * sizeof(u32)) { |
		netdev_err(ndev, "Received send-table offset too big:%u\n",
			   offset);
1542 | return; |
1543 | } |
1544 | |
1545 | tab = (void *)nvmsg + offset; |
1546 | |
1547 | for (i = 0; i < count; i++) |
1548 | net_device_ctx->tx_table[i] = tab[i]; |
1549 | } |
1550 | |
1551 | static void netvsc_send_vf(struct net_device *ndev, |
1552 | const struct nvsp_message *nvmsg, |
1553 | u32 msglen) |
1554 | { |
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
1556 | |
1557 | /* Ensure packet is big enough to read its fields */ |
1558 | if (msglen < sizeof(struct nvsp_message_header) + |
1559 | sizeof(struct nvsp_4_send_vf_association)) { |
		netdev_err(ndev, "nvsp_v4_msg length too small: %u\n", msglen);
1561 | return; |
1562 | } |
1563 | |
1564 | net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated; |
1565 | net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial; |
1566 | |
1567 | if (net_device_ctx->vf_alloc) |
1568 | complete(&net_device_ctx->vf_add); |
1569 | |
	netdev_info(ndev, "VF slot %u %s\n",
		    net_device_ctx->vf_serial,
		    net_device_ctx->vf_alloc ? "added" : "removed");
1573 | } |
1574 | |
static void netvsc_receive_inband(struct net_device *ndev,
				  struct netvsc_device *nvscdev,
				  const struct vmpacket_descriptor *desc)
{
	const struct nvsp_message *nvmsg = hv_pkt_data(desc);
	u32 msglen = hv_pkt_datalen(desc);

	/* Ensure packet is big enough to read header fields */
	if (msglen < sizeof(struct nvsp_message_header)) {
		netdev_err(ndev, "inband nvsp_message length too small: %u\n", msglen);
		return;
	}

	switch (nvmsg->hdr.msg_type) {
	case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
		netvsc_send_table(ndev, nvscdev, nvmsg, msglen);
		break;

	case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
		if (hv_is_isolation_supported())
			netdev_err(ndev, "Ignore VF_ASSOCIATION msg from the host supporting isolation\n");
		else
			netvsc_send_vf(ndev, nvmsg, msglen);
		break;
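
	/* Other inband message types are silently ignored. */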
	}
}

static int netvsc_process_raw_pkt(struct hv_device *device,
				  struct netvsc_channel *nvchan,
				  struct netvsc_device *net_device,
				  struct net_device *ndev,
				  const struct vmpacket_descriptor *desc,
				  int budget)
{
	struct vmbus_channel *channel = nvchan->channel;
	const struct nvsp_message *nvmsg = hv_pkt_data(desc);

	trace_nvsp_recv(ndev, channel, nvmsg);

	switch (desc->type) {
	case VM_PKT_COMP:
		netvsc_send_completion(ndev, net_device, channel, desc, budget);
		break;

	case VM_PKT_DATA_USING_XFER_PAGES:
		return netvsc_receive(ndev, net_device, nvchan, desc);

	case VM_PKT_DATA_INBAND:
		netvsc_receive_inband(ndev, net_device, desc);
		break;

	default:
		netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
			   desc->type, desc->trans_id);
		break;
	}

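	/* Only the xfer-page (data) path above returns a packet count;
	 * completions and inband control messages do not consume NAPI budget.
	 */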
	return 0;
}

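/* Sub-channels do not carry a device object of their own; the hv_device
 * hangs off the primary channel.
 */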
static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
{
	struct vmbus_channel *primary = channel->primary_channel;

	return primary ? primary->device_obj : channel->device_obj;
}

/* Network processing softirq
 * Process data in the incoming ring buffer from the host.
 * Stops when the ring is empty or the budget is met or exceeded.
 */
int netvsc_poll(struct napi_struct *napi, int budget)
{
	struct netvsc_channel *nvchan
		= container_of(napi, struct netvsc_channel, napi);
	struct netvsc_device *net_device = nvchan->net_device;
	struct vmbus_channel *channel = nvchan->channel;
	struct hv_device *device = netvsc_channel_to_device(channel);
	struct net_device *ndev = hv_get_drvdata(device);
	int work_done = 0;
	int ret;

	/* If starting a new interval */
	if (!nvchan->desc)
		nvchan->desc = hv_pkt_iter_first(channel);

	nvchan->xdp_flush = false;

	while (nvchan->desc && work_done < budget) {
		work_done += netvsc_process_raw_pkt(device, nvchan, net_device,
						    ndev, nvchan->desc, budget);
		nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
	}

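	/* Flush frames queued via XDP_REDIRECT during this poll in one batch */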
	if (nvchan->xdp_flush)
		xdp_do_flush();

	/* Send any pending receive completions */
	ret = send_recv_completions(ndev, net_device, nvchan);

	/* If the budget was not exhausted this time and we are not busy
	 * polling, re-enable host interrupts, and reschedule if the ring is
	 * not empty or sending a receive completion failed.
	 */
	if (work_done < budget &&
	    napi_complete_done(napi, work_done) &&
	    (ret || hv_end_read(&channel->inbound)) &&
	    napi_schedule_prep(napi)) {
		hv_begin_read(&channel->inbound);
		__napi_schedule(napi);
	}

	/* Driver may overshoot since multiple packets per descriptor */
	return min(work_done, budget);
}

/* Callback invoked when data is available in the host ring buffer.
 * Processing is deferred until the network softirq (NAPI).
 */
void netvsc_channel_cb(void *context)
{
	struct netvsc_channel *nvchan = context;
	struct vmbus_channel *channel = nvchan->channel;
	struct hv_ring_buffer_info *rbi = &channel->inbound;

	/* preload first vmpacket descriptor */
	prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);

	if (napi_schedule_prep(&nvchan->napi)) {
		/* disable interrupts from host */
		hv_begin_read(rbi);

		__napi_schedule_irqoff(&nvchan->napi);
	}
}

/*
 * netvsc_device_add - Callback when the device belonging to this
 * driver is added
 */
struct netvsc_device *netvsc_device_add(struct hv_device *device,
				const struct netvsc_device_info *device_info)
{
	int i, ret = 0;
	struct netvsc_device *net_device;
	struct net_device *ndev = hv_get_drvdata(device);
	struct net_device_context *net_device_ctx = netdev_priv(ndev);

	net_device = alloc_net_device();
	if (!net_device)
		return ERR_PTR(-ENOMEM);

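	/* Default all flows to queue 0 until the host sends an indirection
	 * table (see netvsc_send_table()).
	 */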
	for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
		net_device_ctx->tx_table[i] = 0;

	/* Because the device uses NAPI, all interrupt batching and control
	 * is done via the network softirq, not in the channel callback.
	 */
	set_channel_read_mode(device->channel, HV_CALL_ISR);

	/* If we're reopening the device we may have multiple queues; fill the
	 * chn_table with the default channel so it can be used before the
	 * subchannels are opened.
	 * Initialize the channel state before we open;
	 * we can be interrupted as soon as we open the channel.
	 */

	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
		struct netvsc_channel *nvchan = &net_device->chan_table[i];

		nvchan->channel = device->channel;
		nvchan->net_device = net_device;
		u64_stats_init(&nvchan->tx_stats.syncp);
		u64_stats_init(&nvchan->rx_stats.syncp);

		ret = xdp_rxq_info_reg(&nvchan->xdp_rxq, ndev, i, 0);

		if (ret) {
			netdev_err(ndev, "xdp_rxq_info_reg fail: %d\n", ret);
			goto cleanup2;
		}

		ret = xdp_rxq_info_reg_mem_model(&nvchan->xdp_rxq,
						 MEM_TYPE_PAGE_SHARED, NULL);

		if (ret) {
			netdev_err(ndev, "xdp reg_mem_model fail: %d\n", ret);
			goto cleanup2;
		}
	}

	/* Enable NAPI handler before init callbacks */
	netif_napi_add(ndev, &net_device->chan_table[0].napi, netvsc_poll);

	/* Open the channel */
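	/* Set up the VMBus requestor so transaction IDs on completions can be
	 * mapped back to the originating requests; size it for the ring
	 * buffer about to be opened.
	 */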
	device->channel->next_request_id_callback = vmbus_next_request_id;
	device->channel->request_addr_callback = vmbus_request_addr;
	device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
	device->channel->max_pkt_size = NETVSC_MAX_PKT_SIZE;

	ret = vmbus_open(device->channel, netvsc_ring_bytes,
			 netvsc_ring_bytes, NULL, 0,
			 netvsc_channel_cb, net_device->chan_table);

	if (ret != 0) {
		netdev_err(ndev, "unable to open channel: %d\n", ret);
		goto cleanup;
	}

	/* Channel is opened */
	netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");

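	/* NAPI must be live before NetVSP negotiation: completions for the
	 * init packets are delivered through netvsc_poll().
	 */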
	napi_enable(&net_device->chan_table[0].napi);

	/* Connect with the NetVSP */
	ret = netvsc_connect_vsp(device, net_device, device_info);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to connect to NetVSP - %d\n", ret);
		goto close;
	}

	/* Writing the nvdev pointer unlocks netvsc_send(); make sure the
	 * chn_table is populated first.
	 */
	rcu_assign_pointer(net_device_ctx->nvdev, net_device);

	return net_device;

close:
	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
	napi_disable(&net_device->chan_table[0].napi);

	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

cleanup:
	netif_napi_del(&net_device->chan_table[0].napi);

cleanup2:
	free_netvsc_device(&net_device->rcu);

	return ERR_PTR(ret);
}