xref: /trueos/sys/dev/hyperv/netvsc/hv_net_vsc.c (revision 24ff9ca75ce66f9f0ff5d3167cca04d1487ac4c1)
1 /*-
2  * Copyright (c) 2009-2012 Microsoft Corp.
3  * Copyright (c) 2010-2012 Citrix Inc.
4  * Copyright (c) 2012 NetApp Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /**
32  * HyperV vmbus network VSC (virtual services client) module
33  *
34  */
35 
36 
37 #include <sys/param.h>
38 #include <sys/kernel.h>
39 #include <sys/socket.h>
40 #include <sys/lock.h>
41 #include <net/if.h>
42 #include <net/if_arp.h>
43 #include <machine/bus.h>
44 #include <machine/atomic.h>
45 
46 #include <dev/hyperv/include/hyperv.h>
47 #include "hv_net_vsc.h"
48 #include "hv_rndis.h"
49 #include "hv_rndis_filter.h"
50 
51 
52 /*
53  * Forward declarations
54  */
55 static void hv_nv_on_channel_callback(void *context);
56 static int  hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device);
57 static int  hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device);
58 static int  hv_nv_destroy_send_buffer(netvsc_dev *net_dev);
59 static int  hv_nv_destroy_rx_buffer(netvsc_dev *net_dev);
60 static int  hv_nv_connect_to_vsp(struct hv_device *device);
61 static void hv_nv_on_send_completion(struct hv_device *device,
62 				     hv_vm_packet_descriptor *pkt);
63 static void hv_nv_on_receive(struct hv_device *device,
64 			     hv_vm_packet_descriptor *pkt);
65 static void hv_nv_send_receive_completion(struct hv_device *device,
66 					  uint64_t tid);
67 
68 
69 /*
70  *
71  */
72 static inline netvsc_dev *
hv_nv_alloc_net_device(struct hv_device * device)73 hv_nv_alloc_net_device(struct hv_device *device)
74 {
75 	netvsc_dev *net_dev;
76 	hn_softc_t *sc = device_get_softc(device->device);
77 
78 	net_dev = malloc(sizeof(netvsc_dev), M_DEVBUF, M_NOWAIT | M_ZERO);
79 	if (net_dev == NULL) {
80 		return (NULL);
81 	}
82 
83 	net_dev->dev = device;
84 	net_dev->destroy = FALSE;
85 	sc->net_dev = net_dev;
86 
87 	return (net_dev);
88 }
89 
90 /*
91  *
92  */
93 static inline netvsc_dev *
hv_nv_get_outbound_net_device(struct hv_device * device)94 hv_nv_get_outbound_net_device(struct hv_device *device)
95 {
96 	hn_softc_t *sc = device_get_softc(device->device);
97 	netvsc_dev *net_dev = sc->net_dev;;
98 
99 	if ((net_dev != NULL) && net_dev->destroy) {
100 		return (NULL);
101 	}
102 
103 	return (net_dev);
104 }
105 
106 /*
107  *
108  */
109 static inline netvsc_dev *
hv_nv_get_inbound_net_device(struct hv_device * device)110 hv_nv_get_inbound_net_device(struct hv_device *device)
111 {
112 	hn_softc_t *sc = device_get_softc(device->device);
113 	netvsc_dev *net_dev = sc->net_dev;;
114 
115 	if (net_dev == NULL) {
116 		return (net_dev);
117 	}
118 	/*
119 	 * When the device is being destroyed; we only
120 	 * permit incoming packets if and only if there
121 	 * are outstanding sends.
122 	 */
123 	if (net_dev->destroy && net_dev->num_outstanding_sends == 0) {
124 		return (NULL);
125 	}
126 
127 	return (net_dev);
128 }
129 
130 /*
131  * Net VSC initialize receive buffer with net VSP
132  *
133  * Net VSP:  Network virtual services client, also known as the
134  *     Hyper-V extensible switch and the synthetic data path.
135  */
136 static int
hv_nv_init_rx_buffer_with_net_vsp(struct hv_device * device)137 hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device)
138 {
139 	netvsc_dev *net_dev;
140 	nvsp_msg *init_pkt;
141 	int ret = 0;
142 
143 	net_dev = hv_nv_get_outbound_net_device(device);
144 	if (!net_dev) {
145 		return (ENODEV);
146 	}
147 
148 	net_dev->rx_buf = contigmalloc(net_dev->rx_buf_size, M_DEVBUF,
149 	    M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
150 	if (net_dev->rx_buf == NULL) {
151 		ret = ENOMEM;
152 		goto cleanup;
153 	}
154 
155 	/*
156 	 * Establish the GPADL handle for this buffer on this channel.
157 	 * Note:  This call uses the vmbus connection rather than the
158 	 * channel to establish the gpadl handle.
159 	 * GPADL:  Guest physical address descriptor list.
160 	 */
161 	ret = hv_vmbus_channel_establish_gpadl(
162 		device->channel, net_dev->rx_buf,
163 		net_dev->rx_buf_size, &net_dev->rx_buf_gpadl_handle);
164 	if (ret != 0) {
165 		goto cleanup;
166 	}
167 
168 	/* sema_wait(&ext->channel_init_sema); KYS CHECK */
169 
170 	/* Notify the NetVsp of the gpadl handle */
171 	init_pkt = &net_dev->channel_init_packet;
172 
173 	memset(init_pkt, 0, sizeof(nvsp_msg));
174 
175 	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_rx_buf;
176 	init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle =
177 	    net_dev->rx_buf_gpadl_handle;
178 	init_pkt->msgs.vers_1_msgs.send_rx_buf.id =
179 	    NETVSC_RECEIVE_BUFFER_ID;
180 
181 	/* Send the gpadl notification request */
182 
183 	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
184 	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
185 	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
186 	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
187 	if (ret != 0) {
188 		goto cleanup;
189 	}
190 
191 	sema_wait(&net_dev->channel_init_sema);
192 
193 	/* Check the response */
194 	if (init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.status
195 	    != nvsp_status_success) {
196 		ret = EINVAL;
197 		goto cleanup;
198 	}
199 
200 	net_dev->rx_section_count =
201 	    init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections;
202 
203 	net_dev->rx_sections = malloc(net_dev->rx_section_count *
204 	    sizeof(nvsp_1_rx_buf_section), M_DEVBUF, M_NOWAIT);
205 	if (net_dev->rx_sections == NULL) {
206 		ret = EINVAL;
207 		goto cleanup;
208 	}
209 	memcpy(net_dev->rx_sections,
210 	    init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections,
211 	    net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section));
212 
213 
214 	/*
215 	 * For first release, there should only be 1 section that represents
216 	 * the entire receive buffer
217 	 */
218 	if (net_dev->rx_section_count != 1
219 	    || net_dev->rx_sections->offset != 0) {
220 		ret = EINVAL;
221 		goto cleanup;
222 	}
223 
224 	goto exit;
225 
226 cleanup:
227 	hv_nv_destroy_rx_buffer(net_dev);
228 
229 exit:
230 	return (ret);
231 }
232 
233 /*
234  * Net VSC initialize send buffer with net VSP
235  */
236 static int
hv_nv_init_send_buffer_with_net_vsp(struct hv_device * device)237 hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device)
238 {
239 	netvsc_dev *net_dev;
240 	nvsp_msg *init_pkt;
241 	int ret = 0;
242 
243 	net_dev = hv_nv_get_outbound_net_device(device);
244 	if (!net_dev) {
245 		return (ENODEV);
246 	}
247 
248 	net_dev->send_buf  = contigmalloc(net_dev->send_buf_size, M_DEVBUF,
249 	    M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
250 	if (net_dev->send_buf == NULL) {
251 		ret = ENOMEM;
252 		goto cleanup;
253 	}
254 
255 	/*
256 	 * Establish the gpadl handle for this buffer on this channel.
257 	 * Note:  This call uses the vmbus connection rather than the
258 	 * channel to establish the gpadl handle.
259 	 */
260 	ret = hv_vmbus_channel_establish_gpadl(device->channel,
261 	    net_dev->send_buf, net_dev->send_buf_size,
262 	    &net_dev->send_buf_gpadl_handle);
263 	if (ret != 0) {
264 		goto cleanup;
265 	}
266 
267 	/* Notify the NetVsp of the gpadl handle */
268 
269 	init_pkt = &net_dev->channel_init_packet;
270 
271 	memset(init_pkt, 0, sizeof(nvsp_msg));
272 
273 	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_send_buf;
274 	init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle =
275 	    net_dev->send_buf_gpadl_handle;
276 	init_pkt->msgs.vers_1_msgs.send_rx_buf.id =
277 	    NETVSC_SEND_BUFFER_ID;
278 
279 	/* Send the gpadl notification request */
280 
281 	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
282 	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
283 	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
284 	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
285 	if (ret != 0) {
286 		goto cleanup;
287 	}
288 
289 	sema_wait(&net_dev->channel_init_sema);
290 
291 	/* Check the response */
292 	if (init_pkt->msgs.vers_1_msgs.send_send_buf_complete.status
293 	    != nvsp_status_success) {
294 		ret = EINVAL;
295 		goto cleanup;
296 	}
297 
298 	net_dev->send_section_size =
299 	    init_pkt->msgs.vers_1_msgs.send_send_buf_complete.section_size;
300 
301 	goto exit;
302 
303 cleanup:
304 	hv_nv_destroy_send_buffer(net_dev);
305 
306 exit:
307 	return (ret);
308 }
309 
310 /*
311  * Net VSC destroy receive buffer
312  */
313 static int
hv_nv_destroy_rx_buffer(netvsc_dev * net_dev)314 hv_nv_destroy_rx_buffer(netvsc_dev *net_dev)
315 {
316 	nvsp_msg *revoke_pkt;
317 	int ret = 0;
318 
319 	/*
320 	 * If we got a section count, it means we received a
321 	 * send_rx_buf_complete msg
322 	 * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore,
323 	 * we need to send a revoke msg here
324 	 */
325 	if (net_dev->rx_section_count) {
326 		/* Send the revoke receive buffer */
327 		revoke_pkt = &net_dev->revoke_packet;
328 		memset(revoke_pkt, 0, sizeof(nvsp_msg));
329 
330 		revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_rx_buf;
331 		revoke_pkt->msgs.vers_1_msgs.revoke_rx_buf.id =
332 		    NETVSC_RECEIVE_BUFFER_ID;
333 
334 		ret = hv_vmbus_channel_send_packet(net_dev->dev->channel,
335 		    revoke_pkt, sizeof(nvsp_msg),
336 		    (uint64_t)(uintptr_t)revoke_pkt,
337 		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
338 
339 		/*
340 		 * If we failed here, we might as well return and have a leak
341 		 * rather than continue and a bugchk
342 		 */
343 		if (ret != 0) {
344 			return (ret);
345 		}
346 	}
347 
348 	/* Tear down the gpadl on the vsp end */
349 	if (net_dev->rx_buf_gpadl_handle) {
350 		ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel,
351 		    net_dev->rx_buf_gpadl_handle);
352 		/*
353 		 * If we failed here, we might as well return and have a leak
354 		 * rather than continue and a bugchk
355 		 */
356 		if (ret != 0) {
357 			return (ret);
358 		}
359 		net_dev->rx_buf_gpadl_handle = 0;
360 	}
361 
362 	if (net_dev->rx_buf) {
363 		/* Free up the receive buffer */
364 		contigfree(net_dev->rx_buf, net_dev->rx_buf_size, M_DEVBUF);
365 		net_dev->rx_buf = NULL;
366 	}
367 
368 	if (net_dev->rx_sections) {
369 		free(net_dev->rx_sections, M_DEVBUF);
370 		net_dev->rx_sections = NULL;
371 		net_dev->rx_section_count = 0;
372 	}
373 
374 	return (ret);
375 }
376 
377 /*
378  * Net VSC destroy send buffer
379  */
380 static int
hv_nv_destroy_send_buffer(netvsc_dev * net_dev)381 hv_nv_destroy_send_buffer(netvsc_dev *net_dev)
382 {
383 	nvsp_msg *revoke_pkt;
384 	int ret = 0;
385 
386 	/*
387 	 * If we got a section count, it means we received a
388 	 * send_rx_buf_complete msg
389 	 * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore,
390 	 * we need to send a revoke msg here
391 	 */
392 	if (net_dev->send_section_size) {
393 		/* Send the revoke send buffer */
394 		revoke_pkt = &net_dev->revoke_packet;
395 		memset(revoke_pkt, 0, sizeof(nvsp_msg));
396 
397 		revoke_pkt->hdr.msg_type =
398 		    nvsp_msg_1_type_revoke_send_buf;
399 		revoke_pkt->msgs.vers_1_msgs.revoke_send_buf.id =
400 		    NETVSC_SEND_BUFFER_ID;
401 
402 		ret = hv_vmbus_channel_send_packet(net_dev->dev->channel,
403 		    revoke_pkt, sizeof(nvsp_msg),
404 		    (uint64_t)(uintptr_t)revoke_pkt,
405 		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
406 		/*
407 		 * If we failed here, we might as well return and have a leak
408 		 * rather than continue and a bugchk
409 		 */
410 		if (ret != 0) {
411 			return (ret);
412 		}
413 	}
414 
415 	/* Tear down the gpadl on the vsp end */
416 	if (net_dev->send_buf_gpadl_handle) {
417 		ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel,
418 		    net_dev->send_buf_gpadl_handle);
419 
420 		/*
421 		 * If we failed here, we might as well return and have a leak
422 		 * rather than continue and a bugchk
423 		 */
424 		if (ret != 0) {
425 			return (ret);
426 		}
427 		net_dev->send_buf_gpadl_handle = 0;
428 	}
429 
430 	if (net_dev->send_buf) {
431 		/* Free up the receive buffer */
432 		contigfree(net_dev->send_buf, net_dev->send_buf_size, M_DEVBUF);
433 		net_dev->send_buf = NULL;
434 	}
435 
436 	return (ret);
437 }
438 
439 
440 /*
441  * Attempt to negotiate the caller-specified NVSP version
442  *
443  * For NVSP v2, Server 2008 R2 does not set
444  * init_pkt->msgs.init_msgs.init_compl.negotiated_prot_vers
445  * to the negotiated version, so we cannot rely on that.
446  */
447 static int
hv_nv_negotiate_nvsp_protocol(struct hv_device * device,netvsc_dev * net_dev,uint32_t nvsp_ver)448 hv_nv_negotiate_nvsp_protocol(struct hv_device *device, netvsc_dev *net_dev,
449 			      uint32_t nvsp_ver)
450 {
451 	nvsp_msg *init_pkt;
452 	int ret;
453 
454 	init_pkt = &net_dev->channel_init_packet;
455 	memset(init_pkt, 0, sizeof(nvsp_msg));
456 	init_pkt->hdr.msg_type = nvsp_msg_type_init;
457 
458 	/*
459 	 * Specify parameter as the only acceptable protocol version
460 	 */
461 	init_pkt->msgs.init_msgs.init.p1.protocol_version = nvsp_ver;
462 	init_pkt->msgs.init_msgs.init.protocol_version_2 = nvsp_ver;
463 
464 	/* Send the init request */
465 	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
466 	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
467 	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
468 	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
469 	if (ret != 0)
470 		return (-1);
471 
472 	sema_wait(&net_dev->channel_init_sema);
473 
474 	if (init_pkt->msgs.init_msgs.init_compl.status != nvsp_status_success)
475 		return (EINVAL);
476 
477 	return (0);
478 }
479 
480 /*
481  * Send NDIS version 2 config packet containing MTU.
482  *
483  * Not valid for NDIS version 1.
484  */
485 static int
hv_nv_send_ndis_config(struct hv_device * device,uint32_t mtu)486 hv_nv_send_ndis_config(struct hv_device *device, uint32_t mtu)
487 {
488 	netvsc_dev *net_dev;
489 	nvsp_msg *init_pkt;
490 	int ret;
491 
492 	net_dev = hv_nv_get_outbound_net_device(device);
493 	if (!net_dev)
494 		return (-ENODEV);
495 
496 	/*
497 	 * Set up configuration packet, write MTU
498 	 * Indicate we are capable of handling VLAN tags
499 	 */
500 	init_pkt = &net_dev->channel_init_packet;
501 	memset(init_pkt, 0, sizeof(nvsp_msg));
502 	init_pkt->hdr.msg_type = nvsp_msg_2_type_send_ndis_config;
503 	init_pkt->msgs.vers_2_msgs.send_ndis_config.mtu = mtu;
504 	init_pkt->
505 		msgs.vers_2_msgs.send_ndis_config.capabilities.u1.u2.ieee8021q
506 		= 1;
507 
508 	/* Send the configuration packet */
509 	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
510 	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
511 	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
512 	if (ret != 0)
513 		return (-EINVAL);
514 
515 	return (0);
516 }
517 
518 /*
519  * Net VSC connect to VSP
520  */
521 static int
hv_nv_connect_to_vsp(struct hv_device * device)522 hv_nv_connect_to_vsp(struct hv_device *device)
523 {
524 	netvsc_dev *net_dev;
525 	nvsp_msg *init_pkt;
526 	uint32_t nvsp_vers;
527 	uint32_t ndis_version;
528 	int ret = 0;
529 	device_t dev = device->device;
530 	hn_softc_t *sc = device_get_softc(dev);
531 	struct ifnet *ifp = sc->arpcom.ac_ifp;
532 
533 	net_dev = hv_nv_get_outbound_net_device(device);
534 	if (!net_dev) {
535 		return (ENODEV);
536 	}
537 
538 	/*
539 	 * Negotiate the NVSP version.  Try NVSP v2 first.
540 	 */
541 	nvsp_vers = NVSP_PROTOCOL_VERSION_2;
542 	ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers);
543 	if (ret != 0) {
544 		/* NVSP v2 failed, try NVSP v1 */
545 		nvsp_vers = NVSP_PROTOCOL_VERSION_1;
546 		ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers);
547 		if (ret != 0) {
548 			/* NVSP v1 failed, return bad status */
549 			return (ret);
550 		}
551 	}
552 	net_dev->nvsp_version = nvsp_vers;
553 
554 	/*
555 	 * Set the MTU if supported by this NVSP protocol version
556 	 * This needs to be right after the NVSP init message per Haiyang
557 	 */
558 	if (nvsp_vers >= NVSP_PROTOCOL_VERSION_2)
559 		ret = hv_nv_send_ndis_config(device, ifp->if_mtu);
560 
561 	/*
562 	 * Send the NDIS version
563 	 */
564 	init_pkt = &net_dev->channel_init_packet;
565 
566 	memset(init_pkt, 0, sizeof(nvsp_msg));
567 
568 	/*
569 	 * Updated to version 5.1, minimum, for VLAN per Haiyang
570 	 */
571 	ndis_version = NDIS_VERSION;
572 
573 	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_ndis_vers;
574 	init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_major_vers =
575 	    (ndis_version & 0xFFFF0000) >> 16;
576 	init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_minor_vers =
577 	    ndis_version & 0xFFFF;
578 
579 	/* Send the init request */
580 
581 	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
582 	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
583 	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
584 	if (ret != 0) {
585 		goto cleanup;
586 	}
587 	/*
588 	 * TODO:  BUGBUG - We have to wait for the above msg since the netvsp
589 	 * uses KMCL which acknowledges packet (completion packet)
590 	 * since our Vmbus always set the
591 	 * HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED flag
592 	 */
593 	/* sema_wait(&NetVscChannel->channel_init_sema); */
594 
595 	/* Post the big receive buffer to NetVSP */
596 	ret = hv_nv_init_rx_buffer_with_net_vsp(device);
597 	if (ret == 0)
598 		ret = hv_nv_init_send_buffer_with_net_vsp(device);
599 
600 cleanup:
601 	return (ret);
602 }
603 
604 /*
605  * Net VSC disconnect from VSP
606  */
607 static void
hv_nv_disconnect_from_vsp(netvsc_dev * net_dev)608 hv_nv_disconnect_from_vsp(netvsc_dev *net_dev)
609 {
610 	hv_nv_destroy_rx_buffer(net_dev);
611 	hv_nv_destroy_send_buffer(net_dev);
612 }
613 
614 /*
615  * Net VSC on device add
616  *
617  * Callback when the device belonging to this driver is added
618  */
619 netvsc_dev *
hv_nv_on_device_add(struct hv_device * device,void * additional_info)620 hv_nv_on_device_add(struct hv_device *device, void *additional_info)
621 {
622 	netvsc_dev *net_dev;
623 	netvsc_packet *packet;
624 	netvsc_packet *next_packet;
625 	int i, ret = 0;
626 
627 	net_dev = hv_nv_alloc_net_device(device);
628 	if (!net_dev)
629 		goto cleanup;
630 
631 	/* Initialize the NetVSC channel extension */
632 	net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
633 	mtx_init(&net_dev->rx_pkt_list_lock, "HV-RPL", NULL,
634 	    MTX_SPIN | MTX_RECURSE);
635 
636 	net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
637 
638 	/* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */
639 	STAILQ_INIT(&net_dev->myrx_packet_list);
640 
641 	/*
642 	 * malloc a sufficient number of netvsc_packet buffers to hold
643 	 * a packet list.  Add them to the netvsc device packet queue.
644 	 */
645 	for (i=0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) {
646 		packet = malloc(sizeof(netvsc_packet) +
647 		    (NETVSC_RECEIVE_SG_COUNT * sizeof(hv_vmbus_page_buffer)),
648 		    M_DEVBUF, M_NOWAIT | M_ZERO);
649 		if (!packet) {
650 			break;
651 		}
652 		STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, packet,
653 		    mylist_entry);
654 	}
655 
656 	sema_init(&net_dev->channel_init_sema, 0, "netdev_sema");
657 
658 	/*
659 	 * Open the channel
660 	 */
661 	ret = hv_vmbus_channel_open(device->channel,
662 	    NETVSC_DEVICE_RING_BUFFER_SIZE, NETVSC_DEVICE_RING_BUFFER_SIZE,
663 	    NULL, 0, hv_nv_on_channel_callback, device);
664 	if (ret != 0)
665 		goto cleanup;
666 
667 	/*
668 	 * Connect with the NetVsp
669 	 */
670 	ret = hv_nv_connect_to_vsp(device);
671 	if (ret != 0)
672 		goto close;
673 
674 	return (net_dev);
675 
676 close:
677 	/* Now, we can close the channel safely */
678 
679 	hv_vmbus_channel_close(device->channel);
680 
681 cleanup:
682 	/*
683 	 * Free the packet buffers on the netvsc device packet queue.
684 	 * Release other resources.
685 	 */
686 	if (net_dev) {
687 		sema_destroy(&net_dev->channel_init_sema);
688 
689 		packet = STAILQ_FIRST(&net_dev->myrx_packet_list);
690 		while (packet != NULL) {
691 			next_packet = STAILQ_NEXT(packet, mylist_entry);
692 			free(packet, M_DEVBUF);
693 			packet = next_packet;
694 		}
695 		/* Reset the list to initial state */
696 		STAILQ_INIT(&net_dev->myrx_packet_list);
697 
698 		mtx_destroy(&net_dev->rx_pkt_list_lock);
699 
700 		free(net_dev, M_DEVBUF);
701 	}
702 
703 	return (NULL);
704 }
705 
706 /*
707  * Net VSC on device remove
708  */
709 int
hv_nv_on_device_remove(struct hv_device * device,boolean_t destroy_channel)710 hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel)
711 {
712 	netvsc_packet *net_vsc_pkt;
713 	netvsc_packet *next_net_vsc_pkt;
714 	hn_softc_t *sc = device_get_softc(device->device);
715 	netvsc_dev *net_dev = sc->net_dev;;
716 
717 	/* Stop outbound traffic ie sends and receives completions */
718 	mtx_lock(&device->channel->inbound_lock);
719 	net_dev->destroy = TRUE;
720 	mtx_unlock(&device->channel->inbound_lock);
721 
722 	/* Wait for all send completions */
723 	while (net_dev->num_outstanding_sends) {
724 		DELAY(100);
725 	}
726 
727 	hv_nv_disconnect_from_vsp(net_dev);
728 
729 	/* At this point, no one should be accessing net_dev except in here */
730 
731 	/* Now, we can close the channel safely */
732 
733 	if (!destroy_channel) {
734 		device->channel->state =
735 		    HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE;
736 	}
737 
738 	hv_vmbus_channel_close(device->channel);
739 
740 	/* Release all resources */
741 	net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list);
742 	while (net_vsc_pkt != NULL) {
743 		next_net_vsc_pkt = STAILQ_NEXT(net_vsc_pkt, mylist_entry);
744 		free(net_vsc_pkt, M_DEVBUF);
745 		net_vsc_pkt = next_net_vsc_pkt;
746 	}
747 
748 	/* Reset the list to initial state */
749 	STAILQ_INIT(&net_dev->myrx_packet_list);
750 
751 	mtx_destroy(&net_dev->rx_pkt_list_lock);
752 	sema_destroy(&net_dev->channel_init_sema);
753 	free(net_dev, M_DEVBUF);
754 
755 	return (0);
756 }
757 
758 /*
759  * Net VSC on send completion
760  */
761 static void
hv_nv_on_send_completion(struct hv_device * device,hv_vm_packet_descriptor * pkt)762 hv_nv_on_send_completion(struct hv_device *device, hv_vm_packet_descriptor *pkt)
763 {
764 	netvsc_dev *net_dev;
765 	nvsp_msg *nvsp_msg_pkt;
766 	netvsc_packet *net_vsc_pkt;
767 
768 	net_dev = hv_nv_get_inbound_net_device(device);
769 	if (!net_dev) {
770 		return;
771 	}
772 
773 	nvsp_msg_pkt =
774 	    (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3));
775 
776 	if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_type_init_complete
777 		|| nvsp_msg_pkt->hdr.msg_type
778 			== nvsp_msg_1_type_send_rx_buf_complete
779 		|| nvsp_msg_pkt->hdr.msg_type
780 			== nvsp_msg_1_type_send_send_buf_complete) {
781 		/* Copy the response back */
782 		memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt,
783 		    sizeof(nvsp_msg));
784 		sema_post(&net_dev->channel_init_sema);
785 	} else if (nvsp_msg_pkt->hdr.msg_type ==
786 				   nvsp_msg_1_type_send_rndis_pkt_complete) {
787 		/* Get the send context */
788 		net_vsc_pkt =
789 		    (netvsc_packet *)(unsigned long)pkt->transaction_id;
790 
791 		/* Notify the layer above us */
792 		net_vsc_pkt->compl.send.on_send_completion(
793 		    net_vsc_pkt->compl.send.send_completion_context);
794 
795 		atomic_subtract_int(&net_dev->num_outstanding_sends, 1);
796 	}
797 }
798 
799 /*
800  * Net VSC on send
801  * Sends a packet on the specified Hyper-V device.
802  * Returns 0 on success, non-zero on failure.
803  */
804 int
hv_nv_on_send(struct hv_device * device,netvsc_packet * pkt)805 hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt)
806 {
807 	netvsc_dev *net_dev;
808 	nvsp_msg send_msg;
809 	int ret;
810 
811 	net_dev = hv_nv_get_outbound_net_device(device);
812 	if (!net_dev)
813 		return (ENODEV);
814 
815 	send_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt;
816 	if (pkt->is_data_pkt) {
817 		/* 0 is RMC_DATA */
818 		send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 0;
819 	} else {
820 		/* 1 is RMC_CONTROL */
821 		send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 1;
822 	}
823 
824 	/* Not using send buffer section */
825 	send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_idx =
826 	    0xFFFFFFFF;
827 	send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size = 0;
828 
829 	if (pkt->page_buf_count) {
830 		ret = hv_vmbus_channel_send_packet_pagebuffer(device->channel,
831 		    pkt->page_buffers, pkt->page_buf_count,
832 		    &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt);
833 	} else {
834 		ret = hv_vmbus_channel_send_packet(device->channel,
835 		    &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt,
836 		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
837 		    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
838 	}
839 
840 	/* Record outstanding send only if send_packet() succeeded */
841 	if (ret == 0)
842 		atomic_add_int(&net_dev->num_outstanding_sends, 1);
843 
844 	return (ret);
845 }
846 
847 /*
848  * Net VSC on receive
849  *
850  * In the FreeBSD Hyper-V virtual world, this function deals exclusively
851  * with virtual addresses.
852  */
853 static void
hv_nv_on_receive(struct hv_device * device,hv_vm_packet_descriptor * pkt)854 hv_nv_on_receive(struct hv_device *device, hv_vm_packet_descriptor *pkt)
855 {
856 	netvsc_dev *net_dev;
857 	hv_vm_transfer_page_packet_header *vm_xfer_page_pkt;
858 	nvsp_msg *nvsp_msg_pkt;
859 	netvsc_packet *net_vsc_pkt = NULL;
860 	unsigned long start;
861 	xfer_page_packet *xfer_page_pkt = NULL;
862 	STAILQ_HEAD(PKT_LIST, netvsc_packet_) mylist_head =
863 	    STAILQ_HEAD_INITIALIZER(mylist_head);
864 	int count = 0;
865 	int i = 0;
866 
867 	net_dev = hv_nv_get_inbound_net_device(device);
868 	if (!net_dev)
869 		return;
870 
871 	/*
872 	 * All inbound packets other than send completion should be
873 	 * xfer page packet.
874 	 */
875 	if (pkt->type != HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES)
876 		return;
877 
878 	nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt
879 		+ (pkt->data_offset8 << 3));
880 
881 	/* Make sure this is a valid nvsp packet */
882 	if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg_1_type_send_rndis_pkt)
883 		return;
884 
885 	vm_xfer_page_pkt = (hv_vm_transfer_page_packet_header *)pkt;
886 
887 	if (vm_xfer_page_pkt->transfer_page_set_id
888 		!= NETVSC_RECEIVE_BUFFER_ID) {
889 		return;
890 	}
891 
892 	STAILQ_INIT(&mylist_head);
893 
894 	/*
895 	 * Grab free packets (range count + 1) to represent this xfer page
896 	 * packet.  +1 to represent the xfer page packet itself.  We grab it
897 	 * here so that we know exactly how many we can fulfill.
898 	 */
899 	mtx_lock_spin(&net_dev->rx_pkt_list_lock);
900 	while (!STAILQ_EMPTY(&net_dev->myrx_packet_list)) {
901 		net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list);
902 		STAILQ_REMOVE_HEAD(&net_dev->myrx_packet_list, mylist_entry);
903 
904 		STAILQ_INSERT_TAIL(&mylist_head, net_vsc_pkt, mylist_entry);
905 
906 		if (++count == vm_xfer_page_pkt->range_count + 1)
907 			break;
908 	}
909 
910 	mtx_unlock_spin(&net_dev->rx_pkt_list_lock);
911 
912 	/*
913 	 * We need at least 2 netvsc pkts (1 to represent the xfer page
914 	 * and at least 1 for the range) i.e. we can handle some of the
915 	 * xfer page packet ranges...
916 	 */
917 	if (count < 2) {
918 		/* Return netvsc packet to the freelist */
919 		mtx_lock_spin(&net_dev->rx_pkt_list_lock);
920 		for (i=count; i != 0; i--) {
921 			net_vsc_pkt = STAILQ_FIRST(&mylist_head);
922 			STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);
923 
924 			STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list,
925 			    net_vsc_pkt, mylist_entry);
926 		}
927 		mtx_unlock_spin(&net_dev->rx_pkt_list_lock);
928 
929 		hv_nv_send_receive_completion(device,
930 		    vm_xfer_page_pkt->d.transaction_id);
931 
932 		return;
933 	}
934 
935 	/* Take the first packet in the list */
936 	xfer_page_pkt = (xfer_page_packet *)STAILQ_FIRST(&mylist_head);
937 	STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);
938 
939 	/* This is how many data packets we can supply */
940 	xfer_page_pkt->count = count - 1;
941 
942 	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
943 	for (i=0; i < (count - 1); i++) {
944 		net_vsc_pkt = STAILQ_FIRST(&mylist_head);
945 		STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);
946 
947 		/*
948 		 * Initialize the netvsc packet
949 		 */
950 		net_vsc_pkt->xfer_page_pkt = xfer_page_pkt;
951 		net_vsc_pkt->compl.rx.rx_completion_context = net_vsc_pkt;
952 		net_vsc_pkt->device = device;
953 		/* Save this so that we can send it back */
954 		net_vsc_pkt->compl.rx.rx_completion_tid =
955 		    vm_xfer_page_pkt->d.transaction_id;
956 
957 		net_vsc_pkt->tot_data_buf_len =
958 		    vm_xfer_page_pkt->ranges[i].byte_count;
959 		net_vsc_pkt->page_buf_count = 1;
960 
961 		net_vsc_pkt->page_buffers[0].length =
962 		    vm_xfer_page_pkt->ranges[i].byte_count;
963 
964 		/* The virtual address of the packet in the receive buffer */
965 		start = ((unsigned long)net_dev->rx_buf +
966 		    vm_xfer_page_pkt->ranges[i].byte_offset);
967 		start = ((unsigned long)start) & ~(PAGE_SIZE - 1);
968 
969 		/* Page number of the virtual page containing packet start */
970 		net_vsc_pkt->page_buffers[0].pfn = start >> PAGE_SHIFT;
971 
972 		/* Calculate the page relative offset */
973 		net_vsc_pkt->page_buffers[0].offset =
974 		    vm_xfer_page_pkt->ranges[i].byte_offset & (PAGE_SIZE - 1);
975 
976 		/*
977 		 * In this implementation, we are dealing with virtual
978 		 * addresses exclusively.  Since we aren't using physical
979 		 * addresses at all, we don't care if a packet crosses a
980 		 * page boundary.  For this reason, the original code to
981 		 * check for and handle page crossings has been removed.
982 		 */
983 
984 		/*
985 		 * Pass it to the upper layer.  The receive completion call
986 		 * has been moved into this function.
987 		 */
988 		hv_rf_on_receive(device, net_vsc_pkt);
989 
990 		/*
991 		 * Moved completion call back here so that all received
992 		 * messages (not just data messages) will trigger a response
993 		 * message back to the host.
994 		 */
995 		hv_nv_on_receive_completion(net_vsc_pkt);
996 	}
997 }
998 
999 /*
1000  * Net VSC send receive completion
1001  */
1002 static void
hv_nv_send_receive_completion(struct hv_device * device,uint64_t tid)1003 hv_nv_send_receive_completion(struct hv_device *device, uint64_t tid)
1004 {
1005 	nvsp_msg rx_comp_msg;
1006 	int retries = 0;
1007 	int ret = 0;
1008 
1009 	rx_comp_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt_complete;
1010 
1011 	/* Pass in the status */
1012 	rx_comp_msg.msgs.vers_1_msgs.send_rndis_pkt_complete.status =
1013 	    nvsp_status_success;
1014 
1015 retry_send_cmplt:
1016 	/* Send the completion */
1017 	ret = hv_vmbus_channel_send_packet(device->channel, &rx_comp_msg,
1018 	    sizeof(nvsp_msg), tid, HV_VMBUS_PACKET_TYPE_COMPLETION, 0);
1019 	if (ret == 0) {
1020 		/* success */
1021 		/* no-op */
1022 	} else if (ret == EAGAIN) {
1023 		/* no more room... wait a bit and attempt to retry 3 times */
1024 		retries++;
1025 
1026 		if (retries < 4) {
1027 			DELAY(100);
1028 			goto retry_send_cmplt;
1029 		}
1030 	}
1031 }
1032 
1033 /*
1034  * Net VSC on receive completion
1035  *
1036  * Send a receive completion packet to RNDIS device (ie NetVsp)
1037  */
1038 void
hv_nv_on_receive_completion(void * context)1039 hv_nv_on_receive_completion(void *context)
1040 {
1041 	netvsc_packet *packet = (netvsc_packet *)context;
1042 	struct hv_device *device = (struct hv_device *)packet->device;
1043 	netvsc_dev    *net_dev;
1044 	uint64_t       tid = 0;
1045 	boolean_t send_rx_completion = FALSE;
1046 
1047 	/*
1048 	 * Even though it seems logical to do a hv_nv_get_outbound_net_device()
1049 	 * here to send out receive completion, we are using
1050 	 * hv_nv_get_inbound_net_device() since we may have disabled
1051 	 * outbound traffic already.
1052 	 */
1053 	net_dev = hv_nv_get_inbound_net_device(device);
1054 	if (net_dev == NULL)
1055 		return;
1056 
1057 	/* Overloading use of the lock. */
1058 	mtx_lock_spin(&net_dev->rx_pkt_list_lock);
1059 
1060 	packet->xfer_page_pkt->count--;
1061 
1062 	/*
1063 	 * Last one in the line that represent 1 xfer page packet.
1064 	 * Return the xfer page packet itself to the free list.
1065 	 */
1066 	if (packet->xfer_page_pkt->count == 0) {
1067 		send_rx_completion = TRUE;
1068 		tid = packet->compl.rx.rx_completion_tid;
1069 		STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list,
1070 		    (netvsc_packet *)(packet->xfer_page_pkt), mylist_entry);
1071 	}
1072 
1073 	/* Put the packet back on the free list */
1074 	STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, packet, mylist_entry);
1075 	mtx_unlock_spin(&net_dev->rx_pkt_list_lock);
1076 
1077 	/* Send a receive completion for the xfer page packet */
1078 	if (send_rx_completion)
1079 		hv_nv_send_receive_completion(device, tid);
1080 }
1081 
1082 /*
1083  * Net VSC on channel callback
1084  */
1085 static void
hv_nv_on_channel_callback(void * context)1086 hv_nv_on_channel_callback(void *context)
1087 {
1088 	/* Fixme:  Magic number */
1089 	const int net_pkt_size = 2048;
1090 	struct hv_device *device = (struct hv_device *)context;
1091 	netvsc_dev *net_dev;
1092 	uint32_t bytes_rxed;
1093 	uint64_t request_id;
1094 	uint8_t  *packet;
1095 	hv_vm_packet_descriptor *desc;
1096 	uint8_t *buffer;
1097 	int     bufferlen = net_pkt_size;
1098 	int     ret = 0;
1099 
1100 	packet = malloc(net_pkt_size * sizeof(uint8_t), M_DEVBUF, M_NOWAIT);
1101 	if (!packet)
1102 		return;
1103 
1104 	buffer = packet;
1105 
1106 	net_dev = hv_nv_get_inbound_net_device(device);
1107 	if (net_dev == NULL)
1108 		goto out;
1109 
1110 	do {
1111 		ret = hv_vmbus_channel_recv_packet_raw(device->channel,
1112 		    buffer, bufferlen, &bytes_rxed, &request_id);
1113 		if (ret == 0) {
1114 			if (bytes_rxed > 0) {
1115 				desc = (hv_vm_packet_descriptor *)buffer;
1116 				switch (desc->type) {
1117 				case HV_VMBUS_PACKET_TYPE_COMPLETION:
1118 					hv_nv_on_send_completion(device, desc);
1119 					break;
1120 				case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
1121 					hv_nv_on_receive(device, desc);
1122 					break;
1123 				default:
1124 					break;
1125 				}
1126 			} else {
1127 				break;
1128 			}
1129 		} else if (ret == ENOBUFS) {
1130 			/* Handle large packet */
1131 			free(buffer, M_DEVBUF);
1132 			buffer = malloc(bytes_rxed, M_DEVBUF, M_NOWAIT);
1133 			if (buffer == NULL) {
1134 				break;
1135 			}
1136 			bufferlen = bytes_rxed;
1137 		}
1138 	} while (1);
1139 
1140 out:
1141 	free(buffer, M_DEVBUF);
1142 }
1143 
1144