1 /* $OpenBSD: vioblk.c,v 1.21 2024/11/27 22:32:14 kirill Exp $ */
2
3 /*
4 * Copyright (c) 2023 Dave Voutila <dv@openbsd.org>
5 * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <stdint.h>
20
21 #include <dev/pci/virtio_pcireg.h>
22 #include <dev/pv/vioblkreg.h>
23 #include <dev/pv/virtioreg.h>
24
25 #include <errno.h>
26 #include <event.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30
31 #include "atomicio.h"
32 #include "pci.h"
33 #include "virtio.h"
34 #include "vmd.h"
35
36 extern char *__progname;
37 extern struct vmd_vm *current_vm;
38 struct iovec io_v[VIOBLK_QUEUE_SIZE];
39
40 static const char *disk_type(int);
41 static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *,
42 int8_t *);
43 static int handle_io_write(struct viodev_msg *, struct virtio_dev *);
44
45 static void vioblk_update_qs(struct vioblk_dev *);
46 static void vioblk_update_qa(struct vioblk_dev *);
47 static int vioblk_notifyq(struct vioblk_dev *);
48 static ssize_t vioblk_rw(struct vioblk_dev *, int, off_t,
49 struct vring_desc *, struct vring_desc **);
50
51 static void dev_dispatch_vm(int, short, void *);
52 static void handle_sync_io(int, short, void *);
53
54 static const char *
disk_type(int type)55 disk_type(int type)
56 {
57 switch (type) {
58 case VMDF_RAW: return "raw";
59 case VMDF_QCOW2: return "qcow2";
60 }
61 return "unknown";
62 }
63
64 __dead void
vioblk_main(int fd,int fd_vmm)65 vioblk_main(int fd, int fd_vmm)
66 {
67 struct virtio_dev dev;
68 struct vioblk_dev *vioblk = NULL;
69 struct viodev_msg msg;
70 struct vmd_vm vm;
71 struct vm_create_params *vcp;
72 ssize_t sz;
73 off_t szp = 0;
74 int i, ret, type;
75
76 /*
77 * stdio - needed for read/write to disk fds and channels to the vm.
78 * vmm + proc - needed to create shared vm mappings.
79 */
80 if (pledge("stdio vmm proc", NULL) == -1)
81 fatal("pledge");
82
83 /* Zero and initialize io work queue. */
84 memset(io_v, 0, nitems(io_v)*sizeof(io_v[0]));
85
86 /* Receive our virtio_dev, mostly preconfigured. */
87 memset(&dev, 0, sizeof(dev));
88 sz = atomicio(read, fd, &dev, sizeof(dev));
89 if (sz != sizeof(dev)) {
90 ret = errno;
91 log_warn("failed to receive vioblk");
92 goto fail;
93 }
94 if (dev.dev_type != VMD_DEVTYPE_DISK) {
95 ret = EINVAL;
96 log_warn("received invalid device type");
97 goto fail;
98 }
99 dev.sync_fd = fd;
100 vioblk = &dev.vioblk;
101
102 log_debug("%s: got viblk dev. num disk fds = %d, sync fd = %d, "
103 "async fd = %d, capacity = %lld seg_max = %u, vmm fd = %d",
104 __func__, vioblk->ndisk_fd, dev.sync_fd, dev.async_fd,
105 vioblk->capacity, vioblk->seg_max, fd_vmm);
106
107 /* Receive our vm information from the vm process. */
108 memset(&vm, 0, sizeof(vm));
109 sz = atomicio(read, dev.sync_fd, &vm, sizeof(vm));
110 if (sz != sizeof(vm)) {
111 ret = EIO;
112 log_warnx("failed to receive vm details");
113 goto fail;
114 }
115 vcp = &vm.vm_params.vmc_params;
116 current_vm = &vm;
117
118 setproctitle("%s/vioblk%d", vcp->vcp_name, vioblk->idx);
119 log_procinit("vm/%s/vioblk%d", vcp->vcp_name, vioblk->idx);
120
121 /* Now that we have our vm information, we can remap memory. */
122 ret = remap_guest_mem(&vm, fd_vmm);
123 if (ret) {
124 log_warnx("failed to remap guest memory");
125 goto fail;
126 }
127
128 /*
129 * We no longer need /dev/vmm access.
130 */
131 close_fd(fd_vmm);
132 if (pledge("stdio", NULL) == -1)
133 fatal("pledge2");
134
135 /* Initialize the virtio block abstractions. */
136 type = vm.vm_params.vmc_disktypes[vioblk->idx];
137 switch (type) {
138 case VMDF_RAW:
139 ret = virtio_raw_init(&vioblk->file, &szp, vioblk->disk_fd,
140 vioblk->ndisk_fd);
141 break;
142 case VMDF_QCOW2:
143 ret = virtio_qcow2_init(&vioblk->file, &szp, vioblk->disk_fd,
144 vioblk->ndisk_fd);
145 break;
146 default:
147 log_warnx("invalid disk image type");
148 goto fail;
149 }
150 if (ret || szp < 0) {
151 log_warnx("failed to init disk %s image", disk_type(type));
152 goto fail;
153 }
154 vioblk->capacity = szp / 512;
155 log_debug("%s: initialized vioblk%d with %s image (capacity=%lld)",
156 __func__, vioblk->idx, disk_type(type), vioblk->capacity);
157
158 /* If we're restoring hardware, reinitialize the virtqueue hva. */
159 if (vm.vm_state & VM_STATE_RECEIVED)
160 vioblk_update_qa(vioblk);
161
162 /* Initialize libevent so we can start wiring event handlers. */
163 event_init();
164
165 /* Wire up an async imsg channel. */
166 log_debug("%s: wiring in async vm event handler (fd=%d)", __func__,
167 dev.async_fd);
168 if (vm_device_pipe(&dev, dev_dispatch_vm, NULL)) {
169 ret = EIO;
170 log_warnx("vm_device_pipe");
171 goto fail;
172 }
173
174 /* Configure our sync channel event handler. */
175 log_debug("%s: wiring in sync channel handler (fd=%d)", __func__,
176 dev.sync_fd);
177 if (imsgbuf_init(&dev.sync_iev.ibuf, dev.sync_fd) == -1) {
178 log_warn("imsgbuf_init");
179 goto fail;
180 }
181 imsgbuf_allow_fdpass(&dev.sync_iev.ibuf);
182 dev.sync_iev.handler = handle_sync_io;
183 dev.sync_iev.data = &dev;
184 dev.sync_iev.events = EV_READ;
185 imsg_event_add(&dev.sync_iev);
186
187 /* Send a ready message over the sync channel. */
188 log_debug("%s: telling vm %s device is ready", __func__, vcp->vcp_name);
189 memset(&msg, 0, sizeof(msg));
190 msg.type = VIODEV_MSG_READY;
191 imsg_compose_event(&dev.sync_iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
192 sizeof(msg));
193
194 /* Send a ready message over the async channel. */
195 log_debug("%s: sending heartbeat", __func__);
196 ret = imsg_compose_event(&dev.async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
197 &msg, sizeof(msg));
198 if (ret == -1) {
199 log_warnx("%s: failed to send async ready message!", __func__);
200 goto fail;
201 }
202
203 /* Engage the event loop! */
204 ret = event_dispatch();
205
206 if (ret == 0) {
207 /* Clean shutdown. */
208 close_fd(dev.sync_fd);
209 close_fd(dev.async_fd);
210 for (i = 0; i < vioblk->ndisk_fd; i++)
211 close_fd(vioblk->disk_fd[i]);
212 _exit(0);
213 /* NOTREACHED */
214 }
215
216 fail:
217 /* Try letting the vm know we've failed something. */
218 memset(&msg, 0, sizeof(msg));
219 msg.type = VIODEV_MSG_ERROR;
220 msg.data = ret;
221 imsg_compose(&dev.sync_iev.ibuf, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
222 sizeof(msg));
223 imsgbuf_flush(&dev.sync_iev.ibuf);
224
225 close_fd(dev.sync_fd);
226 close_fd(dev.async_fd);
227 if (vioblk != NULL) {
228 for (i = 0; i < vioblk->ndisk_fd; i++)
229 close_fd(vioblk->disk_fd[i]);
230 }
231 _exit(ret);
232 /* NOTREACHED */
233 }
234
235 const char *
vioblk_cmd_name(uint32_t type)236 vioblk_cmd_name(uint32_t type)
237 {
238 switch (type) {
239 case VIRTIO_BLK_T_IN: return "read";
240 case VIRTIO_BLK_T_OUT: return "write";
241 case VIRTIO_BLK_T_SCSI_CMD: return "scsi read";
242 case VIRTIO_BLK_T_SCSI_CMD_OUT: return "scsi write";
243 case VIRTIO_BLK_T_FLUSH: return "flush";
244 case VIRTIO_BLK_T_FLUSH_OUT: return "flush out";
245 case VIRTIO_BLK_T_GET_ID: return "get id";
246 default: return "unknown";
247 }
248 }
249
250 static void
vioblk_update_qa(struct vioblk_dev * dev)251 vioblk_update_qa(struct vioblk_dev *dev)
252 {
253 struct virtio_vq_info *vq_info;
254 void *hva = NULL;
255
256 /* Invalid queue? */
257 if (dev->cfg.queue_select > 0)
258 return;
259
260 vq_info = &dev->vq[dev->cfg.queue_select];
261 vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE;
262
263 hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIOBLK_QUEUE_SIZE));
264 if (hva == NULL)
265 fatal("vioblk_update_qa");
266 vq_info->q_hva = hva;
267 }
268
269 static void
vioblk_update_qs(struct vioblk_dev * dev)270 vioblk_update_qs(struct vioblk_dev *dev)
271 {
272 struct virtio_vq_info *vq_info;
273
274 /* Invalid queue? */
275 if (dev->cfg.queue_select > 0) {
276 dev->cfg.queue_size = 0;
277 return;
278 }
279
280 vq_info = &dev->vq[dev->cfg.queue_select];
281
282 /* Update queue pfn/size based on queue select */
283 dev->cfg.queue_pfn = vq_info->q_gpa >> 12;
284 dev->cfg.queue_size = vq_info->qs;
285 }
286
287 /*
288 * Process virtqueue notifications. If an unrecoverable error occurs, puts
289 * device into a "needs reset" state.
290 *
291 * Returns 1 if an we need to assert an IRQ.
292 */
293 static int
vioblk_notifyq(struct vioblk_dev * dev)294 vioblk_notifyq(struct vioblk_dev *dev)
295 {
296 uint32_t cmd_len;
297 uint16_t idx, cmd_desc_idx;
298 uint8_t ds;
299 off_t offset;
300 ssize_t sz;
301 int is_write, notify = 0, i;
302 char *vr;
303 struct vring_desc *table, *desc;
304 struct vring_avail *avail;
305 struct vring_used *used;
306 struct virtio_blk_req_hdr *cmd;
307 struct virtio_vq_info *vq_info;
308
309 /* Invalid queue? */
310 if (dev->cfg.queue_notify > 0)
311 return (0);
312
313 vq_info = &dev->vq[dev->cfg.queue_notify];
314 idx = vq_info->last_avail;
315 vr = vq_info->q_hva;
316 if (vr == NULL)
317 fatalx("%s: null vring", __func__);
318
319 /* Compute offsets in table of descriptors, avail ring, and used ring */
320 table = (struct vring_desc *)(vr);
321 avail = (struct vring_avail *)(vr + vq_info->vq_availoffset);
322 used = (struct vring_used *)(vr + vq_info->vq_usedoffset);
323
324 while (idx != avail->idx) {
325 /* Retrieve Command descriptor. */
326 cmd_desc_idx = avail->ring[idx & VIOBLK_QUEUE_MASK];
327 desc = &table[cmd_desc_idx];
328 cmd_len = desc->len;
329
330 /*
331 * Validate Command descriptor. It should be chained to another
332 * descriptor and not be itself writable.
333 */
334 if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
335 log_warnx("%s: unchained cmd descriptor", __func__);
336 goto reset;
337 }
338 if (DESC_WRITABLE(desc)) {
339 log_warnx("%s: invalid cmd descriptor state", __func__);
340 goto reset;
341 }
342
343 /* Retrieve the vioblk command request. */
344 cmd = hvaddr_mem(desc->addr, sizeof(*cmd));
345 if (cmd == NULL)
346 goto reset;
347
348 /* Advance to the 2nd descriptor. */
349 desc = &table[desc->next & VIOBLK_QUEUE_MASK];
350
351 /* Process each available command & chain. */
352 switch (cmd->type) {
353 case VIRTIO_BLK_T_IN:
354 case VIRTIO_BLK_T_OUT:
355 /* Read (IN) & Write (OUT) */
356 is_write = (cmd->type == VIRTIO_BLK_T_OUT) ? 1 : 0;
357 offset = cmd->sector * VIRTIO_BLK_SECTOR_SIZE;
358 sz = vioblk_rw(dev, is_write, offset, table, &desc);
359 if (sz == -1)
360 ds = VIRTIO_BLK_S_IOERR;
361 else
362 ds = VIRTIO_BLK_S_OK;
363 break;
364 case VIRTIO_BLK_T_GET_ID:
365 /*
366 * We don't support this command yet. While it's not
367 * officially part of the virtio spec (will be in v1.2)
368 * there's no feature to negotiate. Linux drivers will
369 * often send this command regardless.
370 */
371 ds = VIRTIO_BLK_S_UNSUPP;
372 break;
373 default:
374 log_warnx("%s: unsupported vioblk command %d", __func__,
375 cmd->type);
376 ds = VIRTIO_BLK_S_UNSUPP;
377 break;
378 }
379
380 /* Advance to the end of the chain, if needed. */
381 i = 0;
382 while (desc->flags & VRING_DESC_F_NEXT) {
383 desc = &table[desc->next & VIOBLK_QUEUE_MASK];
384 if (++i >= VIOBLK_QUEUE_SIZE) {
385 /*
386 * If we encounter an infinite/looping chain,
387 * not much we can do but say we need a reset.
388 */
389 log_warnx("%s: descriptor chain overflow",
390 __func__);
391 goto reset;
392 }
393 }
394
395 /* Provide the status of our command processing. */
396 if (!DESC_WRITABLE(desc)) {
397 log_warnx("%s: status descriptor unwritable", __func__);
398 goto reset;
399 }
400 /* Overkill as ds is 1 byte, but validates gpa. */
401 if (write_mem(desc->addr, &ds, sizeof(ds)))
402 log_warnx("%s: can't write device status data "
403 "@ 0x%llx",__func__, desc->addr);
404
405 dev->cfg.isr_status |= 1;
406 notify = 1;
407
408 used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx;
409 used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_len;
410
411 __sync_synchronize();
412 used->idx++;
413 idx++;
414 }
415
416 vq_info->last_avail = idx;
417 return (notify);
418
419 reset:
420 /*
421 * When setting the "needs reset" flag, the driver is notified
422 * via a configuration change interrupt.
423 */
424 dev->cfg.device_status |= DEVICE_NEEDS_RESET;
425 dev->cfg.isr_status |= VIRTIO_CONFIG_ISR_CONFIG_CHANGE;
426 return (1);
427 }
428
429 static void
dev_dispatch_vm(int fd,short event,void * arg)430 dev_dispatch_vm(int fd, short event, void *arg)
431 {
432 struct virtio_dev *dev = (struct virtio_dev *)arg;
433 struct imsgev *iev = &dev->async_iev;
434 struct imsgbuf *ibuf = &iev->ibuf;
435 struct imsg imsg;
436 ssize_t n = 0;
437 int verbose;
438
439 if (event & EV_READ) {
440 if ((n = imsgbuf_read(ibuf)) == -1)
441 fatal("%s: imsgbuf_read", __func__);
442 if (n == 0) {
443 /* this pipe is dead, so remove the event handler */
444 log_debug("%s: pipe dead (EV_READ)", __func__);
445 event_del(&iev->ev);
446 event_loopexit(NULL);
447 return;
448 }
449 }
450
451 if (event & EV_WRITE) {
452 if (imsgbuf_write(ibuf) == -1) {
453 if (errno == EPIPE) {
454 /* this pipe is dead, remove the handler */
455 log_debug("%s: pipe dead (EV_WRITE)", __func__);
456 event_del(&iev->ev);
457 event_loopexit(NULL);
458 return;
459 }
460 fatal("%s: imsgbuf_write", __func__);
461 }
462 }
463
464 for (;;) {
465 if ((n = imsg_get(ibuf, &imsg)) == -1)
466 fatal("%s: imsg_get", __func__);
467 if (n == 0)
468 break;
469
470 switch (imsg.hdr.type) {
471 case IMSG_VMDOP_PAUSE_VM:
472 log_debug("%s: pausing", __func__);
473 break;
474 case IMSG_VMDOP_UNPAUSE_VM:
475 log_debug("%s: unpausing", __func__);
476 break;
477 case IMSG_CTL_VERBOSE:
478 IMSG_SIZE_CHECK(&imsg, &verbose);
479 memcpy(&verbose, imsg.data, sizeof(verbose));
480 log_setverbose(verbose);
481 break;
482 default:
483 log_warnx("%s: unhandled imsg type %d", __func__,
484 imsg.hdr.type);
485 break;
486 }
487 imsg_free(&imsg);
488 }
489 imsg_event_add(iev);
490 }
491
492 /*
493 * Synchronous IO handler.
494 *
495 */
496 static void
handle_sync_io(int fd,short event,void * arg)497 handle_sync_io(int fd, short event, void *arg)
498 {
499 struct virtio_dev *dev = (struct virtio_dev *)arg;
500 struct imsgev *iev = &dev->sync_iev;
501 struct imsgbuf *ibuf = &iev->ibuf;
502 struct viodev_msg msg;
503 struct imsg imsg;
504 ssize_t n;
505 int8_t intr = INTR_STATE_NOOP;
506
507 if (event & EV_READ) {
508 if ((n = imsgbuf_read(ibuf)) == -1)
509 fatal("%s: imsgbuf_read", __func__);
510 if (n == 0) {
511 /* this pipe is dead, so remove the event handler */
512 log_debug("%s: vioblk pipe dead (EV_READ)", __func__);
513 event_del(&iev->ev);
514 event_loopexit(NULL);
515 return;
516 }
517 }
518
519 if (event & EV_WRITE) {
520 if (imsgbuf_write(ibuf) == -1) {
521 if (errno == EPIPE) {
522 /* this pipe is dead, remove the handler */
523 log_debug("%s: pipe dead (EV_WRITE)", __func__);
524 event_del(&iev->ev);
525 event_loopexit(NULL);
526 return;
527 }
528 fatal("%s: imsgbuf_write", __func__);
529 }
530 }
531
532 for (;;) {
533 if ((n = imsg_get(ibuf, &imsg)) == -1)
534 fatalx("%s: imsg_get (n=%ld)", __func__, n);
535 if (n == 0)
536 break;
537
538 /* Unpack our message. They ALL should be dev messeges! */
539 IMSG_SIZE_CHECK(&imsg, &msg);
540 memcpy(&msg, imsg.data, sizeof(msg));
541 imsg_free(&imsg);
542
543 switch (msg.type) {
544 case VIODEV_MSG_DUMP:
545 /* Dump device */
546 n = atomicio(vwrite, dev->sync_fd, dev, sizeof(*dev));
547 if (n != sizeof(*dev)) {
548 log_warnx("%s: failed to dump vioblk device",
549 __func__);
550 break;
551 }
552 case VIODEV_MSG_IO_READ:
553 /* Read IO: make sure to send a reply */
554 msg.data = handle_io_read(&msg, dev, &intr);
555 msg.data_valid = 1;
556 msg.state = intr;
557 imsg_compose_event(iev, IMSG_DEVOP_MSG, 0, 0, -1, &msg,
558 sizeof(msg));
559 break;
560 case VIODEV_MSG_IO_WRITE:
561 /* Write IO: no reply needed */
562 if (handle_io_write(&msg, dev) == 1)
563 virtio_assert_irq(dev, 0);
564 break;
565 case VIODEV_MSG_SHUTDOWN:
566 event_del(&dev->sync_iev.ev);
567 event_loopbreak();
568 return;
569 default:
570 fatalx("%s: invalid msg type %d", __func__, msg.type);
571 }
572 }
573 imsg_event_add(iev);
574 }
575
576 static int
handle_io_write(struct viodev_msg * msg,struct virtio_dev * dev)577 handle_io_write(struct viodev_msg *msg, struct virtio_dev *dev)
578 {
579 struct vioblk_dev *vioblk = &dev->vioblk;
580 uint32_t data = msg->data;
581 int intr = 0;
582
583 switch (msg->reg) {
584 case VIRTIO_CONFIG_DEVICE_FEATURES:
585 case VIRTIO_CONFIG_QUEUE_SIZE:
586 case VIRTIO_CONFIG_ISR_STATUS:
587 log_warnx("%s: illegal write %x to %s", __progname, data,
588 virtio_reg_name(msg->reg));
589 break;
590 case VIRTIO_CONFIG_GUEST_FEATURES:
591 vioblk->cfg.guest_feature = data;
592 break;
593 case VIRTIO_CONFIG_QUEUE_PFN:
594 vioblk->cfg.queue_pfn = data;
595 vioblk_update_qa(vioblk);
596 break;
597 case VIRTIO_CONFIG_QUEUE_SELECT:
598 vioblk->cfg.queue_select = data;
599 vioblk_update_qs(vioblk);
600 break;
601 case VIRTIO_CONFIG_QUEUE_NOTIFY:
602 /* XXX We should be stricter about status checks. */
603 if (!(vioblk->cfg.device_status & DEVICE_NEEDS_RESET)) {
604 vioblk->cfg.queue_notify = data;
605 if (vioblk_notifyq(vioblk))
606 intr = 1;
607 }
608 break;
609 case VIRTIO_CONFIG_DEVICE_STATUS:
610 vioblk->cfg.device_status = data;
611 if (vioblk->cfg.device_status == 0) {
612 vioblk->cfg.guest_feature = 0;
613 vioblk->cfg.queue_pfn = 0;
614 vioblk_update_qa(vioblk);
615 vioblk->cfg.queue_size = 0;
616 vioblk_update_qs(vioblk);
617 vioblk->cfg.queue_select = 0;
618 vioblk->cfg.queue_notify = 0;
619 vioblk->cfg.isr_status = 0;
620 vioblk->vq[0].last_avail = 0;
621 vioblk->vq[0].notified_avail = 0;
622 virtio_deassert_irq(dev, msg->vcpu);
623 }
624 break;
625 default:
626 break;
627 }
628 return (intr);
629 }
630
631 static uint32_t
handle_io_read(struct viodev_msg * msg,struct virtio_dev * dev,int8_t * intr)632 handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t *intr)
633 {
634 struct vioblk_dev *vioblk = &dev->vioblk;
635 uint8_t sz = msg->io_sz;
636 uint32_t data;
637
638 if (msg->data_valid)
639 data = msg->data;
640 else
641 data = 0;
642
643 switch (msg->reg) {
644 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI:
645 switch (sz) {
646 case 4:
647 data = (uint32_t)(vioblk->capacity);
648 break;
649 case 2:
650 data &= 0xFFFF0000;
651 data |= (uint32_t)(vioblk->capacity) & 0xFFFF;
652 break;
653 case 1:
654 data &= 0xFFFFFF00;
655 data |= (uint32_t)(vioblk->capacity) & 0xFF;
656 break;
657 }
658 /* XXX handle invalid sz */
659 break;
660 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 1:
661 if (sz == 1) {
662 data &= 0xFFFFFF00;
663 data |= (uint32_t)(vioblk->capacity >> 8) & 0xFF;
664 }
665 /* XXX handle invalid sz */
666 break;
667 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 2:
668 if (sz == 1) {
669 data &= 0xFFFFFF00;
670 data |= (uint32_t)(vioblk->capacity >> 16) & 0xFF;
671 } else if (sz == 2) {
672 data &= 0xFFFF0000;
673 data |= (uint32_t)(vioblk->capacity >> 16) & 0xFFFF;
674 }
675 /* XXX handle invalid sz */
676 break;
677 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 3:
678 if (sz == 1) {
679 data &= 0xFFFFFF00;
680 data |= (uint32_t)(vioblk->capacity >> 24) & 0xFF;
681 }
682 /* XXX handle invalid sz */
683 break;
684 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 4:
685 switch (sz) {
686 case 4:
687 data = (uint32_t)(vioblk->capacity >> 32);
688 break;
689 case 2:
690 data &= 0xFFFF0000;
691 data |= (uint32_t)(vioblk->capacity >> 32) & 0xFFFF;
692 break;
693 case 1:
694 data &= 0xFFFFFF00;
695 data |= (uint32_t)(vioblk->capacity >> 32) & 0xFF;
696 break;
697 }
698 /* XXX handle invalid sz */
699 break;
700 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 5:
701 if (sz == 1) {
702 data &= 0xFFFFFF00;
703 data |= (uint32_t)(vioblk->capacity >> 40) & 0xFF;
704 }
705 /* XXX handle invalid sz */
706 break;
707 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 6:
708 if (sz == 1) {
709 data &= 0xFFFFFF00;
710 data |= (uint32_t)(vioblk->capacity >> 48) & 0xFF;
711 } else if (sz == 2) {
712 data &= 0xFFFF0000;
713 data |= (uint32_t)(vioblk->capacity >> 48) & 0xFFFF;
714 }
715 /* XXX handle invalid sz */
716 break;
717 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 7:
718 if (sz == 1) {
719 data &= 0xFFFFFF00;
720 data |= (uint32_t)(vioblk->capacity >> 56) & 0xFF;
721 }
722 /* XXX handle invalid sz */
723 break;
724 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 12:
725 switch (sz) {
726 case 4:
727 data = (uint32_t)(vioblk->seg_max);
728 break;
729 case 2:
730 data &= 0xFFFF0000;
731 data |= (uint32_t)(vioblk->seg_max) & 0xFFFF;
732 break;
733 case 1:
734 data &= 0xFFFFFF00;
735 data |= (uint32_t)(vioblk->seg_max) & 0xFF;
736 break;
737 }
738 /* XXX handle invalid sz */
739 break;
740 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 13:
741 if (sz == 1) {
742 data &= 0xFFFFFF00;
743 data |= (uint32_t)(vioblk->seg_max >> 8) & 0xFF;
744 }
745 /* XXX handle invalid sz */
746 break;
747 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 14:
748 if (sz == 1) {
749 data &= 0xFFFFFF00;
750 data |= (uint32_t)(vioblk->seg_max >> 16) & 0xFF;
751 } else if (sz == 2) {
752 data &= 0xFFFF0000;
753 data |= (uint32_t)(vioblk->seg_max >> 16)
754 & 0xFFFF;
755 }
756 /* XXX handle invalid sz */
757 break;
758 case VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI + 15:
759 if (sz == 1) {
760 data &= 0xFFFFFF00;
761 data |= (uint32_t)(vioblk->seg_max >> 24) & 0xFF;
762 }
763 /* XXX handle invalid sz */
764 break;
765 case VIRTIO_CONFIG_DEVICE_FEATURES:
766 data = vioblk->cfg.device_feature;
767 break;
768 case VIRTIO_CONFIG_GUEST_FEATURES:
769 data = vioblk->cfg.guest_feature;
770 break;
771 case VIRTIO_CONFIG_QUEUE_PFN:
772 data = vioblk->cfg.queue_pfn;
773 break;
774 case VIRTIO_CONFIG_QUEUE_SIZE:
775 data = vioblk->cfg.queue_size;
776 break;
777 case VIRTIO_CONFIG_QUEUE_SELECT:
778 data = vioblk->cfg.queue_select;
779 break;
780 case VIRTIO_CONFIG_QUEUE_NOTIFY:
781 data = vioblk->cfg.queue_notify;
782 break;
783 case VIRTIO_CONFIG_DEVICE_STATUS:
784 data = vioblk->cfg.device_status;
785 break;
786 case VIRTIO_CONFIG_ISR_STATUS:
787 data = vioblk->cfg.isr_status;
788 vioblk->cfg.isr_status = 0;
789 if (intr != NULL)
790 *intr = INTR_STATE_DEASSERT;
791 break;
792 default:
793 return (0xFFFFFFFF);
794 }
795
796 return (data);
797 }
798
799 /*
800 * Emulate read/write io. Walks the descriptor chain, collecting io work and
801 * then emulates the read or write.
802 *
803 * On success, returns bytes read/written.
804 * On error, returns -1 and descriptor (desc) remains at its current position.
805 */
806 static ssize_t
vioblk_rw(struct vioblk_dev * dev,int is_write,off_t offset,struct vring_desc * desc_tbl,struct vring_desc ** desc)807 vioblk_rw(struct vioblk_dev *dev, int is_write, off_t offset,
808 struct vring_desc *desc_tbl, struct vring_desc **desc)
809 {
810 struct iovec *iov = NULL;
811 ssize_t sz = 0;
812 size_t io_idx = 0; /* Index into iovec workqueue. */
813 size_t xfer_sz = 0; /* Total accumulated io bytes. */
814
815 do {
816 iov = &io_v[io_idx];
817
818 /*
819 * Reads require writable descriptors. Writes require
820 * non-writeable descriptors.
821 */
822 if ((!is_write) ^ DESC_WRITABLE(*desc)) {
823 log_warnx("%s: invalid descriptor for %s command",
824 __func__, is_write ? "write" : "read");
825 return (-1);
826 }
827
828 /* Collect the IO segment information. */
829 iov->iov_len = (size_t)(*desc)->len;
830 iov->iov_base = hvaddr_mem((*desc)->addr, iov->iov_len);
831 if (iov->iov_base == NULL)
832 return (-1);
833
834 /* Move our counters. */
835 xfer_sz += iov->iov_len;
836 io_idx++;
837
838 /* Guard against infinite chains */
839 if (io_idx >= nitems(io_v)) {
840 log_warnx("%s: descriptor table "
841 "invalid", __func__);
842 return (-1);
843 }
844
845 /* Advance to the next descriptor. */
846 *desc = &desc_tbl[(*desc)->next & VIOBLK_QUEUE_MASK];
847 } while ((*desc)->flags & VRING_DESC_F_NEXT);
848
849 /*
850 * Validate the requested block io operation alignment and size.
851 * Checking offset is just an extra caution as it is derived from
852 * a disk sector and is done for completeness in bounds checking.
853 */
854 if (offset % VIRTIO_BLK_SECTOR_SIZE != 0 &&
855 xfer_sz % VIRTIO_BLK_SECTOR_SIZE != 0) {
856 log_warnx("%s: unaligned read", __func__);
857 return (-1);
858 }
859 if (xfer_sz > SSIZE_MAX) { /* iovec_copyin limit */
860 log_warnx("%s: invalid %s size: %zu", __func__,
861 is_write ? "write" : "read", xfer_sz);
862 return (-1);
863 }
864
865 /* Emulate the Read or Write operation. */
866 if (is_write)
867 sz = dev->file.pwritev(dev->file.p, io_v, io_idx, offset);
868 else
869 sz = dev->file.preadv(dev->file.p, io_v, io_idx, offset);
870 if (sz != (ssize_t)xfer_sz) {
871 log_warnx("%s: %s failure at offset 0x%llx, xfer_sz=%zu, "
872 "sz=%ld", __func__, (is_write ? "write" : "read"), offset,
873 xfer_sz, sz);
874 return (-1);
875 }
876
877 return (sz);
878 }
879