xref: /dragonfly/sys/dev/virtual/nvmm/nvmm.c (revision 24f14bf420a8b016f2534fd3c7f3b9e45ce582e3)
1 /*
2  * Copyright (c) 2018-2021 Maxime Villard, m00nbsd.net
3  * All rights reserved.
4  *
5  * This code is part of the NVMM hypervisor.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 
32 #include <sys/kernel.h>
33 #include <sys/mman.h>
34 
35 #include "nvmm.h"
36 #include "nvmm_internal.h"
37 #include "nvmm_ioctl.h"
38 
/*
 * Fixed table of machine slots.  A machine ID is used directly as an index
 * into this array (see nvmm_machine_get()).
 */
39 static struct nvmm_machine machines[NVMM_MAX_MACHINES];
/*
 * Number of slots currently marked present: incremented in
 * nvmm_machine_alloc() and decremented in nvmm_machine_free() through the
 * os_atomic_*_uint() helpers.
 */
40 volatile unsigned int nmachines __cacheline_aligned;
41 
/*
 * Backends known to this file.  Entries are only compiled in on x86_64; on
 * any other architecture the initializer list is empty.
 */
42 static const struct nvmm_impl *nvmm_impl_list[] = {
43 #if defined(__x86_64__)
44           &nvmm_x86_svm,      /* x86 AMD SVM */
45           &nvmm_x86_vmx       /* x86 Intel VMX */
46 #endif
47 };
48 
/*
 * Backend that every (*nvmm_impl->...) call in this file dispatches through.
 * Starts out NULL; the code that assigns it is not part of this section.
 */
49 const struct nvmm_impl *nvmm_impl __read_mostly = NULL;
50 
/*
 * Special owner: nvmm_machine_get() lets it access machines that belong to
 * any other owner.
 */
51 struct nvmm_owner nvmm_root_owner;
52 
53 /* -------------------------------------------------------------------------- */
54 
55 static int
nvmm_machine_alloc(struct nvmm_machine ** ret)56 nvmm_machine_alloc(struct nvmm_machine **ret)
57 {
58           struct nvmm_machine *mach;
59           size_t i;
60 
61           for (i = 0; i < NVMM_MAX_MACHINES; i++) {
62                     mach = &machines[i];
63 
64                     os_rwl_wlock(&mach->lock);
65                     if (mach->present) {
66                               os_rwl_unlock(&mach->lock);
67                               continue;
68                     }
69 
70                     mach->present = true;
71                     mach->time = time_second;
72                     *ret = mach;
73                     os_atomic_inc_uint(&nmachines);
74                     return 0;
75           }
76 
77           return ENOBUFS;
78 }
79 
80 static void
nvmm_machine_free(struct nvmm_machine * mach)81 nvmm_machine_free(struct nvmm_machine *mach)
82 {
83           OS_ASSERT(os_rwl_wheld(&mach->lock));
84           OS_ASSERT(mach->present);
85           mach->present = false;
86           os_atomic_dec_uint(&nmachines);
87 }
88 
89 static int
nvmm_machine_get(struct nvmm_owner * owner,nvmm_machid_t machid,struct nvmm_machine ** ret,bool writer)90 nvmm_machine_get(struct nvmm_owner *owner, nvmm_machid_t machid,
91     struct nvmm_machine **ret, bool writer)
92 {
93           struct nvmm_machine *mach;
94 
95           if (__predict_false(machid >= NVMM_MAX_MACHINES)) {
96                     return EINVAL;
97           }
98           mach = &machines[machid];
99 
100           if (__predict_false(writer)) {
101                     os_rwl_wlock(&mach->lock);
102           } else {
103                     os_rwl_rlock(&mach->lock);
104           }
105           if (__predict_false(!mach->present)) {
106                     os_rwl_unlock(&mach->lock);
107                     return ENOENT;
108           }
109           if (__predict_false(mach->owner != owner &&
110                                   owner != &nvmm_root_owner)) {
111                     os_rwl_unlock(&mach->lock);
112                     return EPERM;
113           }
114           *ret = mach;
115 
116           return 0;
117 }
118 
119 static void
nvmm_machine_put(struct nvmm_machine * mach)120 nvmm_machine_put(struct nvmm_machine *mach)
121 {
122           os_rwl_unlock(&mach->lock);
123 }
124 
125 /* -------------------------------------------------------------------------- */
126 
127 static int
nvmm_vcpu_alloc(struct nvmm_machine * mach,nvmm_cpuid_t cpuid,struct nvmm_cpu ** ret)128 nvmm_vcpu_alloc(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
129     struct nvmm_cpu **ret)
130 {
131           struct nvmm_cpu *vcpu;
132 
133           if (cpuid >= NVMM_MAX_VCPUS) {
134                     return EINVAL;
135           }
136           vcpu = &mach->cpus[cpuid];
137 
138           os_mtx_lock(&vcpu->lock);
139           if (vcpu->present) {
140                     os_mtx_unlock(&vcpu->lock);
141                     return EBUSY;
142           }
143 
144           vcpu->present = true;
145           vcpu->comm = NULL;
146           vcpu->hcpu_last = -1;
147           *ret = vcpu;
148           return 0;
149 }
150 
151 static void
nvmm_vcpu_free(struct nvmm_machine * mach,struct nvmm_cpu * vcpu)152 nvmm_vcpu_free(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
153 {
154           OS_ASSERT(os_mtx_owned(&vcpu->lock));
155           vcpu->present = false;
156           if (vcpu->comm != NULL) {
157                     os_vmobj_unmap(os_kernel_map, (vaddr_t)vcpu->comm,
158                         (vaddr_t)vcpu->comm + NVMM_COMM_PAGE_SIZE, true);
159                     /*
160                      * Require userland to unmap the comm page from its address
161                      * space, because os_curproc_map at this point (fd close)
162                      * is not guaranteed to be the correct address space.
163                      */
164           }
165 }
166 
167 static int
nvmm_vcpu_get(struct nvmm_machine * mach,nvmm_cpuid_t cpuid,struct nvmm_cpu ** ret)168 nvmm_vcpu_get(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
169     struct nvmm_cpu **ret)
170 {
171           struct nvmm_cpu *vcpu;
172 
173           if (__predict_false(cpuid >= NVMM_MAX_VCPUS)) {
174                     return EINVAL;
175           }
176           vcpu = &mach->cpus[cpuid];
177 
178           os_mtx_lock(&vcpu->lock);
179           if (__predict_false(!vcpu->present)) {
180                     os_mtx_unlock(&vcpu->lock);
181                     return ENOENT;
182           }
183           *ret = vcpu;
184 
185           return 0;
186 }
187 
188 static void
nvmm_vcpu_put(struct nvmm_cpu * vcpu)189 nvmm_vcpu_put(struct nvmm_cpu *vcpu)
190 {
191           os_mtx_unlock(&vcpu->lock);
192 }
193 
194 /* -------------------------------------------------------------------------- */
195 
196 void
nvmm_kill_machines(struct nvmm_owner * owner)197 nvmm_kill_machines(struct nvmm_owner *owner)
198 {
199           struct nvmm_machine *mach;
200           struct nvmm_cpu *vcpu;
201           size_t i, j;
202           int error;
203 
204           for (i = 0; i < NVMM_MAX_MACHINES; i++) {
205                     mach = &machines[i];
206 
207                     os_rwl_wlock(&mach->lock);
208                     if (!mach->present || mach->owner != owner) {
209                               os_rwl_unlock(&mach->lock);
210                               continue;
211                     }
212 
213                     /* Kill it. */
214                     for (j = 0; j < NVMM_MAX_VCPUS; j++) {
215                               error = nvmm_vcpu_get(mach, j, &vcpu);
216                               if (error)
217                                         continue;
218                               (*nvmm_impl->vcpu_destroy)(mach, vcpu);
219                               nvmm_vcpu_free(mach, vcpu);
220                               nvmm_vcpu_put(vcpu);
221                               os_atomic_dec_uint(&mach->ncpus);
222                     }
223                     (*nvmm_impl->machine_destroy)(mach);
224                     os_vmspace_destroy(mach->vm);
225 
226                     /* Drop the kernel vmobj refs. */
227                     for (j = 0; j < NVMM_MAX_HMAPPINGS; j++) {
228                               if (!mach->hmap[j].present)
229                                         continue;
230                               os_vmobj_rel(mach->hmap[j].vmobj);
231                     }
232 
233                     nvmm_machine_free(mach);
234 
235                     os_rwl_unlock(&mach->lock);
236           }
237 }
238 
239 /* -------------------------------------------------------------------------- */
240 
241 static int
nvmm_capability(struct nvmm_owner * owner,struct nvmm_ioc_capability * args)242 nvmm_capability(struct nvmm_owner *owner, struct nvmm_ioc_capability *args)
243 {
244           args->cap.version = NVMM_KERN_VERSION;
245           args->cap.state_size = nvmm_impl->state_size;
246           args->cap.comm_size = NVMM_COMM_PAGE_SIZE;
247           args->cap.max_machines = NVMM_MAX_MACHINES;
248           args->cap.max_vcpus = NVMM_MAX_VCPUS;
249           args->cap.max_ram = NVMM_MAX_RAM;
250 
251           (*nvmm_impl->capability)(&args->cap);
252 
253           return 0;
254 }
255 
256 static int
nvmm_machine_create(struct nvmm_owner * owner,struct nvmm_ioc_machine_create * args)257 nvmm_machine_create(struct nvmm_owner *owner,
258     struct nvmm_ioc_machine_create *args)
259 {
260           struct nvmm_machine *mach;
261           int error;
262 
263           error = nvmm_machine_alloc(&mach);
264           if (error)
265                     return error;
266 
267           /* Curproc owns the machine. */
268           mach->owner = owner;
269 
270           /* Zero out the host mappings. */
271           memset(&mach->hmap, 0, sizeof(mach->hmap));
272 
273           /* Create the machine vmspace. */
274           mach->gpa_begin = 0;
275           mach->gpa_end = NVMM_MAX_RAM;
276           mach->vm = os_vmspace_create(mach->gpa_begin, mach->gpa_end);
277 
278           /* Create the comm vmobj. */
279           mach->commvmobj = os_vmobj_create(
280               NVMM_MAX_VCPUS * NVMM_COMM_PAGE_SIZE);
281 
282           (*nvmm_impl->machine_create)(mach);
283 
284           args->machid = mach->machid;
285           nvmm_machine_put(mach);
286 
287           return 0;
288 }
289 
290 static int
nvmm_machine_destroy(struct nvmm_owner * owner,struct nvmm_ioc_machine_destroy * args)291 nvmm_machine_destroy(struct nvmm_owner *owner,
292     struct nvmm_ioc_machine_destroy *args)
293 {
294           struct nvmm_machine *mach;
295           struct nvmm_cpu *vcpu;
296           int error;
297           size_t i;
298 
299           error = nvmm_machine_get(owner, args->machid, &mach, true);
300           if (error)
301                     return error;
302 
303           for (i = 0; i < NVMM_MAX_VCPUS; i++) {
304                     error = nvmm_vcpu_get(mach, i, &vcpu);
305                     if (error)
306                               continue;
307 
308                     (*nvmm_impl->vcpu_destroy)(mach, vcpu);
309                     nvmm_vcpu_free(mach, vcpu);
310                     nvmm_vcpu_put(vcpu);
311                     os_atomic_dec_uint(&mach->ncpus);
312           }
313 
314           (*nvmm_impl->machine_destroy)(mach);
315 
316           /* Free the machine vmspace. */
317           os_vmspace_destroy(mach->vm);
318 
319           /* Drop the kernel vmobj refs. */
320           for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
321                     if (!mach->hmap[i].present)
322                               continue;
323                     os_vmobj_rel(mach->hmap[i].vmobj);
324           }
325 
326           nvmm_machine_free(mach);
327           nvmm_machine_put(mach);
328 
329           return 0;
330 }
331 
332 static int
nvmm_machine_configure(struct nvmm_owner * owner,struct nvmm_ioc_machine_configure * args)333 nvmm_machine_configure(struct nvmm_owner *owner,
334     struct nvmm_ioc_machine_configure *args)
335 {
336           struct nvmm_machine *mach;
337           size_t allocsz;
338           uint64_t op;
339           void *data;
340           int error;
341 
342           op = NVMM_MACH_CONF_MD(args->op);
343           if (__predict_false(op >= nvmm_impl->mach_conf_max)) {
344                     return EINVAL;
345           }
346 
347           allocsz = nvmm_impl->mach_conf_sizes[op];
348           data = os_mem_alloc(allocsz);
349 
350           error = nvmm_machine_get(owner, args->machid, &mach, true);
351           if (error) {
352                     os_mem_free(data, allocsz);
353                     return error;
354           }
355 
356           error = copyin(args->conf, data, allocsz);
357           if (error) {
358                     goto out;
359           }
360 
361           error = (*nvmm_impl->machine_configure)(mach, op, data);
362 
363 out:
364           nvmm_machine_put(mach);
365           os_mem_free(data, allocsz);
366           return error;
367 }
368 
369 static int
nvmm_vcpu_create(struct nvmm_owner * owner,struct nvmm_ioc_vcpu_create * args)370 nvmm_vcpu_create(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_create *args)
371 {
372           struct nvmm_machine *mach;
373           struct nvmm_cpu *vcpu;
374           int error;
375 
376           error = nvmm_machine_get(owner, args->machid, &mach, false);
377           if (error)
378                     return error;
379 
380           error = nvmm_vcpu_alloc(mach, args->cpuid, &vcpu);
381           if (error)
382                     goto out;
383 
384           /* Map the comm page on the kernel side, as wired. */
385           error = os_vmobj_map(os_kernel_map, (vaddr_t *)&vcpu->comm,
386               NVMM_COMM_PAGE_SIZE, mach->commvmobj,
387               args->cpuid * NVMM_COMM_PAGE_SIZE, true /* wired */,
388               false /* !fixed */, true /* shared */, PROT_READ | PROT_WRITE,
389               PROT_READ | PROT_WRITE);
390           if (error) {
391                     nvmm_vcpu_free(mach, vcpu);
392                     nvmm_vcpu_put(vcpu);
393                     goto out;
394           }
395 
396           memset(vcpu->comm, 0, NVMM_COMM_PAGE_SIZE);
397 
398           /* Map the comm page on the user side, as pageable. */
399           error = os_vmobj_map(os_curproc_map, (vaddr_t *)&args->comm,
400               NVMM_COMM_PAGE_SIZE, mach->commvmobj,
401               args->cpuid * NVMM_COMM_PAGE_SIZE, false /* !wired */,
402               false /* !fixed */, true /* shared */, PROT_READ | PROT_WRITE,
403               PROT_READ | PROT_WRITE);
404           if (error) {
405                     nvmm_vcpu_free(mach, vcpu);
406                     nvmm_vcpu_put(vcpu);
407                     goto out;
408           }
409 
410           error = (*nvmm_impl->vcpu_create)(mach, vcpu);
411           if (error) {
412                     nvmm_vcpu_free(mach, vcpu);
413                     nvmm_vcpu_put(vcpu);
414                     goto out;
415           }
416 
417           nvmm_vcpu_put(vcpu);
418           os_atomic_inc_uint(&mach->ncpus);
419 
420 out:
421           nvmm_machine_put(mach);
422           return error;
423 }
424 
425 static int
nvmm_vcpu_destroy(struct nvmm_owner * owner,struct nvmm_ioc_vcpu_destroy * args)426 nvmm_vcpu_destroy(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_destroy *args)
427 {
428           struct nvmm_machine *mach;
429           struct nvmm_cpu *vcpu;
430           int error;
431 
432           error = nvmm_machine_get(owner, args->machid, &mach, false);
433           if (error)
434                     return error;
435 
436           error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
437           if (error)
438                     goto out;
439 
440           (*nvmm_impl->vcpu_destroy)(mach, vcpu);
441           nvmm_vcpu_free(mach, vcpu);
442           nvmm_vcpu_put(vcpu);
443           os_atomic_dec_uint(&mach->ncpus);
444 
445 out:
446           nvmm_machine_put(mach);
447           return error;
448 }
449 
450 static int
nvmm_vcpu_configure(struct nvmm_owner * owner,struct nvmm_ioc_vcpu_configure * args)451 nvmm_vcpu_configure(struct nvmm_owner *owner,
452     struct nvmm_ioc_vcpu_configure *args)
453 {
454           struct nvmm_machine *mach;
455           struct nvmm_cpu *vcpu;
456           size_t allocsz;
457           uint64_t op;
458           void *data;
459           int error;
460 
461           op = NVMM_VCPU_CONF_MD(args->op);
462           if (__predict_false(op >= nvmm_impl->vcpu_conf_max))
463                     return EINVAL;
464 
465           allocsz = nvmm_impl->vcpu_conf_sizes[op];
466           data = os_mem_alloc(allocsz);
467 
468           error = nvmm_machine_get(owner, args->machid, &mach, false);
469           if (error) {
470                     os_mem_free(data, allocsz);
471                     return error;
472           }
473 
474           error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
475           if (error) {
476                     nvmm_machine_put(mach);
477                     os_mem_free(data, allocsz);
478                     return error;
479           }
480 
481           error = copyin(args->conf, data, allocsz);
482           if (error) {
483                     goto out;
484           }
485 
486           error = (*nvmm_impl->vcpu_configure)(vcpu, op, data);
487 
488 out:
489           nvmm_vcpu_put(vcpu);
490           nvmm_machine_put(mach);
491           os_mem_free(data, allocsz);
492           return error;
493 }
494 
495 static int
nvmm_vcpu_setstate(struct nvmm_owner * owner,struct nvmm_ioc_vcpu_setstate * args)496 nvmm_vcpu_setstate(struct nvmm_owner *owner,
497     struct nvmm_ioc_vcpu_setstate *args)
498 {
499           struct nvmm_machine *mach;
500           struct nvmm_cpu *vcpu;
501           int error;
502 
503           error = nvmm_machine_get(owner, args->machid, &mach, false);
504           if (error)
505                     return error;
506 
507           error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
508           if (error)
509                     goto out;
510 
511           (*nvmm_impl->vcpu_setstate)(vcpu);
512           nvmm_vcpu_put(vcpu);
513 
514 out:
515           nvmm_machine_put(mach);
516           return error;
517 }
518 
519 static int
nvmm_vcpu_getstate(struct nvmm_owner * owner,struct nvmm_ioc_vcpu_getstate * args)520 nvmm_vcpu_getstate(struct nvmm_owner *owner,
521     struct nvmm_ioc_vcpu_getstate *args)
522 {
523           struct nvmm_machine *mach;
524           struct nvmm_cpu *vcpu;
525           int error;
526 
527           error = nvmm_machine_get(owner, args->machid, &mach, false);
528           if (error)
529                     return error;
530 
531           error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
532           if (error)
533                     goto out;
534 
535           (*nvmm_impl->vcpu_getstate)(vcpu);
536           nvmm_vcpu_put(vcpu);
537 
538 out:
539           nvmm_machine_put(mach);
540           return error;
541 }
542 
543 static int
nvmm_vcpu_inject(struct nvmm_owner * owner,struct nvmm_ioc_vcpu_inject * args)544 nvmm_vcpu_inject(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_inject *args)
545 {
546           struct nvmm_machine *mach;
547           struct nvmm_cpu *vcpu;
548           int error;
549 
550           error = nvmm_machine_get(owner, args->machid, &mach, false);
551           if (error)
552                     return error;
553 
554           error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
555           if (error)
556                     goto out;
557 
558           error = (*nvmm_impl->vcpu_inject)(vcpu);
559           nvmm_vcpu_put(vcpu);
560 
561 out:
562           nvmm_machine_put(mach);
563           return error;
564 }
565 
566 static int
nvmm_do_vcpu_run(struct nvmm_machine * mach,struct nvmm_cpu * vcpu,struct nvmm_vcpu_exit * exit)567 nvmm_do_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
568     struct nvmm_vcpu_exit *exit)
569 {
570           struct vmspace *vm = mach->vm;
571           int ret;
572 
573           while (1) {
574                     /* Got a signal? Or pending resched? Leave. */
575                     if (__predict_false(os_return_needed())) {
576                               exit->reason = NVMM_VCPU_EXIT_NONE;
577                               return 0;
578                     }
579 
580                     /* Run the VCPU. */
581                     ret = (*nvmm_impl->vcpu_run)(mach, vcpu, exit);
582                     if (__predict_false(ret != 0)) {
583                               return ret;
584                     }
585 
586                     /* Process nested page faults. */
587                     if (__predict_true(exit->reason != NVMM_VCPU_EXIT_MEMORY)) {
588                               break;
589                     }
590                     if (exit->u.mem.gpa >= mach->gpa_end) {
591                               break;
592                     }
593                     if (os_vmspace_fault(vm, exit->u.mem.gpa, exit->u.mem.prot)) {
594                               break;
595                     }
596           }
597 
598           return 0;
599 }
600 
601 static int
nvmm_vcpu_run(struct nvmm_owner * owner,struct nvmm_ioc_vcpu_run * args)602 nvmm_vcpu_run(struct nvmm_owner *owner, struct nvmm_ioc_vcpu_run *args)
603 {
604           struct nvmm_machine *mach;
605           struct nvmm_cpu *vcpu;
606           int error;
607 
608           error = nvmm_machine_get(owner, args->machid, &mach, false);
609           if (error)
610                     return error;
611 
612           error = nvmm_vcpu_get(mach, args->cpuid, &vcpu);
613           if (error)
614                     goto out;
615 
616           error = nvmm_do_vcpu_run(mach, vcpu, &args->exit);
617           nvmm_vcpu_put(vcpu);
618 
619 out:
620           nvmm_machine_put(mach);
621           return error;
622 }
623 
624 /* -------------------------------------------------------------------------- */
625 
626 static os_vmobj_t *
nvmm_hmapping_getvmobj(struct nvmm_machine * mach,uintptr_t hva,size_t size,size_t * off)627 nvmm_hmapping_getvmobj(struct nvmm_machine *mach, uintptr_t hva, size_t size,
628    size_t *off)
629 {
630           struct nvmm_hmapping *hmapping;
631           size_t i;
632 
633           for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
634                     hmapping = &mach->hmap[i];
635                     if (!hmapping->present) {
636                               continue;
637                     }
638                     if (hva >= hmapping->hva &&
639                         hva + size <= hmapping->hva + hmapping->size) {
640                               *off = hva - hmapping->hva;
641                               return hmapping->vmobj;
642                     }
643           }
644 
645           return NULL;
646 }
647 
648 static int
nvmm_hmapping_validate(struct nvmm_machine * mach,uintptr_t hva,size_t size)649 nvmm_hmapping_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size)
650 {
651           struct nvmm_hmapping *hmapping;
652           size_t i;
653           uintptr_t hva_end;
654           uintptr_t hmap_end;
655 
656           if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) {
657                     return EINVAL;
658           }
659           if (hva == 0) {
660                     return EINVAL;
661           }
662 
663           /*
664            * Overflow tests MUST be done very carefully to avoid compiler
665            * optimizations from effectively deleting the test.
666            */
667           hva_end = hva + size;
668           if (hva_end <= hva)
669                     return EINVAL;
670 
671           /*
672            * Overlap tests
673            */
674           for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
675                     hmapping = &mach->hmap[i];
676 
677                     if (!hmapping->present) {
678                               continue;
679                     }
680                     hmap_end = hmapping->hva + hmapping->size;
681 
682                     if (hva >= hmapping->hva && hva_end <= hmap_end)
683                               break;
684                     if (hva >= hmapping->hva && hva < hmap_end)
685                               return EEXIST;
686                     if (hva_end > hmapping->hva && hva_end <= hmap_end)
687                               return EEXIST;
688                     if (hva <= hmapping->hva && hva_end >= hmap_end)
689                               return EEXIST;
690           }
691 
692           return 0;
693 }
694 
695 static struct nvmm_hmapping *
nvmm_hmapping_alloc(struct nvmm_machine * mach)696 nvmm_hmapping_alloc(struct nvmm_machine *mach)
697 {
698           struct nvmm_hmapping *hmapping;
699           size_t i;
700 
701           for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
702                     hmapping = &mach->hmap[i];
703                     if (!hmapping->present) {
704                               hmapping->present = true;
705                               return hmapping;
706                     }
707           }
708 
709           return NULL;
710 }
711 
712 static int
nvmm_hmapping_free(struct nvmm_machine * mach,uintptr_t hva,size_t size)713 nvmm_hmapping_free(struct nvmm_machine *mach, uintptr_t hva, size_t size)
714 {
715           struct nvmm_hmapping *hmapping;
716           size_t i;
717 
718           for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
719                     hmapping = &mach->hmap[i];
720                     if (!hmapping->present || hmapping->hva != hva ||
721                         hmapping->size != size) {
722                               continue;
723                     }
724 
725                     os_vmobj_unmap(os_curproc_map, hmapping->hva,
726                         hmapping->hva + hmapping->size, false);
727                     os_vmobj_rel(hmapping->vmobj);
728 
729                     hmapping->vmobj = NULL;
730                     hmapping->present = false;
731 
732                     return 0;
733           }
734 
735           return ENOENT;
736 }
737 
738 static int
nvmm_hva_map(struct nvmm_owner * owner,struct nvmm_ioc_hva_map * args)739 nvmm_hva_map(struct nvmm_owner *owner, struct nvmm_ioc_hva_map *args)
740 {
741           struct nvmm_machine *mach;
742           struct nvmm_hmapping *hmapping;
743           vaddr_t uva;
744           int error;
745 
746           error = nvmm_machine_get(owner, args->machid, &mach, true);
747           if (error)
748                     return error;
749 
750           error = nvmm_hmapping_validate(mach, args->hva, args->size);
751           if (error)
752                     goto out;
753 
754           hmapping = nvmm_hmapping_alloc(mach);
755           if (hmapping == NULL) {
756                     error = ENOBUFS;
757                     goto out;
758           }
759 
760           hmapping->hva = args->hva;
761           hmapping->size = args->size;
762           hmapping->vmobj = os_vmobj_create(hmapping->size);
763           uva = hmapping->hva;
764 
765           /* Map the vmobj into the user address space, as pageable. */
766           error = os_vmobj_map(os_curproc_map, &uva, hmapping->size,
767               hmapping->vmobj, 0, false /* !wired */, true /* fixed */,
768               true /* shared */, PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE);
769 
770 out:
771           nvmm_machine_put(mach);
772           return error;
773 }
774 
775 static int
nvmm_hva_unmap(struct nvmm_owner * owner,struct nvmm_ioc_hva_unmap * args)776 nvmm_hva_unmap(struct nvmm_owner *owner, struct nvmm_ioc_hva_unmap *args)
777 {
778           struct nvmm_machine *mach;
779           int error;
780 
781           error = nvmm_machine_get(owner, args->machid, &mach, true);
782           if (error)
783                     return error;
784 
785           error = nvmm_hmapping_free(mach, args->hva, args->size);
786 
787           nvmm_machine_put(mach);
788           return error;
789 }
790 
791 /* -------------------------------------------------------------------------- */
792 
793 static int
nvmm_gpa_map(struct nvmm_owner * owner,struct nvmm_ioc_gpa_map * args)794 nvmm_gpa_map(struct nvmm_owner *owner, struct nvmm_ioc_gpa_map *args)
795 {
796           struct nvmm_machine *mach;
797           os_vmobj_t *vmobj;
798           gpaddr_t gpa;
799           gpaddr_t gpa_end;
800           size_t off;
801           int error;
802 
803           error = nvmm_machine_get(owner, args->machid, &mach, false);
804           if (error)
805                     return error;
806 
807           if ((args->prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) != 0) {
808                     error = EINVAL;
809                     goto out;
810           }
811 
812           /*
813            * Overflow tests MUST be done very carefully to avoid compiler
814            * optimizations from effectively deleting the test.
815            */
816           gpa = args->gpa;
817           gpa_end = gpa + args->size;
818           if (gpa_end <= gpa) {
819                     error = EINVAL;
820                     goto out;
821           }
822 
823           if ((gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 ||
824               (args->hva % PAGE_SIZE) != 0) {
825                     error = EINVAL;
826                     goto out;
827           }
828           if (args->hva == 0) {
829                     error = EINVAL;
830                     goto out;
831           }
832 
833           if (gpa < mach->gpa_begin || gpa >= mach->gpa_end) {
834                     error = EINVAL;
835                     goto out;
836           }
837           if (gpa_end  > mach->gpa_end) {
838                     error = EINVAL;
839                     goto out;
840           }
841 
842           vmobj = nvmm_hmapping_getvmobj(mach, args->hva, args->size, &off);
843           if (vmobj == NULL) {
844                     error = EINVAL;
845                     goto out;
846           }
847 
848           /* Map the vmobj into the machine address space, as pageable. */
849           error = os_vmobj_map(&mach->vm->vm_map, &gpa, args->size, vmobj, off,
850               false /* !wired */, true /* fixed */, false /* !shared */,
851               args->prot, PROT_READ | PROT_WRITE | PROT_EXEC);
852 
853 out:
854           nvmm_machine_put(mach);
855           return error;
856 }
857 
858 static int
nvmm_gpa_unmap(struct nvmm_owner * owner,struct nvmm_ioc_gpa_unmap * args)859 nvmm_gpa_unmap(struct nvmm_owner *owner, struct nvmm_ioc_gpa_unmap *args)
860 {
861           struct nvmm_machine *mach;
862           gpaddr_t gpa;
863           gpaddr_t gpa_end;
864           int error;
865 
866           error = nvmm_machine_get(owner, args->machid, &mach, false);
867           if (error)
868                     return error;
869 
870           /*
871            * Overflow tests MUST be done very carefully to avoid compiler
872            * optimizations from effectively deleting the test.
873            */
874           gpa = args->gpa;
875           gpa_end = gpa + args->size;
876           if (gpa_end <= gpa) {
877                     error = EINVAL;
878                     goto out;
879           }
880 
881           if ((gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0) {
882                     error = EINVAL;
883                     goto out;
884           }
885           if (gpa < mach->gpa_begin || gpa >= mach->gpa_end) {
886                     error = EINVAL;
887                     goto out;
888           }
889           if (gpa_end >= mach->gpa_end) {
890                     error = EINVAL;
891                     goto out;
892           }
893 
894           /* Unmap the memory from the machine. */
895           os_vmobj_unmap(&mach->vm->vm_map, gpa, gpa + args->size, false);
896 
897 out:
898           nvmm_machine_put(mach);
899           return error;
900 }
901 
902 /* -------------------------------------------------------------------------- */
903 
904 static int
nvmm_ctl_mach_info(struct nvmm_owner * owner,struct nvmm_ioc_ctl * args)905 nvmm_ctl_mach_info(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
906 {
907           struct nvmm_ctl_mach_info ctl;
908           struct nvmm_machine *mach;
909           int error;
910           size_t i;
911 
912           if (args->size != sizeof(ctl))
913                     return EINVAL;
914           error = copyin(args->data, &ctl, sizeof(ctl));
915           if (error)
916                     return error;
917 
918           error = nvmm_machine_get(owner, ctl.machid, &mach, true);
919           if (error)
920                     return error;
921 
922           ctl.nvcpus = mach->ncpus;
923 
924           ctl.nram = 0;
925           for (i = 0; i < NVMM_MAX_HMAPPINGS; i++) {
926                     if (!mach->hmap[i].present)
927                               continue;
928                     ctl.nram += mach->hmap[i].size;
929           }
930 
931           ctl.pid = mach->owner->pid;
932           ctl.time = mach->time;
933 
934           nvmm_machine_put(mach);
935 
936           error = copyout(&ctl, args->data, sizeof(ctl));
937           if (error)
938                     return error;
939 
940           return 0;
941 }
942 
943 static int
nvmm_ctl(struct nvmm_owner * owner,struct nvmm_ioc_ctl * args)944 nvmm_ctl(struct nvmm_owner *owner, struct nvmm_ioc_ctl *args)
945 {
946           switch (args->op) {
947           case NVMM_CTL_MACH_INFO:
948                     return nvmm_ctl_mach_info(owner, args);
949           default:
950                     return EINVAL;
951           }
952 }
953 
954 /* -------------------------------------------------------------------------- */
955 
956 const struct nvmm_impl *
nvmm_ident(void)957 nvmm_ident(void)
958 {
959           size_t i;
960 
961           for (i = 0; i < __arraycount(nvmm_impl_list); i++) {
962                     if ((*nvmm_impl_list[i]->ident)())
963                               return nvmm_impl_list[i];
964           }
965 
966           return NULL;
967 }
968 
969 int
nvmm_init(void)970 nvmm_init(void)
971 {
972           size_t i, n;
973 
974           nvmm_impl = nvmm_ident();
975           if (nvmm_impl == NULL)
976                     return ENOTSUP;
977 
978           for (i = 0; i < NVMM_MAX_MACHINES; i++) {
979                     machines[i].machid = i;
980                     os_rwl_init(&machines[i].lock);
981                     for (n = 0; n < NVMM_MAX_VCPUS; n++) {
982                               machines[i].cpus[n].present = false;
983                               machines[i].cpus[n].cpuid = n;
984                               os_mtx_init(&machines[i].cpus[n].lock);
985                     }
986           }
987 
988           (*nvmm_impl->init)();
989 
990           return 0;
991 }
992 
993 void
nvmm_fini(void)994 nvmm_fini(void)
995 {
996           size_t i, n;
997 
998           for (i = 0; i < NVMM_MAX_MACHINES; i++) {
999                     os_rwl_destroy(&machines[i].lock);
1000                     for (n = 0; n < NVMM_MAX_VCPUS; n++) {
1001                               os_mtx_destroy(&machines[i].cpus[n].lock);
1002                     }
1003           }
1004 
1005           (*nvmm_impl->fini)();
1006           nvmm_impl = NULL;
1007 }
1008 
1009 /* -------------------------------------------------------------------------- */
1010 
1011 int
nvmm_ioctl(struct nvmm_owner * owner,unsigned long cmd,void * data)1012 nvmm_ioctl(struct nvmm_owner *owner, unsigned long cmd, void *data)
1013 {
1014           switch (cmd) {
1015           case NVMM_IOC_CAPABILITY:
1016                     return nvmm_capability(owner, data);
1017           case NVMM_IOC_MACHINE_CREATE:
1018                     return nvmm_machine_create(owner, data);
1019           case NVMM_IOC_MACHINE_DESTROY:
1020                     return nvmm_machine_destroy(owner, data);
1021           case NVMM_IOC_MACHINE_CONFIGURE:
1022                     return nvmm_machine_configure(owner, data);
1023           case NVMM_IOC_VCPU_CREATE:
1024                     return nvmm_vcpu_create(owner, data);
1025           case NVMM_IOC_VCPU_DESTROY:
1026                     return nvmm_vcpu_destroy(owner, data);
1027           case NVMM_IOC_VCPU_CONFIGURE:
1028                     return nvmm_vcpu_configure(owner, data);
1029           case NVMM_IOC_VCPU_SETSTATE:
1030                     return nvmm_vcpu_setstate(owner, data);
1031           case NVMM_IOC_VCPU_GETSTATE:
1032                     return nvmm_vcpu_getstate(owner, data);
1033           case NVMM_IOC_VCPU_INJECT:
1034                     return nvmm_vcpu_inject(owner, data);
1035           case NVMM_IOC_VCPU_RUN:
1036                     return nvmm_vcpu_run(owner, data);
1037           case NVMM_IOC_GPA_MAP:
1038                     return nvmm_gpa_map(owner, data);
1039           case NVMM_IOC_GPA_UNMAP:
1040                     return nvmm_gpa_unmap(owner, data);
1041           case NVMM_IOC_HVA_MAP:
1042                     return nvmm_hva_map(owner, data);
1043           case NVMM_IOC_HVA_UNMAP:
1044                     return nvmm_hva_unmap(owner, data);
1045           case NVMM_IOC_CTL:
1046                     return nvmm_ctl(owner, data);
1047           default:
1048                     return EINVAL;
1049           }
1050 }
1051