1 /*
2  *
3  * Copyright (c) 2004 Christian Limpach.
4  * Copyright (c) 2004-2006,2008 Kip Macy
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *      This product includes software developed by Christian Limpach.
18  * 4. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD: stable/10/sys/i386/xen/xen_machdep.c 271132 2014-09-04 20:47:14Z emaste $");
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/bus.h>
39 #include <sys/ktr.h>
40 #include <sys/lock.h>
41 #include <sys/mount.h>
42 #include <sys/malloc.h>
43 #include <sys/mutex.h>
44 #include <sys/kernel.h>
45 #include <sys/proc.h>
46 #include <sys/reboot.h>
47 #include <sys/rwlock.h>
48 #include <sys/sysproto.h>
49 #include <sys/boot.h>
50 
51 #include <xen/xen-os.h>
52 
53 #include <vm/vm.h>
54 #include <vm/pmap.h>
55 #include <machine/segments.h>
56 #include <machine/pcb.h>
57 #include <machine/stdarg.h>
58 #include <machine/vmparam.h>
59 #include <machine/cpu.h>
60 #include <machine/intr_machdep.h>
61 #include <machine/md_var.h>
62 #include <machine/asmacros.h>
63 
64 
65 
66 #include <xen/hypervisor.h>
67 #include <machine/xen/xenvar.h>
68 #include <machine/xen/xenfunc.h>
69 #include <machine/xen/xenpmap.h>
70 #include <machine/xen/xenfunc.h>
71 #include <xen/interface/memory.h>
72 #include <machine/xen/features.h>
73 #ifdef SMP
74 #include <machine/privatespace.h>
75 #endif
76 
77 
78 #include <vm/vm_page.h>
79 
80 
81 #define	IDTVEC(name)	__CONCAT(X,name)
82 
83 extern inthand_t
84 IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
85 	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
86 	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
87 	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
88 	IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
89 
90 
91 int xendebug_flags;
92 start_info_t *xen_start_info;
93 shared_info_t *HYPERVISOR_shared_info;
94 xen_pfn_t *xen_machine_phys = machine_to_phys_mapping;
95 xen_pfn_t *xen_phys_machine;
96 xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
97 xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
98 int preemptable, init_first;
99 extern unsigned int avail_space;
100 int xen_vector_callback_enabled = 0;
101 enum xen_domain_type xen_domain_type = XEN_PV_DOMAIN;
102 
103 void ni_cli(void);
104 void ni_sti(void);
105 
106 
107 void
ni_cli(void)108 ni_cli(void)
109 {
110 	CTR0(KTR_SPARE2, "ni_cli disabling interrupts");
111 	__asm__("pushl %edx;"
112 		"pushl %eax;"
113 		);
114 	__cli();
115 	__asm__("popl %eax;"
116 		"popl %edx;"
117 		);
118 }
119 
120 
/*
 * Out-of-line wrapper that runs __sti() with %edx, %esi and %eax pushed on
 * the stack beforehand and popped (in reverse order) afterwards.
 */
void
ni_sti(void)
{

	/* Save the three registers before invoking __sti(). */
	__asm__("pushl %edx;" "pushl %esi;" "pushl %eax;");
	__sti();
	/* Restore them in the opposite order. */
	__asm__("popl %eax;" "popl %esi;" "popl %edx;");
}
134 
135 void
force_evtchn_callback(void)136 force_evtchn_callback(void)
137 {
138     (void)HYPERVISOR_xen_version(0, NULL);
139 }
140 
/*
 * Rewrite cmd_line in place, replacing every ',' with a NUL byte so that
 * the string has the layout expected for the static environment.
 *
 * Leading spaces are skipped first; the returned pointer refers to the
 * first non-space character of the (now modified) buffer.
 */
char *
xen_setbootenv(char *cmd_line)
{
	char *cursor;

	/* Skip leading spaces. */
	while (*cmd_line == ' ')
		cmd_line++;

	xc_printf("xen_setbootenv(): cmd_line='%s'\n", cmd_line);

	/* strsep() overwrites each ',' with '\0'; the tokens are not needed. */
	cursor = cmd_line;
	while (strsep(&cursor, ",") != NULL)
		continue;

	return (cmd_line);
}
158 
159 int
xen_boothowto(char * envp)160 xen_boothowto(char *envp)
161 {
162 	int i, howto = 0;
163 
164 	/* get equivalents from the environment */
165 	for (i = 0; howto_names[i].ev != NULL; i++)
166 		if (getenv(howto_names[i].ev) != NULL)
167 			howto |= howto_names[i].mask;
168 	return howto;
169 }
170 
171 #define XC_PRINTF_BUFSIZE 1024
172 void
xc_printf(const char * fmt,...)173 xc_printf(const char *fmt, ...)
174 {
175         __va_list ap;
176         int retval;
177         static char buf[XC_PRINTF_BUFSIZE];
178 
179         va_start(ap, fmt);
180         retval = vsnprintf(buf, XC_PRINTF_BUFSIZE - 1, fmt, ap);
181         va_end(ap);
182         buf[retval] = 0;
183         (void)HYPERVISOR_console_write(buf, retval);
184 }
185 
186 
187 #define XPQUEUE_SIZE 128
188 
189 struct mmu_log {
190 	char *file;
191 	int line;
192 };
193 
194 #ifdef SMP
195 /* per-cpu queues and indices */
196 #ifdef INVARIANTS
197 static struct mmu_log xpq_queue_log[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];
198 #endif
199 
200 static int xpq_idx[XEN_LEGACY_MAX_VCPUS];
201 static mmu_update_t xpq_queue[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE];
202 
203 #define	XPQ_QUEUE_LOG xpq_queue_log[vcpu]
204 #define	XPQ_QUEUE xpq_queue[vcpu]
205 #define	XPQ_IDX xpq_idx[vcpu]
206 #define	SET_VCPU() int vcpu = smp_processor_id()
207 #else
208 
209 static mmu_update_t xpq_queue[XPQUEUE_SIZE];
210 #ifdef INVARIANTS
211 static struct mmu_log xpq_queue_log[XPQUEUE_SIZE];
212 #endif
213 static int xpq_idx = 0;
214 
215 #define	XPQ_QUEUE_LOG xpq_queue_log
216 #define	XPQ_QUEUE xpq_queue
217 #define	XPQ_IDX xpq_idx
218 #define	SET_VCPU()
219 #endif /* !SMP */
220 
221 #define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);
222 
223 #if 0
224 static void
225 xen_dump_queue(void)
226 {
227 	int _xpq_idx = XPQ_IDX;
228 	int i;
229 
230 	if (_xpq_idx <= 1)
231 		return;
232 
233 	xc_printf("xen_dump_queue(): %u entries\n", _xpq_idx);
234 	for (i = 0; i < _xpq_idx; i++) {
235 		xc_printf(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val,
236 		    XPQ_QUEUE[i].ptr);
237 	}
238 }
239 #endif
240 
241 
242 static __inline void
_xen_flush_queue(void)243 _xen_flush_queue(void)
244 {
245 	SET_VCPU();
246 	int _xpq_idx = XPQ_IDX;
247 	int error, i;
248 
249 #ifdef INVARIANTS
250 	if (__predict_true(gdtset))
251 		CRITICAL_ASSERT(curthread);
252 #endif
253 
254 	XPQ_IDX = 0;
255 	/* Make sure index is cleared first to avoid double updates. */
256 	error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
257 				      _xpq_idx, NULL, DOMID_SELF);
258 
259 #if 0
260 	if (__predict_true(gdtset))
261 	for (i = _xpq_idx; i > 0;) {
262 		if (i >= 3) {
263 			CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx "
264 			    "ptr: %lx val: %lx ptr: %lx",
265 			    (XPQ_QUEUE[i-1].val & 0xffffffff),
266 			    (XPQ_QUEUE[i-1].ptr & 0xffffffff),
267 			    (XPQ_QUEUE[i-2].val & 0xffffffff),
268 			    (XPQ_QUEUE[i-2].ptr & 0xffffffff),
269 			    (XPQ_QUEUE[i-3].val & 0xffffffff),
270 			    (XPQ_QUEUE[i-3].ptr & 0xffffffff));
271 			    i -= 3;
272 		} else if (i == 2) {
273 			CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx",
274 			    (XPQ_QUEUE[i-1].val & 0xffffffff),
275 			    (XPQ_QUEUE[i-1].ptr & 0xffffffff),
276 			    (XPQ_QUEUE[i-2].val & 0xffffffff),
277 			    (XPQ_QUEUE[i-2].ptr & 0xffffffff));
278 			i = 0;
279 		} else {
280 			CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx",
281 			    (XPQ_QUEUE[i-1].val & 0xffffffff),
282 			    (XPQ_QUEUE[i-1].ptr & 0xffffffff));
283 			i = 0;
284 		}
285 	}
286 #endif
287 	if (__predict_false(error < 0)) {
288 		for (i = 0; i < _xpq_idx; i++)
289 			printf("val: %llx ptr: %llx\n",
290 			    XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
291 		panic("Failed to execute MMU updates: %d", error);
292 	}
293 
294 }
295 
296 void
xen_flush_queue(void)297 xen_flush_queue(void)
298 {
299 	SET_VCPU();
300 
301 	if (__predict_true(gdtset))
302 		critical_enter();
303 	if (XPQ_IDX != 0) _xen_flush_queue();
304 	if (__predict_true(gdtset))
305 		critical_exit();
306 }
307 
308 static __inline void
xen_increment_idx(void)309 xen_increment_idx(void)
310 {
311 	SET_VCPU();
312 
313 	XPQ_IDX++;
314 	if (__predict_false(XPQ_IDX == XPQUEUE_SIZE))
315 		xen_flush_queue();
316 }
317 
/*
 * Debugging aid: under INVARIANTS, assert that the current queue has no
 * pending MMU updates.  Compiles to an empty function otherwise.
 */
void
xen_check_queue(void)
{
#ifdef INVARIANTS
	SET_VCPU();

	KASSERT(XPQ_IDX == 0,
	    ("pending operations XPQ_IDX=%d", XPQ_IDX));
#endif
}
327 
328 void
xen_invlpg(vm_offset_t va)329 xen_invlpg(vm_offset_t va)
330 {
331 	struct mmuext_op op;
332 	op.cmd = MMUEXT_INVLPG_ALL;
333 	op.arg1.linear_addr = va & ~PAGE_MASK;
334 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
335 }
336 
337 void
xen_load_cr3(u_int val)338 xen_load_cr3(u_int val)
339 {
340 	struct mmuext_op op;
341 #ifdef INVARIANTS
342 	SET_VCPU();
343 
344 	KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
345 #endif
346 	op.cmd = MMUEXT_NEW_BASEPTR;
347 	op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT;
348 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
349 }
350 
#ifdef KTR
/*
 * Return the 32-bit word stored at 4(%ebp).  Only built when KTR tracing
 * is configured.
 * NOTE(review): with a conventional frame pointer that word is the return
 * address of the current frame -- confirm for the inlined case.
 */
static __inline u_int
rebp(void)
{
	u_int	word;

	__asm __volatile("movl 4(%%ebp),%0" : "=r" (word));
	return (word);
}
#endif
361 
362 u_int
read_eflags(void)363 read_eflags(void)
364 {
365         vcpu_info_t *_vcpu;
366 	u_int eflags;
367 
368 	eflags = _read_eflags();
369         _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];
370 	if (_vcpu->evtchn_upcall_mask)
371 		eflags &= ~PSL_I;
372 
373 	return (eflags);
374 }
375 
376 void
write_eflags(u_int eflags)377 write_eflags(u_int eflags)
378 {
379 	u_int intr;
380 
381 	CTR2(KTR_SPARE2, "%x xen_restore_flags eflags %x", rebp(), eflags);
382 	intr = ((eflags & PSL_I) == 0);
383 	__restore_flags(intr);
384 	_write_eflags(eflags);
385 }
386 
387 void
xen_cli(void)388 xen_cli(void)
389 {
390 	CTR1(KTR_SPARE2, "%x xen_cli disabling interrupts", rebp());
391 	__cli();
392 }
393 
394 void
xen_sti(void)395 xen_sti(void)
396 {
397 	CTR1(KTR_SPARE2, "%x xen_sti enabling interrupts", rebp());
398 	__sti();
399 }
400 
401 u_int
xen_rcr2(void)402 xen_rcr2(void)
403 {
404 
405 	return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2);
406 }
407 
408 void
_xen_machphys_update(vm_paddr_t mfn,vm_paddr_t pfn,char * file,int line)409 _xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line)
410 {
411 	SET_VCPU();
412 
413 	if (__predict_true(gdtset))
414 		critical_enter();
415 	XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
416 	XPQ_QUEUE[XPQ_IDX].val = pfn;
417 #ifdef INVARIANTS
418 	XPQ_QUEUE_LOG[XPQ_IDX].file = file;
419 	XPQ_QUEUE_LOG[XPQ_IDX].line = line;
420 #endif
421 	xen_increment_idx();
422 	if (__predict_true(gdtset))
423 		critical_exit();
424 }
425 
426 extern struct rwlock pvh_global_lock;
427 
428 void
_xen_queue_pt_update(vm_paddr_t ptr,vm_paddr_t val,char * file,int line)429 _xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line)
430 {
431 	SET_VCPU();
432 
433 	if (__predict_true(gdtset))
434 		rw_assert(&pvh_global_lock, RA_WLOCKED);
435 
436 	KASSERT((ptr & 7) == 0, ("misaligned update"));
437 
438 	if (__predict_true(gdtset))
439 		critical_enter();
440 
441 	XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE;
442 	XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val;
443 #ifdef INVARIANTS
444 	XPQ_QUEUE_LOG[XPQ_IDX].file = file;
445 	XPQ_QUEUE_LOG[XPQ_IDX].line = line;
446 #endif
447 	xen_increment_idx();
448 	if (__predict_true(gdtset))
449 		critical_exit();
450 }
451 
452 void
xen_pgdpt_pin(vm_paddr_t ma)453 xen_pgdpt_pin(vm_paddr_t ma)
454 {
455 	struct mmuext_op op;
456 	op.cmd = MMUEXT_PIN_L3_TABLE;
457 	op.arg1.mfn = ma >> PAGE_SHIFT;
458 	xen_flush_queue();
459 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
460 }
461 
462 void
xen_pgd_pin(vm_paddr_t ma)463 xen_pgd_pin(vm_paddr_t ma)
464 {
465 	struct mmuext_op op;
466 	op.cmd = MMUEXT_PIN_L2_TABLE;
467 	op.arg1.mfn = ma >> PAGE_SHIFT;
468 	xen_flush_queue();
469 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
470 }
471 
472 void
xen_pgd_unpin(vm_paddr_t ma)473 xen_pgd_unpin(vm_paddr_t ma)
474 {
475 	struct mmuext_op op;
476 	op.cmd = MMUEXT_UNPIN_TABLE;
477 	op.arg1.mfn = ma >> PAGE_SHIFT;
478 	xen_flush_queue();
479 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
480 }
481 
482 void
xen_pt_pin(vm_paddr_t ma)483 xen_pt_pin(vm_paddr_t ma)
484 {
485 	struct mmuext_op op;
486 	op.cmd = MMUEXT_PIN_L1_TABLE;
487 	op.arg1.mfn = ma >> PAGE_SHIFT;
488 	xen_flush_queue();
489 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
490 }
491 
492 void
xen_pt_unpin(vm_paddr_t ma)493 xen_pt_unpin(vm_paddr_t ma)
494 {
495 	struct mmuext_op op;
496 	op.cmd = MMUEXT_UNPIN_TABLE;
497 	op.arg1.mfn = ma >> PAGE_SHIFT;
498 	xen_flush_queue();
499 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
500 }
501 
502 void
xen_set_ldt(vm_paddr_t ptr,unsigned long len)503 xen_set_ldt(vm_paddr_t ptr, unsigned long len)
504 {
505 	struct mmuext_op op;
506 	op.cmd = MMUEXT_SET_LDT;
507 	op.arg1.linear_addr = ptr;
508 	op.arg2.nr_ents = len;
509 	xen_flush_queue();
510 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
511 }
512 
xen_tlb_flush(void)513 void xen_tlb_flush(void)
514 {
515 	struct mmuext_op op;
516 	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
517 	xen_flush_queue();
518 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
519 }
520 
521 void
xen_update_descriptor(union descriptor * table,union descriptor * entry)522 xen_update_descriptor(union descriptor *table, union descriptor *entry)
523 {
524 	vm_paddr_t pa;
525 	pt_entry_t *ptp;
526 
527 	ptp = vtopte((vm_offset_t)table);
528 	pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
529 	if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry))
530 		panic("HYPERVISOR_update_descriptor failed\n");
531 }
532 
533 
534 #if 0
535 /*
536  * Bitmap is indexed by page number. If bit is set, the page is part of a
537  * xen_create_contiguous_region() area of memory.
538  */
539 unsigned long *contiguous_bitmap;
540 
541 static void
542 contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages)
543 {
544 	unsigned long start_off, end_off, curr_idx, end_idx;
545 
546 	curr_idx  = first_page / BITS_PER_LONG;
547 	start_off = first_page & (BITS_PER_LONG-1);
548 	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
549 	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
550 
551 	if (curr_idx == end_idx) {
552 		contiguous_bitmap[curr_idx] |=
553 			((1UL<<end_off)-1) & -(1UL<<start_off);
554 	} else {
555 		contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
556 		while ( ++curr_idx < end_idx )
557 			contiguous_bitmap[curr_idx] = ~0UL;
558 		contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
559 	}
560 }
561 
562 static void
563 contiguous_bitmap_clear(unsigned long first_page, unsigned long nr_pages)
564 {
565 	unsigned long start_off, end_off, curr_idx, end_idx;
566 
567 	curr_idx  = first_page / BITS_PER_LONG;
568 	start_off = first_page & (BITS_PER_LONG-1);
569 	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
570 	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
571 
572 	if (curr_idx == end_idx) {
573 		contiguous_bitmap[curr_idx] &=
574 			-(1UL<<end_off) | ((1UL<<start_off)-1);
575 	} else {
576 		contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
577 		while ( ++curr_idx != end_idx )
578 			contiguous_bitmap[curr_idx] = 0;
579 		contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
580 	}
581 }
582 #endif
583 
584 /* Ensure multi-page extents are contiguous in machine memory. */
585 int
xen_create_contiguous_region(vm_page_t pages,int npages)586 xen_create_contiguous_region(vm_page_t pages, int npages)
587 {
588 	unsigned long  mfn, i, flags;
589 	int order;
590 	struct xen_memory_reservation reservation = {
591 		.nr_extents   = 1,
592 		.extent_order = 0,
593 		.domid        = DOMID_SELF
594 	};
595 	set_xen_guest_handle(reservation.extent_start, &mfn);
596 
597 	balloon_lock(flags);
598 
599 	/* can currently only handle power of two allocation */
600 	PANIC_IF(ffs(npages) != fls(npages));
601 
602 	/* 0. determine order */
603 	order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
604 
605 	/* 1. give away machine pages. */
606 	for (i = 0; i < (1 << order); i++) {
607 		int pfn;
608 		pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
609 		mfn = PFNTOMFN(pfn);
610 		PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
611 		PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1);
612 	}
613 
614 
615 	/* 2. Get a new contiguous memory extent. */
616 	reservation.extent_order = order;
617 	/* xenlinux hardcodes this because of aacraid - maybe set to 0 if we're not
618 	 * running with a broxen driver XXXEN
619 	 */
620 	reservation.address_bits = 31;
621 	if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1)
622 		goto fail;
623 
624 	/* 3. Map the new extent in place of old pages. */
625 	for (i = 0; i < (1 << order); i++) {
626 		int pfn;
627 		pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
628 		xen_machphys_update(mfn+i, pfn);
629 		PFNTOMFN(pfn) = mfn+i;
630 	}
631 
632 	xen_tlb_flush();
633 
634 #if 0
635 	contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order);
636 #endif
637 
638 	balloon_unlock(flags);
639 
640 	return 0;
641 
642  fail:
643 	reservation.extent_order = 0;
644 	reservation.address_bits = 0;
645 
646 	for (i = 0; i < (1 << order); i++) {
647 		int pfn;
648 		pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
649 		PANIC_IF(HYPERVISOR_memory_op(
650 			XENMEM_increase_reservation, &reservation) != 1);
651 		xen_machphys_update(mfn, pfn);
652 		PFNTOMFN(pfn) = mfn;
653 	}
654 
655 	xen_tlb_flush();
656 
657 	balloon_unlock(flags);
658 
659 	return ENOMEM;
660 }
661 
662 void
xen_destroy_contiguous_region(void * addr,int npages)663 xen_destroy_contiguous_region(void *addr, int npages)
664 {
665 	unsigned long  mfn, i, flags, order, pfn0;
666 	struct xen_memory_reservation reservation = {
667 		.nr_extents   = 1,
668 		.extent_order = 0,
669 		.domid        = DOMID_SELF
670 	};
671 	set_xen_guest_handle(reservation.extent_start, &mfn);
672 
673 	pfn0 = vtophys(addr) >> PAGE_SHIFT;
674 #if 0
675 	scrub_pages(vstart, 1 << order);
676 #endif
677 	/* can currently only handle power of two allocation */
678 	PANIC_IF(ffs(npages) != fls(npages));
679 
680 	/* 0. determine order */
681 	order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
682 
683 	balloon_lock(flags);
684 
685 #if 0
686 	contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order);
687 #endif
688 
689 	/* 1. Zap current PTEs, giving away the underlying pages. */
690 	for (i = 0; i < (1 << order); i++) {
691 		int pfn;
692 		uint64_t new_val = 0;
693 		pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT;
694 
695 		PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0));
696 		PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
697 		PANIC_IF(HYPERVISOR_memory_op(
698 			XENMEM_decrease_reservation, &reservation) != 1);
699 	}
700 
701 	/* 2. Map new pages in place of old pages. */
702 	for (i = 0; i < (1 << order); i++) {
703 		int pfn;
704 		uint64_t new_val;
705 		pfn = pfn0 + i;
706 		PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1);
707 
708 		new_val = mfn << PAGE_SHIFT;
709 		PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE),
710 						      new_val, PG_KERNEL));
711 		xen_machphys_update(mfn, pfn);
712 		PFNTOMFN(pfn) = mfn;
713 	}
714 
715 	xen_tlb_flush();
716 
717 	balloon_unlock(flags);
718 }
719 
720 extern  vm_offset_t	proc0kstack;
721 extern int vm86paddr, vm86phystk;
722 char *bootmem_start, *bootmem_current, *bootmem_end;
723 
724 pteinfo_t *pteinfo_list;
725 void initvalues(start_info_t *startinfo);
726 
727 struct xenstore_domain_interface;
728 extern struct xenstore_domain_interface *xen_store;
729 
730 char *console_page;
731 
732 void *
bootmem_alloc(unsigned int size)733 bootmem_alloc(unsigned int size)
734 {
735 	char *retptr;
736 
737 	retptr = bootmem_current;
738 	PANIC_IF(retptr + size > bootmem_end);
739 	bootmem_current += size;
740 
741 	return retptr;
742 }
743 
744 void
bootmem_free(void * ptr,unsigned int size)745 bootmem_free(void *ptr, unsigned int size)
746 {
747 	char *tptr;
748 
749 	tptr = ptr;
750 	PANIC_IF(tptr != bootmem_current - size ||
751 		bootmem_current - size < bootmem_start);
752 
753 	bootmem_current -= size;
754 }
755 
756 #if 0
757 static vm_paddr_t
758 xpmap_mtop2(vm_paddr_t mpa)
759 {
760         return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT)
761             ) | (mpa & ~PG_FRAME);
762 }
763 
764 static pd_entry_t
765 xpmap_get_bootpde(vm_paddr_t va)
766 {
767 
768         return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22];
769 }
770 
771 static pd_entry_t
772 xpmap_get_vbootpde(vm_paddr_t va)
773 {
774         pd_entry_t pde;
775 
776         pde = xpmap_get_bootpde(va);
777         if ((pde & PG_V) == 0)
778                 return (pde & ~PG_FRAME);
779         return (pde & ~PG_FRAME) |
780                 (xpmap_mtop2(pde & PG_FRAME) + KERNBASE);
781 }
782 
783 static pt_entry_t 8*
784 xpmap_get_bootptep(vm_paddr_t va)
785 {
786         pd_entry_t pde;
787 
788         pde = xpmap_get_vbootpde(va);
789         if ((pde & PG_V) == 0)
790                 return (void *)-1;
791 #define PT_MASK         0x003ff000      /* page table address bits */
792         return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]);
793 }
794 
795 static pt_entry_t
796 xpmap_get_bootpte(vm_paddr_t va)
797 {
798 
799         return xpmap_get_bootptep(va)[0];
800 }
801 #endif
802 
803 
#ifdef ADD_ISA_HOLE
/*
 * Rearrange the phys_machine table (nr_pages entries) one page of entries
 * at a time: walking from the top of the table downwards, each page of
 * entries is swapped with the page below it, using a scratch page from the
 * bootmem allocator.  Afterwards a machphys update is queued for every
 * index and the first page of the table is filled by memset() with
 * INVALID_P2M_ENTRY.
 * NOTE(review): the walk appears to assume nr_pages is a multiple of the
 * number of entries per page -- confirm.
 */
static void
shift_phys_machine(unsigned long *phys_machine, int nr_pages)
{
	const size_t epp = PAGE_SIZE / sizeof(unsigned long);
	unsigned long *scratch, *upper, *lower;
	int idx;

	scratch = bootmem_alloc(PAGE_SIZE);
	upper = phys_machine + nr_pages - epp;
	lower = upper - epp;
	bcopy(phys_machine, scratch, PAGE_SIZE);

	for (; upper > phys_machine; upper -= epp, lower -= epp) {
		/* Three-way swap of the two neighbouring pages. */
		bcopy(lower, scratch, PAGE_SIZE);
		bcopy(upper, lower, PAGE_SIZE);
		bcopy(scratch, upper, PAGE_SIZE);
	}
	bootmem_free(scratch, PAGE_SIZE);

	for (idx = 0; idx < nr_pages; idx++) {
		xen_machphys_update(phys_machine[idx], idx);
	}
	memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE);
}
#endif /* ADD_ISA_HOLE */
837 
838 /*
839  * Build a directory of the pages that make up our Physical to Machine
840  * mapping table. The Xen suspend/restore code uses this to find our
841  * mapping table.
842  */
843 static void
init_frame_list_list(void * arg)844 init_frame_list_list(void *arg)
845 {
846 	unsigned long nr_pages = xen_start_info->nr_pages;
847 #define FPP	(PAGE_SIZE/sizeof(xen_pfn_t))
848 	int i, j, k;
849 
850 	xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
851 	for (i = 0, j = 0, k = -1; i < nr_pages;
852 	     i += FPP, j++) {
853 		if ((j & (FPP - 1)) == 0) {
854 			k++;
855 			xen_pfn_to_mfn_frame_list[k] =
856 				malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
857 			xen_pfn_to_mfn_frame_list_list[k] =
858 				VTOMFN(xen_pfn_to_mfn_frame_list[k]);
859 			j = 0;
860 		}
861 		xen_pfn_to_mfn_frame_list[k][j] =
862 			VTOMFN(&xen_phys_machine[i]);
863 	}
864 
865 	HYPERVISOR_shared_info->arch.max_pfn = nr_pages;
866 	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list
867 		= VTOMFN(xen_pfn_to_mfn_frame_list_list);
868 }
869 SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL);
870 
871 extern unsigned long physfree;
872 
873 int pdir, curoffset;
874 extern int nkpt;
875 
876 extern uint32_t kernbase;
877 
878 void
initvalues(start_info_t * startinfo)879 initvalues(start_info_t *startinfo)
880 {
881 	vm_offset_t cur_space, cur_space_pt;
882 	struct physdev_set_iopl set_iopl;
883 
884 	int l3_pages, l2_pages, l1_pages, offset;
885 	vm_paddr_t console_page_ma, xen_store_ma;
886 	vm_offset_t tmpva;
887 	vm_paddr_t shinfo;
888 #ifdef PAE
889 	vm_paddr_t IdlePDPTma, IdlePDPTnewma;
890 	vm_paddr_t IdlePTDnewma[4];
891 	pd_entry_t *IdlePDPTnew, *IdlePTDnew;
892 	vm_paddr_t IdlePTDma[4];
893 #else
894 	vm_paddr_t IdlePTDma[1];
895 #endif
896 	unsigned long i;
897 	int ncpus = MAXCPU;
898 
899 	nkpt = min(
900 		min(
901 			max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt),
902 		    NPGPTD*NPDEPG - KPTDI),
903 		    (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT);
904 
905 	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
906 #ifdef notyet
907 	/*
908 	 * need to install handler
909 	 */
910 	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);
911 #endif
912 	xen_start_info = startinfo;
913 	xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list;
914 
915 	IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
916 	l1_pages = 0;
917 
918 #ifdef PAE
919 	l3_pages = 1;
920 	l2_pages = 0;
921 	IdlePDPT = (pd_entry_t *)startinfo->pt_base;
922 	IdlePDPTma = VTOM(startinfo->pt_base);
923 	for (i = (KERNBASE >> 30);
924 	     (i < 4) && (IdlePDPT[i] != 0); i++)
925 			l2_pages++;
926 	/*
927 	 * Note that only one page directory has been allocated at this point.
928 	 * Thus, if KERNBASE
929 	 */
930 	for (i = 0; i < l2_pages; i++)
931 		IdlePTDma[i] = VTOM(IdlePTD + i*PAGE_SIZE);
932 
933 	l2_pages = (l2_pages == 0) ? 1 : l2_pages;
934 #else
935 	l3_pages = 0;
936 	l2_pages = 1;
937 #endif
938 	for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT);
939 	     (i<l2_pages*NPDEPG) && (i<(VM_MAX_KERNEL_ADDRESS>>PDRSHIFT)); i++) {
940 
941 		if (IdlePTD[i] == 0)
942 			break;
943 		l1_pages++;
944 	}
945 
946 	/* number of pages allocated after the pts + 1*/;
947 	cur_space = xen_start_info->pt_base +
948 	    (l3_pages + l2_pages + l1_pages + 1)*PAGE_SIZE;
949 
950 	xc_printf("initvalues(): wooh - availmem=%x,%x\n", avail_space,
951 	    cur_space);
952 
953 	xc_printf("KERNBASE=%x,pt_base=%x, VTOPFN(base)=%x, nr_pt_frames=%x\n",
954 	    KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base),
955 	    xen_start_info->nr_pt_frames);
956 	xendebug_flags = 0; /* 0xffffffff; */
957 
958 #ifdef ADD_ISA_HOLE
959 	shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages);
960 #endif
961 	XENPRINTF("IdlePTD %p\n", IdlePTD);
962 	XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx "
963 		  "mod_start: 0x%lx mod_len: 0x%lx\n",
964 		  xen_start_info->nr_pages, xen_start_info->shared_info,
965 		  xen_start_info->flags, xen_start_info->pt_base,
966 		  xen_start_info->mod_start, xen_start_info->mod_len);
967 
968 #ifdef PAE
969 	IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE;
970 	bzero(IdlePDPTnew, PAGE_SIZE);
971 
972 	IdlePDPTnewma =  VTOM(IdlePDPTnew);
973 	IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE;
974 	bzero(IdlePTDnew, 4*PAGE_SIZE);
975 
976 	for (i = 0; i < 4; i++)
977 		IdlePTDnewma[i] = VTOM((uint8_t *)IdlePTDnew + i*PAGE_SIZE);
978 	/*
979 	 * L3
980 	 *
981 	 * Copy the 4 machine addresses of the new PTDs in to the PDPT
982 	 *
983 	 */
984 	for (i = 0; i < 4; i++)
985 		IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V;
986 
987 	__asm__("nop;");
988 	/*
989 	 *
990 	 * re-map the new PDPT read-only
991 	 */
992 	PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V);
993 	/*
994 	 *
995 	 * Unpin the current PDPT
996 	 */
997 	xen_pt_unpin(IdlePDPTma);
998 
999 #endif  /* PAE */
1000 
1001 	/* Map proc0's KSTACK */
1002 	proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE);
1003 	xc_printf("proc0kstack=%u\n", proc0kstack);
1004 
1005 	/* vm86/bios stack */
1006 	cur_space += PAGE_SIZE;
1007 
1008 	/* Map space for the vm86 region */
1009 	vm86paddr = (vm_offset_t)cur_space;
1010 	cur_space += (PAGE_SIZE * 3);
1011 
1012 	/* allocate 4 pages for bootmem allocator */
1013 	bootmem_start = bootmem_current = (char *)cur_space;
1014 	cur_space += (4 * PAGE_SIZE);
1015 	bootmem_end = (char *)cur_space;
1016 
1017 	/* allocate pages for gdt */
1018 	gdt = (union descriptor *)cur_space;
1019 	cur_space += PAGE_SIZE*ncpus;
1020 
1021         /* allocate page for ldt */
1022 	ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE;
1023 	cur_space += PAGE_SIZE;
1024 
1025 	/* unmap remaining pages from initial chunk
1026 	 *
1027 	 */
1028 	for (tmpva = cur_space; tmpva < (((uint32_t)&kernbase) + (l1_pages<<PDRSHIFT));
1029 	     tmpva += PAGE_SIZE) {
1030 		bzero((char *)tmpva, PAGE_SIZE);
1031 		PT_SET_MA(tmpva, (vm_paddr_t)0);
1032 	}
1033 
1034 	PT_UPDATES_FLUSH();
1035 
1036 	memcpy(((uint8_t *)IdlePTDnew) + ((unsigned int)(KERNBASE >> 18)),
1037 	    ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK),
1038 	    l1_pages*sizeof(pt_entry_t));
1039 
1040 	for (i = 0; i < 4; i++) {
1041 		PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE,
1042 		    IdlePTDnewma[i] | PG_V);
1043 	}
1044 	xen_load_cr3(VTOP(IdlePDPTnew));
1045 	xen_pgdpt_pin(VTOM(IdlePDPTnew));
1046 
1047 	/* allocate remainder of nkpt pages */
1048 	cur_space_pt = cur_space;
1049 	for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt;
1050 	     i++, cur_space += PAGE_SIZE) {
1051 		pdir = (offset + i) / NPDEPG;
1052 		curoffset = ((offset + i) % NPDEPG);
1053 		if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS)
1054 			break;
1055 
1056 		/*
1057 		 * make sure that all the initial page table pages
1058 		 * have been zeroed
1059 		 */
1060 		PT_SET_MA(cur_space, VTOM(cur_space) | PG_V | PG_RW);
1061 		bzero((char *)cur_space, PAGE_SIZE);
1062 		PT_SET_MA(cur_space, (vm_paddr_t)0);
1063 		xen_pt_pin(VTOM(cur_space));
1064 		xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
1065 			curoffset*sizeof(vm_paddr_t)),
1066 		    VTOM(cur_space) | PG_KERNEL);
1067 		PT_UPDATES_FLUSH();
1068 	}
1069 
1070 	for (i = 0; i < 4; i++) {
1071 		pdir = (PTDPTDI + i) / NPDEPG;
1072 		curoffset = (PTDPTDI + i) % NPDEPG;
1073 
1074 		xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
1075 			curoffset*sizeof(vm_paddr_t)),
1076 		    IdlePTDnewma[i] | PG_V);
1077 	}
1078 
1079 	PT_UPDATES_FLUSH();
1080 
1081 	IdlePTD = IdlePTDnew;
1082 	IdlePDPT = IdlePDPTnew;
1083 	IdlePDPTma = IdlePDPTnewma;
1084 
1085 	HYPERVISOR_shared_info = (shared_info_t *)cur_space;
1086 	cur_space += PAGE_SIZE;
1087 
1088 	xen_store = (struct xenstore_domain_interface *)cur_space;
1089 	cur_space += PAGE_SIZE;
1090 
1091 	console_page = (char *)cur_space;
1092 	cur_space += PAGE_SIZE;
1093 
1094 	/*
1095 	 * shared_info is an unsigned long so this will randomly break if
1096 	 * it is allocated above 4GB - I guess people are used to that
1097 	 * sort of thing with Xen ... sigh
1098 	 */
1099 	shinfo = xen_start_info->shared_info;
1100 	PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL);
1101 
1102 	xc_printf("#4\n");
1103 
1104 	xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT);
1105 	PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL);
1106 	console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT);
1107 	PT_SET_MA(console_page, console_page_ma | PG_KERNEL);
1108 
1109 	xc_printf("#5\n");
1110 
1111 	set_iopl.iopl = 1;
1112 	PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl));
1113 	xc_printf("#6\n");
1114 #if 0
1115 	/* add page table for KERNBASE */
1116 	xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t),
1117 			    VTOM(cur_space) | PG_KERNEL);
1118 	xen_flush_queue();
1119 #ifdef PAE
1120 	xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t),
1121 			    VTOM(cur_space) | PG_V | PG_A);
1122 #else
1123 	xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t),
1124 			    VTOM(cur_space) | PG_V | PG_A);
1125 #endif
1126 	xen_flush_queue();
1127 	cur_space += PAGE_SIZE;
1128 	xc_printf("#6\n");
1129 #endif /* 0 */
1130 #ifdef notyet
1131 	if (xen_start_info->flags & SIF_INITDOMAIN) {
1132 		/* Map first megabyte */
1133 		for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE)
1134 			PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD);
1135 		xen_flush_queue();
1136 	}
1137 #endif
1138 	/*
1139 	 * re-map kernel text read-only
1140 	 *
1141 	 */
1142 	for (i = (((vm_offset_t)&btext) & ~PAGE_MASK);
1143 	     i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE)
1144 		PT_SET_MA(i, VTOM(i) | PG_V | PG_A);
1145 
1146 	xc_printf("#7\n");
1147 	physfree = VTOP(cur_space);
1148 	init_first = physfree >> PAGE_SHIFT;
1149 	IdlePTD = (pd_entry_t *)VTOP(IdlePTD);
1150 	IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT);
1151 	setup_xen_features();
1152 	xc_printf("#8, proc0kstack=%u\n", proc0kstack);
1153 }
1154 
1155 
/*
 * Table of trap handlers, one initializer per entry and terminated by an
 * all-zero entry.  Each initializer supplies four values in order: a
 * trap number, a small flags value, the kernel code selector
 * GSEL(GCODE_SEL, SEL_KPL), and the address of the IDTVEC() entry point.
 *
 * NOTE(review): the field meanings (presumably vector, flags, code
 * selector, handler address of Xen's trap_info_t) and the meaning of the
 * flag bits (the values used here are 0, 3, 0|4 and 3|4) are not visible
 * in this file -- confirm against the Xen interface header.
 */
trap_info_t trap_table[] = {
	{ 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
	{ 1,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
	{ 3,   3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
	{ 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
	/*
	 * This is UPL on Linux and KPL on BSD.
	 * NOTE(review): the flags value below is 3, the same as entries 3
	 * and 4, which does not obviously match "KPL on BSD" -- confirm
	 * which privilege level is intended.
	 */
	{ 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
	{ 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
	{ 7,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
	/*
	 * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
	 *   no handler for double fault
	 */
	{ 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
	{10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
	{11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
	{12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
	{13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
	{14,   0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
	{15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
	{16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
	{17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
	{18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
	{19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
	/* The only entry above 19: the int 0x80 system call entry point. */
	{0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
	/* All-zero terminator entry. */
	{  0, 0,           0, 0 }
};
1183 
1184 /* Perform a multicall and check that individual calls succeeded. */
1185 int
HYPERVISOR_multicall(struct multicall_entry * call_list,int nr_calls)1186 HYPERVISOR_multicall(struct multicall_entry * call_list, int nr_calls)
1187 {
1188 	int ret = 0;
1189 	int i;
1190 
1191 	/* Perform the multicall. */
1192 	PANIC_IF(_HYPERVISOR_multicall(call_list, nr_calls));
1193 
1194 	/* Check the results of individual hypercalls. */
1195 	for (i = 0; i < nr_calls; i++)
1196 		if (__predict_false(call_list[i].result < 0))
1197 			ret++;
1198 	if (__predict_false(ret > 0))
1199 		panic("%d multicall(s) failed: cpu %d\n",
1200 		    ret, smp_processor_id());
1201 
1202 	/* If we didn't panic already, everything succeeded. */
1203 	return (0);
1204 }
1205 
1206 /********** CODE WORTH KEEPING ABOVE HERE *****************/
1207 
void xen_failsafe_handler(void);

/*
 * Takes no arguments and unconditionally panics with a fixed message.
 *
 * NOTE(review): presumably this is the routine registered with Xen as the
 * failsafe callback; the registration is not visible here -- confirm.
 */
void
xen_failsafe_handler(void)
{

	panic("xen_failsafe_handler called!\n");
}
1216 
1217 void xen_handle_thread_switch(struct pcb *pcb);
1218 
1219 /* This is called by cpu_switch() when switching threads. */
1220 /* The pcb arg refers to the process control block of the */
1221 /* next thread which is to run */
1222 void
xen_handle_thread_switch(struct pcb * pcb)1223 xen_handle_thread_switch(struct pcb *pcb)
1224 {
1225     uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0];
1226     uint32_t *b = (uint32_t *)&pcb->pcb_fsd;
1227     multicall_entry_t mcl[3];
1228     int i = 0;
1229 
1230     /* Notify Xen of task switch */
1231     mcl[i].op = __HYPERVISOR_stack_switch;
1232     mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL);
1233     mcl[i++].args[1] = (unsigned long)pcb;
1234 
1235     /* Check for update of fsd */
1236     if (*a != *b || *(a+1) != *(b+1)) {
1237         mcl[i].op = __HYPERVISOR_update_descriptor;
1238         *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
1239         *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
1240     }
1241 
1242     a += 2;
1243     b += 2;
1244 
1245     /* Check for update of gsd */
1246     if (*a != *b || *(a+1) != *(b+1)) {
1247         mcl[i].op = __HYPERVISOR_update_descriptor;
1248         *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a);
1249         *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b;
1250     }
1251 
1252     (void)HYPERVISOR_multicall(mcl, i);
1253 }
1254