1 /*-
2  * Copyright (c) 2003,2004 Marcel Moolenaar
3  * Copyright (c) 2000,2001 Doug Rabson
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD: stable/10/sys/ia64/ia64/machdep.c 278412 2015-02-08 22:17:20Z peter $");
30 
31 #include "opt_compat.h"
32 #include "opt_ddb.h"
33 #include "opt_kstack_pages.h"
34 #include "opt_sched.h"
35 #include "opt_xtrace.h"
36 
37 #include <sys/param.h>
38 #include <sys/proc.h>
39 #include <sys/systm.h>
40 #include <sys/bio.h>
41 #include <sys/buf.h>
42 #include <sys/bus.h>
43 #include <sys/cons.h>
44 #include <sys/cpu.h>
45 #include <sys/efi.h>
46 #include <sys/eventhandler.h>
47 #include <sys/exec.h>
48 #include <sys/imgact.h>
49 #include <sys/kdb.h>
50 #include <sys/kernel.h>
51 #include <sys/linker.h>
52 #include <sys/lock.h>
53 #include <sys/malloc.h>
54 #include <sys/mbuf.h>
55 #include <sys/msgbuf.h>
56 #include <sys/pcpu.h>
57 #include <sys/ptrace.h>
58 #include <sys/random.h>
59 #include <sys/reboot.h>
60 #include <sys/rwlock.h>
61 #include <sys/sched.h>
62 #include <sys/signalvar.h>
63 #include <sys/syscall.h>
64 #include <sys/syscallsubr.h>
65 #include <sys/sysctl.h>
66 #include <sys/sysproto.h>
67 #include <sys/ucontext.h>
68 #include <sys/uio.h>
69 #include <sys/uuid.h>
70 #include <sys/vmmeter.h>
71 #include <sys/vnode.h>
72 
73 #include <ddb/ddb.h>
74 
75 #include <net/netisr.h>
76 
77 #include <vm/vm.h>
78 #include <vm/vm_extern.h>
79 #include <vm/vm_kern.h>
80 #include <vm/vm_page.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_object.h>
83 #include <vm/vm_pager.h>
84 
85 #include <machine/bootinfo.h>
86 #include <machine/cpu.h>
87 #include <machine/elf.h>
88 #include <machine/fpu.h>
89 #include <machine/intr.h>
90 #include <machine/kdb.h>
91 #include <machine/mca.h>
92 #include <machine/md_var.h>
93 #include <machine/pal.h>
94 #include <machine/pcb.h>
95 #include <machine/reg.h>
96 #include <machine/sal.h>
97 #include <machine/sigframe.h>
98 #ifdef SMP
99 #include <machine/smp.h>
100 #endif
101 #include <machine/unwind.h>
102 #include <machine/vmparam.h>
103 
104 /*
105  * For atomicity reasons, we demand that pc_curthread is the first
106  * field in the struct pcpu. It allows us to read the pointer with
107  * a single atomic instruction:
108  *	ld8 %curthread = [r13]
 * Otherwise we would first have to calculate the load address,
 * store the result in a temporary register and use that for the load:
 *	add %temp = %offsetof(struct pcpu), r13
 *	ld8 %curthread = [%temp]
 * A context switch in between the add and the ld8 could have the
 * thread migrate to a different core. In that case, %curthread
 * would be the thread running on the original core and not actually
 * the current thread.
117  */
118 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
119 
/* Parent sysctl nodes for the hw.freq.* and machdep.cpu.* subtrees. */
static SYSCTL_NODE(_hw, OID_AUTO, freq, CTLFLAG_RD, 0, "");
static SYSCTL_NODE(_machdep, OID_AUTO, cpu, CTLFLAG_RD, 0, "");

/*
 * Clock frequencies in MHz. They are filled in by calculate_frequencies()
 * and stay zero when the firmware queries made there fail.
 */
static u_int bus_freq;
SYSCTL_UINT(_hw_freq, OID_AUTO, bus, CTLFLAG_RD, &bus_freq, 0,
    "Bus clock frequency");

static u_int cpu_freq;
SYSCTL_UINT(_hw_freq, OID_AUTO, cpu, CTLFLAG_RD, &cpu_freq, 0,
    "CPU clock frequency");

static u_int itc_freq;
SYSCTL_UINT(_hw_freq, OID_AUTO, itc, CTLFLAG_RD, &itc_freq, 0,
    "ITC frequency");

int cold = 1;
int unmapped_buf_allowed = 0;

/* Boot information block; ia64_init() reads it but never assigns it. */
struct bootinfo *bootinfo;

/* Per-CPU data of the bootstrap processor (see ia64_init()). */
struct pcpu pcpu0;

extern u_int64_t kernel_text[], _end[];

/* Gateway page and the signal trampolines located within it. */
extern u_int64_t ia64_gateway_page[];
extern u_int64_t break_sigtramp[];
extern u_int64_t epc_sigtramp[];

/* FPSWA interface, or NULL when the loader did not supply one. */
struct fpswa_iface *fpswa_iface;

/*
 * PAL code location and I/O port base, taken from the EFI memory map
 * in ia64_init().
 */
vm_size_t ia64_pal_size;
vm_paddr_t ia64_pal_base;
vm_offset_t ia64_port_base;

/* Physical address that cpu_startup() maps to obtain ia64_pib. */
u_int64_t ia64_lapic_addr = PAL_PIB_DEFAULT_ADDR;

struct ia64_pib *ia64_pib;

/* Set by identifycpu() for family 0x20 processors. */
static int ia64_sync_icache_needed;

char machine[] = MACHINE;
SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");

/* Model and family names, filled in by identifycpu(). */
static char cpu_model[64];
SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0,
    "The CPU model name");

static char cpu_family[64];
SYSCTL_STRING(_hw, OID_AUTO, family, CTLFLAG_RD, cpu_family, 0,
    "The CPU family name");

#ifdef DDB
extern vm_offset_t ksym_start, ksym_end;
#endif

/* Kernel message buffer; allocated and initialized in ia64_init(). */
struct msgbuf *msgbufp = NULL;

/* Other subsystems (e.g., ACPI) can hook this later. */
void (*cpu_idle_hook)(sbintime_t) = NULL;

/* Handed to vm_ksubmap_init() by cpu_startup(). */
struct kva_md_info kmi;
181 
182 static void
identifycpu(void)183 identifycpu(void)
184 {
185 	char vendor[17];
186 	char *family_name, *model_name;
187 	u_int64_t features, tmp;
188 	int number, revision, model, family, archrev;
189 
190 	/*
191 	 * Assumes little-endian.
192 	 */
193 	*(u_int64_t *) &vendor[0] = ia64_get_cpuid(0);
194 	*(u_int64_t *) &vendor[8] = ia64_get_cpuid(1);
195 	vendor[16] = '\0';
196 
197 	tmp = ia64_get_cpuid(3);
198 	number = (tmp >> 0) & 0xff;
199 	revision = (tmp >> 8) & 0xff;
200 	model = (tmp >> 16) & 0xff;
201 	family = (tmp >> 24) & 0xff;
202 	archrev = (tmp >> 32) & 0xff;
203 
204 	family_name = model_name = "unknown";
205 	switch (family) {
206 	case 0x07:
207 		family_name = "Itanium";
208 		model_name = "Merced";
209 		break;
210 	case 0x1f:
211 		family_name = "Itanium 2";
212 		switch (model) {
213 		case 0x00:
214 			model_name = "McKinley";
215 			break;
216 		case 0x01:
217 			/*
218 			 * Deerfield is a low-voltage variant based on the
219 			 * Madison core. We need circumstantial evidence
220 			 * (i.e. the clock frequency) to identify those.
221 			 * Allow for roughly 1% error margin.
222 			 */
223 			if (cpu_freq > 990 && cpu_freq < 1010)
224 				model_name = "Deerfield";
225 			else
226 				model_name = "Madison";
227 			break;
228 		case 0x02:
229 			model_name = "Madison II";
230 			break;
231 		}
232 		break;
233 	case 0x20:
234 		ia64_sync_icache_needed = 1;
235 
236 		family_name = "Itanium 2";
237 		switch (model) {
238 		case 0x00:
239 			model_name = "Montecito";
240 			break;
241 		case 0x01:
242 			model_name = "Montvale";
243 			break;
244 		}
245 		break;
246 	}
247 	snprintf(cpu_family, sizeof(cpu_family), "%s", family_name);
248 	snprintf(cpu_model, sizeof(cpu_model), "%s", model_name);
249 
250 	features = ia64_get_cpuid(4);
251 
252 	printf("CPU: %s (", model_name);
253 	if (cpu_freq)
254 		printf("%u MHz ", cpu_freq);
255 	printf("%s)\n", family_name);
256 	printf("  Origin = \"%s\"  Revision = %d\n", vendor, revision);
257 	printf("  Features = 0x%b\n", (u_int32_t) features,
258 	    "\020"
259 	    "\001LB"	/* long branch (brl) instruction. */
260 	    "\002SD"	/* Spontaneous deferral. */
261 	    "\003AO"	/* 16-byte atomic operations (ld, st, cmpxchg). */ );
262 }
263 
264 static void
cpu_startup(void * dummy)265 cpu_startup(void *dummy)
266 {
267 	char nodename[16];
268 	struct pcpu *pc;
269 	struct pcpu_stats *pcs;
270 
271 	/*
272 	 * Good {morning,afternoon,evening,night}.
273 	 */
274 	identifycpu();
275 
276 #ifdef PERFMON
277 	perfmon_init();
278 #endif
279 	printf("real memory  = %ld (%ld MB)\n", ptoa(realmem),
280 	    ptoa(realmem) / 1048576);
281 
282 	vm_ksubmap_init(&kmi);
283 
284 	printf("avail memory = %ld (%ld MB)\n", ptoa(cnt.v_free_count),
285 	    ptoa(cnt.v_free_count) / 1048576);
286 
287 	if (fpswa_iface == NULL)
288 		printf("Warning: no FPSWA package supplied\n");
289 	else
290 		printf("FPSWA Revision = 0x%lx, Entry = %p\n",
291 		    (long)fpswa_iface->if_rev, (void *)fpswa_iface->if_fpswa);
292 
293 	/*
294 	 * Set up buffers, so they can be used to read disk labels.
295 	 */
296 	bufinit();
297 	vm_pager_bufferinit();
298 
299 	/*
300 	 * Traverse the MADT to discover IOSAPIC and Local SAPIC
301 	 * information.
302 	 */
303 	ia64_probe_sapics();
304 	ia64_pib = pmap_mapdev(ia64_lapic_addr, sizeof(*ia64_pib));
305 
306 	ia64_mca_init();
307 
308 	/*
309 	 * Create sysctl tree for per-CPU information.
310 	 */
311 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
312 		snprintf(nodename, sizeof(nodename), "%u", pc->pc_cpuid);
313 		sysctl_ctx_init(&pc->pc_md.sysctl_ctx);
314 		pc->pc_md.sysctl_tree = SYSCTL_ADD_NODE(&pc->pc_md.sysctl_ctx,
315 		    SYSCTL_STATIC_CHILDREN(_machdep_cpu), OID_AUTO, nodename,
316 		    CTLFLAG_RD, NULL, "");
317 		if (pc->pc_md.sysctl_tree == NULL)
318 			continue;
319 
320 		pcs = &pc->pc_md.stats;
321 
322 		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
323 		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
324 		    "nasts", CTLFLAG_RD, &pcs->pcs_nasts,
325 		    "Number of IPI_AST interrupts");
326 
327 		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
328 		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
329 		    "nclks", CTLFLAG_RD, &pcs->pcs_nclks,
330 		    "Number of clock interrupts");
331 
332 		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
333 		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
334 		    "nextints", CTLFLAG_RD, &pcs->pcs_nextints,
335 		    "Number of ExtINT interrupts");
336 
337 		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
338 		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
339 		    "nhardclocks", CTLFLAG_RD, &pcs->pcs_nhardclocks,
340 		    "Number of IPI_HARDCLOCK interrupts");
341 
342 		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
343 		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
344 		    "nhighfps", CTLFLAG_RD, &pcs->pcs_nhighfps,
345 		    "Number of IPI_HIGH_FP interrupts");
346 
347 		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
348 		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
349 		    "nhwints", CTLFLAG_RD, &pcs->pcs_nhwints,
350 		    "Number of hardware (device) interrupts");
351 
352 		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
353 		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
354 		    "npreempts", CTLFLAG_RD, &pcs->pcs_npreempts,
355 		    "Number of IPI_PREEMPT interrupts");
356 
357 		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
358 		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
359 		    "nrdvs", CTLFLAG_RD, &pcs->pcs_nrdvs,
360 		    "Number of IPI_RENDEZVOUS interrupts");
361 
362 		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
363 		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
364 		    "nstops", CTLFLAG_RD, &pcs->pcs_nstops,
365 		    "Number of IPI_STOP interrupts");
366 
367 		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
368 		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
369 		    "nstrays", CTLFLAG_RD, &pcs->pcs_nstrays,
370 		    "Number of stray interrupts");
371 	}
372 }
373 SYSINIT(cpu_startup, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
374 
375 void
cpu_flush_dcache(void * ptr,size_t len)376 cpu_flush_dcache(void *ptr, size_t len)
377 {
378 	vm_offset_t lim, va;
379 
380 	va = (uintptr_t)ptr & ~31;
381 	lim = (uintptr_t)ptr + len;
382 	while (va < lim) {
383 		ia64_fc(va);
384 		va += 32;
385 	}
386 
387 	ia64_srlz_d();
388 }
389 
390 /* Get current clock frequency for the given cpu id. */
391 int
cpu_est_clockrate(int cpu_id,uint64_t * rate)392 cpu_est_clockrate(int cpu_id, uint64_t *rate)
393 {
394 
395 	if (pcpu_find(cpu_id) == NULL || rate == NULL)
396 		return (EINVAL);
397 	*rate = (u_long)cpu_freq * 1000000ul;
398 	return (0);
399 }
400 
/*
 * Halt the machine. On this platform halting is implemented by asking
 * the EFI firmware to reset the system.
 */
void
cpu_halt(void)
{

	efi_reset_system();
}
407 
408 void
cpu_idle(int busy)409 cpu_idle(int busy)
410 {
411 	register_t ie;
412 	sbintime_t sbt = -1;
413 
414 	if (!busy) {
415 		critical_enter();
416 		sbt = cpu_idleclock();
417 	}
418 
419 	ie = intr_disable();
420 	KASSERT(ie != 0, ("%s called with interrupts disabled\n", __func__));
421 
422 	if (sched_runnable())
423 		ia64_enable_intr();
424 	else if (cpu_idle_hook != NULL) {
425 		(*cpu_idle_hook)(sbt);
426 		/* The hook must enable interrupts! */
427 	} else {
428 		ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0);
429 		ia64_enable_intr();
430 	}
431 
432 	if (!busy) {
433 		cpu_activeclock();
434 		critical_exit();
435 	}
436 }
437 
/*
 * Request that an idle CPU be woken up. Nothing is done here; the
 * function always returns 0.
 */
int
cpu_idle_wakeup(int cpu)
{

	(void)cpu;
	return (0);
}
444 
/*
 * Reset the machine by asking the EFI firmware to reset the system.
 */
void
cpu_reset(void)
{

	efi_reset_system();
}
451 
/*
 * Switch this CPU from thread 'old' to thread 'new'.
 *
 * The context of 'old' is saved in its PCB with savectx(). On the path
 * where savectx() returns zero we switch the pmap, hand old->td_lock
 * over to 'mtx', make 'new' the current thread and load its context
 * with restorectx(), which is not expected to return.
 * NOTE(review): savectx() presumably returns non-zero when 'old' is
 * resumed later on, in which case we simply return to the caller --
 * confirm against the savectx() implementation.
 */
void
cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx)
{
	struct pcb *oldpcb, *newpcb;

	oldpcb = old->td_pcb;
#ifdef COMPAT_FREEBSD32
	ia32_savectx(oldpcb);
#endif
	/*
	 * If 'old' is the thread recorded as the FP owner on this CPU,
	 * set PSR.DFH in its trapframe.
	 */
	if (pcpup->pc_fpcurthread == old)
		old->td_frame->tf_special.psr |= IA64_PSR_DFH;
	if (!savectx(oldpcb)) {
		newpcb = new->td_pcb;
		/* Remember the pmap that was active for 'old'. */
		oldpcb->pcb_current_pmap =
		    pmap_switch(newpcb->pcb_current_pmap);

		ia64_mf();

		/* Release 'old' by publishing its new lock. */
		atomic_store_rel_ptr(&old->td_lock, mtx);

#if defined(SCHED_ULE) && defined(SMP)
		/* Wait until 'new' is no longer held by blocked_lock. */
		while (atomic_load_acq_ptr(&new->td_lock) == &blocked_lock)
			cpu_spinwait();
#endif

		pcpup->pc_curthread = new;

#ifdef COMPAT_FREEBSD32
		ia32_restorectx(newpcb);
#endif

		/* 'new' is the recorded FP owner here: clear PSR.DFH. */
		if (pcpup->pc_fpcurthread == new)
			new->td_frame->tf_special.psr &= ~IA64_PSR_DFH;
		restorectx(newpcb);
		/* We should not get here. */
		panic("cpu_switch: restorectx() returned");
		/* NOTREACHED */
	}
}
491 
/*
 * Switch to thread 'new' without saving the context of the thread we
 * are coming from ('old' is unused). The pmap of 'new' is activated,
 * 'new' becomes the current thread and its context is loaded with
 * restorectx(), which is not expected to return.
 */
void
cpu_throw(struct thread *old __unused, struct thread *new)
{
	struct pcb *newpcb;

	newpcb = new->td_pcb;
	/* The previously active pmap is of no interest here. */
	(void)pmap_switch(newpcb->pcb_current_pmap);

#if defined(SCHED_ULE) && defined(SMP)
	/* Wait until 'new' is no longer held by blocked_lock. */
	while (atomic_load_acq_ptr(&new->td_lock) == &blocked_lock)
		cpu_spinwait();
#endif

	pcpup->pc_curthread = new;

#ifdef COMPAT_FREEBSD32
	ia32_restorectx(newpcb);
#endif

	restorectx(newpcb);
	/* We should not get here. */
	panic("cpu_throw: restorectx() returned");
	/* NOTREACHED */
}
516 
517 void
cpu_pcpu_init(struct pcpu * pcpu,int cpuid,size_t size)518 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
519 {
520 
521 	/*
522 	 * Set pc_acpi_id to "uninitialized".
523 	 * See sys/dev/acpica/acpi_cpu.c
524 	 */
525 	pcpu->pc_acpi_id = 0xffffffff;
526 }
527 
528 void
cpu_pcpu_setup(struct pcpu * pc,u_int acpi_id,u_int sapic_id)529 cpu_pcpu_setup(struct pcpu *pc, u_int acpi_id, u_int sapic_id)
530 {
531 
532 	pc->pc_acpi_id = acpi_id;
533 	pc->pc_md.lid = IA64_LID_SET_SAPIC_ID(sapic_id);
534 }
535 
536 void
spinlock_enter(void)537 spinlock_enter(void)
538 {
539 	struct thread *td;
540 	int intr;
541 
542 	td = curthread;
543 	if (td->td_md.md_spinlock_count == 0) {
544 		intr = intr_disable();
545 		td->td_md.md_spinlock_count = 1;
546 		td->td_md.md_saved_intr = intr;
547 	} else
548 		td->td_md.md_spinlock_count++;
549 	critical_enter();
550 }
551 
552 void
spinlock_exit(void)553 spinlock_exit(void)
554 {
555 	struct thread *td;
556 	int intr;
557 
558 	td = curthread;
559 	critical_exit();
560 	intr = td->td_md.md_saved_intr;
561 	td->td_md.md_spinlock_count--;
562 	if (td->td_md.md_spinlock_count == 0)
563 		intr_restore(intr);
564 }
565 
566 void
kdb_cpu_trap(int vector,int code __unused)567 kdb_cpu_trap(int vector, int code __unused)
568 {
569 
570 #ifdef XTRACE
571 	ia64_xtrace_stop();
572 #endif
573 	__asm __volatile("flushrs;;");
574 
575 	/* Restart after the break instruction. */
576 	if (vector == IA64_VEC_BREAK &&
577 	    kdb_frame->tf_special.ifa == IA64_FIXED_BREAK)
578 		kdb_frame->tf_special.psr += IA64_PSR_RI_1;
579 }
580 
/*
 * Pin the mapping for the VHPT at address 'vhpt' in data translation
 * register 3. The mapping is write-back cacheable, kernel-only and
 * read/write, and its size is 2^pmap_vhpt_log2size bytes.
 */
void
map_vhpt(uintptr_t vhpt)
{
	pt_entry_t pte;
	uint64_t psr;

	pte = PTE_PRESENT | PTE_MA_WB | PTE_ACCESSED | PTE_DIRTY |
	    PTE_PL_KERN | PTE_AR_RW;
	pte |= vhpt & PTE_PPN_MASK;

	/* Purge any data translation register overlapping the range. */
	__asm __volatile("ptr.d %0,%1" :: "r"(vhpt),
	    "r"(pmap_vhpt_log2size << 2));

	/*
	 * Save psr, then turn off interruption collection and interrupts
	 * while the insertion registers (ifa, itir) are being set up.
	 */
	__asm __volatile("mov   %0=psr" : "=r"(psr));
	__asm __volatile("rsm   psr.ic|psr.i");
	ia64_srlz_i();
	ia64_set_ifa(vhpt);
	ia64_set_itir(pmap_vhpt_log2size << 2);
	ia64_srlz_d();
	__asm __volatile("itr.d dtr[%0]=%1" :: "r"(3), "r"(pte));
	/* Restore the saved psr. */
	__asm __volatile("mov   psr.l=%0" :: "r" (psr));
	ia64_srlz_i();
}
604 
/*
 * Pin a mapping for the PAL code in data translation register 4 and
 * instruction translation register 1. The mapping lives at the
 * region 7 address of ia64_pal_base and is kernel-only with
 * read/write/execute rights. Nothing is done when no PAL code region
 * is known (ia64_pal_size == 0).
 */
void
map_pal_code(void)
{
	pt_entry_t pte;
	vm_offset_t va;
	vm_size_t sz;
	uint64_t psr;
	u_int shft;

	if (ia64_pal_size == 0)
		return;

	va = IA64_PHYS_TO_RR7(ia64_pal_base);

	/* shft = floor(log2(ia64_pal_size)): the page size to map with. */
	sz = ia64_pal_size;
	shft = 0;
	while (sz > 1) {
		shft++;
		sz >>= 1;
	}

	pte = PTE_PRESENT | PTE_MA_WB | PTE_ACCESSED | PTE_DIRTY |
	    PTE_PL_KERN | PTE_AR_RWX;
	pte |= ia64_pal_base & PTE_PPN_MASK;

	/* Purge overlapping data and instruction translation registers. */
	__asm __volatile("ptr.d %0,%1; ptr.i %0,%1" :: "r"(va), "r"(shft<<2));

	/*
	 * Save psr, then turn off interruption collection and interrupts
	 * while the insertion registers (ifa, itir) are being set up.
	 */
	__asm __volatile("mov	%0=psr" : "=r"(psr));
	__asm __volatile("rsm	psr.ic|psr.i");
	ia64_srlz_i();
	ia64_set_ifa(va);
	ia64_set_itir(shft << 2);
	ia64_srlz_d();
	__asm __volatile("itr.d	dtr[%0]=%1" :: "r"(4), "r"(pte));
	ia64_srlz_d();
	__asm __volatile("itr.i	itr[%0]=%1" :: "r"(1), "r"(pte));
	/* Restore the saved psr. */
	__asm __volatile("mov	psr.l=%0" :: "r" (psr));
	ia64_srlz_i();
}
644 
/*
 * Pin a mapping for the gateway page at VM_MAXUSER_ADDRESS in data
 * translation register 5 and instruction translation register 2, and
 * publish the address of the mapping in ar.k5.
 */
void
map_gateway_page(void)
{
	pt_entry_t pte;
	uint64_t psr;

	pte = PTE_PRESENT | PTE_MA_WB | PTE_ACCESSED | PTE_DIRTY |
	    PTE_PL_KERN | PTE_AR_X_RX;
	/* The physical address of the page comes from ia64_tpa(). */
	pte |= ia64_tpa((uint64_t)ia64_gateway_page) & PTE_PPN_MASK;

	/* Purge overlapping data and instruction translation registers. */
	__asm __volatile("ptr.d %0,%1; ptr.i %0,%1" ::
	    "r"(VM_MAXUSER_ADDRESS), "r"(PAGE_SHIFT << 2));

	/*
	 * Save psr, then turn off interruption collection and interrupts
	 * while the insertion registers (ifa, itir) are being set up.
	 */
	__asm __volatile("mov	%0=psr" : "=r"(psr));
	__asm __volatile("rsm	psr.ic|psr.i");
	ia64_srlz_i();
	ia64_set_ifa(VM_MAXUSER_ADDRESS);
	ia64_set_itir(PAGE_SHIFT << 2);
	ia64_srlz_d();
	__asm __volatile("itr.d	dtr[%0]=%1" :: "r"(5), "r"(pte));
	ia64_srlz_d();
	__asm __volatile("itr.i	itr[%0]=%1" :: "r"(2), "r"(pte));
	/* Restore the saved psr. */
	__asm __volatile("mov	psr.l=%0" :: "r" (psr));
	ia64_srlz_i();

	/* Expose the mapping to userland in ar.k5 */
	ia64_set_k5(VM_MAXUSER_ADDRESS);
}
673 
/*
 * Scale the base frequency 'base' (in Hz) by 'ratio' and return the
 * result in MHz, rounded to the nearest integer.
 *
 * 'ratio' carries the numerator in its upper 32 bits and the
 * denominator in its lower 32 bits. A zero denominator does not
 * describe a usable ratio; 0 is returned in that case, which callers
 * treat as "frequency unknown".
 */
static u_int
freq_ratio(u_long base, u_long ratio)
{
	u_long den, f, num;

	num = ratio >> 32;
	den = ratio & 0xfffffffful;
	if (den == 0)
		return (0);

	f = (base * num) / den;
	/* Convert Hz to MHz, rounding half up. */
	return ((f + 500000) / 1000000);
}
682 
683 static void
calculate_frequencies(void)684 calculate_frequencies(void)
685 {
686 	struct ia64_sal_result sal;
687 	struct ia64_pal_result pal;
688 	register_t ie;
689 
690 	ie = intr_disable();
691 	sal = ia64_sal_entry(SAL_FREQ_BASE, 0, 0, 0, 0, 0, 0, 0);
692 	pal = ia64_call_pal_static(PAL_FREQ_RATIOS, 0, 0, 0);
693 	intr_restore(ie);
694 
695 	if (sal.sal_status == 0 && pal.pal_status == 0) {
696 		if (bootverbose) {
697 			printf("Platform clock frequency %ld Hz\n",
698 			       sal.sal_result[0]);
699 			printf("Processor ratio %ld/%ld, Bus ratio %ld/%ld, "
700 			       "ITC ratio %ld/%ld\n",
701 			       pal.pal_result[0] >> 32,
702 			       pal.pal_result[0] & ((1L << 32) - 1),
703 			       pal.pal_result[1] >> 32,
704 			       pal.pal_result[1] & ((1L << 32) - 1),
705 			       pal.pal_result[2] >> 32,
706 			       pal.pal_result[2] & ((1L << 32) - 1));
707 		}
708 		cpu_freq = freq_ratio(sal.sal_result[0], pal.pal_result[0]);
709 		bus_freq = freq_ratio(sal.sal_result[0], pal.pal_result[1]);
710 		itc_freq = freq_ratio(sal.sal_result[0], pal.pal_result[2]);
711 	}
712 }
713 
/*
 * Machine-dependent kernel bootstrap.
 *
 * Using the boot information in 'bootinfo', this sets up the region
 * registers, builds the physical memory map from the EFI memory map,
 * wires up the firmware interfaces, initializes the per-CPU data of
 * the bootstrap processor, the console, the message buffer, thread0
 * and the pmap, and finally initializes the debugger.
 *
 * Returns the backing store pointer and stack pointer of thread0.
 * NOTE(review): presumably the caller switches to these -- the caller
 * is not visible here.
 */
struct ia64_init_return
ia64_init(void)
{
	struct ia64_init_return ret;
	struct efi_md *md;
	pt_entry_t *pbvm_pgtbl_ent, *pbvm_pgtbl_lim;
	char *p;
	vm_size_t mdlen;
	int metadata_missing;

	/*
	 * NO OUTPUT ALLOWED UNTIL FURTHER NOTICE.
	 */

	ia64_set_fpsr(IA64_FPSR_DEFAULT);

	/*
	 * Region 6 is direct mapped UC and region 7 is direct mapped
	 * WC. The details of this is controlled by the Alt {I,D}TLB
	 * handlers. Here we just make sure that they have the largest
	 * possible page size to minimise TLB usage.
	 */
	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (LOG2_ID_PAGE_SIZE << 2));
	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (LOG2_ID_PAGE_SIZE << 2));
	ia64_srlz_d();

	/* Initialize/setup physical memory datastructures */
	ia64_physmem_init();

	/*
	 * Process the memory map. This gives us the PAL locations,
	 * the I/O port base address, the available memory regions
	 * for initializing the physical memory map.
	 * Descriptor types not listed below are ignored.
	 */
	for (md = efi_md_first(); md != NULL; md = efi_md_next(md)) {
		mdlen = md->md_pages * EFI_PAGE_SIZE;
		switch (md->md_type) {
		case EFI_MD_TYPE_IOPORT:
			ia64_port_base = pmap_mapdev_priv(md->md_phys,
			    mdlen, VM_MEMATTR_UNCACHEABLE);
			break;
		case EFI_MD_TYPE_PALCODE:
			ia64_pal_base = md->md_phys;
			ia64_pal_size = mdlen;
			/*FALLTHROUGH*/
		case EFI_MD_TYPE_BAD:
		case EFI_MD_TYPE_FIRMWARE:
		case EFI_MD_TYPE_RECLAIM:
		case EFI_MD_TYPE_RT_CODE:
		case EFI_MD_TYPE_RT_DATA:
			/* Don't use these memory regions. */
			ia64_physmem_track(md->md_phys, mdlen);
			break;
		case EFI_MD_TYPE_BS_CODE:
		case EFI_MD_TYPE_BS_DATA:
		case EFI_MD_TYPE_CODE:
		case EFI_MD_TYPE_DATA:
		case EFI_MD_TYPE_FREE:
			/* These are ok to use. */
			ia64_physmem_add(md->md_phys, mdlen);
			break;
		}
	}

	/*
	 * Remove the PBVM and its page table from phys_avail. The loader
	 * passes the physical address of the page table to us. The virtual
	 * address of the page table is fixed.
	 * Walk the page table and remove every PBVM page it maps; the
	 * walk stops at the first entry that is not present.
	 */
	ia64_physmem_delete(bootinfo->bi_pbvm_pgtbl, bootinfo->bi_pbvm_pgtblsz);
	pbvm_pgtbl_ent = (void *)IA64_PBVM_PGTBL;
	pbvm_pgtbl_lim = (void *)(IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz);
	while (pbvm_pgtbl_ent < pbvm_pgtbl_lim) {
		if ((*pbvm_pgtbl_ent & PTE_PRESENT) == 0)
			break;
		ia64_physmem_delete(*pbvm_pgtbl_ent & PTE_PPN_MASK,
		    IA64_PBVM_PAGE_SIZE);
		pbvm_pgtbl_ent++;
	}

	/* Finalize physical memory datastructures */
	ia64_physmem_fini();

	/*
	 * We cannot print yet, so only remember whether the loader
	 * metadata is missing; the warning is printed once the console
	 * is up.
	 */
	metadata_missing = 0;
	if (bootinfo->bi_modulep)
		preload_metadata = (caddr_t)bootinfo->bi_modulep;
	else
		metadata_missing = 1;

	/* Use the loader's environment unless envmode says otherwise. */
	if (envmode == 0 && bootinfo->bi_envp)
		kern_envp = (caddr_t)bootinfo->bi_envp;
	else
		kern_envp = static_env;

	/*
	 * Look at arguments passed to us and compute boothowto.
	 */
	boothowto = bootinfo->bi_boothowto;

	if (boothowto & RB_VERBOSE)
		bootverbose = 1;

	/*
	 * Wire things up so we can call the firmware.
	 */
	map_pal_code();
	efi_boot_minimal(bootinfo->bi_systab);
	ia64_xiv_init();
	ia64_sal_init();
	calculate_frequencies();

	/* itc_freq is in MHz; the CPU ticker wants Hz. */
	set_cputicker(ia64_get_itc, (u_long)itc_freq * 1000000, 0);

	/*
	 * Setup the PCPU data for the bootstrap processor. It is needed
	 * by printf(). Also, since printf() has critical sections, we
	 * need to initialize at least pc_curthread.
	 */
	pcpup = &pcpu0;
	ia64_set_k4((u_int64_t)pcpup);
	pcpu_init(pcpup, 0, sizeof(pcpu0));
	dpcpu_init(ia64_physmem_alloc(DPCPU_SIZE, PAGE_SIZE), 0);
	cpu_pcpu_setup(pcpup, ~0U, ia64_get_lid());
	pcpup->pc_curthread = &thread0;

	/*
	 * Initialize the console before we print anything out.
	 */
	cninit();

	/* OUTPUT NOW ALLOWED */

	if (metadata_missing)
		printf("WARNING: loader(8) metadata is missing!\n");

	/* Get FPSWA interface */
	fpswa_iface = (bootinfo->bi_fpswa == 0) ? NULL :
	    (struct fpswa_iface *)IA64_PHYS_TO_RR7(bootinfo->bi_fpswa);

	/* Init basic tunables, including hz */
	init_param1();

	/* Let the environment override the kernel name, if it is set. */
	p = getenv("kernelname");
	if (p != NULL) {
		strlcpy(kernelname, p, sizeof(kernelname));
		freeenv(p);
	}

	init_param2(physmem);

	/*
	 * Initialize error message buffer (at end of core).
	 */
	msgbufp = ia64_physmem_alloc(msgbufsize, PAGE_SIZE);
	msgbufinit(msgbufp, msgbufsize);

	proc_linkup0(&proc0, &thread0);
	/*
	 * Init mapping for kernel stack for proc 0
	 */
	p = ia64_physmem_alloc(KSTACK_PAGES * PAGE_SIZE, PAGE_SIZE);
	thread0.td_kstack = (uintptr_t)p;
	thread0.td_kstack_pages = KSTACK_PAGES;

	mutex_init();

	/*
	 * Initialize the rest of proc 0's PCB.
	 *
	 * Set the kernel sp, reserving space for an (empty) trapframe,
	 * and make proc0's trapframe pointer point to it for sanity.
	 * Initialise proc0's backing store to start after u area.
	 */
	cpu_thread_alloc(&thread0);
	thread0.td_frame->tf_flags = FRAME_SYSCALL;
	thread0.td_pcb->pcb_special.sp =
	    (u_int64_t)thread0.td_frame - 16;
	thread0.td_pcb->pcb_special.bspstore = thread0.td_kstack;

	/*
	 * Initialize the virtual memory system.
	 */
	pmap_bootstrap();

#ifdef XTRACE
	ia64_xtrace_init_bsp();
#endif

	/*
	 * Initialize debuggers, and break into them if appropriate.
	 */
#ifdef DDB
	ksym_start = bootinfo->bi_symtab;
	ksym_end = bootinfo->bi_esymtab;
#endif

	kdb_init();

#ifdef KDB
	if (boothowto & RB_KDB)
		kdb_enter(KDB_WHY_BOOTFLAGS,
		    "Boot flags requested debugger\n");
#endif

	ia64_set_tpr(0);
	ia64_srlz_d();

	/* Hand thread0's backing store and stack pointers to the caller. */
	ret.bspstore = thread0.td_pcb->pcb_special.bspstore;
	ret.sp = thread0.td_pcb->pcb_special.sp;
	return (ret);
}
926 
927 uint64_t
ia64_get_hcdp(void)928 ia64_get_hcdp(void)
929 {
930 
931 	return (bootinfo->bi_hcdp);
932 }
933 
/*
 * Zero 'len' bytes starting at 'buf'.
 *
 * The work is done in four phases: single bytes until the pointer is
 * aligned on a u_long boundary, chunks of eight u_longs, single
 * u_longs and finally the remaining bytes.
 */
void
bzero(void *buf, size_t len)
{
	const size_t wsz = sizeof(u_long);
	caddr_t cur = buf;
	u_long *wp;
	int i;

	/* Leading bytes up to the first word boundary. */
	for (; len != 0 && ((vm_offset_t)cur & (wsz - 1)) != 0; len--)
		*cur++ = 0;

	/* Bulk of the buffer, eight words at a time. */
	for (; len >= 8 * wsz; len -= 8 * wsz, cur += 8 * wsz) {
		wp = (u_long *)cur;
		for (i = 0; i < 8; i++)
			wp[i] = 0;
	}

	/* Whole words that did not fill a chunk. */
	for (; len >= wsz; len -= wsz, cur += wsz)
		*(u_long *)cur = 0;

	/* Trailing bytes. */
	for (; len != 0; len--)
		*cur++ = 0;
}
965 
966 u_int
ia64_itc_freq(void)967 ia64_itc_freq(void)
968 {
969 
970 	return (itc_freq);
971 }
972 
973 void
DELAY(int n)974 DELAY(int n)
975 {
976 	u_int64_t start, end, now;
977 
978 	sched_pin();
979 
980 	start = ia64_get_itc();
981 	end = start + itc_freq * n;
982 	/* printf("DELAY from 0x%lx to 0x%lx\n", start, end); */
983 	do {
984 		now = ia64_get_itc();
985 	} while (now < end || (now > start && end < start));
986 
987 	sched_unpin();
988 }
989 
/*
 * Send an interrupt (signal) to a process.
 *
 * Builds a signal frame for the current thread, copies it out to the
 * user stack (or to the alternate signal stack) and redirects the
 * thread's trapframe to the signal trampoline in the gateway page.
 *
 * catcher	the signal handler; passed to the trampoline in r10.
 * ksi		describes the signal (number, code, siginfo).
 * mask		signal mask to store in the saved user context.
 *
 * Called with the process lock and the sigacts mutex held. Both are
 * dropped around the copyout and held again on the normal return
 * path. If the copyout fails, sigexit() is called with only the
 * process lock held.
 */
void
sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct proc *p;
	struct thread *td;
	struct trapframe *tf;
	struct sigacts *psp;
	struct sigframe sf, *sfp;
	u_int64_t sbs, sp;
	int oonstack;
	int sig;
	u_long code;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	tf = td->td_frame;
	sp = tf->tf_special.sp;
	oonstack = sigonstack(sp);
	/* Stays 0 unless we switch to the alternate signal stack. */
	sbs = 0;

	/* save user context */
	bzero(&sf, sizeof(struct sigframe));
	sf.sf_uc.uc_sigmask = *mask;
	sf.sf_uc.uc_stack = td->td_sigstk;
	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;

	/*
	 * Allocate and validate space for the signal handler
	 * context. Note that if the stack is in P0 space, the
	 * call to grow() is a nop, and the useracc() check
	 * will fail if the process has not already allocated
	 * the space with a `brk'.
	 *
	 * NOTE(review): neither grow() nor useracc() is called in this
	 * function; the note above looks stale.
	 *
	 * On the alternate stack, the 16-byte aligned base of the stack
	 * becomes the new backing store and the frame is placed at the
	 * top of the stack.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		sbs = (u_int64_t)td->td_sigstk.ss_sp;
		sbs = (sbs + 15) & ~15;
		sfp = (struct sigframe *)(sbs + td->td_sigstk.ss_size);
#if defined(COMPAT_43)
		td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
	} else
		sfp = (struct sigframe *)sp;
	/* Make room for one frame and align it down to 16 bytes. */
	sfp = (struct sigframe *)((u_int64_t)(sfp - 1) & ~15);

	/* Fill in the siginfo structure for POSIX handlers. */
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		sf.sf_si = ksi->ksi_info;
		sf.sf_si.si_signo = sig;
		/*
		 * XXX this shouldn't be here after code in trap.c
		 * is fixed
		 */
		sf.sf_si.si_addr = (void*)tf->tf_special.ifa;
		/* The handler gets the user address of the siginfo. */
		code = (u_int64_t)&sfp->sf_si;
	}

	/* Drop the locks before the frame is copied out to user space. */
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	get_mcontext(td, &sf.sf_uc.uc_mcontext, 0);

	/* Copy the frame out to userland. */
	if (copyout(&sf, sfp, sizeof(sf)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
		return;
	}

	/*
	 * Pick the trampoline that matches the way the kernel was
	 * entered. Its user address is the gateway page address in
	 * ar.k5 plus the offset of the trampoline within that page.
	 */
	if ((tf->tf_flags & FRAME_SYSCALL) == 0) {
		tf->tf_special.psr &= ~IA64_PSR_RI;
		tf->tf_special.iip = ia64_get_k5() +
		    ((uint64_t)break_sigtramp - (uint64_t)ia64_gateway_page);
	} else
		tf->tf_special.iip = ia64_get_k5() +
		    ((uint64_t)epc_sigtramp - (uint64_t)ia64_gateway_page);

	/*
	 * Setup the trapframe to return to the signal trampoline. We pass
	 * information to the trampoline in the following registers:
	 *
	 *	gp	new backing store or NULL
	 *	r8	signal number
	 *	r9	signal code or siginfo pointer
	 *	r10	signal handler (function descriptor)
	 */
	tf->tf_special.sp = (u_int64_t)sfp - 16;
	tf->tf_special.gp = sbs;
	tf->tf_special.bspstore = sf.sf_uc.uc_mcontext.mc_special.bspstore;
	tf->tf_special.ndirty = 0;
	tf->tf_special.rnat = sf.sf_uc.uc_mcontext.mc_special.rnat;
	tf->tf_scratch.gr8 = sig;
	tf->tf_scratch.gr9 = code;
	tf->tf_scratch.gr10 = (u_int64_t)catcher;

	/* Return with the locks held, as on entry. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
1101 
1102 /*
1103  * System call to cleanup state after a signal
1104  * has been taken.  Reset signal mask and
1105  * stack state from context left by sendsig (above).
1106  * Return to previous pc and psl as specified by
1107  * context left by sendsig. Check carefully to
1108  * make sure that the user has not modified the
1109  * state to gain improper privileges.
1110  *
1111  * MPSAFE
1112  */
1113 int
sys_sigreturn(struct thread * td,struct sigreturn_args * uap)1114 sys_sigreturn(struct thread *td,
1115 	struct sigreturn_args /* {
1116 		ucontext_t *sigcntxp;
1117 	} */ *uap)
1118 {
1119 	ucontext_t uc;
1120 	struct trapframe *tf;
1121 	struct pcb *pcb;
1122 
1123 	tf = td->td_frame;
1124 	pcb = td->td_pcb;
1125 
1126 	/*
1127 	 * Fetch the entire context structure at once for speed.
1128 	 * We don't use a normal argument to simplify RSE handling.
1129 	 */
1130 	if (copyin(uap->sigcntxp, (caddr_t)&uc, sizeof(uc)))
1131 		return (EFAULT);
1132 
1133 	set_mcontext(td, &uc.uc_mcontext);
1134 
1135 #if defined(COMPAT_43)
1136 	if (sigonstack(tf->tf_special.sp))
1137 		td->td_sigstk.ss_flags |= SS_ONSTACK;
1138 	else
1139 		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
1140 #endif
1141 	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
1142 
1143 	return (EJUSTRETURN);
1144 }
1145 
1146 #ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 compatible sigreturn entry point.  Forwards to
 * sys_sigreturn() with the argument structure reinterpreted as a
 * struct sigreturn_args and returns whatever that call returns.
 */
int
freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap)
{
	struct sigreturn_args *args;

	args = (struct sigreturn_args *)uap;
	return (sys_sigreturn(td, args));
}
1153 #endif
1154 
1155 /*
1156  * Construct a PCB from a trapframe. This is called from kdb_trap() where
1157  * we want to start a backtrace from the function that caused us to enter
1158  * the debugger. We have the context in the trapframe, but base the trace
1159  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
1160  * enough for a backtrace.
1161  */
1162 void
makectx(struct trapframe * tf,struct pcb * pcb)1163 makectx(struct trapframe *tf, struct pcb *pcb)
1164 {
1165 
1166 	pcb->pcb_special = tf->tf_special;
1167 	pcb->pcb_special.__spare = ~0UL;	/* XXX see unwind.c */
1168 	save_callee_saved(&pcb->pcb_preserved);
1169 	save_callee_saved_fp(&pcb->pcb_preserved_fp);
1170 }
1171 
1172 int
ia64_flush_dirty(struct thread * td,struct _special * r)1173 ia64_flush_dirty(struct thread *td, struct _special *r)
1174 {
1175 	struct iovec iov;
1176 	struct uio uio;
1177 	uint64_t bspst, kstk, rnat;
1178 	int error, locked;
1179 
1180 	if (r->ndirty == 0)
1181 		return (0);
1182 
1183 	kstk = td->td_kstack + (r->bspstore & 0x1ffUL);
1184 	if (td == curthread) {
1185 		__asm __volatile("mov	ar.rsc=0;;");
1186 		__asm __volatile("mov	%0=ar.bspstore" : "=r"(bspst));
1187 		/* Make sure we have all the user registers written out. */
1188 		if (bspst - kstk < r->ndirty) {
1189 			__asm __volatile("flushrs;;");
1190 			__asm __volatile("mov	%0=ar.bspstore" : "=r"(bspst));
1191 		}
1192 		__asm __volatile("mov	%0=ar.rnat;;" : "=r"(rnat));
1193 		__asm __volatile("mov	ar.rsc=3");
1194 		error = copyout((void*)kstk, (void*)r->bspstore, r->ndirty);
1195 		kstk += r->ndirty;
1196 		r->rnat = (bspst > kstk && (bspst & 0x1ffL) < (kstk & 0x1ffL))
1197 		    ? *(uint64_t*)(kstk | 0x1f8L) : rnat;
1198 	} else {
1199 		locked = PROC_LOCKED(td->td_proc);
1200 		if (!locked)
1201 			PHOLD(td->td_proc);
1202 		iov.iov_base = (void*)(uintptr_t)kstk;
1203 		iov.iov_len = r->ndirty;
1204 		uio.uio_iov = &iov;
1205 		uio.uio_iovcnt = 1;
1206 		uio.uio_offset = r->bspstore;
1207 		uio.uio_resid = r->ndirty;
1208 		uio.uio_segflg = UIO_SYSSPACE;
1209 		uio.uio_rw = UIO_WRITE;
1210 		uio.uio_td = td;
1211 		error = proc_rwmem(td->td_proc, &uio);
1212 		/*
1213 		 * XXX proc_rwmem() doesn't currently return ENOSPC,
1214 		 * so I think it can bogusly return 0. Neither do
1215 		 * we allow short writes.
1216 		 */
1217 		if (uio.uio_resid != 0 && error == 0)
1218 			error = ENOSPC;
1219 		if (!locked)
1220 			PRELE(td->td_proc);
1221 	}
1222 
1223 	r->bspstore += r->ndirty;
1224 	r->ndirty = 0;
1225 	return (error);
1226 }
1227 
1228 int
get_mcontext(struct thread * td,mcontext_t * mc,int flags)1229 get_mcontext(struct thread *td, mcontext_t *mc, int flags)
1230 {
1231 	struct trapframe *tf;
1232 	int error;
1233 
1234 	tf = td->td_frame;
1235 	bzero(mc, sizeof(*mc));
1236 	mc->mc_special = tf->tf_special;
1237 	error = ia64_flush_dirty(td, &mc->mc_special);
1238 	if (tf->tf_flags & FRAME_SYSCALL) {
1239 		mc->mc_flags |= _MC_FLAGS_SYSCALL_CONTEXT;
1240 		mc->mc_scratch = tf->tf_scratch;
1241 		if (flags & GET_MC_CLEAR_RET) {
1242 			mc->mc_scratch.gr8 = 0;
1243 			mc->mc_scratch.gr9 = 0;
1244 			mc->mc_scratch.gr10 = 0;
1245 			mc->mc_scratch.gr11 = 0;
1246 		}
1247 	} else {
1248 		mc->mc_flags |= _MC_FLAGS_ASYNC_CONTEXT;
1249 		mc->mc_scratch = tf->tf_scratch;
1250 		mc->mc_scratch_fp = tf->tf_scratch_fp;
1251 		/*
1252 		 * XXX If the thread never used the high FP registers, we
1253 		 * probably shouldn't waste time saving them.
1254 		 */
1255 		ia64_highfp_save(td);
1256 		mc->mc_flags |= _MC_FLAGS_HIGHFP_VALID;
1257 		mc->mc_high_fp = td->td_pcb->pcb_high_fp;
1258 	}
1259 	save_callee_saved(&mc->mc_preserved);
1260 	save_callee_saved_fp(&mc->mc_preserved_fp);
1261 	return (error);
1262 }
1263 
1264 int
set_mcontext(struct thread * td,mcontext_t * mc)1265 set_mcontext(struct thread *td, mcontext_t *mc)
1266 {
1267 	struct _special s;
1268 	struct trapframe *tf;
1269 	uint64_t psrmask;
1270 
1271 	tf = td->td_frame;
1272 
1273 	KASSERT((tf->tf_special.ndirty & ~PAGE_MASK) == 0,
1274 	    ("Whoa there! We have more than 8KB of dirty registers!"));
1275 
1276 	s = mc->mc_special;
1277 	/*
1278 	 * Only copy the user mask and the restart instruction bit from
1279 	 * the new context.
1280 	 */
1281 	psrmask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL |
1282 	    IA64_PSR_MFH | IA64_PSR_RI;
1283 	s.psr = (tf->tf_special.psr & ~psrmask) | (s.psr & psrmask);
1284 	/* We don't have any dirty registers of the new context. */
1285 	s.ndirty = 0;
1286 	if (mc->mc_flags & _MC_FLAGS_ASYNC_CONTEXT) {
1287 		/*
1288 		 * We can get an async context passed to us while we
1289 		 * entered the kernel through a syscall: sigreturn(2)
1290 		 * takes contexts that could previously be the result of
1291 		 * a trap or interrupt.
1292 		 * Hence, we cannot assert that the trapframe is not
1293 		 * a syscall frame, but we can assert that it's at
1294 		 * least an expected syscall.
1295 		 */
1296 		if (tf->tf_flags & FRAME_SYSCALL) {
1297 			KASSERT(tf->tf_scratch.gr15 == SYS_sigreturn, ("foo"));
1298 			tf->tf_flags &= ~FRAME_SYSCALL;
1299 		}
1300 		tf->tf_scratch = mc->mc_scratch;
1301 		tf->tf_scratch_fp = mc->mc_scratch_fp;
1302 		if (mc->mc_flags & _MC_FLAGS_HIGHFP_VALID)
1303 			td->td_pcb->pcb_high_fp = mc->mc_high_fp;
1304 	} else {
1305 		KASSERT((tf->tf_flags & FRAME_SYSCALL) != 0, ("foo"));
1306 		if ((mc->mc_flags & _MC_FLAGS_SYSCALL_CONTEXT) == 0) {
1307 			s.cfm = tf->tf_special.cfm;
1308 			s.iip = tf->tf_special.iip;
1309 			tf->tf_scratch.gr15 = 0;	/* Clear syscall nr. */
1310 		} else
1311 			tf->tf_scratch = mc->mc_scratch;
1312 	}
1313 	tf->tf_special = s;
1314 	restore_callee_saved(&mc->mc_preserved);
1315 	restore_callee_saved_fp(&mc->mc_preserved_fp);
1316 
1317 	return (0);
1318 }
1319 
1320 /*
1321  * Clear registers on exec.
1322  */
1323 void
exec_setregs(struct thread * td,struct image_params * imgp,u_long stack)1324 exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
1325 {
1326 	struct trapframe *tf;
1327 	uint64_t *ksttop, *kst;
1328 
1329 	tf = td->td_frame;
1330 	ksttop = (uint64_t*)(td->td_kstack + tf->tf_special.ndirty +
1331 	    (tf->tf_special.bspstore & 0x1ffUL));
1332 
1333 	/*
1334 	 * We can ignore up to 8KB of dirty registers by masking off the
1335 	 * lower 13 bits in exception_restore() or epc_syscall(). This
1336 	 * should be enough for a couple of years, but if there are more
1337 	 * than 8KB of dirty registers, we lose track of the bottom of
1338 	 * the kernel stack. The solution is to copy the active part of
1339 	 * the kernel stack down 1 page (or 2, but not more than that)
1340 	 * so that we always have less than 8KB of dirty registers.
1341 	 */
1342 	KASSERT((tf->tf_special.ndirty & ~PAGE_MASK) == 0,
1343 	    ("Whoa there! We have more than 8KB of dirty registers!"));
1344 
1345 	bzero(&tf->tf_special, sizeof(tf->tf_special));
1346 	if ((tf->tf_flags & FRAME_SYSCALL) == 0) {	/* break syscalls. */
1347 		bzero(&tf->tf_scratch, sizeof(tf->tf_scratch));
1348 		bzero(&tf->tf_scratch_fp, sizeof(tf->tf_scratch_fp));
1349 		tf->tf_special.cfm = (1UL<<63) | (3UL<<7) | 3UL;
1350 		tf->tf_special.bspstore = IA64_BACKINGSTORE;
1351 		/*
1352 		 * Copy the arguments onto the kernel register stack so that
1353 		 * they get loaded by the loadrs instruction. Skip over the
1354 		 * NaT collection points.
1355 		 */
1356 		kst = ksttop - 1;
1357 		if (((uintptr_t)kst & 0x1ff) == 0x1f8)
1358 			*kst-- = 0;
1359 		*kst-- = 0;
1360 		if (((uintptr_t)kst & 0x1ff) == 0x1f8)
1361 			*kst-- = 0;
1362 		*kst-- = imgp->ps_strings;
1363 		if (((uintptr_t)kst & 0x1ff) == 0x1f8)
1364 			*kst-- = 0;
1365 		*kst = stack;
1366 		tf->tf_special.ndirty = (ksttop - kst) << 3;
1367 	} else {				/* epc syscalls (default). */
1368 		tf->tf_special.cfm = (3UL<<62) | (3UL<<7) | 3UL;
1369 		tf->tf_special.bspstore = IA64_BACKINGSTORE + 24;
1370 		/*
1371 		 * Write values for out0, out1 and out2 to the user's backing
1372 		 * store and arrange for them to be restored into the user's
1373 		 * initial register frame.
1374 		 * Assumes that (bspstore & 0x1f8) < 0x1e0.
1375 		 */
1376 		suword((caddr_t)tf->tf_special.bspstore - 24, stack);
1377 		suword((caddr_t)tf->tf_special.bspstore - 16, imgp->ps_strings);
1378 		suword((caddr_t)tf->tf_special.bspstore -  8, 0);
1379 	}
1380 
1381 	tf->tf_special.iip = imgp->entry_addr;
1382 	tf->tf_special.sp = (stack & ~15) - 16;
1383 	tf->tf_special.rsc = 0xf;
1384 	tf->tf_special.fpsr = IA64_FPSR_DEFAULT;
1385 	tf->tf_special.psr = IA64_PSR_IC | IA64_PSR_I | IA64_PSR_IT |
1386 	    IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_DFH | IA64_PSR_BN |
1387 	    IA64_PSR_CPL_USER;
1388 }
1389 
1390 int
ptrace_set_pc(struct thread * td,unsigned long addr)1391 ptrace_set_pc(struct thread *td, unsigned long addr)
1392 {
1393 	uint64_t slot;
1394 
1395 	switch (addr & 0xFUL) {
1396 	case 0:
1397 		slot = IA64_PSR_RI_0;
1398 		break;
1399 	case 1:
1400 		/* XXX we need to deal with MLX bundles here */
1401 		slot = IA64_PSR_RI_1;
1402 		break;
1403 	case 2:
1404 		slot = IA64_PSR_RI_2;
1405 		break;
1406 	default:
1407 		return (EINVAL);
1408 	}
1409 
1410 	td->td_frame->tf_special.iip = addr & ~0x0FULL;
1411 	td->td_frame->tf_special.psr =
1412 	    (td->td_frame->tf_special.psr & ~IA64_PSR_RI) | slot;
1413 	return (0);
1414 }
1415 
1416 int
ptrace_single_step(struct thread * td)1417 ptrace_single_step(struct thread *td)
1418 {
1419 	struct trapframe *tf;
1420 
1421 	/*
1422 	 * There's no way to set single stepping when we're leaving the
1423 	 * kernel through the EPC syscall path. The way we solve this is
1424 	 * by enabling the lower-privilege trap so that we re-enter the
1425 	 * kernel as soon as the privilege level changes. See trap.c for
1426 	 * how we proceed from there.
1427 	 */
1428 	tf = td->td_frame;
1429 	if (tf->tf_flags & FRAME_SYSCALL)
1430 		tf->tf_special.psr |= IA64_PSR_LP;
1431 	else
1432 		tf->tf_special.psr |= IA64_PSR_SS;
1433 	return (0);
1434 }
1435 
1436 int
ptrace_clear_single_step(struct thread * td)1437 ptrace_clear_single_step(struct thread *td)
1438 {
1439 	struct trapframe *tf;
1440 
1441 	/*
1442 	 * Clear any and all status bits we may use to implement single
1443 	 * stepping.
1444 	 */
1445 	tf = td->td_frame;
1446 	tf->tf_special.psr &= ~IA64_PSR_SS;
1447 	tf->tf_special.psr &= ~IA64_PSR_LP;
1448 	tf->tf_special.psr &= ~IA64_PSR_TB;
1449 	return (0);
1450 }
1451 
1452 int
fill_regs(struct thread * td,struct reg * regs)1453 fill_regs(struct thread *td, struct reg *regs)
1454 {
1455 	struct trapframe *tf;
1456 
1457 	tf = td->td_frame;
1458 	regs->r_special = tf->tf_special;
1459 	regs->r_scratch = tf->tf_scratch;
1460 	save_callee_saved(&regs->r_preserved);
1461 	return (0);
1462 }
1463 
1464 int
set_regs(struct thread * td,struct reg * regs)1465 set_regs(struct thread *td, struct reg *regs)
1466 {
1467 	struct trapframe *tf;
1468 	int error;
1469 
1470 	tf = td->td_frame;
1471 	error = ia64_flush_dirty(td, &tf->tf_special);
1472 	if (!error) {
1473 		tf->tf_special = regs->r_special;
1474 		tf->tf_special.bspstore += tf->tf_special.ndirty;
1475 		tf->tf_special.ndirty = 0;
1476 		tf->tf_scratch = regs->r_scratch;
1477 		restore_callee_saved(&regs->r_preserved);
1478 	}
1479 	return (error);
1480 }
1481 
/*
 * Reading the debug registers is not implemented: both arguments are
 * ignored and ENOSYS is returned.
 */
int
fill_dbregs(struct thread *td, struct dbreg *dbregs)
{
	(void)td;
	(void)dbregs;

	return (ENOSYS);
}
1488 
/*
 * Writing the debug registers is not implemented: both arguments are
 * ignored and ENOSYS is returned.
 */
int
set_dbregs(struct thread *td, struct dbreg *dbregs)
{
	(void)td;
	(void)dbregs;

	return (ENOSYS);
}
1495 
1496 int
fill_fpregs(struct thread * td,struct fpreg * fpregs)1497 fill_fpregs(struct thread *td, struct fpreg *fpregs)
1498 {
1499 	struct trapframe *frame = td->td_frame;
1500 	struct pcb *pcb = td->td_pcb;
1501 
1502 	/* Save the high FP registers. */
1503 	ia64_highfp_save(td);
1504 
1505 	fpregs->fpr_scratch = frame->tf_scratch_fp;
1506 	save_callee_saved_fp(&fpregs->fpr_preserved);
1507 	fpregs->fpr_high = pcb->pcb_high_fp;
1508 	return (0);
1509 }
1510 
1511 int
set_fpregs(struct thread * td,struct fpreg * fpregs)1512 set_fpregs(struct thread *td, struct fpreg *fpregs)
1513 {
1514 	struct trapframe *frame = td->td_frame;
1515 	struct pcb *pcb = td->td_pcb;
1516 
1517 	/* Throw away the high FP registers (should be redundant). */
1518 	ia64_highfp_drop(td);
1519 
1520 	frame->tf_scratch_fp = fpregs->fpr_scratch;
1521 	restore_callee_saved_fp(&fpregs->fpr_preserved);
1522 	pcb->pcb_high_fp = fpregs->fpr_high;
1523 	return (0);
1524 }
1525 
1526 void
ia64_sync_icache(vm_offset_t va,vm_offset_t sz)1527 ia64_sync_icache(vm_offset_t va, vm_offset_t sz)
1528 {
1529 	vm_offset_t lim;
1530 
1531 	if (!ia64_sync_icache_needed)
1532 		return;
1533 
1534 	lim = va + sz;
1535 	while (va < lim) {
1536 		ia64_fc_i(va);
1537 		va += 32;	/* XXX */
1538 	}
1539 
1540 	ia64_sync_i();
1541 	ia64_srlz_i();
1542 }
1543