1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  * Portions Copyright 2010 The FreeBSD Foundation
22  *
23  * $FreeBSD$
24  */
25 
26 /*
27  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #ifdef illumos
32 #pragma ident	"%Z%%M%	%I%	%E% SMI"
33 #endif
34 
35 #include <sys/fasttrap_isa.h>
36 #include <sys/fasttrap_impl.h>
37 #include <sys/dtrace.h>
38 #include <sys/dtrace_impl.h>
39 #include <sys/cmn_err.h>
40 #ifdef illumos
41 #include <sys/regset.h>
42 #include <sys/privregs.h>
43 #include <sys/segments.h>
44 #include <sys/x86_archext.h>
45 #else
46 #include <cddl/dev/dtrace/dtrace_cddl.h>
47 #include <sys/types.h>
48 #include <sys/proc.h>
49 #include <sys/rmlock.h>
50 #include <sys/dtrace_bsd.h>
51 #include <cddl/dev/dtrace/x86/regset.h>
52 #include <machine/segments.h>
53 #include <machine/reg.h>
54 #include <machine/pcb.h>
55 #endif
56 #include <sys/sysmacros.h>
57 #ifdef illumos
58 #include <sys/trap.h>
59 #include <sys/archsystm.h>
60 #else
61 #include <sys/ptrace.h>
62 
63 static int
proc_ops(int op,proc_t * p,void * kaddr,off_t uaddr,size_t len)64 proc_ops(int op, proc_t *p, void *kaddr, off_t uaddr, size_t len)
65 {
66 	struct iovec iov;
67 	struct uio uio;
68 
69 	iov.iov_base = kaddr;
70 	iov.iov_len = len;
71 	uio.uio_offset = uaddr;
72 	uio.uio_iov = &iov;
73 	uio.uio_resid = len;
74 	uio.uio_iovcnt = 1;
75 	uio.uio_segflg = UIO_SYSSPACE;
76 	uio.uio_td = curthread;
77 	uio.uio_rw = op;
78 	PHOLD(p);
79 	if (proc_rwmem(p, &uio) != 0) {
80 		PRELE(p);
81 		return (-1);
82 	}
83 	PRELE(p);
84 
85 	return (0);
86 }
87 
88 static int
uread(proc_t * p,void * kaddr,size_t len,uintptr_t uaddr)89 uread(proc_t *p, void *kaddr, size_t len, uintptr_t uaddr)
90 {
91 
92 	return (proc_ops(UIO_READ, p, kaddr, uaddr, len));
93 }
94 
95 static int
uwrite(proc_t * p,void * kaddr,size_t len,uintptr_t uaddr)96 uwrite(proc_t *p, void *kaddr, size_t len, uintptr_t uaddr)
97 {
98 
99 	return (proc_ops(UIO_WRITE, p, kaddr, uaddr, len));
100 }
101 #endif /* illumos */
/*
 * The code below names saved registers with their 64-bit field names
 * (r_rip, r_rsp, ...).  On i386 map those names onto the corresponding
 * 32-bit struct reg fields so the same source serves both architectures.
 */
#ifdef __i386__
#define	r_rax	r_eax
#define	r_rbx	r_ebx
#define	r_rip	r_eip
#define	r_rflags r_eflags
#define	r_rsp	r_esp
#define	r_rbp	r_ebp
#endif
110 
111 /*
112  * Lossless User-Land Tracing on x86
113  * ---------------------------------
114  *
115  * The execution of most instructions is not dependent on the address; for
116  * these instructions it is sufficient to copy them into the user process's
117  * address space and execute them. To effectively single-step an instruction
118  * in user-land, we copy out the following sequence of instructions to scratch
119  * space in the user thread's ulwp_t structure.
120  *
 * We then set the program counter (%eip or %rip) to point to this scratch
 * space. Once execution resumes, the original instruction is executed and
 * then control flow is redirected to what was originally the subsequent
 * instruction. If the kernel attempts to deliver a signal while single-
 * stepping, the signal is deferred and the program counter is moved into the
 * second sequence of instructions. The second sequence ends in a trap into
 * the kernel where the deferred signal is then properly handled and delivered.
128  *
 * For instructions whose execution is position dependent, we perform simple
 * emulation. These instructions are limited to control transfer
 * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle
 * of %rip-relative addressing that means that almost any instruction can be
 * position dependent. For all the details on how we emulate generic
 * instructions including %rip-relative instructions, see the code in
 * fasttrap_pid_probe() below where we handle instructions of type
 * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing).
137  */
138 
/*
 * ModR/M byte: mod is bits 7-6, reg is bits 5-3 and r/m is bits 2-0.
 * FASTTRAP_MODRM() assembles a byte from the three fields.
 */
#define	FASTTRAP_MODRM_MOD(modrm)	(((modrm) >> 6) & 0x3)
#define	FASTTRAP_MODRM_REG(modrm)	(((modrm) >> 3) & 0x7)
#define	FASTTRAP_MODRM_RM(modrm)	((modrm) & 0x7)
#define	FASTTRAP_MODRM(mod, reg, rm)	(((mod) << 6) | ((reg) << 3) | (rm))

/*
 * SIB byte: scale is bits 7-6, index is bits 5-3 and base is bits 2-0.
 */
#define	FASTTRAP_SIB_SCALE(sib)		(((sib) >> 6) & 0x3)
#define	FASTTRAP_SIB_INDEX(sib)		(((sib) >> 3) & 0x7)
#define	FASTTRAP_SIB_BASE(sib)		((sib) & 0x7)

/*
 * REX prefix: the low four bits are W (bit 3), R (bit 2), X (bit 1) and
 * B (bit 0).  FASTTRAP_REX() builds a prefix byte by OR-ing the four bits
 * into 0x40.
 */
#define	FASTTRAP_REX_W(rex)		(((rex) >> 3) & 1)
#define	FASTTRAP_REX_R(rex)		(((rex) >> 2) & 1)
#define	FASTTRAP_REX_X(rex)		(((rex) >> 1) & 1)
#define	FASTTRAP_REX_B(rex)		((rex) & 1)
#define	FASTTRAP_REX(w, r, x, b)	\
	(0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b))
154 
155 /*
156  * Single-byte op-codes.
157  */
158 #define	FASTTRAP_PUSHL_EBP	0x55
159 
160 #define	FASTTRAP_JO		0x70
161 #define	FASTTRAP_JNO		0x71
162 #define	FASTTRAP_JB		0x72
163 #define	FASTTRAP_JAE		0x73
164 #define	FASTTRAP_JE		0x74
165 #define	FASTTRAP_JNE		0x75
166 #define	FASTTRAP_JBE		0x76
167 #define	FASTTRAP_JA		0x77
168 #define	FASTTRAP_JS		0x78
169 #define	FASTTRAP_JNS		0x79
170 #define	FASTTRAP_JP		0x7a
171 #define	FASTTRAP_JNP		0x7b
172 #define	FASTTRAP_JL		0x7c
173 #define	FASTTRAP_JGE		0x7d
174 #define	FASTTRAP_JLE		0x7e
175 #define	FASTTRAP_JG		0x7f
176 
177 #define	FASTTRAP_NOP		0x90
178 
179 #define	FASTTRAP_MOV_EAX	0xb8
180 #define	FASTTRAP_MOV_ECX	0xb9
181 
182 #define	FASTTRAP_RET16		0xc2
183 #define	FASTTRAP_RET		0xc3
184 
185 #define	FASTTRAP_LOOPNZ		0xe0
186 #define	FASTTRAP_LOOPZ		0xe1
187 #define	FASTTRAP_LOOP		0xe2
188 #define	FASTTRAP_JCXZ		0xe3
189 
190 #define	FASTTRAP_CALL		0xe8
191 #define	FASTTRAP_JMP32		0xe9
192 #define	FASTTRAP_JMP8		0xeb
193 
194 #define	FASTTRAP_INT3		0xcc
195 #define	FASTTRAP_INT		0xcd
196 
197 #define	FASTTRAP_2_BYTE_OP	0x0f
198 #define	FASTTRAP_GROUP5_OP	0xff
199 
200 /*
201  * Two-byte op-codes (second byte only).
202  */
203 #define	FASTTRAP_0F_JO		0x80
204 #define	FASTTRAP_0F_JNO		0x81
205 #define	FASTTRAP_0F_JB		0x82
206 #define	FASTTRAP_0F_JAE		0x83
207 #define	FASTTRAP_0F_JE		0x84
208 #define	FASTTRAP_0F_JNE		0x85
209 #define	FASTTRAP_0F_JBE		0x86
210 #define	FASTTRAP_0F_JA		0x87
211 #define	FASTTRAP_0F_JS		0x88
212 #define	FASTTRAP_0F_JNS		0x89
213 #define	FASTTRAP_0F_JP		0x8a
214 #define	FASTTRAP_0F_JNP		0x8b
215 #define	FASTTRAP_0F_JL		0x8c
216 #define	FASTTRAP_0F_JGE		0x8d
217 #define	FASTTRAP_0F_JLE		0x8e
218 #define	FASTTRAP_0F_JG		0x8f
219 
220 #define	FASTTRAP_EFLAGS_OF	0x800
221 #define	FASTTRAP_EFLAGS_DF	0x400
222 #define	FASTTRAP_EFLAGS_SF	0x080
223 #define	FASTTRAP_EFLAGS_ZF	0x040
224 #define	FASTTRAP_EFLAGS_AF	0x010
225 #define	FASTTRAP_EFLAGS_PF	0x004
226 #define	FASTTRAP_EFLAGS_CF	0x001
227 
228 /*
229  * Instruction prefixes.
230  */
231 #define	FASTTRAP_PREFIX_OPERAND	0x66
232 #define	FASTTRAP_PREFIX_ADDRESS	0x67
233 #define	FASTTRAP_PREFIX_CS	0x2E
234 #define	FASTTRAP_PREFIX_DS	0x3E
235 #define	FASTTRAP_PREFIX_ES	0x26
236 #define	FASTTRAP_PREFIX_FS	0x64
237 #define	FASTTRAP_PREFIX_GS	0x65
238 #define	FASTTRAP_PREFIX_SS	0x36
239 #define	FASTTRAP_PREFIX_LOCK	0xF0
240 #define	FASTTRAP_PREFIX_REP	0xF3
241 #define	FASTTRAP_PREFIX_REPNE	0xF2
242 
243 #define	FASTTRAP_NOREG	0xff
244 
/*
 * Map between instruction register encodings and the kernel constants which
 * correspond to indices into struct regs.
 */
249 #ifdef __amd64
250 static const uint8_t regmap[16] = {
251 	REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI,
252 	REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,
253 };
254 #else
255 static const uint8_t regmap[8] = {
256 	EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI
257 };
258 #endif
259 
260 static ulong_t fasttrap_getreg(struct reg *, uint_t);
261 
262 static uint64_t
fasttrap_anarg(struct reg * rp,int function_entry,int argno)263 fasttrap_anarg(struct reg *rp, int function_entry, int argno)
264 {
265 	uint64_t value = 0;
266 	int shift = function_entry ? 1 : 0;
267 
268 #ifdef __amd64
269 	if (curproc->p_model == DATAMODEL_LP64) {
270 		uintptr_t *stack;
271 
272 		/*
273 		 * In 64-bit mode, the first six arguments are stored in
274 		 * registers.
275 		 */
276 		if (argno < 6)
277 			switch (argno) {
278 			case 0:
279 				return (rp->r_rdi);
280 			case 1:
281 				return (rp->r_rsi);
282 			case 2:
283 				return (rp->r_rdx);
284 			case 3:
285 				return (rp->r_rcx);
286 			case 4:
287 				return (rp->r_r8);
288 			case 5:
289 				return (rp->r_r9);
290 			}
291 
292 		stack = (uintptr_t *)rp->r_rsp;
293 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
294 		value = dtrace_fulword(&stack[argno - 6 + shift]);
295 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
296 	} else {
297 #endif
298 #ifdef __i386
299 		uint32_t *stack = (uint32_t *)rp->r_esp;
300 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
301 		value = dtrace_fuword32(&stack[argno + shift]);
302 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
303 #endif
304 #ifdef __amd64
305 	}
306 #endif
307 
308 	return (value);
309 }
310 
311 /*ARGSUSED*/
312 int
fasttrap_tracepoint_init(proc_t * p,fasttrap_tracepoint_t * tp,uintptr_t pc,fasttrap_probe_type_t type)313 fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
314     fasttrap_probe_type_t type)
315 {
316 	uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10];
317 	size_t len = FASTTRAP_MAX_INSTR_SIZE;
318 	size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET));
319 	uint_t start = 0;
320 	int rmindex, size;
321 	uint8_t seg, rex = 0;
322 
323 	/*
324 	 * Read the instruction at the given address out of the process's
325 	 * address space. We don't have to worry about a debugger
326 	 * changing this instruction before we overwrite it with our trap
327 	 * instruction since P_PR_LOCK is set. Since instructions can span
328 	 * pages, we potentially read the instruction in two parts. If the
329 	 * second part fails, we just zero out that part of the instruction.
330 	 */
331 	if (uread(p, &instr[0], first, pc) != 0)
332 		return (-1);
333 	if (len > first &&
334 	    uread(p, &instr[first], len - first, pc + first) != 0) {
335 		bzero(&instr[first], len - first);
336 		len = first;
337 	}
338 
339 	/*
340 	 * If the disassembly fails, then we have a malformed instruction.
341 	 */
342 	if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0)
343 		return (-1);
344 
345 	/*
346 	 * Make sure the disassembler isn't completely broken.
347 	 */
348 	ASSERT(-1 <= rmindex && rmindex < size);
349 
350 	/*
351 	 * If the computed size is greater than the number of bytes read,
352 	 * then it was a malformed instruction possibly because it fell on a
353 	 * page boundary and the subsequent page was missing or because of
354 	 * some malicious user.
355 	 */
356 	if (size > len)
357 		return (-1);
358 
359 	tp->ftt_size = (uint8_t)size;
360 	tp->ftt_segment = FASTTRAP_SEG_NONE;
361 
362 	/*
363 	 * Find the start of the instruction's opcode by processing any
364 	 * legacy prefixes.
365 	 */
366 	for (;;) {
367 		seg = 0;
368 		switch (instr[start]) {
369 		case FASTTRAP_PREFIX_SS:
370 			seg++;
371 			/*FALLTHRU*/
372 		case FASTTRAP_PREFIX_GS:
373 			seg++;
374 			/*FALLTHRU*/
375 		case FASTTRAP_PREFIX_FS:
376 			seg++;
377 			/*FALLTHRU*/
378 		case FASTTRAP_PREFIX_ES:
379 			seg++;
380 			/*FALLTHRU*/
381 		case FASTTRAP_PREFIX_DS:
382 			seg++;
383 			/*FALLTHRU*/
384 		case FASTTRAP_PREFIX_CS:
385 			seg++;
386 			/*FALLTHRU*/
387 		case FASTTRAP_PREFIX_OPERAND:
388 		case FASTTRAP_PREFIX_ADDRESS:
389 		case FASTTRAP_PREFIX_LOCK:
390 		case FASTTRAP_PREFIX_REP:
391 		case FASTTRAP_PREFIX_REPNE:
392 			if (seg != 0) {
393 				/*
394 				 * It's illegal for an instruction to specify
395 				 * two segment prefixes -- give up on this
396 				 * illegal instruction.
397 				 */
398 				if (tp->ftt_segment != FASTTRAP_SEG_NONE)
399 					return (-1);
400 
401 				tp->ftt_segment = seg;
402 			}
403 			start++;
404 			continue;
405 		}
406 		break;
407 	}
408 
409 #ifdef __amd64
410 	/*
411 	 * Identify the REX prefix on 64-bit processes.
412 	 */
413 	if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40)
414 		rex = instr[start++];
415 #endif
416 
417 	/*
418 	 * Now that we're pretty sure that the instruction is okay, copy the
419 	 * valid part to the tracepoint.
420 	 */
421 	bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE);
422 
423 	tp->ftt_type = FASTTRAP_T_COMMON;
424 	if (instr[start] == FASTTRAP_2_BYTE_OP) {
425 		switch (instr[start + 1]) {
426 		case FASTTRAP_0F_JO:
427 		case FASTTRAP_0F_JNO:
428 		case FASTTRAP_0F_JB:
429 		case FASTTRAP_0F_JAE:
430 		case FASTTRAP_0F_JE:
431 		case FASTTRAP_0F_JNE:
432 		case FASTTRAP_0F_JBE:
433 		case FASTTRAP_0F_JA:
434 		case FASTTRAP_0F_JS:
435 		case FASTTRAP_0F_JNS:
436 		case FASTTRAP_0F_JP:
437 		case FASTTRAP_0F_JNP:
438 		case FASTTRAP_0F_JL:
439 		case FASTTRAP_0F_JGE:
440 		case FASTTRAP_0F_JLE:
441 		case FASTTRAP_0F_JG:
442 			tp->ftt_type = FASTTRAP_T_JCC;
443 			tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO;
444 			tp->ftt_dest = pc + tp->ftt_size +
445 			    /* LINTED - alignment */
446 			    *(int32_t *)&instr[start + 2];
447 			break;
448 		}
449 	} else if (instr[start] == FASTTRAP_GROUP5_OP) {
450 		uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]);
451 		uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]);
452 		uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]);
453 
454 		if (reg == 2 || reg == 4) {
455 			uint_t i, sz;
456 
457 			if (reg == 2)
458 				tp->ftt_type = FASTTRAP_T_CALL;
459 			else
460 				tp->ftt_type = FASTTRAP_T_JMP;
461 
462 			if (mod == 3)
463 				tp->ftt_code = 2;
464 			else
465 				tp->ftt_code = 1;
466 
467 			ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);
468 
469 			/*
470 			 * See AMD x86-64 Architecture Programmer's Manual
471 			 * Volume 3, Section 1.2.7, Table 1-12, and
472 			 * Appendix A.3.1, Table A-15.
473 			 */
474 			if (mod != 3 && rm == 4) {
475 				uint8_t sib = instr[start + 2];
476 				uint_t index = FASTTRAP_SIB_INDEX(sib);
477 				uint_t base = FASTTRAP_SIB_BASE(sib);
478 
479 				tp->ftt_scale = FASTTRAP_SIB_SCALE(sib);
480 
481 				tp->ftt_index = (index == 4) ?
482 				    FASTTRAP_NOREG :
483 				    regmap[index | (FASTTRAP_REX_X(rex) << 3)];
484 				tp->ftt_base = (mod == 0 && base == 5) ?
485 				    FASTTRAP_NOREG :
486 				    regmap[base | (FASTTRAP_REX_B(rex) << 3)];
487 
488 				i = 3;
489 				sz = mod == 1 ? 1 : 4;
490 			} else {
491 				/*
492 				 * In 64-bit mode, mod == 0 and r/m == 5
493 				 * denotes %rip-relative addressing; in 32-bit
494 				 * mode, the base register isn't used. In both
495 				 * modes, there is a 32-bit operand.
496 				 */
497 				if (mod == 0 && rm == 5) {
498 #ifdef __amd64
499 					if (p->p_model == DATAMODEL_LP64)
500 						tp->ftt_base = REG_RIP;
501 					else
502 #endif
503 						tp->ftt_base = FASTTRAP_NOREG;
504 					sz = 4;
505 				} else  {
506 					uint8_t base = rm |
507 					    (FASTTRAP_REX_B(rex) << 3);
508 
509 					tp->ftt_base = regmap[base];
510 					sz = mod == 1 ? 1 : mod == 2 ? 4 : 0;
511 				}
512 				tp->ftt_index = FASTTRAP_NOREG;
513 				i = 2;
514 			}
515 
516 			if (sz == 1) {
517 				tp->ftt_dest = *(int8_t *)&instr[start + i];
518 			} else if (sz == 4) {
519 				/* LINTED - alignment */
520 				tp->ftt_dest = *(int32_t *)&instr[start + i];
521 			} else {
522 				tp->ftt_dest = 0;
523 			}
524 		}
525 	} else {
526 		switch (instr[start]) {
527 		case FASTTRAP_RET:
528 			tp->ftt_type = FASTTRAP_T_RET;
529 			break;
530 
531 		case FASTTRAP_RET16:
532 			tp->ftt_type = FASTTRAP_T_RET16;
533 			/* LINTED - alignment */
534 			tp->ftt_dest = *(uint16_t *)&instr[start + 1];
535 			break;
536 
537 		case FASTTRAP_JO:
538 		case FASTTRAP_JNO:
539 		case FASTTRAP_JB:
540 		case FASTTRAP_JAE:
541 		case FASTTRAP_JE:
542 		case FASTTRAP_JNE:
543 		case FASTTRAP_JBE:
544 		case FASTTRAP_JA:
545 		case FASTTRAP_JS:
546 		case FASTTRAP_JNS:
547 		case FASTTRAP_JP:
548 		case FASTTRAP_JNP:
549 		case FASTTRAP_JL:
550 		case FASTTRAP_JGE:
551 		case FASTTRAP_JLE:
552 		case FASTTRAP_JG:
553 			tp->ftt_type = FASTTRAP_T_JCC;
554 			tp->ftt_code = instr[start];
555 			tp->ftt_dest = pc + tp->ftt_size +
556 			    (int8_t)instr[start + 1];
557 			break;
558 
559 		case FASTTRAP_LOOPNZ:
560 		case FASTTRAP_LOOPZ:
561 		case FASTTRAP_LOOP:
562 			tp->ftt_type = FASTTRAP_T_LOOP;
563 			tp->ftt_code = instr[start];
564 			tp->ftt_dest = pc + tp->ftt_size +
565 			    (int8_t)instr[start + 1];
566 			break;
567 
568 		case FASTTRAP_JCXZ:
569 			tp->ftt_type = FASTTRAP_T_JCXZ;
570 			tp->ftt_dest = pc + tp->ftt_size +
571 			    (int8_t)instr[start + 1];
572 			break;
573 
574 		case FASTTRAP_CALL:
575 			tp->ftt_type = FASTTRAP_T_CALL;
576 			tp->ftt_dest = pc + tp->ftt_size +
577 			    /* LINTED - alignment */
578 			    *(int32_t *)&instr[start + 1];
579 			tp->ftt_code = 0;
580 			break;
581 
582 		case FASTTRAP_JMP32:
583 			tp->ftt_type = FASTTRAP_T_JMP;
584 			tp->ftt_dest = pc + tp->ftt_size +
585 			    /* LINTED - alignment */
586 			    *(int32_t *)&instr[start + 1];
587 			break;
588 		case FASTTRAP_JMP8:
589 			tp->ftt_type = FASTTRAP_T_JMP;
590 			tp->ftt_dest = pc + tp->ftt_size +
591 			    (int8_t)instr[start + 1];
592 			break;
593 
594 		case FASTTRAP_PUSHL_EBP:
595 			if (start == 0)
596 				tp->ftt_type = FASTTRAP_T_PUSHL_EBP;
597 			break;
598 
599 		case FASTTRAP_NOP:
600 #ifdef __amd64
601 			ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0);
602 
603 			/*
604 			 * On amd64 we have to be careful not to confuse a nop
605 			 * (actually xchgl %eax, %eax) with an instruction using
606 			 * the same opcode, but that does something different
607 			 * (e.g. xchgl %r8d, %eax or xcghq %r8, %rax).
608 			 */
609 			if (FASTTRAP_REX_B(rex) == 0)
610 #endif
611 				tp->ftt_type = FASTTRAP_T_NOP;
612 			break;
613 
614 		case FASTTRAP_INT3:
615 			/*
616 			 * The pid provider shares the int3 trap with debugger
617 			 * breakpoints so we can't instrument them.
618 			 */
619 			ASSERT(instr[start] == FASTTRAP_INSTR);
620 			return (-1);
621 
622 		case FASTTRAP_INT:
623 			/*
624 			 * Interrupts seem like they could be traced with
625 			 * no negative implications, but it's possible that
626 			 * a thread could be redirected by the trap handling
627 			 * code which would eventually return to the
628 			 * instruction after the interrupt. If the interrupt
629 			 * were in our scratch space, the subsequent
630 			 * instruction might be overwritten before we return.
631 			 * Accordingly we refuse to instrument any interrupt.
632 			 */
633 			return (-1);
634 		}
635 	}
636 
637 #ifdef __amd64
638 	if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) {
639 		/*
640 		 * If the process is 64-bit and the instruction type is still
641 		 * FASTTRAP_T_COMMON -- meaning we're going to copy it out an
642 		 * execute it -- we need to watch for %rip-relative
643 		 * addressing mode. See the portion of fasttrap_pid_probe()
644 		 * below where we handle tracepoints with type
645 		 * FASTTRAP_T_COMMON for how we emulate instructions that
646 		 * employ %rip-relative addressing.
647 		 */
648 		if (rmindex != -1) {
649 			uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]);
650 			uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]);
651 			uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]);
652 
653 			ASSERT(rmindex > start);
654 
655 			if (mod == 0 && rm == 5) {
656 				/*
657 				 * We need to be sure to avoid other
658 				 * registers used by this instruction. While
659 				 * the reg field may determine the op code
660 				 * rather than denoting a register, assuming
661 				 * that it denotes a register is always safe.
662 				 * We leave the REX field intact and use
663 				 * whatever value's there for simplicity.
664 				 */
665 				if (reg != 0) {
666 					tp->ftt_ripmode = FASTTRAP_RIP_1 |
667 					    (FASTTRAP_RIP_X *
668 					    FASTTRAP_REX_B(rex));
669 					rm = 0;
670 				} else {
671 					tp->ftt_ripmode = FASTTRAP_RIP_2 |
672 					    (FASTTRAP_RIP_X *
673 					    FASTTRAP_REX_B(rex));
674 					rm = 1;
675 				}
676 
677 				tp->ftt_modrm = tp->ftt_instr[rmindex];
678 				tp->ftt_instr[rmindex] =
679 				    FASTTRAP_MODRM(2, reg, rm);
680 			}
681 		}
682 	}
683 #endif
684 
685 	return (0);
686 }
687 
688 int
fasttrap_tracepoint_install(proc_t * p,fasttrap_tracepoint_t * tp)689 fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
690 {
691 	fasttrap_instr_t instr = FASTTRAP_INSTR;
692 
693 	if (uwrite(p, &instr, 1, tp->ftt_pc) != 0)
694 		return (-1);
695 
696 	return (0);
697 }
698 
699 int
fasttrap_tracepoint_remove(proc_t * p,fasttrap_tracepoint_t * tp)700 fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
701 {
702 	uint8_t instr;
703 
704 	/*
705 	 * Distinguish between read or write failures and a changed
706 	 * instruction.
707 	 */
708 	if (uread(p, &instr, 1, tp->ftt_pc) != 0)
709 		return (0);
710 	if (instr != FASTTRAP_INSTR)
711 		return (0);
712 	if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0)
713 		return (-1);
714 
715 	return (0);
716 }
717 
718 #ifdef __amd64
/*
 * Read a word from user address uaddr via fasttrap_fulword(), mapping its
 * -1 result to 0.  A stored value of all ones is therefore reported as 0
 * as well.
 */
static uintptr_t
fasttrap_fulword_noerr(const void *uaddr)
{
	uintptr_t word = fasttrap_fulword(uaddr);

	return (word == (uintptr_t)-1 ? 0 : word);
}
729 #endif
730 
731 #ifdef __i386__
/*
 * Read a 32-bit word from user address uaddr via fasttrap_fuword32(),
 * mapping its -1 result to 0.  A stored value of all ones is therefore
 * reported as 0 as well.
 */
static uint32_t
fasttrap_fuword32_noerr(const void *uaddr)
{
	uint32_t word = fasttrap_fuword32(uaddr);

	return (word == (uint32_t)-1 ? 0 : word);
}
742 #endif
743 
744 static void
fasttrap_return_common(struct reg * rp,uintptr_t pc,pid_t pid,uintptr_t new_pc)745 fasttrap_return_common(struct reg *rp, uintptr_t pc, pid_t pid,
746     uintptr_t new_pc)
747 {
748 	fasttrap_tracepoint_t *tp;
749 	fasttrap_bucket_t *bucket;
750 	fasttrap_id_t *id;
751 #ifdef illumos
752 	kmutex_t *pid_mtx;
753 
754 	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
755 	mutex_enter(pid_mtx);
756 #else
757 	struct rm_priotracker tracker;
758 
759 	rm_rlock(&fasttrap_tp_lock, &tracker);
760 #endif
761 	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
762 
763 	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
764 		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
765 		    tp->ftt_proc->ftpc_acount != 0)
766 			break;
767 	}
768 
769 	/*
770 	 * Don't sweat it if we can't find the tracepoint again; unlike
771 	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
772 	 * is not essential to the correct execution of the process.
773 	 */
774 	if (tp == NULL) {
775 #ifdef illumos
776 		mutex_exit(pid_mtx);
777 #else
778 		rm_runlock(&fasttrap_tp_lock, &tracker);
779 #endif
780 		return;
781 	}
782 
783 	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
784 		/*
785 		 * If there's a branch that could act as a return site, we
786 		 * need to trace it, and check here if the program counter is
787 		 * external to the function.
788 		 */
789 		if (tp->ftt_type != FASTTRAP_T_RET &&
790 		    tp->ftt_type != FASTTRAP_T_RET16 &&
791 		    new_pc - id->fti_probe->ftp_faddr <
792 		    id->fti_probe->ftp_fsize)
793 			continue;
794 
795 		dtrace_probe(id->fti_probe->ftp_id,
796 		    pc - id->fti_probe->ftp_faddr,
797 		    rp->r_rax, rp->r_rbx, 0, 0);
798 	}
799 
800 #ifdef illumos
801 	mutex_exit(pid_mtx);
802 #else
803 	rm_runlock(&fasttrap_tp_lock, &tracker);
804 #endif
805 }
806 
807 static void
fasttrap_sigsegv(proc_t * p,kthread_t * t,uintptr_t addr)808 fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr)
809 {
810 #ifdef illumos
811 	sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
812 
813 	sqp->sq_info.si_signo = SIGSEGV;
814 	sqp->sq_info.si_code = SEGV_MAPERR;
815 	sqp->sq_info.si_addr = (caddr_t)addr;
816 
817 	mutex_enter(&p->p_lock);
818 	sigaddqa(p, t, sqp);
819 	mutex_exit(&p->p_lock);
820 
821 	if (t != NULL)
822 		aston(t);
823 #else
824 	ksiginfo_t *ksi = kmem_zalloc(sizeof (ksiginfo_t), KM_SLEEP);
825 
826 	ksiginfo_init(ksi);
827 	ksi->ksi_signo = SIGSEGV;
828 	ksi->ksi_code = SEGV_MAPERR;
829 	ksi->ksi_addr = (caddr_t)addr;
830 	(void) tdksignal(t, SIGSEGV, ksi);
831 #endif
832 }
833 
834 #ifdef __amd64
835 static void
fasttrap_usdt_args64(fasttrap_probe_t * probe,struct reg * rp,int argc,uintptr_t * argv)836 fasttrap_usdt_args64(fasttrap_probe_t *probe, struct reg *rp, int argc,
837     uintptr_t *argv)
838 {
839 	int i, x, cap = MIN(argc, probe->ftp_nargs);
840 	uintptr_t *stack = (uintptr_t *)rp->r_rsp;
841 
842 	for (i = 0; i < cap; i++) {
843 		x = probe->ftp_argmap[i];
844 
845 		if (x < 6)
846 			argv[i] = (&rp->r_rdi)[x];
847 		else
848 			argv[i] = fasttrap_fulword_noerr(&stack[x]);
849 	}
850 
851 	for (; i < argc; i++) {
852 		argv[i] = 0;
853 	}
854 }
855 #endif
856 
857 #ifdef __i386__
858 static void
fasttrap_usdt_args32(fasttrap_probe_t * probe,struct reg * rp,int argc,uint32_t * argv)859 fasttrap_usdt_args32(fasttrap_probe_t *probe, struct reg *rp, int argc,
860     uint32_t *argv)
861 {
862 	int i, x, cap = MIN(argc, probe->ftp_nargs);
863 	uint32_t *stack = (uint32_t *)rp->r_rsp;
864 
865 	for (i = 0; i < cap; i++) {
866 		x = probe->ftp_argmap[i];
867 
868 		argv[i] = fasttrap_fuword32_noerr(&stack[x]);
869 	}
870 
871 	for (; i < argc; i++) {
872 		argv[i] = 0;
873 	}
874 }
875 #endif
876 
877 static int
fasttrap_do_seg(fasttrap_tracepoint_t * tp,struct reg * rp,uintptr_t * addr)878 fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct reg *rp, uintptr_t *addr)
879 {
880 	proc_t *p = curproc;
881 #ifdef __i386__
882 	struct segment_descriptor *desc;
883 #else
884 	struct user_segment_descriptor *desc;
885 #endif
886 	uint16_t sel = 0, ndx, type;
887 	uintptr_t limit;
888 
889 	switch (tp->ftt_segment) {
890 	case FASTTRAP_SEG_CS:
891 		sel = rp->r_cs;
892 		break;
893 	case FASTTRAP_SEG_DS:
894 		sel = rp->r_ds;
895 		break;
896 	case FASTTRAP_SEG_ES:
897 		sel = rp->r_es;
898 		break;
899 	case FASTTRAP_SEG_FS:
900 		sel = rp->r_fs;
901 		break;
902 	case FASTTRAP_SEG_GS:
903 		sel = rp->r_gs;
904 		break;
905 	case FASTTRAP_SEG_SS:
906 		sel = rp->r_ss;
907 		break;
908 	}
909 
910 	/*
911 	 * Make sure the given segment register specifies a user priority
912 	 * selector rather than a kernel selector.
913 	 */
914 	if (ISPL(sel) != SEL_UPL)
915 		return (-1);
916 
917 	ndx = IDXSEL(sel);
918 
919 	/*
920 	 * Check the bounds and grab the descriptor out of the specified
921 	 * descriptor table.
922 	 */
923 	if (ISLDT(sel)) {
924 #ifdef __i386__
925 		if (ndx > p->p_md.md_ldt->ldt_len)
926 			return (-1);
927 
928 		desc = (struct segment_descriptor *)
929 		    p->p_md.md_ldt[ndx].ldt_base;
930 #else
931 		if (ndx > max_ldt_segment)
932 			return (-1);
933 
934 		desc = (struct user_segment_descriptor *)
935 		    p->p_md.md_ldt[ndx].ldt_base;
936 #endif
937 
938 	} else {
939 		if (ndx >= NGDT)
940 			return (-1);
941 
942 #ifdef __i386__
943 		desc = &gdt[ndx].sd;
944 #else
945 		desc = &gdt[ndx];
946 #endif
947 	}
948 
949 	/*
950 	 * The descriptor must have user privilege level and it must be
951 	 * present in memory.
952 	 */
953 	if (desc->sd_dpl != SEL_UPL || desc->sd_p != 1)
954 		return (-1);
955 
956 	type = desc->sd_type;
957 
958 	/*
959 	 * If the S bit in the type field is not set, this descriptor can
960 	 * only be used in system context.
961 	 */
962 	if ((type & 0x10) != 0x10)
963 		return (-1);
964 
965 	limit = USD_GETLIMIT(desc) * (desc->sd_gran ? PAGESIZE : 1);
966 
967 	if (tp->ftt_segment == FASTTRAP_SEG_CS) {
968 		/*
969 		 * The code/data bit and readable bit must both be set.
970 		 */
971 		if ((type & 0xa) != 0xa)
972 			return (-1);
973 
974 		if (*addr > limit)
975 			return (-1);
976 	} else {
977 		/*
978 		 * The code/data bit must be clear.
979 		 */
980 		if ((type & 0x8) != 0)
981 			return (-1);
982 
983 		/*
984 		 * If the expand-down bit is clear, we just check the limit as
985 		 * it would naturally be applied. Otherwise, we need to check
986 		 * that the address is the range [limit + 1 .. 0xffff] or
987 		 * [limit + 1 ... 0xffffffff] depending on if the default
988 		 * operand size bit is set.
989 		 */
990 		if ((type & 0x4) == 0) {
991 			if (*addr > limit)
992 				return (-1);
993 		} else if (desc->sd_def32) {
994 			if (*addr < limit + 1 || 0xffff < *addr)
995 				return (-1);
996 		} else {
997 			if (*addr < limit + 1 || 0xffffffff < *addr)
998 				return (-1);
999 		}
1000 	}
1001 
1002 	*addr += USD_GETBASE(desc);
1003 
1004 	return (0);
1005 }
1006 
1007 int
fasttrap_pid_probe(struct reg * rp)1008 fasttrap_pid_probe(struct reg *rp)
1009 {
1010 	proc_t *p = curproc;
1011 #ifndef illumos
1012 	struct rm_priotracker tracker;
1013 	proc_t *pp;
1014 #endif
1015 	uintptr_t pc = rp->r_rip - 1;
1016 	uintptr_t new_pc = 0;
1017 	fasttrap_bucket_t *bucket;
1018 #ifdef illumos
1019 	kmutex_t *pid_mtx;
1020 #endif
1021 	fasttrap_tracepoint_t *tp, tp_local;
1022 	pid_t pid;
1023 	dtrace_icookie_t cookie;
1024 	uint_t is_enabled = 0;
1025 
1026 	/*
1027 	 * It's possible that a user (in a veritable orgy of bad planning)
1028 	 * could redirect this thread's flow of control before it reached the
1029 	 * return probe fasttrap. In this case we need to kill the process
	 * since it's in an unrecoverable state.
1031 	 */
1032 	if (curthread->t_dtrace_step) {
1033 		ASSERT(curthread->t_dtrace_on);
1034 		fasttrap_sigtrap(p, curthread, pc);
1035 		return (0);
1036 	}
1037 
1038 	/*
1039 	 * Clear all user tracing flags.
1040 	 */
1041 	curthread->t_dtrace_ft = 0;
1042 	curthread->t_dtrace_pc = 0;
1043 	curthread->t_dtrace_npc = 0;
1044 	curthread->t_dtrace_scrpc = 0;
1045 	curthread->t_dtrace_astpc = 0;
1046 #ifdef __amd64
1047 	curthread->t_dtrace_regv = 0;
1048 #endif
1049 
1050 	/*
1051 	 * Treat a child created by a call to vfork(2) as if it were its
1052 	 * parent. We know that there's only one thread of control in such a
1053 	 * process: this one.
1054 	 */
1055 #ifdef illumos
1056 	while (p->p_flag & SVFORK) {
1057 		p = p->p_parent;
1058 	}
1059 
1060 	pid = p->p_pid;
1061 	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
1062 	mutex_enter(pid_mtx);
1063 #else
1064 	pp = p;
1065 	sx_slock(&proctree_lock);
1066 	while (pp->p_vmspace == pp->p_pptr->p_vmspace)
1067 		pp = pp->p_pptr;
1068 	pid = pp->p_pid;
1069 	sx_sunlock(&proctree_lock);
1070 	pp = NULL;
1071 
1072 	rm_rlock(&fasttrap_tp_lock, &tracker);
1073 #endif
1074 
1075 	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
1076 
1077 	/*
1078 	 * Lookup the tracepoint that the process just hit.
1079 	 */
1080 	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
1081 		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
1082 		    tp->ftt_proc->ftpc_acount != 0)
1083 			break;
1084 	}
1085 
1086 	/*
1087 	 * If we couldn't find a matching tracepoint, either a tracepoint has
1088 	 * been inserted without using the pid<pid> ioctl interface (see
1089 	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
1090 	 */
1091 	if (tp == NULL) {
1092 #ifdef illumos
1093 		mutex_exit(pid_mtx);
1094 #else
1095 		rm_runlock(&fasttrap_tp_lock, &tracker);
1096 #endif
1097 		return (-1);
1098 	}
1099 
1100 	/*
1101 	 * Set the program counter to the address of the traced instruction
1102 	 * so that it looks right in ustack() output.
1103 	 */
1104 	rp->r_rip = pc;
1105 
1106 	if (tp->ftt_ids != NULL) {
1107 		fasttrap_id_t *id;
1108 
1109 #ifdef __amd64
1110 		if (p->p_model == DATAMODEL_LP64) {
1111 			for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
1112 				fasttrap_probe_t *probe = id->fti_probe;
1113 
1114 				if (id->fti_ptype == DTFTP_ENTRY) {
1115 					/*
1116 					 * We note that this was an entry
1117 					 * probe to help ustack() find the
1118 					 * first caller.
1119 					 */
1120 					cookie = dtrace_interrupt_disable();
1121 					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
1122 					dtrace_probe(probe->ftp_id, rp->r_rdi,
1123 					    rp->r_rsi, rp->r_rdx, rp->r_rcx,
1124 					    rp->r_r8);
1125 					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
1126 					dtrace_interrupt_enable(cookie);
1127 				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
1128 					/*
1129 					 * Note that in this case, we don't
1130 					 * call dtrace_probe() since it's only
1131 					 * an artificial probe meant to change
1132 					 * the flow of control so that it
1133 					 * encounters the true probe.
1134 					 */
1135 					is_enabled = 1;
1136 				} else if (probe->ftp_argmap == NULL) {
1137 					dtrace_probe(probe->ftp_id, rp->r_rdi,
1138 					    rp->r_rsi, rp->r_rdx, rp->r_rcx,
1139 					    rp->r_r8);
1140 				} else {
1141 					uintptr_t t[5];
1142 
1143 					fasttrap_usdt_args64(probe, rp,
1144 					    sizeof (t) / sizeof (t[0]), t);
1145 
1146 					dtrace_probe(probe->ftp_id, t[0], t[1],
1147 					    t[2], t[3], t[4]);
1148 				}
1149 			}
1150 		} else {
1151 #else /* __amd64 */
1152 			uintptr_t s0, s1, s2, s3, s4, s5;
1153 			uint32_t *stack = (uint32_t *)rp->r_esp;
1154 
1155 			/*
1156 			 * In 32-bit mode, all arguments are passed on the
1157 			 * stack. If this is a function entry probe, we need
1158 			 * to skip the first entry on the stack as it
1159 			 * represents the return address rather than a
1160 			 * parameter to the function.
1161 			 */
1162 			s0 = fasttrap_fuword32_noerr(&stack[0]);
1163 			s1 = fasttrap_fuword32_noerr(&stack[1]);
1164 			s2 = fasttrap_fuword32_noerr(&stack[2]);
1165 			s3 = fasttrap_fuword32_noerr(&stack[3]);
1166 			s4 = fasttrap_fuword32_noerr(&stack[4]);
1167 			s5 = fasttrap_fuword32_noerr(&stack[5]);
1168 
1169 			for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
1170 				fasttrap_probe_t *probe = id->fti_probe;
1171 
1172 				if (id->fti_ptype == DTFTP_ENTRY) {
1173 					/*
1174 					 * We note that this was an entry
1175 					 * probe to help ustack() find the
1176 					 * first caller.
1177 					 */
1178 					cookie = dtrace_interrupt_disable();
1179 					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
1180 					dtrace_probe(probe->ftp_id, s1, s2,
1181 					    s3, s4, s5);
1182 					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
1183 					dtrace_interrupt_enable(cookie);
1184 				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
1185 					/*
1186 					 * Note that in this case, we don't
1187 					 * call dtrace_probe() since it's only
1188 					 * an artificial probe meant to change
1189 					 * the flow of control so that it
1190 					 * encounters the true probe.
1191 					 */
1192 					is_enabled = 1;
1193 				} else if (probe->ftp_argmap == NULL) {
1194 					dtrace_probe(probe->ftp_id, s0, s1,
1195 					    s2, s3, s4);
1196 				} else {
1197 					uint32_t t[5];
1198 
1199 					fasttrap_usdt_args32(probe, rp,
1200 					    sizeof (t) / sizeof (t[0]), t);
1201 
1202 					dtrace_probe(probe->ftp_id, t[0], t[1],
1203 					    t[2], t[3], t[4]);
1204 				}
1205 			}
1206 #endif /* __amd64 */
1207 #ifdef __amd64
1208 		}
1209 #endif
1210 	}
1211 
1212 	/*
1213 	 * We're about to do a bunch of work so we cache a local copy of
1214 	 * the tracepoint to emulate the instruction, and then find the
1215 	 * tracepoint again later if we need to light up any return probes.
1216 	 */
1217 	tp_local = *tp;
1218 #ifdef illumos
1219 	mutex_exit(pid_mtx);
1220 #else
1221 	rm_runlock(&fasttrap_tp_lock, &tracker);
1222 #endif
1223 	tp = &tp_local;
1224 
1225 	/*
1226 	 * Set the program counter to appear as though the traced instruction
1227 	 * had completely executed. This ensures that fasttrap_getreg() will
1228 	 * report the expected value for REG_RIP.
1229 	 */
1230 	rp->r_rip = pc + tp->ftt_size;
1231 
1232 	/*
1233 	 * If there's an is-enabled probe connected to this tracepoint it
1234 	 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'
1235 	 * instruction that was placed there by DTrace when the binary was
1236 	 * linked. As this probe is, in fact, enabled, we need to stuff 1
1237 	 * into %eax or %rax. Accordingly, we can bypass all the instruction
1238 	 * emulation logic since we know the inevitable result. It's possible
1239 	 * that a user could construct a scenario where the 'is-enabled'
1240 	 * probe was on some other instruction, but that would be a rather
1241 	 * exotic way to shoot oneself in the foot.
1242 	 */
1243 	if (is_enabled) {
1244 		rp->r_rax = 1;
1245 		new_pc = rp->r_rip;
1246 		goto done;
1247 	}
1248 
1249 	/*
1250 	 * We emulate certain types of instructions to ensure correctness
1251 	 * (in the case of position dependent instructions) or optimize
1252 	 * common cases. The rest we have the thread execute back in user-
1253 	 * land.
1254 	 */
1255 	switch (tp->ftt_type) {
1256 	case FASTTRAP_T_RET:
1257 	case FASTTRAP_T_RET16:
1258 	{
1259 		uintptr_t dst = 0;
1260 		uintptr_t addr = 0;
1261 		int ret = 0;
1262 
1263 		/*
1264 		 * We have to emulate _every_ facet of the behavior of a ret
1265 		 * instruction including what happens if the load from %esp
1266 		 * fails; in that case, we send a SIGSEGV.
1267 		 */
1268 #ifdef __amd64
1269 		if (p->p_model == DATAMODEL_NATIVE) {
1270 			ret = dst = fasttrap_fulword((void *)rp->r_rsp);
1271 			addr = rp->r_rsp + sizeof (uintptr_t);
1272 		} else {
1273 #endif
1274 #ifdef __i386__
1275 			uint32_t dst32;
1276 			ret = dst32 = fasttrap_fuword32((void *)rp->r_esp);
1277 			dst = dst32;
1278 			addr = rp->r_esp + sizeof (uint32_t);
1279 #endif
1280 #ifdef __amd64
1281 		}
1282 #endif
1283 
1284 		if (ret == -1) {
1285 			fasttrap_sigsegv(p, curthread, rp->r_rsp);
1286 			new_pc = pc;
1287 			break;
1288 		}
1289 
1290 		if (tp->ftt_type == FASTTRAP_T_RET16)
1291 			addr += tp->ftt_dest;
1292 
1293 		rp->r_rsp = addr;
1294 		new_pc = dst;
1295 		break;
1296 	}
1297 
1298 	case FASTTRAP_T_JCC:
1299 	{
1300 		uint_t taken = 0;
1301 
1302 		switch (tp->ftt_code) {
1303 		case FASTTRAP_JO:
1304 			taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) != 0;
1305 			break;
1306 		case FASTTRAP_JNO:
1307 			taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0;
1308 			break;
1309 		case FASTTRAP_JB:
1310 			taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0;
1311 			break;
1312 		case FASTTRAP_JAE:
1313 			taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0;
1314 			break;
1315 		case FASTTRAP_JE:
1316 			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0;
1317 			break;
1318 		case FASTTRAP_JNE:
1319 			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0;
1320 			break;
1321 		case FASTTRAP_JBE:
1322 			taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0 ||
1323 			    (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0;
1324 			break;
1325 		case FASTTRAP_JA:
1326 			taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0 &&
1327 			    (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0;
1328 			break;
1329 		case FASTTRAP_JS:
1330 			taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) != 0;
1331 			break;
1332 		case FASTTRAP_JNS:
1333 			taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0;
1334 			break;
1335 		case FASTTRAP_JP:
1336 			taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) != 0;
1337 			break;
1338 		case FASTTRAP_JNP:
1339 			taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) == 0;
1340 			break;
1341 		case FASTTRAP_JL:
1342 			taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) !=
1343 			    ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);
1344 			break;
1345 		case FASTTRAP_JGE:
1346 			taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) ==
1347 			    ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);
1348 			break;
1349 		case FASTTRAP_JLE:
1350 			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 ||
1351 			    ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) !=
1352 			    ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);
1353 			break;
1354 		case FASTTRAP_JG:
1355 			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 &&
1356 			    ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) ==
1357 			    ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);
1358 			break;
1359 
1360 		}
1361 
1362 		if (taken)
1363 			new_pc = tp->ftt_dest;
1364 		else
1365 			new_pc = pc + tp->ftt_size;
1366 		break;
1367 	}
1368 
1369 	case FASTTRAP_T_LOOP:
1370 	{
1371 		uint_t taken = 0;
1372 #ifdef __amd64
1373 		greg_t cx = rp->r_rcx--;
1374 #else
1375 		greg_t cx = rp->r_ecx--;
1376 #endif
1377 
1378 		switch (tp->ftt_code) {
1379 		case FASTTRAP_LOOPNZ:
1380 			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 &&
1381 			    cx != 0;
1382 			break;
1383 		case FASTTRAP_LOOPZ:
1384 			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 &&
1385 			    cx != 0;
1386 			break;
1387 		case FASTTRAP_LOOP:
1388 			taken = (cx != 0);
1389 			break;
1390 		}
1391 
1392 		if (taken)
1393 			new_pc = tp->ftt_dest;
1394 		else
1395 			new_pc = pc + tp->ftt_size;
1396 		break;
1397 	}
1398 
1399 	case FASTTRAP_T_JCXZ:
1400 	{
1401 #ifdef __amd64
1402 		greg_t cx = rp->r_rcx;
1403 #else
1404 		greg_t cx = rp->r_ecx;
1405 #endif
1406 
1407 		if (cx == 0)
1408 			new_pc = tp->ftt_dest;
1409 		else
1410 			new_pc = pc + tp->ftt_size;
1411 		break;
1412 	}
1413 
1414 	case FASTTRAP_T_PUSHL_EBP:
1415 	{
1416 		int ret = 0;
1417 
1418 #ifdef __amd64
1419 		if (p->p_model == DATAMODEL_NATIVE) {
1420 			rp->r_rsp -= sizeof (uintptr_t);
1421 			ret = fasttrap_sulword((void *)rp->r_rsp, rp->r_rbp);
1422 		} else {
1423 #endif
1424 #ifdef __i386__
1425 			rp->r_rsp -= sizeof (uint32_t);
1426 			ret = fasttrap_suword32((void *)rp->r_rsp, rp->r_rbp);
1427 #endif
1428 #ifdef __amd64
1429 		}
1430 #endif
1431 
1432 		if (ret == -1) {
1433 			fasttrap_sigsegv(p, curthread, rp->r_rsp);
1434 			new_pc = pc;
1435 			break;
1436 		}
1437 
1438 		new_pc = pc + tp->ftt_size;
1439 		break;
1440 	}
1441 
1442 	case FASTTRAP_T_NOP:
1443 		new_pc = pc + tp->ftt_size;
1444 		break;
1445 
1446 	case FASTTRAP_T_JMP:
1447 	case FASTTRAP_T_CALL:
1448 		if (tp->ftt_code == 0) {
1449 			new_pc = tp->ftt_dest;
1450 		} else {
1451 			uintptr_t value, addr = tp->ftt_dest;
1452 
1453 			if (tp->ftt_base != FASTTRAP_NOREG)
1454 				addr += fasttrap_getreg(rp, tp->ftt_base);
1455 			if (tp->ftt_index != FASTTRAP_NOREG)
1456 				addr += fasttrap_getreg(rp, tp->ftt_index) <<
1457 				    tp->ftt_scale;
1458 
1459 			if (tp->ftt_code == 1) {
1460 				/*
1461 				 * If there's a segment prefix for this
1462 				 * instruction, we'll need to check permissions
1463 				 * and bounds on the given selector, and adjust
1464 				 * the address accordingly.
1465 				 */
1466 				if (tp->ftt_segment != FASTTRAP_SEG_NONE &&
1467 				    fasttrap_do_seg(tp, rp, &addr) != 0) {
1468 					fasttrap_sigsegv(p, curthread, addr);
1469 					new_pc = pc;
1470 					break;
1471 				}
1472 
1473 #ifdef __amd64
1474 				if (p->p_model == DATAMODEL_NATIVE) {
1475 #endif
1476 					if ((value = fasttrap_fulword((void *)addr))
1477 					     == -1) {
1478 						fasttrap_sigsegv(p, curthread,
1479 						    addr);
1480 						new_pc = pc;
1481 						break;
1482 					}
1483 					new_pc = value;
1484 #ifdef __amd64
1485 				} else {
1486 					uint32_t value32;
1487 					addr = (uintptr_t)(uint32_t)addr;
1488 					if ((value32 = fasttrap_fuword32((void *)addr))
1489 					    == -1) {
1490 						fasttrap_sigsegv(p, curthread,
1491 						    addr);
1492 						new_pc = pc;
1493 						break;
1494 					}
1495 					new_pc = value32;
1496 				}
1497 #endif
1498 			} else {
1499 				new_pc = addr;
1500 			}
1501 		}
1502 
1503 		/*
1504 		 * If this is a call instruction, we need to push the return
1505 		 * address onto the stack. If this fails, we send the process
1506 		 * a SIGSEGV and reset the pc to emulate what would happen if
1507 		 * this instruction weren't traced.
1508 		 */
1509 		if (tp->ftt_type == FASTTRAP_T_CALL) {
1510 			int ret = 0;
1511 			uintptr_t addr = 0, pcps;
1512 #ifdef __amd64
1513 			if (p->p_model == DATAMODEL_NATIVE) {
1514 				addr = rp->r_rsp - sizeof (uintptr_t);
1515 				pcps = pc + tp->ftt_size;
1516 				ret = fasttrap_sulword((void *)addr, pcps);
1517 			} else {
1518 #endif
1519 				addr = rp->r_rsp - sizeof (uint32_t);
1520 				pcps = (uint32_t)(pc + tp->ftt_size);
1521 				ret = fasttrap_suword32((void *)addr, pcps);
1522 #ifdef __amd64
1523 			}
1524 #endif
1525 
1526 			if (ret == -1) {
1527 				fasttrap_sigsegv(p, curthread, addr);
1528 				new_pc = pc;
1529 				break;
1530 			}
1531 
1532 			rp->r_rsp = addr;
1533 		}
1534 
1535 		break;
1536 
1537 	case FASTTRAP_T_COMMON:
1538 	{
1539 		uintptr_t addr;
1540 #if defined(__amd64)
1541 		uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22];
1542 #else
1543 		uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7];
1544 #endif
1545 		uint_t i = 0;
1546 #ifdef illumos
1547 		klwp_t *lwp = ttolwp(curthread);
1548 
1549 		/*
1550 		 * Compute the address of the ulwp_t and step over the
1551 		 * ul_self pointer. The method used to store the user-land
1552 		 * thread pointer is very different on 32- and 64-bit
1553 		 * kernels.
1554 		 */
1555 #if defined(__amd64)
1556 		if (p->p_model == DATAMODEL_LP64) {
1557 			addr = lwp->lwp_pcb.pcb_fsbase;
1558 			addr += sizeof (void *);
1559 		} else {
1560 			addr = lwp->lwp_pcb.pcb_gsbase;
1561 			addr += sizeof (caddr32_t);
1562 		}
1563 #else
1564 		addr = USD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc);
1565 		addr += sizeof (void *);
1566 #endif
1567 #else	/* !illumos */
1568 		fasttrap_scrspace_t *scrspace;
1569 		scrspace = fasttrap_scraddr(curthread, tp->ftt_proc);
1570 		if (scrspace == NULL) {
1571 			/*
1572 			 * We failed to allocate scratch space for this thread.
1573 			 * Try to write the original instruction back out and
1574 			 * reset the pc.
1575 			 */
1576 			if (fasttrap_copyout(tp->ftt_instr, (void *)pc,
1577 			    tp->ftt_size))
1578 				fasttrap_sigtrap(p, curthread, pc);
1579 			new_pc = pc;
1580 			break;
1581 		}
1582 		addr = scrspace->ftss_addr;
1583 #endif /* illumos */
1584 
1585 		/*
1586 		 * Generic Instruction Tracing
1587 		 * ---------------------------
1588 		 *
1589 		 * This is the layout of the scratch space in the user-land
1590 		 * thread structure for our generated instructions.
1591 		 *
1592 		 *	32-bit mode			bytes
1593 		 *	------------------------	-----
1594 		 * a:	<original instruction>		<= 15
1595 		 *	jmp	<pc + tp->ftt_size>	    5
1596 		 * b:	<original instruction>		<= 15
1597 		 *	int	T_DTRACE_RET		    2
1598 		 *					-----
1599 		 *					<= 37
1600 		 *
1601 		 *	64-bit mode			bytes
1602 		 *	------------------------	-----
1603 		 * a:	<original instruction>		<= 15
1604 		 *	jmp	0(%rip)			    6
1605 		 *	<pc + tp->ftt_size>		    8
1606 		 * b:	<original instruction>		<= 15
1607 		 * 	int	T_DTRACE_RET		    2
1608 		 * 					-----
1609 		 * 					<= 46
1610 		 *
1611 		 * The %pc is set to a, and curthread->t_dtrace_astpc is set
1612 		 * to b. If we encounter a signal on the way out of the
1613 		 * kernel, trap() will set %pc to curthread->t_dtrace_astpc
1614 		 * so that we execute the original instruction and re-enter
1615 		 * the kernel rather than redirecting to the next instruction.
1616 		 *
1617 		 * If there are return probes (so we know that we're going to
1618 		 * need to reenter the kernel after executing the original
1619 		 * instruction), the scratch space will just contain the
1620 		 * original instruction followed by an interrupt -- the same
1621 		 * data as at b.
1622 		 *
1623 		 * %rip-relative Addressing
1624 		 * ------------------------
1625 		 *
1626 		 * There's a further complication in 64-bit mode due to %rip-
1627 		 * relative addressing. While this is clearly a beneficial
1628 		 * architectural decision for position independent code, it's
1629 		 * hard not to see it as a personal attack against the pid
1630 		 * provider since before there was a relatively small set of
1631 		 * instructions to emulate; with %rip-relative addressing,
1632 		 * almost every instruction can potentially depend on the
1633 		 * address at which it's executed. Rather than emulating
1634 		 * the broad spectrum of instructions that can now be
1635 		 * position dependent, we emulate jumps and others as in
1636 		 * 32-bit mode, and take a different tack for instructions
1637 		 * using %rip-relative addressing.
1638 		 *
1639 		 * For every instruction that uses the ModRM byte, the
1640 		 * in-kernel disassembler reports its location. We use the
1641 		 * ModRM byte to identify that an instruction uses
1642 		 * %rip-relative addressing and to see what other registers
1643 		 * the instruction uses. To emulate those instructions,
1644 		 * we modify the instruction to be %rax-relative rather than
1645 		 * %rip-relative (or %rcx-relative if the instruction uses
1646 		 * %rax; or %r8- or %r9-relative if the REX.B is present so
1647 		 * we don't have to rewrite the REX prefix). We then load
1648 		 * the value that %rip would have been into the scratch
1649 		 * register and generate an instruction to reset the scratch
1650 		 * register back to its original value. The instruction
1651 		 * sequence looks like this:
1652 		 *
1653 		 *	64-mode %rip-relative		bytes
1654 		 *	------------------------	-----
1655 		 * a:	<modified instruction>		<= 15
1656 		 *	movq	$<value>, %<scratch>	    6
1657 		 *	jmp	0(%rip)			    6
1658 		 *	<pc + tp->ftt_size>		    8
1659 		 * b:	<modified instruction>  	<= 15
1660 		 * 	int	T_DTRACE_RET		    2
1661 		 * 					-----
1662 		 *					   52
1663 		 *
1664 		 * We set curthread->t_dtrace_regv so that upon receiving
1665 		 * a signal we can reset the value of the scratch register.
1666 		 */
1667 
1668 		ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE);
1669 
1670 		curthread->t_dtrace_scrpc = addr;
1671 		bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
1672 		i += tp->ftt_size;
1673 
1674 #ifdef __amd64
1675 		if (tp->ftt_ripmode != 0) {
1676 			greg_t *reg = NULL;
1677 
1678 			ASSERT(p->p_model == DATAMODEL_LP64);
1679 			ASSERT(tp->ftt_ripmode &
1680 			    (FASTTRAP_RIP_1 | FASTTRAP_RIP_2));
1681 
1682 			/*
1683 			 * If this was a %rip-relative instruction, we change
1684 			 * it to be either a %rax- or %rcx-relative
1685 			 * instruction (depending on whether those registers
1686 			 * are used as another operand; or %r8- or %r9-
1687 			 * relative depending on the value of REX.B). We then
1688 			 * set that register and generate a movq instruction
1689 			 * to reset the value.
1690 			 */
1691 			if (tp->ftt_ripmode & FASTTRAP_RIP_X)
1692 				scratch[i++] = FASTTRAP_REX(1, 0, 0, 1);
1693 			else
1694 				scratch[i++] = FASTTRAP_REX(1, 0, 0, 0);
1695 
1696 			if (tp->ftt_ripmode & FASTTRAP_RIP_1)
1697 				scratch[i++] = FASTTRAP_MOV_EAX;
1698 			else
1699 				scratch[i++] = FASTTRAP_MOV_ECX;
1700 
1701 			switch (tp->ftt_ripmode) {
1702 			case FASTTRAP_RIP_1:
1703 				reg = &rp->r_rax;
1704 				curthread->t_dtrace_reg = REG_RAX;
1705 				break;
1706 			case FASTTRAP_RIP_2:
1707 				reg = &rp->r_rcx;
1708 				curthread->t_dtrace_reg = REG_RCX;
1709 				break;
1710 			case FASTTRAP_RIP_1 | FASTTRAP_RIP_X:
1711 				reg = &rp->r_r8;
1712 				curthread->t_dtrace_reg = REG_R8;
1713 				break;
1714 			case FASTTRAP_RIP_2 | FASTTRAP_RIP_X:
1715 				reg = &rp->r_r9;
1716 				curthread->t_dtrace_reg = REG_R9;
1717 				break;
1718 			}
1719 
1720 			/* LINTED - alignment */
1721 			*(uint64_t *)&scratch[i] = *reg;
1722 			curthread->t_dtrace_regv = *reg;
1723 			*reg = pc + tp->ftt_size;
1724 			i += sizeof (uint64_t);
1725 		}
1726 #endif
1727 
1728 		/*
1729 		 * Generate the branch instruction to what would have
1730 		 * normally been the subsequent instruction. In 32-bit mode,
1731 		 * this is just a relative branch; in 64-bit mode this is a
1732 		 * %rip-relative branch that loads the 64-bit pc value
1733 		 * immediately after the jmp instruction.
1734 		 */
1735 #ifdef __amd64
1736 		if (p->p_model == DATAMODEL_LP64) {
1737 			scratch[i++] = FASTTRAP_GROUP5_OP;
1738 			scratch[i++] = FASTTRAP_MODRM(0, 4, 5);
1739 			/* LINTED - alignment */
1740 			*(uint32_t *)&scratch[i] = 0;
1741 			i += sizeof (uint32_t);
1742 			/* LINTED - alignment */
1743 			*(uint64_t *)&scratch[i] = pc + tp->ftt_size;
1744 			i += sizeof (uint64_t);
1745 		} else {
1746 #endif
1747 #ifdef __i386__
1748 			/*
1749 			 * Set up the jmp to the next instruction; note that
1750 			 * the size of the traced instruction cancels out.
1751 			 */
1752 			scratch[i++] = FASTTRAP_JMP32;
1753 			/* LINTED - alignment */
1754 			*(uint32_t *)&scratch[i] = pc - addr - 5;
1755 			i += sizeof (uint32_t);
1756 #endif
1757 #ifdef __amd64
1758 		}
1759 #endif
1760 
1761 		curthread->t_dtrace_astpc = addr + i;
1762 		bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
1763 		i += tp->ftt_size;
1764 		scratch[i++] = FASTTRAP_INT;
1765 		scratch[i++] = T_DTRACE_RET;
1766 
1767 		ASSERT(i <= sizeof (scratch));
1768 
1769 #ifdef illumos
1770 		if (fasttrap_copyout(scratch, (char *)addr, i)) {
1771 #else
1772 		if (uwrite(p, scratch, i, addr)) {
1773 #endif
1774 			fasttrap_sigtrap(p, curthread, pc);
1775 			new_pc = pc;
1776 			break;
1777 		}
1778 		if (tp->ftt_retids != NULL) {
1779 			curthread->t_dtrace_step = 1;
1780 			curthread->t_dtrace_ret = 1;
1781 			new_pc = curthread->t_dtrace_astpc;
1782 		} else {
1783 			new_pc = curthread->t_dtrace_scrpc;
1784 		}
1785 
1786 		curthread->t_dtrace_pc = pc;
1787 		curthread->t_dtrace_npc = pc + tp->ftt_size;
1788 		curthread->t_dtrace_on = 1;
1789 		break;
1790 	}
1791 
1792 	default:
1793 		panic("fasttrap: mishandled an instruction");
1794 	}
1795 
1796 done:
1797 	/*
1798 	 * If there were no return probes when we first found the tracepoint,
1799 	 * we should feel no obligation to honor any return probes that were
1800 	 * subsequently enabled -- they'll just have to wait until the next
1801 	 * time around.
1802 	 */
1803 	if (tp->ftt_retids != NULL) {
1804 		/*
1805 		 * We need to wait until the results of the instruction are
1806 		 * apparent before invoking any return probes. If this
1807 		 * instruction was emulated we can just call
1808 		 * fasttrap_return_common(); if it needs to be executed, we
1809 		 * need to wait until the user thread returns to the kernel.
1810 		 */
1811 		if (tp->ftt_type != FASTTRAP_T_COMMON) {
1812 			/*
1813 			 * Set the program counter to the address of the traced
1814 			 * instruction so that it looks right in ustack()
1815 			 * output. We had previously set it to the end of the
1816 			 * instruction to simplify %rip-relative addressing.
1817 			 */
1818 			rp->r_rip = pc;
1819 
1820 			fasttrap_return_common(rp, pc, pid, new_pc);
1821 		} else {
1822 			ASSERT(curthread->t_dtrace_ret != 0);
1823 			ASSERT(curthread->t_dtrace_pc == pc);
1824 			ASSERT(curthread->t_dtrace_scrpc != 0);
1825 			ASSERT(new_pc == curthread->t_dtrace_astpc);
1826 		}
1827 	}
1828 
1829 	rp->r_rip = new_pc;
1830 
1831 #ifndef illumos
1832 	PROC_LOCK(p);
1833 	proc_write_regs(curthread, rp);
1834 	PROC_UNLOCK(p);
1835 #endif
1836 
1837 	return (0);
1838 }
1839 
1840 int
1841 fasttrap_return_probe(struct reg *rp)
1842 {
1843 	proc_t *p = curproc;
1844 	uintptr_t pc = curthread->t_dtrace_pc;
1845 	uintptr_t npc = curthread->t_dtrace_npc;
1846 
1847 	curthread->t_dtrace_pc = 0;
1848 	curthread->t_dtrace_npc = 0;
1849 	curthread->t_dtrace_scrpc = 0;
1850 	curthread->t_dtrace_astpc = 0;
1851 
1852 #ifdef illumos
1853 	/*
1854 	 * Treat a child created by a call to vfork(2) as if it were its
1855 	 * parent. We know that there's only one thread of control in such a
1856 	 * process: this one.
1857 	 */
1858 	while (p->p_flag & SVFORK) {
1859 		p = p->p_parent;
1860 	}
1861 #endif
1862 
1863 	/*
1864 	 * We set rp->r_rip to the address of the traced instruction so
1865 	 * that it appears to dtrace_probe() that we're on the original
1866 	 * instruction, and so that the user can't easily detect our
1867 	 * complex web of lies. dtrace_return_probe() (our caller)
1868 	 * will correctly set %pc after we return.
1869 	 */
1870 	rp->r_rip = pc;
1871 
1872 	fasttrap_return_common(rp, pc, p->p_pid, npc);
1873 
1874 	return (0);
1875 }
1876 
1877 /*ARGSUSED*/
1878 uint64_t
1879 fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1880     int aframes)
1881 {
1882 	struct reg r;
1883 
1884 	fill_regs(curthread, &r);
1885 
1886 	return (fasttrap_anarg(&r, 1, argno));
1887 }
1888 
1889 /*ARGSUSED*/
1890 uint64_t
1891 fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1892     int aframes)
1893 {
1894 	struct reg r;
1895 
1896 	fill_regs(curthread, &r);
1897 
1898 	return (fasttrap_anarg(&r, 0, argno));
1899 }
1900 
1901 static ulong_t
1902 fasttrap_getreg(struct reg *rp, uint_t reg)
1903 {
1904 #ifdef __amd64
1905 	switch (reg) {
1906 	case REG_R15:		return (rp->r_r15);
1907 	case REG_R14:		return (rp->r_r14);
1908 	case REG_R13:		return (rp->r_r13);
1909 	case REG_R12:		return (rp->r_r12);
1910 	case REG_R11:		return (rp->r_r11);
1911 	case REG_R10:		return (rp->r_r10);
1912 	case REG_R9:		return (rp->r_r9);
1913 	case REG_R8:		return (rp->r_r8);
1914 	case REG_RDI:		return (rp->r_rdi);
1915 	case REG_RSI:		return (rp->r_rsi);
1916 	case REG_RBP:		return (rp->r_rbp);
1917 	case REG_RBX:		return (rp->r_rbx);
1918 	case REG_RDX:		return (rp->r_rdx);
1919 	case REG_RCX:		return (rp->r_rcx);
1920 	case REG_RAX:		return (rp->r_rax);
1921 	case REG_TRAPNO:	return (rp->r_trapno);
1922 	case REG_ERR:		return (rp->r_err);
1923 	case REG_RIP:		return (rp->r_rip);
1924 	case REG_CS:		return (rp->r_cs);
1925 #ifdef illumos
1926 	case REG_RFL:		return (rp->r_rfl);
1927 #endif
1928 	case REG_RSP:		return (rp->r_rsp);
1929 	case REG_SS:		return (rp->r_ss);
1930 	case REG_FS:		return (rp->r_fs);
1931 	case REG_GS:		return (rp->r_gs);
1932 	case REG_DS:		return (rp->r_ds);
1933 	case REG_ES:		return (rp->r_es);
1934 	case REG_FSBASE:	return (rdmsr(MSR_FSBASE));
1935 	case REG_GSBASE:	return (rdmsr(MSR_GSBASE));
1936 	}
1937 
1938 	panic("dtrace: illegal register constant");
1939 	/*NOTREACHED*/
1940 #else
1941 #define _NGREG 19
1942 	if (reg >= _NGREG)
1943 		panic("dtrace: illegal register constant");
1944 
1945 	return (((greg_t *)&rp->r_gs)[reg]);
1946 #endif
1947 }
1948