1/*- 2 * Copyright (c) 2005 3 * Bill Paul <wpaul@windriver.com>. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by Bill Paul. 16 * 4. Neither the name of the author nor the names of any co-contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 30 * THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33/* The 'ret' macro doesn't work in this file if GPROF is enabled. 
*/ 34#ifdef GPROF 35#undef GPROF 36#endif 37 38#include <machine/asmacros.h> 39 40/* 41 * This file contains assembly language wrappers for the different 42 * calling conventions supported by Windows on the i386 architecture. 43 * In FreeBSD, the whole OS typically uses the same C calling convention 44 * everywhere, namely _cdecl. Windows, on the other hand, uses several 45 * different C calling conventions depending on the circumstances: 46 * 47 * _stdcall: Used for most ordinary Windows APIs. With _stdcall, 48 * arguments are passed on the stack, and the callee unwinds the stack 49 * before returning control to the caller. Not suitable for variadic 50 * functions. 51 * 52 * _fastcall: Used for some APIs that may be invoked frequently and 53 * where speed is a critical factor (e.g. KeAcquireSpinLock() and 54 * KeReleaseSpinLock()). Similar to _stdcall, except the first 2 32-bit 55 * or smaller arguments are passed in the %ecx and %edx registers 56 * instead of on the stack. Not suitable for variadic functions. 57 * 58 * _cdecl: Used for standard C library routines and for variadic 59 * functions. 60 * 61 * _regparm(3): Used for certain assembly routines. All arguments 62 * are passed in %eax, %edx and %ecx. 63 * 64 * Furthermore, there is an additional wrinkle that's not obvious 65 * with all code: Microsoft supports the use of exceptions in C 66 * (__try/__except) both in user _and_ kernel mode. Sadly, Windows 67 * structured exception handling uses machine-specific features 68 * that conflict rather badly with FreeBSD. (See utility routines 69 * at the end of this module for more details.) 70 * 71 * We want to support these calling conventions in as portable a manner 72 * as possible. The trick is doing it not only with different versions 73 * of GNU C, but with compilers other than GNU C (e.g. the Solaris 74 * SunOne C compiler).
The only sure-fire method is with assembly 75 * language trampoline code which fixes up the argument passing, 76 * stack unwinding and exception/thread context all at once. 77 * 78 * You'll notice that we call the thunk/unthunk routines in the 79 * *_wrap() functions in an awkward way. Rather than branching 80 * directly to the address, we load the address into a register 81 * first as a literal value, then we branch to it. This is done 82 * to ensure that the assembler doesn't translate the branch into 83 * a relative branch. We use the *_wrap() routines here as templates 84 * and create the actual trampolines at run time, at which point 85 * we only know the absolute addresses of the thunk and unthunk 86 * routines. So we need to make sure the templates have enough 87 * room in them for the full address. 88 * 89 * Also note that when we call a thunk/unthunk routine after 90 * invoking a wrapped function, we have to make sure to preserve 91 * the value returned from that function. Most functions return 92 * a 32-bit value in %eax, however some routines return 64-bit 93 * values, which span both %eax and %edx. Consequently, we have 94 * to preserve both registers. 95 */ 96 97/* 98 * Handle _stdcall going from Windows to UNIX. 99 * This is frustrating, because to do it right you have to 100 * know how many arguments the called function takes, and there's 101 * no way to figure this out on the fly: you just have to be told 102 * ahead of time. We assume there will be 16 arguments. I don't 103 * think there are any Windows APIs that require this many.
*/

	.globl	x86_stdcall_wrap_call
	.globl	x86_stdcall_wrap_arg
	.globl	x86_stdcall_wrap_end

/*
 * x86_stdcall_wrap: trampoline template, Windows (_stdcall) -> UNIX (_cdecl).
 *
 * Stack on entry:
 *	0(%esp)		return address into the Windows caller
 *	4(%esp)...	arguments pushed by the Windows caller
 *
 * The routine copies 16 argument slots (64 bytes) into a fresh frame,
 * unthunks, calls the wrapped routine, thunks again and returns with a
 * callee-side stack cleanup (ret $imm16).
 *
 * The x86_stdcall_wrap_call and x86_stdcall_wrap_arg labels mark two
 * instructions that carry placeholder operands ($0 and $0xFF).  This
 * routine is only a template for the trampolines created at run time;
 * presumably the placeholders are filled in when a trampoline is built
 * (confirm against the code that consumes these labels).  Do not replace
 * either instruction with a shorter equivalent encoding.
 */
ENTRY(x86_stdcall_wrap)
	pushl	%esi			/* callee-saved; copy source below */
	pushl	%edi			/* callee-saved; copy destination below */
	subl	$64,%esp		/* new frame: 16 slots of 4 bytes */
	movl	%esp,%esi
	addl	$64+8+4,%esi		/* skip frame + 2 saved regs + ret addr */
	movl	%esp,%edi
	movl	$16,%ecx		/* handle up to 16 args */
	rep movsl			/* copy the caller's args into the frame */

	movl	$ctxsw_wtou, %eax	/* literal address: forces absolute call */
	call	*%eax			/* unthunk */

x86_stdcall_wrap_call:
	movl	$0,%eax			/* placeholder: wrapped routine address */
	call	*%eax			/* jump to routine */
	pushl	%eax			/* preserve return val, low 32 bits */
	pushl	%edx			/* preserve return val, high 32 bits */

	movl	$ctxsw_utow, %eax	/* literal address: forces absolute call */
	call	*%eax			/* thunk */

	popl	%edx
	popl	%eax			/* restore return val */

	addl	$64,%esp		/* drop the copied argument frame */
	popl	%edi
	popl	%esi
x86_stdcall_wrap_arg:
	ret	$0xFF			/* placeholder: bytes of args to pop */
x86_stdcall_wrap_end:


/*
 * Handle _stdcall going from UNIX to Windows. This routine
 * expects to be passed the function to be called, number of
 * args and the arguments for the Windows function on the stack.
 *
 * Stack after the two register saves below:
 *	 0(%esp)	saved %edi
 *	 4(%esp)	saved %esi
 *	 8(%esp)	return address
 *	12(%esp)	function to call
 *	16(%esp)	argument count
 *	20(%esp)...	arguments to forward
 *
 * The string copy leaves %edi equal to the pre-copy stack pointer, and
 * %edi is then used both to find the function pointer and to restore
 * %esp after the call, so the result does not depend on how many bytes
 * the _stdcall callee popped.
 */
ENTRY(x86_stdcall_call)
	pushl	%esi			/* must preserve %esi */
	pushl	%edi			/* and %edi */

	movl	%esp,%esi
	addl	$20,%esi		/* source = first argument to forward */
	movl	16(%esp),%ecx		/* copy count = arg cnt */
	movl	%ecx,%eax
	shll	$2,%eax			/* arg cnt -> byte count */
	subl	%eax,%esp		/* make room for the forwarded args */
	movl	%esp,%edi		/* destination = new stack top */
	rep movsl			/* do the copy; %edi ends at old %esp */

	call	ctxsw_utow		/* thunk */

	call	*12(%edi)		/* branch to stdcall routine */
	pushl	%eax			/* preserve return val */
	pushl	%edx

	call	ctxsw_wtou		/* unthunk */

	popl	%edx
	popl	%eax			/* restore return val */
	movl	%edi,%esp		/* restore stack */
	popl	%edi			/* restore %edi */
	popl	%esi			/* and %esi */
	ret

/*
 * Fastcall support. Similar to _stdcall, except the first
 * two arguments are passed in %ecx and %edx. It happens we
 * only support a small number of _fastcall APIs, none of them
 * take more than three arguments.
* So to keep the code size
 * and complexity down, we only handle 3 arguments here.
 */

/* Call _fastcall function going from Windows to UNIX. */

	.globl	x86_fastcall_wrap_call
	.globl	x86_fastcall_wrap_arg
	.globl	x86_fastcall_wrap_end

/*
 * x86_fastcall_wrap: trampoline template, Windows (_fastcall) -> UNIX.
 *
 * On entry %ecx and %edx hold the first two arguments and 4(%esp) holds
 * the third.  All three are pushed so that the wrapped routine sees them
 * as ordinary stack arguments.
 *
 * The x86_fastcall_wrap_call and x86_fastcall_wrap_arg labels mark
 * instructions with placeholder operands ($0 and $0xFF).  This routine is
 * only a template for the trampolines created at run time; presumably the
 * placeholders are filled in when a trampoline is built.  Keep these two
 * instructions exactly as they are.
 */
ENTRY(x86_fastcall_wrap)
	mov	4(%esp),%eax		/* third argument (caller's stack) */
	push	%eax
	push	%edx			/* second argument */
	push	%ecx			/* first argument */

	movl	$ctxsw_wtou, %eax	/* literal address: forces absolute call */
	call	*%eax			/* unthunk */

x86_fastcall_wrap_call:
	mov	$0,%eax			/* placeholder: wrapped routine address */
	call	*%eax			/* branch to fastcall routine */
	push	%eax			/* preserve return val */
	push	%edx

	movl	$ctxsw_utow, %eax	/* literal address: forces absolute call */
	call	*%eax			/* thunk */

	pop	%edx
	pop	%eax			/* restore return val */
	add	$12,%esp		/* drop the three pushed arguments */
x86_fastcall_wrap_arg:
	ret	$0xFF			/* placeholder: bytes of args to pop */
x86_fastcall_wrap_end:

/*
 * Call _fastcall function going from UNIX to Windows.
 * This routine isn't normally used since NDIS miniport drivers
 * only have _stdcall entry points, but it's provided anyway
 * to round out the API, and for testing purposes.
 *
 * Stack after %esi is saved (offsets relative to %esi):
 *	 0	saved %esi
 *	 4	return address
 *	 8	function to call
 *	12	first argument	(goes in %ecx)
 *	16	second argument	(goes in %edx)
 *	20	third argument	(goes on the stack)
 *
 * A _fastcall callee unwinds its own stack arguments, so a routine that
 * really takes three arguments has already popped the third one when it
 * returns, while a routine taking two or fewer has not.  The stack pointer
 * is therefore restored from %esi (preserved across the calls made here)
 * rather than adjusted by a fixed amount, which is correct in
 * both cases.  An unconditional "add $4,%esp" here would discard the
 * return address after a three-argument call.
 */
ENTRY(x86_fastcall_call)
	push	%esi			/* callee-saved; used as frame anchor */
	mov	%esp,%esi
	push	20(%esi)		/* stack copy of the third argument */

	call	ctxsw_utow		/* thunk */

	mov	12(%esi),%ecx		/* first argument */
	mov	16(%esi),%edx		/* second argument */
	call	*8(%esi)		/* branch to fastcall routine */
	mov	%esi,%esp		/* clean the stack, popped or not */
	push	%eax			/* preserve return val */
	push	%edx

	call	ctxsw_wtou		/* unthunk */

	pop	%edx
	pop	%eax			/* restore return val */
	pop	%esi			/* restore %esi */
	ret

/*
 * Call regparm(3) function going from Windows to UNIX. Arguments
 * are passed in %eax, %edx and %ecx. Note that while additional
 * arguments are passed on the stack, we never bother with them,
 * since the only regparm(3) routines we need to wrap never take
 * more than 3 arguments.
*/

	.globl	x86_regparm_wrap_call
	.globl	x86_regparm_wrap_end

/*
 * x86_regparm_wrap: trampoline template, Windows (regparm(3)) -> UNIX.
 *
 * The three register arguments are pushed in the order %ecx, %edx, %eax,
 * which leaves %eax at the lowest address, i.e. as the first stack
 * argument of the wrapped routine, followed by %edx and %ecx.
 *
 * The x86_regparm_wrap_call label marks an instruction with a placeholder
 * operand ($0).  This routine is only a template for the trampolines
 * created at run time; presumably the placeholder is filled in when a
 * trampoline is built.  Keep that instruction exactly as it is.
 */
ENTRY(x86_regparm_wrap)
	pushl	%ecx			/* third argument */
	pushl	%edx			/* second argument */
	pushl	%eax			/* first argument */

	movl	$ctxsw_wtou, %eax	/* literal address: forces absolute call */
	call	*%eax			/* unthunk */

x86_regparm_wrap_call:
	movl	$0,%eax			/* placeholder: wrapped routine address */
	call	*%eax			/* jump to routine */
	pushl	%eax			/* preserve return val, low 32 bits */
	pushl	%edx			/* preserve return val, high 32 bits */

	movl	$ctxsw_utow, %eax	/* literal address: forces absolute call */
	call	*%eax			/* thunk */

	popl	%edx			/* restore return val */
	popl	%eax			/* restore return val */
	addl	$12,%esp		/* drop the three pushed arguments */
	ret
x86_regparm_wrap_end:

/*
 * Call regparm(3) function going from UNIX to Windows.
 * This routine isn't normally used since NDIS miniport drivers
 * only have _stdcall entry points, but it's provided anyway
 * to round out the API, and for testing purposes.
 *
 * Stack on entry:
 *	 0(%esp)	return address
 *	 4(%esp)	function to call
 *	 8(%esp)	first argument	(goes in %eax)
 *	12(%esp)	second argument	(goes in %edx)
 *	16(%esp)	third argument	(goes in %ecx)
 *
 * Nothing is pushed for the callee, so no stack cleanup is needed.
 */
ENTRY(x86_regparm_call)
	call	ctxsw_utow		/* thunk */

	movl	16(%esp),%ecx		/* third argument */
	movl	12(%esp),%edx		/* second argument */
	movl	8(%esp),%eax		/* first argument */
	call	*4(%esp)		/* branch to regparm routine */
	pushl	%eax			/* preserve return val */
	pushl	%edx			/* preserve return val */

	call	ctxsw_wtou		/* unthunk */

	popl	%edx			/* restore return val */
	popl	%eax			/* restore return val */
	ret

/*
 * Ugly hack alert:
 *
 * On Win32/i386, using __try/__except results in code that tries to
 * manipulate what's supposed to be the Windows Thread Environment
 * Block (TEB), which one accesses via the %fs register. In particular,
 * %fs:0 (the first DWORD in the TEB) points to the exception
 * registration list. Unfortunately, FreeBSD uses %fs for the
 * per-cpu data structure (pcpu), and we can't allow Windows code
 * to muck with that. I don't even know what Solaris uses %fs for
 * (or if it even uses it at all).
 *
 * Even worse, in 32-bit protected mode, %fs is a selector that
 * refers to an entry in either the GDT or the LDT.
Ideally, we would 315 * like to be able to temporarily point it at another descriptor 316 * while Windows code executes, but to do that we need a separate 317 * descriptor entry of our own to play with. 318 * 319 * Therefore, we go to some trouble to learn the existing layout of 320 * the GDT and update it to include an extra entry that we can use. 321 * We need the following utility routines to help us do that. On 322 * FreeBSD, index #7 in the GDT happens to be unused, so we turn 323 * this into our own data segment descriptor. It would be better 324 * if we could use a private LDT entry, but there's no easy way to 325 * do that in SMP mode because of the way FreeBSD handles user LDTs. 326 * 327 * Once we have a custom descriptor, we have to thunk/unthunk whenever 328 * we cross between FreeBSD code and Windows code. The thunking is 329 * based on the premise that when executing instructions in the 330 * Windows binary itself, we won't go to sleep. This is because in 331 * order to yield the CPU, the code has to call back out to a FreeBSD 332 * routine first, and when that happens we can unthunk in order to 333 * restore FreeBSD context. What we're desperately trying to avoid is 334 * being involuntarily pre-empted with the %fs register still pointing 335 * to our fake TIB: if FreeBSD code runs with %fs pointing at our 336 * Windows TIB instead of pcpu, we'll panic the kernel. Fortunately, 337 * the only way involuntary preemption can occur is if an interrupt 338 * fires, and the trap handler saves/restores %fs for us. 339 * 340 * The thunking routines themselves, ctxsw_utow() (Context SWitch UNIX 341 * to Windows) and ctxsw_wtou() (Context SWitch Windows to UNIX), are 342 * external to this module. This is done simply because it's easier 343 * to manipulate data structures in C rather than assembly. 
*/

/*
 * x86_getldt: store the current descriptor-table state.
 *	4(%esp)	address that receives the output of sgdt
 *	8(%esp)	address that receives the output of sldt
 * Always returns 0 in %eax.
 */
ENTRY(x86_getldt)
	movl	4(%esp),%eax		/* first argument: GDT save area */
	sgdtl	(%eax)
	movl	8(%esp),%eax		/* second argument: LDT save area */
	sldt	(%eax)
	xorl	%eax,%eax		/* return 0 */
	ret

/*
 * x86_setldt: load new descriptor-table state.
 *	4(%esp)	address of the operand handed to lgdt
 *	8(%esp)	value whose low 16 bits are handed to lldt
 * Always returns 0 in %eax.
 */
ENTRY(x86_setldt)
	movl	4(%esp),%eax		/* first argument: lgdt operand */
	lgdt	(%eax)
	jmp	1f			/* branch right after the GDT reload */
	nop
1:
	movl	8(%esp),%eax		/* second argument */
	lldt	%ax
	xorl	%eax,%eax		/* return 0 */
	ret

/*
 * x86_getfs: return the current %fs selector in %ax.
 * Only the low 16 bits of %eax are written.
 */
ENTRY(x86_getfs)
	movw	%fs,%ax
	ret

/*
 * x86_setfs: load %fs from the first stack argument.
 */
ENTRY(x86_setfs)
	mov	4(%esp),%fs
	ret

/*
 * x86_gettid: return the 32-bit value stored at offset 12 of the segment
 * that %fs currently selects.
 * NOTE(review): what lives at that offset depends on the structure %fs
 * points at when this is called -- confirm against the callers.
 */
ENTRY(x86_gettid)
	movl	%fs:12,%eax
	ret

/*
 * x86_critical_enter: disable interrupts on the current CPU (cli).
 */
ENTRY(x86_critical_enter)
	cli
	ret

/*
 * x86_critical_exit: enable interrupts on the current CPU (sti).
 */
ENTRY(x86_critical_exit)
	sti
	ret