1 /*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/cpuset.h>
35
36 #include <machine/clock.h>
37 #include <machine/cpufunc.h>
38 #include <machine/md_var.h>
39 #include <machine/specialreg.h>
40 #include <machine/vmm.h>
41
42 #include "vmx.h"
43 #include "vmx_msr.h"
44
45 static boolean_t
vmx_ctl_allows_one_setting(uint64_t msr_val,int bitpos)46 vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
47 {
48
49 if (msr_val & (1UL << (bitpos + 32)))
50 return (TRUE);
51 else
52 return (FALSE);
53 }
54
55 static boolean_t
vmx_ctl_allows_zero_setting(uint64_t msr_val,int bitpos)56 vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
57 {
58
59 if ((msr_val & (1UL << bitpos)) == 0)
60 return (TRUE);
61 else
62 return (FALSE);
63 }
64
65 uint32_t
vmx_revision(void)66 vmx_revision(void)
67 {
68
69 return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
70 }
71
72 /*
73 * Generate a bitmask to be used for the VMCS execution control fields.
74 *
75 * The caller specifies what bits should be set to one in 'ones_mask'
76 * and what bits should be set to zero in 'zeros_mask'. The don't-care
77 * bits are set to the default value. The default values are obtained
78 * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
79 * VMX Capabilities".
80 *
81 * Returns zero on success and non-zero on error.
82 */
83 int
vmx_set_ctlreg(int ctl_reg,int true_ctl_reg,uint32_t ones_mask,uint32_t zeros_mask,uint32_t * retval)84 vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
85 uint32_t zeros_mask, uint32_t *retval)
86 {
87 int i;
88 uint64_t val, trueval;
89 boolean_t true_ctls_avail, one_allowed, zero_allowed;
90
91 /* We cannot ask the same bit to be set to both '1' and '0' */
92 if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
93 return (EINVAL);
94
95 if (rdmsr(MSR_VMX_BASIC) & (1UL << 55))
96 true_ctls_avail = TRUE;
97 else
98 true_ctls_avail = FALSE;
99
100 val = rdmsr(ctl_reg);
101 if (true_ctls_avail)
102 trueval = rdmsr(true_ctl_reg); /* step c */
103 else
104 trueval = val; /* step a */
105
106 for (i = 0; i < 32; i++) {
107 one_allowed = vmx_ctl_allows_one_setting(trueval, i);
108 zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
109
110 KASSERT(one_allowed || zero_allowed,
111 ("invalid zero/one setting for bit %d of ctl 0x%0x, "
112 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
113
114 if (zero_allowed && !one_allowed) { /* b(i),c(i) */
115 if (ones_mask & (1 << i))
116 return (EINVAL);
117 *retval &= ~(1 << i);
118 } else if (one_allowed && !zero_allowed) { /* b(i),c(i) */
119 if (zeros_mask & (1 << i))
120 return (EINVAL);
121 *retval |= 1 << i;
122 } else {
123 if (zeros_mask & (1 << i)) /* b(ii),c(ii) */
124 *retval &= ~(1 << i);
125 else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
126 *retval |= 1 << i;
127 else if (!true_ctls_avail)
128 *retval &= ~(1 << i); /* b(iii) */
129 else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
130 *retval &= ~(1 << i);
131 else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
132 *retval |= 1 << i;
133 else {
134 panic("vmx_set_ctlreg: unable to determine "
135 "correct value of ctl bit %d for msr "
136 "0x%0x and true msr 0x%0x", i, ctl_reg,
137 true_ctl_reg);
138 }
139 }
140 }
141
142 return (0);
143 }
144
145 void
msr_bitmap_initialize(char * bitmap)146 msr_bitmap_initialize(char *bitmap)
147 {
148
149 memset(bitmap, 0xff, PAGE_SIZE);
150 }
151
152 int
msr_bitmap_change_access(char * bitmap,u_int msr,int access)153 msr_bitmap_change_access(char *bitmap, u_int msr, int access)
154 {
155 int byte, bit;
156
157 if (msr <= 0x00001FFF)
158 byte = msr / 8;
159 else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
160 byte = 1024 + (msr - 0xC0000000) / 8;
161 else
162 return (EINVAL);
163
164 bit = msr & 0x7;
165
166 if (access & MSR_BITMAP_ACCESS_READ)
167 bitmap[byte] &= ~(1 << bit);
168 else
169 bitmap[byte] |= 1 << bit;
170
171 byte += 2048;
172 if (access & MSR_BITMAP_ACCESS_WRITE)
173 bitmap[byte] &= ~(1 << bit);
174 else
175 bitmap[byte] |= 1 << bit;
176
177 return (0);
178 }
179
180 static uint64_t misc_enable;
181 static uint64_t platform_info;
182 static uint64_t turbo_ratio_limit;
183 static uint64_t host_msrs[GUEST_MSR_NUM];
184
185 static bool
nehalem_cpu(void)186 nehalem_cpu(void)
187 {
188 u_int family, model;
189
190 /*
191 * The family:model numbers belonging to the Nehalem microarchitecture
192 * are documented in Section 35.5, Intel SDM dated Feb 2014.
193 */
194 family = CPUID_TO_FAMILY(cpu_id);
195 model = CPUID_TO_MODEL(cpu_id);
196 if (family == 0x6) {
197 switch (model) {
198 case 0x1A:
199 case 0x1E:
200 case 0x1F:
201 case 0x2E:
202 return (true);
203 default:
204 break;
205 }
206 }
207 return (false);
208 }
209
210 static bool
westmere_cpu(void)211 westmere_cpu(void)
212 {
213 u_int family, model;
214
215 /*
216 * The family:model numbers belonging to the Westmere microarchitecture
217 * are documented in Section 35.6, Intel SDM dated Feb 2014.
218 */
219 family = CPUID_TO_FAMILY(cpu_id);
220 model = CPUID_TO_MODEL(cpu_id);
221 if (family == 0x6) {
222 switch (model) {
223 case 0x25:
224 case 0x2C:
225 return (true);
226 default:
227 break;
228 }
229 }
230 return (false);
231 }
232
233 void
vmx_msr_init(void)234 vmx_msr_init(void)
235 {
236 uint64_t bus_freq, ratio;
237 int i;
238
239 /*
240 * It is safe to cache the values of the following MSRs because
241 * they don't change based on curcpu, curproc or curthread.
242 */
243 host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
244 host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
245 host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
246 host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
247
248 /*
249 * Initialize emulated MSRs
250 */
251 misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
252 /*
253 * Set mandatory bits
254 * 11: branch trace disabled
255 * 12: PEBS unavailable
256 * Clear unsupported features
257 * 16: SpeedStep enable
258 * 18: enable MONITOR FSM
259 */
260 misc_enable |= (1 << 12) | (1 << 11);
261 misc_enable &= ~((1 << 18) | (1 << 16));
262
263 if (nehalem_cpu() || westmere_cpu())
264 bus_freq = 133330000; /* 133Mhz */
265 else
266 bus_freq = 100000000; /* 100Mhz */
267
268 /*
269 * XXXtime
270 * The ratio should really be based on the virtual TSC frequency as
271 * opposed to the host TSC.
272 */
273 ratio = (tsc_freq / bus_freq) & 0xff;
274
275 /*
276 * The register definition is based on the micro-architecture
277 * but the following bits are always the same:
278 * [15:8] Maximum Non-Turbo Ratio
279 * [28] Programmable Ratio Limit for Turbo Mode
280 * [29] Programmable TDC-TDP Limit for Turbo Mode
281 * [47:40] Maximum Efficiency Ratio
282 *
283 * The other bits can be safely set to 0 on all
284 * micro-architectures up to Haswell.
285 */
286 platform_info = (ratio << 8) | (ratio << 40);
287
288 /*
289 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
290 * dependent on the maximum cores per package supported by the micro-
291 * architecture. For e.g., Westmere supports 6 cores per package and
292 * uses the low 48 bits. Sandybridge support 8 cores per package and
293 * uses up all 64 bits.
294 *
295 * However, the unused bits are reserved so we pretend that all bits
296 * in this MSR are valid.
297 */
298 for (i = 0; i < 8; i++)
299 turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
300 }
301
302 void
vmx_msr_guest_init(struct vmx * vmx,int vcpuid)303 vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
304 {
305 /*
306 * The permissions bitmap is shared between all vcpus so initialize it
307 * once when initializing the vBSP.
308 */
309 if (vcpuid == 0) {
310 guest_msr_rw(vmx, MSR_LSTAR);
311 guest_msr_rw(vmx, MSR_CSTAR);
312 guest_msr_rw(vmx, MSR_STAR);
313 guest_msr_rw(vmx, MSR_SF_MASK);
314 guest_msr_rw(vmx, MSR_KGSBASE);
315 }
316 return;
317 }
318
319 void
vmx_msr_guest_enter(struct vmx * vmx,int vcpuid)320 vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
321 {
322 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
323
324 /* Save host MSRs (if any) and restore guest MSRs */
325 wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]);
326 wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]);
327 wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]);
328 wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]);
329 wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]);
330 }
331
332 void
vmx_msr_guest_exit(struct vmx * vmx,int vcpuid)333 vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
334 {
335 uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
336
337 /* Save guest MSRs */
338 guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
339 guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
340 guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
341 guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
342 guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);
343
344 /* Restore host MSRs */
345 wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
346 wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
347 wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
348 wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);
349
350 /* MSR_KGSBASE will be restored on the way back to userspace */
351 }
352
353 int
vmx_rdmsr(struct vmx * vmx,int vcpuid,u_int num,uint64_t * val,bool * retu)354 vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
355 {
356 int error = 0;
357
358 switch (num) {
359 case MSR_IA32_MISC_ENABLE:
360 *val = misc_enable;
361 break;
362 case MSR_PLATFORM_INFO:
363 *val = platform_info;
364 break;
365 case MSR_TURBO_RATIO_LIMIT:
366 case MSR_TURBO_RATIO_LIMIT1:
367 *val = turbo_ratio_limit;
368 break;
369 default:
370 error = EINVAL;
371 break;
372 }
373 return (error);
374 }
375
376 int
vmx_wrmsr(struct vmx * vmx,int vcpuid,u_int num,uint64_t val,bool * retu)377 vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
378 {
379 uint64_t changed;
380 int error;
381
382 error = 0;
383 switch (num) {
384 case MSR_IA32_MISC_ENABLE:
385 changed = val ^ misc_enable;
386 /*
387 * If the host has disabled the NX feature then the guest
388 * also cannot use it. However, a Linux guest will try to
389 * enable the NX feature by writing to the MISC_ENABLE MSR.
390 *
391 * This can be safely ignored because the memory management
392 * code looks at CPUID.80000001H:EDX.NX to check if the
393 * functionality is actually enabled.
394 */
395 changed &= ~(1UL << 34);
396
397 /*
398 * Punt to userspace if any other bits are being modified.
399 */
400 if (changed)
401 error = EINVAL;
402
403 break;
404 default:
405 error = EINVAL;
406 break;
407 }
408
409 return (error);
410 }
411