summaryrefslogtreecommitdiff
path: root/arch/x86/kvm
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-12-15 14:52:16 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-12-15 14:52:16 -0300
commit8d91c1e411f55d7ea91b1183a2e9f8088fb4d5be (patch)
treee9891aa6c295060d065adffd610c4f49ecf884f3 /arch/x86/kvm
parenta71852147516bc1cb5b0b3cbd13639bfd4022dc8 (diff)
Linux-libre 4.3.2-gnu
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/Makefile4
-rw-r--r--arch/x86/kvm/emulate.c37
-rw-r--r--arch/x86/kvm/hyperv.c377
-rw-r--r--arch/x86/kvm/hyperv.h32
-rw-r--r--arch/x86/kvm/i8259.c15
-rw-r--r--arch/x86/kvm/irq.h8
-rw-r--r--arch/x86/kvm/lapic.c11
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/mmu.c254
-rw-r--r--arch/x86/kvm/mmu.h4
-rw-r--r--arch/x86/kvm/paging_tmpl.h13
-rw-r--r--arch/x86/kvm/pmu_amd.c2
-rw-r--r--arch/x86/kvm/svm.c27
-rw-r--r--arch/x86/kvm/vmx.c229
-rw-r--r--arch/x86/kvm/x86.c463
-rw-r--r--arch/x86/kvm/x86.h5
16 files changed, 882 insertions, 601 deletions
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 67d215cb8..a1ff508bb 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,9 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
- i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o
+ i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
+ hyperv.o
+
kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o
kvm-intel-y += vmx.o pmu_intel.o
kvm-amd-y += svm.o pmu_amd.o
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 2392541a9..1505587d0 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -650,6 +650,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
u16 sel;
la = seg_base(ctxt, addr.seg) + addr.ea;
+ *linear = la;
*max_size = 0;
switch (mode) {
case X86EMUL_MODE_PROT64:
@@ -693,7 +694,6 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
}
if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
return emulate_gp(ctxt, 0);
- *linear = la;
return X86EMUL_CONTINUE;
bad:
if (addr.seg == VCPU_SREG_SS)
@@ -2272,8 +2272,8 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
#define GET_SMSTATE(type, smbase, offset) \
({ \
type __val; \
- int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val, \
- sizeof(__val), NULL); \
+ int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \
+ sizeof(__val)); \
if (r != X86EMUL_CONTINUE) \
return X86EMUL_UNHANDLEABLE; \
__val; \
@@ -2484,17 +2484,36 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
/*
* Get back to real mode, to prepare a safe state in which to load
- * CR0/CR3/CR4/EFER. Also this will ensure that addresses passed
- * to read_std/write_std are not virtual.
- *
- * CR4.PCIDE must be zero, because it is a 64-bit mode only feature.
+ * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
+ * supports long mode.
*/
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ if (emulator_has_longmode(ctxt)) {
+ struct desc_struct cs_desc;
+
+ /* Zero CR4.PCIDE before CR0.PG. */
+ if (cr4 & X86_CR4_PCIDE) {
+ ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+ cr4 &= ~X86_CR4_PCIDE;
+ }
+
+ /* A 32-bit code segment is required to clear EFER.LMA. */
+ memset(&cs_desc, 0, sizeof(cs_desc));
+ cs_desc.type = 0xb;
+ cs_desc.s = cs_desc.g = cs_desc.p = 1;
+ ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
+ }
+
+ /* For the 64-bit case, this will clear EFER.LMA. */
cr0 = ctxt->ops->get_cr(ctxt, 0);
if (cr0 & X86_CR0_PE)
ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
- cr4 = ctxt->ops->get_cr(ctxt, 4);
+
+ /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */
if (cr4 & X86_CR4_PAE)
ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
+
+ /* And finally go back to 32-bit mode. */
efer = 0;
ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
@@ -4455,7 +4474,7 @@ static const struct opcode twobyte_table[256] = {
F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
/* 0xA8 - 0xAF */
I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
- II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm),
+ II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
new file mode 100644
index 000000000..a8160d2ae
--- /dev/null
+++ b/arch/x86/kvm/hyperv.c
@@ -0,0 +1,377 @@
+/*
+ * KVM Microsoft Hyper-V emulation
+ *
+ * derived from arch/x86/kvm/x86.c
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright (C) 2008 Qumranet, Inc.
+ * Copyright IBM Corporation, 2008
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * Authors:
+ * Avi Kivity <avi@qumranet.com>
+ * Yaniv Kamay <yaniv@qumranet.com>
+ * Amit Shah <amit.shah@qumranet.com>
+ * Ben-Ami Yassour <benami@il.ibm.com>
+ * Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "x86.h"
+#include "lapic.h"
+#include "hyperv.h"
+
+#include <linux/kvm_host.h>
+#include <trace/events/kvm.h>
+
+#include "trace.h"
+
+static bool kvm_hv_msr_partition_wide(u32 msr)
+{
+ bool r = false;
+
+ switch (msr) {
+ case HV_X64_MSR_GUEST_OS_ID:
+ case HV_X64_MSR_HYPERCALL:
+ case HV_X64_MSR_REFERENCE_TSC:
+ case HV_X64_MSR_TIME_REF_COUNT:
+ case HV_X64_MSR_CRASH_CTL:
+ case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+ r = true;
+ break;
+ }
+
+ return r;
+}
+
+static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
+ u32 index, u64 *pdata)
+{
+ struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+
+ if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
+ return -EINVAL;
+
+ *pdata = hv->hv_crash_param[index];
+ return 0;
+}
+
+static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata)
+{
+ struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+
+ *pdata = hv->hv_crash_ctl;
+ return 0;
+}
+
+static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
+{
+ struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+
+ if (host)
+ hv->hv_crash_ctl = data & HV_X64_MSR_CRASH_CTL_NOTIFY;
+
+ if (!host && (data & HV_X64_MSR_CRASH_CTL_NOTIFY)) {
+
+ vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
+ hv->hv_crash_param[0],
+ hv->hv_crash_param[1],
+ hv->hv_crash_param[2],
+ hv->hv_crash_param[3],
+ hv->hv_crash_param[4]);
+
+ /* Send notification about crash to user space */
+ kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
+ }
+
+ return 0;
+}
+
+static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
+ u32 index, u64 data)
+{
+ struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+
+ if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
+ return -EINVAL;
+
+ hv->hv_crash_param[index] = data;
+ return 0;
+}
+
+static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
+ bool host)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_hv *hv = &kvm->arch.hyperv;
+
+ switch (msr) {
+ case HV_X64_MSR_GUEST_OS_ID:
+ hv->hv_guest_os_id = data;
+ /* setting guest os id to zero disables hypercall page */
+ if (!hv->hv_guest_os_id)
+ hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
+ break;
+ case HV_X64_MSR_HYPERCALL: {
+ u64 gfn;
+ unsigned long addr;
+ u8 instructions[4];
+
+ /* if guest os id is not set hypercall should remain disabled */
+ if (!hv->hv_guest_os_id)
+ break;
+ if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
+ hv->hv_hypercall = data;
+ break;
+ }
+ gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
+ addr = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(addr))
+ return 1;
+ kvm_x86_ops->patch_hypercall(vcpu, instructions);
+ ((unsigned char *)instructions)[3] = 0xc3; /* ret */
+ if (__copy_to_user((void __user *)addr, instructions, 4))
+ return 1;
+ hv->hv_hypercall = data;
+ mark_page_dirty(kvm, gfn);
+ break;
+ }
+ case HV_X64_MSR_REFERENCE_TSC: {
+ u64 gfn;
+ HV_REFERENCE_TSC_PAGE tsc_ref;
+
+ memset(&tsc_ref, 0, sizeof(tsc_ref));
+ hv->hv_tsc_page = data;
+ if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+ break;
+ gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+ if (kvm_write_guest(
+ kvm,
+ gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
+ &tsc_ref, sizeof(tsc_ref)))
+ return 1;
+ mark_page_dirty(kvm, gfn);
+ break;
+ }
+ case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+ return kvm_hv_msr_set_crash_data(vcpu,
+ msr - HV_X64_MSR_CRASH_P0,
+ data);
+ case HV_X64_MSR_CRASH_CTL:
+ return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
+ default:
+ vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
+ msr, data);
+ return 1;
+ }
+ return 0;
+}
+
+static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+ struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+
+ switch (msr) {
+ case HV_X64_MSR_APIC_ASSIST_PAGE: {
+ u64 gfn;
+ unsigned long addr;
+
+ if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
+ hv->hv_vapic = data;
+ if (kvm_lapic_enable_pv_eoi(vcpu, 0))
+ return 1;
+ break;
+ }
+ gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
+ addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
+ if (kvm_is_error_hva(addr))
+ return 1;
+ if (__clear_user((void __user *)addr, PAGE_SIZE))
+ return 1;
+ hv->hv_vapic = data;
+ kvm_vcpu_mark_page_dirty(vcpu, gfn);
+ if (kvm_lapic_enable_pv_eoi(vcpu,
+ gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
+ return 1;
+ break;
+ }
+ case HV_X64_MSR_EOI:
+ return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
+ case HV_X64_MSR_ICR:
+ return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
+ case HV_X64_MSR_TPR:
+ return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
+ default:
+ vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
+ msr, data);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+ u64 data = 0;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_hv *hv = &kvm->arch.hyperv;
+
+ switch (msr) {
+ case HV_X64_MSR_GUEST_OS_ID:
+ data = hv->hv_guest_os_id;
+ break;
+ case HV_X64_MSR_HYPERCALL:
+ data = hv->hv_hypercall;
+ break;
+ case HV_X64_MSR_TIME_REF_COUNT: {
+ data =
+ div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
+ break;
+ }
+ case HV_X64_MSR_REFERENCE_TSC:
+ data = hv->hv_tsc_page;
+ break;
+ case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+ return kvm_hv_msr_get_crash_data(vcpu,
+ msr - HV_X64_MSR_CRASH_P0,
+ pdata);
+ case HV_X64_MSR_CRASH_CTL:
+ return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
+ default:
+ vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+ return 1;
+ }
+
+ *pdata = data;
+ return 0;
+}
+
+static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+ u64 data = 0;
+ struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+
+ switch (msr) {
+ case HV_X64_MSR_VP_INDEX: {
+ int r;
+ struct kvm_vcpu *v;
+
+ kvm_for_each_vcpu(r, v, vcpu->kvm) {
+ if (v == vcpu) {
+ data = r;
+ break;
+ }
+ }
+ break;
+ }
+ case HV_X64_MSR_EOI:
+ return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
+ case HV_X64_MSR_ICR:
+ return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
+ case HV_X64_MSR_TPR:
+ return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
+ case HV_X64_MSR_APIC_ASSIST_PAGE:
+ data = hv->hv_vapic;
+ break;
+ default:
+ vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+ return 1;
+ }
+ *pdata = data;
+ return 0;
+}
+
+int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
+{
+ if (kvm_hv_msr_partition_wide(msr)) {
+ int r;
+
+ mutex_lock(&vcpu->kvm->lock);
+ r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
+ mutex_unlock(&vcpu->kvm->lock);
+ return r;
+ } else
+ return kvm_hv_set_msr(vcpu, msr, data);
+}
+
+int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+ if (kvm_hv_msr_partition_wide(msr)) {
+ int r;
+
+ mutex_lock(&vcpu->kvm->lock);
+ r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
+ mutex_unlock(&vcpu->kvm->lock);
+ return r;
+ } else
+ return kvm_hv_get_msr(vcpu, msr, pdata);
+}
+
+bool kvm_hv_hypercall_enabled(struct kvm *kvm)
+{
+ return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
+}
+
+int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
+{
+ u64 param, ingpa, outgpa, ret;
+ uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
+ bool fast, longmode;
+
+ /*
+ * hypercall generates UD from non zero cpl and real mode
+ * per HYPER-V spec
+ */
+ if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 0;
+ }
+
+ longmode = is_64_bit_mode(vcpu);
+
+ if (!longmode) {
+ param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
+ (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
+ ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
+ (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
+ outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
+ (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
+ }
+#ifdef CONFIG_X86_64
+ else {
+ param = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
+ outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
+ }
+#endif
+
+ code = param & 0xffff;
+ fast = (param >> 16) & 0x1;
+ rep_cnt = (param >> 32) & 0xfff;
+ rep_idx = (param >> 48) & 0xfff;
+
+ trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
+
+ switch (code) {
+ case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
+ kvm_vcpu_on_spin(vcpu);
+ break;
+ default:
+ res = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+ }
+
+ ret = res | (((u64)rep_done & 0xfff) << 32);
+ if (longmode) {
+ kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
+ } else {
+ kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
+ kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
+ }
+
+ return 1;
+}
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
new file mode 100644
index 000000000..c7bce559f
--- /dev/null
+++ b/arch/x86/kvm/hyperv.h
@@ -0,0 +1,32 @@
+/*
+ * KVM Microsoft Hyper-V emulation
+ *
+ * derived from arch/x86/kvm/x86.c
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright (C) 2008 Qumranet, Inc.
+ * Copyright IBM Corporation, 2008
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * Authors:
+ * Avi Kivity <avi@qumranet.com>
+ * Yaniv Kamay <yaniv@qumranet.com>
+ * Amit Shah <amit.shah@qumranet.com>
+ * Ben-Ami Yassour <benami@il.ibm.com>
+ * Andrey Smetanin <asmetanin@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef __ARCH_X86_KVM_HYPERV_H__
+#define __ARCH_X86_KVM_HYPERV_H__
+
+int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
+int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
+bool kvm_hv_hypercall_enabled(struct kvm *kvm);
+int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
+
+#endif
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index fef922ff2..7cc2360f1 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -651,15 +651,10 @@ fail_unlock:
return NULL;
}
-void kvm_destroy_pic(struct kvm *kvm)
+void kvm_destroy_pic(struct kvm_pic *vpic)
{
- struct kvm_pic *vpic = kvm->arch.vpic;
-
- if (vpic) {
- kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master);
- kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave);
- kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr);
- kvm->arch.vpic = NULL;
- kfree(vpic);
- }
+ kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master);
+ kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave);
+ kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr);
+ kfree(vpic);
}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ad68c7300..3d782a2c3 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -74,7 +74,7 @@ struct kvm_pic {
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
-void kvm_destroy_pic(struct kvm *kvm);
+void kvm_destroy_pic(struct kvm_pic *vpic);
int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);
@@ -85,11 +85,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
static inline int irqchip_in_kernel(struct kvm *kvm)
{
- int ret;
+ struct kvm_pic *vpic = pic_irqchip(kvm);
- ret = (pic_irqchip(kvm) != NULL);
+ /* Read vpic before kvm->irq_routing. */
smp_rmb();
- return ret;
+ return vpic != NULL;
}
void kvm_pic_reset(struct kvm_kpic_state *s);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2a5ca97c2..ae4483a3e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -348,6 +348,8 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
struct kvm_lapic *apic = vcpu->arch.apic;
__kvm_apic_update_irr(pir, apic->regs);
+
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
@@ -1172,7 +1174,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
apic->lapic_timer.expired_tscdeadline = 0;
- guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
+ guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc());
trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
/* __delay is delay_tsc whenever the hardware has TSC, thus always. */
@@ -1240,7 +1242,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
local_irq_save(flags);
now = apic->lapic_timer.timer.base->get_time();
- guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
+ guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc());
if (likely(tscdeadline > guest_tsc)) {
ns = (tscdeadline - guest_tsc) * 1000000ULL;
do_div(ns, this_tsc_khz);
@@ -1900,8 +1902,9 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
return;
- kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
- sizeof(u32));
+ if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
+ sizeof(u32)))
+ return;
apic_set_tpr(vcpu->arch.apic, data & 0xff);
}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 719527482..764037991 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -91,7 +91,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
+ return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
}
int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 82362ad2f..ff606f507 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3268,6 +3268,25 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
}
+static bool
+__is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level)
+{
+ int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f;
+
+ return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) |
+ ((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0);
+}
+
+static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
+{
+ return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level);
+}
+
+static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level)
+{
+ return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level);
+}
+
static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
{
if (direct)
@@ -3276,31 +3295,63 @@ static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
return vcpu_match_mmio_gva(vcpu, addr);
}
-static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
+/* return true if reserved bit is detected on spte. */
+static bool
+walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
{
struct kvm_shadow_walk_iterator iterator;
- u64 spte = 0ull;
+ u64 sptes[PT64_ROOT_LEVEL], spte = 0ull;
+ int root, leaf;
+ bool reserved = false;
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
- return spte;
+ goto exit;
walk_shadow_page_lockless_begin(vcpu);
- for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
+
+ for (shadow_walk_init(&iterator, vcpu, addr),
+ leaf = root = iterator.level;
+ shadow_walk_okay(&iterator);
+ __shadow_walk_next(&iterator, spte)) {
+ spte = mmu_spte_get_lockless(iterator.sptep);
+
+ sptes[leaf - 1] = spte;
+ leaf--;
+
if (!is_shadow_present_pte(spte))
break;
+
+ reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
+ iterator.level);
+ }
+
walk_shadow_page_lockless_end(vcpu);
- return spte;
+ if (reserved) {
+ pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
+ __func__, addr);
+ while (root > leaf) {
+ pr_err("------ spte 0x%llx level %d.\n",
+ sptes[root - 1], root);
+ root--;
+ }
+ }
+exit:
+ *sptep = spte;
+ return reserved;
}
int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
{
u64 spte;
+ bool reserved;
if (quickly_check_mmio_pf(vcpu, addr, direct))
return RET_MMIO_PF_EMULATE;
- spte = walk_shadow_page_get_mmio_spte(vcpu, addr);
+ reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
+ if (unlikely(reserved))
+ return RET_MMIO_PF_BUG;
if (is_mmio_spte(spte)) {
gfn_t gfn = get_mmio_spte_gfn(spte);
@@ -3559,102 +3610,119 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gp
#include "paging_tmpl.h"
#undef PTTYPE
-static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
- struct kvm_mmu *context)
+static void
+__reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
+ struct rsvd_bits_validate *rsvd_check,
+ int maxphyaddr, int level, bool nx, bool gbpages,
+ bool pse, bool amd)
{
- int maxphyaddr = cpuid_maxphyaddr(vcpu);
u64 exb_bit_rsvd = 0;
u64 gbpages_bit_rsvd = 0;
u64 nonleaf_bit8_rsvd = 0;
- context->bad_mt_xwr = 0;
+ rsvd_check->bad_mt_xwr = 0;
- if (!context->nx)
+ if (!nx)
exb_bit_rsvd = rsvd_bits(63, 63);
- if (!guest_cpuid_has_gbpages(vcpu))
+ if (!gbpages)
gbpages_bit_rsvd = rsvd_bits(7, 7);
/*
* Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for
* leaf entries) on AMD CPUs only.
*/
- if (guest_cpuid_is_amd(vcpu))
+ if (amd)
nonleaf_bit8_rsvd = rsvd_bits(8, 8);
- switch (context->root_level) {
+ switch (level) {
case PT32_ROOT_LEVEL:
/* no rsvd bits for 2 level 4K page table entries */
- context->rsvd_bits_mask[0][1] = 0;
- context->rsvd_bits_mask[0][0] = 0;
- context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+ rsvd_check->rsvd_bits_mask[0][1] = 0;
+ rsvd_check->rsvd_bits_mask[0][0] = 0;
+ rsvd_check->rsvd_bits_mask[1][0] =
+ rsvd_check->rsvd_bits_mask[0][0];
- if (!is_pse(vcpu)) {
- context->rsvd_bits_mask[1][1] = 0;
+ if (!pse) {
+ rsvd_check->rsvd_bits_mask[1][1] = 0;
break;
}
if (is_cpuid_PSE36())
/* 36bits PSE 4MB page */
- context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+ rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
else
/* 32 bits PSE 4MB page */
- context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
+ rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
break;
case PT32E_ROOT_LEVEL:
- context->rsvd_bits_mask[0][2] =
+ rsvd_check->rsvd_bits_mask[0][2] =
rsvd_bits(maxphyaddr, 63) |
rsvd_bits(5, 8) | rsvd_bits(1, 2); /* PDPTE */
- context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+ rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 62); /* PDE */
- context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+ rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 62); /* PTE */
- context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+ rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 62) |
rsvd_bits(13, 20); /* large page */
- context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+ rsvd_check->rsvd_bits_mask[1][0] =
+ rsvd_check->rsvd_bits_mask[0][0];
break;
case PT64_ROOT_LEVEL:
- context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
- nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51);
- context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
- nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
- context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+ rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+ nonleaf_bit8_rsvd | rsvd_bits(7, 7) |
+ rsvd_bits(maxphyaddr, 51);
+ rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+ nonleaf_bit8_rsvd | gbpages_bit_rsvd |
+ rsvd_bits(maxphyaddr, 51);
+ rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
- context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+ rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
- context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
- context->rsvd_bits_mask[1][2] = exb_bit_rsvd |
+ rsvd_check->rsvd_bits_mask[1][3] =
+ rsvd_check->rsvd_bits_mask[0][3];
+ rsvd_check->rsvd_bits_mask[1][2] = exb_bit_rsvd |
gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) |
rsvd_bits(13, 29);
- context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+ rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51) |
rsvd_bits(13, 20); /* large page */
- context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+ rsvd_check->rsvd_bits_mask[1][0] =
+ rsvd_check->rsvd_bits_mask[0][0];
break;
}
}
-static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
- struct kvm_mmu *context, bool execonly)
+static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context)
+{
+ __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check,
+ cpuid_maxphyaddr(vcpu), context->root_level,
+ context->nx, guest_cpuid_has_gbpages(vcpu),
+ is_pse(vcpu), guest_cpuid_is_amd(vcpu));
+}
+
+static void
+__reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
+ int maxphyaddr, bool execonly)
{
- int maxphyaddr = cpuid_maxphyaddr(vcpu);
int pte;
- context->rsvd_bits_mask[0][3] =
+ rsvd_check->rsvd_bits_mask[0][3] =
rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
- context->rsvd_bits_mask[0][2] =
+ rsvd_check->rsvd_bits_mask[0][2] =
rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
- context->rsvd_bits_mask[0][1] =
+ rsvd_check->rsvd_bits_mask[0][1] =
rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
- context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+ rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
/* large page */
- context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
- context->rsvd_bits_mask[1][2] =
+ rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
+ rsvd_check->rsvd_bits_mask[1][2] =
rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
- context->rsvd_bits_mask[1][1] =
+ rsvd_check->rsvd_bits_mask[1][1] =
rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
- context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+ rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0];
for (pte = 0; pte < 64; pte++) {
int rwx_bits = pte & 7;
@@ -3662,10 +3730,75 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
rwx_bits == 0x2 || rwx_bits == 0x6 ||
(rwx_bits == 0x4 && !execonly))
- context->bad_mt_xwr |= (1ull << pte);
+ rsvd_check->bad_mt_xwr |= (1ull << pte);
}
}
+static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context, bool execonly)
+{
+ __reset_rsvds_bits_mask_ept(&context->guest_rsvd_check,
+ cpuid_maxphyaddr(vcpu), execonly);
+}
+
+/*
+ * the page table on host is the shadow page table for the page
+ * table in guest or amd nested guest, its mmu features completely
+ * follow the features in guest.
+ */
+void
+reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+{
+ /*
+ * Passing "true" to the last argument is okay; it adds a check
+ * on bit 8 of the SPTEs which KVM doesn't use anyway.
+ */
+ __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+ boot_cpu_data.x86_phys_bits,
+ context->shadow_root_level, context->nx,
+ guest_cpuid_has_gbpages(vcpu), is_pse(vcpu),
+ true);
+}
+EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask);
+
+static inline bool boot_cpu_is_amd(void)
+{
+ WARN_ON_ONCE(!tdp_enabled);
+ return shadow_x_mask == 0;
+}
+
+/*
+ * the direct page table on host, use as much mmu features as
+ * possible, however, kvm currently does not do execution-protection.
+ */
+static void
+reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context)
+{
+ if (boot_cpu_is_amd())
+ __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
+ boot_cpu_data.x86_phys_bits,
+ context->shadow_root_level, false,
+ cpu_has_gbpages, true, true);
+ else
+ __reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+ boot_cpu_data.x86_phys_bits,
+ false);
+
+}
+
+/*
+ * as the comments in reset_shadow_zero_bits_mask() except it
+ * is the shadow page table for intel nested guest.
+ */
+static void
+reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context, bool execonly)
+{
+ __reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
+ boot_cpu_data.x86_phys_bits, execonly);
+}
+
static void update_permission_bitmask(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu, bool ept)
{
@@ -3844,6 +3977,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
update_permission_bitmask(vcpu, context, false);
update_last_pte_bitmap(vcpu, context);
+ reset_tdp_shadow_zero_bits_mask(vcpu, context);
}
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
@@ -3871,6 +4005,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
context->base_role.smap_andnot_wp
= smap && !is_write_protection(vcpu);
context->base_role.smm = is_smm(vcpu);
+ reset_shadow_zero_bits_mask(vcpu, context);
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
@@ -3894,6 +4029,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
update_permission_bitmask(vcpu, context, true);
reset_rsvds_bits_mask_ept(vcpu, context, execonly);
+ reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
@@ -4815,28 +4951,6 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
return nr_mmu_pages;
}
-int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
-{
- struct kvm_shadow_walk_iterator iterator;
- u64 spte;
- int nr_sptes = 0;
-
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
- return nr_sptes;
-
- walk_shadow_page_lockless_begin(vcpu);
- for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
- sptes[iterator.level-1] = spte;
- nr_sptes++;
- if (!is_shadow_present_pte(spte))
- break;
- }
- walk_shadow_page_lockless_end(vcpu);
-
- return nr_sptes;
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);
-
void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
{
kvm_mmu_unload(vcpu);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 398d21c0f..e4202e41d 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -50,9 +50,11 @@ static inline u64 rsvd_bits(int s, int e)
return ((1ULL << (e - s + 1)) - 1) << s;
}
-int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
+void
+reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
+
/*
* Return values of handle_mmio_page_fault_common:
* RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 0f67d7e24..736e6ab87 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -128,14 +128,6 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
*access &= mask;
}
-static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
-{
- int bit7 = (gpte >> 7) & 1, low6 = gpte & 0x3f;
-
- return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) |
- ((mmu->bad_mt_xwr & (1ull << low6)) != 0);
-}
-
static inline int FNAME(is_present_gpte)(unsigned long pte)
{
#if PTTYPE != PTTYPE_EPT
@@ -172,7 +164,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, u64 *spte,
u64 gpte)
{
- if (FNAME(is_rsvd_bits_set)(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
+ if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
goto no_present;
if (!FNAME(is_present_gpte)(gpte))
@@ -353,8 +345,7 @@ retry_walk:
if (unlikely(!FNAME(is_present_gpte)(pte)))
goto error;
- if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte,
- walker->level))) {
+ if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) {
errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
goto error;
}
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
index 886aa25a7..39b91127e 100644
--- a/arch/x86/kvm/pmu_amd.c
+++ b/arch/x86/kvm/pmu_amd.c
@@ -133,8 +133,6 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* MSR_K7_PERFCTRn */
pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0);
if (pmc) {
- if (!msr_info->host_initiated)
- data = (s64)data;
pmc->counter += data - pmc_read_counter(pmc);
return 0;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2d32b67a1..d7f89387b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -202,6 +202,7 @@ module_param(npt, int, S_IRUGO);
static int nested = true;
module_param(nested, int, S_IRUGO);
+static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -1080,12 +1081,12 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
{
u64 tsc;
- tsc = svm_scale_tsc(vcpu, native_read_tsc());
+ tsc = svm_scale_tsc(vcpu, rdtsc());
return target_tsc - tsc;
}
-static void init_vmcb(struct vcpu_svm *svm, bool init_event)
+static void init_vmcb(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
struct vmcb_save_area *save = &svm->vmcb->save;
@@ -1106,6 +1107,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
set_exception_intercept(svm, PF_VECTOR);
set_exception_intercept(svm, UD_VECTOR);
set_exception_intercept(svm, MC_VECTOR);
+ set_exception_intercept(svm, AC_VECTOR);
set_intercept(svm, INTERCEPT_INTR);
set_intercept(svm, INTERCEPT_NMI);
@@ -1156,8 +1158,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
- if (!init_event)
- svm_set_efer(&svm->vcpu, 0);
+ svm_set_efer(&svm->vcpu, 0);
save->dr6 = 0xffff0ff0;
kvm_set_rflags(&svm->vcpu, 2);
save->rip = 0x0000fff0;
@@ -1167,7 +1168,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
* svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
* It also updates the guest-visible cr0 value.
*/
- (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
+ svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
kvm_mmu_reset_context(&svm->vcpu);
save->cr4 = X86_CR4_PAE;
@@ -1211,7 +1212,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
}
- init_vmcb(svm, init_event);
+ init_vmcb(svm);
kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
@@ -1267,7 +1268,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
clear_page(svm->vmcb);
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
svm->asid_generation = 0;
- init_vmcb(svm, false);
+ init_vmcb(svm);
svm_init_osvw(&svm->vcpu);
@@ -1795,6 +1796,12 @@ static int ud_interception(struct vcpu_svm *svm)
return 1;
}
+static int ac_interception(struct vcpu_svm *svm)
+{
+ kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0);
+ return 1;
+}
+
static void svm_fpu_activate(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1889,7 +1896,7 @@ static int shutdown_interception(struct vcpu_svm *svm)
* so reinitialize it.
*/
clear_page(svm->vmcb);
- init_vmcb(svm, false);
+ init_vmcb(svm);
kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
return 0;
@@ -2014,6 +2021,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
vcpu->arch.mmu.shadow_root_level = get_npt_level();
+ reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}
@@ -3080,7 +3088,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr_info->index) {
case MSR_IA32_TSC: {
msr_info->data = svm->vmcb->control.tsc_offset +
- svm_scale_tsc(vcpu, native_read_tsc());
+ svm_scale_tsc(vcpu, rdtsc());
break;
}
@@ -3369,6 +3377,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
[SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
[SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
+ [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
[SVM_EXIT_INTR] = intr_interception,
[SVM_EXIT_NMI] = nmi_interception,
[SVM_EXIT_SMI] = nop_on_interception,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index aa9e82295..343d3692d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1264,7 +1264,7 @@ static void vmcs_load(struct vmcs *vmcs)
vmcs, phys_addr);
}
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
/*
* This bitmap is used to indicate whether the vmclear
* operation is enabled on all cpus. All disabled by
@@ -1302,7 +1302,7 @@ static void crash_vmclear_local_loaded_vmcss(void)
#else
static inline void crash_enable_local_vmclear(int cpu) { }
static inline void crash_disable_local_vmclear(int cpu) { }
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
static void __loaded_vmcs_clear(void *arg)
{
@@ -1567,7 +1567,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
u32 eb;
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
- (1u << NM_VECTOR) | (1u << DB_VECTOR);
+ (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR);
if ((vcpu->guest_debug &
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
@@ -2236,7 +2236,7 @@ static u64 guest_read_tsc(void)
{
u64 host_tsc, tsc_offset;
- rdtscll(host_tsc);
+ host_tsc = rdtsc();
tsc_offset = vmcs_read64(TSC_OFFSET);
return host_tsc + tsc_offset;
}
@@ -2317,7 +2317,7 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho
static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
{
- return target_tsc - native_read_tsc();
+ return target_tsc - rdtsc();
}
static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu)
@@ -2443,10 +2443,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
#endif
CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
- CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
- CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
- CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW |
- CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+ CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
+ CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
+ CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
+ CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
/*
* We can allow some features even when not supported by the
* hardware. For example, L1 can specify an MSR bitmap - and we
@@ -3150,7 +3150,7 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
struct page *pages;
struct vmcs *vmcs;
- pages = alloc_pages_exact_node(node, GFP_KERNEL, vmcs_config.order);
+ pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order);
if (!pages)
return NULL;
vmcs = page_address(pages);
@@ -3423,12 +3423,12 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
vmx_segment_cache_clear(to_vmx(vcpu));
guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
- if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
+ if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
pr_debug_ratelimited("%s: tss fixup for long mode. \n",
__func__);
vmcs_write32(GUEST_TR_AR_BYTES,
- (guest_tr_ar & ~AR_TYPE_MASK)
- | AR_TYPE_BUSY_64_TSS);
+ (guest_tr_ar & ~VMX_AR_TYPE_MASK)
+ | VMX_AR_TYPE_BUSY_64_TSS);
}
vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
}
@@ -3719,7 +3719,7 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu)
return 0;
else {
int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
- return AR_DPL(ar);
+ return VMX_AR_DPL(ar);
}
}
@@ -3847,11 +3847,11 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
if (cs.unusable)
return false;
- if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
+ if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
return false;
if (!cs.s)
return false;
- if (cs.type & AR_TYPE_WRITEABLE_MASK) {
+ if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
if (cs.dpl > cs_rpl)
return false;
} else {
@@ -3901,7 +3901,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
return false;
if (!var.present)
return false;
- if (~var.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK)) {
+ if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
if (var.dpl < rpl) /* DPL < RPL */
return false;
}
@@ -4105,17 +4105,13 @@ static void seg_setup(int seg)
static int alloc_apic_access_page(struct kvm *kvm)
{
struct page *page;
- struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
mutex_lock(&kvm->slots_lock);
if (kvm->arch.apic_access_page_done)
goto out;
- kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
- kvm_userspace_mem.flags = 0;
- kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE;
- kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
+ r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+ APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
if (r)
goto out;
@@ -4140,17 +4136,12 @@ static int alloc_identity_pagetable(struct kvm *kvm)
{
/* Called with kvm->slots_lock held. */
- struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
BUG_ON(kvm->arch.ept_identity_pagetable_done);
- kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
- kvm_userspace_mem.flags = 0;
- kvm_userspace_mem.guest_phys_addr =
- kvm->arch.ept_identity_map_addr;
- kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
+ r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
+ kvm->arch.ept_identity_map_addr, PAGE_SIZE);
return r;
}
@@ -4780,8 +4771,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx_set_cr0(vcpu, cr0); /* enter rmode */
vmx->vcpu.arch.cr0 = cr0;
vmx_set_cr4(vcpu, 0);
- if (!init_event)
- vmx_set_efer(vcpu, 0);
+ vmx_set_efer(vcpu, 0);
vmx_fpu_activate(vcpu);
update_exception_bitmap(vcpu);
@@ -4949,14 +4939,9 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
int ret;
- struct kvm_userspace_memory_region tss_mem = {
- .slot = TSS_PRIVATE_MEMSLOT,
- .guest_phys_addr = addr,
- .memory_size = PAGE_SIZE * 3,
- .flags = 0,
- };
- ret = x86_set_memory_region(kvm, &tss_mem);
+ ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
+ PAGE_SIZE * 3);
if (ret)
return ret;
kvm->arch.tss_addr = addr;
@@ -5118,6 +5103,9 @@ static int handle_exception(struct kvm_vcpu *vcpu)
return handle_rmode_exception(vcpu, ex_no, error_code);
switch (ex_no) {
+ case AC_VECTOR:
+ kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
+ return 1;
case DB_VECTOR:
dr6 = vmcs_readl(EXIT_QUALIFICATION);
if (!(vcpu->guest_debug &
@@ -5759,73 +5747,9 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
}
-static u64 ept_rsvd_mask(u64 spte, int level)
-{
- int i;
- u64 mask = 0;
-
- for (i = 51; i > boot_cpu_data.x86_phys_bits; i--)
- mask |= (1ULL << i);
-
- if (level == 4)
- /* bits 7:3 reserved */
- mask |= 0xf8;
- else if (spte & (1ULL << 7))
- /*
- * 1GB/2MB page, bits 29:12 or 20:12 reserved respectively,
- * level == 1 if the hypervisor is using the ignored bit 7.
- */
- mask |= (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE;
- else if (level > 1)
- /* bits 6:3 reserved */
- mask |= 0x78;
-
- return mask;
-}
-
-static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
- int level)
-{
- printk(KERN_ERR "%s: spte 0x%llx level %d\n", __func__, spte, level);
-
- /* 010b (write-only) */
- WARN_ON((spte & 0x7) == 0x2);
-
- /* 110b (write/execute) */
- WARN_ON((spte & 0x7) == 0x6);
-
- /* 100b (execute-only) and value not supported by logical processor */
- if (!cpu_has_vmx_ept_execute_only())
- WARN_ON((spte & 0x7) == 0x4);
-
- /* not 000b */
- if ((spte & 0x7)) {
- u64 rsvd_bits = spte & ept_rsvd_mask(spte, level);
-
- if (rsvd_bits != 0) {
- printk(KERN_ERR "%s: rsvd_bits = 0x%llx\n",
- __func__, rsvd_bits);
- WARN_ON(1);
- }
-
- /* bits 5:3 are _not_ reserved for large page or leaf page */
- if ((rsvd_bits & 0x38) == 0) {
- u64 ept_mem_type = (spte & 0x38) >> 3;
-
- if (ept_mem_type == 2 || ept_mem_type == 3 ||
- ept_mem_type == 7) {
- printk(KERN_ERR "%s: ept_mem_type=0x%llx\n",
- __func__, ept_mem_type);
- WARN_ON(1);
- }
- }
- }
-}
-
static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
{
- u64 sptes[4];
- int nr_sptes, i, ret;
+ int ret;
gpa_t gpa;
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
@@ -5846,13 +5770,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
return 1;
/* It is the real ept misconfig */
- printk(KERN_ERR "EPT: Misconfiguration.\n");
- printk(KERN_ERR "EPT: GPA: 0x%llx\n", gpa);
-
- nr_sptes = kvm_mmu_get_spte_hierarchy(vcpu, gpa, sptes);
-
- for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i)
- ept_misconfig_inspect_spte(vcpu, sptes[i-1], i);
+ WARN_ON(1);
vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
@@ -6248,6 +6166,11 @@ static int handle_mwait(struct kvm_vcpu *vcpu)
return handle_nop(vcpu);
}
+static int handle_monitor_trap(struct kvm_vcpu *vcpu)
+{
+ return 1;
+}
+
static int handle_monitor(struct kvm_vcpu *vcpu)
{
printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
@@ -6410,8 +6333,12 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
*/
static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
unsigned long exit_qualification,
- u32 vmx_instruction_info, gva_t *ret)
+ u32 vmx_instruction_info, bool wr, gva_t *ret)
{
+ gva_t off;
+ bool exn;
+ struct kvm_segment s;
+
/*
* According to Vol. 3B, "Information for VM Exits Due to Instruction
* Execution", on an exit, vmx_instruction_info holds most of the
@@ -6436,22 +6363,63 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
/* Addr = segment_base + offset */
/* offset = base + [index * scale] + displacement */
- *ret = vmx_get_segment_base(vcpu, seg_reg);
+ off = exit_qualification; /* holds the displacement */
if (base_is_valid)
- *ret += kvm_register_read(vcpu, base_reg);
+ off += kvm_register_read(vcpu, base_reg);
if (index_is_valid)
- *ret += kvm_register_read(vcpu, index_reg)<<scaling;
- *ret += exit_qualification; /* holds the displacement */
+ off += kvm_register_read(vcpu, index_reg)<<scaling;
+ vmx_get_segment(vcpu, &s, seg_reg);
+ *ret = s.base + off;
if (addr_size == 1) /* 32 bit */
*ret &= 0xffffffff;
- /*
- * TODO: throw #GP (and return 1) in various cases that the VM*
- * instructions require it - e.g., offset beyond segment limit,
- * unusable or unreadable/unwritable segment, non-canonical 64-bit
- * address, and so on. Currently these are not checked.
- */
+ /* Checks for #GP/#SS exceptions. */
+ exn = false;
+ if (is_protmode(vcpu)) {
+ /* Protected mode: apply checks for segment validity in the
+ * following order:
+ * - segment type check (#GP(0) may be thrown)
+ * - usability check (#GP(0)/#SS(0))
+ * - limit check (#GP(0)/#SS(0))
+ */
+ if (wr)
+ /* #GP(0) if the destination operand is located in a
+ * read-only data segment or any code segment.
+ */
+ exn = ((s.type & 0xa) == 0 || (s.type & 8));
+ else
+ /* #GP(0) if the source operand is located in an
+ * execute-only code segment
+ */
+ exn = ((s.type & 0xa) == 8);
+ }
+ if (exn) {
+ kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+ return 1;
+ }
+ if (is_long_mode(vcpu)) {
+ /* Long mode: #GP(0)/#SS(0) if the memory address is in a
+ * non-canonical form. This is an only check for long mode.
+ */
+ exn = is_noncanonical_address(*ret);
+ } else if (is_protmode(vcpu)) {
+ /* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
+ */
+ exn = (s.unusable != 0);
+ /* Protected mode: #GP(0)/#SS(0) if the memory
+ * operand is outside the segment limit.
+ */
+ exn = exn || (off + sizeof(u64) > s.limit);
+ }
+ if (exn) {
+ kvm_queue_exception_e(vcpu,
+ seg_reg == VCPU_SREG_SS ?
+ SS_VECTOR : GP_VECTOR,
+ 0);
+ return 1;
+ }
+
return 0;
}
@@ -6473,7 +6441,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
int maxphyaddr = cpuid_maxphyaddr(vcpu);
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
- vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
+ vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
return 1;
if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr,
@@ -7001,7 +6969,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
field_value);
} else {
if (get_vmx_mem_address(vcpu, exit_qualification,
- vmx_instruction_info, &gva))
+ vmx_instruction_info, true, &gva))
return 1;
/* _system ok, as nested_vmx_check_permission verified cpl=0 */
kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva,
@@ -7038,7 +7006,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
(((vmx_instruction_info) >> 3) & 0xf));
else {
if (get_vmx_mem_address(vcpu, exit_qualification,
- vmx_instruction_info, &gva))
+ vmx_instruction_info, false, &gva))
return 1;
if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva,
&field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
@@ -7130,7 +7098,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
return 1;
if (get_vmx_mem_address(vcpu, exit_qualification,
- vmx_instruction_info, &vmcs_gva))
+ vmx_instruction_info, true, &vmcs_gva))
return 1;
/* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */
if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva,
@@ -7186,7 +7154,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
* operand is read even if it isn't needed (e.g., for type==global)
*/
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
- vmx_instruction_info, &gva))
+ vmx_instruction_info, false, &gva))
return 1;
if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
sizeof(operand), &e)) {
@@ -7284,6 +7252,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
[EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
[EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
+ [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap,
[EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
[EXIT_REASON_INVEPT] = handle_invept,
[EXIT_REASON_INVVPID] = handle_invvpid,
@@ -7544,6 +7513,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
return true;
case EXIT_REASON_MWAIT_INSTRUCTION:
return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
+ case EXIT_REASON_MONITOR_TRAP_FLAG:
+ return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
case EXIT_REASON_MONITOR_INSTRUCTION:
return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
case EXIT_REASON_PAUSE_INSTRUCTION:
@@ -10435,7 +10406,7 @@ static int __init vmx_init(void)
if (r)
return r;
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
rcu_assign_pointer(crash_vmclear_loaded_vmcss,
crash_vmclear_local_loaded_vmcss);
#endif
@@ -10445,7 +10416,7 @@ static int __init vmx_init(void)
static void __exit vmx_exit(void)
{
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
synchronize_rcu();
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 373328b71..43609af03 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -29,6 +29,7 @@
#include "cpuid.h"
#include "assigned-dev.h"
#include "pmu.h"
+#include "hyperv.h"
#include <linux/clocksource.h>
#include <linux/interrupt.h>
@@ -148,6 +149,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "nmi_window", VCPU_STAT(nmi_window_exits) },
{ "halt_exits", VCPU_STAT(halt_exits) },
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
+ { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "hypercalls", VCPU_STAT(hypercalls) },
{ "request_irq", VCPU_STAT(request_irq_exits) },
@@ -221,11 +223,9 @@ static void shared_msr_update(unsigned slot, u32 msr)
void kvm_define_shared_msr(unsigned slot, u32 msr)
{
BUG_ON(slot >= KVM_NR_SHARED_MSRS);
+ shared_msrs_global.msrs[slot] = msr;
if (slot >= shared_msrs_global.nr)
shared_msrs_global.nr = slot + 1;
- shared_msrs_global.msrs[slot] = msr;
- /* we need ensured the shared_msr_global have been updated */
- smp_wmb();
}
EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
@@ -526,7 +526,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
}
for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
if (is_present_gpte(pdpte[i]) &&
- (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
+ (pdpte[i] &
+ vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) {
ret = 0;
goto out;
}
@@ -621,7 +622,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if ((cr0 ^ old_cr0) & update_bits)
kvm_mmu_reset_context(vcpu);
- if ((cr0 ^ old_cr0) & X86_CR0_CD)
+ if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
+ kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
+ !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
return 0;
@@ -949,6 +952,8 @@ static u32 emulated_msrs[] = {
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
+ HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
+ HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
MSR_KVM_PV_EOI_EN,
@@ -1217,11 +1222,6 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
__func__, base_khz, scaled_khz, shift, *pmultiplier);
}
-static inline u64 get_kernel_ns(void)
-{
- return ktime_get_boot_ns();
-}
-
#ifdef CONFIG_X86_64
static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
#endif
@@ -1444,20 +1444,8 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);
static cycle_t read_tsc(void)
{
- cycle_t ret;
- u64 last;
-
- /*
- * Empirically, a fence (of type that depends on the CPU)
- * before rdtsc is enough to ensure that rdtsc is ordered
- * with respect to loads. The various CPU manuals are unclear
- * as to whether rdtsc can be reordered with later loads,
- * but no one has ever seen it happen.
- */
- rdtsc_barrier();
- ret = (cycle_t)vget_cycles();
-
- last = pvclock_gtod_data.clock.cycle_last;
+ cycle_t ret = (cycle_t)rdtsc_ordered();
+ u64 last = pvclock_gtod_data.clock.cycle_last;
if (likely(ret >= last))
return ret;
@@ -1646,7 +1634,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
return 1;
}
if (!use_master_clock) {
- host_tsc = native_read_tsc();
+ host_tsc = rdtsc();
kernel_ns = get_kernel_ns();
}
@@ -1722,8 +1710,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->pvclock_set_guest_stopped_request = false;
}
- pvclock_flags |= PVCLOCK_COUNTS_FROM_ZERO;
-
/* If the host uses TSC clocksource, then it is stable */
if (use_master_clock)
pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
@@ -1869,123 +1855,6 @@ out:
return r;
}
-static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
-{
- return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
-}
-
-static bool kvm_hv_msr_partition_wide(u32 msr)
-{
- bool r = false;
- switch (msr) {
- case HV_X64_MSR_GUEST_OS_ID:
- case HV_X64_MSR_HYPERCALL:
- case HV_X64_MSR_REFERENCE_TSC:
- case HV_X64_MSR_TIME_REF_COUNT:
- r = true;
- break;
- }
-
- return r;
-}
-
-static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
-{
- struct kvm *kvm = vcpu->kvm;
-
- switch (msr) {
- case HV_X64_MSR_GUEST_OS_ID:
- kvm->arch.hv_guest_os_id = data;
- /* setting guest os id to zero disables hypercall page */
- if (!kvm->arch.hv_guest_os_id)
- kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
- break;
- case HV_X64_MSR_HYPERCALL: {
- u64 gfn;
- unsigned long addr;
- u8 instructions[4];
-
- /* if guest os id is not set hypercall should remain disabled */
- if (!kvm->arch.hv_guest_os_id)
- break;
- if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
- kvm->arch.hv_hypercall = data;
- break;
- }
- gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
- addr = gfn_to_hva(kvm, gfn);
- if (kvm_is_error_hva(addr))
- return 1;
- kvm_x86_ops->patch_hypercall(vcpu, instructions);
- ((unsigned char *)instructions)[3] = 0xc3; /* ret */
- if (__copy_to_user((void __user *)addr, instructions, 4))
- return 1;
- kvm->arch.hv_hypercall = data;
- mark_page_dirty(kvm, gfn);
- break;
- }
- case HV_X64_MSR_REFERENCE_TSC: {
- u64 gfn;
- HV_REFERENCE_TSC_PAGE tsc_ref;
- memset(&tsc_ref, 0, sizeof(tsc_ref));
- kvm->arch.hv_tsc_page = data;
- if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
- break;
- gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
- if (kvm_write_guest(kvm, gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
- &tsc_ref, sizeof(tsc_ref)))
- return 1;
- mark_page_dirty(kvm, gfn);
- break;
- }
- default:
- vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
- "data 0x%llx\n", msr, data);
- return 1;
- }
- return 0;
-}
-
-static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
-{
- switch (msr) {
- case HV_X64_MSR_APIC_ASSIST_PAGE: {
- u64 gfn;
- unsigned long addr;
-
- if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
- vcpu->arch.hv_vapic = data;
- if (kvm_lapic_enable_pv_eoi(vcpu, 0))
- return 1;
- break;
- }
- gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
- addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
- if (kvm_is_error_hva(addr))
- return 1;
- if (__clear_user((void __user *)addr, PAGE_SIZE))
- return 1;
- vcpu->arch.hv_vapic = data;
- kvm_vcpu_mark_page_dirty(vcpu, gfn);
- if (kvm_lapic_enable_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
- return 1;
- break;
- }
- case HV_X64_MSR_EOI:
- return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
- case HV_X64_MSR_ICR:
- return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
- case HV_X64_MSR_TPR:
- return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
- default:
- vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
- "data 0x%llx\n", msr, data);
- return 1;
- }
-
- return 0;
-}
-
static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
{
gpa_t gpa = data & ~0x3f;
@@ -2138,8 +2007,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
&vcpu->requests);
ka->boot_vcpu_runs_old_kvmclock = tmp;
-
- ka->kvmclock_offset = -get_kernel_ns();
}
vcpu->arch.time = data;
@@ -2224,15 +2091,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*/
break;
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
- if (kvm_hv_msr_partition_wide(msr)) {
- int r;
- mutex_lock(&vcpu->kvm->lock);
- r = set_msr_hyperv_pw(vcpu, msr, data);
- mutex_unlock(&vcpu->kvm->lock);
- return r;
- } else
- return set_msr_hyperv(vcpu, msr, data);
- break;
+ case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+ case HV_X64_MSR_CRASH_CTL:
+ return kvm_hv_set_msr_common(vcpu, msr, data,
+ msr_info->host_initiated);
case MSR_IA32_BBL_CR_CTL3:
/* Drop writes to this legacy MSR -- see rdmsr
* counterpart for further detail.
@@ -2315,68 +2177,6 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
return 0;
}
-static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
-{
- u64 data = 0;
- struct kvm *kvm = vcpu->kvm;
-
- switch (msr) {
- case HV_X64_MSR_GUEST_OS_ID:
- data = kvm->arch.hv_guest_os_id;
- break;
- case HV_X64_MSR_HYPERCALL:
- data = kvm->arch.hv_hypercall;
- break;
- case HV_X64_MSR_TIME_REF_COUNT: {
- data =
- div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
- break;
- }
- case HV_X64_MSR_REFERENCE_TSC:
- data = kvm->arch.hv_tsc_page;
- break;
- default:
- vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
- return 1;
- }
-
- *pdata = data;
- return 0;
-}
-
-static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
-{
- u64 data = 0;
-
- switch (msr) {
- case HV_X64_MSR_VP_INDEX: {
- int r;
- struct kvm_vcpu *v;
- kvm_for_each_vcpu(r, v, vcpu->kvm) {
- if (v == vcpu) {
- data = r;
- break;
- }
- }
- break;
- }
- case HV_X64_MSR_EOI:
- return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
- case HV_X64_MSR_ICR:
- return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
- case HV_X64_MSR_TPR:
- return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
- case HV_X64_MSR_APIC_ASSIST_PAGE:
- data = vcpu->arch.hv_vapic;
- break;
- default:
- vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
- return 1;
- }
- *pdata = data;
- return 0;
-}
-
int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
switch (msr_info->index) {
@@ -2495,14 +2295,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0x20000000;
break;
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
- if (kvm_hv_msr_partition_wide(msr_info->index)) {
- int r;
- mutex_lock(&vcpu->kvm->lock);
- r = get_msr_hyperv_pw(vcpu, msr_info->index, &msr_info->data);
- mutex_unlock(&vcpu->kvm->lock);
- return r;
- } else
- return get_msr_hyperv(vcpu, msr_info->index, &msr_info->data);
+ case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+ case HV_X64_MSR_CRASH_CTL:
+ return kvm_hv_get_msr_common(vcpu,
+ msr_info->index, &msr_info->data);
break;
case MSR_IA32_BBL_CR_CTL3:
/* This legacy MSR exists but isn't fully documented in current
@@ -2653,6 +2449,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_TSC_DEADLINE_TIMER:
case KVM_CAP_ENABLE_CAP_VM:
case KVM_CAP_DISABLE_QUIRKS:
+ case KVM_CAP_SET_BOOT_CPU_ID:
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
case KVM_CAP_ASSIGN_DEV_IRQ:
case KVM_CAP_PCI_2_3:
@@ -2812,7 +2609,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
- native_read_tsc() - vcpu->arch.last_host_tsc;
+ rdtsc() - vcpu->arch.last_host_tsc;
if (tsc_delta < 0)
mark_tsc_unstable("KVM discovered backwards TSC");
if (check_tsc_unstable()) {
@@ -2840,7 +2637,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
kvm_x86_ops->vcpu_put(vcpu);
kvm_put_guest_fpu(vcpu);
- vcpu->arch.last_host_tsc = native_read_tsc();
+ vcpu->arch.last_host_tsc = rdtsc();
}
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
@@ -3819,30 +3616,25 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_ioapic_init(kvm);
if (r) {
mutex_lock(&kvm->slots_lock);
- kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
- &vpic->dev_master);
- kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
- &vpic->dev_slave);
- kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
- &vpic->dev_eclr);
+ kvm_destroy_pic(vpic);
mutex_unlock(&kvm->slots_lock);
- kfree(vpic);
goto create_irqchip_unlock;
}
} else
goto create_irqchip_unlock;
- smp_wmb();
- kvm->arch.vpic = vpic;
- smp_wmb();
r = kvm_setup_default_irq_routing(kvm);
if (r) {
mutex_lock(&kvm->slots_lock);
mutex_lock(&kvm->irq_lock);
kvm_ioapic_destroy(kvm);
- kvm_destroy_pic(kvm);
+ kvm_destroy_pic(vpic);
mutex_unlock(&kvm->irq_lock);
mutex_unlock(&kvm->slots_lock);
+ goto create_irqchip_unlock;
}
+ /* Write kvm->irq_routing before kvm->arch.vpic. */
+ smp_wmb();
+ kvm->arch.vpic = vpic;
create_irqchip_unlock:
mutex_unlock(&kvm->lock);
break;
@@ -3969,6 +3761,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_reinject(kvm, &control);
break;
}
+ case KVM_SET_BOOT_CPU_ID:
+ r = 0;
+ mutex_lock(&kvm->lock);
+ if (atomic_read(&kvm->online_vcpus) != 0)
+ r = -EBUSY;
+ else
+ kvm->arch.bsp_vcpu_id = arg;
+ mutex_unlock(&kvm->lock);
+ break;
case KVM_XEN_HVM_CONFIG: {
r = -EFAULT;
if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
@@ -4260,6 +4061,15 @@ static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
}
+static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
+ unsigned long addr, void *val, unsigned int bytes)
+{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+ int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
+
+ return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
+}
+
int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val,
unsigned int bytes,
@@ -4995,6 +4805,7 @@ static const struct x86_emulate_ops emulate_ops = {
.write_gpr = emulator_write_gpr,
.read_std = kvm_read_guest_virt_system,
.write_std = kvm_write_guest_virt_system,
+ .read_phys = kvm_read_guest_phys_system,
.fetch = kvm_fetch_guest_virt,
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
@@ -5884,66 +5695,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
-int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
-{
- u64 param, ingpa, outgpa, ret;
- uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
- bool fast, longmode;
-
- /*
- * hypercall generates UD from non zero cpl and real mode
- * per HYPER-V spec
- */
- if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 0;
- }
-
- longmode = is_64_bit_mode(vcpu);
-
- if (!longmode) {
- param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
- (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
- ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
- (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
- outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
- (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
- }
-#ifdef CONFIG_X86_64
- else {
- param = kvm_register_read(vcpu, VCPU_REGS_RCX);
- ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
- outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
- }
-#endif
-
- code = param & 0xffff;
- fast = (param >> 16) & 0x1;
- rep_cnt = (param >> 32) & 0xfff;
- rep_idx = (param >> 48) & 0xfff;
-
- trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
-
- switch (code) {
- case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
- kvm_vcpu_on_spin(vcpu);
- break;
- default:
- res = HV_STATUS_INVALID_HYPERCALL_CODE;
- break;
- }
-
- ret = res | (((u64)rep_done & 0xfff) << 32);
- if (longmode) {
- kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
- } else {
- kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
- kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
- }
-
- return 1;
-}
-
/*
* kvm_pv_kick_cpu_op: Kick a vcpu.
*
@@ -6203,6 +5954,7 @@ static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg));
}
+#ifdef CONFIG_X86_64
static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
{
struct kvm_segment seg;
@@ -6218,6 +5970,7 @@ static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
put_smstate(u32, buf, offset + 4, seg.limit);
put_smstate(u64, buf, offset + 8, seg.base);
}
+#endif
static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
{
@@ -6520,6 +6273,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu_scan_ioapic(vcpu);
if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
kvm_vcpu_reload_apic_access_page(vcpu);
+ if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
+ vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+ vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
+ r = 0;
+ goto out;
+ }
}
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -6629,7 +6388,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
hw_breakpoint_restore();
vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu,
- native_read_tsc());
+ rdtsc());
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
@@ -7443,7 +7202,7 @@ int kvm_arch_hardware_enable(void)
if (ret != 0)
return ret;
- local_tsc = native_read_tsc();
+ local_tsc = rdtsc();
stable = !check_tsc_unstable();
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -7547,6 +7306,17 @@ void kvm_arch_check_processor_compat(void *rtn)
kvm_x86_ops->check_processor_compatibility(rtn);
}
+bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
+{
+ return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
+
+bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
+}
+
bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
{
return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
@@ -7721,34 +7491,66 @@ void kvm_arch_sync_events(struct kvm *kvm)
kvm_free_pit(kvm);
}
-int __x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem)
+int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
int i, r;
+ unsigned long hva;
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memory_slot *slot, old;
/* Called with kvm->slots_lock held. */
- BUG_ON(mem->slot >= KVM_MEM_SLOTS_NUM);
+ if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
+ return -EINVAL;
+
+ slot = id_to_memslot(slots, id);
+ if (size) {
+ if (WARN_ON(slot->npages))
+ return -EEXIST;
+ /*
+ * MAP_SHARED to prevent internal slot pages from being moved
+ * by fork()/COW.
+ */
+ hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, 0);
+ if (IS_ERR((void *)hva))
+ return PTR_ERR((void *)hva);
+ } else {
+ if (!slot->npages)
+ return 0;
+
+ hva = 0;
+ }
+
+ old = *slot;
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
- struct kvm_userspace_memory_region m = *mem;
+ struct kvm_userspace_memory_region m;
- m.slot |= i << 16;
+ m.slot = id | (i << 16);
+ m.flags = 0;
+ m.guest_phys_addr = gpa;
+ m.userspace_addr = hva;
+ m.memory_size = size;
r = __kvm_set_memory_region(kvm, &m);
if (r < 0)
return r;
}
+ if (!size) {
+ r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
+ WARN_ON(r < 0);
+ }
+
return 0;
}
EXPORT_SYMBOL_GPL(__x86_set_memory_region);
-int x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem)
+int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
int r;
mutex_lock(&kvm->slots_lock);
- r = __x86_set_memory_region(kvm, mem);
+ r = __x86_set_memory_region(kvm, id, gpa, size);
mutex_unlock(&kvm->slots_lock);
return r;
@@ -7763,16 +7565,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
* unless the the memory map has changed due to process exit
* or fd copying.
*/
- struct kvm_userspace_memory_region mem;
- memset(&mem, 0, sizeof(mem));
- mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
- x86_set_memory_region(kvm, &mem);
-
- mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
- x86_set_memory_region(kvm, &mem);
-
- mem.slot = TSS_PRIVATE_MEMSLOT;
- x86_set_memory_region(kvm, &mem);
+ x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
+ x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
+ x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
}
kvm_iommu_unmap_guest(kvm);
kfree(kvm->arch.vpic);
@@ -7875,27 +7670,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
- /*
- * Only private memory slots need to be mapped here since
- * KVM_SET_MEMORY_REGION ioctl is no longer supported.
- */
- if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) {
- unsigned long userspace_addr;
-
- /*
- * MAP_SHARED to prevent internal slot pages from being moved
- * by fork()/COW.
- */
- userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE,
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS, 0);
-
- if (IS_ERR((void *)userspace_addr))
- return PTR_ERR((void *)userspace_addr);
-
- memslot->userspace_addr = userspace_addr;
- }
-
return 0;
}
@@ -7957,17 +7731,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
int nr_mmu_pages = 0;
- if (change == KVM_MR_DELETE && old->id >= KVM_USER_MEM_SLOTS) {
- int ret;
-
- ret = vm_munmap(old->userspace_addr,
- old->npages * PAGE_SIZE);
- if (ret < 0)
- printk(KERN_WARNING
- "kvm_vm_ioctl_set_memory_region: "
- "failed to munmap memory\n");
- }
-
if (!kvm->arch.n_requested_mmu_pages)
nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 0ca2f3e48..2f822cd88 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -147,6 +147,11 @@ static inline void kvm_register_writel(struct kvm_vcpu *vcpu,
return kvm_register_write(vcpu, reg, val);
}
+static inline u64 get_kernel_ns(void)
+{
+ return ktime_get_boot_ns();
+}
+
static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
{
return !(kvm->arch.disabled_quirks & quirk);