Skip to content

Commit

Permalink
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Browse files Browse the repository at this point in the history
Pull more kvm updates from Paolo Bonzini:

 - Xen timer fixes

 - Documentation formatting fixes

 - Make rseq selftest compatible with glibc-2.35

 - Fix handling of illegal LEA reg, reg

 - Cleanup creation of debugfs entries

 - Fix steal time cache handling bug

 - Fixes for MMIO caching

 - Optimize computation of number of LBRs

 - Fix uninitialized field in guest_maxphyaddr < host_maxphyaddr path

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (26 commits)
  KVM: x86/MMU: properly format KVM_CAP_VM_DISABLE_NX_HUGE_PAGES capability table
  Documentation: KVM: extend KVM_CAP_VM_DISABLE_NX_HUGE_PAGES heading underline
  KVM: VMX: Adjust number of LBR records for PERF_CAPABILITIES at refresh
  KVM: VMX: Use proper type-safe functions for vCPU => LBRs helpers
  KVM: x86: Refresh PMU after writes to MSR_IA32_PERF_CAPABILITIES
  KVM: selftests: Test all possible "invalid" PERF_CAPABILITIES.LBR_FMT vals
  KVM: selftests: Use getcpu() instead of sched_getcpu() in rseq_test
  KVM: selftests: Make rseq compatible with glibc-2.35
  KVM: Actually create debugfs in kvm_create_vm()
  KVM: Pass the name of the VM fd to kvm_create_vm_debugfs()
  KVM: Get an fd before creating the VM
  KVM: Shove vcpu stats_id init into kvm_vcpu_init()
  KVM: Shove vm stats_id init into kvm_create_vm()
  KVM: x86/mmu: Add sanity check that MMIO SPTE mask doesn't overlap gen
  KVM: x86/mmu: rename trace function name for asynchronous page fault
  KVM: x86/xen: Stop Xen timer before changing IRQ
  KVM: x86/xen: Initialize Xen timer only once
  KVM: SVM: Disable SEV-ES support if MMIO caching is disable
  KVM: x86/mmu: Fully re-evaluate MMIO caching when SPTE masks change
  KVM: x86: Tag kvm_mmu_x86_module_init() with __init
  ...
  • Loading branch information
torvalds committed Aug 11, 2022
2 parents 2ae08b3 + 19a7cc8 commit e18a904
Show file tree
Hide file tree
Showing 19 changed files with 221 additions and 129 deletions.
10 changes: 5 additions & 5 deletions Documentation/virt/kvm/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8262,15 +8262,15 @@ dump related UV data. Also the vcpu ioctl `KVM_S390_PV_CPU_COMMAND` is
available and supports the `KVM_PV_DUMP_CPU` subcommand.

8.38 KVM_CAP_VM_DISABLE_NX_HUGE_PAGES
---------------------------
-------------------------------------

:Capability KVM_CAP_VM_DISABLE_NX_HUGE_PAGES
:Capability: KVM_CAP_VM_DISABLE_NX_HUGE_PAGES
:Architectures: x86
:Type: vm
:Parameters: arg[0] must be 0.
:Returns 0 on success, -EPERM if the userspace process does not
have CAP_SYS_BOOT, -EINVAL if args[0] is not 0 or any vCPUs have been
created.
:Returns: 0 on success, -EPERM if the userspace process does not
have CAP_SYS_BOOT, -EINVAL if args[0] is not 0 or any vCPUs have been
created.

This capability disables the NX huge pages mitigation for iTLB MULTIHIT.

Expand Down
2 changes: 1 addition & 1 deletion arch/x86/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -1704,7 +1704,7 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
#define kvm_arch_pmi_in_guest(vcpu) \
((vcpu) && (vcpu)->arch.handling_intr_from_guest)

void kvm_mmu_x86_module_init(void);
void __init kvm_mmu_x86_module_init(void);
int kvm_mmu_vendor_module_init(void);
void kvm_mmu_vendor_module_exit(void);

Expand Down
6 changes: 5 additions & 1 deletion arch/x86/kvm/emulate.c
Original file line number Diff line number Diff line change
Expand Up @@ -4578,6 +4578,10 @@ static const struct mode_dual mode_dual_63 = {
N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
};

static const struct instr_dual instr_dual_8d = {
D(DstReg | SrcMem | ModRM | NoAccess), N
};

static const struct opcode opcode_table[256] = {
/* 0x00 - 0x07 */
F6ALU(Lock, em_add),
Expand Down Expand Up @@ -4634,7 +4638,7 @@ static const struct opcode opcode_table[256] = {
I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
D(ModRM | SrcMem | NoAccess | DstReg),
ID(0, &instr_dual_8d),
I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
G(0, group1A),
/* 0x90 - 0x97 */
Expand Down
8 changes: 5 additions & 3 deletions arch/x86/kvm/lapic.c
Original file line number Diff line number Diff line change
Expand Up @@ -2284,10 +2284,12 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
struct kvm_lapic *apic = vcpu->arch.apic;
u64 val;

if (apic_x2apic_mode(apic))
kvm_lapic_msr_read(apic, offset, &val);
else
if (apic_x2apic_mode(apic)) {
if (KVM_BUG_ON(kvm_lapic_msr_read(apic, offset, &val), vcpu->kvm))
return;
} else {
val = kvm_lapic_get_reg(apic, offset);
}

/*
* ICR is a single 64-bit register when x2APIC is enabled. For legacy
Expand Down
2 changes: 2 additions & 0 deletions arch/x86/kvm/mmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include "kvm_cache_regs.h"
#include "cpuid.h"

extern bool __read_mostly enable_mmio_caching;

#define PT_WRITABLE_SHIFT 1
#define PT_USER_SHIFT 2

Expand Down
8 changes: 6 additions & 2 deletions arch/x86/kvm/mmu/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -4164,7 +4164,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) {
trace_kvm_try_async_get_page(fault->addr, fault->gfn);
if (kvm_find_async_pf_gfn(vcpu, fault->gfn)) {
trace_kvm_async_pf_doublefault(fault->addr, fault->gfn);
trace_kvm_async_pf_repeated_fault(fault->addr, fault->gfn);
kvm_make_request(KVM_REQ_APF_HALT, vcpu);
return RET_PF_RETRY;
} else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) {
Expand Down Expand Up @@ -6697,11 +6697,15 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
/*
* nx_huge_pages needs to be resolved to true/false when kvm.ko is loaded, as
* its default value of -1 is technically undefined behavior for a boolean.
* Forward the module init call to SPTE code so that it too can handle module
* params that need to be resolved/snapshot.
*/
void kvm_mmu_x86_module_init(void)
void __init kvm_mmu_x86_module_init(void)
{
if (nx_huge_pages == -1)
__set_nx_huge_pages(get_nx_auto_mode());

kvm_mmu_spte_module_init();
}

/*
Expand Down
28 changes: 28 additions & 0 deletions arch/x86/kvm/mmu/spte.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
#include <asm/vmx.h>

bool __read_mostly enable_mmio_caching = true;
static bool __ro_after_init allow_mmio_caching;
module_param_named(mmio_caching, enable_mmio_caching, bool, 0444);
EXPORT_SYMBOL_GPL(enable_mmio_caching);

u64 __read_mostly shadow_host_writable_mask;
u64 __read_mostly shadow_mmu_writable_mask;
Expand All @@ -43,6 +45,18 @@ u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;

u8 __read_mostly shadow_phys_bits;

void __init kvm_mmu_spte_module_init(void)
{
/*
* Snapshot userspace's desire to allow MMIO caching. Whether or not
* KVM can actually enable MMIO caching depends on vendor-specific
* hardware capabilities and other module params that can't be resolved
* until the vendor module is loaded, i.e. enable_mmio_caching can and
* will change when the vendor module is (re)loaded.
*/
allow_mmio_caching = enable_mmio_caching;
}

static u64 generation_mmio_spte_mask(u64 gen)
{
u64 mask;
Expand Down Expand Up @@ -340,9 +354,23 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
BUG_ON((u64)(unsigned)access_mask != access_mask);
WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask);

/*
* Reset to the original module param value to honor userspace's desire
* to (dis)allow MMIO caching. Update the param itself so that
* userspace can see whether or not KVM is actually using MMIO caching.
*/
enable_mmio_caching = allow_mmio_caching;
if (!enable_mmio_caching)
mmio_value = 0;

/*
* The mask must contain only bits that are carved out specifically for
* the MMIO SPTE mask, e.g. to ensure there's no overlap with the MMIO
* generation.
*/
if (WARN_ON(mmio_mask & ~SPTE_MMIO_ALLOWED_MASK))
mmio_value = 0;

/*
* Disable MMIO caching if the MMIO value collides with the bits that
* are used to hold the relocated GFN when the L1TF mitigation is
Expand Down
17 changes: 15 additions & 2 deletions arch/x86/kvm/mmu/spte.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@

#include "mmu_internal.h"

extern bool __read_mostly enable_mmio_caching;

/*
* A MMU present SPTE is backed by actual memory and may or may not be present
* in hardware. E.g. MMIO SPTEs are not considered present. Use bit 11, as it
Expand Down Expand Up @@ -125,6 +123,20 @@ static_assert(!(EPT_SPTE_MMU_WRITABLE & SHADOW_ACC_TRACK_SAVED_MASK));
static_assert(!(SPTE_MMU_PRESENT_MASK &
(MMIO_SPTE_GEN_LOW_MASK | MMIO_SPTE_GEN_HIGH_MASK)));

/*
* The SPTE MMIO mask must NOT overlap the MMIO generation bits or the
* MMU-present bit. The generation obviously co-exists with the magic MMIO
* mask/value, and MMIO SPTEs are considered !MMU-present.
*
* The SPTE MMIO mask is allowed to use hardware "present" bits (i.e. all EPT
* RWX bits), all physical address bits (legal PA bits are used for "fast" MMIO
* and so they're off-limits for generation; additional checks ensure the mask
* doesn't overlap legal PA bits), and bit 63 (carved out for future usage).
*/
#define SPTE_MMIO_ALLOWED_MASK (BIT_ULL(63) | GENMASK_ULL(51, 12) | GENMASK_ULL(2, 0))
static_assert(!(SPTE_MMIO_ALLOWED_MASK &
(SPTE_MMU_PRESENT_MASK | MMIO_SPTE_GEN_LOW_MASK | MMIO_SPTE_GEN_HIGH_MASK)));

#define MMIO_SPTE_GEN_LOW_BITS (MMIO_SPTE_GEN_LOW_END - MMIO_SPTE_GEN_LOW_START + 1)
#define MMIO_SPTE_GEN_HIGH_BITS (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1)

Expand Down Expand Up @@ -450,6 +462,7 @@ static inline u64 restore_acc_track_spte(u64 spte)

u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn);

void __init kvm_mmu_spte_module_init(void);
void kvm_mmu_reset_all_pte_masks(void);

#endif
10 changes: 10 additions & 0 deletions arch/x86/kvm/svm/sev.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <asm/trapnr.h>
#include <asm/fpu/xcr.h>

#include "mmu.h"
#include "x86.h"
#include "svm.h"
#include "svm_ops.h"
Expand Down Expand Up @@ -2221,6 +2222,15 @@ void __init sev_hardware_setup(void)
if (!sev_es_enabled)
goto out;

/*
* SEV-ES requires MMIO caching as KVM doesn't have access to the guest
* instruction stream, i.e. can't emulate in response to a #NPF and
* instead relies on #NPF(RSVD) being reflected into the guest as #VC
* (the guest can then do a #VMGEXIT to request MMIO emulation).
*/
if (!enable_mmio_caching)
goto out;

/* Does the CPU support SEV-ES? */
if (!boot_cpu_has(X86_FEATURE_SEV_ES))
goto out;
Expand Down
9 changes: 6 additions & 3 deletions arch/x86/kvm/svm/svm.c
Original file line number Diff line number Diff line change
Expand Up @@ -5034,13 +5034,16 @@ static __init int svm_hardware_setup(void)
/* Setup shadow_me_value and shadow_me_mask */
kvm_mmu_set_me_spte_mask(sme_me_mask, sme_me_mask);

/* Note, SEV setup consumes npt_enabled. */
svm_adjust_mmio_mask();

/*
* Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
* may be modified by svm_adjust_mmio_mask()).
*/
sev_hardware_setup();

svm_hv_hardware_setup();

svm_adjust_mmio_mask();

for_each_possible_cpu(cpu) {
r = svm_cpu_init(cpu);
if (r)
Expand Down
12 changes: 3 additions & 9 deletions arch/x86/kvm/vmx/pmu_intel.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,13 +171,6 @@ static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
}

bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
{
struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);

return lbr->nr && (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_LBR_FMT);
}

static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
{
struct x86_pmu_lbr *records = vcpu_to_lbr_records(vcpu);
Expand Down Expand Up @@ -592,15 +585,16 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
bitmap_set(pmu->all_valid_pmc_idx,
INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);

if (cpuid_model_is_consistent(vcpu))
perf_capabilities = vcpu_get_perf_capabilities(vcpu);
if (cpuid_model_is_consistent(vcpu) &&
(perf_capabilities & PMU_CAP_LBR_FMT))
x86_perf_get_lbr(&lbr_desc->records);
else
lbr_desc->records.nr = 0;

if (lbr_desc->records.nr)
bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1);

perf_capabilities = vcpu_get_perf_capabilities(vcpu);
if (perf_capabilities & PERF_CAP_PEBS_FORMAT) {
if (perf_capabilities & PERF_CAP_PEBS_BASELINE) {
pmu->pebs_enable_mask = counter_mask;
Expand Down
29 changes: 20 additions & 9 deletions arch/x86/kvm/vmx/vmx.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <asm/kvm.h>
#include <asm/intel_pt.h>
#include <asm/perf_event.h>

#include "capabilities.h"
#include "../kvm_cache_regs.h"
Expand Down Expand Up @@ -104,15 +105,6 @@ static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
return pmu->version > 1;
}

#define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc)
#define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records)

void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu);

int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu);

struct lbr_desc {
/* Basic info about guest LBR records. */
struct x86_pmu_lbr records;
Expand Down Expand Up @@ -542,6 +534,25 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct vcpu_vmx, vcpu);
}

static inline struct lbr_desc *vcpu_to_lbr_desc(struct kvm_vcpu *vcpu)
{
return &to_vmx(vcpu)->lbr_desc;
}

static inline struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu)
{
return &vcpu_to_lbr_desc(vcpu)->records;
}

static inline bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
{
return !!vcpu_to_lbr_records(vcpu)->nr;
}

void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu);

static inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
Expand Down
13 changes: 8 additions & 5 deletions arch/x86/kvm/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -3413,6 +3413,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
struct kvm_steal_time __user *st;
struct kvm_memslots *slots;
gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
u64 steal;
u32 version;

Expand All @@ -3430,13 +3431,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
slots = kvm_memslots(vcpu->kvm);

if (unlikely(slots->generation != ghc->generation ||
gpa != ghc->gpa ||
kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;

/* We rely on the fact that it fits in a single page. */
BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);

if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st)) ||
kvm_is_error_hva(ghc->hva) || !ghc->memslot)
return;
}
Expand Down Expand Up @@ -3545,9 +3545,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;

vcpu->arch.perf_capabilities = data;

kvm_pmu_refresh(vcpu);
return 0;
}
}
case MSR_EFER:
return set_efer(vcpu, msr_info);
case MSR_K7_HWCR:
Expand Down Expand Up @@ -4714,6 +4714,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
struct kvm_steal_time __user *st;
struct kvm_memslots *slots;
static const u8 preempted = KVM_VCPU_PREEMPTED;
gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;

/*
* The vCPU can be marked preempted if and only if the VM-Exit was on
Expand Down Expand Up @@ -4741,6 +4742,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
slots = kvm_memslots(vcpu->kvm);

if (unlikely(slots->generation != ghc->generation ||
gpa != ghc->gpa ||
kvm_is_error_hva(ghc->hva) || !ghc->memslot))
return;

Expand Down Expand Up @@ -13019,6 +13021,7 @@ void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_c
fault.error_code = error_code;
fault.nested_page_fault = false;
fault.address = gva;
fault.async_page_fault = false;
}
vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault);
}
Expand Down
Loading

0 comments on commit e18a904

Please sign in to comment.