Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/virt/kvm/kvm
Browse files Browse the repository at this point in the history
Pull kvm fixes from Marcelo Tosatti:
 "KVM bug fixes (ARM and x86)"

* git://git.kernel.org/pub/scm/virt/kvm/kvm:
  arm/arm64: KVM: Keep elrsr/aisr in sync with software model
  KVM: VMX: Set msr bitmap correctly if vcpu is in guest mode
  arm/arm64: KVM: fix missing unlock on error in kvm_vgic_create()
  kvm: x86: i8259: return initialized data on invalid-size read
  arm64: KVM: Fix outdated comment about VTCR_EL2.PS
  arm64: KVM: Do not use pgd_index to index stage-2 pgd
  arm64: KVM: Fix stage-2 PGD allocation to have per-page refcounting
  kvm: move advertising of KVM_CAP_IRQFD to common code
  • Loading branch information
torvalds committed Mar 17, 2015
2 parents ab676b7 + f710a12 commit 2fc6775
Show file tree
Hide file tree
Showing 13 changed files with 114 additions and 81 deletions.
13 changes: 6 additions & 7 deletions arch/arm/include/asm/kvm_mmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,29 +149,28 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
(__boundary - 1 < (end) - 1)? __boundary: (end); \
})

#define kvm_pgd_index(addr) pgd_index(addr)

static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
return page_count(ptr_page) == 1;
}


#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
#define kvm_pud_table_empty(kvm, pudp) (0)

#define KVM_PREALLOC_LEVEL 0

static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
static inline void *kvm_get_hwpgd(struct kvm *kvm)
{
return 0;
return kvm->arch.pgd;
}

static inline void kvm_free_hwpgd(struct kvm *kvm) { }

static inline void *kvm_get_hwpgd(struct kvm *kvm)
static inline unsigned int kvm_get_hwpgd_size(void)
{
return kvm->arch.pgd;
return PTRS_PER_S2_PGD * sizeof(pgd_t);
}

struct kvm;
Expand Down
75 changes: 53 additions & 22 deletions arch/arm/kvm/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
phys_addr_t addr = start, end = start + size;
phys_addr_t next;

pgd = pgdp + pgd_index(addr);
pgd = pgdp + kvm_pgd_index(addr);
do {
next = kvm_pgd_addr_end(addr, end);
if (!pgd_none(*pgd))
Expand Down Expand Up @@ -355,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
phys_addr_t next;
pgd_t *pgd;

pgd = kvm->arch.pgd + pgd_index(addr);
pgd = kvm->arch.pgd + kvm_pgd_index(addr);
do {
next = kvm_pgd_addr_end(addr, end);
stage2_flush_puds(kvm, pgd, addr, next);
Expand Down Expand Up @@ -632,6 +632,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
__phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}

/* Free the HW pgd, one page at a time */
static void kvm_free_hwpgd(void *hwpgd)
{
free_pages_exact(hwpgd, kvm_get_hwpgd_size());
}

/* Allocate the HW PGD, making sure that each page gets its own refcount */
static void *kvm_alloc_hwpgd(void)
{
unsigned int size = kvm_get_hwpgd_size();

return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
}

/**
* kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
* @kvm: The KVM struct pointer for the VM.
Expand All @@ -645,46 +659,64 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
*/
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
int ret;
pgd_t *pgd;
void *hwpgd;

if (kvm->arch.pgd != NULL) {
kvm_err("kvm_arch already initialized?\n");
return -EINVAL;
}

hwpgd = kvm_alloc_hwpgd();
if (!hwpgd)
return -ENOMEM;

/* When the kernel uses more levels of page tables than the
* guest, we allocate a fake PGD and pre-populate it to point
* to the next-level page table, which will be the real
* initial page table pointed to by the VTTBR.
*
* When KVM_PREALLOC_LEVEL==2, we allocate a single page for
* the PMD and the kernel will use folded pud.
* When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
* pages.
*/
if (KVM_PREALLOC_LEVEL > 0) {
int i;

/*
* Allocate fake pgd for the page table manipulation macros to
* work. This is not used by the hardware and we have no
* alignment requirement for this allocation.
*/
pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
GFP_KERNEL | __GFP_ZERO);

if (!pgd) {
kvm_free_hwpgd(hwpgd);
return -ENOMEM;
}

/* Plug the HW PGD into the fake one. */
for (i = 0; i < PTRS_PER_S2_PGD; i++) {
if (KVM_PREALLOC_LEVEL == 1)
pgd_populate(NULL, pgd + i,
(pud_t *)hwpgd + i * PTRS_PER_PUD);
else if (KVM_PREALLOC_LEVEL == 2)
pud_populate(NULL, pud_offset(pgd, 0) + i,
(pmd_t *)hwpgd + i * PTRS_PER_PMD);
}
} else {
/*
* Allocate actual first-level Stage-2 page table used by the
* hardware for Stage-2 page table walks.
*/
pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
pgd = (pgd_t *)hwpgd;
}

if (!pgd)
return -ENOMEM;

ret = kvm_prealloc_hwpgd(kvm, pgd);
if (ret)
goto out_err;

kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
return 0;
out_err:
if (KVM_PREALLOC_LEVEL > 0)
kfree(pgd);
else
free_pages((unsigned long)pgd, S2_PGD_ORDER);
return ret;
}

/**
Expand Down Expand Up @@ -785,11 +817,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
return;

unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
kvm_free_hwpgd(kvm);
kvm_free_hwpgd(kvm_get_hwpgd(kvm));
if (KVM_PREALLOC_LEVEL > 0)
kfree(kvm->arch.pgd);
else
free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);

kvm->arch.pgd = NULL;
}

Expand All @@ -799,7 +830,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
pgd_t *pgd;
pud_t *pud;

pgd = kvm->arch.pgd + pgd_index(addr);
pgd = kvm->arch.pgd + kvm_pgd_index(addr);
if (WARN_ON(pgd_none(*pgd))) {
if (!cache)
return NULL;
Expand Down Expand Up @@ -1089,7 +1120,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
pgd_t *pgd;
phys_addr_t next;

pgd = kvm->arch.pgd + pgd_index(addr);
pgd = kvm->arch.pgd + kvm_pgd_index(addr);
do {
/*
* Release kvm_mmu_lock periodically if the memory region is
Expand Down
5 changes: 3 additions & 2 deletions arch/arm64/include/asm/kvm_arm.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@
* 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
* not known to exist and will break with this configuration.
*
* VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
* (see hyp-init.S).
*
* Note that when using 4K pages, we concatenate two first level page tables
* together.
*
Expand All @@ -138,7 +141,6 @@
#ifdef CONFIG_ARM64_64K_PAGES
/*
* Stage2 translation configuration:
* 40bits output (PS = 2)
* 40bits input (T0SZ = 24)
* 64kB pages (TG0 = 1)
* 2 level page tables (SL = 1)
Expand All @@ -150,7 +152,6 @@
#else
/*
* Stage2 translation configuration:
* 40bits output (PS = 2)
* 40bits input (T0SZ = 24)
* 4kB pages (TG0 = 0)
* 3 level page tables (SL = 1)
Expand Down
48 changes: 6 additions & 42 deletions arch/arm64/include/asm/kvm_mmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
#define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT)
#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))

#define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))

/*
* If we are concatenating first level stage-2 page tables, we would have less
* than or equal to 16 pointers in the fake PGD, because that's what the
Expand All @@ -171,43 +173,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
#define KVM_PREALLOC_LEVEL (0)
#endif

/**
* kvm_prealloc_hwpgd - allocate inital table for VTTBR
* @kvm: The KVM struct pointer for the VM.
* @pgd: The kernel pseudo pgd
*
* When the kernel uses more levels of page tables than the guest, we allocate
* a fake PGD and pre-populate it to point to the next-level page table, which
* will be the real initial page table pointed to by the VTTBR.
*
* When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
* the kernel will use folded pud. When KVM_PREALLOC_LEVEL==1, we
* allocate 2 consecutive PUD pages.
*/
static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
{
unsigned int i;
unsigned long hwpgd;

if (KVM_PREALLOC_LEVEL == 0)
return 0;

hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
if (!hwpgd)
return -ENOMEM;

for (i = 0; i < PTRS_PER_S2_PGD; i++) {
if (KVM_PREALLOC_LEVEL == 1)
pgd_populate(NULL, pgd + i,
(pud_t *)hwpgd + i * PTRS_PER_PUD);
else if (KVM_PREALLOC_LEVEL == 2)
pud_populate(NULL, pud_offset(pgd, 0) + i,
(pmd_t *)hwpgd + i * PTRS_PER_PMD);
}

return 0;
}

static inline void *kvm_get_hwpgd(struct kvm *kvm)
{
pgd_t *pgd = kvm->arch.pgd;
Expand All @@ -224,12 +189,11 @@ static inline void *kvm_get_hwpgd(struct kvm *kvm)
return pmd_offset(pud, 0);
}

static inline void kvm_free_hwpgd(struct kvm *kvm)
static inline unsigned int kvm_get_hwpgd_size(void)
{
if (KVM_PREALLOC_LEVEL > 0) {
unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
}
if (KVM_PREALLOC_LEVEL > 0)
return PTRS_PER_S2_PGD * PAGE_SIZE;
return PTRS_PER_S2_PGD * sizeof(pgd_t);
}

static inline bool kvm_page_empty(void *ptr)
Expand Down
1 change: 0 additions & 1 deletion arch/s390/kvm/kvm-s390.c
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_S390_CSS_SUPPORT:
case KVM_CAP_IRQFD:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_ENABLE_CAP_VM:
Expand Down
1 change: 1 addition & 0 deletions arch/x86/kvm/i8259.c
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,7 @@ static int picdev_read(struct kvm_pic *s,
return -EOPNOTSUPP;

if (len != 1) {
memset(val, 0, len);
pr_pic_unimpl("non byte read\n");
return 0;
}
Expand Down
11 changes: 7 additions & 4 deletions arch/x86/kvm/vmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -2168,7 +2168,10 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
{
unsigned long *msr_bitmap;

if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
if (is_guest_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_nested;
else if (irqchip_in_kernel(vcpu->kvm) &&
apic_x2apic_mode(vcpu->arch.apic)) {
if (is_long_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
else
Expand Down Expand Up @@ -9218,9 +9221,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
}

if (cpu_has_vmx_msr_bitmap() &&
exec_control & CPU_BASED_USE_MSR_BITMAPS &&
nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) {
vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_nested));
exec_control & CPU_BASED_USE_MSR_BITMAPS) {
nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
/* MSR_BITMAP will be set by following vmx_set_efer. */
} else
exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;

Expand Down
1 change: 0 additions & 1 deletion arch/x86/kvm/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -2744,7 +2744,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_USER_NMI:
case KVM_CAP_REINJECT_CONTROL:
case KVM_CAP_IRQ_INJECT_STATUS:
case KVM_CAP_IRQFD:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_IOEVENTFD_NO_LENGTH:
case KVM_CAP_PIT2:
Expand Down
1 change: 1 addition & 0 deletions include/kvm/arm_vgic.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ struct vgic_ops {
void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
u64 (*get_elrsr)(const struct kvm_vcpu *vcpu);
u64 (*get_eisr)(const struct kvm_vcpu *vcpu);
void (*clear_eisr)(struct kvm_vcpu *vcpu);
u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu);
void (*enable_underflow)(struct kvm_vcpu *vcpu);
void (*disable_underflow)(struct kvm_vcpu *vcpu);
Expand Down
8 changes: 8 additions & 0 deletions virt/kvm/arm/vgic-v2.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
{
if (!(lr_desc.state & LR_STATE_MASK))
vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
else
vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr);
}

static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
Expand All @@ -84,6 +86,11 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
}

static void vgic_v2_clear_eisr(struct kvm_vcpu *vcpu)
{
vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr = 0;
}

static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
{
u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
Expand Down Expand Up @@ -148,6 +155,7 @@ static const struct vgic_ops vgic_v2_ops = {
.sync_lr_elrsr = vgic_v2_sync_lr_elrsr,
.get_elrsr = vgic_v2_get_elrsr,
.get_eisr = vgic_v2_get_eisr,
.clear_eisr = vgic_v2_clear_eisr,
.get_interrupt_status = vgic_v2_get_interrupt_status,
.enable_underflow = vgic_v2_enable_underflow,
.disable_underflow = vgic_v2_disable_underflow,
Expand Down
8 changes: 8 additions & 0 deletions virt/kvm/arm/vgic-v3.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
{
if (!(lr_desc.state & LR_STATE_MASK))
vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
else
vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr);
}

static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu)
Expand All @@ -116,6 +118,11 @@ static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu)
return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr;
}

static void vgic_v3_clear_eisr(struct kvm_vcpu *vcpu)
{
vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr = 0;
}

static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu)
{
u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr;
Expand Down Expand Up @@ -192,6 +199,7 @@ static const struct vgic_ops vgic_v3_ops = {
.sync_lr_elrsr = vgic_v3_sync_lr_elrsr,
.get_elrsr = vgic_v3_get_elrsr,
.get_eisr = vgic_v3_get_eisr,
.clear_eisr = vgic_v3_clear_eisr,
.get_interrupt_status = vgic_v3_get_interrupt_status,
.enable_underflow = vgic_v3_enable_underflow,
.disable_underflow = vgic_v3_disable_underflow,
Expand Down
Loading

0 comments on commit 2fc6775

Please sign in to comment.