Skip to content

Commit

Permalink
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Browse files Browse the repository at this point in the history
Pull KVM changes from Paolo Bonzini:
 "Here are the 3.13 KVM changes.  There was a lot of work on the PPC
  side: the HV and emulation flavors can now coexist in a single kernel
  is probably the most interesting change from a user point of view.

  On the x86 side there are nested virtualization improvements and a few
  bugfixes.

  ARM got transparent huge page support, improved overcommit, and
  support for big endian guests.

  Finally, there is a new interface to connect KVM with VFIO.  This
  helps with devices that use NoSnoop PCI transactions, letting the
  driver in the guest execute WBINVD instructions.  This includes some
  nVidia cards on Windows, that fail to start without these patches and
  the corresponding userspace changes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (146 commits)
  kvm, vmx: Fix lazy FPU on nested guest
  arm/arm64: KVM: PSCI: propagate caller endianness to the incoming vcpu
  arm/arm64: KVM: MMIO support for BE guest
  kvm, cpuid: Fix sparse warning
  kvm: Delete prototype for non-existent function kvm_check_iopl
  kvm: Delete prototype for non-existent function complete_pio
  hung_task: add method to reset detector
  pvclock: detect watchdog reset at pvclock read
  kvm: optimize out smp_mb after srcu_read_unlock
  srcu: API for barrier after srcu read unlock
  KVM: remove vm mmap method
  KVM: IOMMU: hva align mapping page size
  KVM: x86: trace cpuid emulation when called from emulator
  KVM: emulator: cleanup decode_register_operand() a bit
  KVM: emulator: check rex prefix inside decode_register()
  KVM: x86: fix emulation of "movzbl %bpl, %eax"
  kvm_host: typo fix
  KVM: x86: emulate SAHF instruction
  MAINTAINERS: add tree for kvm.git
  Documentation/kvm: add a 00-INDEX file
  ...
  • Loading branch information
torvalds committed Nov 15, 2013
2 parents eda670c + e504c90 commit f080480
Show file tree
Hide file tree
Showing 133 changed files with 5,170 additions and 2,239 deletions.
24 changes: 24 additions & 0 deletions Documentation/virtual/kvm/00-INDEX
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
00-INDEX
- this file.
api.txt
- KVM userspace API.
cpuid.txt
- KVM-specific cpuid leaves (x86).
devices/
- KVM_CAP_DEVICE_CTRL userspace API.
hypercalls.txt
- KVM hypercalls.
locking.txt
- notes on KVM locks.
mmu.txt
- the x86 kvm shadow mmu.
msr.txt
- KVM-specific MSRs (x86).
nested-vmx.txt
- notes on nested virtualization for Intel x86 processors.
ppc-pv.txt
- the paravirtualization interface on PowerPC.
review-checklist.txt
- review checklist for KVM patches.
timekeeping.txt
- timekeeping virtualization for x86-based architectures.
152 changes: 145 additions & 7 deletions Documentation/virtual/kvm/api.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1122,9 +1122,9 @@ struct kvm_cpuid2 {
struct kvm_cpuid_entry2 entries[0];
};

#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
#define KVM_CPUID_FLAG_STATEFUL_FUNC 2
#define KVM_CPUID_FLAG_STATE_READ_NEXT 4
#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)

struct kvm_cpuid_entry2 {
__u32 function;
Expand Down Expand Up @@ -1810,6 +1810,50 @@ registers, find a list below:
PPC | KVM_REG_PPC_TLB3PS | 32
PPC | KVM_REG_PPC_EPTCFG | 32
PPC | KVM_REG_PPC_ICP_STATE | 64
PPC | KVM_REG_PPC_TB_OFFSET | 64
PPC | KVM_REG_PPC_SPMC1 | 32
PPC | KVM_REG_PPC_SPMC2 | 32
PPC | KVM_REG_PPC_IAMR | 64
PPC | KVM_REG_PPC_TFHAR | 64
PPC | KVM_REG_PPC_TFIAR | 64
PPC | KVM_REG_PPC_TEXASR | 64
PPC | KVM_REG_PPC_FSCR | 64
PPC | KVM_REG_PPC_PSPB | 32
PPC | KVM_REG_PPC_EBBHR | 64
PPC | KVM_REG_PPC_EBBRR | 64
PPC | KVM_REG_PPC_BESCR | 64
PPC | KVM_REG_PPC_TAR | 64
PPC | KVM_REG_PPC_DPDES | 64
PPC | KVM_REG_PPC_DAWR | 64
PPC | KVM_REG_PPC_DAWRX | 64
PPC | KVM_REG_PPC_CIABR | 64
PPC | KVM_REG_PPC_IC | 64
PPC | KVM_REG_PPC_VTB | 64
PPC | KVM_REG_PPC_CSIGR | 64
PPC | KVM_REG_PPC_TACR | 64
PPC | KVM_REG_PPC_TCSCR | 64
PPC | KVM_REG_PPC_PID | 64
PPC | KVM_REG_PPC_ACOP | 64
PPC | KVM_REG_PPC_VRSAVE | 32
PPC | KVM_REG_PPC_LPCR | 64
PPC | KVM_REG_PPC_PPR | 64
PPC | KVM_REG_PPC_ARCH_COMPAT 32
PPC | KVM_REG_PPC_TM_GPR0 | 64
...
PPC | KVM_REG_PPC_TM_GPR31 | 64
PPC | KVM_REG_PPC_TM_VSR0 | 128
...
PPC | KVM_REG_PPC_TM_VSR63 | 128
PPC | KVM_REG_PPC_TM_CR | 64
PPC | KVM_REG_PPC_TM_LR | 64
PPC | KVM_REG_PPC_TM_CTR | 64
PPC | KVM_REG_PPC_TM_FPSCR | 64
PPC | KVM_REG_PPC_TM_AMR | 64
PPC | KVM_REG_PPC_TM_PPR | 64
PPC | KVM_REG_PPC_TM_VRSAVE | 64
PPC | KVM_REG_PPC_TM_VSCR | 32
PPC | KVM_REG_PPC_TM_DSCR | 64
PPC | KVM_REG_PPC_TM_TAR | 64

ARM registers are mapped using the lower 32 bits. The upper 16 of that
is the register group type, or coprocessor number:
Expand Down Expand Up @@ -2304,7 +2348,31 @@ Possible features:
Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).


4.83 KVM_GET_REG_LIST
4.83 KVM_ARM_PREFERRED_TARGET

Capability: basic
Architectures: arm, arm64
Type: vm ioctl
Parameters: struct struct kvm_vcpu_init (out)
Returns: 0 on success; -1 on error
Errors:
ENODEV: no preferred target available for the host

This queries KVM for preferred CPU target type which can be emulated
by KVM on underlying host.

The ioctl returns struct kvm_vcpu_init instance containing information
about preferred CPU target type and recommended features for it. The
kvm_vcpu_init->features bitmap returned will have feature bits set if
the preferred target recommends setting these features, but this is
not mandatory.

The information returned by this ioctl can be used to prepare an instance
of struct kvm_vcpu_init for KVM_ARM_VCPU_INIT ioctl which will result in
in VCPU matching underlying host.


4.84 KVM_GET_REG_LIST

Capability: basic
Architectures: arm, arm64
Expand All @@ -2323,8 +2391,7 @@ struct kvm_reg_list {
This ioctl returns the guest registers that are supported for the
KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.


4.84 KVM_ARM_SET_DEVICE_ADDR
4.85 KVM_ARM_SET_DEVICE_ADDR

Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
Architectures: arm, arm64
Expand Down Expand Up @@ -2362,7 +2429,7 @@ must be called after calling KVM_CREATE_IRQCHIP, but before calling
KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the
base addresses will return -EEXIST.

4.85 KVM_PPC_RTAS_DEFINE_TOKEN
4.86 KVM_PPC_RTAS_DEFINE_TOKEN

Capability: KVM_CAP_PPC_RTAS
Architectures: ppc
Expand Down Expand Up @@ -2661,6 +2728,77 @@ and usually define the validity of a groups of registers. (e.g. one bit
};


4.81 KVM_GET_EMULATED_CPUID

Capability: KVM_CAP_EXT_EMUL_CPUID
Architectures: x86
Type: system ioctl
Parameters: struct kvm_cpuid2 (in/out)
Returns: 0 on success, -1 on error

struct kvm_cpuid2 {
__u32 nent;
__u32 flags;
struct kvm_cpuid_entry2 entries[0];
};

The member 'flags' is used for passing flags from userspace.

#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)

struct kvm_cpuid_entry2 {
__u32 function;
__u32 index;
__u32 flags;
__u32 eax;
__u32 ebx;
__u32 ecx;
__u32 edx;
__u32 padding[3];
};

This ioctl returns x86 cpuid features which are emulated by
kvm.Userspace can use the information returned by this ioctl to query
which features are emulated by kvm instead of being present natively.

Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2
structure with the 'nent' field indicating the number of entries in
the variable-size array 'entries'. If the number of entries is too low
to describe the cpu capabilities, an error (E2BIG) is returned. If the
number is too high, the 'nent' field is adjusted and an error (ENOMEM)
is returned. If the number is just right, the 'nent' field is adjusted
to the number of valid entries in the 'entries' array, which is then
filled.

The entries returned are the set CPUID bits of the respective features
which kvm emulates, as returned by the CPUID instruction, with unknown
or unsupported feature bits cleared.

Features like x2apic, for example, may not be present in the host cpu
but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be
emulated efficiently and thus not included here.

The fields in each entry are defined as follows:

function: the eax value used to obtain the entry
index: the ecx value used to obtain the entry (for entries that are
affected by ecx)
flags: an OR of zero or more of the following:
KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
if the index field is valid
KVM_CPUID_FLAG_STATEFUL_FUNC:
if cpuid for this function returns different values for successive
invocations; there will be several entries with the same function,
all with this flag set
KVM_CPUID_FLAG_STATE_READ_NEXT:
for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
the first entry to be read by a cpu
eax, ebx, ecx, edx: the values returned by the cpuid instruction for
this function/index combination


6. Capabilities that can be enabled
-----------------------------------

Expand Down
7 changes: 7 additions & 0 deletions Documentation/virtual/kvm/cpuid.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs
KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by
|| || writing to msr 0x4b564d02
------------------------------------------------------------------------------
KVM_FEATURE_STEAL_TIME || 5 || steal time can be enabled by
|| || writing to msr 0x4b564d03.
------------------------------------------------------------------------------
KVM_FEATURE_PV_EOI || 6 || paravirtualized end of interrupt
|| || handler can be enabled by writing
|| || to msr 0x4b564d04.
------------------------------------------------------------------------------
KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit
|| || before enabling paravirtualized
|| || spinlock support.
Expand Down
22 changes: 22 additions & 0 deletions Documentation/virtual/kvm/devices/vfio.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
VFIO virtual device
===================

Device types supported:
KVM_DEV_TYPE_VFIO

Only one VFIO instance may be created per VM. The created device
tracks VFIO groups in use by the VM and features of those groups
important to the correctness and acceleration of the VM. As groups
are enabled and disabled for use by the VM, KVM should be updated
about their presence. When registered with KVM, a reference to the
VFIO-group is held by KVM.

Groups:
KVM_DEV_VFIO_GROUP

KVM_DEV_VFIO_GROUP attributes:
KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking
KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking

For each, kvm_device_attr.addr points to an int32_t file descriptor
for the VFIO group.
19 changes: 17 additions & 2 deletions Documentation/virtual/kvm/locking.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,14 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update().
------------

Name: kvm_lock
Type: raw_spinlock
Type: spinlock_t
Arch: any
Protects: - vm_list
- hardware virtualization enable/disable

Name: kvm_count_lock
Type: raw_spinlock_t
Arch: any
Protects: - hardware virtualization enable/disable
Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
migration.

Expand All @@ -151,3 +155,14 @@ Type: spinlock_t
Arch: any
Protects: -shadow page/shadow tlb entry
Comment: it is a spinlock since it is used in mmu notifier.

Name: kvm->srcu
Type: srcu lock
Arch: any
Protects: - kvm->memslots
- kvm->buses
Comment: The srcu read lock must be held while accessing memslots (e.g.
when using gfn_to_* functions) and while accessing in-kernel
MMIO/PIO address->device structure mapping (kvm->buses).
The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
if it is needed by multiple functions.
3 changes: 2 additions & 1 deletion MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -4871,7 +4871,8 @@ KERNEL VIRTUAL MACHINE (KVM)
M: Gleb Natapov <gleb@redhat.com>
M: Paolo Bonzini <pbonzini@redhat.com>
L: kvm@vger.kernel.org
W: http://linux-kvm.org
W: http://www.linux-kvm.org
T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
S: Supported
F: Documentation/*/kvm*.txt
F: Documentation/virtual/kvm/
Expand Down
9 changes: 6 additions & 3 deletions arch/arm/include/asm/kvm_arm.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
* TSC: Trap SMC
* TSW: Trap cache operations by set/way
* TWI: Trap WFI
* TWE: Trap WFE
* TIDCP: Trap L2CTLR/L2ECTLR
* BSU_IS: Upgrade barriers to the inner shareable domain
* FB: Force broadcast of all maintainance operations
Expand All @@ -67,7 +68,7 @@
*/
#define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
HCR_SWIO | HCR_TIDCP)
HCR_TWE | HCR_SWIO | HCR_TIDCP)
#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)

/* System Control Register (SCTLR) bits */
Expand Down Expand Up @@ -95,12 +96,12 @@
#define TTBCR_IRGN1 (3 << 24)
#define TTBCR_EPD1 (1 << 23)
#define TTBCR_A1 (1 << 22)
#define TTBCR_T1SZ (3 << 16)
#define TTBCR_T1SZ (7 << 16)
#define TTBCR_SH0 (3 << 12)
#define TTBCR_ORGN0 (3 << 10)
#define TTBCR_IRGN0 (3 << 8)
#define TTBCR_EPD0 (1 << 7)
#define TTBCR_T0SZ 3
#define TTBCR_T0SZ (7 << 0)
#define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)

/* Hyp System Trap Register */
Expand Down Expand Up @@ -208,6 +209,8 @@
#define HSR_EC_DABT (0x24)
#define HSR_EC_DABT_HYP (0x25)

#define HSR_WFI_IS_WFE (1U << 0)

#define HSR_HVC_IMM_MASK ((1UL << 16) - 1)

#define HSR_DABT_S1PTW (1U << 7)
Expand Down
2 changes: 1 addition & 1 deletion arch/arm/include/asm/kvm_asm.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
#define c6_IFAR 17 /* Instruction Fault Address Register */
#define c7_PAR 18 /* Physical Address Register */
#define c7_PAR_high 19 /* PAR top 32 bits */
#define c9_L2CTLR 20 /* Cortex A15 L2 Control Register */
#define c9_L2CTLR 20 /* Cortex A15/A7 L2 Control Register */
#define c10_PRRR 21 /* Primary Region Remap Register */
#define c10_NMRR 22 /* Normal Memory Remap Register */
#define c12_VBAR 23 /* Vector Base Address Register */
Expand Down
Loading

0 comments on commit f080480

Please sign in to comment.