KVM: Use interval tree to do fast hva lookup in memslots
The current memslots implementation only allows a quick binary search by gfn;
a quick lookup by hva is not possible - the implementation has to do a linear
scan of the whole memslots array, even though the operation being performed
might apply to just a single memslot.

This significantly hurts performance of per-hva operations with higher
memslot counts.

Since hva ranges can overlap between memslots, an interval tree is needed
for tracking them.

[sean: handle interval tree updates in kvm_replace_memslot()]
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <d66b9974becaa9839be9c4e1a5de97b177b4ac20.1638817640.git.maciej.szmigiero@oracle.com>
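
To illustrate the lookup pattern this change enables, here is a minimal sketch of a per-hva walk that visits only the intersecting memslots, modeled on the __kvm_handle_hva_range() hunk below. The handle_hva_range() wrapper is a made-up name for the example; the SRCU locking, the per-address-space loop, and the handler callback of the real code are omitted.

/*
 * Sketch only: visit just the memslots whose hva range intersects
 * [start, end) instead of scanning the whole memslots array.
 */
#include <linux/interval_tree.h>
#include <linux/kvm_host.h>

static void handle_hva_range(struct kvm_memslots *slots,
			     unsigned long start, unsigned long end)
{
	struct interval_tree_node *node;
	struct kvm_memory_slot *slot;
	unsigned long hva_start, hva_end;

	/* The tree stores inclusive [start, last] ranges, hence "end - 1". */
	for (node = interval_tree_iter_first(&slots->hva_tree, start, end - 1);
	     node;
	     node = interval_tree_iter_next(node, start, end - 1)) {
		slot = container_of(node, struct kvm_memory_slot, hva_node);

		/* Clamp to the part of the slot that actually overlaps. */
		hva_start = max(start, slot->userspace_addr);
		hva_end = min(end, slot->userspace_addr +
				   (slot->npages << PAGE_SHIFT));

		/* ... operate on [hva_start, hva_end) within this slot ... */
	}
}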
maciejsszmigiero authored and bonzini committed Dec 8, 2021
1 parent 26b8345 commit ed92273
Showing 7 changed files with 47 additions and 14 deletions.
1 change: 1 addition & 0 deletions arch/arm64/kvm/Kconfig
@@ -39,6 +39,7 @@ menuconfig KVM
select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_VCPU_RUN_PID_CHANGE
select SCHED_INFO
select INTERVAL_TREE
help
Support hosting virtualized guest machines.

1 change: 1 addition & 0 deletions arch/mips/kvm/Kconfig
@@ -27,6 +27,7 @@ config KVM
select KVM_MMIO
select MMU_NOTIFIER
select SRCU
select INTERVAL_TREE
help
Support for hosting Guest kernels.

1 change: 1 addition & 0 deletions arch/powerpc/kvm/Kconfig
@@ -26,6 +26,7 @@ config KVM
select KVM_VFIO
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
select INTERVAL_TREE

config KVM_BOOK3S_HANDLER
bool
1 change: 1 addition & 0 deletions arch/s390/kvm/Kconfig
@@ -33,6 +33,7 @@ config KVM
select HAVE_KVM_NO_POLL
select SRCU
select KVM_VFIO
select INTERVAL_TREE
help
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
1 change: 1 addition & 0 deletions arch/x86/kvm/Kconfig
@@ -43,6 +43,7 @@ config KVM
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_VFIO
select SRCU
select INTERVAL_TREE
select HAVE_KVM_PM_NOTIFIER if PM
help
Support hosting fully virtualized guest machines using hardware
3 changes: 3 additions & 0 deletions include/linux/kvm_host.h
@@ -30,6 +30,7 @@
#include <linux/nospec.h>
#include <linux/notifier.h>
#include <linux/hashtable.h>
#include <linux/interval_tree.h>
#include <linux/xarray.h>
#include <asm/signal.h>

@@ -428,6 +429,7 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)

struct kvm_memory_slot {
struct hlist_node id_node;
struct interval_tree_node hva_node;
gfn_t base_gfn;
unsigned long npages;
unsigned long *dirty_bitmap;
@@ -529,6 +531,7 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
*/
struct kvm_memslots {
u64 generation;
struct rb_root_cached hva_tree;
/*
* The mapping table from slot id to the index in memslots[].
*
53 changes: 39 additions & 14 deletions virt/kvm/kvm_main.c
@@ -512,6 +512,12 @@ static void kvm_null_fn(void)
}
#define IS_KVM_NULL_FN(fn) ((fn) == (void *)kvm_null_fn)

/* Iterate over each memslot intersecting [start, last] (inclusive) range */
#define kvm_for_each_memslot_in_hva_range(node, slots, start, last) \
for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
node; \
node = interval_tree_iter_next(node, start, last)) \

static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
const struct kvm_hva_range *range)
{
@@ -521,6 +527,9 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
struct kvm_memslots *slots;
int i, idx;

if (WARN_ON_ONCE(range->end <= range->start))
return 0;

/* A null handler is allowed if and only if on_lock() is provided. */
if (WARN_ON_ONCE(IS_KVM_NULL_FN(range->on_lock) &&
IS_KVM_NULL_FN(range->handler)))
@@ -529,15 +538,17 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
idx = srcu_read_lock(&kvm->srcu);

for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
struct interval_tree_node *node;

slots = __kvm_memslots(kvm, i);
kvm_for_each_memslot(slot, slots) {
kvm_for_each_memslot_in_hva_range(node, slots,
range->start, range->end - 1) {
unsigned long hva_start, hva_end;

slot = container_of(node, struct kvm_memory_slot, hva_node);
hva_start = max(range->start, slot->userspace_addr);
hva_end = min(range->end, slot->userspace_addr +
(slot->npages << PAGE_SHIFT));
if (hva_start >= hva_end)
continue;

/*
* To optimize for the likely case where the address
@@ -873,6 +884,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void)
if (!slots)
return NULL;

slots->hva_tree = RB_ROOT_CACHED;
hash_init(slots->id_hash);

return slots;
@@ -1277,21 +1289,28 @@ static void kvm_replace_memslot(struct kvm_memslots *slots,
struct kvm_memory_slot *new)
{
/*
* Remove the old memslot from the hash list, copying the node data
* would corrupt the list.
* Remove the old memslot from the hash list and interval tree, copying
* the node data would corrupt the structures.
*/
if (old) {
hash_del(&old->id_node);
interval_tree_remove(&old->hva_node, &slots->hva_tree);

if (!new)
return;

/* Copy the source *data*, not the pointer, to the destination. */
*new = *old;
} else {
/* If @old is NULL, initialize @new's hva range. */
new->hva_node.start = new->userspace_addr;
new->hva_node.last = new->userspace_addr +
(new->npages << PAGE_SHIFT) - 1;
}

/* (Re)Add the new memslot. */
hash_add(slots->id_hash, &new->id_node, new->id);
interval_tree_insert(&new->hva_node, &slots->hva_tree);
}

static void kvm_shift_memslot(struct kvm_memslots *slots, int dst, int src)
@@ -1322,7 +1341,7 @@ static inline void kvm_memslot_delete(struct kvm_memslots *slots,
atomic_set(&slots->last_used_slot, 0);

/*
* Remove the to-be-deleted memslot from the list _before_ shifting
* Remove the to-be-deleted memslot from the list/tree _before_ shifting
* the trailing memslots forward, its data will be overwritten.
* Defer the (somewhat pointless) copying of the memslot until after
* the last slot has been shifted to avoid overwriting said last slot.
@@ -1349,7 +1368,8 @@ static inline int kvm_memslot_insert_back(struct kvm_memslots *slots)
* itself is not preserved in the array, i.e. not swapped at this time, only
* its new index into the array is tracked. Returns the changed memslot's
* current index into the memslots array.
* The memslot at the returned index will not be in @slots->id_hash by then.
* The memslot at the returned index will not be in @slots->hva_tree or
* @slots->id_hash by then.
* @memslot is a detached struct with desired final data of the changed slot.
*/
static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
@@ -1363,10 +1383,10 @@ static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
return -1;

/*
* Delete the slot from the hash table before sorting the remaining
* slots, the slot's data may be overwritten when copying slots as part
* of the sorting process. update_memslots() will unconditionally
* rewrite the entire slot and re-add it to the hash table.
* Delete the slot from the hash table and interval tree before sorting
* the remaining slots, the slot's data may be overwritten when copying
* slots as part of the sorting process. update_memslots() will
* unconditionally rewrite and re-add the entire slot.
*/
kvm_replace_memslot(slots, oldslot, NULL);

@@ -1392,10 +1412,12 @@ static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
* is not preserved in the array, i.e. not swapped at this time, only its new
* index into the array is tracked. Returns the changed memslot's final index
* into the memslots array.
* The memslot at the returned index will not be in @slots->id_hash by then.
* The memslot at the returned index will not be in @slots->hva_tree or
* @slots->id_hash by then.
* @memslot is a detached struct with desired final data of the new or
* changed slot.
* Assumes that the memslot at @start index is not in @slots->id_hash.
* Assumes that the memslot at @start index is not in @slots->hva_tree or
* @slots->id_hash.
*/
static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
struct kvm_memory_slot *memslot,
@@ -1588,9 +1610,12 @@ static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,

memcpy(slots, old, kvm_memslots_size(old->used_slots));

slots->hva_tree = RB_ROOT_CACHED;
hash_init(slots->id_hash);
kvm_for_each_memslot(memslot, slots)
kvm_for_each_memslot(memslot, slots) {
interval_tree_insert(&memslot->hva_node, &slots->hva_tree);
hash_add(slots->id_hash, &memslot->id_node, memslot->id);
}

return slots;
}
