Add a new data type, struct kvm_gfn_ring, and support functions for ring-based dirty memory logging. Add a new ioctl, KVM_RESET_DIRTY_PAGES, for resetting dirty traps. Signed-off-by: Lei Cao <lei.cao@xxxxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 2 -- include/linux/kvm_gfn_ring.h | 68 +++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 35 +++++++++++++++++++++ virt/kvm/kvm_main.c | 16 ++++++++++ 4 files changed, 119 insertions(+), 2 deletions(-) create mode 100644 include/linux/kvm_gfn_ring.h diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a7066dc..6646aa5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -43,8 +43,6 @@ #define KVM_PRIVATE_MEM_SLOTS 3 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) -#define KVM_PIO_PAGE_OFFSET 1 -#define KVM_COALESCED_MMIO_PAGE_OFFSET 2 #define KVM_HALT_POLL_NS_DEFAULT 400000 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS diff --git a/include/linux/kvm_gfn_ring.h b/include/linux/kvm_gfn_ring.h new file mode 100644 index 0000000..9d5ca99 --- /dev/null +++ b/include/linux/kvm_gfn_ring.h @@ -0,0 +1,68 @@ +#ifndef KVM_GFN_RING_H +#define KVM_GFN_RING_H + +/* + * struct kvm_dirty_ring is defined in include/uapi/linux/kvm.h. + * + * dirty_ring: shared with userspace via mmap. dirty_ring->dirty_gfns + * is the compact list that holds the dirty pages. + * dirty_index: free-running counter that points to the next slot in + * dirty_ring->dirty_gfns where a new dirty page should go. 
+ * reset_index: free-running counter that points to the next dirty page + * in dirty_ring->dirty_gfns for which dirty trap needs to + * be re-enabled + * size: size of the compact list, dirty_ring->dirty_gfns + * soft_limit: when the number of dirty pages in the list reaches this + * limit, vcpu that owns this ring should exit to userspace + * to allow userspace to harvest all the dirty pages + * lock: protects dirty_ring, only in use if this is the global + * ring + * + * The number of dirty pages in the ring is calculated as + * dirty_index - reset_index + * + * The kernel increments dirty_ring->indices.avail_index after dirty_index + * is incremented. When userspace harvests the dirty pages, it increments + * dirty_ring->indices.fetch_index up to dirty_ring->indices.avail_index. + * When the kernel re-enables dirty traps for the dirty pages, it increments + * reset_index up to dirty_ring->indices.fetch_index. + * + */ +struct kvm_gfn_ring { + u16 dirty_index; + u16 reset_index; + u32 size; + u32 soft_limit; + spinlock_t lock; + struct kvm_dirty_ring *dirty_ring; +}; + +int kvm_gfn_ring_alloc(struct kvm_gfn_ring *gfnring, + u32 size, + u32 limit); + +/* + * called with kvm->slots_lock held, returns the number of + * processed pages. + */ +int kvm_gfn_ring_reset(struct kvm *kvm, + struct kvm_gfn_ring *gfnring); + +/* + * returns 0: successfully pushed + * 1: successfully pushed, soft limit reached, + * vcpu should exit to userspace + * -EBUSY: unable to push, dirty ring full. 
+ */ +int kvm_gfn_ring_push(struct kvm_gfn_ring *gfnring, + u32 slot, + u64 offset, + bool locked); + +/* for use in vm_operations_struct */ +struct page *kvm_gfn_ring_get_page(struct kvm_gfn_ring *ring, + u32 i); + +void kvm_gfn_ring_free(struct kvm_gfn_ring *ring); + +#endif diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 117f1f9..1a1ba4d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -214,6 +214,10 @@ struct kvm_hyperv_exit { /* Encounter unexpected vm-exit due to delivery event. */ #define KVM_INTERNAL_ERROR_DELIVERY_EV 3 +#define KVM_PIO_PAGE_OFFSET 1 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ @@ -1283,6 +1287,8 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_X86_SMM */ #define KVM_SMI _IO(KVMIO, 0xb7) +#define KVM_RESET_DIRTY_PAGES _IO(KVMIO, 0xe0) + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) @@ -1336,4 +1342,33 @@ struct kvm_assigned_msix_entry { #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) +/* + * The following are the requirements for supporting dirty log ring + * (by enabling KVM_DIRTY_LOG_PAGE_OFFSET). + * + * 1. Memory accesses by KVM should call kvm_vcpu_write_* instead + * of kvm_write_* so that the global dirty ring is not filled up + * too quickly. + * 2. kvm_arch_mmu_enable_log_dirty_pt_masked should be defined for + * enabling dirty logging. + * 3. There should not be a separate step to synchronize hardware + * dirty bitmap with KVM's. 
+ */ + +struct kvm_dirty_gfn { + __u32 pad; + __u32 slot; + __u64 offset; +}; + +struct kvm_dirty_ring { + union { + struct { + __u16 avail_index; /* set by kernel */ + __u16 fetch_index; /* set by userspace */ + } indices; + struct kvm_dirty_gfn dirty_gfns[0]; + }; +}; + #endif /* __LINUX_KVM_H */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f2744ce..016be4d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2945,11 +2945,18 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) return kvm_vm_ioctl_check_extension(kvm, arg); } +#ifdef KVM_DIRTY_LOG_PAGE_OFFSET static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, __u32 size) { return -EINVAL; } +static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm) +{ + return -EINVAL; +} +#endif + int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) { @@ -2961,7 +2968,11 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, { switch (cap->cap) { case KVM_CAP_DIRTY_LOG_RING: +#ifdef KVM_DIRTY_LOG_PAGE_OFFSET return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]); +#else + break; +#endif default: return kvm_vm_ioctl_enable_cap(kvm, cap); } @@ -3135,6 +3146,11 @@ static long kvm_vm_ioctl(struct file *filp, case KVM_CHECK_EXTENSION: r = kvm_vm_ioctl_check_extension_generic(kvm, arg); break; +#ifdef KVM_DIRTY_LOG_PAGE_OFFSET + case KVM_RESET_DIRTY_PAGES: + r = kvm_vm_ioctl_reset_dirty_pages(kvm); + break; +#endif /* KVM_DIRTY_LOG_PAGE_OFFSET */ default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); } -- 2.5.0