Add new data type, struct kvm_gfn_ring, and support functions for ring-based dirty memory logging. Add new ioctl, KVM_RESET_DIRTY_PAGES, for dirty trap reset. Signed-off-by: Lei Cao <lei.cao@xxxxxxxxxxx> --- Documentation/virtual/kvm/api.txt | 15 ++++++++++++++- arch/x86/include/asm/kvm_host.h | 1 + include/linux/kvm_gfn_ring.h | 37 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 31 +++++++++++++++++++++++++++++++ virt/kvm/kvm_main.c | 16 ++++++++++++++++ 5 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 include/linux/kvm_gfn_ring.h diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 453c520..01f4f25 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3182,7 +3182,7 @@ of IOMMU pages. The rest of functionality is identical to KVM_CREATE_SPAPR_TCE. -4.98 KVM_REINJECT_CONTROL +4.99 KVM_REINJECT_CONTROL Capability: KVM_CAP_REINJECT_CONTROL Architectures: x86 @@ -3206,6 +3206,19 @@ struct kvm_reinject_control { pit_reinject = 0 (!reinject mode) is recommended, unless running an old operating system that uses the PIT for timing (e.g. Linux 2.4.x). +4.100 KVM_RESET_DIRTY_PAGES + +Capability: KVM_CAP_DIRTY_LOG_RING +Architectures: x86 +Type: vm ioctl +Parameters: none +Returns: 0 on success, + -EINVAL if the dirty log size is 0, which means dirty tracking using + the ring is not enabled + +This ioctl is used to reset the dirty traps for all the pages in the dirty +page rings, in preparation for the next iteration of dirty tracking. + 5. 
The kvm_run structure ------------------------ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a7066dc..2b385f2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -45,6 +45,7 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 #define KVM_HALT_POLL_NS_DEFAULT 400000 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS diff --git a/include/linux/kvm_gfn_ring.h b/include/linux/kvm_gfn_ring.h new file mode 100644 index 0000000..a864b83 --- /dev/null +++ b/include/linux/kvm_gfn_ring.h @@ -0,0 +1,37 @@ +#ifndef KVM_GFN_RING_H +#define KVM_GFN_RING_H + +struct kvm_gfn_ring { + u16 dirty_index; + u16 reset_index; + u32 size; + spinlock_t lock; + struct kvm_dirty_list *dirty_list; +}; + +int kvm_gfn_ring_alloc(struct kvm_gfn_ring *gfnring, + u32 size); + +/* + * called with kvm->slots_lock held, returns the number of + * processed pages. + */ +int kvm_gfn_ring_reset(struct kvm *kvm, + struct kvm_gfn_ring *gfnring); + +/* + * returns number of items in ring after adding the element, + * or -EBUSY if it was full. 
+ */ +int kvm_gfn_ring_push(struct kvm_gfn_ring *gfnring, + u32 slot, + u64 offset, + bool locked); + +/* for use in vm_operations_struct */ +struct page *kvm_gfn_ring_get_page(struct kvm_gfn_ring *ring, + u32 i); + +void kvm_gfn_ring_free(struct kvm_gfn_ring *ring); + +#endif diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 117f1f9..943c5a35 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1283,6 +1283,8 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_X86_SMM */ #define KVM_SMI _IO(KVMIO, 0xb7) +#define KVM_RESET_DIRTY_PAGES _IO(KVMIO, 0xe0) + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) @@ -1336,4 +1338,33 @@ struct kvm_assigned_msix_entry { #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) +/* + * The following are the requirements for supporting the dirty log ring + * (by enabling KVM_DIRTY_LOG_PAGE_OFFSET). + * + * 1. Memory accesses by KVM should call kvm_vcpu_write_* instead + * of kvm_write_* so that the global dirty ring is not filled up + * too quickly. + * 2. kvm_arch_mmu_enable_log_dirty_pt_masked should be defined for + * enabling dirty logging. + * 3. There should not be a separate step to synchronize the hardware + * dirty bitmap with KVM's. 
+ */ + +struct kvm_dirty_gfn { + __u32 pad; + __u32 slot; + __u64 offset; +}; + +struct kvm_dirty_list { + union { + struct { + __u16 avail_index; /* set by kernel */ + __u16 fetch_index; /* set by userspace */ + } indices; + struct kvm_dirty_gfn dirty_gfns[0]; + }; +}; + #endif /* __LINUX_KVM_H */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f2744ce..016be4d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2945,11 +2945,18 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) return kvm_vm_ioctl_check_extension(kvm, arg); } +#ifdef KVM_DIRTY_LOG_PAGE_OFFSET static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, __u32 size) { return -EINVAL; } +static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm) +{ + return -EINVAL; +} +#endif + int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) { @@ -2961,7 +2968,11 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, { switch (cap->cap) { case KVM_CAP_DIRTY_LOG_RING: +#ifdef KVM_DIRTY_LOG_PAGE_OFFSET return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]); +#else + break; +#endif default: return kvm_vm_ioctl_enable_cap(kvm, cap); } @@ -3135,6 +3146,11 @@ static long kvm_vm_ioctl(struct file *filp, case KVM_CHECK_EXTENSION: r = kvm_vm_ioctl_check_extension_generic(kvm, arg); break; +#ifdef KVM_DIRTY_LOG_PAGE_OFFSET + case KVM_RESET_DIRTY_PAGES: + r = kvm_vm_ioctl_reset_dirty_pages(kvm); + break; +#endif /* KVM_DIRTY_LOG_PAGE_OFFSET */ default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); } -- 2.5.0