Introduce new memory tracking ioctls to support performant
checkpoint/restore implementations. See patch 6 for details on the new
ioctls.

Signed-off-by: Lei Cao <lei.cao@xxxxxxxxxxx>
---
 include/uapi/linux/kvm.h | 94 +++++++++++++++++++++++++++++++++++++
 virt/kvm/kvm_main.c      | 96 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 190 insertions(+)
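As a reviewer aid (not intended for the changelog): a minimal sketch of how a
userspace checkpointer might bring tracking up with the new ioctls. The vm_fd
argument, the MAX_DIRTY budget, and the single-shot error handling are
illustrative assumptions on my part; patch 6 defines the actual contract.

  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  #define MAX_DIRTY 16384		/* hypothetical per-cycle budget */

  /* One-time setup: initialize tracking state, then switch it on. */
  static int mt_start(int vm_fd)
  {
  	struct mt_setup setup = {
  		.version	= KVM_MT_VERSION,
  		.op		= KVM_MT_OP_INIT,
  		.flags		= KVM_MT_OPTION_NO_DUPS,
  		.max_dirty	= MAX_DIRTY,
  	};
  	struct mt_enable enable = { .flags = 1 };	/* 1 -> on */

  	if (ioctl(vm_fd, KVM_INIT_MT, &setup) < 0)
  		return -1;
  	return ioctl(vm_fd, KVM_ENABLE_MT, &enable);
  }

Teardown would presumably pass KVM_MT_OP_CLEANUP through the same KVM_INIT_MT
ioctl, given the op field in struct mt_setup.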
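Continuing the sketch above, one fetch pass per checkpoint cycle might look
like the following. I am assuming that the returned gfnlist entries use the
slot/offset packing defined by the MT_* macros and that gfn_info.count carries
the requested count on input and the returned count on output; the
MT_FETCH_WAIT behavior is per the KVM_MT_SUBLIST_FETCH comment in the header
below.

  static __u64 gfns[MAX_DIRTY];

  /* Drain one batch of dirty pages; re-arm write traps as we go. */
  static int mt_fetch_batch(int vm_fd)
  {
  	struct mt_sublist_fetch_info fetch = {
  		.gfn_info = {
  			.count		= MAX_DIRTY,	/* assumed: request size */
  			.max_dirty	= MAX_DIRTY,
  			.gfnlist	= gfns,
  		},
  		.flags = MT_FETCH_WAIT | MT_FETCH_REARM,
  	};

  	/* Sleeps until pages are available; KVM_MT_VM_QUIESCED wakes us early. */
  	if (ioctl(vm_fd, KVM_MT_SUBLIST_FETCH, &fetch) < 0)
  		return -1;

  	for (__s32 i = 0; i < fetch.gfn_info.count; i++) {
  		__u64 slot = MT_SLOT_FROM_SLOT_OFFSET(gfns[i]);
  		__u64 off  = MT_OFFSET_FROM_SLOT_OFFSET(gfns[i]);

  		/* ... copy the page at (slot, off) into the checkpoint ... */
  		(void)slot;
  		(void)off;
  	}
  	return 0;
  }

When the VM is paused at the end of a cycle, KVM_MT_VM_QUIESCED wakes the
sleeping fetcher so the final partial batch can be drained.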
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a7f1f80..2bce4db 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1312,4 +1312,98 @@ struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
+#define KVM_MT_VERSION	1
+struct mt_setup {
+	__u32 version;
+
+	/* which operation to perform? */
+#define KVM_MT_OP_INIT		1
+#define KVM_MT_OP_CLEANUP	2
+	__u32 op;
+
+	/*
+	 * flags bit defs:
+	 */
+
+	/*
+	 * Features.
+	 * 1. Avoid logging duplicate entries
+	 */
+#define KVM_MT_OPTION_NO_DUPS	(1 << 2)
+
+	__u32 flags;
+
+	/* max number of dirty pages per checkpoint cycle */
+	__u32 max_dirty;
+};
+
+struct mt_enable {
+	__u32 flags;	/* 1 -> on, 0 -> off */
+};
+
+#define MT_OFFSET_MASK	(0x0000ffffffffffffULL)
+
+#define MT_MAKE_SLOT_OFFSET(slot, offset)			\
+	({							\
+		__u64 slot_off = (offset) & MT_OFFSET_MASK;	\
+		slot_off |= (__u64)(slot) << 48;		\
+		slot_off;					\
+	})
+
+#define MT_OFFSET_FROM_SLOT_OFFSET(slot_off) \
+	((slot_off) & MT_OFFSET_MASK)
+
+#define MT_SLOT_FROM_SLOT_OFFSET(slot_off) \
+	((slot_off) >> 48)
+
+struct mt_gfn_list {
+	__s32 count;
+	__u32 max_dirty;
+	__u64 *gfnlist;
+};
+
+struct mt_prepare_cp {
+	__s64 cpid;
+};
+
+struct mt_sublist_fetch_info {
+	struct mt_gfn_list gfn_info;
+
+	/*
+	 * flags bit defs:
+	 */
+
+	/* caller sleeps until dirty count is reached */
+#define MT_FETCH_WAIT	(1 << 0)
+	/* dirty tracking is re-armed for each page in returned list */
+#define MT_FETCH_REARM	(1 << 1)
+
+	__u32 flags;
+};
+
+struct mt_dirty_trigger {
+	/* force vcpus to exit when trigger is reached */
+	__u32 dirty_trigger;
+};
+
+/* Initialize/cleanup MT data structures, allocate/free list buffers, etc. */
+#define KVM_INIT_MT		_IOW(KVMIO, 0xf0, struct mt_setup)
+/* Activate/deactivate memory tracking */
+#define KVM_ENABLE_MT		_IOW(KVMIO, 0xf1, struct mt_enable)
+/* Notify the MT subsystem that the VM is about to be unpaused */
+#define KVM_PREPARE_MT_CP	_IOW(KVMIO, 0xf2, struct mt_prepare_cp)
+/* Rearm dirty traps for the specified pages */
+#define KVM_REARM_DIRTY_PAGES	_IO(KVMIO, 0xf3)
+/* Notify the MT subsystem that no more pages will be dirtied this cycle */
+#define KVM_MT_VM_QUIESCED	_IO(KVMIO, 0xf4)
+/*
+ * Return the specified number of dirty pages. May return fewer than
+ * requested. Optionally, the caller can ask to sleep until the desired
+ * number is reached. The KVM_MT_VM_QUIESCED call above will wake this
+ * sleeper even if the number of dirty pages has not reached the requested
+ * amount.
+ */
+#define KVM_MT_SUBLIST_FETCH	_IOWR(KVMIO, 0xf5, struct mt_sublist_fetch_info)
+/* Set the VM exit trigger point based on the dirty page count */
+#define KVM_MT_DIRTY_TRIGGER	_IOW(KVMIO, 0xf6, struct mt_dirty_trigger)
+
 #endif /* __LINUX_KVM_H */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4fd482f..8a582e5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2752,6 +2752,43 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 	return kvm_vm_ioctl_check_extension(kvm, arg);
 }
 
+static int kvm_vm_ioctl_mt_init(struct kvm *kvm, struct mt_setup *mts)
+{
+	return -EINVAL;
+}
+
+static int kvm_vm_ioctl_mt_enable(struct kvm *kvm, struct mt_enable *mte)
+{
+	return -EINVAL;
+}
+
+static int kvm_vm_ioctl_mt_prepare_cp(struct kvm *kvm,
+				      struct mt_prepare_cp *mtpcp)
+{
+	return -EINVAL;
+}
+
+static int kvm_vm_ioctl_mt_rearm_gfns(struct kvm *kvm)
+{
+	return -EINVAL;
+}
+
+static int kvm_vm_ioctl_mt_quiesced(struct kvm *kvm)
+{
+	return -EINVAL;
+}
+
+static int kvm_vm_ioctl_mt_sublist_fetch(struct kvm *kvm,
+					 struct mt_sublist_fetch_info *mtsfi)
+{
+	return -EINVAL;
+}
+
+static int kvm_vm_ioctl_mt_dirty_trigger(struct kvm *kvm, int dirty_trigger)
+{
+	return -EINVAL;
+}
+
 static long kvm_vm_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -2909,6 +2946,65 @@ out_free_irq_routing:
 	case KVM_CHECK_EXTENSION:
 		r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
 		break;
+	case KVM_INIT_MT: {
+		struct mt_setup mts;
+
+		r = -EFAULT;
+		if (copy_from_user(&mts, (void __user *)arg, sizeof(mts)))
+			goto out;
+		r = kvm_vm_ioctl_mt_init(kvm, &mts);
+		break;
+	}
+	case KVM_ENABLE_MT: {
+		struct mt_enable mte;
+
+		r = -EFAULT;
+		if (copy_from_user(&mte, (void __user *)arg, sizeof(mte)))
+			goto out;
+		r = kvm_vm_ioctl_mt_enable(kvm, &mte);
+		break;
+	}
+	case KVM_PREPARE_MT_CP: {
+		struct mt_prepare_cp mtpcp;
+
+		r = -EFAULT;
+		if (copy_from_user(&mtpcp, (void __user *)arg, sizeof(mtpcp)))
+			goto out;
+		r = kvm_vm_ioctl_mt_prepare_cp(kvm, &mtpcp);
+		break;
+	}
+	case KVM_REARM_DIRTY_PAGES: {
+		r = kvm_vm_ioctl_mt_rearm_gfns(kvm);
+		break;
+	}
+	case KVM_MT_VM_QUIESCED: {
+		r = kvm_vm_ioctl_mt_quiesced(kvm);
+		break;
+	}
+	case KVM_MT_SUBLIST_FETCH: {
+		struct mt_sublist_fetch_info mtsfi;
+
+		r = -EFAULT;
+		if (copy_from_user(&mtsfi, (void __user *)arg, sizeof(mtsfi)))
+			goto out;
+		r = kvm_vm_ioctl_mt_sublist_fetch(kvm, &mtsfi);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user((void __user *)arg, &mtsfi, sizeof(mtsfi)))
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_MT_DIRTY_TRIGGER: {
+		struct mt_dirty_trigger mtdt;
+
+		r = -EFAULT;
+		if (copy_from_user(&mtdt, (void __user *)arg, sizeof(mtdt)))
+			goto out;
+		r = kvm_vm_ioctl_mt_dirty_trigger(kvm, mtdt.dirty_trigger);
+		break;
+	}
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
-- 
2.5.0