Add a new IOCTL pair to retrieve or set the VCPU state in one chunk. More precisely, the IOCTL is able to process a list of substates to be read or written. This list is easily extensible without breaking the existing ABI, so we will no longer have to add new IOCTLs when we discover a missing VCPU state field or want to support new hardware features. This patch establishes the generic infrastructure for KVM_GET/SET_VCPU_STATE and adds support for the generic substates REGS, SREGS, FPU, and MP. To avoid code duplication, the entry points of the corresponding original IOCTLs are converted to make use of the new infrastructure internally, too. Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> --- Documentation/kvm/api.txt | 73 ++++++++++ arch/ia64/kvm/kvm-ia64.c | 12 ++ arch/powerpc/kvm/powerpc.c | 12 ++ arch/s390/kvm/kvm-s390.c | 12 ++ arch/x86/kvm/x86.c | 12 ++ include/linux/kvm.h | 24 +++ include/linux/kvm_host.h | 5 + virt/kvm/kvm_main.c | 318 +++++++++++++++++++++++++++++++------------- 8 files changed, 376 insertions(+), 92 deletions(-) diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 5a4bc8c..7c0be8d 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -593,6 +593,49 @@ struct kvm_irqchip { } chip; }; +4.27 KVM_GET/SET_VCPU_STATE + +Capability: KVM_CAP_VCPU_STATE +Architectures: all (substate support may vary across architectures) +Type: vcpu ioctl +Parameters: struct kvm_vcpu_state (in/out) +Returns: 0 on success, -1 on error + +Reads or sets one or more vcpu substates. + +The data structures exchanged between user space and kernel are organized +in two layers. Layer one is the header structure kvm_vcpu_state: + +struct kvm_vcpu_state { + __u32 nsubstates; /* number of elements in substates */ + __u32 nprocessed; /* return value: successfully processed substates */ + struct kvm_vcpu_substate substates[0]; +}; + +The kernel accepts up to KVM_MAX_VCPU_SUBSTATES elements in the substates +array.
An element is described by kvm_vcpu_substate: + +struct kvm_vcpu_substate { + __u32 type; /* KVM_VCPU_STATE_* or KVM_$(ARCH)_VCPU_STATE_* */ + __u32 pad; + __s64 offset; /* payload offset to kvm_vcpu_state in bytes */ +}; + +Layer two consists of the substate-specific payload structures. See section 6 for a +list of supported substates and their payload format. + +Example setup for a single-substate query via KVM_GET_VCPU_STATE: + + struct { + struct kvm_vcpu_state header; + struct kvm_vcpu_substate substates[1]; + } request; + struct kvm_regs regs; + + request.header.nsubstates = 1; + request.header.substates[0].type = KVM_VCPU_STATE_REGS; + request.header.substates[0].offset = (size_t)&regs - (size_t)&request; + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by @@ -757,3 +800,33 @@ powerpc specific. char padding[256]; }; }; + +6. Supported vcpu substates + +6.1 KVM_VCPU_STATE_REGS + +Architectures: all +Payload: struct kvm_regs (see KVM_GET_REGS) +Deprecates: KVM_GET/SET_REGS + +6.2 KVM_VCPU_STATE_SREGS + +Architectures: all +Payload: struct kvm_sregs (see KVM_GET_SREGS) +Deprecates: KVM_GET/SET_SREGS + +6.3 KVM_VCPU_STATE_FPU + +Architectures: all +Payload: struct kvm_fpu (see KVM_GET_FPU) +Deprecates: KVM_GET/SET_FPU + +6.4 KVM_VCPU_STATE_MP + +Architectures: x86, ia64 +Payload: struct kvm_mp_state +Deprecates: KVM_GET/SET_MP_STATE + +struct kvm_mp_state { + __u32 mp_state; /* KVM_MP_STATE_* */ +}; diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 5fdeec5..c3450a6 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1991,3 +1991,15 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, vcpu_put(vcpu); return r; } + +int kvm_arch_vcpu_get_substate(struct kvm_vcpu *vcpu, uint8_t __user *arg_base, + struct kvm_vcpu_substate *substate) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_set_substate(struct kvm_vcpu *vcpu, uint8_t __user *arg_base, + struct kvm_vcpu_substate *substate) +{ + return
-EINVAL; +} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 5902bbc..3336ad5 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -436,3 +436,15 @@ int kvm_arch_init(void *opaque) void kvm_arch_exit(void) { } + +int kvm_arch_vcpu_get_substate(struct kvm_vcpu *vcpu, uint8_t __user *arg_base, + struct kvm_vcpu_substate *substate) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_set_substate(struct kvm_vcpu *vcpu, uint8_t __user *arg_base, + struct kvm_vcpu_substate *substate) +{ + return -EINVAL; +} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5445058..978ed6c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -450,6 +450,18 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return -EINVAL; /* not implemented yet */ } +int kvm_arch_vcpu_get_substate(struct kvm_vcpu *vcpu, uint8_t __user *arg_base, + struct kvm_vcpu_substate *substate) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_set_substate(struct kvm_vcpu *vcpu, uint8_t __user *arg_base, + struct kvm_vcpu_substate *substate) +{ + return -EINVAL; +} + static void __vcpu_run(struct kvm_vcpu *vcpu) { memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9601bc6..685215b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4674,6 +4674,18 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); +int kvm_arch_vcpu_get_substate(struct kvm_vcpu *vcpu, uint8_t __user *arg_base, + struct kvm_vcpu_substate *substate) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_set_substate(struct kvm_vcpu *vcpu, uint8_t __user *arg_base, + struct kvm_vcpu_substate *substate) +{ + return -EINVAL; +} + void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { if (vcpu->arch.time_page) { diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 7d8c382..421dbf8 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -393,6 
+393,26 @@ struct kvm_ioeventfd { __u8 pad[36]; }; +/* for KVM_GET_VCPU_STATE and KVM_SET_VCPU_STATE */ +#define KVM_VCPU_STATE_REGS 0 +#define KVM_VCPU_STATE_SREGS 1 +#define KVM_VCPU_STATE_FPU 2 +#define KVM_VCPU_STATE_MP 3 + +struct kvm_vcpu_substate { + __u32 type; /* KVM_VCPU_STATE_* or KVM_$(ARCH)_VCPU_STATE_* */ + __u32 pad; + __s64 offset; /* payload offset to kvm_vcpu_state in bytes */ +}; + +#define KVM_MAX_VCPU_SUBSTATES 64 + +struct kvm_vcpu_state { + __u32 nsubstates; /* number of elements in substates */ + __u32 nprocessed; /* return value: successfully processed substates */ + struct kvm_vcpu_substate substates[0]; +}; + #define KVMIO 0xAE /* @@ -480,6 +500,7 @@ struct kvm_ioeventfd { #endif #define KVM_CAP_IOEVENTFD 36 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37 +#define KVM_CAP_VCPU_STATE 38 #ifdef KVM_CAP_IRQ_ROUTING @@ -642,6 +663,9 @@ struct kvm_irqfd { /* IA64 stack access */ #define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *) #define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *) +/* Available with KVM_CAP_VCPU_STATE */ +#define KVM_GET_VCPU_STATE _IOR(KVMIO, 0x9f, struct kvm_vcpu_state) +#define KVM_SET_VCPU_STATE _IOW(KVMIO, 0xa0, struct kvm_vcpu_state) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bd5a616..7419f32 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -332,6 +332,11 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg); int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); +int kvm_arch_vcpu_get_substate(struct kvm_vcpu *vcpu, uint8_t __user *arg_base, + struct kvm_vcpu_substate *substate); +int kvm_arch_vcpu_set_substate(struct kvm_vcpu *vcpu, uint8_t __user *arg_base, + struct kvm_vcpu_substate *substate); + int kvm_arch_init(void *opaque); void kvm_arch_exit(void); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c4289c0..d8dac51 100644 --- 
a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1246,124 +1246,224 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) return 0; } -static long kvm_vcpu_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) +static int kvm_vcpu_get_substate(struct kvm_vcpu *vcpu, + uint8_t __user *arg_base, + struct kvm_vcpu_substate *substate) { - struct kvm_vcpu *vcpu = filp->private_data; - void __user *argp = (void __user *)arg; + void __user *argp = (void __user *)arg_base + substate->offset; int r; - struct kvm_fpu *fpu = NULL; - struct kvm_sregs *kvm_sregs = NULL; - if (vcpu->kvm->mm != current->mm) - return -EIO; - switch (ioctl) { - case KVM_RUN: - r = -EINVAL; - if (arg) - goto out; - r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); - break; - case KVM_GET_REGS: { + switch (substate->type) { + case KVM_VCPU_STATE_REGS: { struct kvm_regs *kvm_regs; - r = -ENOMEM; kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); + r = -ENOMEM; if (!kvm_regs) - goto out; + break; r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); if (r) - goto out_free1; + goto out_free_regs; r = -EFAULT; if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs))) - goto out_free1; + goto out_free_regs; r = 0; -out_free1: +out_free_regs: kfree(kvm_regs); break; } - case KVM_SET_REGS: { - struct kvm_regs *kvm_regs; + case KVM_VCPU_STATE_SREGS: { + struct kvm_sregs *kvm_sregs; + kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); r = -ENOMEM; - kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); - if (!kvm_regs) - goto out; - r = -EFAULT; - if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs))) - goto out_free2; - r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); + if (!kvm_sregs) + break; + r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs); if (r) - goto out_free2; + goto out_free_sregs; + r = -EFAULT; + if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs))) + goto out_free_sregs; r = 0; -out_free2: - kfree(kvm_regs); +out_free_sregs: + 
kfree(kvm_sregs); break; } - case KVM_GET_SREGS: { - kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); + case KVM_VCPU_STATE_FPU: { + struct kvm_fpu *kvm_fpu; + + kvm_fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); r = -ENOMEM; - if (!kvm_sregs) - goto out; - r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs); + if (!kvm_fpu) + break; + r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, kvm_fpu); if (r) - goto out; + goto out_free_fpu; /* kvm_fpu is already allocated: free it on arch failure instead of leaking it */ r = -EFAULT; - if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs))) - goto out; + if (copy_to_user(argp, kvm_fpu, sizeof(struct kvm_fpu))) + goto out_free_fpu; + r = 0; +out_free_fpu: + kfree(kvm_fpu); + break; + } + case KVM_VCPU_STATE_MP: { + struct kvm_mp_state mp_state; + + r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state); + if (r) + break; + r = -EFAULT; + if (copy_to_user(argp, &mp_state, sizeof(struct kvm_mp_state))) + break; + r = 0; + break; + } + default: + r = kvm_arch_vcpu_get_substate(vcpu, arg_base, substate); + } + return r; +} + +static int kvm_vcpu_set_substate(struct kvm_vcpu *vcpu, + uint8_t __user *arg_base, + struct kvm_vcpu_substate *substate) +{ + void __user *argp = (void __user *)arg_base + substate->offset; + int r; + + switch (substate->type) { + case KVM_VCPU_STATE_REGS: { + struct kvm_regs *kvm_regs; + + kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); + r = -ENOMEM; + if (!kvm_regs) + break; + r = -EFAULT; + if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs))) + goto out_free_regs; + r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); + if (r) + goto out_free_regs; r = 0; +out_free_regs: + kfree(kvm_regs); break; } - case KVM_SET_SREGS: { + case KVM_VCPU_STATE_SREGS: { + struct kvm_sregs *kvm_sregs; + kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL); r = -ENOMEM; if (!kvm_sregs) - goto out; + break; r = -EFAULT; if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs))) - goto out; + goto out_free_sregs; r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); if (r) - goto out; +
goto out_free_sregs; r = 0; +out_free_sregs: + kfree(kvm_sregs); break; } - case KVM_GET_MP_STATE: { - struct kvm_mp_state mp_state; + case KVM_VCPU_STATE_FPU: { + struct kvm_fpu *kvm_fpu; - r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state); - if (r) - goto out; + kvm_fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL); + r = -ENOMEM; + if (!kvm_fpu) + break; r = -EFAULT; - if (copy_to_user(argp, &mp_state, sizeof mp_state)) - goto out; + if (copy_from_user(kvm_fpu, argp, sizeof(struct kvm_fpu))) + goto out_free_fpu; + r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, kvm_fpu); + if (r) + goto out_free_fpu; r = 0; +out_free_fpu: + kfree(kvm_fpu); break; } - case KVM_SET_MP_STATE: { + case KVM_VCPU_STATE_MP: { struct kvm_mp_state mp_state; r = -EFAULT; - if (copy_from_user(&mp_state, argp, sizeof mp_state)) - goto out; + if (copy_from_user(&mp_state, argp, + sizeof(struct kvm_mp_state))) + break; r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); - if (r) - goto out; - r = 0; break; } + default: + r = kvm_arch_vcpu_set_substate(vcpu, arg_base, substate); + } + return r; +} + + +static long kvm_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + struct kvm_vcpu_substate substate; + int r; + + if (vcpu->kvm->mm != current->mm) + return -EIO; + switch (ioctl) { + case KVM_RUN: + r = -EINVAL; + if (arg) + break; + r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); + break; + case KVM_GET_REGS: + substate.type = KVM_VCPU_STATE_REGS; + substate.offset = 0; + r = kvm_vcpu_get_substate(vcpu, argp, &substate); + break; + case KVM_SET_REGS: + substate.type = KVM_VCPU_STATE_REGS; + substate.offset = 0; + r = kvm_vcpu_set_substate(vcpu, argp, &substate); + break; + case KVM_GET_SREGS: + substate.type = KVM_VCPU_STATE_SREGS; + substate.offset = 0; + r = kvm_vcpu_get_substate(vcpu, argp, &substate); + break; + case KVM_SET_SREGS: + substate.type = KVM_VCPU_STATE_SREGS; + 
substate.offset = 0; + r = kvm_vcpu_set_substate(vcpu, argp, &substate); + break; + case KVM_GET_MP_STATE: + substate.type = KVM_VCPU_STATE_MP; + substate.offset = 0; + r = kvm_vcpu_get_substate(vcpu, argp, &substate); + break; + case KVM_SET_MP_STATE: + substate.type = KVM_VCPU_STATE_MP; + substate.offset = 0; + r = kvm_vcpu_set_substate(vcpu, argp, &substate); + break; case KVM_TRANSLATE: { struct kvm_translation tr; r = -EFAULT; if (copy_from_user(&tr, argp, sizeof tr)) - goto out; + break; r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr); if (r) - goto out; + break; r = -EFAULT; if (copy_to_user(argp, &tr, sizeof tr)) - goto out; + break; r = 0; break; } @@ -1372,10 +1472,10 @@ out_free2: r = -EFAULT; if (copy_from_user(&dbg, argp, sizeof dbg)) - goto out; + break; r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); if (r) - goto out; + break; r = 0; break; } @@ -1389,53 +1489,86 @@ out_free2: r = -EFAULT; if (copy_from_user(&kvm_sigmask, argp, sizeof kvm_sigmask)) - goto out; + break; r = -EINVAL; if (kvm_sigmask.len != sizeof sigset) - goto out; + break; r = -EFAULT; if (copy_from_user(&sigset, sigmask_arg->sigset, sizeof sigset)) - goto out; + break; p = &sigset; } r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); break; } - case KVM_GET_FPU: { - fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); - r = -ENOMEM; - if (!fpu) - goto out; - r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu); - if (r) - goto out; - r = -EFAULT; - if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu))) - goto out; - r = 0; + case KVM_GET_FPU: + substate.type = KVM_VCPU_STATE_FPU; + substate.offset = 0; + r = kvm_vcpu_get_substate(vcpu, argp, &substate); break; - } - case KVM_SET_FPU: { - fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL); - r = -ENOMEM; - if (!fpu) - goto out; + case KVM_SET_FPU: + substate.type = KVM_VCPU_STATE_FPU; + substate.offset = 0; + r = kvm_vcpu_set_substate(vcpu, argp, &substate); + break; + case KVM_GET_VCPU_STATE: + case KVM_SET_VCPU_STATE: { + struct kvm_vcpu_state 
__user *user_head = argp; + struct kvm_vcpu_substate *substates = NULL; + uint8_t __user *arg_base = argp; + struct kvm_vcpu_state head; + size_t size; + int i; + r = -EFAULT; - if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu))) - goto out; - r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); - if (r) - goto out; + if (copy_from_user(&head, user_head, + sizeof(struct kvm_vcpu_state))) + break; + + head.nprocessed = 0; + + size = head.nsubstates * sizeof(struct kvm_vcpu_substate); + if (head.nsubstates <= 1) { + substates = &substate; + } else { + r = -E2BIG; + if (head.nsubstates > KVM_MAX_VCPU_SUBSTATES) + goto vcpu_state_out; + + substates = kmalloc(size, GFP_KERNEL); + r = -ENOMEM; + if (!substates) + goto vcpu_state_out; + } + + r = -EFAULT; + if (copy_from_user(substates, user_head->substates, size)) + goto vcpu_state_out; + + for (i = 0; i < head.nsubstates; i++) { + if (ioctl == KVM_GET_VCPU_STATE) + r = kvm_vcpu_get_substate(vcpu, arg_base, + &substates[i]); + else + r = kvm_vcpu_set_substate(vcpu, arg_base, + &substates[i]); + if (r < 0) + goto vcpu_state_out; + head.nprocessed++; + } r = 0; +vcpu_state_out: + if (copy_to_user(&user_head->nprocessed, &head.nprocessed, + sizeof(head.nprocessed))) + r = -EFAULT; + if (head.nsubstates > 1) + kfree(substates); break; } default: r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); } -out: - kfree(fpu); - kfree(kvm_sregs); return r; } @@ -1601,6 +1734,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) case KVM_CAP_USER_MEMORY: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: + case KVM_CAP_VCPU_STATE: #ifdef CONFIG_KVM_APIC_ARCHITECTURE case KVM_CAP_SET_BOOT_CPU_ID: #endif -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html