Dave Martin <Dave.Martin@xxxxxxx> writes:

> This patch adds the necessary API extensions to allow userspace to
> detect SVE support for guests and enable it.
>
> A new capability KVM_CAP_ARM_SVE is defined to allow userspace to
> detect the availability of the KVM SVE API extensions in the usual
> way.
>
> Userspace needs to enable SVE explicitly per vcpu and configure the
> set of SVE vector lengths available to the guest before the vcpu is
> allowed to run.  For these purposes, a new arm64-specific vcpu
> ioctl KVM_ARM_SVE_CONFIG is added, with the following subcommands
> (in rough order of expected use):
>
> KVM_ARM_SVE_CONFIG_QUERY: report the set of vector lengths
> supported by this host.
>
>     The resulting set can be supplied directly to
>     KVM_ARM_SVE_CONFIG_SET in order to obtain the maximal possible
>     set, or used to inform userspace's decision on the appropriate
>     set of vector lengths (possibly taking into account the
>     configuration of other nodes in the cluster so that the VM can
>     migrate freely).
>
> KVM_ARM_SVE_CONFIG_SET: enable SVE for this vcpu and configure the
> set of vector lengths it offers to the guest.
>
>     This can only be done once, before the vcpu is run.
>
> KVM_ARM_SVE_CONFIG_GET: report the set of vector lengths available
> to the guest on this vcpu (for use when snapshotting or
> migrating a VM).
>
> Signed-off-by: Dave Martin <Dave.Martin@xxxxxxx>
> ---
>
> Changes since RFCv1:
>
>  * The new feature bit for PREFERRED_TARGET / VCPU_INIT is gone in
>    favour of a capability and a new ioctl to enable/configure SVE.
>
>    Perhaps the SVE configuration could be done via device attributes,
>    but it still has to be done early, so crowbarring support for this
>    behind a generic API may cause more trouble than it solves.
>
>    This is still up for discussion if anybody feels strongly about it.
>
>  * An ioctl KVM_ARM_SVE_CONFIG has been added to report the set of
>    vector lengths available and configure SVE for a vcpu.
>
>    To reduce ioctl namespace pollution the new operations are grouped
>    as subcommands under a single ioctl, since they use the same
>    argument format anyway.
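
To check I follow the intended flow, here is a minimal userspace sketch
(hypothetical: it assumes a vcpu fd already obtained via KVM_CREATE_VCPU,
the uapi additions below installed, and error handling trimmed):

  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>          /* KVM_ARM_SVE_CONFIG */
  #include <asm/kvm.h>            /* struct kvm_sve_vls (added below) */

  static int configure_sve(int vcpu_fd)
  {
          struct kvm_sve_vls vls;

          /* Ask the host for the maximal virtualisable set: */
          memset(&vls, 0, sizeof(vls));
          vls.cmd = KVM_ARM_SVE_CONFIG_QUERY;
          if (ioctl(vcpu_fd, KVM_ARM_SVE_CONFIG, &vls))
                  return -1;

          /*
           * A migration-aware VMM would intersect vls.required_vqs
           * with the sets supported on the other hosts here.
           */

          /* Enable SVE with that set, before the vcpu first runs: */
          vls.cmd = KVM_ARM_SVE_CONFIG_SET;
          if (ioctl(vcpu_fd, KVM_ARM_SVE_CONFIG, &vls))
                  return -1;

          return 0;
  }

i.e. two ioctls on the happy path.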
> ---
>  arch/arm64/include/asm/kvm_host.h |   8 +-
>  arch/arm64/include/uapi/asm/kvm.h |  14 ++++
>  arch/arm64/kvm/guest.c            | 164 +++++++++++++++++++++++++++++++++++++-
>  arch/arm64/kvm/reset.c            |  50 ++++++++++++
>  include/uapi/linux/kvm.h          |   4 +
>  5 files changed, 238 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index bbde597..5225485 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -52,6 +52,12 @@
>
>  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>
> +#ifdef CONFIG_ARM64_SVE
> +bool kvm_sve_supported(void);
> +#else
> +static inline bool kvm_sve_supported(void) { return false; }
> +#endif
> +
>  int __attribute_const__ kvm_target_cpu(void);
>  int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
>  int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
> @@ -441,7 +447,7 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>
> -static inline void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
> +void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
>
>  void kvm_arm_init_debug(void);
>  void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> index 1ff68fa..94f6932 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -32,6 +32,7 @@
>  #define KVM_NR_SPSR	5
>
>  #ifndef __ASSEMBLY__
> +#include <linux/kernel.h>
>  #include <linux/psci.h>
>  #include <linux/types.h>
>  #include <asm/ptrace.h>
> @@ -108,6 +109,19 @@ struct kvm_vcpu_init {
>  	__u32 features[7];
>  };
>
> +/* Vector length set for KVM_ARM_SVE_CONFIG */
> +struct kvm_sve_vls {
> +	__u16 cmd;
> +	__u16 max_vq;
> +	__u16 _reserved[2];
> +	__u64 required_vqs[__KERNEL_DIV_ROUND_UP(SVE_VQ_MAX - SVE_VQ_MIN + 1, 64)];
> +};
> +
> +/* values for cmd: */
> +#define KVM_ARM_SVE_CONFIG_QUERY	0 /* query what the host can support */
> +#define KVM_ARM_SVE_CONFIG_SET		1 /* enable SVE for vcpu and set VLs */
> +#define KVM_ARM_SVE_CONFIG_GET		2 /* read the set of VLs for a vcpu */
> +
>  struct kvm_sregs {
>  };
>
> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
> index 331b85e..d96145a 100644
> --- a/arch/arm64/kvm/guest.c
> +++ b/arch/arm64/kvm/guest.c
> @@ -26,6 +26,9 @@
>  #include <linux/module.h>
>  #include <linux/vmalloc.h>
>  #include <linux/fs.h>
> +#include <linux/slab.h>
> +#include <linux/string.h>
> +#include <linux/types.h>
>  #include <kvm/arm_psci.h>
>  #include <asm/cputype.h>
>  #include <linux/uaccess.h>
> @@ -56,6 +59,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>  	return 0;
>  }
>
> +void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
> +{
> +	kfree(vcpu->arch.sve_state);
> +}
> +
>  static u64 core_reg_offset_from_id(u64 id)
>  {
>  	return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
> @@ -546,10 +554,164 @@ int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
>  	return 0;
>  }
>
> +#define VQS_PER_U64 64
> +#define vq_word(vqs, vq) (&(vqs)[((vq) - SVE_VQ_MIN) / VQS_PER_U64])
> +#define vq_mask(vq) ((u64)1 << (((vq) - SVE_VQ_MIN) % VQS_PER_U64))
> +
> +static void set_vq(u64 *vqs, unsigned int vq)
> +{
> +	*vq_word(vqs, vq) |= vq_mask(vq);
> +}
> +
> +static bool vq_set(const u64 *vqs, unsigned int vq)
> +{
> +	return *vq_word(vqs, vq) & vq_mask(vq);
> +}
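
As a side note, the userspace encode/decode of required_vqs would
presumably mirror these macros; a sketch, with hypothetical helper names
(SVE_VQ_MIN coming from the sigcontext uapi header):

  #include <asm/sigcontext.h>     /* SVE_VQ_MIN */

  #define VQS_PER_U64 64

  /* Hypothetical userspace mirrors of the kernel's vq bitmap helpers: */
  static void sve_vls_set_vq(struct kvm_sve_vls *vls, unsigned int vq)
  {
          vls->required_vqs[(vq - SVE_VQ_MIN) / VQS_PER_U64] |=
                  (__u64)1 << ((vq - SVE_VQ_MIN) % VQS_PER_U64);
  }

  static int sve_vls_test_vq(const struct kvm_sve_vls *vls, unsigned int vq)
  {
          return (vls->required_vqs[(vq - SVE_VQ_MIN) / VQS_PER_U64] >>
                  ((vq - SVE_VQ_MIN) % VQS_PER_U64)) & 1;
  }

e.g. a VMM wanting to cap guests at 512-bit vectors would clear every bit
above vq == 4 after the QUERY step and set max_vq accordingly.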
> +static int kvm_vcpu_set_sve_vls(struct kvm_vcpu *vcpu, struct kvm_sve_vls *vls,
> +		struct kvm_sve_vls __user *userp)
> +{
> +	unsigned int vq, max_vq;
> +	int ret;
> +
> +	if (vcpu->arch.has_run_once || vcpu_has_sve(vcpu))
> +		return -EBADFD; /* too late, or already configured */
> +
> +	BUG_ON(vcpu->arch.sve_max_vl || vcpu->arch.sve_state);
> +
> +	if (vls->max_vq < SVE_VQ_MIN || vls->max_vq > SVE_VQ_MAX)
> +		return -EINVAL;
> +
> +	max_vq = 0;
> +	for (vq = SVE_VQ_MIN; vq <= vls->max_vq; ++vq) {
> +		bool available = sve_vq_available(vq);
> +		bool required = vq_set(vls->required_vqs, vq);
> +
> +		if (required != available)
> +			break;
> +
> +		if (required)
> +			max_vq = vq;
> +	}
> +
> +	if (max_vq < SVE_VQ_MIN)
> +		return -EINVAL;
> +
> +	vls->max_vq = max_vq;
> +	ret = put_user(vls->max_vq, &userp->max_vq);
> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * kvm_reset_vcpu() may already have run in KVM_VCPU_INIT, so we
> +	 * rely on kzalloc() being sufficient to reset the guest SVE
> +	 * state here for a new vcpu.
> +	 *
> +	 * Subsequent resets after vcpu initialisation are handled by
> +	 * kvm_reset_sve().
> +	 */
> +	vcpu->arch.sve_state = kzalloc(SVE_SIG_REGS_SIZE(vls->max_vq),
> +				       GFP_KERNEL);
> +	if (!vcpu->arch.sve_state)
> +		return -ENOMEM;
> +
> +	vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;
> +	vcpu->arch.sve_max_vl = sve_vl_from_vq(vls->max_vq);
> +
> +	return 0;
> +}
> +
> +static int __kvm_vcpu_query_sve_vls(struct kvm_sve_vls *vls,
> +		unsigned int max_vq, struct kvm_sve_vls __user *userp)
> +{
> +	unsigned int vq, max_available_vq;
> +
> +	memset(&vls->required_vqs, 0, sizeof(vls->required_vqs));
> +
> +	BUG_ON(max_vq < SVE_VQ_MIN || max_vq > SVE_VQ_MAX);
> +
> +	max_available_vq = 0;
> +	for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq)
> +		if (sve_vq_available(vq)) {
> +			set_vq(vls->required_vqs, vq);
> +			max_available_vq = vq;
> +		}
> +
> +	if (WARN_ON(max_available_vq < SVE_VQ_MIN))
> +		return -EIO;
> +
> +	vls->max_vq = max_available_vq;
> +	if (copy_to_user(userp, vls, sizeof(*vls)))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
> +static int kvm_vcpu_query_sve_vls(struct kvm_vcpu *vcpu, struct kvm_sve_vls *vls,
> +		struct kvm_sve_vls __user *userp)
> +{
> +	BUG_ON(!sve_vl_valid(sve_max_vl));
> +
> +	return __kvm_vcpu_query_sve_vls(vls,
> +			sve_vq_from_vl(sve_max_vl), userp);
> +}
> +
> +static int kvm_vcpu_get_sve_vls(struct kvm_vcpu *vcpu, struct kvm_sve_vls *vls,
> +		struct kvm_sve_vls __user *userp)
> +{
> +	if (!vcpu_has_sve(vcpu))
> +		return -EBADFD; /* not configured yet */
> +
> +	BUG_ON(!sve_vl_valid(vcpu->arch.sve_max_vl));
> +
> +	return __kvm_vcpu_query_sve_vls(vls,
> +			sve_vq_from_vl(vcpu->arch.sve_max_vl), userp);
> +}
> +
> +static int kvm_vcpu_sve_config(struct kvm_vcpu *vcpu,
> +		struct kvm_sve_vls __user *userp)
> +{
> +	struct kvm_sve_vls vls;
> +
> +	if (!kvm_sve_supported())
> +		return -EINVAL;
> +
> +	if (copy_from_user(&vls, userp, sizeof(vls)))
> +		return -EFAULT;
> +
> +	/*
> +	 * For forwards compatibility, flush any set bits in _reserved[]
> +	 * to tell userspace that we didn't look at them:
> +	 */
> +	memset(&vls._reserved, 0, sizeof vls._reserved);
> +
> +	switch (vls.cmd) {
> +	case KVM_ARM_SVE_CONFIG_QUERY:
> +		return kvm_vcpu_query_sve_vls(vcpu, &vls, userp);
> +
> +	case KVM_ARM_SVE_CONFIG_SET:
> +		return kvm_vcpu_set_sve_vls(vcpu, &vls, userp);
> +
> +	case KVM_ARM_SVE_CONFIG_GET:
> +		return kvm_vcpu_get_sve_vls(vcpu, &vls, userp);
> +
> +	default:
> +		return -EINVAL;
> +	}
> +}
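
One subtlety in the SET path above: if the required set matches the host
only up to some smaller VQ, the ioctl still succeeds, with the clamped
value written back through put_user(), so userspace should re-check
max_vq afterwards. Continuing the earlier sketch (hypothetical):

  __u16 wanted = vls.max_vq;

  vls.cmd = KVM_ARM_SVE_CONFIG_SET;
  if (ioctl(vcpu_fd, KVM_ARM_SVE_CONFIG, &vls) == 0 &&
      vls.max_vq != wanted) {
          /* host clamped the set; decide whether that is acceptable */
  }

It might be worth calling that out in the documentation.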
>  int kvm_arm_arch_vcpu_ioctl(struct kvm_vcpu *vcpu,
>  			    unsigned int ioctl, unsigned long arg)
>  {
> -	return -EINVAL;
> +	void __user *userp = (void __user *)arg;
> +
> +	switch (ioctl) {
> +	case KVM_ARM_SVE_CONFIG:
> +		return kvm_vcpu_sve_config(vcpu, userp);
> +
> +	default:
> +		return -EINVAL;
> +	}
>  }
>
>  int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index e37c78b..c2edcde 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -19,10 +19,12 @@
>   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>   */
>
> +#include <linux/atomic.h>
>  #include <linux/errno.h>
>  #include <linux/kvm_host.h>
>  #include <linux/kvm.h>
>  #include <linux/hw_breakpoint.h>
> +#include <linux/string.h>
>
>  #include <kvm/arm_arch_timer.h>
>
> @@ -54,6 +56,31 @@ static bool cpu_has_32bit_el1(void)
>  	return !!(pfr0 & 0x20);
>  }
>
> +#ifdef CONFIG_ARM64_SVE
> +bool kvm_sve_supported(void)
> +{
> +	static bool warn_printed = false;
> +
> +	if (!system_supports_sve())
> +		return false;
> +
> +	/*
> +	 * For now, consider the hardware broken if implementation
> +	 * differences between CPUs in the system result in the set of
> +	 * vector lengths safely virtualisable for guests being less
> +	 * than the set provided to userspace:
> +	 */
> +	if (sve_max_virtualisable_vl != sve_max_vl) {
> +		if (!xchg(&warn_printed, true))
> +			kvm_err("Hardware SVE implementations mismatched: suppressing SVE for guests.");
> +
> +		return false;
> +	}
> +
> +	return true;
> +}
> +#endif

This seems like you are re-inventing WARN_ONCE for the sake of having
"kvm [%i]: " in your printk string.

> +
>  /**
>   * kvm_arch_dev_ioctl_check_extension
>   *
> @@ -85,6 +112,9 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_VCPU_EVENTS:
>  		r = 1;
>  		break;
> +	case KVM_CAP_ARM_SVE:
> +		r = kvm_sve_supported();
> +		break;

For debugging we actually use the capability's return value to indicate
how many WPs/BPs we have. We could do the same here for the maximum
number of VQs, but I guess KVM_ARM_SVE_CONFIG_QUERY reports a much
richer set of information. However, this does beg the question of how
useful all this extra information is to the userspace program. A dumber
implementation would be:

   QEMU                      | Kernel

   KVM_CAP_ARM_SVE   --------->
                                 Max VQ=n
   VQ/0              <---------

   We want n < max VQ

   KVM_ARM_SVE_CONFIG(n)  ---->
                                 Unsupported VQ
   EINVAL            <--------

   Weird HW can't support our choice of n. Give up or try another value.

   KVM_ARM_SVE_CONFIG(n-1) --->
                                 That's OK
   0 (OK)            <---------

It imposes more heavy lifting on the userspace side of things, but I
would expect the "normal" case to be sane hardware that supports all VLs
from Max VQ down to 1. And for the cases where it doesn't, iterating
through several KVM_ARM_SVE_CONFIG steps is a start-up cost, not a
runtime one.

This would mean one capability and one SVE_CONFIG sub-command with a
single parameter. We could always extend the interface later, but I
wonder if we are gold-plating the API too early here?

What do the maintainers think?
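
For illustration, the userspace side of that dumber scheme would be a
short probe loop; a sketch, assuming the hypothetical single-parameter
form of KVM_ARM_SVE_CONFIG described above (i.e. passing a plain max VQ
rather than a struct kvm_sve_vls):

  /*
   * Hypothetical: probe downwards from the maximum VQ advertised by
   * KVM_CAP_ARM_SVE until the kernel accepts one.
   */
  static int configure_sve_simple(int vcpu_fd, int max_vq)
  {
          int vq;

          for (vq = max_vq; vq >= 1; vq--)
                  if (ioctl(vcpu_fd, KVM_ARM_SVE_CONFIG, vq) == 0)
                          return vq;  /* SVE enabled, guest max VQ = vq */

          return -1;                  /* no mutually supported VQ */
  }

On sane hardware the first iteration succeeds, so the loop only matters
for the weird-HW case.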
>  	default:
>  		r = 0;
>  	}
> @@ -92,6 +122,21 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
>  	return r;
>  }
>
> +int kvm_reset_sve(struct kvm_vcpu *vcpu)
> +{
> +	if (!vcpu_has_sve(vcpu))
> +		return 0;
> +
> +	if (WARN_ON(!vcpu->arch.sve_state ||
> +		    !sve_vl_valid(vcpu->arch.sve_max_vl)))
> +		return -EIO;

For some reason using WARN_ON for side effects seems sketchy, but while
BUG_ON can compile away to nothing, it seems WARN_ON has been designed
to always give you the result of the condition, so never mind...

> +
> +	memset(vcpu->arch.sve_state, 0,
> +	       SVE_SIG_REGS_SIZE(sve_vq_from_vl(vcpu->arch.sve_max_vl)));
> +
> +	return 0;
> +}
> +
>  /**
>   * kvm_reset_vcpu - sets core registers and sys_regs to reset value
>   * @vcpu: The VCPU pointer
> @@ -103,6 +148,7 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
>  int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
>  {
>  	const struct kvm_regs *cpu_reset;
> +	int ret;
>
>  	switch (vcpu->arch.target) {
>  	default:
> @@ -120,6 +166,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
>  	/* Reset core registers */
>  	memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset));
>
> +	ret = kvm_reset_sve(vcpu);
> +	if (ret)
> +		return ret;
> +
>  	/* Reset system registers */
>  	kvm_reset_sys_regs(vcpu);
>
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 7c3c5cc..488ca56 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -953,6 +953,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_NESTED_STATE 157
>  #define KVM_CAP_ARM_INJECT_SERROR_ESR 158
>  #define KVM_CAP_MSR_PLATFORM_INFO 159
> +#define KVM_CAP_ARM_SVE 160
>
>  #ifdef KVM_CAP_IRQ_ROUTING
>
> @@ -1400,6 +1401,9 @@ struct kvm_enc_region {
>  #define KVM_GET_NESTED_STATE	_IOWR(KVMIO, 0xbe, struct kvm_nested_state)
>  #define KVM_SET_NESTED_STATE	_IOW(KVMIO,  0xbf, struct kvm_nested_state)
>
> +/* Available with KVM_CAP_ARM_SVE */
> +#define KVM_ARM_SVE_CONFIG	_IOWR(KVMIO,  0xc0, struct kvm_sve_vls)
> +
>  /* Secure Encrypted Virtualization command */
>  enum sev_cmd_id {
>  	/* Guest initialization commands */

--
Alex Bennée
_______________________________________________
kvmarm mailing list
kvmarm@xxxxxxxxxxxxxxxxxxxxx
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm