On 03/02/2015 13:11, Thomas Huth wrote: > On s390, we've got to make sure to hold the IPTE lock while accessing > virtual memory. So let's add an ioctl for reading and writing virtual > memory to provide this feature for userspace, too. > > Signed-off-by: Thomas Huth <thuth@xxxxxxxxxxxxxxxxxx> > Reviewed-by: Dominik Dingel <dingel@xxxxxxxxxxxxxxxxxx> > Reviewed-by: David Hildenbrand <dahi@xxxxxxxxxxxxxxxxxx> > --- > Documentation/virtual/kvm/api.txt | 44 +++++++++++++++++++++++++ > arch/s390/kvm/gaccess.c | 22 +++++++++++++ > arch/s390/kvm/gaccess.h | 2 + > arch/s390/kvm/kvm-s390.c | 63 +++++++++++++++++++++++++++++++++++++ > include/uapi/linux/kvm.h | 21 ++++++++++++ > 5 files changed, 152 insertions(+), 0 deletions(-) > > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > index b112efc..bf44b53 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -2716,6 +2716,50 @@ The fields in each entry are defined as follows: > eax, ebx, ecx, edx: the values returned by the cpuid instruction for > this function/index combination > > +4.89 KVM_GUEST_MEM_OP > + > +Capability: KVM_CAP_MEM_OP Put "virtual" somewhere in the ioctl name and capability? > +Architectures: s390 > +Type: vcpu ioctl > +Parameters: struct kvm_guest_mem_op (in) > +Returns: = 0 on success, > + < 0 on generic error (e.g. -EFAULT or -ENOMEM), > + > 0 if an exception occurred while walking the page tables > + > +Read or write data from/to the virtual memory of a VPCU. > + > +Parameters are specified via the following structure: > + > +struct kvm_guest_mem_op { > + __u64 gaddr; /* the guest address */ > + __u64 flags; /* arch specific flags */ > + __u32 size; /* amount of bytes */ > + __u32 op; /* type of operation */ > + __u64 buf; /* buffer in userspace */ > + __u8 reserved[32]; /* should be set to 0 */ > +}; > + > +The type of operation is specified in the "op" field, either KVM_MEMOP_VIRTREAD > +for reading from memory, KVM_MEMOP_VIRTWRITE for writing to memory, or > +KVM_MEMOP_CHECKVIRTREAD or KVM_MEMOP_CHECKVIRTWRITE to check whether the Better: #define KVM_MEMOP_READ 0 #define KVM_MEMOP_WRITE 1 and in the flags field: #define KVM_MEMOP_F_CHECK_ONLY (1 << 1) > +corresponding memory access would create an access exception (without > +changing the data in the memory at the destination). In case an access > +exception occurred while walking the MMU tables of the guest, the ioctl > +returns a positive error number to indicate the type of exception. The > +exception is raised directly at the corresponding VCPU if the bit > +KVM_MEMOP_F_INJECT_EXC is set in the "flags" field. KVM_MEMOP_F_INJECT_EXCEPTION. > +The logical (virtual) start address of the memory region has to be specified > +in the "gaddr" field, and the length of the region in the "size" field. > +"buf" is the buffer supplied by the userspace application where the read data > +should be written to for KVM_MEMOP_VIRTREAD, or where the data that should > +be written is stored for a KVM_MEMOP_VIRTWRITE. "buf" can be NULL for both > +CHECK operations. "buf" is unused and can be NULL for both CHECK operations. > +The "reserved" field is meant for future extensions. It must currently be > +set to 0. Not really true, as you don't check it. So "It is not used by KVM with the currently defined set of flags" is a better explanation. Paolo > + > 5. The kvm_run structure > ------------------------ > > diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c > index 8a1be90..d912362 100644 > --- a/arch/s390/kvm/gaccess.c > +++ b/arch/s390/kvm/gaccess.c > @@ -697,6 +697,28 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, > } > > /** > + * check_gva_range - test a range of guest virtual addresses for accessibility > + */ > +int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, > + unsigned long length, int is_write) > +{ > + unsigned long gpa; > + unsigned long currlen; > + int rc = 0; > + > + ipte_lock(vcpu); > + while (length > 0 && !rc) { > + currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE)); > + rc = guest_translate_address(vcpu, gva, &gpa, is_write); > + gva += currlen; > + length -= currlen; > + } > + ipte_unlock(vcpu); > + > + return rc; > +} > + > +/** > * kvm_s390_check_low_addr_protection - check for low-address protection > * @ga: Guest address > * > diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h > index 0149cf1..268beb7 100644 > --- a/arch/s390/kvm/gaccess.h > +++ b/arch/s390/kvm/gaccess.h > @@ -157,6 +157,8 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data, > > int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, > unsigned long *gpa, int write); > +int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, > + unsigned long length, int is_write); > > int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, > unsigned long len, int write); > diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c > index b2371c0..c80a640 100644 > --- a/arch/s390/kvm/kvm-s390.c > +++ b/arch/s390/kvm/kvm-s390.c > @@ -168,6 +168,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > case KVM_CAP_VM_ATTRIBUTES: > case KVM_CAP_MP_STATE: > case KVM_CAP_S390_USER_SIGP: > + case KVM_CAP_MEM_OP: > r = 1; > break; > case KVM_CAP_NR_VCPUS: > @@ -1886,6 +1887,59 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, > return r; > } > > +static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, > + struct kvm_guest_mem_op *mop) > +{ > + void __user *uaddr = (void __user *)mop->buf; > + void *tmpbuf = NULL; > + int r, srcu_idx; > + > + if ((mop->flags & ~KVM_MEMOP_F_INJECT_EXC) != 0) > + return -EINVAL; > + > + if (mop->op == KVM_MEMOP_VIRTREAD || mop->op == KVM_MEMOP_VIRTWRITE) { > + if (mop->size > 16 * PAGE_SIZE) > + return -E2BIG; > + tmpbuf = vmalloc(mop->size); > + if (!tmpbuf) > + return -ENOMEM; > + } > + > + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); > + > + switch (mop->op) { > + case KVM_MEMOP_VIRTREAD: > + r = read_guest(vcpu, mop->gaddr, tmpbuf, mop->size); > + if (r == 0) { > + if (copy_to_user(uaddr, tmpbuf, mop->size)) > + r = -EFAULT; > + } > + break; > + case KVM_MEMOP_VIRTWRITE: > + if (copy_from_user(tmpbuf, uaddr, mop->size)) { > + r = -EFAULT; > + break; > + } > + r = write_guest(vcpu, mop->gaddr, tmpbuf, mop->size); > + break; > + case KVM_MEMOP_CHECKVIRTREAD: > + case KVM_MEMOP_CHECKVIRTWRITE: > + r = check_gva_range(vcpu, mop->gaddr, mop->size, > + mop->op == KVM_MEMOP_CHECKVIRTWRITE); > + break; > + default: > + r = -EINVAL; > + } > + > + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); > + > + if (r > 0 && (mop->flags & KVM_MEMOP_F_INJECT_EXC) != 0) > + kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); > + > + vfree(tmpbuf); > + return r; > +} > + > long kvm_arch_vcpu_ioctl(struct file *filp, > unsigned int ioctl, unsigned long arg) > { > @@ -1985,6 +2039,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, > r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); > break; > } > + case KVM_GUEST_MEM_OP: { > + struct kvm_guest_mem_op mem_op; > + > + if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) > + r = kvm_s390_guest_mem_op(vcpu, &mem_op); > + else > + r = -EFAULT; > + break; > + } > default: > r = -ENOTTY; > } > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 8055706..528a3d4 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -365,6 +365,24 @@ struct kvm_translation { > __u8 pad[5]; > }; > > +/* for KVM_GUEST_MEM_OP */ > +struct kvm_guest_mem_op { > + /* in */ > + __u64 gaddr; /* the guest address */ > + __u64 flags; /* arch specific flags */ > + __u32 size; /* amount of bytes */ > + __u32 op; /* type of operation */ > + __u64 buf; /* buffer in userspace */ > + __u8 reserved[32]; /* should be set to 0 */ > +}; > +/* types for kvm_guest_mem_op->op */ > +#define KVM_MEMOP_VIRTREAD 0 > +#define KVM_MEMOP_VIRTWRITE 1 > +#define KVM_MEMOP_CHECKVIRTREAD 2 > +#define KVM_MEMOP_CHECKVIRTWRITE 3 > +/* flags for kvm_guest_mem_op->flags */ > +#define KVM_MEMOP_F_INJECT_EXC 0x0001UL /* Inject exception on faults */ > + > /* for KVM_INTERRUPT */ > struct kvm_interrupt { > /* in */ > @@ -760,6 +778,7 @@ struct kvm_ppc_smmu_info { > #define KVM_CAP_PPC_ENABLE_HCALL 104 > #define KVM_CAP_CHECK_EXTENSION_VM 105 > #define KVM_CAP_S390_USER_SIGP 106 > +#define KVM_CAP_MEM_OP 107 > > #ifdef KVM_CAP_IRQ_ROUTING > > @@ -1135,6 +1154,8 @@ struct kvm_s390_ucas_mapping { > #define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) > #define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init) > #define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) > +/* Available with KVM_CAP_MEM_OP */ > +#define KVM_GUEST_MEM_OP _IOW(KVMIO, 0xb1, struct kvm_guest_mem_op) > > #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) > #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html