On Fri, 11 Feb 2022 19:22:07 +0100 Janis Schoetterl-Glausch <scgl@xxxxxxxxxxxxx> wrote: > Storage key checking had not been implemented for instructions emulated > by KVM. Implement it by enhancing the functions used for guest access, > in particular those making use of access_guest which has been renamed > to access_guest_with_key. > Accesses via access_guest_real should not be key checked. > > For actual accesses, key checking is done by > copy_from/to_user_key (which internally uses MVCOS/MVCP/MVCS). > In cases where accessibility is checked without an actual access, > this is performed by getting the storage key and checking if the access > key matches. In both cases, if applicable, storage and fetch protection > override are honored. > > Signed-off-by: Janis Schoetterl-Glausch <scgl@xxxxxxxxxxxxx> > Reviewed-by: Janosch Frank <frankja@xxxxxxxxxxxxx> > Reviewed-by: Christian Borntraeger <borntraeger@xxxxxxxxxxxxx> Reviewed-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx> > --- > arch/s390/include/asm/ctl_reg.h | 2 + > arch/s390/include/asm/page.h | 2 + > arch/s390/kvm/gaccess.c | 187 ++++++++++++++++++++++++++++++-- > arch/s390/kvm/gaccess.h | 77 +++++++++++-- > arch/s390/kvm/intercept.c | 12 +- > arch/s390/kvm/kvm-s390.c | 4 +- > 6 files changed, 253 insertions(+), 31 deletions(-) > > diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h > index 04dc65f8901d..c800199a376b 100644 > --- a/arch/s390/include/asm/ctl_reg.h > +++ b/arch/s390/include/asm/ctl_reg.h > @@ -12,6 +12,8 @@ > > #define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10) > #define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35) > +#define CR0_FETCH_PROTECTION_OVERRIDE BIT(63 - 38) > +#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(63 - 39) > #define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49) > #define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50) > #define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52) > diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h > index d98d17a36c7b..cfc4d6fb2385 100644 > --- a/arch/s390/include/asm/page.h > +++ b/arch/s390/include/asm/page.h > @@ -20,6 +20,8 @@ > #define PAGE_SIZE _PAGE_SIZE > #define PAGE_MASK _PAGE_MASK > #define PAGE_DEFAULT_ACC 0 > +/* storage-protection override */ > +#define PAGE_SPO_ACC 9 > #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) > > #define HPAGE_SHIFT 20 > diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c > index 4460808c3b9a..7fca0cff4c12 100644 > --- a/arch/s390/kvm/gaccess.c > +++ b/arch/s390/kvm/gaccess.c > @@ -10,6 +10,7 @@ > #include <linux/mm_types.h> > #include <linux/err.h> > #include <linux/pgtable.h> > +#include <linux/bitfield.h> > > #include <asm/gmap.h> > #include "kvm-s390.h" > @@ -794,6 +795,79 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, > return 1; > } > > +static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode, > + union asce asce) > +{ > + psw_t *psw = &vcpu->arch.sie_block->gpsw; > + unsigned long override; > + > + if (mode == GACC_FETCH || mode == GACC_IFETCH) { > + /* check if fetch protection override enabled */ > + override = vcpu->arch.sie_block->gcr[0]; > + override &= CR0_FETCH_PROTECTION_OVERRIDE; > + /* not applicable if subject to DAT && private space */ > + override = override && !(psw_bits(*psw).dat && asce.p); > + return override; > + } > + return false; > +} > + > +static bool fetch_prot_override_applies(unsigned long ga, unsigned int len) > +{ > + return ga < 2048 && ga + len <= 2048; > +} > + > +static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu) > +{ > + /* check if storage protection override enabled */ > + return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE; > +} > + > +static bool storage_prot_override_applies(u8 access_control) > +{ > + /* matches special storage protection override key (9) -> allow */ > + return access_control == PAGE_SPO_ACC; > +} > + > +static int vcpu_check_access_key(struct kvm_vcpu *vcpu, u8 access_key, > + enum gacc_mode mode, union asce asce, gpa_t gpa, > + unsigned long ga, unsigned int len) > +{ > + u8 storage_key, access_control; > + unsigned long hva; > + int r; > + > + /* access key 0 matches any storage key -> allow */ > + if (access_key == 0) > + return 0; > + /* > + * caller needs to ensure that gfn is accessible, so we can > + * assume that this cannot fail > + */ > + hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa)); > + mmap_read_lock(current->mm); > + r = get_guest_storage_key(current->mm, hva, &storage_key); > + mmap_read_unlock(current->mm); > + if (r) > + return r; > + access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key); > + /* access key matches storage key -> allow */ > + if (access_control == access_key) > + return 0; > + if (mode == GACC_FETCH || mode == GACC_IFETCH) { > + /* it is a fetch and fetch protection is off -> allow */ > + if (!(storage_key & _PAGE_FP_BIT)) > + return 0; > + if (fetch_prot_override_applicable(vcpu, mode, asce) && > + fetch_prot_override_applies(ga, len)) > + return 0; > + } > + if (storage_prot_override_applicable(vcpu) && > + storage_prot_override_applies(access_control)) > + return 0; > + return PGM_PROTECTION; > +} > + > /** > * guest_range_to_gpas() - Calculate guest physical addresses of page fragments > * covering a logical range > @@ -804,6 +878,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, > * @len: length of range in bytes > * @asce: address-space-control element to use for translation > * @mode: access mode > + * @access_key: access key to mach the range's storage keys against > * > * Translate a logical range to a series of guest absolute addresses, > * such that the concatenation of page fragments starting at each gpa make up > @@ -830,7 +905,8 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, > */ > static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, > unsigned long *gpas, unsigned long len, > - const union asce asce, enum gacc_mode mode) > + const union asce asce, enum gacc_mode mode, > + u8 access_key) > { > psw_t *psw = &vcpu->arch.sie_block->gpsw; > unsigned int offset = offset_in_page(ga); > @@ -857,6 +933,10 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, > } > if (rc) > return trans_exc(vcpu, rc, ga, ar, mode, prot); > + rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga, > + fragment_len); > + if (rc) > + return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC); > if (gpas) > *gpas++ = gpa; > offset = 0; > @@ -880,16 +960,54 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, > return rc; > } > > -int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, > - unsigned long len, enum gacc_mode mode) > +static int > +access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, > + void *data, unsigned int len, u8 access_key) > +{ > + struct kvm_memory_slot *slot; > + bool writable; > + gfn_t gfn; > + hva_t hva; > + int rc; > + > + gfn = gpa >> PAGE_SHIFT; > + slot = gfn_to_memslot(kvm, gfn); > + hva = gfn_to_hva_memslot_prot(slot, gfn, &writable); > + > + if (kvm_is_error_hva(hva)) > + return PGM_ADDRESSING; > + /* > + * Check if it's a ro memslot, even tho that can't occur (they're unsupported). > + * Don't try to actually handle that case. > + */ > + if (!writable && mode == GACC_STORE) > + return -EOPNOTSUPP; > + hva += offset_in_page(gpa); > + if (mode == GACC_STORE) > + rc = copy_to_user_key((void __user *)hva, data, len, access_key); > + else > + rc = copy_from_user_key(data, (void __user *)hva, len, access_key); > + if (rc) > + return PGM_PROTECTION; > + if (mode == GACC_STORE) > + mark_page_dirty_in_slot(kvm, slot, gfn); > + return 0; > +} > + > +int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, > + void *data, unsigned long len, enum gacc_mode mode, > + u8 access_key) > { > psw_t *psw = &vcpu->arch.sie_block->gpsw; > unsigned long nr_pages, idx; > unsigned long gpa_array[2]; > unsigned int fragment_len; > unsigned long *gpas; > + enum prot_type prot; > int need_ipte_lock; > union asce asce; > + bool try_storage_prot_override; > + bool try_fetch_prot_override; > int rc; > > if (!len) > @@ -904,16 +1022,47 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, > gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long))); > if (!gpas) > return -ENOMEM; > + try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce); > + try_storage_prot_override = storage_prot_override_applicable(vcpu); > need_ipte_lock = psw_bits(*psw).dat && !asce.r; > if (need_ipte_lock) > ipte_lock(vcpu); > - rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode); > - for (idx = 0; idx < nr_pages && !rc; idx++) { > + /* > + * Since we do the access further down ultimately via a move instruction > + * that does key checking and returns an error in case of a protection > + * violation, we don't need to do the check during address translation. > + * Skip it by passing access key 0, which matches any storage key, > + * obviating the need for any further checks. As a result the check is > + * handled entirely in hardware on access, we only need to take care to > + * forego key protection checking if fetch protection override applies or > + * retry with the special key 9 in case of storage protection override. > + */ > + rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0); > + if (rc) > + goto out_unlock; > + for (idx = 0; idx < nr_pages; idx++) { > fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len); > - rc = access_guest_page(vcpu->kvm, mode, gpas[idx], data, fragment_len); > + if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) { > + rc = access_guest_page(vcpu->kvm, mode, gpas[idx], > + data, fragment_len); > + } else { > + rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx], > + data, fragment_len, access_key); > + } > + if (rc == PGM_PROTECTION && try_storage_prot_override) > + rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx], > + data, fragment_len, PAGE_SPO_ACC); > + if (rc == PGM_PROTECTION) > + prot = PROT_TYPE_KEYC; > + if (rc) > + break; > len -= fragment_len; > data += fragment_len; > + ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len); > } > + if (rc > 0) > + rc = trans_exc(vcpu, rc, ga, ar, mode, prot); > +out_unlock: > if (need_ipte_lock) > ipte_unlock(vcpu); > if (nr_pages > ARRAY_SIZE(gpa_array)) > @@ -940,12 +1089,13 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, > } > > /** > - * guest_translate_address - translate guest logical into guest absolute address > + * guest_translate_address_with_key - translate guest logical into guest absolute address > * @vcpu: virtual cpu > * @gva: Guest virtual address > * @ar: Access register > * @gpa: Guest physical address > * @mode: Translation access mode > + * @access_key: access key to mach the storage key with > * > * Parameter semantics are the same as the ones from guest_translate. > * The memory contents at the guest address are not changed. > @@ -953,8 +1103,9 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, > * Note: The IPTE lock is not taken during this function, so the caller > * has to take care of this. > */ > -int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, > - unsigned long *gpa, enum gacc_mode mode) > +int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, > + unsigned long *gpa, enum gacc_mode mode, > + u8 access_key) > { > union asce asce; > int rc; > @@ -963,7 +1114,17 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, > rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); > if (rc) > return rc; > - return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode); > + return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode, > + access_key); > +} > + > +int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, > + unsigned long *gpa, enum gacc_mode mode) > +{ > + u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key; > + > + return guest_translate_address_with_key(vcpu, gva, ar, gpa, mode, > + access_key); > } > > /** > @@ -973,9 +1134,10 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, > * @ar: Access register > * @length: Length of test range > * @mode: Translation access mode > + * @access_key: access key to mach the storage keys with > */ > int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, > - unsigned long length, enum gacc_mode mode) > + unsigned long length, enum gacc_mode mode, u8 access_key) > { > union asce asce; > int rc = 0; > @@ -984,7 +1146,8 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, > if (rc) > return rc; > ipte_lock(vcpu); > - rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode); > + rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode, > + access_key); > ipte_unlock(vcpu); > > return rc; > diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h > index 7c72a5e3449f..e5b2f56e7962 100644 > --- a/arch/s390/kvm/gaccess.h > +++ b/arch/s390/kvm/gaccess.h > @@ -186,24 +186,31 @@ enum gacc_mode { > GACC_IFETCH, > }; > > +int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, > + unsigned long *gpa, enum gacc_mode mode, > + u8 access_key); > + > int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, > u8 ar, unsigned long *gpa, enum gacc_mode mode); > + > int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, > - unsigned long length, enum gacc_mode mode); > + unsigned long length, enum gacc_mode mode, u8 access_key); > > -int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, > - unsigned long len, enum gacc_mode mode); > +int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, > + void *data, unsigned long len, enum gacc_mode mode, > + u8 access_key); > > int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, > void *data, unsigned long len, enum gacc_mode mode); > > /** > - * write_guest - copy data from kernel space to guest space > + * write_guest_with_key - copy data from kernel space to guest space > * @vcpu: virtual cpu > * @ga: guest address > * @ar: access register > * @data: source address in kernel space > * @len: number of bytes to copy > + * @access_key: access key the storage key needs to match > * > * Copy @len bytes from @data (kernel space) to @ga (guest address). > * In order to copy data to guest space the PSW of the vcpu is inspected: > @@ -214,8 +221,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, > * The addressing mode of the PSW is also inspected, so that address wrap > * around is taken into account for 24-, 31- and 64-bit addressing mode, > * if the to be copied data crosses page boundaries in guest address space. > - * In addition also low address and DAT protection are inspected before > - * copying any data (key protection is currently not implemented). > + * In addition low address, DAT and key protection checks are performed before > + * copying any data. > * > * This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu. > * In case of an access exception (e.g. protection exception) pgm will contain > @@ -243,10 +250,53 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, > * if data has been changed in guest space in case of an exception. > */ > static inline __must_check > +int write_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, > + void *data, unsigned long len, u8 access_key) > +{ > + return access_guest_with_key(vcpu, ga, ar, data, len, GACC_STORE, > + access_key); > +} > + > +/** > + * write_guest - copy data from kernel space to guest space > + * @vcpu: virtual cpu > + * @ga: guest address > + * @ar: access register > + * @data: source address in kernel space > + * @len: number of bytes to copy > + * > + * The behaviour of write_guest is identical to write_guest_with_key, except > + * that the PSW access key is used instead of an explicit argument. > + */ > +static inline __must_check > int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, > unsigned long len) > { > - return access_guest(vcpu, ga, ar, data, len, GACC_STORE); > + u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key; > + > + return write_guest_with_key(vcpu, ga, ar, data, len, access_key); > +} > + > +/** > + * read_guest_with_key - copy data from guest space to kernel space > + * @vcpu: virtual cpu > + * @ga: guest address > + * @ar: access register > + * @data: destination address in kernel space > + * @len: number of bytes to copy > + * @access_key: access key the storage key needs to match > + * > + * Copy @len bytes from @ga (guest address) to @data (kernel space). > + * > + * The behaviour of read_guest_with_key is identical to write_guest_with_key, > + * except that data will be copied from guest space to kernel space. > + */ > +static inline __must_check > +int read_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, > + void *data, unsigned long len, u8 access_key) > +{ > + return access_guest_with_key(vcpu, ga, ar, data, len, GACC_FETCH, > + access_key); > } > > /** > @@ -259,14 +309,16 @@ int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, > * > * Copy @len bytes from @ga (guest address) to @data (kernel space). > * > - * The behaviour of read_guest is identical to write_guest, except that > - * data will be copied from guest space to kernel space. > + * The behaviour of read_guest is identical to read_guest_with_key, except > + * that the PSW access key is used instead of an explicit argument. > */ > static inline __must_check > int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, > unsigned long len) > { > - return access_guest(vcpu, ga, ar, data, len, GACC_FETCH); > + u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key; > + > + return read_guest_with_key(vcpu, ga, ar, data, len, access_key); > } > > /** > @@ -287,7 +339,10 @@ static inline __must_check > int read_guest_instr(struct kvm_vcpu *vcpu, unsigned long ga, void *data, > unsigned long len) > { > - return access_guest(vcpu, ga, 0, data, len, GACC_IFETCH); > + u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key; > + > + return access_guest_with_key(vcpu, ga, 0, data, len, GACC_IFETCH, > + access_key); > } > > /** > diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c > index d07ff646d844..8bd42a20d924 100644 > --- a/arch/s390/kvm/intercept.c > +++ b/arch/s390/kvm/intercept.c > @@ -331,18 +331,18 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) > > kvm_s390_get_regs_rre(vcpu, ®1, ®2); > > - /* Make sure that the source is paged-in */ > - rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2], > - reg2, &srcaddr, GACC_FETCH); > + /* Ensure that the source is paged-in, no actual access -> no key checking */ > + rc = guest_translate_address_with_key(vcpu, vcpu->run->s.regs.gprs[reg2], > + reg2, &srcaddr, GACC_FETCH, 0); > if (rc) > return kvm_s390_inject_prog_cond(vcpu, rc); > rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); > if (rc != 0) > return rc; > > - /* Make sure that the destination is paged-in */ > - rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1], > - reg1, &dstaddr, GACC_STORE); > + /* Ensure that the source is paged-in, no actual access -> no key checking */ > + rc = guest_translate_address_with_key(vcpu, vcpu->run->s.regs.gprs[reg1], > + reg1, &dstaddr, GACC_STORE, 0); > if (rc) > return kvm_s390_inject_prog_cond(vcpu, rc); > rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); > diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c > index 2296b1ff1e02..fdbd6c1dc709 100644 > --- a/arch/s390/kvm/kvm-s390.c > +++ b/arch/s390/kvm/kvm-s390.c > @@ -4713,7 +4713,7 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, > case KVM_S390_MEMOP_LOGICAL_READ: > if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { > r = check_gva_range(vcpu, mop->gaddr, mop->ar, > - mop->size, GACC_FETCH); > + mop->size, GACC_FETCH, 0); > break; > } > r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); > @@ -4725,7 +4725,7 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, > case KVM_S390_MEMOP_LOGICAL_WRITE: > if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { > r = check_gva_range(vcpu, mop->gaddr, mop->ar, > - mop->size, GACC_STORE); > + mop->size, GACC_STORE, 0); > break; > } > if (copy_from_user(tmpbuf, uaddr, mop->size)) {