On 1/20/22 09:50, Christian Borntraeger wrote: > > > Am 20.01.22 um 09:11 schrieb Janis Schoetterl-Glausch: >> On 1/19/22 20:27, Christian Borntraeger wrote: >>> Am 18.01.22 um 10:52 schrieb Janis Schoetterl-Glausch: >>>> Storage key checking had not been implemented for instructions emulated >>>> by KVM. Implement it by enhancing the functions used for guest access, >>>> in particular those making use of access_guest which has been renamed >>>> to access_guest_with_key. >>>> Accesses via access_guest_real should not be key checked. >>>> >>>> For actual accesses, key checking is done by __copy_from/to_user_with_key >>>> (which internally uses MVCOS/MVCP/MVCS). >>>> In cases where accessibility is checked without an actual access, >>>> this is performed by getting the storage key and checking >>>> if the access key matches. >>>> In both cases, if applicable, storage and fetch protection override >>>> are honored. >>>> >>>> Signed-off-by: Janis Schoetterl-Glausch <scgl@xxxxxxxxxxxxx> >>>> --- >>>> arch/s390/include/asm/ctl_reg.h | 2 + >>>> arch/s390/include/asm/page.h | 2 + >>>> arch/s390/kvm/gaccess.c | 174 +++++++++++++++++++++++++++++--- >>>> arch/s390/kvm/gaccess.h | 78 ++++++++++++-- >>>> arch/s390/kvm/intercept.c | 12 +-- >>>> arch/s390/kvm/kvm-s390.c | 4 +- >>>> 6 files changed, 241 insertions(+), 31 deletions(-) >>>> >>>> diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h >>>> index 04dc65f8901d..c800199a376b 100644 >>>> --- a/arch/s390/include/asm/ctl_reg.h >>>> +++ b/arch/s390/include/asm/ctl_reg.h >>>> @@ -12,6 +12,8 @@ >>>> #define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10) >>>> #define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35) >>>> +#define CR0_FETCH_PROTECTION_OVERRIDE BIT(63 - 38) >>>> +#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(63 - 39) >>>> #define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49) >>>> #define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50) >>>> #define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52) >>>> diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h >>>> index d98d17a36c7b..cfc4d6fb2385 100644 >>>> --- a/arch/s390/include/asm/page.h >>>> +++ b/arch/s390/include/asm/page.h >>>> @@ -20,6 +20,8 @@ >>>> #define PAGE_SIZE _PAGE_SIZE >>>> #define PAGE_MASK _PAGE_MASK >>>> #define PAGE_DEFAULT_ACC 0 >>>> +/* storage-protection override */ >>>> +#define PAGE_SPO_ACC 9 >>>> #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) >>>> #define HPAGE_SHIFT 20 >>>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c >>>> index 4460808c3b9a..92ab96d55504 100644 >>>> --- a/arch/s390/kvm/gaccess.c >>>> +++ b/arch/s390/kvm/gaccess.c >>>> @@ -10,6 +10,7 @@ >>>> #include <linux/mm_types.h> >>>> #include <linux/err.h> >>>> #include <linux/pgtable.h> >>>> +#include <linux/bitfield.h> >>>> #include <asm/gmap.h> >>>> #include "kvm-s390.h" >>>> @@ -794,6 +795,79 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, >>>> return 1; >>>> } >>>> +static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode, >>>> + union asce asce) >>>> +{ >>>> + psw_t *psw = &vcpu->arch.sie_block->gpsw; >>>> + unsigned long override; >>>> + >>>> + if (mode == GACC_FETCH || mode == GACC_IFETCH) { >>>> + /* check if fetch protection override enabled */ >>>> + override = vcpu->arch.sie_block->gcr[0]; >>>> + override &= CR0_FETCH_PROTECTION_OVERRIDE; >>>> + /* not applicable if subject to DAT && private space */ >>>> + override = override && !(psw_bits(*psw).dat && asce.p); >>>> + return override; >>>> + } >>>> + return false; >>>> +} >>>> + >>>> +static bool fetch_prot_override_applies(unsigned long ga, unsigned int len) >>>> +{ >>>> + return ga < 2048 && ga + len <= 2048; >>>> +} >>>> + >>>> +static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu) >>>> +{ >>>> + /* check if storage protection override enabled */ >>>> + return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE; >>>> +} >>>> + >>>> +static bool storage_prot_override_applies(char access_control) >>>> +{ >>>> + /* matches special storage protection override key (9) -> allow */ >>>> + return access_control == PAGE_SPO_ACC; >>>> +} >>>> + >>>> +static int vcpu_check_access_key(struct kvm_vcpu *vcpu, char access_key, >>>> + enum gacc_mode mode, union asce asce, gpa_t gpa, >>>> + unsigned long ga, unsigned int len) >>>> +{ >>>> + unsigned char storage_key, access_control; >>>> + unsigned long hva; >>>> + int r; >>>> + >>>> + /* access key 0 matches any storage key -> allow */ >>>> + if (access_key == 0) >>>> + return 0; >>>> + /* >>>> + * caller needs to ensure that gfn is accessible, so we can >>>> + * assume that this cannot fail >>>> + */ >>>> + hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa)); >>>> + mmap_read_lock(current->mm); >>>> + r = get_guest_storage_key(current->mm, hva, &storage_key); >>>> + mmap_read_unlock(current->mm); >>>> + if (r) >>>> + return r; >>>> + access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key); >>>> + /* access key matches storage key -> allow */ >>>> + if (access_control == access_key) >>>> + return 0; >>>> + if (mode == GACC_FETCH || mode == GACC_IFETCH) { >>>> + /* mismatching keys, no fetch protection -> allowed */ >>>> + if (!(storage_key & _PAGE_FP_BIT)) >>>> + return 0; >>>> + if (fetch_prot_override_applicable(vcpu, mode, asce)) >>>> + if (fetch_prot_override_applies(ga, len)) >>>> + return 0; >>>> + } >>>> + if (storage_prot_override_applicable(vcpu)) >>>> + if (storage_prot_override_applies(access_control)) >>>> + return 0; >>>> + return PGM_PROTECTION; >>>> +} >>> >>> This function is just a pre-check (and early-exit) and we do an additional final check >>> in the MVCOS routing later on, correct? It might actually be faster to get rid of this >> >> No, this exists for those cases that do not do an actual access, that is MEMOPs with >> the check only flag, as well as the TEST PROTECTION emulation. access_guest_with_key >> passes key 0 so we take the early return. It's easy to miss so Janosch suggested a comment there. > > Dont we always call it in guest_range_to_gpas, which is also called for the memory access in > access_guest_with_key? @@ -904,16 +1018,37 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long))); if (!gpas) return -ENOMEM; + try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce); + try_storage_prot_override = storage_prot_override_applicable(vcpu); need_ipte_lock = psw_bits(*psw).dat && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); - rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode); - for (idx = 0; idx < nr_pages && !rc; idx++) { + rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0); Yes, but the key is 0 in that case, so we don't do any key checking. ^ > >> >>> pre-test and simply rely on MVCOS. MVCOS is usually just some cycles while ISKE to read >>> the key is really slow path and take hundreds of cycles. This would even simplify the >>> patch (assuming that we do proper key checking all the time). >>