On 2/5/21 1:15 PM, Claudio Imbrenda wrote: > On Thu, 4 Feb 2021 17:34:00 +0100 > Janosch Frank <frankja@xxxxxxxxxxxxx> wrote: > >> On 2/2/21 7:00 PM, Claudio Imbrenda wrote: >>> Extend kvm_s390_shadow_fault to return the pointer to the valid leaf >>> DAT table entry, or to the invalid entry. >>> >>> Also return some flags in the lower bits of the address: >>> DAT_PROT: indicates that DAT protection applies because of the >>> protection bit in the segment (or, if EDAT, region) tables >>> NOT_PTE: indicates that the address of the DAT table entry returned >>> does not refer to a PTE, but to a segment or region table. >>> >>> Signed-off-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx> >>> Cc: stable@xxxxxxxxxxxxxxx >>> --- >>> arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++---- >>> arch/s390/kvm/gaccess.h | 5 ++++- >>> arch/s390/kvm/vsie.c | 8 ++++---- >>> 3 files changed, 30 insertions(+), 9 deletions(-) >>> >>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c >>> index 6d6b57059493..2d7bcbfb185e 100644 >>> --- a/arch/s390/kvm/gaccess.c >>> +++ b/arch/s390/kvm/gaccess.c >>> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap >>> *sg, unsigned long saddr, rfte.val = ptr; >>> goto shadow_r2t; >>> } >>> + *pgt = ptr + vaddr.rfx * 8; >> >> So pgt either is a table entry if rc > 0 or a pointer to the first pte >> on rc == 0 after this change? > > yes > >> Hrm, if it is really based on RCs than I might be able to come to >> terms with having two things in a ptr with the name pgt. But it needs >> a comment change. > > will do. > >>> rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, >>> &rfte.val); if (rc) >>> return rc; >>> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap >>> *sg, unsigned long saddr, rste.val = ptr; >>> goto shadow_r3t; >>> } >>> + *pgt = ptr + vaddr.rsx * 8; >>> rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, >>> &rste.val); if (rc) >>> return rc; >>> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap >>> *sg, unsigned long saddr, rtte.val = ptr; >>> goto shadow_sgt; >>> } >>> + *pgt = ptr + vaddr.rtx * 8; >>> rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, >>> &rtte.val); if (rc) >>> return rc; >>> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap >>> *sg, unsigned long saddr, ste.val = ptr; >>> goto shadow_pgt; >>> } >>> + *pgt = ptr + vaddr.sx * 8; >>> rc = gmap_read_table(parent, ptr + vaddr.sx * 8, >>> &ste.val); if (rc) >>> return rc; >>> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap >>> *sg, unsigned long saddr, >>> * @vcpu: virtual cpu >>> * @sg: pointer to the shadow guest address space structure >>> * @saddr: faulting address in the shadow gmap >>> + * @pteptr: will contain the address of the faulting DAT table >>> entry, or of >>> + * the valid leaf, plus some flags >> >> pteptr is not the right name if it can be two things > > it cannot be two things there, kvm_s390_shadow_fault always returns a > DAT _entry_ (pte, segment, region). And that's exactly what I meant, it's not a pteptr i.e. not a (pte_t *) as the name would suggest. > >>> * >>> * Returns: - 0 if the shadow fault was successfully resolved >>> * - > 0 (pgm exception code) on exceptions while >>> faulting @@ -1165,11 +1171,11 @@ static int >>> kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, >>> * - -ENOMEM if out of memory >>> */ >>> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, >>> - unsigned long saddr) >>> + unsigned long saddr, unsigned long >>> *pteptr) { >>> union vaddress vaddr; >>> union page_table_entry pte; >>> - unsigned long pgt; >>> + unsigned long pgt = 0; >>> int dat_protection, fake; >>> int rc; >>> >>> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu >>> *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE; >>> goto shadow_page; >>> } >>> - if (!rc) >>> - rc = gmap_read_table(sg->parent, pgt + vaddr.px * >>> 8, &pte.val); + >>> + switch (rc) { >>> + case PGM_SEGMENT_TRANSLATION: >>> + case PGM_REGION_THIRD_TRANS: >>> + case PGM_REGION_SECOND_TRANS: >>> + case PGM_REGION_FIRST_TRANS: >>> + pgt |= NOT_PTE; >> >> GACC_TRANSL_ENTRY_INV ? > > no, this is only for non-pte entries > >>> + break; >>> + case 0: >>> + pgt += vaddr.px * 8; >>> + rc = gmap_read_table(sg->parent, pgt, &pte.val); >>> + } >>> + if (*pteptr) >>> + *pteptr = pgt | dat_protection * DAT_PROT; >>> if (!rc && pte.i) >>> rc = PGM_PAGE_TRANSLATION; >>> if (!rc && pte.z) >>> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h >>> index f4c51756c462..66a6e2cec97a 100644 >>> --- a/arch/s390/kvm/gaccess.h >>> +++ b/arch/s390/kvm/gaccess.h >>> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu); >>> int ipte_lock_held(struct kvm_vcpu *vcpu); >>> int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, >>> unsigned long gra); >>> +#define DAT_PROT 2 >> >> GACC_TRANSL_ENTRY_PROT > > this is also only for non-pte entries > >>> +#define NOT_PTE 4 >>> + >>> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap >>> *shadow, >>> - unsigned long saddr); >>> + unsigned long saddr, unsigned long >>> *pteptr); >>> #endif /* __KVM_S390_GACCESS_H */ >>> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c >>> index c5d0a58b2c29..7db022141db3 100644 >>> --- a/arch/s390/kvm/vsie.c >>> +++ b/arch/s390/kvm/vsie.c >>> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, >>> struct vsie_page *vsie_page) /* with mso/msl, the prefix lies at >>> offset *mso* */ prefix += scb_s->mso; >>> >>> - rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix); >>> + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, >>> NULL); if (!rc && (scb_s->ecb & ECB_TE)) >>> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, >>> - prefix + PAGE_SIZE); >>> + prefix + PAGE_SIZE, >>> NULL); /* >>> * We don't have to mprotect, we will be called for all >>> unshadows. >>> * SIE will detect if protection applies and trigger a >>> validity. @@ -913,7 +913,7 @@ static int handle_fault(struct >>> kvm_vcpu *vcpu, struct vsie_page *vsie_page) >>> current->thread.gmap_addr, 1); >>> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, >>> - current->thread.gmap_addr); >>> + current->thread.gmap_addr, >>> NULL); if (rc > 0) { >>> rc = inject_fault(vcpu, rc, >>> current->thread.gmap_addr, >>> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu >>> *vcpu, { >>> if (vsie_page->fault_addr) >>> kvm_s390_shadow_fault(vcpu, vsie_page->gmap, >>> - vsie_page->fault_addr); >>> + vsie_page->fault_addr, >>> NULL); >> >> Ok >> >>> vsie_page->fault_addr = 0; >>> } >>> >>> >> >