Hi Marc, On Mon, Jul 08, 2024 at 05:57:58PM +0100, Marc Zyngier wrote: > In order to plug the brokenness of our current AT implementation, > we need a SW walker that is going to... err.. walk the S1 tables > and tell us what it finds. > > Of course, it builds on top of our S2 walker, and share similar > concepts. The beauty of it is that since it uses kvm_read_guest(), > it is able to bring back pages that have been otherwise evicted. > > This is then plugged in the two AT S1 emulation functions as > a "slow path" fallback. I'm not sure it is that slow, but hey. > > Signed-off-by: Marc Zyngier <maz@xxxxxxxxxx> > [..] > +static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) > +{ > + bool perm_fail, ur, uw, ux, pr, pw, pan; > + struct s1_walk_result wr = {}; > + struct s1_walk_info wi = {}; > + int ret, idx, el; > + > + /* > + * We only get here from guest EL2, so the translation regime > + * AT applies to is solely defined by {E2H,TGE}. > + */ > + el = (vcpu_el2_e2h_is_set(vcpu) && > + vcpu_el2_tge_is_set(vcpu)) ? 
2 : 1; > + > + ret = setup_s1_walk(vcpu, &wi, &wr, vaddr, el); > + if (ret) > + goto compute_par; > + > + if (wr.level == S1_MMU_DISABLED) > + goto compute_par; > + > + idx = srcu_read_lock(&vcpu->kvm->srcu); > + > + ret = walk_s1(vcpu, &wi, &wr, vaddr); > + > + srcu_read_unlock(&vcpu->kvm->srcu, idx); > + > + if (ret) > + goto compute_par; > + > + /* FIXME: revisit when adding indirect permission support */ > + if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3) && > + !wi.nvhe) { > + u64 sctlr; > + > + if (el == 1) > + sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); > + else > + sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2); > + > + ux = (sctlr & SCTLR_EL1_EPAN) && !(wr.desc & PTE_UXN); > + } else { > + ux = false; > + } > + > + pw = !(wr.desc & PTE_RDONLY); > + > + if (wi.nvhe) { > + ur = uw = false; > + pr = true; > + } else { > + if (wr.desc & PTE_USER) { > + ur = pr = true; > + uw = pw; > + } else { > + ur = uw = false; > + pr = true; > + } > + } > + > + /* Apply the Hierarchical Permission madness */ > + if (wi.nvhe) { > + wr.APTable &= BIT(1); > + wr.PXNTable = wr.UXNTable; > + } > + > + ur &= !(wr.APTable & BIT(0)); > + uw &= !(wr.APTable != 0); > + ux &= !wr.UXNTable; > + > + pw &= !(wr.APTable & BIT(1)); > + > + pan = *vcpu_cpsr(vcpu) & PSR_PAN_BIT; > + > + perm_fail = false; > + > + switch (op) { > + case OP_AT_S1E1RP: > + perm_fail |= pan && (ur || uw || ux); > + fallthrough; > + case OP_AT_S1E1R: > + case OP_AT_S1E2R: > + perm_fail |= !pr; > + break; > + case OP_AT_S1E1WP: > + perm_fail |= pan && (ur || uw || ux); > + fallthrough; > + case OP_AT_S1E1W: > + case OP_AT_S1E2W: > + perm_fail |= !pw; > + break; > + case OP_AT_S1E0R: > + perm_fail |= !ur; > + break; > + case OP_AT_S1E0W: > + perm_fail |= !uw; > + break; > + default: > + BUG(); > + } > + > + if (perm_fail) { > + struct s1_walk_result tmp; I was wondering if you would consider initializing 'tmp' to the empty struct here. 
That makes it consistent with the initialization of 'wr' in the !perm_fail case and I think it will make the code more robust with respect to changes to compute_par_s1() and what fields it accesses. Thanks, Alex > + > + tmp.failed = true; > + tmp.fst = ESR_ELx_FSC_PERM | wr.level; > + tmp.s2 = false; > + tmp.ptw = false; > + > + wr = tmp; > + } > + > +compute_par: > + return compute_par_s1(vcpu, &wr); > +}