On Wed, Jan 17, 2018 at 05:52:21PM +0000, Julien Thierry wrote: > > > On 12/01/18 12:07, Christoffer Dall wrote: > >We are about to defer saving and restoring some groups of system > >registers to vcpu_put and vcpu_load on supported systems. This means > >that we need some infrastructure to access system registers which > >supports either accessing the memory backing of the register or directly > >accessing the system registers, depending on the state of the system > >when we access the register. > > > >We do this by defining a set of read/write accessors for each system > >register, and letting each system register be defined as "immediate" or > >"deferrable". Immediate registers are always saved/restored in the > >world-switch path, but deferrable registers are only saved/restored in > >vcpu_put/vcpu_load when supported and sysregs_loaded_on_cpu will be set > >in that case. > > > > The patch is fine, however I'd suggest adding a comment pointing out > that the IMMEDIATE/DEFERRABLE apply to save/restore to the vcpu struct. > Instinctively I would expect the deferrable/immediate to apply to the actual > hardware register access, so a comment would prevent people like me from getting > on the wrong track. > I tried to explain that a bit in the first sentence of the commit message, but I can try to make it more clear that we introduce terminology. > >Note that we don't use the deferred mechanism yet in this patch, but only > >introduce infrastructure. This is to improve convenience of review in > >the subsequent patches where it is clear which registers become > >deferred. 
> > > > [ Most of this logic was contributed by Marc Zyngier ] > > > >Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx> > >Signed-off-by: Christoffer Dall <christoffer.dall@xxxxxxxxxx> > > Reviewed-by: Julien Thierry <julien.thierry@xxxxxxx> > > >--- > > arch/arm64/include/asm/kvm_host.h | 8 +- > > arch/arm64/kvm/sys_regs.c | 160 ++++++++++++++++++++++++++++++++++++++ > > 2 files changed, 166 insertions(+), 2 deletions(-) > > > >diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > >index 91272c35cc36..4b5ef82f6bdb 100644 > >--- a/arch/arm64/include/asm/kvm_host.h > >+++ b/arch/arm64/include/asm/kvm_host.h > >@@ -281,6 +281,10 @@ struct kvm_vcpu_arch { > > /* Detect first run of a vcpu */ > > bool has_run_once; > >+ > >+ /* True when deferrable sysregs are loaded on the physical CPU, > >+ * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */ > >+ bool sysregs_loaded_on_cpu; > > }; > > #define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) > >@@ -293,8 +297,8 @@ struct kvm_vcpu_arch { > > */ > > #define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) > >-#define vcpu_read_sys_reg(v,r) __vcpu_sys_reg(v,r) > >-#define vcpu_write_sys_reg(v,r,n) do { __vcpu_sys_reg(v,r) = n; } while (0) > >+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg); > >+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val); > > /* > > * CP14 and CP15 live in the same array, as they are backed by the > >diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c > >index 96398d53b462..9d353a6a55c9 100644 > >--- a/arch/arm64/kvm/sys_regs.c > >+++ b/arch/arm64/kvm/sys_regs.c > >@@ -35,6 +35,7 @@ > > #include <asm/kvm_coproc.h> > > #include <asm/kvm_emulate.h> > > #include <asm/kvm_host.h> > >+#include <asm/kvm_hyp.h> > > #include <asm/kvm_mmu.h> > > #include <asm/perf_event.h> > > #include <asm/sysreg.h> > >@@ -76,6 +77,165 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu, > > return false; > > } > >+struct sys_reg_accessor { > >+ u64 
(*rdsr)(struct kvm_vcpu *, int); > >+ void (*wrsr)(struct kvm_vcpu *, int, u64); > > Nit: > > Why use a signed integer for the register index argument? > The type name is short? ;) No particular reason, could be an unsigned int, but I don't think it matters here does it? > >+}; > >+ > >+#define DECLARE_IMMEDIATE_SR(i) \ > >+ static u64 __##i##_read(struct kvm_vcpu *vcpu, int r) \ > >+ { \ > >+ return __vcpu_sys_reg(vcpu, r); \ > >+ } \ > >+ \ > >+ static void __##i##_write(struct kvm_vcpu *vcpu, int r, u64 v) \ > >+ { \ > >+ __vcpu_sys_reg(vcpu, r) = v; \ > >+ } \ > >+ > >+#define DECLARE_DEFERRABLE_SR(i, s) \ > >+ static u64 __##i##_read(struct kvm_vcpu *vcpu, int r) \ > >+ { \ > >+ if (vcpu->arch.sysregs_loaded_on_cpu) { \ > >+ WARN_ON(kvm_arm_get_running_vcpu() != vcpu); \ > >+ return read_sysreg_s((s)); \ > >+ } \ > >+ return __vcpu_sys_reg(vcpu, r); \ > >+ } \ > >+ \ > >+ static void __##i##_write(struct kvm_vcpu *vcpu, int r, u64 v) \ > >+ { \ > >+ if (vcpu->arch.sysregs_loaded_on_cpu) { \ > >+ WARN_ON(kvm_arm_get_running_vcpu() != vcpu); \ > >+ write_sysreg_s(v, (s)); \ > >+ } else { \ > >+ __vcpu_sys_reg(vcpu, r) = v; \ > >+ } \ > >+ } \ > >+ > >+ > >+#define SR_HANDLER_RANGE(i,e) \ > >+ [i ... e] = (struct sys_reg_accessor) { \ > >+ .rdsr = __##i##_read, \ > >+ .wrsr = __##i##_write, \ > > Nit: > Could we have __vcpu_##i##_read and __vcpu_##i##_write? > They don't necessarily read from the vcpu do they? Unrelated: I also thought about just having a single function with a switch statement instead, which may make it easier to follow the code as there would be no macros generating functions, but it would be slightly less declarative. For example: u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg) { if (!vcpu->arch.sysregs_loaded_on_cpu) goto immediate_read; /* * All system registers listed in the switch are deferred * save/restored on VHE systems. 
*/ switch (reg) { case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1)); case SCTLR_EL1: return read_sysreg_s(sctlr_EL12)); case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1)); case CPACR_EL1: return read_sysreg_s(cpacr_EL12)); case TTBR0_EL1: return read_sysreg_s(ttbr0_EL12)); case TTBR1_EL1: return read_sysreg_s(ttbr1_EL12)); case TCR_EL1: return read_sysreg_s(tcr_EL12)); case ESR_EL1: return read_sysreg_s(esr_EL12)); case AFSR0_EL1: return read_sysreg_s(afsr0_EL12)); case AFSR1_EL1: return read_sysreg_s(afsr1_EL12)); case FAR_EL1: return read_sysreg_s(far_EL12)); case MAIR_EL1: return read_sysreg_s(mair_EL12)); case VBAR_EL1: return read_sysreg_s(vbar_EL12)); case CONTEXTIDR_EL1: return read_sysreg_s(contextidr_EL12)); case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0)); case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0)); case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1)); case AMAIR_EL1: return read_sysreg_s(amair_EL12)); case CNTKCTL_EL1: return read_sysreg_s(cntkctl_EL12)); case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1)); case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2)); case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2)); case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2)); } immediate_read: return __vcpu_sys_reg(vcpu, reg); } Since you're having a look at this, what are your thoughts? Marc, what's your preference? 
Thanks, -Christoffer > >+ } > >+ > >+#define SR_HANDLER(i) SR_HANDLER_RANGE(i, i) > >+ > >+static void bad_sys_reg(int reg) > >+{ > >+ WARN_ONCE(1, "Bad system register access %d\n", reg); > >+} > >+ > >+static u64 __default_read_sys_reg(struct kvm_vcpu *vcpu, int reg) > >+{ > >+ bad_sys_reg(reg); > >+ return 0; > >+} > >+ > >+static void __default_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val) > >+{ > >+ bad_sys_reg(reg); > >+} > >+ > >+/* Ordered as in enum vcpu_sysreg */ > >+DECLARE_IMMEDIATE_SR(MPIDR_EL1); > >+DECLARE_IMMEDIATE_SR(CSSELR_EL1); > >+DECLARE_IMMEDIATE_SR(SCTLR_EL1); > >+DECLARE_IMMEDIATE_SR(ACTLR_EL1); > >+DECLARE_IMMEDIATE_SR(CPACR_EL1); > >+DECLARE_IMMEDIATE_SR(TTBR0_EL1); > >+DECLARE_IMMEDIATE_SR(TTBR1_EL1); > >+DECLARE_IMMEDIATE_SR(TCR_EL1); > >+DECLARE_IMMEDIATE_SR(ESR_EL1); > >+DECLARE_IMMEDIATE_SR(AFSR0_EL1); > >+DECLARE_IMMEDIATE_SR(AFSR1_EL1); > >+DECLARE_IMMEDIATE_SR(FAR_EL1); > >+DECLARE_IMMEDIATE_SR(MAIR_EL1); > >+DECLARE_IMMEDIATE_SR(VBAR_EL1); > >+DECLARE_IMMEDIATE_SR(CONTEXTIDR_EL1); > >+DECLARE_IMMEDIATE_SR(TPIDR_EL0); > >+DECLARE_IMMEDIATE_SR(TPIDRRO_EL0); > >+DECLARE_IMMEDIATE_SR(TPIDR_EL1); > >+DECLARE_IMMEDIATE_SR(AMAIR_EL1); > >+DECLARE_IMMEDIATE_SR(CNTKCTL_EL1); > >+DECLARE_IMMEDIATE_SR(PAR_EL1); > >+DECLARE_IMMEDIATE_SR(MDSCR_EL1); > >+DECLARE_IMMEDIATE_SR(MDCCINT_EL1); > >+DECLARE_IMMEDIATE_SR(PMCR_EL0); > >+DECLARE_IMMEDIATE_SR(PMSELR_EL0); > >+DECLARE_IMMEDIATE_SR(PMEVCNTR0_EL0); > >+/* PMEVCNTR30_EL0 */ > >+DECLARE_IMMEDIATE_SR(PMCCNTR_EL0); > >+DECLARE_IMMEDIATE_SR(PMEVTYPER0_EL0); > >+/* PMEVTYPER30_EL0 */ > >+DECLARE_IMMEDIATE_SR(PMCCFILTR_EL0); > >+DECLARE_IMMEDIATE_SR(PMCNTENSET_EL0); > >+DECLARE_IMMEDIATE_SR(PMINTENSET_EL1); > >+DECLARE_IMMEDIATE_SR(PMOVSSET_EL0); > >+DECLARE_IMMEDIATE_SR(PMSWINC_EL0); > >+DECLARE_IMMEDIATE_SR(PMUSERENR_EL0); > >+DECLARE_IMMEDIATE_SR(DACR32_EL2); > >+DECLARE_IMMEDIATE_SR(IFSR32_EL2); > >+DECLARE_IMMEDIATE_SR(FPEXC32_EL2); > >+DECLARE_IMMEDIATE_SR(DBGVCR32_EL2); > >+ > 
>+static const struct sys_reg_accessor sys_reg_accessors[NR_SYS_REGS] = { > >+ [0 ... NR_SYS_REGS - 1] = { > >+ .rdsr = __default_read_sys_reg, > >+ .wrsr = __default_write_sys_reg, > >+ }, > >+ > >+ SR_HANDLER(MPIDR_EL1), > >+ SR_HANDLER(CSSELR_EL1), > >+ SR_HANDLER(SCTLR_EL1), > >+ SR_HANDLER(ACTLR_EL1), > >+ SR_HANDLER(CPACR_EL1), > >+ SR_HANDLER(TTBR0_EL1), > >+ SR_HANDLER(TTBR1_EL1), > >+ SR_HANDLER(TCR_EL1), > >+ SR_HANDLER(ESR_EL1), > >+ SR_HANDLER(AFSR0_EL1), > >+ SR_HANDLER(AFSR1_EL1), > >+ SR_HANDLER(FAR_EL1), > >+ SR_HANDLER(MAIR_EL1), > >+ SR_HANDLER(VBAR_EL1), > >+ SR_HANDLER(CONTEXTIDR_EL1), > >+ SR_HANDLER(TPIDR_EL0), > >+ SR_HANDLER(TPIDRRO_EL0), > >+ SR_HANDLER(TPIDR_EL1), > >+ SR_HANDLER(AMAIR_EL1), > >+ SR_HANDLER(CNTKCTL_EL1), > >+ SR_HANDLER(PAR_EL1), > >+ SR_HANDLER(MDSCR_EL1), > >+ SR_HANDLER(MDCCINT_EL1), > >+ SR_HANDLER(PMCR_EL0), > >+ SR_HANDLER(PMSELR_EL0), > >+ SR_HANDLER_RANGE(PMEVCNTR0_EL0, PMEVCNTR30_EL0), > >+ SR_HANDLER(PMCCNTR_EL0), > >+ SR_HANDLER_RANGE(PMEVTYPER0_EL0, PMEVTYPER30_EL0), > >+ SR_HANDLER(PMCCFILTR_EL0), > >+ SR_HANDLER(PMCNTENSET_EL0), > >+ SR_HANDLER(PMINTENSET_EL1), > >+ SR_HANDLER(PMOVSSET_EL0), > >+ SR_HANDLER(PMSWINC_EL0), > >+ SR_HANDLER(PMUSERENR_EL0), > >+ SR_HANDLER(DACR32_EL2), > >+ SR_HANDLER(IFSR32_EL2), > >+ SR_HANDLER(FPEXC32_EL2), > >+ SR_HANDLER(DBGVCR32_EL2), > >+}; > >+ > >+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg) > >+{ > >+ return sys_reg_accessors[reg].rdsr(vcpu, reg); > >+} > >+ > >+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 val) > >+{ > >+ sys_reg_accessors[reg].wrsr(vcpu, reg, val); > >+} > >+ > > /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ > > static u32 cache_levels; > > > > -- > Julien Thierry