On Tue, Aug 17, 2021, Kees Cook wrote: > arch/x86/kvm/emulate.c | 3 +-- > arch/x86/kvm/kvm_emulate.h | 19 +++++++++++-------- > 2 files changed, 12 insertions(+), 10 deletions(-) > > diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c > index 2837110e66ed..2608a047e769 100644 > --- a/arch/x86/kvm/emulate.c > +++ b/arch/x86/kvm/emulate.c > @@ -5377,8 +5377,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop) > > void init_decode_cache(struct x86_emulate_ctxt *ctxt) > { > - memset(&ctxt->rip_relative, 0, > - (void *)&ctxt->modrm - (void *)&ctxt->rip_relative); > + memset(&ctxt->decode_cache, 0, sizeof(ctxt->decode_cache)); > > ctxt->io_read.pos = 0; > ctxt->io_read.end = 0; > diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h > index 68b420289d7e..9b8afcb8ad39 100644 > --- a/arch/x86/kvm/kvm_emulate.h > +++ b/arch/x86/kvm/kvm_emulate.h > @@ -341,14 +341,17 @@ struct x86_emulate_ctxt { > * the rest are initialized unconditionally in x86_decode_insn > * or elsewhere > */ > - bool rip_relative; > - u8 rex_prefix; > - u8 lock_prefix; > - u8 rep_prefix; > - /* bitmaps of registers in _regs[] that can be read */ > - u32 regs_valid; > - /* bitmaps of registers in _regs[] that have been written */ > - u32 regs_dirty; > + struct_group(decode_cache, This is somewhat misleading because half of this struct is the so-called "decode cache", not just these six fields. KVM's "optimization" is quite ridiculous as this has never been such a hot path that saving a few mov instructions would be noticeable. And hilariously, the "optimization" is completely unnecessary because both gcc and clang are clever enough to batch the first five into a movq even when zeroing the fields individually. 
So, I would much prefer to go with the following: From dbdca1f4cd01fee418c252e54c360d518b2b1ad6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson <seanjc@xxxxxxxxxx> Date: Wed, 18 Aug 2021 08:03:08 -0700 Subject: [PATCH] KVM: x86: Replace memset() "optimization" with normal per-field writes Explicitly zero select fields in the emulator's decode cache instead of zeroing the fields via a gross memset() that spans six fields. gcc and clang are both clever enough to batch the first five fields into a single quadword MOV, i.e. memset() and individually zeroing generate identical code. Removing the wart also prepares KVM for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memset(). No functional change intended. Reported-by: Kees Cook <keescook@xxxxxxxxxxxx> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx> --- arch/x86/kvm/emulate.c | 9 +++++++-- arch/x86/kvm/kvm_emulate.h | 6 +----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2837110e66ed..bf81fd017e7f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5377,8 +5377,13 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop) void init_decode_cache(struct x86_emulate_ctxt *ctxt) { - memset(&ctxt->rip_relative, 0, - (void *)&ctxt->modrm - (void *)&ctxt->rip_relative); + /* Clear fields that are set conditionally but read without a guard. 
*/ + ctxt->rip_relative = false; + ctxt->rex_prefix = 0; + ctxt->lock_prefix = 0; + ctxt->rep_prefix = 0; + ctxt->regs_valid = 0; + ctxt->regs_dirty = 0; ctxt->io_read.pos = 0; ctxt->io_read.end = 0; diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 68b420289d7e..bc1fecacccd4 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -336,11 +336,7 @@ struct x86_emulate_ctxt { fastop_t fop; }; int (*check_perm)(struct x86_emulate_ctxt *ctxt); - /* - * The following six fields are cleared together, - * the rest are initialized unconditionally in x86_decode_insn - * or elsewhere - */ + bool rip_relative; u8 rex_prefix; u8 lock_prefix; -- 2.33.0.rc1.237.g0d66db33f3-goog > + bool rip_relative; > + u8 rex_prefix; > + u8 lock_prefix; > + u8 rep_prefix; > + /* bitmaps of registers in _regs[] that can be read */ > + u32 regs_valid; > + /* bitmaps of registers in _regs[] that have been written */ > + u32 regs_dirty; > + ); > + > /* modrm */ > u8 modrm; > u8 modrm_mod; > -- > 2.30.2 >