On Fri, Nov 10, 2017 at 01:49:45AM -0800, Wanpeng Li wrote:

> +static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask);
> +
> +static void kvm_flush_tlb_others(const struct cpumask *cpumask,
> +			const struct flush_tlb_info *info)
> +{
> +	u8 state;
> +	int cpu;
> +	struct kvm_steal_time *src;
> +	struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask);
> +
> +	if (unlikely(!flushmask))
> +		return;
> +
> +	cpumask_copy(flushmask, cpumask);
> +	/*
> +	 * We have to call flush only on online vCPUs. And
> +	 * queue flush_on_enter for pre-empted vCPUs
> +	 */
> +	for_each_cpu(cpu, cpumask) {
> +		src = &per_cpu(steal_time, cpu);
> +		state = src->preempted;

I think that wants to be:

		state = READ_ONCE(src->preempted);

Because without that it's possible for state to get re-loaded between
the check here:

> +		if ((state & KVM_VCPU_PREEMPTED)) {

and its use here.

> +			if (cmpxchg(&src->preempted, state, state |
> +				KVM_VCPU_SHOULD_FLUSH) == state)

You can actually write that like:

		if (try_cmpxchg(&src->preempted, &state,
				state | KVM_VCPU_SHOULD_FLUSH))

Which should generate ever so slightly better code (it uses the cmpxchg
ZF instead of doing a superfluous compare).

> +				__cpumask_clear_cpu(cpu, flushmask);
> +		}
> +	}
> +
> +	native_flush_tlb_others(flushmask, info);
> +}
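
For readers following along outside the kernel tree, here is a minimal
user-space sketch of the reload hazard that READ_ONCE() closes. It is
not the kernel code: READ_ONCE is reduced to the usual volatile-cast
idiom, __sync_bool_compare_and_swap stands in for the kernel's
cmpxchg(), and the flag values are assumed placeholders.

/* Sketch only: user-space stand-ins for kernel primitives. */
#include <stdint.h>
#include <stdio.h>

#define READ_ONCE(x) (*(const volatile typeof(x) *)&(x))

#define KVM_VCPU_PREEMPTED	(1 << 0)	/* assumed value */
#define KVM_VCPU_SHOULD_FLUSH	(1 << 1)	/* assumed value */

static uint8_t preempted;	/* stands in for src->preempted */

static void check_and_mark(void)
{
	/*
	 * Without READ_ONCE the compiler may load 'preempted' twice:
	 * once for the test and once when building the cmpxchg
	 * arguments. A concurrent store between the two loads would
	 * make the cmpxchg compare against a different value than the
	 * one tested. READ_ONCE forces a single load into 'state'.
	 */
	uint8_t state = READ_ONCE(preempted);

	if (state & KVM_VCPU_PREEMPTED) {
		/* Succeeds only if nobody changed 'preempted' since
		 * the single load above. */
		if (__sync_bool_compare_and_swap(&preempted, state,
				state | KVM_VCPU_SHOULD_FLUSH))
			printf("queued flush-on-enter\n");
	}
}

int main(void)
{
	preempted = KVM_VCPU_PREEMPTED;
	check_and_mark();
	printf("preempted = %#x\n", preempted);	/* 0x3 */
	return 0;
}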
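
The try_cmpxchg() contract itself maps directly onto the GCC/Clang
__atomic_compare_exchange_n builtin, which has the same shape: a
boolean success flag (derived from cmpxchg's ZF on x86) plus a
write-back of the fresh value into the caller's old-value slot on
failure, so a retry needs no extra load or compare. A user-space
sketch with illustrative values, not the kernel implementation:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool try_cmpxchg_u8(uint8_t *ptr, uint8_t *old, uint8_t new)
{
	/* Returns true on success; on failure, stores the current
	 * value of *ptr into *old, matching try_cmpxchg(). */
	return __atomic_compare_exchange_n(ptr, old, new, false,
					   __ATOMIC_SEQ_CST,
					   __ATOMIC_SEQ_CST);
}

int main(void)
{
	uint8_t preempted = 0x1;	/* assumed: KVM_VCPU_PREEMPTED */
	uint8_t state = 0x1;

	/* Success path: one cmpxchg, branch on its result directly,
	 * no "== state" re-compare. */
	if (try_cmpxchg_u8(&preempted, &state, state | 0x2))
		printf("marked, preempted = %#x\n", preempted);

	/* Failure path: 'state' now holds the fresh value, so no
	 * explicit re-read is needed before retrying. */
	state = 0x0;
	if (!try_cmpxchg_u8(&preempted, &state, 0x4))
		printf("raced, fresh state = %#x\n", state);

	return 0;
}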