On 16/02/2017 17:04, Radim Krčmář wrote: > A macro to optimize requests that do not need a memory barrier because > they have no dependencies. An architecture can implement a function > that says which requests do not need memory barriers when handling them. > > Signed-off-by: Radim Krčmář <rkrcmar@xxxxxxxxxx> I would leave this for a separate series, otherwise looks nice (though I was skeptical at first ;)). Paolo > --- > include/linux/kvm_host.h | 41 +++++++++++++++++++++++++++++++++++++---- > virt/kvm/kvm_main.c | 3 ++- > 2 files changed, 39 insertions(+), 5 deletions(-) > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index d899473859d3..2cc438685af8 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -1097,8 +1097,8 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) > * 2) remote request with no data (= kick) > * 3) remote request with data (= kick + mb) > * > - * TODO: the API is inconsistent -- a request doesn't call kvm_vcpu_kick(), but > - * forces smp_wmb() for all requests. > + * TODO: the API does not distinguish local and remote requests -- remote > + * should contain kvm_vcpu_kick(). > */ > > static inline void __kvm_request_set(unsigned req, struct kvm_vcpu *vcpu) > @@ -1106,6 +1106,37 @@ static inline void __kvm_request_set(unsigned req, struct kvm_vcpu *vcpu) > set_bit(req, &vcpu->requests); > } > > +/* > + * __kvm_request_needs_mb is used to improve performance, so it should have no > + * runtime overhead. > + */ > +static inline bool __kvm_request_needs_mb(int req) > +{ > + /* > + * This barrier lets callers avoid the following pattern: > + * if (__kvm_request_needs_mb(req)) > + * ... 
> + * else > + * barrier(); > + */ > + barrier(); > + > + if (!__builtin_constant_p(req)) > + return true; > + > +#ifdef kvm_arch_request_needs_mb > + /* > + * GCC optimizes pure kvm_arch_request_needs_mb() with a constant input > + * into a constant, but __builtin_constant_p() is not so clever, so we > + * cannot ensure that with: > + * BUILD_BUG_ON(!__builtin_constant_p(kvm_arch_request_needs_mb(req))); > + */ > + return kvm_arch_request_needs_mb(req); > +#else > + return true; > +#endif > +} > + > static inline void kvm_request_set(unsigned req, struct kvm_vcpu *vcpu) > { > /* > @@ -1113,7 +1144,8 @@ static inline void kvm_request_set(unsigned req, struct kvm_vcpu *vcpu) > * kvm_request_test_and_clear's caller. > * Paired with the smp_mb__after_atomic in kvm_request_test_and_clear. > */ > - smp_wmb(); > + if (__kvm_request_needs_mb(req)) > + smp_wmb(); > __kvm_request_set(req, vcpu); > } > > @@ -1137,7 +1169,8 @@ static inline bool kvm_request_test_and_clear(unsigned req, struct kvm_vcp > * kvm_request_test_and_clear's caller. > * Paired with the smp_wmb in kvm_request_set. > */ > - smp_mb__after_atomic(); > + if (__kvm_request_needs_mb(req)) > + smp_mb__after_atomic(); > return true; > } else { > return false; > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 2250920ec965..ced3e4cb1df0 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -179,7 +179,8 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) > me = get_cpu(); > > /* Paired with the smp_mb__after_atomic in kvm_request_test_and_clear. */ > - smp_wmb(); > + if (__kvm_request_needs_mb(req)) > + smp_wmb(); > > kvm_for_each_vcpu(i, vcpu, kvm) { > __kvm_request_set(req, vcpu); >