Add a helper to elide memory barriers for requests that do not need
them because they have no dependencies.  An architecture can implement
a function, kvm_arch_request_needs_mb(), that says which requests do
not need memory barriers when handling them.

Signed-off-by: Radim Krčmář <rkrcmar@xxxxxxxxxx>
---
 include/linux/kvm_host.h | 41 +++++++++++++++++++++++++++++++++++++----
 virt/kvm/kvm_main.c      |  3 ++-
 2 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d899473859d3..2cc438685af8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1097,8 +1097,8 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
  * 2) remote request with no data (= kick)
  * 3) remote request with data (= kick + mb)
  *
- * TODO: the API is inconsistent -- a request doesn't call kvm_vcpu_kick(), but
- * forces smp_wmb() for all requests.
+ * TODO: the API does not distinguish local and remote requests -- remote
+ * should contain kvm_vcpu_kick().
  */
 static inline void __kvm_request_set(unsigned req, struct kvm_vcpu *vcpu)
 {
@@ -1106,6 +1106,37 @@ static inline void __kvm_request_set(unsigned req, struct kvm_vcpu *vcpu)
 	set_bit(req, &vcpu->requests);
 }
 
+/*
+ * __kvm_request_needs_mb is used to improve performance, so it should have no
+ * runtime overhead.
+ */
+static inline bool __kvm_request_needs_mb(int req)
+{
+	/*
+	 * This barrier lets callers avoid the following pattern:
+	 *   if (__kvm_request_needs_mb(req))
+	 *           ...
+	 *   else
+	 *           barrier();
+	 */
+	barrier();
+
+	if (!__builtin_constant_p(req))
+		return true;
+
+#ifdef kvm_arch_request_needs_mb
+	/*
+	 * GCC optimizes pure kvm_arch_request_needs_mb() with a constant input
+	 * into a constant, but __builtin_constant_p() is not so clever, so we
+	 * cannot ensure that with:
+	 *   BUILD_BUG_ON(!__builtin_constant_p(kvm_arch_request_needs_mb(req)));
+	 */
+	return kvm_arch_request_needs_mb(req);
+#else
+	return true;
+#endif
+}
+
 static inline void kvm_request_set(unsigned req, struct kvm_vcpu *vcpu)
 {
 	/*
@@ -1113,7 +1144,8 @@ static inline void kvm_request_set(unsigned req, struct kvm_vcpu *vcpu)
 	 * kvm_request_test_and_clear's caller.
 	 * Paired with the smp_mb__after_atomic in kvm_request_test_and_clear.
 	 */
-	smp_wmb();
+	if (__kvm_request_needs_mb(req))
+		smp_wmb();
 	__kvm_request_set(req, vcpu);
 }
 
@@ -1137,7 +1169,8 @@ static inline bool kvm_request_test_and_clear(unsigned req, struct kvm_vcpu *vcpu)
 		 * kvm_request_test_and_clear's caller.
 		 * Paired with the smp_wmb in kvm_request_set.
 		 */
-		smp_mb__after_atomic();
+		if (__kvm_request_needs_mb(req))
+			smp_mb__after_atomic();
 		return true;
 	} else {
 		return false;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2250920ec965..ced3e4cb1df0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -179,7 +179,8 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 	me = get_cpu();
 
 	/* Paired with the smp_mb__after_atomic in kvm_request_test_and_clear. */
-	smp_wmb();
+	if (__kvm_request_needs_mb(req))
+		smp_wmb();
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		__kvm_request_set(req, vcpu);
-- 
2.11.1
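
For illustration, a minimal sketch of the opt-in arch hook this helper
expects.  Everything below is hypothetical, not part of the patch: the
header location and the choice of KVM_REQ_PENDING_TIMER as a
barrier-free request are assumptions for the example only.  The #define
is what makes the #ifdef kvm_arch_request_needs_mb test in
include/linux/kvm_host.h see the hook:

	/* hypothetical example, e.g. in the arch's asm/kvm_host.h */
	#define kvm_arch_request_needs_mb kvm_arch_request_needs_mb
	static inline bool kvm_arch_request_needs_mb(int req)
	{
		switch (req) {
	 	case KVM_REQ_PENDING_TIMER:
			/*
			 * Illustrative only: a request that carries no
			 * data, where the kick alone is assumed enough.
			 */
			return false;
		default:
			/* Conservative default: keep the barrier. */
			return true;
		}
	}

Because the switch folds to a constant whenever req is a compile-time
constant, kvm_request_set(KVM_REQ_PENDING_TIMER, vcpu) would then
compile down to set_bit() alone: __kvm_request_needs_mb() evaluates to
false at build time and the smp_wmb() is elided.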