On Fri, Jun 16, 2023 at 03:45:50PM +0800, Yuan Yao wrote:
> > +/*
> > + * Add @range into kvm->arch.mtrr_zap_list and sort the list in
> > + * "length" ascending + "start" descending order, so that
> > + * ranges consuming more zap cycles can be dequeued later and their
> > + * chances of being found duplicated are increased.
> > + */
> > +static void kvm_add_mtrr_zap_list(struct kvm *kvm, struct mtrr_zap_range *range)
> > +{
> > +	struct list_head *head = &kvm->arch.mtrr_zap_list;
> > +	u64 len = range->end - range->start;
> > +	struct mtrr_zap_range *cur, *n;
> > +	bool added = false;
> > +
> > +	spin_lock(&kvm->arch.mtrr_zap_list_lock);
> > +
> > +	if (list_empty(head)) {
> > +		list_add(&range->node, head);
> > +		spin_unlock(&kvm->arch.mtrr_zap_list_lock);
> > +		return;
> > +	}
> > +
> > +	list_for_each_entry_safe(cur, n, head, node) {
> > +		u64 cur_len = cur->end - cur->start;
> > +
> > +		if (len < cur_len)
> > +			break;
> > +
> > +		if (len > cur_len)
> > +			continue;
> > +
> > +		if (range->start > cur->start)
> > +			break;
> > +
> > +		if (range->start < cur->start)
> > +			continue;
> > +
> > +		/* equal len & start, no need to add */
> > +		added = true;
>
> Possible/worth to ignore the range already covered
> by queued range ?
I may not be getting you correctly, but "added" here means a queued range with
exactly the same start + len was found, so the new range is freed and not
added here.

> > +		kfree(range);
> > +		break;
> > +	}
> > +
> > +	if (!added)
> > +		list_add_tail(&range->node, &cur->node);
> > +
> > +	spin_unlock(&kvm->arch.mtrr_zap_list_lock);
> > +}
> > +
> > +static void kvm_zap_mtrr_zap_list(struct kvm *kvm)
> > +{
> > +	struct list_head *head = &kvm->arch.mtrr_zap_list;
> > +	struct mtrr_zap_range *cur = NULL;
> > +
> > +	spin_lock(&kvm->arch.mtrr_zap_list_lock);
> > +
> > +	while (!list_empty(head)) {
> > +		u64 start, end;
> > +
> > +		cur = list_first_entry(head, typeof(*cur), node);
> > +		start = cur->start;
> > +		end = cur->end;
> > +		list_del(&cur->node);
> > +		kfree(cur);
> > +		spin_unlock(&kvm->arch.mtrr_zap_list_lock);
> > +
> > +		kvm_zap_gfn_range(kvm, start, end);
> > +
> > +		spin_lock(&kvm->arch.mtrr_zap_list_lock);
> > +	}
> > +
> > +	spin_unlock(&kvm->arch.mtrr_zap_list_lock);
> > +}
> > +
> > +static void kvm_zap_or_wait_mtrr_zap_list(struct kvm *kvm)
> > +{
> > +	if (atomic_cmpxchg_acquire(&kvm->arch.mtrr_zapping, 0, 1) == 0) {
> > +		kvm_zap_mtrr_zap_list(kvm);
> > +		atomic_set_release(&kvm->arch.mtrr_zapping, 0);
> > +		return;
> > +	}
> > +
> > +	while (atomic_read(&kvm->arch.mtrr_zapping))
> > +		cpu_relax();
> > +}
> > +
> > +static void kvm_mtrr_zap_gfn_range(struct kvm_vcpu *vcpu,
> > +				   gfn_t gfn_start, gfn_t gfn_end)
> > +{
> > +	struct mtrr_zap_range *range;
> > +
> > +	range = kmalloc(sizeof(*range), GFP_KERNEL_ACCOUNT);
> > +	if (!range)
> > +		goto fail;
> > +
> > +	range->start = gfn_start;
> > +	range->end = gfn_end;
> > +
> > +	kvm_add_mtrr_zap_list(vcpu->kvm, range);
> > +
> > +	kvm_zap_or_wait_mtrr_zap_list(vcpu->kvm);
> > +	return;
> > +
> > +fail:
> > +	kvm_clear_mtrr_zap_list(vcpu->kvm);
> A very small chance of a race condition that incorrectly clears queued
> ranges which have not yet been zapped by another thread?
> Like below:
>
> Thread A                        | Thread B
> kvm_add_mtrr_zap_list()         |
>                                 | kvm_clear_mtrr_zap_list()
> kvm_zap_or_wait_mtrr_zap_list() |
>
> Calling kvm_clear_mtrr_zap_list() here looks unnecessary; other
> threads (B here) who put things in the queue will take care of them.
> > +	kvm_zap_gfn_range(vcpu->kvm, gfn_start, gfn_end);
Yes, if gfn_start and gfn_end here are not 0 and ~0ULL, the
kvm_clear_mtrr_zap_list() is not necessary. Though in reality they are
always 0 - ~0ULL, I agree that dropping the kvm_clear_mtrr_zap_list()
here is better.
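To make it concrete, a rough sketch of the failure path with the
kvm_clear_mtrr_zap_list() call dropped (just illustrative, reusing the
helpers quoted above; the exact form is up to the next revision):

static void kvm_mtrr_zap_gfn_range(struct kvm_vcpu *vcpu,
				   gfn_t gfn_start, gfn_t gfn_end)
{
	struct mtrr_zap_range *range;

	range = kmalloc(sizeof(*range), GFP_KERNEL_ACCOUNT);
	if (!range)
		goto fail;

	range->start = gfn_start;
	range->end = gfn_end;

	kvm_add_mtrr_zap_list(vcpu->kvm, range);
	kvm_zap_or_wait_mtrr_zap_list(vcpu->kvm);
	return;

fail:
	/*
	 * Allocation failed: zap the requested range directly and leave
	 * kvm->arch.mtrr_zap_list untouched, so ranges queued by other
	 * threads are not dropped before they are zapped.
	 */
	kvm_zap_gfn_range(vcpu->kvm, gfn_start, gfn_end);
}

That keeps the direct kvm_zap_gfn_range() fallback while avoiding any
writes to the shared list in the error path.

Thanks!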