Re: [patch 2/2] [PATCH] kvm-s390: pseudo page fault support

Avi Kivity <avi@xxxxxxxxxx> · Thu, 17 Nov 2011 15:18:03 +0200

On 11/17/2011 01:19 PM, Carsten Otte wrote:
> From: Carsten Otte <cotte@xxxxxxxxxx>
>
> This patch adds support for pseudo page faults. The corresponding
> interface is implemented according to the documentation in CP
> programming services.
> Diagnose 258 allows the guest to register compare and check masks for
> pseudo page faults, and to cancel these masks again. For both
> operations, like everywhere else in KVM on z, access register mode
> is not supported (ALET is assumed to be 0).
> In case a major fault is recognized for a virtual machine, the page
> fault path triggers IO and kvm_s390_handle_pfault is called in order
> to determine if the fault can be handled asynchronously. In case the
> fault condition qualifies for asynchronous treatment, the guest is
> notified. Otherwise the vcpu thread synchronously waits for the page
> to become available prior to reentry into SIE.
> One kernel thread per virtual machine gets notified for all
> asynchronous page fault events for its VM. Subsequently it waits for
> the page to be faulted in by calling fault_in_pages_readable, and it
> notifies the guest that the page fault operation has completed.
>
> +static void kvm_s390_pfault_sync(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long uaddr = gmap_fault(current->thread.gmap_addr,
> +		vcpu->arch.gmap);
> +
> +	if (IS_ERR_VALUE(uaddr))
> +		return;
> +
> +	VCPU_EVENT(vcpu, 5, "synchronous page fault at guest %lx user %lx",
> +		   current->thread.gmap_addr, uaddr);
> +
> +	fault_in_pages_readable((char __user *)uaddr, PAGE_SIZE);
> +}

These VCPU_EVENT calls may make sense as tracepoints (this is what x86
does).  The kvm_stat script knows how to pick them up and generate an
event histogram dynamically, along with all the other goodies
tracepoints bring.

> +
> +static void kvm_s390_pfault_async(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long uaddr = gmap_fault(current->thread.gmap_addr,
> +					 vcpu->arch.gmap);
> +	struct pfault_event *event;
> +	struct kvm_s390_interrupt_info *init, *done;
> +	unsigned long pfault_token;
> +
> +	if (IS_ERR_VALUE(uaddr))
> +		return;
> +
> +	if (!kvm_s390_should_pfault(vcpu)) {
> +		kvm_s390_pfault_sync(vcpu);
> +		return;
> +	}
> +
> +	copy_from_guest(vcpu, &pfault_token, vcpu->arch.pfault_token,
> +			8);

Missing error check?

> +
> +	init = kzalloc(sizeof(*init), GFP_ATOMIC);
> +	if (!init)
> +		return;
> +
> +	done = kzalloc(sizeof(*done), GFP_ATOMIC);
> +	if (!done)
> +		goto out_init;
> +
> +	event = kzalloc(sizeof(*event), GFP_ATOMIC);
> +	if (!event)
> +		goto out_done;

Three allocs?  Maybe combine them?  Even if their lifetimes are not
exactly the same.

> +
> +	init->type = KVM_S390_INT_PFAULT_INIT;
> +	init->ext.ext_params2 = pfault_token;
> +
> +	done->type = KVM_S390_INT_PFAULT_DONE;
> +	done->ext.ext_params2 = pfault_token;
> +
> +	event->inti = done;
> +	event->uaddr = uaddr;
> +	event->local_int = &vcpu->arch.local_int;
> +
> +	VCPU_EVENT(vcpu, 5,
> +		   "initiating pfault for token %lx at guest %lx user %lx",
> +		   pfault_token, current->thread.gmap_addr, uaddr);
> +
> +	__kvm_s390_inject_vcpu(&vcpu->arch.local_int, init);
> +
> +	spin_lock_bh(&vcpu->kvm->arch.pfault_list_lock);
> +	list_add_tail(&event->pfault_list_element,
> +		      &vcpu->kvm->arch.pfault_list);
> +	wake_up(&vcpu->kvm->arch.pfault_wait);
> +	spin_unlock_bh(&vcpu->kvm->arch.pfault_list_lock);
> +	return;
> +
> +out_done:
> +	kfree(done);
> +out_init:
> +	kfree(init);
> +}
> +
> +void kvm_s390_handle_pfault(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long mask;
> +
> +	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
> +		goto synchronous;
> +
> +	mask = vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select;
> +
> +	if (mask != vcpu->arch.pfault_compare)
> +		goto synchronous;
> +
> +	kvm_s390_pfault_async(vcpu);
> +	return;
> +synchronous:
> +	kvm_s390_pfault_sync(vcpu);
> +}
> +
> +static int pfault_thread_fn(void *data)
> +{
> +	struct pfault_event *event, *n;
> +	struct pfault_event *dequeued;
> +	struct kvm *kvm = (struct kvm *)data;
> +	wait_queue_t wait;
> +
> +	init_waitqueue_entry(&wait, current);
> +	add_wait_queue(&kvm->arch.pfault_wait, &wait);
> +	while (1) {
> +		spin_lock_bh(&kvm->arch.pfault_list_lock);
> +		current->state = TASK_INTERRUPTIBLE;
> +		dequeued = NULL;
> +		list_for_each_entry_safe(event, n, &kvm->arch.pfault_list,
> +					 pfault_list_element) {
> +			if (!dequeued) {
> +				list_del_init(&event->pfault_list_element);
> +				dequeued = event;
> +			}
> +		}
> +		spin_unlock_bh(&kvm->arch.pfault_list_lock);
> +		if (kthread_should_stop()) {
> +			current->state = TASK_RUNNING;
> +			remove_wait_queue(&kvm->arch.pfault_wait, &wait);
> +			return 0;
> +		}
> +		if (dequeued) {
> +			current->state = TASK_RUNNING;
> +			fault_in_pages_readable((char __user *)dequeued->uaddr,
> +						PAGE_SIZE);
> +			__kvm_s390_inject_vcpu(dequeued->local_int,
> +					       dequeued->inti);
> +			kfree(dequeued);
> +		} else {
> +			schedule();
> +		}
> +	}
> +}

Is this duplicating virt/kvm/async_pf.c?

-- 
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html