On 11/17/2011 01:19 PM, Carsten Otte wrote: > From: Carsten Otte <cotte@xxxxxxxxxx> > > This patch adds support for pseudo page faults. The corresponding > interface is implemented according to the documentation in CP > programming services. > Diagnose 258 allows the guest to register compare and check masks for pseudo > page faults, and the guest can cancel these masks again. For both > operations, like everywhere else in KVM on z, access register mode > is not supported (ALET is assumed to be 0). > In case a major fault is recognized for a virtual machine, the page > fault path triggers IO and kvm_s390_handle_pfault is called in order > to determine if the fault can be handled asynchronously. In case the > fault condition qualifies for asynchronous treatment, the guest is > notified. Otherwise the vcpu thread synchronously waits for the page > to become available prior to reentry into SIE. > One kernel thread per virtual machine gets notified for all > asynchronous page fault events for its VM. Subsequently it waits for > the page to be faulted in by calling fault_in_user_pages, and it > notifies the guest that the page fault operation has completed. > > +static void kvm_s390_pfault_sync(struct kvm_vcpu *vcpu) > +{ > + unsigned long uaddr = gmap_fault(current->thread.gmap_addr, > + vcpu->arch.gmap); > + > + if (IS_ERR_VALUE(uaddr)) > + return; > + > + VCPU_EVENT(vcpu, 5, "synchronous page fault at guest %lx user %lx", > + current->thread.gmap_addr, uaddr); > + > + fault_in_pages_readable((char __user *)uaddr, PAGE_SIZE); > +} These VCPU_EVENT messages may make sense as tracepoints (this is what x86 does). The kvm_stat script knows how to pick them up and generate an event histogram dynamically, along with all the other goodies tracepoints bring. 
> + > +static void kvm_s390_pfault_async(struct kvm_vcpu *vcpu) > +{ > + unsigned long uaddr = gmap_fault(current->thread.gmap_addr, > + vcpu->arch.gmap); > + struct pfault_event *event; > + struct kvm_s390_interrupt_info *init, *done; > + unsigned long pfault_token; > + > + if (IS_ERR_VALUE(uaddr)) > + return; > + > + if (!kvm_s390_should_pfault(vcpu)) { > + kvm_s390_pfault_sync(vcpu); > + return; > + } > + > + copy_from_guest(vcpu, &pfault_token, vcpu->arch.pfault_token, > + 8); Missing error check? The return value of copy_from_guest() is ignored, so if the copy fails pfault_token is used uninitialized. > + > + init = kzalloc(sizeof(*init), GFP_ATOMIC); > + if (!init) > + return; > + > + done = kzalloc(sizeof(*done), GFP_ATOMIC); > + if (!done) > + goto out_init; > + > + event = kzalloc(sizeof(*event), GFP_ATOMIC); > + if (!event) > + goto out_done; Three allocations? Maybe combine them into one, even if their lifetimes are not exactly the same. > + > + init->type = KVM_S390_INT_PFAULT_INIT; > + init->ext.ext_params2 = pfault_token; > + > + done->type = KVM_S390_INT_PFAULT_DONE; > + done->ext.ext_params2 = pfault_token; > + > + event->inti = done; > + event->uaddr = uaddr; > + event->local_int = &vcpu->arch.local_int; > + > + VCPU_EVENT(vcpu, 5, > + "initiating pfault for token %lx at guest %lx user %lx", > + pfault_token, current->thread.gmap_addr, uaddr); > + > + __kvm_s390_inject_vcpu(&vcpu->arch.local_int, init); > + > + spin_lock_bh(&vcpu->kvm->arch.pfault_list_lock); > + list_add_tail(&event->pfault_list_element, > + &vcpu->kvm->arch.pfault_list); > + wake_up(&vcpu->kvm->arch.pfault_wait); > + spin_unlock_bh(&vcpu->kvm->arch.pfault_list_lock); > + return; > + > +out_done: > + kfree(done); > +out_init: > + kfree(init); > +} > + > +void kvm_s390_handle_pfault(struct kvm_vcpu *vcpu) > +{ > + unsigned long mask; > + > + if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) > + goto synchronous; > + > + mask = vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select; > + > + if (mask != vcpu->arch.pfault_compare) > + goto synchronous; > + > + 
kvm_s390_pfault_async(vcpu); > + return; > +synchronous: > + kvm_s390_pfault_sync(vcpu); > +} > + > +static int pfault_thread_fn(void *data) > +{ > + struct pfault_event *event, *n; > + struct pfault_event *dequeued; > + struct kvm *kvm = (struct kvm *)data; > + wait_queue_t wait; > + > + init_waitqueue_entry(&wait, current); > + add_wait_queue(&kvm->arch.pfault_wait, &wait); > + while (1) { > + spin_lock_bh(&kvm->arch.pfault_list_lock); > + current->state = TASK_INTERRUPTIBLE; > + dequeued = NULL; > + list_for_each_entry_safe(event, n, &kvm->arch.pfault_list, > + pfault_list_element) { > + if (!dequeued) { > + list_del_init(&event->pfault_list_element); > + dequeued = event; > + } > + } > + spin_unlock_bh(&kvm->arch.pfault_list_lock); > + if (kthread_should_stop()) { > + current->state = TASK_RUNNING; > + remove_wait_queue(&kvm->arch.pfault_wait, &wait); > + return 0; > + } > + if (dequeued) { > + current->state = TASK_RUNNING; > + fault_in_pages_readable((char __user *)dequeued->uaddr, > + PAGE_SIZE); > + __kvm_s390_inject_vcpu(dequeued->local_int, > + dequeued->inti); > + kfree(dequeued); > + } else { > + schedule(); > + } > + } > +} Is this duplicating virt/kvm/async_pf.c? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html