The procedure for loading and unloading the KVM modules has to change accordingly.
------------------------------------------------------------------------------------------------------------
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 363af32..e8239b6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -58,6 +58,10 @@
 #include "irq.h"
 #endif
 
+#include <linux/sched-if.h>
+#include <linux/ipi.h>
+#include <linux/trace.h>
+
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -778,6 +782,8 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	vcpu->vcpu_id = id;
 	init_waitqueue_head(&vcpu->wq);
 
+	vcpu->thread = current;
+
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page) {
 		r = -ENOMEM;
@@ -929,7 +935,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
 };
 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
 
-static struct kvm *kvm_create_vm(void)
+static struct kvm *kvm_create_vm(unsigned int vm_type)
 {
 	struct kvm *kvm = kvm_arch_create_vm();
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
@@ -938,6 +944,22 @@ static struct kvm *kvm_create_vm(void)
 
 	if (IS_ERR(kvm))
 		goto out;
+
+	if (vm_type != IDLE_VM) {
+		if (vm_type == HOST_VM) {
+			kvm->is_paused_by_controller = 1;
+			atomic_inc(&kvm->pause_count);
+		}
+	} else
+		idle_vm_kvm = kvm;
+
+	if (vm_type == HOST_VM) host_vm_kvm = kvm;
+	if (sched_init_vm(kvm) != 0) {
+		printk("func %s line %d sched_init_vm failed\n",
+		       __FUNCTION__, __LINE__);
+		goto out;
+	}
+
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	INIT_LIST_HEAD(&kvm->irq_routing);
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
@@ -1017,10 +1039,20 @@ void kvm_free_physmem(struct kvm *kvm)
 
 static void kvm_destroy_vm(struct kvm *kvm)
 {
+	int i;
 	struct mm_struct *mm = kvm->mm;
 
 	kvm_arch_sync_events(kvm);
 	spin_lock(&kvm_lock);
+	if (!kvm->is_paused_by_controller)
+		vm_pause(kvm);
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		if (kvm->vcpus[i])
+			sched_destroy_vcpu(kvm->vcpus[i]);
+	}
+	sched_destroy_vm(kvm);
+
 	list_del(&kvm->vm_list);
 	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
@@ -1622,9 +1654,14 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 			break;
 
 		vcpu_put(vcpu);
+		set_bit(_VPF_blocked, &vcpu->pause_flags);
+		tasklet_schedule(&per_cpu(schedule_data, raw_smp_processor_id()).sched_tasklet);
 		schedule();
 		vcpu_load(vcpu);
 	}
+	clear_bit(_VPF_blocked, &vcpu->pause_flags);
+	vcpu_wake(vcpu);
+	tasklet_schedule(&per_cpu(schedule_data, raw_smp_processor_id()).sched_tasklet);
 
 	finish_wait(&vcpu->wq, &wait);
 }
@@ -1702,6 +1739,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 {
 	int r;
 	struct kvm_vcpu *vcpu;
+	struct timespec now;
 
 	if (!valid_vcpu(n))
 		return -EINVAL;
@@ -1712,7 +1750,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 
 	preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
 
-	r = kvm_arch_vcpu_setup(vcpu);
+	r = 0;
+	if (!(unlikely(is_idle_vm(kvm) || is_host_vm(kvm))))
+		r = kvm_arch_vcpu_setup(vcpu);
 	if (r)
 		return r;
 
@@ -1725,10 +1765,31 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 	mutex_unlock(&kvm->lock);
 
 	/* Now it's all set up, let userspace reach it */
-	kvm_get_kvm(kvm);
-	r = create_vcpu_fd(vcpu);
-	if (r < 0)
-		goto unlink;
+	if (unlikely(is_idle_vm(kvm) || is_host_vm(kvm))) {
+		vcpu->pause_flags = 0;
+		atomic_set(&vcpu->pause_count, 0);
+		if (is_idle_vm(kvm))
+			printk(" idle domain ");
+		else
+			printk(" host domain ");
+		printk("vcpu %p created\n", vcpu);
+	} else {
+		kvm_get_kvm(kvm);
+		r = create_vcpu_fd(vcpu);
+		if (r < 0)
+			goto unlink;
+
+		set_bit(_VPF_blocked, &vcpu->pause_flags);
+	}
+
+	if (is_host_vm(kvm)) {
+		set_bit(_VPF_blocked, &vcpu->pause_flags);
+	}
+	vcpu->status = VCPU_YIELD;
+	vcpu->runstate.state = is_idle_vcpu(vcpu) ? RUNSTATE_running : RUNSTATE_offline;
+	now = current_kernel_time();
+	vcpu->runstate.state_entry_time = timespec_to_ns(&now);
+	if (sched_init_vcpu(vcpu, n % num_online_cpus()) != 0)
+		goto unlink;
 
 	return r;
 
 unlink:
@@ -1845,12 +1906,19 @@ static long kvm_vcpu_ioctl(struct file *filp,
 	if (vcpu->kvm->mm != current->mm)
 		return -EIO;
 	switch (ioctl) {
-	case KVM_RUN:
+	case KVM_RUN: {
+		struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
+
 		r = -EINVAL;
 		if (arg)
 			goto out;
+		if (!test_and_set_bool(vcpu->set_rt)) {
+			sched_setscheduler(current, SCHED_RR, &param);
+		}
+
 		r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
 		break;
+	}
 	case KVM_GET_REGS: {
 		struct kvm_regs *kvm_regs;
@@ -2247,7 +2315,7 @@ static int kvm_dev_ioctl_create_vm(void)
 	int fd;
 	struct kvm *kvm;
 
-	kvm = kvm_create_vm();
+	kvm = kvm_create_vm(NORMAL_VM);
 	if (IS_ERR(kvm))
 		return PTR_ERR(kvm);
 	fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, 0);
@@ -2558,11 +2626,54 @@ static void kvm_sched_out(struct preempt_notifier *pn,
 	kvm_arch_vcpu_put(vcpu);
 }
 
+struct kvm *idle_vm_kvm;
+struct kvm *host_vm_kvm;
+EXPORT_SYMBOL(idle_vm_kvm);
+EXPORT_SYMBOL(host_vm_kvm);
+
+static int init_idle_vm(void)
+{
+	struct kvm *idle_vm;
+	int cpu;
+
+	/* Domain creation requires that scheduler structures are initialised. */
+	scheduler_init();
+
+	idle_vm = kvm_create_vm(IDLE_VM);
+	if (IS_ERR(idle_vm))
+		BUG_ON(1);
+
+	/* XXX: should we put the newly created vcpu to the runqueue? */
+	for_each_online_cpu(cpu) {
+		if (kvm_vm_ioctl_create_vcpu(idle_vm_kvm, cpu) < 0) {
+			int i;
+			for (i = 0; i < cpu; i++)
+				kvm_arch_vcpu_destroy(idle_vm->vcpus[i]);
+			printk("creating idle vcpus failed. quit!\n");
+			return -1;
+		}
+	}
+
+	scheduler_start();
+	return 0;
+}
+
+extern void kvm_force_tasklet_schedule(void *data);
+
+long (*sched_setaffinity_p)(pid_t pid, cpumask_t *in_mask);
+EXPORT_SYMBOL_GPL(sched_setaffinity_p);
+
 int kvm_init(void *opaque, unsigned int vcpu_size,
 	     struct module *module)
 {
 	int r;
 	int cpu;
+	int my_cpu = raw_smp_processor_id();
+
+	for_each_online_cpu(cpu) {
+		init_pending_ipi_buf(cpu);
+	}
 
 	kvm_init_debug();
 
@@ -2588,6 +2699,16 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 	if (r < 0)
 		goto out_free_0a;
 
+	for_each_online_cpu(cpu) {
+		if (init_trace_buf(cpu)) {
+			int i;
+			for (i = 0; i < cpu; i++)
+				free_trace_buf(i);
+			printk("alloc trace buf failed. quit!\n");
+			goto out_free_1;
+		}
+	}
+
quit!\n"); + goto out_free_1; + } + } + for_each_online_cpu(cpu) { smp_call_function_single(cpu, kvm_arch_check_processor_compat, @@ -2632,6 +2753,33 @@ int kvm_init(void *opaque, unsigned int vcpu_size, kvm_preempt_ops.sched_in = kvm_sched_in; kvm_preempt_ops.sched_out = kvm_sched_out; + if(init_idle_vm()) goto out; + + /* create the host vm */ + kvm_create_vm(HOST_VM); + for_each_online_cpu(cpu) { + if (kvm_vm_ioctl_create_vcpu(host_vm_kvm, cpu) < 0) { + kvm_destroy_vm(host_vm_kvm); + kvm_destroy_vm(idle_vm_kvm); + goto out; + } + } + + for(cpu = 0; cpu < KVM_MAX_VCPUS; ++cpu) { + struct kvm_vcpu* vcpu = host_vm_kvm->vcpus[cpu]; + if (vcpu) + clear_bit(_VPF_blocked, &vcpu->pause_flags); + } + + get_cpu(); + kvm_force_tasklet_schedule(NULL); + for_each_online_cpu(cpu) { + if(cpu != my_cpu) + smp_call_function_mask(cpumask_of_cpu(cpu), kvm_force_tasklet_schedule, NULL, 1); + } + put_cpu(); + vm_unpause_by_systemcontroller(host_vm_kvm); + return 0; out_free: @@ -2661,6 +2809,12 @@ EXPORT_SYMBOL_GPL(kvm_init); void kvm_exit(void) { + stop_auto_schedule(); + vm_pause_by_systemcontroller(host_vm_kvm); + kvm_destroy_vm(host_vm_kvm); + wait_scheduler_stops(); + kvm_destroy_vm(idle_vm_kvm); + scheduler_destroy(); kvm_trace_cleanup(); misc_deregister(&kvm_dev); kmem_cache_destroy(kvm_vcpu_cache); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html