Tasks which are going to be running with the KVM address space have
to be mapped with their core data (stack, mm, pgd..) so that they can
(at least) switch back to the kernel address space.

For now, assume that these tasks are the ones running vcpu, and that
there's a 1:1 mapping between a task and vcpu. This should eventually
be improved to be independent of any task/vcpu mapping.

Also check that the task effectively entering the KVM address space
is mapped.

Signed-off-by: Alexandre Chartre <alexandre.chartre@xxxxxxxxxx>
---
 arch/x86/kvm/isolation.c |  216 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/isolation.h |    2 +
 arch/x86/kvm/vmx/vmx.c   |    8 ++
 include/linux/sched.h    |    5 +
 4 files changed, 231 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/isolation.c b/arch/x86/kvm/isolation.c
index d3ac014..e7979b3 100644
--- a/arch/x86/kvm/isolation.c
+++ b/arch/x86/kvm/isolation.c
@@ -64,6 +64,20 @@ struct pgt_directory_group {
 	((typeof(entry))(((unsigned long)(entry)) & PAGE_MASK))
 
 /*
+ * Variables to keep track of tasks mapped into the KVM address space.
+ */
+struct kvm_task_mapping {
+	struct list_head list;		/* link in kvm_task_mapping_list */
+	struct task_struct *task;	/* mapped task (reference held) */
+	void *stack;			/* mapped stack of the task */
+	struct mm_struct *mm;		/* mapped active_mm of the task */
+	pgd_t *pgd;			/* mapped PGD of that mm */
+};
+
+static LIST_HEAD(kvm_task_mapping_list);
+static DEFINE_MUTEX(kvm_task_mapping_lock);
+
+/*
  * Variables to keep track of address ranges mapped into the KVM
  * address space.
  */
@@ -1027,6 +1041,194 @@ int kvm_copy_percpu_mapping(void *percpu_ptr, size_t size)
 }
 EXPORT_SYMBOL(kvm_copy_percpu_mapping);
 
+/*
+ * Remove from the KVM address space every part of a task (task struct,
+ * stack, mm, pgd) recorded as mapped in task_mapping.
+ */
+static void kvm_clear_task_mapping(struct kvm_task_mapping *task_mapping)
+{
+	if (task_mapping->task) {
+		kvm_clear_range_mapping(task_mapping->task);
+		task_mapping->task = NULL;
+	}
+	if (task_mapping->stack) {
+		kvm_clear_range_mapping(task_mapping->stack);
+		task_mapping->stack = NULL;
+	}
+	if (task_mapping->mm) {
+		kvm_clear_range_mapping(task_mapping->mm);
+		task_mapping->mm = NULL;
+	}
+	if (task_mapping->pgd) {
+		kvm_clear_range_mapping(task_mapping->pgd);
+		task_mapping->pgd = NULL;
+	}
+}
+
+/*
+ * Map the task struct, stack, active mm and PGD of a task into the KVM
+ * address space, recording each successfully mapped part in task_mapping.
+ * On failure, parts already mapped are unmapped and the error is returned.
+ */
+static int kvm_copy_task_mapping(struct task_struct *tsk,
+				 struct kvm_task_mapping *task_mapping)
+{
+	int err;
+
+	err = kvm_copy_ptes(tsk, sizeof(struct task_struct));
+	if (err)
+		goto out_clear_task_mapping;
+	task_mapping->task = tsk;
+
+	err = kvm_copy_ptes(tsk->stack, THREAD_SIZE);
+	if (err)
+		goto out_clear_task_mapping;
+	task_mapping->stack = tsk->stack;
+
+	err = kvm_copy_ptes(tsk->active_mm, sizeof(struct mm_struct));
+	if (err)
+		goto out_clear_task_mapping;
+	task_mapping->mm = tsk->active_mm;
+
+	err = kvm_copy_ptes(tsk->active_mm->pgd,
+			    PAGE_SIZE << PGD_ALLOCATION_ORDER);
+	if (err)
+		goto out_clear_task_mapping;
+	task_mapping->pgd = tsk->active_mm->pgd;
+
+	return 0;
+
+out_clear_task_mapping:
+	kvm_clear_task_mapping(task_mapping);
+	return err;
+}
+
+/*
+ * Map a task into the KVM address space and take a reference on it.
+ * Does nothing if the task is already mapped. Returns 0 on success,
+ * -ENOMEM or the mapping error otherwise.
+ */
+int kvm_add_task_mapping(struct task_struct *tsk)
+{
+	struct kvm_task_mapping *task_mapping;
+	int err;
+
+	mutex_lock(&kvm_task_mapping_lock);
+
+	if (tsk->kvm_mapped) {
+		mutex_unlock(&kvm_task_mapping_lock);
+		return 0;
+	}
+
+	task_mapping = kzalloc(sizeof(struct kvm_task_mapping), GFP_KERNEL);
+	if (!task_mapping) {
+		mutex_unlock(&kvm_task_mapping_lock);
+		return -ENOMEM;
+	}
+	INIT_LIST_HEAD(&task_mapping->list);
+
+	/*
+	 * Ensure that the task and its stack are mapped into the KVM
+	 * address space. Also map the task mm to be able to switch back
+	 * to the original mm, and its PGD directory.
+	 */
+	pr_debug("mapping task %px\n", tsk);
+	err = kvm_copy_task_mapping(tsk, task_mapping);
+	if (err) {
+		kfree(task_mapping);
+		mutex_unlock(&kvm_task_mapping_lock);
+		return err;
+	}
+
+	get_task_struct(tsk);
+	list_add(&task_mapping->list, &kvm_task_mapping_list);
+	tsk->kvm_mapped = true;
+
+	mutex_unlock(&kvm_task_mapping_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(kvm_add_task_mapping);
+
+/*
+ * Look up the mapping entry of a task. Called with
+ * kvm_task_mapping_lock held.
+ */
+static struct kvm_task_mapping *kvm_find_task_mapping(struct task_struct *tsk)
+{
+	struct kvm_task_mapping *task_mapping;
+
+	list_for_each_entry(task_mapping, &kvm_task_mapping_list, list) {
+		if (task_mapping->task == tsk)
+			return task_mapping;
+	}
+	return NULL;
+}
+
+/*
+ * Unmap a task from the KVM address space and drop the reference taken
+ * by kvm_add_task_mapping(). Does nothing if the task is not mapped.
+ */
+void kvm_cleanup_task_mapping(struct task_struct *tsk)
+{
+	struct kvm_task_mapping *task_mapping;
+
+	mutex_lock(&kvm_task_mapping_lock);
+
+	if (!tsk->kvm_mapped)
+		goto out_unlock;
+
+	task_mapping = kvm_find_task_mapping(tsk);
+	if (!task_mapping) {
+		pr_debug("KVM isolation: mapping not found for mapped task %px\n",
+			 tsk);
+		tsk->kvm_mapped = false;
+		goto out_unlock;
+	}
+
+	pr_debug("unmapping task %px\n", tsk);
+
+	list_del(&task_mapping->list);
+	kvm_clear_task_mapping(task_mapping);
+	kfree(task_mapping);
+	tsk->kvm_mapped = false;
+	put_task_struct(tsk);
+
+out_unlock:
+	mutex_unlock(&kvm_task_mapping_lock);
+}
+EXPORT_SYMBOL(kvm_cleanup_task_mapping);
+
+/*
+ * Mark all tasks which have been mapped into the KVM address space
+ * as not mapped, and release the tracking entries and the task
+ * references held for them. Page table mappings remain in the KVM
+ * page table; they will be effectively removed when deleting the
+ * KVM page table.
+ */
+static void kvm_reset_all_task_mapping(void)
+{
+	struct kvm_task_mapping *task_mapping, *next;
+	struct task_struct *tsk;
+
+	mutex_lock(&kvm_task_mapping_lock);
+	list_for_each_entry_safe(task_mapping, next, &kvm_task_mapping_list,
+				 list) {
+		tsk = task_mapping->task;
+		pr_debug("clear mapping for task %px\n", tsk);
+		tsk->kvm_mapped = false;
+		/*
+		 * Drop the entry along with the task reference: a stale
+		 * entry would leak, and could later match a recycled
+		 * task_struct address.
+		 */
+		list_del(&task_mapping->list);
+		kfree(task_mapping);
+		put_task_struct(tsk);
+	}
+	mutex_unlock(&kvm_task_mapping_lock);
+}
+
 static int kvm_isolation_init_page_table(void)
 {
 
@@ -1195,6 +1397,7 @@ static void kvm_isolation_uninit_mm(void)
 
 	destroy_context(&kvm_mm);
 
+	kvm_reset_all_task_mapping();
 	kvm_isolation_uninit_page_table();
 	kvm_free_all_range_mapping();
 
@@ -1227,6 +1430,8 @@ int kvm_isolation_init_vm(struct kvm *kvm)
 	if (!kvm_isolation())
 		return 0;
 
+	pr_debug("mapping kvm srcu sda\n");
+
 	return (kvm_copy_percpu_mapping(kvm->srcu.sda,
 					sizeof(struct srcu_data)));
 }
@@ -1236,6 +1441,8 @@ void kvm_isolation_destroy_vm(struct kvm *kvm)
 	if (!kvm_isolation())
 		return;
 
+	pr_debug("unmapping kvm srcu sda\n");
+
 	kvm_clear_percpu_mapping(kvm->srcu.sda);
 }
 
@@ -1276,12 +1483,21 @@ void kvm_may_access_sensitive_data(struct kvm_vcpu *vcpu)
 
 void kvm_isolation_enter(void)
 {
+	int err;
+
 	if (kvm_isolation()) {
 		/*
 		 * Switches to kvm_mm should happen from vCPU thread,
 		 * which should not be a kernel thread with no mm
 		 */
 		BUG_ON(current->active_mm == NULL);
+
+		err = kvm_add_task_mapping(current);
+		if (err) {
+			pr_err("KVM isolation cancelled (failed to map task %px)\n",
+			       current);
+			return;
+		}
 		/* TODO: switch to kvm_mm */
 	}
 }
diff --git a/arch/x86/kvm/isolation.h b/arch/x86/kvm/isolation.h
index 33e9a87..2d7d016 100644
--- a/arch/x86/kvm/isolation.h
+++ b/arch/x86/kvm/isolation.h
@@ -32,5 +32,7 @@ static inline bool kvm_isolation(void)
 extern void kvm_clear_range_mapping(void *ptr);
 extern int kvm_copy_percpu_mapping(void *percpu_ptr, size_t size);
 extern void kvm_clear_percpu_mapping(void *percpu_ptr);
+extern int kvm_add_task_mapping(struct task_struct *tsk);
+extern void kvm_cleanup_task_mapping(struct task_struct *tsk);
 
 #endif
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cbbaf58..9ed31c2 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6576,6 +6576,9 @@ static void vmx_unmap_vcpu(struct vcpu_vmx *vmx)
 	kvm_clear_range_mapping(vmx->vmcs01.msr_bitmap);
 	kvm_clear_range_mapping(vmx->vcpu.arch.pio_data);
 	kvm_clear_range_mapping(vmx->vcpu.arch.apic);
+
+	/* XXX assume there's a 1:1 mapping between a task and a vcpu */
+	kvm_cleanup_task_mapping(current);
 }
 
 static int vmx_map_vcpu(struct vcpu_vmx *vmx)
@@ -6614,6 +6617,11 @@ static int vmx_map_vcpu(struct vcpu_vmx *vmx)
 	if (rv)
 		goto out_unmap_vcpu;
 
+	/* XXX assume there's a 1:1 mapping between a task and a vcpu */
+	rv = kvm_add_task_mapping(current);
+	if (rv)
+		goto out_unmap_vcpu;
+
 	return 0;
 
 out_unmap_vcpu:
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 50606a6..80e1d75 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1199,6 +1199,11 @@ struct task_struct {
 	unsigned long			prev_lowest_stack;
 #endif
 
+#ifdef CONFIG_HAVE_KVM
+	/* Is the task mapped into the KVM address space? */
+	bool				kvm_mapped;
+#endif
+
 	/*
 	 * New fields for task_struct should be added above here, so that
 	 * they are included in the randomized portion of task_struct.
-- 
1.7.1