From: Shaoqin Huang <shaoqin.huang@xxxxxxxxx>

Add the initial page state PKVM_PAGE_OWNED into the PTEs of the host
EPT page table when first creating the host EPT mapping and when
handling EPT violations for MMIO ranges.

In addition, unmapping pages from the host EPT (e.g. the pKVM code and
data memory regions) has an implicit effect: after unmapping, the PTEs
of these pages are cleared to 0, which means the pages are in the
PKVM_NOPAGE state and are owned by the pKVM hypervisor (owner_id = 0).

Also refine the pgtable map API for page-state-only changes and ensure
the page state will not be lost during a pgtable split.

Signed-off-by: Shaoqin Huang <shaoqin.huang@xxxxxxxxx>
Signed-off-by: Chuanxiao Dong <chuanxiao.dong@xxxxxxxxx>
---
 arch/x86/kvm/vmx/pkvm/hyp/ept.c           |  3 ++-
 arch/x86/kvm/vmx/pkvm/hyp/init_finalise.c |  6 +++---
 arch/x86/kvm/vmx/pkvm/hyp/pgtable.c       | 19 +++++++++++++++----
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/vmx/pkvm/hyp/ept.c b/arch/x86/kvm/vmx/pkvm/hyp/ept.c
index de68f8c9eeb0..2a4d6cc7fa81 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/ept.c
+++ b/arch/x86/kvm/vmx/pkvm/hyp/ept.c
@@ -19,6 +19,7 @@
 #include "ept.h"
 #include "memory.h"
 #include "vmx.h"
+#include "mem_protect.h"
 #include "debug.h"
 
 static struct hyp_pool host_ept_pool;
@@ -217,7 +218,7 @@ int handle_host_ept_violation(unsigned long gpa)
 	unsigned long hpa;
 	struct mem_range range, cur;
 	bool is_memory = find_mem_range(gpa, &range);
-	u64 prot = HOST_EPT_DEF_MMIO_PROT;
+	u64 prot = pkvm_mkstate(HOST_EPT_DEF_MMIO_PROT, PKVM_PAGE_OWNED);
 	int level;
 	int ret;
 
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/init_finalise.c b/arch/x86/kvm/vmx/pkvm/hyp/init_finalise.c
index 8d52a20f6497..305b201a787e 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/init_finalise.c
+++ b/arch/x86/kvm/vmx/pkvm/hyp/init_finalise.c
@@ -19,6 +19,7 @@
 #include "vmx.h"
 #include "nested.h"
 #include "debug.h"
+#include "mem_protect.h"
 
 void *pkvm_mmu_pgt_base;
 void *pkvm_vmemmap_base;
@@ -182,8 +183,7 @@ static int create_host_ept_mapping(void)
 	/*
 	 * Create EPT mapping for memory with WB + RWX property
 	 */
-	entry_prot = HOST_EPT_DEF_MEM_PROT;
-
+	entry_prot = pkvm_mkstate(HOST_EPT_DEF_MEM_PROT, PKVM_PAGE_OWNED);
 	for (i = 0; i < hyp_memblock_nr; i++) {
 		reg = &hyp_memory[i];
 		ret = pkvm_host_ept_map((unsigned long)reg->base,
@@ -198,7 +198,7 @@ static int create_host_ept_mapping(void)
 	 * The holes in memblocks are treated as MMIO with the
 	 * mapping UC + RWX.
 	 */
-	entry_prot = HOST_EPT_DEF_MMIO_PROT;
+	entry_prot = pkvm_mkstate(HOST_EPT_DEF_MMIO_PROT, PKVM_PAGE_OWNED);
 	for (i = 0; i < hyp_memblock_nr; i++, phys = reg->base + reg->size) {
 		reg = &hyp_memory[i];
 		ret = pkvm_host_ept_map(phys, phys, (unsigned long)reg->base - phys,
diff --git a/arch/x86/kvm/vmx/pkvm/hyp/pgtable.c b/arch/x86/kvm/vmx/pkvm/hyp/pgtable.c
index 95aef57d8ed7..5854a30dbf8b 100644
--- a/arch/x86/kvm/vmx/pkvm/hyp/pgtable.c
+++ b/arch/x86/kvm/vmx/pkvm/hyp/pgtable.c
@@ -7,6 +7,7 @@
 
 #include "pgtable.h"
 #include "memory.h"
+#include "mem_protect.h"
 #include "debug.h"
 #include "bug.h"
 
@@ -130,6 +131,10 @@ static int pgtable_map_try_leaf(struct pkvm_pgtable *pgt, unsigned long vaddr,
 	}
 
 	if (pgtable_pte_is_counted(old)) {
+		/* if just modify the page state, do set_pte directly */
+		if (!((old ^ new) & ~PKVM_PAGE_STATE_PROT_MASK))
+			goto set_pte;
+
 		if (pgt_ops->pgt_entry_present(ptep)) {
 			pgt_ops->pgt_set_entry(ptep, 0);
 			flush_data->flushtlb |= true;
@@ -140,6 +145,7 @@ static int pgtable_map_try_leaf(struct pkvm_pgtable *pgt, unsigned long vaddr,
 	if (pgtable_pte_is_counted(new))
 		mm_ops->get_page(ptep);
 
+set_pte:
 	pgt_ops->pgt_set_entry(ptep, new);
 	if (pkvm_phys_is_valid(data->phys))
 		data->phys += page_level_size(level);
@@ -175,6 +181,10 @@ static int pgtable_map_walk_leaf(struct pkvm_pgtable *pgt,
 		return -ENOMEM;
 
 	if (pgt_ops->pgt_entry_huge(ptep)) {
+		u64 prot = pgt_ops->pgt_entry_to_prot(ptep);
+
+		prot = pkvm_mkstate(prot, pkvm_getstate(*(u64 *)ptep));
+
 		/*
 		 * Split the large mapping and reuse the
 		 * large mapping's prot. The translation
@@ -184,8 +194,7 @@ static int pgtable_map_walk_leaf(struct pkvm_pgtable *pgt,
 		mm_ops->put_page(ptep);
 		pgtable_split(pgt_ops, mm_ops, ALIGN_DOWN(vaddr, size),
 			      pgt_ops->pgt_entry_to_phys(ptep),
-			      size, page, level - 1,
-			      pgt_ops->pgt_entry_to_prot(ptep));
+			      size, page, level - 1, prot);
 	}
 
 	mm_ops->get_page(ptep);
@@ -283,10 +292,13 @@ static int pgtable_unmap_cb(struct pkvm_pgtable *pgt, unsigned long vaddr,
 		/*
 		 * if it is huge pte, split and goto next level.
 		 */
+		u64 prot = pgt_ops->pgt_entry_to_prot(ptep);
 		void *page = mm_ops->zalloc_page();
 
 		if (!page)
 			return -ENOMEM;
+
+		prot = pkvm_mkstate(prot, pkvm_getstate(*(u64 *)ptep));
 		/*
 		 * Split the large mapping and reuse the
 		 * large mapping's prot. The translation
@@ -295,8 +307,7 @@ static int pgtable_unmap_cb(struct pkvm_pgtable *pgt, unsigned long vaddr,
 		 */
 		pgtable_split(pgt_ops, mm_ops, ALIGN_DOWN(vaddr, size),
 			      pgt_ops->pgt_entry_to_phys(ptep),
-			      size, page, level - 1,
-			      pgt_ops->pgt_entry_to_prot(ptep));
+			      size, page, level - 1, prot);
 		pgt_ops->pgt_set_entry(ptep, pgt->table_prot | mm_ops->virt_to_phys(page));
 		return 0;
 	}
-- 
2.25.1
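
For context, a rough sketch of the page-state encoding the diff relies on.
The real pkvm_mkstate()/pkvm_getstate() helpers and PKVM_PAGE_STATE_PROT_MASK
come from mem_protect.h; the bit positions and the extra shared states below
are assumptions for illustration only, not the actual definitions:

/*
 * Illustration only -- not the real mem_protect.h layout. Assumption:
 * the page state lives in two ignored bits of the EPT PTE, so it is
 * carried inside the prot value and stays in the installed entry.
 */
#include <linux/types.h>

#define PKVM_PAGE_STATE_PROT_MASK	((1ULL << 52) | (1ULL << 53))	/* assumed ignored bits */

#define PKVM_PAGE_OWNED			0ULL		/* exclusively owned, the state set by this patch */
#define PKVM_PAGE_SHARED_OWNED		(1ULL << 52)	/* assumed additional states */
#define PKVM_PAGE_SHARED_BORROWED	(1ULL << 53)
/* PKVM_NOPAGE is not a prot bit: the whole PTE is simply cleared to 0. */

/* Fold a page state into a prot value before it is written into a PTE. */
static inline u64 pkvm_mkstate(u64 prot, u64 state)
{
	return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state;
}

/* Read the page state back out of an existing PTE or prot value. */
static inline u64 pkvm_getstate(u64 pte)
{
	return pte & PKVM_PAGE_STATE_PROT_MASK;
}

With an encoding along those lines, the new pgtable_map_try_leaf() check
!((old ^ new) & ~PKVM_PAGE_STATE_PROT_MASK) is true exactly when the old and
new entries differ only in the state bits, so the walker can rewrite the PTE
in place instead of clearing it, flushing the TLB and re-taking the page
reference; and pkvm_mkstate(prot, pkvm_getstate(*(u64 *)ptep)) in the split
paths carries the huge entry's state down into the smaller entries it is
split into.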