New version below. Changes: - __pmdp_for_addr and ptep_for_addr now take a vma as an argument - the check whether a vma exists has moved to gmap_fault and kvm_s390_keyop - kvm_s390_keyop verifies that the vma is writable so that it's safe to set the SWC bit Subject: [PATCH] kvm-s390: storage key interface From: Carsten Otte <cotte@xxxxxxxxxx> This patch introduces an interface to access the guest visible storage keys. It supports three operations that model the behavior that SSKE/ISKE/RRBE instructions would have if they were issued by the guest. These instructions are all documented in the z/Architecture Principles of Operation book. Signed-off-by: Carsten Otte <cotte@xxxxxxxxxx> --- --- Documentation/virtual/kvm/api.txt | 38 ++++++++++++++ arch/s390/include/asm/kvm_host.h | 4 + arch/s390/include/asm/pgtable.h | 1 arch/s390/kvm/kvm-s390.c | 103 ++++++++++++++++++++++++++++++++++++-- arch/s390/mm/pgtable.c | 70 +++++++++++++++++++------ include/linux/kvm.h | 7 ++ 6 files changed, 202 insertions(+), 21 deletions(-) Index: linux-2.5-cecsim/Documentation/virtual/kvm/api.txt =================================================================== --- linux-2.5-cecsim.orig/Documentation/virtual/kvm/api.txt +++ linux-2.5-cecsim/Documentation/virtual/kvm/api.txt @@ -1494,6 +1494,44 @@ table upfront. This is useful to handle controlled virtual machines to fault in the virtual cpu's lowcore pages prior to calling the KVM_RUN ioctl. 
+4.67 KVM_S390_KEYOP + +Capability: KVM_CAP_S390_UCONTROL +Architectures: s390 +Type: vm ioctl +Parameters: struct kvm_s390_keyop (in+out) +Returns: 0 in case of success + +The parameter looks like this: + struct kvm_s390_keyop { + __u64 user_addr; + __u8 key; + __u8 operation; + }; + +user_addr contains the userspace address of a memory page +key contains the guest visible storage key as defined by the + z Architecture Principles of Operation book, including key + value for key controlled storage protection, the fetch + protection bit, and the reference and change indicator bits +operation indicates the key operation that should be performed + +The following operations are supported: +KVM_S390_KEYOP_SSKE: + This operation behaves just like the set storage key extended (SSKE) + instruction would, if it were issued by the guest. The storage key + provided in "key" is placed in the guest visible storage key. +KVM_S390_KEYOP_ISKE: + This operation behaves just like the insert storage key extended (ISKE) + instruction would, if it were issued by the guest. After this call, + the guest visible storage key is presented in the "key" field. +KVM_S390_KEYOP_RRBE: + This operation behaves just like the reset referenced bit extended + (RRBE) instruction would, if it were issued by the guest. The guest + visible reference bit is cleared, and the value presented in the "key" + field after this call has the reference bit set to 1 in case the + guest view of the reference bit was 1 prior to this call. + 5. 
The kvm_run structure Application code obtains a pointer to the kvm_run structure by Index: linux-2.5-cecsim/arch/s390/include/asm/kvm_host.h =================================================================== --- linux-2.5-cecsim.orig/arch/s390/include/asm/kvm_host.h +++ linux-2.5-cecsim/arch/s390/include/asm/kvm_host.h @@ -24,6 +24,10 @@ /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_S390_KEYOP_SSKE 0x01 +#define KVM_S390_KEYOP_ISKE 0x02 +#define KVM_S390_KEYOP_RRBE 0x03 + struct sca_entry { atomic_t scn; __u32 reserved; Index: linux-2.5-cecsim/arch/s390/include/asm/pgtable.h =================================================================== --- linux-2.5-cecsim.orig/arch/s390/include/asm/pgtable.h +++ linux-2.5-cecsim/arch/s390/include/asm/pgtable.h @@ -1237,6 +1237,7 @@ static inline pte_t mk_swap_pte(unsigned extern int vmem_add_mapping(unsigned long start, unsigned long size); extern int vmem_remove_mapping(unsigned long start, unsigned long size); extern int s390_enable_sie(void); +extern pte_t *ptep_for_addr(unsigned long addr, struct vm_area_struct *); /* * No page table caches to initialise Index: linux-2.5-cecsim/arch/s390/kvm/kvm-s390.c =================================================================== --- linux-2.5-cecsim.orig/arch/s390/kvm/kvm-s390.c +++ linux-2.5-cecsim/arch/s390/kvm/kvm-s390.c @@ -112,13 +112,127 @@ void kvm_arch_exit(void) { } +static long kvm_s390_keyop(struct kvm_s390_keyop *kop) +{ + struct vm_area_struct *vma; + unsigned long addr = kop->user_addr; + pte_t *ptep; + pgste_t pgste; + int r; + unsigned long skey; + unsigned long bits; + + /* make sure this process is a hypervisor */ + r = -EINVAL; + if (!mm_has_pgste(current->mm)) + goto out; + + r = -EFAULT; + if (addr >= PGDIR_SIZE) + goto out; + + down_read(&current->mm->mmap_sem); + r = -EFAULT; + vma = find_vma(current->mm, addr); + if (!vma || (vma->vm_start > addr)) + goto out_unlock; + + ptep = ptep_for_addr(addr, vma); + if 
(IS_ERR(ptep)) { + r = PTR_ERR(ptep); + goto out_unlock; + } + + spin_lock(&current->mm->page_table_lock); + pgste = pgste_get_lock(ptep); + + switch (kop->operation) { + case KVM_S390_KEYOP_SSKE: + if (!(vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) { + r = -EACCES; + break; + } + pgste = pgste_update_all(ptep, pgste); + /* set the real key back w/o rc bits */ + skey = kop->key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); + if (pte_present(*ptep)) { + page_set_storage_key(pte_val(*ptep), skey, 1); + /* avoid race clobbering changed bit */ + pte_val(*ptep) |= _PAGE_SWC; + } + /* put acc+f plus guest referenced and changed into the pgste */ + pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT | RCP_GR_BIT + | RCP_GC_BIT); + bits = (kop->key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)); + pgste_val(pgste) |= bits << 56; + bits = (kop->key & (_PAGE_CHANGED | _PAGE_REFERENCED)); + pgste_val(pgste) |= bits << 48; + r = 0; + break; + case KVM_S390_KEYOP_ISKE: + if (pte_present(*ptep)) { + skey = page_get_storage_key(pte_val(*ptep)); + kop->key = skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT); + } else { + skey = 0; + kop->key = (pgste_val(pgste) >> 56) & + (_PAGE_ACC_BITS | _PAGE_FP_BIT); + } + kop->key |= skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + kop->key |= (pgste_val(pgste) >> 48) & + (_PAGE_CHANGED | _PAGE_REFERENCED); + r = 0; + break; + case KVM_S390_KEYOP_RRBE: + pgste = pgste_update_all(ptep, pgste); + kop->key = 0; + if (pgste_val(pgste) & RCP_GR_BIT) + kop->key |= _PAGE_REFERENCED; + pgste_val(pgste) &= ~RCP_GR_BIT; + r = 0; + break; + default: + r = -EINVAL; + } + pgste_set_unlock(ptep, pgste); + spin_unlock(&current->mm->page_table_lock); + +out_unlock: + up_read(&current->mm->mmap_sem); +out: + return r; +} + /* Section: device related */ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { - if (ioctl == KVM_S390_ENABLE_SIE) - return s390_enable_sie(); - return -EINVAL; + void __user *argp = (void __user *)arg; + int r; + + switch (ioctl) { + case KVM_S390_ENABLE_SIE: + r = 
s390_enable_sie(); + break; + case KVM_S390_KEYOP: { + struct kvm_s390_keyop kop; + r = -EFAULT; + if (copy_from_user(&kop, argp, sizeof(struct kvm_s390_keyop))) + break; + r = kvm_s390_keyop(&kop); + if (r) + break; + r = -EFAULT; + if (copy_to_user(argp, &kop, sizeof(struct kvm_s390_keyop))) + break; + r = 0; + break; + } + default: + r = -ENOTTY; + } + + return r; } int kvm_dev_ioctl_check_extension(long ext) Index: linux-2.5-cecsim/arch/s390/mm/pgtable.c =================================================================== --- linux-2.5-cecsim.orig/arch/s390/mm/pgtable.c +++ linux-2.5-cecsim/arch/s390/mm/pgtable.c @@ -382,6 +382,29 @@ out_unmap: } EXPORT_SYMBOL_GPL(gmap_map_segment); +static pmd_t *__pmdp_for_addr(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (!pud) + return ERR_PTR(-ENOMEM); + + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return ERR_PTR(-ENOMEM); + + if (!pmd_present(*pmd) && + __pte_alloc(mm, vma, pmd, addr)) + return ERR_PTR(-ENOMEM); + + return pmd; +} + /* * this function is assumed to be called with mmap_sem held */ @@ -391,10 +414,8 @@ unsigned long __gmap_fault(unsigned long struct mm_struct *mm; struct gmap_pgtable *mp; struct gmap_rmap *rmap; - struct vm_area_struct *vma; struct page *page; - pgd_t *pgd; - pud_t *pud; + struct vm_area_struct *vma; pmd_t *pmd; current->thread.gmap_addr = address; @@ -422,21 +443,15 @@ unsigned long __gmap_fault(unsigned long return mp->vmaddr | (address & ~PMD_MASK); } else if (segment & _SEGMENT_ENTRY_RO) { vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; + vma = find_vma(mm, vmaddr); - if (!vma || vma->vm_start > vmaddr) + if (!vma || (vma->vm_start > vmaddr)) return -EFAULT; - /* Walk the parent mm page table */ - pgd = pgd_offset(mm, vmaddr); - pud = pud_alloc(mm, pgd, vmaddr); - if (!pud) - return -ENOMEM; - pmd = pmd_alloc(mm, pud, vmaddr); - if (!pmd) - return 
-ENOMEM; - if (!pmd_present(*pmd) && - __pte_alloc(mm, vma, pmd, vmaddr)) - return -ENOMEM; + pmd = __pmdp_for_addr(mm, vma, vmaddr); + if (IS_ERR(pmd)) + return PTR_ERR(pmd); + /* pmd now points to a valid segment table entry. */ rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); if (!rmap) @@ -795,6 +810,20 @@ int s390_enable_sie(void) } EXPORT_SYMBOL_GPL(s390_enable_sie); +pte_t *ptep_for_addr(unsigned long addr, struct vm_area_struct *vma) +{ + pmd_t *pmd; + pte_t *pte; + + pmd = __pmdp_for_addr(current->mm, vma, addr); + if (IS_ERR(pmd)) + return (pte_t *)pmd; + + pte = pte_offset(pmd, addr); + return pte; +} +EXPORT_SYMBOL_GPL(ptep_for_addr); + #if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION) bool kernel_page_present(struct page *page) { Index: linux-2.5-cecsim/include/linux/kvm.h =================================================================== --- linux-2.5-cecsim.orig/include/linux/kvm.h +++ linux-2.5-cecsim/include/linux/kvm.h @@ -450,6 +450,13 @@ struct kvm_ppc_pvinfo { #define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list) #define KVM_S390_ENABLE_SIE _IO(KVMIO, 0x06) + +struct kvm_s390_keyop { + __u64 user_addr; + __u8 key; + __u8 operation; +}; +#define KVM_S390_KEYOP _IOWR(KVMIO, 0x09, struct kvm_s390_keyop) /* * Check if a kvm extension is available. Argument is extension number, * return is 1 (yes) or 0 (no, sorry). -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html