This patch introduces an interface to access the guest visible storage keys. It supports three operations that model the behavior that SSKE/ISKE/RRBE instructions would have if they were issued by the guest. These instructions are all documented in the z architecture principles of operation book. Signed-off-by: Carsten Otte <cotte@xxxxxxxxxx> --- --- Documentation/virtual/kvm/api.txt | 38 +++++++++++++ arch/s390/include/asm/kvm_host.h | 4 + arch/s390/include/asm/pgtable.h | 1 arch/s390/kvm/kvm-s390.c | 106 ++++++++++++++++++++++++++++++++++++-- arch/s390/mm/pgtable.c | 70 ++++++++++++++++++------- include/linux/kvm.h | 7 ++ 6 files changed, 205 insertions(+), 21 deletions(-) --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1514,6 +1514,44 @@ table upfront. This is useful to handle controlled virtual machines to fault in the virtual cpu's lowcore pages prior to calling the KVM_RUN ioctl. +4.67 KVM_S390_KEYOP + +Capability: KVM_CAP_UCONTROL +Architectures: s390 +Type: vm ioctl +Parameters: struct kvm_s390_keyop (in+out) +Returns: 0 in case of success + +The parameter looks like this: + struct kvm_s390_keyop { + __u64 user_addr; + __u8 key; + __u8 operation; + }; + +user_addr contains the userspace address of a memory page +key contains the guest visible storage key as defined by the + z Architecture Principles of Operation book, including key + value for key controlled storage protection, the fetch + protection bit, and the reference and change indicator bits +operation indicates the key operation that should be performed + +The following operations are supported: +KVM_S390_KEYOP_SSKE: + This operation behaves just like the set storage key extended (SSKE) + instruction would, if it were issued by the guest. The storage key + provided in "key" is placed in the guest visible storage key. +KVM_S390_KEYOP_ISKE: + This operation behaves just like the insert storage key extended (ISKE) + instruction would, if it were issued by the guest. 
After this call, + the guest visible storage key is presented in the "key" field. +KVM_S390_KEYOP_RRBE: + This operation behaves just like the reset referenced bit extended + (RRBE) instruction would, if it were issued by the guest. The guest + visible reference bit is cleared, and the value presented in the "key" + field after this call has the reference bit set to 1 in case the + guest view of the reference bit was 1 prior to this call. + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -24,6 +24,10 @@ /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_S390_KEYOP_SSKE 0x01 +#define KVM_S390_KEYOP_ISKE 0x02 +#define KVM_S390_KEYOP_RRBE 0x03 + struct sca_entry { atomic_t scn; __u32 reserved; --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1254,6 +1254,7 @@ static inline pte_t mk_swap_pte(unsigned extern int vmem_add_mapping(unsigned long start, unsigned long size); extern int vmem_remove_mapping(unsigned long start, unsigned long size); extern int s390_enable_sie(void); +extern pte_t *ptep_for_addr(unsigned long addr); /* * No page table caches to initialise --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -112,13 +112,113 @@ void kvm_arch_exit(void) { } +static long kvm_s390_keyop(struct kvm_s390_keyop *kop) +{ + unsigned long addr = kop->user_addr; + pte_t *ptep; + pgste_t pgste; + int r; + unsigned long skey; + unsigned long bits; + + /* make sure this process is a hypervisor */ + r = -EINVAL; + if (!mm_has_pgste(current->mm)) + goto out; + + r = -ENXIO; + if (addr >= PGDIR_SIZE) + goto out; + + spin_lock(&current->mm->page_table_lock); + ptep = ptep_for_addr(addr); + if (!ptep) + goto out_unlock; + + pgste = pgste_get_lock(ptep); + + switch (kop->operation) { + case KVM_S390_KEYOP_SSKE: + pgste = pgste_update_all(ptep, pgste); + /* set the real key back 
w/o rc bits */ + skey = kop->key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); + if (pte_present(*ptep)) { + page_set_storage_key(pte_val(*ptep), skey, 1); + /* avoid race clobbering changed bit */ + pte_val(*ptep) |= _PAGE_SWC; + } + /* put acc+f plus guest refereced and changed into the pgste */ + pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT | RCP_GR_BIT + | RCP_GC_BIT); + bits = (kop->key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)); + pgste_val(pgste) |= bits << 56; + bits = (kop->key & (_PAGE_CHANGED | _PAGE_REFERENCED)); + pgste_val(pgste) |= bits << 48; + r = 0; + break; + case KVM_S390_KEYOP_ISKE: + if (pte_present(*ptep)) { + skey = page_get_storage_key(pte_val(*ptep)); + kop->key = skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT); + } else { + skey = 0; + kop->key = (pgste_val(pgste) >> 56) & + (_PAGE_ACC_BITS | _PAGE_FP_BIT); + } + kop->key |= skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + kop->key |= (pgste_val(pgste) >> 48) & + (_PAGE_CHANGED | _PAGE_REFERENCED); + r = 0; + break; + case KVM_S390_KEYOP_RRBE: + pgste = pgste_update_all(ptep, pgste); + kop->key = 0; + if (pgste_val(pgste) & RCP_GR_BIT) + kop->key |= _PAGE_REFERENCED; + pgste_val(pgste) &= ~RCP_GR_BIT; + r = 0; + break; + default: + r = -EINVAL; + } + pgste_set_unlock(ptep, pgste); + +out_unlock: + spin_unlock(&current->mm->page_table_lock); +out: + return r; +} + /* Section: device related */ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { - if (ioctl == KVM_S390_ENABLE_SIE) - return s390_enable_sie(); - return -EINVAL; + void __user *argp = (void __user *)arg; + int r; + + switch (ioctl) { + case KVM_S390_ENABLE_SIE: + r = s390_enable_sie(); + break; + case KVM_S390_KEYOP: { + struct kvm_s390_keyop kop; + r = -EFAULT; + if (copy_from_user(&kop, argp, sizeof(struct kvm_s390_keyop))) + break; + r = kvm_s390_keyop(&kop); + if (r) + break; + r = -EFAULT; + if (copy_to_user(argp, &kop, sizeof(struct kvm_s390_keyop))) + break; + r = 0; + break; + } + default: + r = -ENOTTY; + } + + return r; 
} int kvm_dev_ioctl_check_extension(long ext) --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -393,6 +393,33 @@ out_unmap: } EXPORT_SYMBOL_GPL(gmap_map_segment); +static pmd_t *__pmdp_for_addr(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + vma = find_vma(mm, addr); + if (!vma) + return ERR_PTR(-EINVAL); + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (!pud) + return ERR_PTR(-ENOMEM); + + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return ERR_PTR(-ENOMEM); + + if (!pmd_present(*pmd) && + __pte_alloc(mm, vma, pmd, addr)) + return ERR_PTR(-ENOMEM); + + return pmd; +} + /* * this function is assumed to be called with mmap_sem held */ @@ -402,10 +429,7 @@ unsigned long __gmap_fault(unsigned long struct mm_struct *mm; struct gmap_pgtable *mp; struct gmap_rmap *rmap; - struct vm_area_struct *vma; struct page *page; - pgd_t *pgd; - pud_t *pud; pmd_t *pmd; current->thread.gmap_addr = address; @@ -433,21 +457,11 @@ unsigned long __gmap_fault(unsigned long return mp->vmaddr | (address & ~PMD_MASK); } else if (segment & _SEGMENT_ENTRY_RO) { vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; - vma = find_vma(mm, vmaddr); - if (!vma || vma->vm_start > vmaddr) - return -EFAULT; - - /* Walk the parent mm page table */ - pgd = pgd_offset(mm, vmaddr); - pud = pud_alloc(mm, pgd, vmaddr); - if (!pud) - return -ENOMEM; - pmd = pmd_alloc(mm, pud, vmaddr); - if (!pmd) - return -ENOMEM; - if (!pmd_present(*pmd) && - __pte_alloc(mm, vma, pmd, vmaddr)) - return -ENOMEM; + + pmd = __pmdp_for_addr(mm, vmaddr); + if (IS_ERR(pmd)) + return PTR_ERR(pmd); + /* pmd now points to a valid segment table entry. 
*/ rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); if (!rmap) @@ -806,6 +820,26 @@ int s390_enable_sie(void) } EXPORT_SYMBOL_GPL(s390_enable_sie); +pte_t *ptep_for_addr(unsigned long addr) +{ + pmd_t *pmd; + pte_t *rc; + + down_read(&current->mm->mmap_sem); + + pmd = __pmdp_for_addr(current->mm, addr); + if (IS_ERR(pmd)) { + rc = (pte_t *)pmd; + goto up_out; + } + + rc = pte_offset(pmd, addr); +up_out: + up_read(&current->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(ptep_for_addr); + #if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION) bool kernel_page_present(struct page *page) { --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -449,6 +449,13 @@ struct kvm_ppc_pvinfo { #define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list) #define KVM_S390_ENABLE_SIE _IO(KVMIO, 0x06) + +struct kvm_s390_keyop { + __u64 user_addr; + __u8 key; + __u8 operation; +}; +#define KVM_S390_KEYOP _IOWR(KVMIO, 0x09, struct kvm_s390_keyop) /* * Check if a kvm extension is available. Argument is extension number, * return is 1 (yes) or 0 (no, sorry). -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html