From: Carsten Otte <cotte@xxxxxxxxxx> This patch gives userspace access to the guest visible storage keys. Three operations are supported: KVM_S390_KEYOP_SSKE for setting storage keys, similar to the set storage key extended (SSKE) instruction. KVM_S390_KEYOP_ISKE for reading storage key content, similar to the insert storage key extended (ISKE) instruction. KVM_S390_KEYOP_RRBE for reading and resetting the page reference bit, similar to the reset reference bit extended (RRBE) instruction. Note that all functions take userspace addresses as input, which typically differ from guest addresses. This work was requested by Alex Graf for guest live migration: Unlike on x86, the guest's view of dirty and reference information is not stored in the page table entries that are part of the guest address space but is stored in the storage key instead. Thus, the storage key needs to be read, transferred, and written back on the migration target side. Signed-off-by: Carsten Otte <cotte@xxxxxxxxxx> --- arch/s390/include/asm/kvm_host.h | 4 + arch/s390/kvm/kvm-s390.c | 149 ++++++++++++++++++++++++++++++++++++++- include/linux/kvm.h | 7 + 3 files changed, 157 insertions(+), 3 deletions(-) Index: linux-2.6/arch/s390/include/asm/kvm_host.h =================================================================== --- linux-2.6.orig/arch/s390/include/asm/kvm_host.h +++ linux-2.6/arch/s390/include/asm/kvm_host.h @@ -47,6 +47,10 @@ struct sca_block { #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) +#define KVM_S390_KEYOP_SSKE 0x01 +#define KVM_S390_KEYOP_ISKE 0x02 +#define KVM_S390_KEYOP_RRBE 0x03 + #define CPUSTAT_HOST 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 Index: linux-2.6/arch/s390/kvm/kvm-s390.c =================================================================== --- linux-2.6.orig/arch/s390/kvm/kvm-s390.c +++ linux-2.6/arch/s390/kvm/kvm-s390.c @@ -107,13 +107,156 @@ void
kvm_arch_exit(void)
 {
 }
 
+static inline int addr_test_and_clear_young(unsigned long physpage)
+{
+	int ccode;
+
+	asm volatile(
+		" rrbe 0,%1\n"
+		" ipm %0\n"
+		" srl %0,28\n"
+		: "=d" (ccode) : "a" (physpage) : "cc" );
+	return ccode & 2;
+}
+
+/* Apply kop->operation to the storage key of the page at kop->user_addr. */
+static long kvm_s390_keyop(struct kvm_s390_keyop *kop)
+{
+	unsigned long addr = kop->user_addr;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep;
+	unsigned long *pgste;
+	int r;
+	unsigned int skey;
+
+	/* make sure this process is a hypervisor */
+	r = -EINVAL;
+	if (!current->mm->context.has_pgste)
+		goto out;
+
+	r = -ENXIO;	/* also returned when the page table walk below fails */
+	if (addr >= PGDIR_SIZE)
+		goto out;
+
+	spin_lock(&current->mm->page_table_lock);
+
+	pgd = pgd_offset(current->mm, addr);
+	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+		goto out_unlock;
+
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+		goto out_unlock;
+
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+		goto out_unlock;
+
+	ptep = pte_offset(pmd, addr);
+	pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
+
+	switch (kop->operation) {
+	case KVM_S390_KEYOP_SSKE:
+		rcp_lock(ptep);	/* released by the rcp_unlock() below */
+		/* fetch real key and copy down rc bits */
+		if (pte_present(*ptep))
+			skey = page_get_storage_key(pte_val(*ptep));
+		else
+			skey = 0;
+		if (skey & _PAGE_CHANGED)
+			set_bit_simple(KVM_UD_BIT, pgste);
+		set_bit_simple(RCP_HC_BIT, pgste);
+		set_bit_simple(RCP_HR_BIT, pgste);
+		/* set the real key back w/o rc bits */
+		skey = kop->key & 0xf8;
+		if (pte_present(*ptep))
+			page_set_storage_key(pte_val(*ptep), skey, 1);
+		/* put user content into the pgste */
+		*pgste &= 0x07f9fffffffffffful;
+		*pgste |= (kop->key & 0xf8ul) << 56;
+		if (kop->key & _PAGE_CHANGED)
+			set_bit_simple(RCP_GC_BIT, pgste);
+		else
+			clear_bit_simple(RCP_GC_BIT, pgste);
+		if (kop->key & _PAGE_REFERENCED)
+			set_bit_simple(RCP_GR_BIT, pgste);
+		else
+			clear_bit_simple(RCP_GR_BIT, pgste);
+		rcp_unlock(ptep);
+		r = 0;
+		break;
+	case KVM_S390_KEYOP_ISKE:
+		rcp_lock(ptep);
+		if (pte_present(*ptep))
+			skey = page_get_storage_key(pte_val(*ptep));
+		else
+			skey = 0;
+		kop->key = (*pgste & 0xf800000000000000ul) >> 56;
+		if ((skey & _PAGE_CHANGED)
+		    || test_bit(RCP_GC_BIT, pgste))
+			kop->key |= _PAGE_CHANGED;
+		if ((skey & _PAGE_REFERENCED)
+		    || test_bit(RCP_GR_BIT, pgste))
+			kop->key |= _PAGE_REFERENCED;
+		rcp_unlock(ptep);
+		r = 0;
+		break;
+	case KVM_S390_KEYOP_RRBE:
+		rcp_lock(ptep);
+		kop->key = 0;
+		if (pte_present(*ptep)
+		    && addr_test_and_clear_young(pte_val(*ptep))) {
+			kop->key |= _PAGE_REFERENCED;
+			set_bit_simple(RCP_HR_BIT, pgste);
+		}
+		if (test_bit(RCP_GR_BIT, pgste))
+			kop->key |= _PAGE_REFERENCED;
+		clear_bit_simple(RCP_GR_BIT, pgste);
+		rcp_unlock(ptep);
+		r = 0;
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+out_unlock:
+	spin_unlock(&current->mm->page_table_lock);
+out:
+	return r;
+}
+
 /* Section: device related */
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg)
 {
-	if (ioctl == KVM_S390_ENABLE_SIE)
-		return s390_enable_sie();
-	return -EINVAL;
+	void __user *argp = (void __user *)arg;
+	int r;
+
+	switch (ioctl) {
+	case KVM_S390_ENABLE_SIE:
+		r = s390_enable_sie();
+		break;
+	case KVM_S390_KEYOP: {
+		struct kvm_s390_keyop kop;
+		r = -EFAULT;
+		if (copy_from_user(&kop, argp, sizeof(struct kvm_s390_keyop)))
+			break;
+		r = kvm_s390_keyop(&kop);
+		if (r)
+			break;
+		r = -EFAULT;
+		if (copy_to_user(argp, &kop, sizeof(struct kvm_s390_keyop)))
+			break;
+		r = 0;
+		break;
+	}
+	default:
+		r = -EINVAL;
+	}
+
+	return r;
 }
 
 int kvm_dev_ioctl_check_extension(long ext)
Index: linux-2.6/include/linux/kvm.h
===================================================================
--- linux-2.6.orig/include/linux/kvm.h
+++ linux-2.6/include/linux/kvm.h
@@ -432,6 +432,13 @@ struct kvm_ppc_pvinfo {
 #define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list)
 
 #define KVM_S390_ENABLE_SIE _IO(KVMIO, 0x06)
+
+struct kvm_s390_keyop {
+	__u64 user_addr;
+	__u8 key;
+	__u8 operation;
+};
+#define KVM_S390_KEYOP _IOWR(KVMIO, 0x09, struct
kvm_s390_keyop) /* * Check if a kvm extension is available. Argument is extension number, * return is 1 (yes) or 0 (no, sorry). -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html