Re: [RFC PATCH v5 1/2] ARM: KVM: one_reg coproc set and get BE fixes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 29 May 2014 08:30, Christoffer Dall <christoffer.dall@xxxxxxxxxx> wrote:
> On Wed, May 28, 2014 at 11:05:42PM -0700, Victor Kamensky wrote:
>> Fix code that handles KVM_SET_ONE_REG, KVM_GET_ONE_REG ioctls to work in BE
>> image. Before this fix get/set_one_reg functions worked correctly only in
>> LE case - reg_from_user was taking 'void *' kernel address that actually could
>> be target/source memory of either 4 bytes size or 8 bytes size, and code copied
>> from/to user memory that could hold either 4 bytes register, 8 byte register
>> or pair of 4 bytes registers.
>>
>> In order to work in endian agnostic way we will enforce rule that reg_from_user
>> to reg_to_user can copy register value only to kernel variable with size that
>> matches register size. For this purpose change functions to macros that check
>> sizeof(*val) against register size.
>>
>> In few place where size mismatch existed fix issue on macro caller side.
>>
>> Signed-off-by: Victor Kamensky <victor.kamensky@xxxxxxxxxx>
>> ---
>>  arch/arm/kvm/coproc.c | 95 +++++++++++++++++++++++++++++++++++++++------------
>>  1 file changed, 74 insertions(+), 21 deletions(-)
>>
>> diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
>> index c58a351..5d45be5 100644
>> --- a/arch/arm/kvm/coproc.c
>> +++ b/arch/arm/kvm/coproc.c
>> @@ -682,26 +682,43 @@ static struct coproc_reg invariant_cp15[] = {
>>       { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR },
>>  };
>>
>> -static int reg_from_user(void *val, const void __user *uaddr, u64 id)
>> -{
>> -     /* This Just Works because we are little endian. */
>> -     if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
>> -             return -EFAULT;
>> -     return 0;
>> -}
>> +/*
>> + * Reads a register value from a userspace address to a kernel
>> + * variable. Note register size must match sizeof(*__val).
>> + */
>> +#define reg_from_user(__val, __uaddr, __id) ({                               \
>> +     int __ret = 0;                                                  \
>> +     unsigned long __regsize = KVM_REG_SIZE(__id);                   \
>> +     if (sizeof(*(__val)) != __regsize) {                            \
>> +             __ret = -ENOENT;                                        \
>> +     } else {                                                        \
>> +         if (copy_from_user((__val), (__uaddr), __regsize) != 0)     \
>> +             __ret = -EFAULT;                                        \
>> +     }                                                               \
>> +     __ret;                                                          \
>> +})
>>
>> -static int reg_to_user(void __user *uaddr, const void *val, u64 id)
>> -{
>> -     /* This Just Works because we are little endian. */
>> -     if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
>> -             return -EFAULT;
>> -     return 0;
>> -}
>> +/*
>> + * Writes a register value to a userspace address from a kernel variable.
>> + * Note register size must match sizeof(*__val).
>> + */
>> +#define reg_to_user(__uaddr, __val, __id) ({                         \
>> +     int __ret = 0;                                                  \
>> +     unsigned long __regsize = KVM_REG_SIZE(__id);                   \
>> +     if (sizeof(*(__val)) != __regsize) {                            \
>> +             __ret = -ENOENT;                                        \
>> +     } else {                                                        \
>> +         if (copy_to_user((__uaddr), (__val), __regsize) != 0)       \
>> +             __ret = -EFAULT;                                        \
>> +     }                                                               \
>> +     __ret;                                                          \
>> +})
>
> Ahh, no.  Please don't.  If you really think we want checks that we know
> how to program, then do the _u32 and _u64 versions and have a BUG_ON()
> in there if the KVM_REG_SIZE does not match.

No BUG_ON should be done on data that is driven by userspace.
I got burn on that once already, but fortunately got caught in
review :).

> I don't think this is needed though, because the special (oddball as you
> call them) cases are so rare.

OK, we can live without defense mechanism for now and
come back to it if breakage occur. LE will not notice an issue in
this area, it will come up only in BE case. Testing will catch it.

>>
>>  static int get_invariant_cp15(u64 id, void __user *uaddr)
>>  {
>>       struct coproc_params params;
>>       const struct coproc_reg *r;
>> +     int ret;
>>
>>       if (!index_to_params(id, &params))
>>               return -ENOENT;
>> @@ -710,7 +727,14 @@ static int get_invariant_cp15(u64 id, void __user *uaddr)
>>       if (!r)
>>               return -ENOENT;
>>
>> -     return reg_to_user(uaddr, &r->val, id);
>> +     ret = -ENOENT;
>> +     if (KVM_REG_SIZE(id) == 4) {
>> +             u32 val = r->val;
>> +             ret = reg_to_user(uaddr, &val, id);
>> +     } else if (KVM_REG_SIZE(id) == 8) {
>> +             ret = reg_to_user(uaddr, &r->val, id);
>> +     }
>> +     return ret;
>>  }
>>
>>  static int set_invariant_cp15(u64 id, void __user *uaddr)
>> @@ -718,7 +742,7 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
>>       struct coproc_params params;
>>       const struct coproc_reg *r;
>>       int err;
>> -     u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
>> +     u64 val;
>>
>>       if (!index_to_params(id, &params))
>>               return -ENOENT;
>> @@ -726,7 +750,15 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
>>       if (!r)
>>               return -ENOENT;
>>
>> -     err = reg_from_user(&val, uaddr, id);
>> +     err = -ENOENT;
>> +     if (KVM_REG_SIZE(id) == 4) {
>> +             u32 val32;
>> +             err = reg_from_user(&val32, uaddr, id);
>> +             if (!err)
>> +                     val = val32;
>> +     } else if (KVM_REG_SIZE(id) == 8) {
>> +             err = reg_from_user(&val, uaddr, id);
>> +     }
>>       if (err)
>>               return err;
>>
>> @@ -1004,6 +1036,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>>  {
>>       const struct coproc_reg *r;
>>       void __user *uaddr = (void __user *)(long)reg->addr;
>> +     int ret;
>>
>>       if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
>>               return demux_c15_get(reg->id, uaddr);
>> @@ -1015,14 +1048,24 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>>       if (!r)
>>               return get_invariant_cp15(reg->id, uaddr);
>>
>> -     /* Note: copies two regs if size is 64 bit. */
>> -     return reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
>> +     ret = -ENOENT;
>> +     if (KVM_REG_SIZE(reg->id) == 8) {
>> +             u64 val;
>> +             /* Note: copies two u32 regs */
>> +             memcpy(&val, &(vcpu->arch.cp15[r->reg]), sizeof(val));
>
> Are you not creating a wrongly ordered u64 on BE systems here?  Surely
> the least significant word will be at the lowest address.

Good catch. memcpy logic is the same that I had before,
but I was wondering about it yesterday myself, so I looked
at both directions (hypervisor/emulator) from this code today.
Yes, there is an issue in the code.

It is a bit long description, please bear with me,
I would like to get your opinion on that.

It depends what is canonical form. In current case it is
pair of u32 registers and their indexes are

#define c2_TTBR0    6    /* Translation Table Base Register 0 */
#define c2_TTBR0_high    7    /* TTBR0 top 32 bits */
#define c2_TTBR1    8    /* Translation Table Base Register 1 */
#define c2_TTBR1_high    9    /* TTBR1 top 32 bits */

i.e high word index follows low word. And guest
restore/save code matches this view. For example

    add    r12, vcpu, #CP15_OFFSET(c2_TTBR0)
    ldrd    r6, r7, [r12]
...
    mcrr    p15, 0, r6, r7, c2    @ TTBR 0

ldrd does not swap words (i.e @r12 word goes to r6 (low TTBR0),
and @r12+4 goes into r7 (high TTBR0) and it matches mcrr low,
high. So hypervisor save/restore context will get it right.

and in guest emulation program .. does not get it right.
Current emulator code does not really look at received values it
just stores it back. That is why it was not noticed.

Now to fix it first option if we keep current canonical form
(two u32 register low word first followed by high word) we
can change memcpy like this so BE emulator (userspace)
will see correct value

        val = (vcpu->arch.cp15[r->reg + 1] << 32) |
              vcpu->arch.cp15[r->reg];
Effectively it will be memcpy in LE case and word byteswap
in BE case

and for set similar changes example below

With this canonical form userspace client will see
correct u64 value of TTBR0/TTBR1 and hypervisor
code would get word in mcrr mrrc instruction in
right positions. The only issue with this approach if
some code in kernel will look at &(vcpu->arch.cp15[r->reg]
address as u64 value it will see incorrectly in BE case.
But we don't have such code now.

If we decide that canonical form is such that
&(vcpu->arch.cp15[r->reg] address should hold correct
u64 value we would need to change indexes, description
add word swaps somewhere in context restore/save
code, access_vm_reg etc. Personally I prefer not to
choose this canonical form.

Looking for your guidance on suggestion how to go
about it. I'll post patch for my proposal latter tonight.

> I think you need a function to copy cp15_64_to_u64() thingy which is
> taking BE/LE into consideration.  You had this for one of the other
> patches that you may be able to reuse.

Those are V8 aarch32 macros, there are no such macros (like
vcpu_cp15) in V7. I was looking for similar in V7 but could not find. It
seems V7 code directly works with vcpu->arch.cp15[]. Or did I miss
it? In context of above description is it important?

> I commented on this in the last revision too, am I getting it wrong?
>
>> +             ret = reg_to_user(uaddr, &val, reg->id);
>> +     } else if (KVM_REG_SIZE(reg->id) == 4) {
>> +             ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
>> +     }
>> +
>> +     return ret;
>>  }
>>
>>  int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>>  {
>>       const struct coproc_reg *r;
>>       void __user *uaddr = (void __user *)(long)reg->addr;
>> +     int ret;
>>
>>       if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
>>               return demux_c15_set(reg->id, uaddr);
>> @@ -1034,8 +1077,18 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>>       if (!r)
>>               return set_invariant_cp15(reg->id, uaddr);
>>
>> -     /* Note: copies two regs if size is 64 bit */
>> -     return reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
>> +     ret = -ENOENT;
>> +     if (KVM_REG_SIZE(reg->id) == 8) {
>> +             u64 val;
>> +             /* Note: copies two u32 regs */
>> +             ret = reg_from_user(&val, uaddr, reg->id);
>> +             if (!ret)
>> +                     memcpy(&(vcpu->arch.cp15[r->reg]), &val, sizeof(val));
>
> same as above.

If my suggestion, approach 1, to fix followed instead of
memcpy it would be

        if (!ret) {
            vcpu->arch.cp15[r->reg] = val & 0xffffffff;
            vcpu->arch.cp15[r->reg + 1] = val >> 32;
        }

>> +     } else if (KVM_REG_SIZE(reg->id) == 4) {
>> +             ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
>> +     }
>> +
>> +     return ret;
>>  }
>>
>>  static unsigned int num_demux_regs(void)
>> --
>> 1.8.1.4
>>
>
> Otherwise, we are certainly moving in the right direction.

As always appreciate your time!

Thanks,
Victor

> Thanks!
> -Christoffer
_______________________________________________
kvmarm mailing list
kvmarm@xxxxxxxxxxxxxxxxxxxxx
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm




[Index of Archives]     [Linux KVM]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux