Re: [PATCH v8 10/15] ARM: KVM: Inject IRQs and FIQs from userspace

Christoffer Dall <c.dall@xxxxxxxxxxxxxxxxxxxxxx> · Tue, 19 Jun 2012 23:17:58 -0400

On Tue, Jun 19, 2012 at 4:49 AM, Avi Kivity <avi@xxxxxxxxxx> wrote:
> On 06/18/2012 11:56 PM, Christoffer Dall wrote:
>> On Mon, Jun 18, 2012 at 9:32 AM, Avi Kivity <avi@xxxxxxxxxx> wrote:
>>> On 06/15/2012 10:08 PM, Christoffer Dall wrote:
>>>> From: Christoffer Dall <cdall@xxxxxxxxxxxxxxx>
>>>>
>>>> Userspace can inject IRQs and FIQs through the KVM_IRQ_LINE VM ioctl.
>>>> This ioctl is used since the sematics are in fact two lines that can be
>>>> either raised or lowered on the VCPU - the IRQ and FIQ lines.
>>>>
>>>> KVM needs to know which VCPU it must operate on and whether the FIQ or
>>>> IRQ line is raised/lowered. Hence both pieces of information is packed
>>>> in the kvm_irq_level->irq field. The irq fild value will be:
>>>>   IRQ: vcpu_index << 1
>>>>   FIQ: (vcpu_index << 1) | 1
>>>>
>>>> This is documented in Documentation/kvm/api.txt.
>>>>
>>>> The effect of the ioctl is simply to simply raise/lower the
>>>> corresponding irq_line field on the VCPU struct, which will cause the
>>>> world-switch code to raise/lower virtual interrupts when running the
>>>> guest on next switch. The wait_for_interrupt flag is also cleared for
>>>> raised IRQs or FIQs causing an idle VCPU to become active again. CPUs
>>>> in guest mode are kicked to make sure they refresh their interrupt status.
>>>
>>>>
>>>> +static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
>>>> +                                   struct kvm_irq_level *irq_level)
>>>> +{
>>>> +     int mask;
>>>> +     unsigned int vcpu_idx;
>>>> +     struct kvm_vcpu *vcpu;
>>>> +     unsigned long old, new, *ptr;
>>>> +
>>>> +     vcpu_idx = irq_level->irq >> 1;
>>>> +     if (vcpu_idx >= KVM_MAX_VCPUS)
>>>> +             return -EINVAL;
>>>> +
>>>> +     vcpu = kvm_get_vcpu(kvm, vcpu_idx);
>>>> +     if (!vcpu)
>>>> +             return -EINVAL;
>>>> +
>>>> +     if ((irq_level->irq & 1) == KVM_ARM_IRQ_LINE)
>>>> +             mask = HCR_VI;
>>>> +     else /* KVM_ARM_FIQ_LINE */
>>>> +             mask = HCR_VF;
>>>> +
>>>> +     trace_kvm_set_irq(irq_level->irq, irq_level->level, 0);
>>>> +
>>>> +     ptr = (unsigned long *)&vcpu->arch.irq_lines;
>>>> +     do {
>>>> +             old = ACCESS_ONCE(*ptr);
>>>> +             if (irq_level->level)
>>>> +                     new = old | mask;
>>>> +             else
>>>> +                     new = old & ~mask;
>>>> +
>>>> +             if (new == old)
>>>> +                     return 0; /* no change */
>>>> +     } while (cmpxchg(ptr, old, new) != old);
>>>
>>> Isn't this a complicated
>>>
>>>
>>>   if (level)
>>>       set_bit()
>>>   else
>>>       clear_bit()
>>>
>>> ?
>>>
>>
>> we need to atomically know if we changed either the FIQ/IRQ fields and
>> atomically update without locking.
>
> So use test_and_set_bit()/test_and_clear_bit().
>
>> (I think you suggested this
>> approach, in fact).
>
> I think so too, but it only makes sense if you need to consider both
> fields simultaneously (which you don't here).  For example, if IRQ was
> asserted while FIQ was already raised, you don't need to kick.  But
> that's not the case according to the below.
>

ok, fair enough. I simplified it a bit.

>>
>>>> +
>>>> +     /*
>>>> +      * The vcpu irq_lines field was updated, wake up sleeping VCPUs and
>>>> +      * trigger a world-switch round on the running physical CPU to set the
>>>> +      * virtual IRQ/FIQ fields in the HCR appropriately.
>>>> +      */
>>>> +     kvm_vcpu_kick(vcpu);
>>>
>>> No need to wake when the line is asserted so you can make this
>>> conditional on level.
>>>
>>
>> we need to trigger a world switch to update the virtualized register
>> from the actual running physical CPU if we changed any of the IRQ/FIQ
>> fields. We return in the loop above if we didn't change anything.
>
> Okay.
>
>
>>
>>>> +
>>>> +     return 0;
>>>> +}
>>>> +
>>>>  long kvm_arch_vcpu_ioctl(struct file *filp,
>>>>                        unsigned int ioctl, unsigned long arg)
>>>>  {
>>>> @@ -298,7 +345,20 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
>>>>  long kvm_arch_vm_ioctl(struct file *filp,
>>>>                      unsigned int ioctl, unsigned long arg)
>>>>  {
>>>> -     return -EINVAL;
>>>> +     struct kvm *kvm = filp->private_data;
>>>> +     void __user *argp = (void __user *)arg;
>>>> +
>>>> +     switch (ioctl) {
>>>> +     case KVM_IRQ_LINE: {
>>>> +             struct kvm_irq_level irq_event;
>>>> +
>>>> +             if (copy_from_user(&irq_event, argp, sizeof irq_event))
>>>> +                     return -EFAULT;
>>>> +             return kvm_arch_vm_ioctl_irq_line(kvm, &irq_event);
>>>> +     }
>>>> +     default:
>>>> +             return -EINVAL;
>>>> +     }
>>>>  }
>>>
>>> Should be in common code guarded by the define introduced previously.
>>>
>>>
>>
>> you mean like this: ?
>>
>>
>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>> index a9f209b..5bf2193 100644
>> --- a/arch/arm/kvm/arm.c
>> +++ b/arch/arm/kvm/arm.c
>> @@ -535,8 +535,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu,
>> struct kvm_run *run)
>>       return ret;
>>  }
>>
>> -static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
>> -                                   struct kvm_irq_level *irq_level)
>> +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level)
>>  {
>>       int mask;
>>       unsigned int vcpu_idx;
>> @@ -596,20 +595,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>> struct kvm_dirty_log *log)
>>  long kvm_arch_vm_ioctl(struct file *filp,
>>                      unsigned int ioctl, unsigned long arg)
>>  {
>> -     struct kvm *kvm = filp->private_data;
>> -     void __user *argp = (void __user *)arg;
>> -
>> -     switch (ioctl) {
>> -     case KVM_IRQ_LINE: {
>> -             struct kvm_irq_level irq_event;
>> -
>> -             if (copy_from_user(&irq_event, argp, sizeof irq_event))
>> -                     return -EFAULT;
>> -             return kvm_arch_vm_ioctl_irq_line(kvm, &irq_event);
>> -     }
>> -     default:
>> -             return -EINVAL;
>> -     }
>> +     return -EINVAL;
>>  }
>>
>>  static void cpu_set_vector(void *vector)
>> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
>> index bd77cb5..122a4b2 100644
>> --- a/arch/ia64/kvm/kvm-ia64.c
>> +++ b/arch/ia64/kvm/kvm-ia64.c
>> @@ -924,6 +924,16 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu
>> *vcpu, struct kvm_regs *regs)
>>       return 0;
>>  }
>>
>> +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
>> +{
>> +     if (!irqchip_in_kernel(kvm))
>> +             return -ENXIO;
>> +
>> +     irq_event->statusstatus = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
>> +                                           irq_event->irq, irq_event->level);
>> +     return 0;
>> +}
>> +
>>  long kvm_arch_vm_ioctl(struct file *filp,
>>               unsigned int ioctl, unsigned long arg)
>>  {
>> @@ -963,29 +973,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
>>                       goto out;
>>               }
>>               break;
>> -     case KVM_IRQ_LINE_STATUS:
>> -     case KVM_IRQ_LINE: {
>> -             struct kvm_irq_level irq_event;
>> -
>> -             r = -EFAULT;
>> -             if (copy_from_user(&irq_event, argp, sizeof irq_event))
>> -                     goto out;
>> -             r = -ENXIO;
>> -             if (irqchip_in_kernel(kvm)) {
>> -                     __s32 status;
>> -                     status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
>> -                                 irq_event.irq, irq_event.level);
>> -                     if (ioctl == KVM_IRQ_LINE_STATUS) {
>> -                             r = -EFAULT;
>> -                             irq_event.status = status;
>> -                             if (copy_to_user(argp, &irq_event,
>> -                                                     sizeof irq_event))
>> -                                     goto out;
>> -                     }
>> -                     r = 0;
>> -             }
>> -             break;
>> -             }
>>       case KVM_GET_IRQCHIP: {
>>               /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
>>               struct kvm_irqchip chip;
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index a01a424..c9c4186 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -3144,6 +3144,16 @@ out:
>>       return r;
>>  }
>>
>> +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
>> +{
>> +     if (!irqchip_in_kernel(kvm))
>> +             return -ENXIO;
>> +
>> +     irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
>> +                                     irq_event->irq, irq_event->level);
>> +     return 0;
>> +}
>> +
>>  long kvm_arch_vm_ioctl(struct file *filp,
>>                      unsigned int ioctl, unsigned long arg)
>>  {
>> @@ -3250,29 +3260,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
>>       create_pit_unlock:
>>               mutex_unlock(&kvm->slots_lock);
>>               break;
>> -     case KVM_IRQ_LINE_STATUS:
>> -     case KVM_IRQ_LINE: {
>> -             struct kvm_irq_level irq_event;
>> -
>> -             r = -EFAULT;
>> -             if (copy_from_user(&irq_event, argp, sizeof irq_event))
>> -                     goto out;
>> -             r = -ENXIO;
>> -             if (irqchip_in_kernel(kvm)) {
>> -                     __s32 status;
>> -                     status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
>> -                                     irq_event.irq, irq_event.level);
>> -                     if (ioctl == KVM_IRQ_LINE_STATUS) {
>> -                             r = -EFAULT;
>> -                             irq_event.status = status;
>> -                             if (copy_to_user(argp, &irq_event,
>> -                                                     sizeof irq_event))
>> -                                     goto out;
>> -                     }
>> -                     r = 0;
>> -             }
>> -             break;
>> -     }
>>       case KVM_GET_IRQCHIP: {
>>               /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
>>               struct kvm_irqchip *chip;
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index e3c86f8..96aa7fb 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -494,6 +494,7 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
>>                                  struct
>>                                  kvm_userspace_memory_region *mem,
>>                                  int user_alloc);
>> +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level);
>>  long kvm_arch_vm_ioctl(struct file *filp,
>>                      unsigned int ioctl, unsigned long arg);
>>
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 636bd08..1d33877 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -2093,6 +2093,25 @@ static long kvm_vm_ioctl(struct file *filp,
>>               break;
>>       }
>>  #endif
>> +#ifdef __KVM_HAVE_IRQ_LINE
>> +     case KVM_IRQ_LINE_STATUS:
>> +     case KVM_IRQ_LINE: {
>> +             struct kvm_irq_level irq_event;
>> +
>> +             r = -EFAULT;
>> +             if (copy_from_user(&irq_event, argp, sizeof irq_event))
>> +                     goto out;
>> +
>> +             r = kvm_vm_ioctl_irq_line(kvm, &irq_event);
>> +
>> +             if (ioctl == KVM_IRQ_LINE_STATUS) {
>> +                     if (copy_to_user(argp, &irq_event, sizeof irq_event))
>> +                             r = -EFAULT;
>> +             }
>> +
>> +             break;
>> +     }
>> +#endif
>>       default:
>>               r = kvm_arch_vm_ioctl(filp, ioctl, arg);
>>               if (r == -ENOTTY)
>
> Yup.  Note it brings in KVM_IRQ_LINE_STATUS for ARM.  I think it makes
> sense if you have guests that do timekeeping by counting interrupts (as
> opposed to reading a wall clock register).
>
> --
> error compiling committee.c: too many arguments to function
>
>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html