Re: [PATCH 08/10] Move IO APIC to its own lock.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jul 15, 2009 at 02:57:59PM -0300, Marcelo Tosatti wrote:
> On Tue, Jul 14, 2009 at 05:30:43PM +0300, Gleb Natapov wrote:
> > 
> > Signed-off-by: Gleb Natapov <gleb@xxxxxxxxxx>
> > ---
> >  arch/ia64/kvm/kvm-ia64.c |   27 ++++++++++++++++++------
> >  arch/x86/kvm/lapic.c     |    5 +---
> >  arch/x86/kvm/x86.c       |   30 ++++++++++++++++++---------
> >  virt/kvm/ioapic.c        |   50 ++++++++++++++++++++++++++++-----------------
> >  virt/kvm/ioapic.h        |    1 +
> >  5 files changed, 73 insertions(+), 40 deletions(-)
> > 
> > diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> > index 0ad09f0..dd7ef2d 100644
> > --- a/arch/ia64/kvm/kvm-ia64.c
> > +++ b/arch/ia64/kvm/kvm-ia64.c
> > @@ -850,9 +850,16 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
> >  
> >  	r = 0;
> >  	switch (chip->chip_id) {
> > -	case KVM_IRQCHIP_IOAPIC:
> > -		memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
> > -				sizeof(struct kvm_ioapic_state));
> > +	case KVM_IRQCHIP_IOAPIC: {
> > +		struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
> > +		if (ioapic) {
> > +			spin_lock(&ioapic->lock);
> > +			memcpy(&chip->chip.ioapic, ioapic,
> > +			       sizeof(struct kvm_ioapic_state));
> > +			spin_unlock(&ioapic->lock);
> > +		} else
> > +			r = -EINVAL;
> > +	}
> >  		break;
> >  	default:
> >  		r = -EINVAL;
> > @@ -867,10 +874,16 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
> >  
> >  	r = 0;
> >  	switch (chip->chip_id) {
> > -	case KVM_IRQCHIP_IOAPIC:
> > -		memcpy(ioapic_irqchip(kvm),
> > -				&chip->chip.ioapic,
> > -				sizeof(struct kvm_ioapic_state));
> > +	case KVM_IRQCHIP_IOAPIC: {
> > +		struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
> > +		if (ioapic) {
> > +			spin_lock(&ioapic->lock);
> > +			memcpy(ioapic, &chip->chip.ioapic,
> > +			       sizeof(struct kvm_ioapic_state));
> > +			spin_unlock(&ioapic->lock);
> > +		} else
> > +			r = -EINVAL;
> > +	}
> >  		break;
> >  	default:
> >  		r = -EINVAL;
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index e2e2849..61ffcfc 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -471,11 +471,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
> >  		trigger_mode = IOAPIC_LEVEL_TRIG;
> >  	else
> >  		trigger_mode = IOAPIC_EDGE_TRIG;
> > -	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) {
> > -		mutex_lock(&apic->vcpu->kvm->irq_lock);
> > +	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
> >  		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
> > -		mutex_unlock(&apic->vcpu->kvm->irq_lock);
> > -	}
> >  }
> >  
> >  static void apic_send_ipi(struct kvm_lapic *apic)
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 48567fa..de99b84 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -2010,10 +2010,16 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
> >  			&pic_irqchip(kvm)->pics[1],
> >  			sizeof(struct kvm_pic_state));
> >  		break;
> > -	case KVM_IRQCHIP_IOAPIC:
> > -		memcpy(&chip->chip.ioapic,
> > -			ioapic_irqchip(kvm),
> > -			sizeof(struct kvm_ioapic_state));
> > +	case KVM_IRQCHIP_IOAPIC: {
> > +		struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
> > +		if (ioapic) {
> > +			spin_lock(&ioapic->lock);
> > +			memcpy(&chip->chip.ioapic, ioapic,
> > +			       sizeof(struct kvm_ioapic_state));
> > +			spin_unlock(&ioapic->lock);
> > +		} else
> > +			r = -EINVAL;
> > +	}
> >  		break;
> >  	default:
> >  		r = -EINVAL;
> > @@ -2042,12 +2048,16 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
> >  			sizeof(struct kvm_pic_state));
> >  		spin_unlock(&pic_irqchip(kvm)->lock);
> >  		break;
> > -	case KVM_IRQCHIP_IOAPIC:
> > -		mutex_lock(&kvm->irq_lock);
> > -		memcpy(ioapic_irqchip(kvm),
> > -			&chip->chip.ioapic,
> > -			sizeof(struct kvm_ioapic_state));
> > -		mutex_unlock(&kvm->irq_lock);
> > +	case KVM_IRQCHIP_IOAPIC: {
> > +		struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
> > +		if (ioapic) {
> > +			spin_lock(&ioapic->lock);
> > +			memcpy(ioapic, &chip->chip.ioapic,
> > +			       sizeof(struct kvm_ioapic_state));
> > +			spin_unlock(&ioapic->lock);
> > +		} else
> > +			r = -EINVAL;
> > +	}
> >  		break;
> >  	default:
> >  		r = -EINVAL;
> > diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> > index fa05f67..300ee3b 100644
> > --- a/virt/kvm/ioapic.c
> > +++ b/virt/kvm/ioapic.c
> > @@ -182,6 +182,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
> >  	union kvm_ioapic_redirect_entry entry;
> >  	int ret = 1;
> >  
> > +	spin_lock(&ioapic->lock);
> >  	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
> >  		entry = ioapic->redirtbl[irq];
> >  		level ^= entry.fields.polarity;
> > @@ -196,34 +197,44 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
> >  		}
> >  		trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
> >  	}
> > +	spin_unlock(&ioapic->lock);
> > +
> >  	return ret;
> >  }
> >  
> > -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
> > -				    int trigger_mode)
> > +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
> > +				     int trigger_mode)
> >  {
> > -	union kvm_ioapic_redirect_entry *ent;
> > +	int i;
> > +
> > +	for (i = 0; i < IOAPIC_NUM_PINS; i++) {
> > +		union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
> > +
> > +		if (ent->fields.vector != vector)
> > +			continue;
> >  
> > -	ent = &ioapic->redirtbl[pin];
> > +		/* ack notifier may call back into ioapic via kvm_set_irq()  */
> > +		spin_unlock(&ioapic->lock);
> > +		kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
> > +		spin_lock(&ioapic->lock);
> 
> While traversing the IOAPIC pins you drop the lock and acquire it again,
> which is error prone: what if the redirection table is changed in
> between, for example?
> 
Yes, the ack notifiers is a problematic part. The only thing that
current ack notifiers do is set_irq_level(0) so this is not the problem
in practice. I'll see if I can eliminate this dropping of the lock here.

> Also, irq_lock was held during ack and mask notifier callbacks, so they
> (the callbacks) did not have to worry about concurrency.
> 
Is it possible to have more then one ack for the same interrupt line at
the same time?

> You questioned the purpose of irq_lock earlier, and thats what it is:
> synchronization between pic and ioapic blur at the irq notifiers.
> 
Pic has its own locking and it fails miserably in regards ack notifiers.
I bet nobody tried device assignment with pic. It will not work.

irq_lock is indeed used to synchronization ioapic access, but the way
it is done requires calling kvm_set_irq() with lock held and this adds
unnecessary lock on a msi irq injection path.

> Using RCU to synchronize ack/mask notifier list walk versus list
> addition is good, but i'm not entirely sure that smaller locks like you
> are proposing is a benefit long term (things become much more tricky).
Without removing irq_lock RCU is useless since the list walking is always
done with irq_lock held. I see smaller locks as simplification BTW. The
thing is tricky now, or so I felt while trying to understand what
irq_lock actually protects. With smaller locks with names that clearly
indicates which data structure it protects I feel the code is much
easy to understand.

> 
> Maybe it is the only way forward (and so you push this complexity to the
> ack/mask notifiers), but i think it can be avoided until its proven that
> there is contention on irq_lock (which is reduced by faster search).
This is not about contention. This is about existence of the lock in the
first place. With the patch set there is no lock at msi irq injection
path at all and this is better than having lock no matter if the lock is
congested or not. There is still a lock on ioapic path (which MSI does
not use) and removing of this lock should be done only after we see
that it is congested, because removing it involves adding a number of
atomic accesses, so it is not clear win without lock contention.
(removing it will also solve ack notification problem hmmm)

> 
> 
> > -	kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
> > +		if (trigger_mode != IOAPIC_LEVEL_TRIG)
> > +			continue;
> >  
> > -	if (trigger_mode == IOAPIC_LEVEL_TRIG) {
> >  		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
> >  		ent->fields.remote_irr = 0;
> > -		if (!ent->fields.mask && (ioapic->irr & (1 << pin)))
> > -			ioapic_service(ioapic, pin);
> > +		if (!ent->fields.mask && (ioapic->irr & (1 << i)))
> > +			ioapic_service(ioapic, i);
> >  	}
> >  }
> >  
> >  void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
> >  {
> >  	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
> > -	int i;
> >  
> > -	for (i = 0; i < IOAPIC_NUM_PINS; i++)
> > -		if (ioapic->redirtbl[i].fields.vector == vector)
> > -			__kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
> > +	spin_lock(&ioapic->lock);
> > +	__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
> > +	spin_unlock(&ioapic->lock);
> >  }
> >  
> >  static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
> > @@ -248,8 +259,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
> >  	ioapic_debug("addr %lx\n", (unsigned long)addr);
> >  	ASSERT(!(addr & 0xf));	/* check alignment */
> >  
> > -	mutex_lock(&ioapic->kvm->irq_lock);
> >  	addr &= 0xff;
> > +	spin_lock(&ioapic->lock);
> >  	switch (addr) {
> >  	case IOAPIC_REG_SELECT:
> >  		result = ioapic->ioregsel;
> > @@ -263,6 +274,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
> >  		result = 0;
> >  		break;
> >  	}
> > +	spin_unlock(&ioapic->lock);
> > +
> >  	switch (len) {
> >  	case 8:
> >  		*(u64 *) val = result;
> > @@ -275,7 +288,6 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
> >  	default:
> >  		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
> >  	}
> > -	mutex_unlock(&ioapic->kvm->irq_lock);
> >  	return 0;
> >  }
> >  
> > @@ -291,15 +303,15 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
> >  		     (void*)addr, len, val);
> >  	ASSERT(!(addr & 0xf));	/* check alignment */
> >  
> > -	mutex_lock(&ioapic->kvm->irq_lock);
> >  	if (len == 4 || len == 8)
> >  		data = *(u32 *) val;
> >  	else {
> >  		printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
> > -		goto unlock;
> > +		return 0;
> >  	}
> >  
> >  	addr &= 0xff;
> > +	spin_lock(&ioapic->lock);
> >  	switch (addr) {
> >  	case IOAPIC_REG_SELECT:
> >  		ioapic->ioregsel = data;
> > @@ -310,15 +322,14 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
> >  		break;
> >  #ifdef	CONFIG_IA64
> >  	case IOAPIC_REG_EOI:
> > -		kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG);
> > +		__kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG);
> >  		break;
> >  #endif
> >  
> >  	default:
> >  		break;
> >  	}
> > -unlock:
> > -	mutex_unlock(&ioapic->kvm->irq_lock);
> > +	spin_unlock(&ioapic->lock);
> >  	return 0;
> >  }
> >  
> > @@ -347,6 +358,7 @@ int kvm_ioapic_init(struct kvm *kvm)
> >  	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
> >  	if (!ioapic)
> >  		return -ENOMEM;
> > +	spin_lock_init(&ioapic->lock);
> >  	kvm->arch.vioapic = ioapic;
> >  	kvm_ioapic_reset(ioapic);
> >  	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
> > diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
> > index 7080b71..557107e 100644
> > --- a/virt/kvm/ioapic.h
> > +++ b/virt/kvm/ioapic.h
> > @@ -44,6 +44,7 @@ struct kvm_ioapic {
> >  	struct kvm_io_device dev;
> >  	struct kvm *kvm;
> >  	void (*ack_notifier)(void *opaque, int irq);
> > +	spinlock_t lock;
> >  };
> >  
> >  #ifdef DEBUG
> > -- 
> > 1.6.2.1
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@xxxxxxxxxxxxxxx
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux