On 2014/12/18 0:45, Russ Anderson wrote: > On Wed, Dec 17, 2014 at 10:41:51AM +0800, Jiang Liu wrote: >> On 2014/12/17 1:29, Dimitri Sivanich wrote: >>> I answered my own question, this had never been tested on UV. >>> >>> The gru driver fails with: >>> SGI GRU Device Driver: uv_setup_irq failed, errno=22 >>> >>> The info->type in uv_domain_alloc() is not set to X86_IRQ_ALLOC_TYPE_UV >>> (info->type is never set to that value anywhere). >>> >>> Adding the following to uv_setup_irq allows it to work: >>> >>> --- linux.orig/arch/x86/platform/uv/uv_irq.c >>> +++ linux/arch/x86/platform/uv/uv_irq.c >>> @@ -187,6 +187,7 @@ int uv_setup_irq(char *irq_name, int cpu >>> return -ENOMEM; >>> >>> init_irq_alloc_info(&info, cpumask_of(cpu)); >>> + info.type = X86_IRQ_ALLOC_TYPE_UV; >>> info.uv_limit = limit; >>> info.uv_blade = mmr_blade; >>> info.uv_offset = mmr_offset; >>> >>> On Mon, Dec 15, 2014 at 03:37:35PM -0600, Dimitri Sivanich wrote: >> Hi Dimitri, >> Thanks for reporting and fixing this bug. We will rebase the >> tip/x86/apic branch and fold the above patch into the original patch. >> May I assume a Tested-by from you? I have no UV systems for testing. > > 287 lines of change in uv_irq.c completely untested in linux-next? Ouch. > > Dimitri tested the one line change above, because the driver would > not even load without it. It will take some time to look through > your extensive changes to understand and verify it works. Hi Anderson, The whole patch set has been exposed to linux-next through tip/x86/apic, but not sure why this bug hasn't been discovered. Will Cc you and Dimitri next time. Regards! Gerry > > Thanks. > >> Regards! >> Gerry >> >>>> Was this patch ever tested on a UV system? >>>> >>>> Also, adding some SGI folks to the CC list, since there were none listed before. >>>> >>>> On Tue, Nov 25, 2014 at 01:53:31PM +0800, Jiang Liu wrote: >>>>> Enhance UV code to support hierarchy irqdomain, it helps to make >>>>> the architecture more clear. 
>>>>> >>>>> We should construct hwirq based on mmr_blade and mmr_offset, but >>>>> mmr_offset is type of unsigned long, it may exceed the range of >>>>> irq_hw_number_t. So help about the way to construct hwirq based >>>>> on mmr_blade and mmr_offset is welcomed! >>>>> >>>>> Signed-off-by: Jiang Liu <jiang.liu@xxxxxxxxxxxxxxx> >>>>> --- >>>>> arch/x86/include/asm/hw_irq.h | 9 ++ >>>>> arch/x86/platform/uv/uv_irq.c | 287 ++++++++++++++++------------------------- >>>>> 2 files changed, 117 insertions(+), 179 deletions(-) >>>>> >>>>> diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h >>>>> index 46dec7e37829..bbf90fe2a224 100644 >>>>> --- a/arch/x86/include/asm/hw_irq.h >>>>> +++ b/arch/x86/include/asm/hw_irq.h >>>>> @@ -123,6 +123,7 @@ enum irq_alloc_type { >>>>> X86_IRQ_ALLOC_TYPE_MSI, >>>>> X86_IRQ_ALLOC_TYPE_MSIX, >>>>> X86_IRQ_ALLOC_TYPE_DMAR, >>>>> + X86_IRQ_ALLOC_TYPE_UV, >>>>> }; >>>>> >>>>> struct irq_alloc_info { >>>>> @@ -169,6 +170,14 @@ struct irq_alloc_info { >>>>> void *ht_update; >>>>> }; >>>>> #endif >>>>> +#ifdef CONFIG_X86_UV >>>>> + struct { >>>>> + int uv_limit; >>>>> + int uv_blade; >>>>> + unsigned long uv_offset; >>>>> + char *uv_name; >>>>> + }; >>>>> +#endif >>>>> }; >>>>> }; >>>>> >>>>> diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c >>>>> index 474912d03f40..c237ed34a498 100644 >>>>> --- a/arch/x86/platform/uv/uv_irq.c >>>>> +++ b/arch/x86/platform/uv/uv_irq.c >>>>> @@ -19,17 +19,31 @@ >>>>> #include <asm/uv/uv_hub.h> >>>>> >>>>> /* MMR offset and pnode of hub sourcing interrupts for a given irq */ >>>>> -struct uv_irq_2_mmr_pnode{ >>>>> - struct rb_node list; >>>>> +struct uv_irq_2_mmr_pnode { >>>>> unsigned long offset; >>>>> int pnode; >>>>> - int irq; >>>>> }; >>>>> >>>>> -static DEFINE_SPINLOCK(uv_irq_lock); >>>>> -static struct rb_root uv_irq_root; >>>>> +static void uv_program_mmr(struct irq_cfg *cfg, struct uv_irq_2_mmr_pnode *info) >>>>> +{ >>>>> + unsigned long mmr_value; >>>>> + 
struct uv_IO_APIC_route_entry *entry; >>>>> + >>>>> + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != >>>>> + sizeof(unsigned long)); >>>>> + >>>>> + mmr_value = 0; >>>>> + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; >>>>> + entry->vector = cfg->vector; >>>>> + entry->delivery_mode = apic->irq_delivery_mode; >>>>> + entry->dest_mode = apic->irq_dest_mode; >>>>> + entry->polarity = 0; >>>>> + entry->trigger = 0; >>>>> + entry->mask = 0; >>>>> + entry->dest = cfg->dest_apicid; >>>>> >>>>> -static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool); >>>>> + uv_write_global_mmr64(info->pnode, info->offset, mmr_value); >>>>> +} >>>>> >>>>> static void uv_noop(struct irq_data *data) { } >>>>> >>>>> @@ -38,6 +52,24 @@ static void uv_ack_apic(struct irq_data *data) >>>>> ack_APIC_irq(); >>>>> } >>>>> >>>>> +static int >>>>> +uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, >>>>> + bool force) >>>>> +{ >>>>> + struct irq_data *parent = data->parent_data; >>>>> + struct irq_cfg *cfg = irqd_cfg(data); >>>>> + int ret; >>>>> + >>>>> + ret = parent->chip->irq_set_affinity(parent, mask, force); >>>>> + if (ret >= 0) { >>>>> + uv_program_mmr(cfg, data->chip_data); >>>>> + if (cfg->move_in_progress) >>>>> + send_cleanup_vector(cfg); >>>>> + } >>>>> + >>>>> + return ret; >>>>> +} >>>>> + >>>>> static struct irq_chip uv_irq_chip = { >>>>> .name = "UV-CORE", >>>>> .irq_mask = uv_noop, >>>>> @@ -46,179 +78,99 @@ static struct irq_chip uv_irq_chip = { >>>>> .irq_set_affinity = uv_set_irq_affinity, >>>>> }; >>>>> >>>>> -/* >>>>> - * Add offset and pnode information of the hub sourcing interrupts to the >>>>> - * rb tree for a specific irq. 
>>>>> - */ >>>>> -static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) >>>>> +static int uv_domain_alloc(struct irq_domain *domain, unsigned int virq, >>>>> + unsigned int nr_irqs, void *arg) >>>>> { >>>>> - struct rb_node **link = &uv_irq_root.rb_node; >>>>> - struct rb_node *parent = NULL; >>>>> - struct uv_irq_2_mmr_pnode *n; >>>>> - struct uv_irq_2_mmr_pnode *e; >>>>> - unsigned long irqflags; >>>>> - >>>>> - n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, >>>>> - uv_blade_to_memory_nid(blade)); >>>>> - if (!n) >>>>> + struct uv_irq_2_mmr_pnode *chip_data; >>>>> + struct irq_alloc_info *info = arg; >>>>> + struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); >>>>> + int ret; >>>>> + >>>>> + if (nr_irqs > 1 || !info || info->type != X86_IRQ_ALLOC_TYPE_UV) >>>>> + return -EINVAL; >>>>> + >>>>> + chip_data = kmalloc_node(sizeof(*chip_data), GFP_KERNEL, >>>>> + irq_data->node); >>>>> + if (!chip_data) >>>>> return -ENOMEM; >>>>> >>>>> - n->irq = irq; >>>>> - n->offset = offset; >>>>> - n->pnode = uv_blade_to_pnode(blade); >>>>> - spin_lock_irqsave(&uv_irq_lock, irqflags); >>>>> - /* Find the right place in the rbtree: */ >>>>> - while (*link) { >>>>> - parent = *link; >>>>> - e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); >>>>> - >>>>> - if (unlikely(irq == e->irq)) { >>>>> - /* irq entry exists */ >>>>> - e->pnode = uv_blade_to_pnode(blade); >>>>> - e->offset = offset; >>>>> - spin_unlock_irqrestore(&uv_irq_lock, irqflags); >>>>> - kfree(n); >>>>> - return 0; >>>>> - } >>>>> - >>>>> - if (irq < e->irq) >>>>> - link = &(*link)->rb_left; >>>>> + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); >>>>> + if (ret >= 0) { >>>>> + if (info->uv_limit == UV_AFFINITY_CPU) >>>>> + irq_set_status_flags(virq, IRQ_NO_BALANCING); >>>>> else >>>>> - link = &(*link)->rb_right; >>>>> + irq_set_status_flags(virq, IRQ_MOVE_PCNTXT); >>>>> + >>>>> + chip_data->pnode = uv_blade_to_pnode(info->uv_blade); >>>>> 
+ chip_data->offset = info->uv_offset; >>>>> + irq_domain_set_info(domain, virq, virq, &uv_irq_chip, chip_data, >>>>> + handle_percpu_irq, NULL, info->uv_name); >>>>> + } else { >>>>> + kfree(chip_data); >>>>> } >>>>> >>>>> - /* Insert the node into the rbtree. */ >>>>> - rb_link_node(&n->list, parent, link); >>>>> - rb_insert_color(&n->list, &uv_irq_root); >>>>> - >>>>> - spin_unlock_irqrestore(&uv_irq_lock, irqflags); >>>>> - return 0; >>>>> + return ret; >>>>> } >>>>> >>>>> -/* Retrieve offset and pnode information from the rb tree for a specific irq */ >>>>> -int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) >>>>> +static void uv_domain_free(struct irq_domain *domain, unsigned int virq, >>>>> + unsigned int nr_irqs) >>>>> { >>>>> - struct uv_irq_2_mmr_pnode *e; >>>>> - struct rb_node *n; >>>>> - unsigned long irqflags; >>>>> - >>>>> - spin_lock_irqsave(&uv_irq_lock, irqflags); >>>>> - n = uv_irq_root.rb_node; >>>>> - while (n) { >>>>> - e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); >>>>> - >>>>> - if (e->irq == irq) { >>>>> - *offset = e->offset; >>>>> - *pnode = e->pnode; >>>>> - spin_unlock_irqrestore(&uv_irq_lock, irqflags); >>>>> - return 0; >>>>> - } >>>>> - >>>>> - if (irq < e->irq) >>>>> - n = n->rb_left; >>>>> - else >>>>> - n = n->rb_right; >>>>> - } >>>>> - spin_unlock_irqrestore(&uv_irq_lock, irqflags); >>>>> - return -1; >>>>> + struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); >>>>> + >>>>> + BUG_ON(nr_irqs != 1); >>>>> + kfree(irq_data->chip_data); >>>>> + irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT); >>>>> + irq_clear_status_flags(virq, IRQ_NO_BALANCING); >>>>> + irq_domain_free_irqs_top(domain, virq, nr_irqs); >>>>> } >>>>> >>>>> /* >>>>> * Re-target the irq to the specified CPU and enable the specified MMR located >>>>> * on the specified blade to allow the sending of MSIs to the specified CPU. 
>>>>> */ >>>>> -static int >>>>> -arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, >>>>> - unsigned long mmr_offset, int limit) >>>>> +static void uv_domain_activate(struct irq_domain *domain, >>>>> + struct irq_data *irq_data) >>>>> { >>>>> - struct irq_cfg *cfg = irq_cfg(irq); >>>>> - unsigned long mmr_value; >>>>> - struct uv_IO_APIC_route_entry *entry; >>>>> - int mmr_pnode; >>>>> - >>>>> - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != >>>>> - sizeof(unsigned long)); >>>>> - >>>>> - if (limit == UV_AFFINITY_CPU) >>>>> - irq_set_status_flags(irq, IRQ_NO_BALANCING); >>>>> - else >>>>> - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); >>>>> - >>>>> - irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, >>>>> - irq_name); >>>>> - >>>>> - mmr_value = 0; >>>>> - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; >>>>> - entry->vector = cfg->vector; >>>>> - entry->delivery_mode = apic->irq_delivery_mode; >>>>> - entry->dest_mode = apic->irq_dest_mode; >>>>> - entry->polarity = 0; >>>>> - entry->trigger = 0; >>>>> - entry->mask = 0; >>>>> - entry->dest = cfg->dest_apicid; >>>>> - >>>>> - mmr_pnode = uv_blade_to_pnode(mmr_blade); >>>>> - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); >>>>> - >>>>> - if (cfg->move_in_progress) >>>>> - send_cleanup_vector(cfg); >>>>> - >>>>> - return irq; >>>>> + uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); >>>>> } >>>>> >>>>> /* >>>>> * Disable the specified MMR located on the specified blade so that MSIs are >>>>> * no longer allowed to be sent. 
>>>>> */ >>>>> -static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) >>>>> +static void uv_domain_deactivate(struct irq_domain *domain, >>>>> + struct irq_data *irq_data) >>>>> { >>>>> unsigned long mmr_value; >>>>> struct uv_IO_APIC_route_entry *entry; >>>>> >>>>> - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != >>>>> - sizeof(unsigned long)); >>>>> - >>>>> mmr_value = 0; >>>>> entry = (struct uv_IO_APIC_route_entry *)&mmr_value; >>>>> entry->mask = 1; >>>>> - >>>>> - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); >>>>> + uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); >>>>> } >>>>> >>>>> -static int >>>>> -uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, >>>>> - bool force) >>>>> -{ >>>>> - struct irq_cfg *cfg = irqd_cfg(data); >>>>> - unsigned int dest; >>>>> - unsigned long mmr_value, mmr_offset; >>>>> - struct uv_IO_APIC_route_entry *entry; >>>>> - int mmr_pnode; >>>>> - >>>>> - if (apic_set_affinity(data, mask, &dest)) >>>>> - return -1; >>>>> - >>>>> - mmr_value = 0; >>>>> - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; >>>>> - >>>>> - entry->vector = cfg->vector; >>>>> - entry->delivery_mode = apic->irq_delivery_mode; >>>>> - entry->dest_mode = apic->irq_dest_mode; >>>>> - entry->polarity = 0; >>>>> - entry->trigger = 0; >>>>> - entry->mask = 0; >>>>> - entry->dest = dest; >>>>> - >>>>> - /* Get previously stored MMR and pnode of hub sourcing interrupts */ >>>>> - if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode)) >>>>> - return -1; >>>>> - >>>>> - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); >>>>> +static struct irq_domain_ops uv_domain_ops = { >>>>> + .alloc = uv_domain_alloc, >>>>> + .free = uv_domain_free, >>>>> + .activate = uv_domain_activate, >>>>> + .deactivate = uv_domain_deactivate, >>>>> +}; >>>>> >>>>> - if (cfg->move_in_progress) >>>>> - send_cleanup_vector(cfg); >>>>> +static struct irq_domain *uv_get_irq_domain(void) >>>>> +{ >>>>> + static struct 
irq_domain *uv_domain; >>>>> + static DEFINE_MUTEX(uv_lock); >>>>> + >>>>> + mutex_lock(&uv_lock); >>>>> + if (uv_domain == NULL) { >>>>> + uv_domain = irq_domain_add_tree(NULL, &uv_domain_ops, NULL); >>>>> + if (uv_domain) >>>>> + uv_domain->parent = x86_vector_domain; >>>>> + } >>>>> + mutex_unlock(&uv_lock); >>>>> >>>>> - return IRQ_SET_MASK_OK_NOCOPY; >>>>> + return uv_domain; >>>>> } >>>>> >>>>> /* >>>>> @@ -229,23 +181,20 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, >>>>> int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, >>>>> unsigned long mmr_offset, int limit) >>>>> { >>>>> - int ret, irq; >>>>> struct irq_alloc_info info; >>>>> + struct irq_domain *domain = uv_get_irq_domain(); >>>>> + >>>>> + if (!domain) >>>>> + return -ENOMEM; >>>>> >>>>> init_irq_alloc_info(&info, cpumask_of(cpu)); >>>>> - irq = irq_domain_alloc_irqs(NULL, 1, uv_blade_to_memory_nid(mmr_blade), >>>>> - &info); >>>>> - if (irq <= 0) >>>>> - return -EBUSY; >>>>> - >>>>> - ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, >>>>> - limit); >>>>> - if (ret == irq) >>>>> - uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); >>>>> - else >>>>> - irq_domain_free_irqs(irq, 1); >>>>> + info.uv_limit = limit; >>>>> + info.uv_blade = mmr_blade; >>>>> + info.uv_offset = mmr_offset; >>>>> + info.uv_name = irq_name; >>>>> >>>>> - return ret; >>>>> + return irq_domain_alloc_irqs(domain, 1, >>>>> + uv_blade_to_memory_nid(mmr_blade), &info); >>>>> } >>>>> EXPORT_SYMBOL_GPL(uv_setup_irq); >>>>> >>>>> @@ -258,26 +207,6 @@ EXPORT_SYMBOL_GPL(uv_setup_irq); >>>>> */ >>>>> void uv_teardown_irq(unsigned int irq) >>>>> { >>>>> - struct uv_irq_2_mmr_pnode *e; >>>>> - struct rb_node *n; >>>>> - unsigned long irqflags; >>>>> - >>>>> - spin_lock_irqsave(&uv_irq_lock, irqflags); >>>>> - n = uv_irq_root.rb_node; >>>>> - while (n) { >>>>> - e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); >>>>> - if (e->irq == irq) { >>>>> - arch_disable_uv_irq(e->pnode, e->offset); 
>>>>> - rb_erase(n, &uv_irq_root); >>>>> - kfree(e); >>>>> - break; >>>>> - } >>>>> - if (irq < e->irq) >>>>> - n = n->rb_left; >>>>> - else >>>>> - n = n->rb_right; >>>>> - } >>>>> - spin_unlock_irqrestore(&uv_irq_lock, irqflags); >>>>> irq_domain_free_irqs(irq, 1); >>>>> } >>>>> EXPORT_SYMBOL_GPL(uv_teardown_irq); >>>>> -- >>>>> 1.7.10.4 >>>>> >>>>> -- >>>>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in >>>>> the body of a message to majordomo@xxxxxxxxxxxxxxx >>>>> More majordomo info at http://vger.kernel.org/majordomo-info.html >>>>> Please read the FAQ at http://www.tux.org/lkml/ >>> -- >>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in >>> the body of a message to majordomo@xxxxxxxxxxxxxxx >>> More majordomo info at http://vger.kernel.org/majordomo-info.html >>> Please read the FAQ at http://www.tux.org/lkml/ >>> > -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html