On Wed, Dec 19, 2018 at 4:23 PM Thomas Gleixner <tglx@xxxxxxxxxxxxx> wrote:
>
> On Tue, 4 Dec 2018, Dou Liyang wrote:
>
> > Now, spreading the interrupt affinity info by a cpumask pointer is not
> > enough; it meets a problem[1] and is hard to expand in the future.
> >
> > Fix it by:
> >
> >   +-----------------------------------+
> >   |                                   |
> >   |      struct cpumask *affinity     |
> >   |                                   |
> >   +-----------------------------------+
> >                        |
> >   +------------------v-------------------+
> >   |                                      |
> >   |     struct irq_affinity_desc {       |
> >   |         struct cpumask mask;         |
> >   |         unsigned int is_managed : 1; |
> >   |     };                               |
> >   |                                      |
> >   +--------------------------------------+
>
> So, I've applied that lot for 4.21 (or whatever number it will be). That's
> only the first step for solving Kashyap's problem.
>
> IIRC, Kashyap wanted to get initial interrupt spreading for these extra
> magic interrupts as well, but not have them marked managed.
>
> That's trivial to do now with the two queued changes in that area:
>
>  - The rework above
>
>  - The support for interrupt sets from Jens
>
> Just adding a small bitfield to struct irq_affinity which allows to tell
> the core that a particular interrupt set is not managed does the trick.
>
> Untested patch below.
>
> Kashyap, is that what you were looking for and if so, does it work?

Thomas,

We could not test these patches as they did not apply cleanly to the
latest linux-block tree.

Our requirement is:
1. the extra interrupts should be un-managed, and
2. they should be spread to the CPUs of the local NUMA node.

If the interrupts are un-managed but not spread as per our requirement,
the driver/userspace apps can still manage them by spreading them as
required via the irq_set_affinity_hint() API (sketched after the quoted
patch below).

Thanks,
Sumit

>
> Thanks,
>
>         tglx
>
> 8<-----------------
>
> Subject: genirq/affinity: Add support for non-managed affinity sets
> From: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Date: Tue, 18 Dec 2018 16:46:47 +0100
>
> Some drivers need an extra set of interrupts which are not marked managed,
> but should get initial interrupt spreading.
>
> Add a bitmap to struct irq_affinity which allows the driver to mark a
> particular set of interrupts as non managed. Check the bitmap during
> spreading and use the result to mark the interrupts in the sets
> accordingly.
>
> The unmanaged interrupts get initial spreading, but user space can change
> their affinity later on.
>
> Usage example:
>
>         struct irq_affinity affd = { .pre_vectors = 2 };
>         int sets[2];
>
>         /* Fill in sets[] */
>
>         affd.nr_sets = 2;
>         affd.sets = &sets;
>         affd.unmanaged_sets = 0x02;
>
>         ......
>
> So both sets are properly spread out, but the second set is not marked
> managed.
>
> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> ---
>  include/linux/interrupt.h |   10 ++++++----
>  kernel/irq/affinity.c     |   24 ++++++++++++++----------
>  2 files changed, 20 insertions(+), 14 deletions(-)
>
> --- a/kernel/irq/affinity.c
> +++ b/kernel/irq/affinity.c
> @@ -99,7 +99,8 @@ static int __irq_build_affinity_masks(co
>                                 cpumask_var_t *node_to_cpumask,
>                                 const struct cpumask *cpu_mask,
>                                 struct cpumask *nmsk,
> -                               struct irq_affinity_desc *masks)
> +                               struct irq_affinity_desc *masks,
> +                               bool managed)
>  {
>          int n, nodes, cpus_per_vec, extra_vecs, done = 0;
>          int last_affv = firstvec + numvecs;
> @@ -154,6 +155,7 @@ static int __irq_build_affinity_masks(co
>                  }
>                  irq_spread_init_one(&masks[curvec].mask, nmsk,
>                                          cpus_per_vec);
> +                masks[curvec].is_managed = managed;
>          }
>
>          done += v;
> @@ -176,7 +178,8 @@ static int __irq_build_affinity_masks(co
>  static int irq_build_affinity_masks(const struct irq_affinity *affd,
>                                      int startvec, int numvecs, int firstvec,
>                                      cpumask_var_t *node_to_cpumask,
> -                                    struct irq_affinity_desc *masks)
> +                                    struct irq_affinity_desc *masks,
> +                                    bool managed)
>  {
>          int curvec = startvec, nr_present, nr_others;
>          int ret = -ENOMEM;
> @@ -196,7 +199,8 @@ static int irq_build_affinity_masks(cons
>          /* Spread on present CPUs starting from affd->pre_vectors */
>          nr_present = __irq_build_affinity_masks(affd, curvec, numvecs,
>                                                  firstvec, node_to_cpumask,
> -                                                cpu_present_mask, nmsk, masks);
> +                                                cpu_present_mask, nmsk, masks,
> +                                                managed);
>
>          /*
>           * Spread on non present CPUs starting from the next vector to be
> @@ -211,7 +215,7 @@ static int irq_build_affinity_masks(cons
>          cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
>          nr_others = __irq_build_affinity_masks(affd, curvec, numvecs,
>                                                 firstvec, node_to_cpumask,
> -                                               npresmsk, nmsk, masks);
> +                                               npresmsk, nmsk, masks, managed);
>          put_online_cpus();
>
>          if (nr_present < numvecs)
> @@ -268,10 +272,11 @@ irq_create_affinity_masks(int nvecs, con
>
>          for (i = 0, usedvecs = 0; i < nr_sets; i++) {
>                  int this_vecs = affd->sets ? affd->sets[i] : affvecs;
> +                bool managed = !test_bit(i, &affd->unmanaged_sets);
>                  int ret;
>
> -                ret = irq_build_affinity_masks(affd, curvec, this_vecs,
> -                                               curvec, node_to_cpumask, masks);
> +                ret = irq_build_affinity_masks(affd, curvec, this_vecs, curvec,
> +                                               node_to_cpumask, masks, managed);
>                  if (ret) {
>                          kfree(masks);
>                          masks = NULL;
> @@ -289,10 +294,6 @@ irq_create_affinity_masks(int nvecs, con
>          for (; curvec < nvecs; curvec++)
>                  cpumask_copy(&masks[curvec].mask, irq_default_affinity);
>
> -        /* Mark the managed interrupts */
> -        for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++)
> -                masks[i].is_managed = 1;
> -
>  outnodemsk:
>          free_node_to_cpumask(node_to_cpumask);
>          return masks;
> @@ -316,6 +317,9 @@ int irq_calc_affinity_vectors(int minvec
>          if (affd->nr_sets) {
>                  int i;
>
> +                if (WARN_ON_ONCE(affd->nr_sets > BITS_PER_LONG))
> +                        return 0;
> +
>                  for (i = 0, set_vecs = 0; i < affd->nr_sets; i++)
>                          set_vecs += affd->sets[i];
>          } else {
> --- a/include/linux/interrupt.h
> +++ b/include/linux/interrupt.h
> @@ -249,12 +249,14 @@ struct irq_affinity_notify {
>   *                      the MSI(-X) vector space
>   * @nr_sets:            Length of passed in *sets array
>   * @sets:               Number of affinitized sets
> + * @unmanaged_sets:     Bitmap to mark members of @sets as unmanaged
>   */
>  struct irq_affinity {
> -        int     pre_vectors;
> -        int     post_vectors;
> -        int     nr_sets;
> -        int     *sets;
> +        int             pre_vectors;
> +        int             post_vectors;
> +        int             nr_sets;
> +        int             *sets;
> +        unsigned long   unmanaged_sets;
>  };
>
>  /**
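
For illustration, a rough, untested sketch of how a driver might describe two
interrupt sets and mark the second one unmanaged when allocating its MSI-X
vectors. This is not part of Thomas' patch; the set sizes, the function name,
and the use of pci_alloc_irq_vectors_affinity() are assumptions made up for
the example, with only the proposed unmanaged_sets field taken from the patch
above:

#include <linux/interrupt.h>
#include <linux/pci.h>

/*
 * Illustrative only: two interrupt sets, 16 managed I/O queue vectors
 * and 8 extra vectors which get spread but stay unmanaged.  Relies on
 * the proposed 'unmanaged_sets' bitmap from the patch above.
 */
static int example_setup_vectors(struct pci_dev *pdev)
{
        int sets[2] = { 16, 8 };
        struct irq_affinity affd = {
                .pre_vectors    = 2,            /* admin vectors, not spread */
                .nr_sets        = 2,
                .sets           = sets,
                .unmanaged_sets = 0x02,         /* bit 1: second set unmanaged */
        };
        int nvecs = 2 + 16 + 8;                 /* pre_vectors + both sets */

        /* With interrupt sets the driver asks for an exact vector count */
        return pci_alloc_irq_vectors_affinity(pdev, nvecs, nvecs,
                                               PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
                                               &affd);
}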
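
And if the extra vectors come up unmanaged but without the NUMA-local
spreading we need, the driver could still place them itself via affinity
hints, as mentioned above. Another rough, untested sketch; the helper name
and the vector indexing are made up, and the use of cpumask_local_spread()
is an assumption about how the local-node spreading could be done:

#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/pci.h>

/*
 * Illustrative only: spread the unmanaged extra vectors across the CPUs
 * of the device's local NUMA node by setting affinity hints.  The vector
 * numbering (first_extra/nr_extra) is made up for this example.
 */
static void example_spread_extra_vectors(struct pci_dev *pdev,
                                         int first_extra, int nr_extra)
{
        int node = dev_to_node(&pdev->dev);
        int i;

        for (i = 0; i < nr_extra; i++) {
                int irq = pci_irq_vector(pdev, first_extra + i);
                unsigned int cpu = cpumask_local_spread(i, node);

                /* A hint only: user space may still change the affinity */
                irq_set_affinity_hint(irq, cpumask_of(cpu));
        }
}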