On 25/09/2019 14:04, Zenghui Yu wrote: > Hi Marc, > > Some questions about this patch, mostly to confirm that I would > understand things here correctly. > > On 2019/9/24 2:25, Marc Zyngier wrote: >> GICv4.1 defines a new VPE table that is potentially shared between >> both the ITSs and the redistributors, following complicated affinity >> rules. >> >> To make things more confusing, the programming of this table at >> the redistributor level is reusing the GICv4.0 GICR_VPROPBASER register >> for something completely different. >> >> The code flow is somewhat complexified by the need to respect the >> affinities required by the HW, meaning that tables can either be >> inherited from a previously discovered ITS or redistributor. >> >> Signed-off-by: Marc Zyngier <maz@xxxxxxxxxx> >> --- > > [...] > >> @@ -1962,6 +1965,65 @@ static bool its_parse_indirect_baser(struct its_node *its, >> return indirect; >> } >> >> +static u32 compute_common_aff(u64 val) >> +{ >> + u32 aff, clpiaff; >> + >> + aff = FIELD_GET(GICR_TYPER_AFFINITY, val); >> + clpiaff = FIELD_GET(GICR_TYPER_COMMON_LPI_AFF, val); >> + >> + return aff & ~(GENMASK(31, 0) >> (clpiaff * 8)); >> +} >> + >> +static u32 compute_its_aff(struct its_node *its) >> +{ >> + u64 val; >> + u32 svpet; >> + >> + /* >> + * Reencode the ITS SVPET and MPIDR as a GICR_TYPER, and compute >> + * the resulting affinity. We then use that to see if this match >> + * our own affinity. >> + */ >> + svpet = FIELD_GET(GITS_TYPER_SVPET, its->typer); >> + val = FIELD_PREP(GICR_TYPER_COMMON_LPI_AFF, svpet); >> + val |= FIELD_PREP(GICR_TYPER_AFFINITY, its->mpidr); >> + return compute_common_aff(val); >> +} >> + >> +static struct its_node *find_sibbling_its(struct its_node *cur_its) >> +{ >> + struct its_node *its; >> + u32 aff; >> + >> + if (!FIELD_GET(GITS_TYPER_SVPET, cur_its->typer)) >> + return NULL; >> + >> + aff = compute_its_aff(cur_its); >> + >> + list_for_each_entry(its, &its_nodes, entry) { >> + u64 baser; >> + >> + if (!is_v4_1(its) || its == cur_its) >> + continue; >> + >> + if (!FIELD_GET(GITS_TYPER_SVPET, its->typer)) >> + continue; >> + >> + if (aff != compute_its_aff(its)) >> + continue; >> + >> + /* GICv4.1 guarantees that the vPE table is GITS_BASER2 */ >> + baser = its->tables[2].val; >> + if (!(baser & GITS_BASER_VALID)) >> + continue; >> + >> + return its; >> + } >> + >> + return NULL; >> +} >> + >> static void its_free_tables(struct its_node *its) >> { >> int i; >> @@ -2004,6 +2066,15 @@ static int its_alloc_tables(struct its_node *its) >> break; >> >> case GITS_BASER_TYPE_VCPU: >> + if (is_v4_1(its)) { >> + struct its_node *sibbling; >> + >> + if ((sibbling = find_sibbling_its(its))) { >> + its->tables[2] = sibbling->tables[2]; > > This means thst the vPE table is shared between ITSs which are under the > same affinity group? That's indeed what this is trying to do... > Don't we need to set GITS_BASER (by its_setup_baser()) here to tell this > ITS where the vPE table lacates? Absolutely. This is a bug. I didn't spot it as my model only has a single ITS. > >> + continue; >> + } >> + } >> + >> indirect = its_parse_indirect_baser(its, baser, >> psz, &order, >> ITS_MAX_VPEID_BITS); >> @@ -2025,6 +2096,212 @@ static int its_alloc_tables(struct its_node *its) >> return 0; >> } >> >> +static u64 inherit_vpe_l1_table_from_its(void) >> +{ >> + struct its_node *its; >> + u64 val; >> + u32 aff; >> + >> + val = gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER); >> + aff = compute_common_aff(val); >> + >> + list_for_each_entry(its, &its_nodes, entry) { >> + u64 baser; >> + >> + if (!is_v4_1(its)) >> + continue; >> + >> + if (!FIELD_GET(GITS_TYPER_SVPET, its->typer)) >> + continue; >> + >> + if (aff != compute_its_aff(its)) >> + continue; >> + >> + /* GICv4.1 guarantees that the vPE table is GITS_BASER2 */ >> + baser = its->tables[2].val; >> + if (!(baser & GITS_BASER_VALID)) >> + continue; >> + >> + /* We have a winner! */ >> + val = GICR_VPROPBASER_4_1_VALID; >> + if (baser & GITS_BASER_INDIRECT) >> + val |= GICR_VPROPBASER_4_1_INDIRECT; >> + val |= FIELD_PREP(GICR_VPROPBASER_4_1_PAGE_SIZE, >> + FIELD_GET(GITS_BASER_PAGE_SIZE_MASK, baser)); >> + val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, >> + GITS_BASER_ADDR_48_to_52(baser) >> 12); > > I remember the spec says, > GITS_BASER2 "points to" the ITS *vPE table*, which provides mappings > from the vPEID to target Redistributor and associated vpt address. > In GICv4.1 GICR_VPROPBASER "points to" the *vPE Configuration table*, > which stores the locations of each vPE's LPI configuration and pending > table. > > And the code here says, if we can find an ITS (which is under the same > CommonLPIAff group with this Redistributor) has already been probed and > allocated an vPE table, then use this vPE table as this Redist's vPE > Configuration table. > So I infer that in GICv4.1, *vPE table* and *vPE Configuration table* > are actually the same concept? And this table is shared between ITSs > and Redists which are under the same affinity group? > Please fix me if I have any misunderstanding. Indeed. The whole idea is that ITSs and RDs can share a vPE table if they are in the same affinity group (such as a socket, for example). This is what is missing from v4.0 where the ITS knows about vPEs, but doesn't know about residency. With that in place, the HW is able to do a lot more for us. > >> + val |= FIELD_PREP(GICR_VPROPBASER_SHAREABILITY_MASK, >> + FIELD_GET(GITS_BASER_SHAREABILITY_MASK, baser)); >> + val |= FIELD_PREP(GICR_VPROPBASER_INNER_CACHEABILITY_MASK, >> + FIELD_GET(GITS_BASER_INNER_CACHEABILITY_MASK, baser)); >> + val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, GITS_BASER_NR_PAGES(baser) - 1); >> + >> + return val; >> + } >> + >> + return 0; >> +} >> + >> +static u64 inherit_vpe_l1_table_from_rd(cpumask_t **mask) >> +{ >> + u32 aff; >> + u64 val; >> + int cpu; >> + >> + val = gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER); >> + aff = compute_common_aff(val); >> + >> + for_each_possible_cpu(cpu) { >> + void __iomem *base = gic_data_rdist_cpu(cpu)->rd_base; >> + u32 tmp; >> + >> + if (!base || cpu == smp_processor_id()) >> + continue; >> + >> + val = gic_read_typer(base + GICR_TYPER); >> + tmp = compute_common_aff(val); >> + if (tmp != aff) >> + continue; >> + >> + /* >> + * At this point, we have a victim. This particular CPU >> + * has already booted, and has an affinity that matches >> + * ours wrt CommonLPIAff. Let's use its own VPROPBASER. >> + * Make sure we don't write the Z bit in that case. >> + */ >> + val = gits_read_vpropbaser(base + SZ_128K + GICR_VPROPBASER); >> + val &= ~GICR_VPROPBASER_4_1_Z; >> + >> + *mask = gic_data_rdist_cpu(cpu)->vpe_table_mask; >> + >> + return val; >> + } >> + >> + return 0; >> +} >> + >> +static int allocate_vpe_l1_table(void) >> +{ >> + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); >> + u64 val, esz, gpsz, npg, pa; >> + unsigned int psz = SZ_64K; >> + unsigned int np, epp; >> + struct page *page; >> + >> + if (!gic_rdists->has_rvpeid) >> + return 0; >> + >> + /* >> + * if VPENDBASER.Valid is set, disable any previously programmed >> + * VPE by setting PendingLast while clearing Valid. This has the >> + * effect of making sure no doorbell will be generated and we can >> + * then safely clear VPROPBASER.Valid. >> + */ >> + if (gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER) & GICR_VPENDBASER_Valid) >> + gits_write_vpendbaser(GICR_VPENDBASER_PendingLast, >> + vlpi_base + GICR_VPENDBASER); >> + >> + /* >> + * If we can inherit the configuration from another RD, let's do >> + * so. Otherwise, we have to go through the allocation process. We >> + * assume that all RDs have the exact same requirements, as >> + * nothing will work otherwise. >> + */ >> + val = inherit_vpe_l1_table_from_rd(&gic_data_rdist()->vpe_table_mask); >> + if (val & GICR_VPROPBASER_4_1_VALID) >> + goto out; >> + >> + gic_data_rdist()->vpe_table_mask = kzalloc(sizeof(cpumask_t), GFP_KERNEL); > > I think we should check the allocation failure here. Sure. > >> + >> + val = inherit_vpe_l1_table_from_its(); >> + if (val & GICR_VPROPBASER_4_1_VALID) >> + goto out; >> + >> + /* First probe the page size */ >> + val = FIELD_PREP(GICR_VPROPBASER_4_1_PAGE_SIZE, GIC_PAGE_SIZE_64K); >> + gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); >> + val = gits_read_vpropbaser(vlpi_base + GICR_VPROPBASER); >> + gpsz = FIELD_GET(GICR_VPROPBASER_4_1_PAGE_SIZE, val); >> + >> + switch (gpsz) { >> + default: >> + gpsz = GIC_PAGE_SIZE_4K; >> + /* fall through */ >> + case GIC_PAGE_SIZE_4K: >> + psz = SZ_4K; >> + break; >> + case GIC_PAGE_SIZE_16K: >> + psz = SZ_16K; >> + break; >> + case GIC_PAGE_SIZE_64K: >> + psz = SZ_64K; >> + break; >> + } >> + >> + /* >> + * Start populating the register from scratch, including RO fields >> + * (which we want to print in debug cases...) >> + */ >> + val = 0; >> + val |= FIELD_PREP(GICR_VPROPBASER_4_1_PAGE_SIZE, gpsz); >> + esz = FIELD_GET(GICR_VPROPBASER_4_1_ENTRY_SIZE, val); >> + val |= FIELD_PREP(GICR_VPROPBASER_4_1_ENTRY_SIZE, esz); > > 'esz' seems always be 0 here? Yup, that's broken. Not sure what I had in mind here. esz (element size) should be extracted before resetting val, and ORed back in the register copy for debug purposes. > >> + >> + /* How many entries per GIC page? */ >> + esz++; >> + epp = psz / (esz * SZ_8); >> + >> + /* >> + * If we need more than just a single L1 page, flag the table >> + * as indirect and compute the number of required L1 pages. >> + */ >> + if (epp < ITS_MAX_VPEID) { >> + int nl2; >> + >> + gic_rdists->flags |= RDIST_FLAGS_VPE_INDIRECT; >> + val |= GICR_VPROPBASER_4_1_INDIRECT; >> + >> + /* Number of L2 pages required to cover the VPEID space */ >> + nl2 = DIV_ROUND_UP(ITS_MAX_VPEID, epp); >> + >> + /* Number of L1 pages to point to the L2 pages */ >> + npg = DIV_ROUND_UP(nl2 * SZ_8, psz); >> + } else { >> + npg = 1; >> + } >> + >> + val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, npg); >> + >> + /* Right, that's the number of CPU pages we need for L1 */ >> + np = DIV_ROUND_UP(npg * psz, PAGE_SIZE); >> + >> + pr_debug("np = %d, npg = %lld, psz = %d, epp = %d, esz = %lld\n", >> + np, npg, psz, epp, esz); >> + page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(np * PAGE_SIZE)); >> + if (!page) >> + return -ENOMEM; >> + >> + gic_data_rdist()->vpe_l1_page = page; >> + pa = virt_to_phys(page_address(page)); >> + WARN_ON(!IS_ALIGNED(pa, psz)); >> + >> + val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, pa >> 12); >> + val |= GICR_VPROPBASER_RaWb; >> + val |= GICR_VPROPBASER_InnerShareable; >> + val |= GICR_VPROPBASER_4_1_Z; >> + val |= GICR_VPROPBASER_4_1_VALID; >> + >> +out: >> + gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); >> + cpumask_set_cpu(smp_processor_id(), gic_data_rdist()->vpe_table_mask); >> + >> + pr_debug("CPU%d: VPROPBASER = %llx %*pbl\n", >> + smp_processor_id(), val, >> + cpumask_pr_args(gic_data_rdist()->vpe_table_mask)); >> + >> + return 0; >> +} >> + >> static int its_alloc_collections(struct its_node *its) >> { >> int i; >> @@ -2224,7 +2501,7 @@ static void its_cpu_init_lpis(void) >> val |= GICR_CTLR_ENABLE_LPIS; >> writel_relaxed(val, rbase + GICR_CTLR); >> >> - if (gic_rdists->has_vlpis) { >> + if (gic_rdists->has_vlpis && !gic_rdists->has_rvpeid) { >> void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); >> >> /* >> @@ -2248,6 +2525,16 @@ static void its_cpu_init_lpis(void) >> WARN_ON(val & GICR_VPENDBASER_Dirty); >> } >> >> + if (allocate_vpe_l1_table()) { >> + /* >> + * If the allocation has failed, we're in massive trouble. >> + * Disable direct injection, and pray that no VM was >> + * already running... >> + */ >> + gic_rdists->has_rvpeid = false; >> + gic_rdists->has_vlpis = false; >> + } >> + >> /* Make sure the GIC has seen the above */ >> dsb(sy); >> out: Thanks for having had a look! M. -- Jazz is not dead, it just smells funny... _______________________________________________ kvmarm mailing list kvmarm@xxxxxxxxxxxxxxxxxxxxx https://lists.cs.columbia.edu/mailman/listinfo/kvmarm