On Tue, Jan 14, 2020 at 12:38:19PM +0000, Will Deacon wrote: > > +static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, > > + int ssid, bool leaf) > > +{ > > + size_t i; > > + unsigned long flags; > > + struct arm_smmu_master *master; > > + struct arm_smmu_device *smmu = smmu_domain->smmu; > > + struct arm_smmu_cmdq_ent cmd = { > > + .opcode = CMDQ_OP_CFGI_CD, > > + .cfgi = { > > + .ssid = ssid, > > + .leaf = leaf, > > + }, > > + }; > > + > > + spin_lock_irqsave(&smmu_domain->devices_lock, flags); > > + list_for_each_entry(master, &smmu_domain->devices, domain_head) { > > + for (i = 0; i < master->num_sids; i++) { > > + cmd.cfgi.sid = master->sids[i]; > > + arm_smmu_cmdq_issue_cmd(smmu, &cmd); > > + } > > + } > > + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); > > + > > + arm_smmu_cmdq_issue_sync(smmu); > > Can you send a follow-up patch converting this to batch submission, please? Ok > > +} > > + > > static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu, > > struct arm_smmu_cd_table *table, > > size_t num_entries) > > @@ -1498,34 +1541,65 @@ static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr) > > return val; > > } > > > > -static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu, > > - struct arm_smmu_s1_cfg *cfg) > > +static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, > > + int ssid, struct arm_smmu_ctx_desc *cd) > > { > > - u64 val; > > - __le64 *cdptr = cfg->table.ptr; > > - > > /* > > - * We don't need to issue any invalidation here, as we'll invalidate > > - * the STE when installing the new entry anyway. > > + * This function handles the following cases: > > + * > > + * (1) Install primary CD, for normal DMA traffic (SSID = 0). > > + * (2) Install a secondary CD, for SID+SSID traffic. > > + * (3) Update ASID of a CD. Atomically write the first 64 bits of the > > + * CD, then invalidate the old entry and mappings. > > + * (4) Remove a secondary CD. 
> > */ > > - val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) | > > -#ifdef __BIG_ENDIAN > > - CTXDESC_CD_0_ENDI | > > -#endif > > - CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET | > > - CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) | > > - CTXDESC_CD_0_V; > > + u64 val; > > + bool cd_live; > > + struct arm_smmu_device *smmu = smmu_domain->smmu; > > + __le64 *cdptr = smmu_domain->s1_cfg.table.ptr + ssid * > > + CTXDESC_CD_DWORDS; > > > > - /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ > > - if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) > > - val |= CTXDESC_CD_0_S; > > + val = le64_to_cpu(cdptr[0]); > > + cd_live = !!(val & CTXDESC_CD_0_V); > > > > - cdptr[0] = cpu_to_le64(val); > > + if (!cd) { /* (4) */ > > + val = 0; > > + } else if (cd_live) { /* (3) */ > > + val &= ~CTXDESC_CD_0_ASID; > > + val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid); > > + /* > > + * Until CD+TLB invalidation, both ASIDs may be used for tagging > > + * this substream's traffic > > + */ > > I don't think you need to change anything here, but I do find it a little > scary that we can modify live CDs like this. However, given that the > hardware is permitted to cache the structures regardless of validity, it > appears to be the only option. Terrifying! > > > + } else { /* (1) and (2) */ > > + cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK); > > Can you use FIELD_PREP here too? No, FIELD_PREP will shift ttbr left by 4 bits > > + cdptr[2] = 0; > > + cdptr[3] = cpu_to_le64(cd->mair); > > + > > + /* > > + * STE is live, and the SMMU might read dwords of this CD in any > > + * order. Ensure that it observes valid values before reading > > + * V=1. 
> > + */ > > + arm_smmu_sync_cd(smmu_domain, ssid, true); > > > > - val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK; > > - cdptr[1] = cpu_to_le64(val); > > + val = arm_smmu_cpu_tcr_to_cd(cd->tcr) | > > +#ifdef __BIG_ENDIAN > > + CTXDESC_CD_0_ENDI | > > +#endif > > + CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET | > > + CTXDESC_CD_0_AA64 | > > + FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) | > > + CTXDESC_CD_0_V; > > > > - cdptr[3] = cpu_to_le64(cfg->cd.mair); > > + /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ > > + if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) > > + val |= CTXDESC_CD_0_S; > > + } > > + > > + WRITE_ONCE(cdptr[0], cpu_to_le64(val)); > > Can you add a comment here citing 3.21.3 ("Configuration structures and > configuration invalidation completion") please? Specifically, the note that > states: > > | The size of single-copy atomic reads made by the SMMU is IMPLEMENTATION > | DEFINED but must be at least 64 bits. > > Because that's really crucial to the WRITE_ONCE() above! > > Shouldn't we also do the same thing for the STE side of things? I think so, > and you can just comment both of them with the quote and cite the comment from > the other callsite. Yes, makes sense Thanks, Jean