pnv_tce() returns a pointer to a TCE entry. Originally the TCE table would be pre-allocated; for the default case of a 2GB window the table needs only a single level, and that is fine. However, if more levels are requested, it is possible to get a race when two threads want a pointer to a TCE entry from the same page of TCEs. This adds a spinlock to handle the race. The alloc==true case is not possible in real mode, so the spinlock is safe for KVM as well. CC: stable@xxxxxxxxxxxxxxx # v4.19+ Fixes: a68bd1267b72 ("powerpc/powernv/ioda: Allocate indirect TCE levels on demand") Signed-off-by: Alexey Kardashevskiy <aik@xxxxxxxxx> --- This fixes the EEHs from https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=110810 --- arch/powerpc/include/asm/iommu.h | 1 + arch/powerpc/platforms/powernv/pci-ioda-tce.c | 21 ++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 2c1845e5e851..1825b4cc0097 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -111,6 +111,7 @@ struct iommu_table { struct iommu_table_ops *it_ops; struct kref it_kref; int it_nid; + spinlock_t it_lock; }; #define IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry) \ diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index e28f03e1eb5e..9a19d61e2b12 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -29,6 +29,7 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl, tbl->it_size = tce_size >> 3; tbl->it_busno = 0; tbl->it_type = TCE_PCI; + spin_lock_init(&tbl->it_lock); } static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift) @@ -60,18 +61,22 @@ static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc) unsigned long tce; if (tmp[n] == 0) { - __be64 *tmp2; - if (!alloc) return NULL; - tmp2 = pnv_alloc_tce_level(tbl->it_nid, -
ilog2(tbl->it_level_size) + 3); - if (!tmp2) - return NULL; + spin_lock(&tbl->it_lock); + if (tmp[n] == 0) { + __be64 *tmp2; - tmp[n] = cpu_to_be64(__pa(tmp2) | - TCE_PCI_READ | TCE_PCI_WRITE); + tmp2 = pnv_alloc_tce_level(tbl->it_nid, + ilog2(tbl->it_level_size) + 3); + if (tmp2) + tmp[n] = cpu_to_be64(__pa(tmp2) | + TCE_PCI_READ | TCE_PCI_WRITE); + } + spin_unlock(&tbl->it_lock); + if (tmp[n] == 0) + return NULL; } tce = be64_to_cpu(tmp[n]); -- 2.17.1