On Tue, Aug 13, 2024 at 9:39 AM Rahul Jain <Rahul.Jain@xxxxxxx> wrote:
>
> when trying to enable p2p the amdgpu_device_is_peer_accessible()
> checks the condition where address_mask overlaps the aper_base
> and hence returns 0, due to which the p2p disables for this platform
>
> IOMMU should remap the BAR addresses so the device can access
> them. Hence check if iommu_remap and return true.
>
> Signed-off-by: Rahul Jain <Rahul.Jain@xxxxxxx>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 30 +++++++++++++++++-----
> 2 files changed, 26 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index f3980b40f2ce..618b44f5df85 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1138,6 +1138,8 @@ struct amdgpu_device {
>
> bool ram_is_direct_mapped;
>
> + bool iommu_remap;
> +
> struct list_head ras_list;
>
> struct ip_discovery_top *ip_top;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index a6b8d0ba4758..927b076aa952 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3950,6 +3950,9 @@ static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)

We may want to rename amdgpu_device_check_iommu_direct_map() to
amdgpu_device_check_iommu() since we are getting several pieces of data now.

> domain = iommu_get_domain_for_dev(adev->dev);
> if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
> adev->ram_is_direct_mapped = true;
> +
> + if (!domain || domain->type != IOMMU_DOMAIN_IDENTITY)
> + adev->iommu_remap = true;

This is wrong, it should be:

if (domain && (domain->type == IOMMU_DOMAIN_DMA || domain->type == IOMMU_DOMAIN_DMA_FQ))

> }
>
> static const struct attribute *amdgpu_dev_attributes[] = {
> @@ -6127,6 +6130,19 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
> struct amdgpu_device *peer_adev)
> {
> #ifdef CONFIG_HSA_AMD_P2P
> + /**
> + * There are chances when we are in amdgpu_device_init
> + * and either of adev have not called amdgpu_device_check_iommu_direct_map,
> + * Hence check if iommu_remap is available or not.
> + */
> + if (!adev->iommu_remap)
> + amdgpu_device_check_iommu_direct_map(adev);
> + if (!peer_adev->iommu_remap)
> + amdgpu_device_check_iommu_direct_map(peer_adev);

This is ugly. We should fix this up so we guarantee that
amdgpu_device_check_iommu_direct_map() has been called by this point or
just check this explicitly everywhere rather than caching it in the adev
structure.

> +
> + bool remap = adev->iommu_remap;
> + bool peer_remap = peer_adev->iommu_remap;

Need to declare variables before code. Also this function looks at
whether peer_adev can access adev via DMA, so we only need to look at
peer_adev for DMA remapping.

> +
> uint64_t address_mask = peer_adev->dev->dma_mask ?
> ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
> resource_size_t aper_limit =
> @@ -6135,13 +6151,15 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
> !adev->gmc.xgmi.connected_to_cpu &&
> !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
>
> - return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
> - adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
> - !(adev->gmc.aper_base & address_mask ||
> - aper_limit & address_mask));
> -#else
> - return false;
> + if (remap && peer_remap)
> + return pcie_p2p && p2p_access;
> + else
> + return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
> + adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
> + !(adev->gmc.aper_base & address_mask ||
> + aper_limit & address_mask));
> #endif
> + return false;

This needs to be fixed up:

if (peer_remap)
        /* IOMMU is remapping DMA for peer_adev so all accesses should be within peer_adev's DMA mask */
        return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
                adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
else
        /* No IOMMU remapping so make sure the adev's aperture fits into peer_adev's dma mask */
        return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
                adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
                !(adev->gmc.aper_base & address_mask ||
                  aper_limit & address_mask));

Alex

> }
>
> int amdgpu_device_baco_enter(struct drm_device *dev)
> --
> 2.34.1
>