Re: [PATCH] drm/ttm: use dma_alloc_pages for the page pool

Christian König <christian.koenig@xxxxxxx> · Tue, 11 May 2021 09:35:20 +0200

Am 11.05.21 um 08:05 schrieb Christoph Hellwig:
Use the dma_alloc_pages allocator for the TTM pool allocator.
This allocator is a front end to the page allocator which takes the
DMA mask of the device into account, thus offering the best of both
worlds of the two existing allocator versions.  This conversion also
removes the ugly layering violation where the TTM pool assumes what
kind of virtual address dma_alloc_attrs can return.

Oh yes please. But please split that up into multiple patches.

At least one switching from dma_alloc_attrs() to dma_alloc_pages() 
separately.

We certainly going to need the drm_need_swiotlb() for userptr support 
(unless we add some approach for drivers to opt out of swiotlb).

Then while I really want to get rid of GFP_DMA32 as well I'm not 100% 
sure if we can handle this without the flag.

And last we need something better to store the DMA address and order 
than allocating a separate memory object for each page.

Christian.


Signed-off-by: Christoph Hellwig <hch@xxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h     |   1 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |   4 +-
  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c   |   1 -
  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   |   1 -
  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   |   1 -
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |   1 -
  drivers/gpu/drm/drm_cache.c             |  31 -----
  drivers/gpu/drm/drm_gem_vram_helper.c   |   3 +-
  drivers/gpu/drm/nouveau/nouveau_ttm.c   |   8 +-
  drivers/gpu/drm/qxl/qxl_ttm.c           |   3 +-
  drivers/gpu/drm/radeon/radeon.h         |   1 -
  drivers/gpu/drm/radeon/radeon_device.c  |   1 -
  drivers/gpu/drm/radeon/radeon_ttm.c     |   4 +-
  drivers/gpu/drm/ttm/ttm_device.c        |   7 +-
  drivers/gpu/drm/ttm/ttm_pool.c          | 178 ++++--------------------
  drivers/gpu/drm/ttm/ttm_tt.c            |  25 +---
  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c     |   4 +-
  include/drm/drm_cache.h                 |   1 -
  include/drm/ttm/ttm_device.h            |   3 +-
  include/drm/ttm/ttm_pool.h              |   9 +-
  20 files changed, 41 insertions(+), 246 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index dc3a69296321b3..5f40527eeef1ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -819,7 +819,6 @@ struct amdgpu_device {
  	int				usec_timeout;
  	const struct amdgpu_asic_funcs	*asic_funcs;
  	bool				shutdown;
-	bool				need_swiotlb;
  	bool				accel_working;
  	struct notifier_block		acpi_nb;
  	struct amdgpu_i2c_chan		*i2c_bus[AMDGPU_MAX_I2C_BUS];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3bef0432cac2f7..9bf17b44cba6fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1705,9 +1705,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
  	/* No others user of address space so set it to 0 */
  	r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
  			       adev_to_drm(adev)->anon_inode->i_mapping,
-			       adev_to_drm(adev)->vma_offset_manager,
-			       adev->need_swiotlb,
-			       dma_addressing_limited(adev->dev));
+			       adev_to_drm(adev)->vma_offset_manager);
  	if (r) {
  		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
  		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 405d6ad09022ca..2d4fa754513033 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -846,7 +846,6 @@ static int gmc_v6_0_sw_init(void *handle)
  		dev_warn(adev->dev, "No suitable DMA available.\n");
  		return r;
  	}
-	adev->need_swiotlb = drm_need_swiotlb(44);
  
  	r = gmc_v6_0_init_microcode(adev);
  	if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 210ada2289ec9c..a504db24f4c2a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1025,7 +1025,6 @@ static int gmc_v7_0_sw_init(void *handle)
  		pr_warn("No suitable DMA available\n");
  		return r;
  	}
-	adev->need_swiotlb = drm_need_swiotlb(40);
  
  	r = gmc_v7_0_init_microcode(adev);
  	if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index e4f27b3f28fb58..42e7b1eb84b3bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1141,7 +1141,6 @@ static int gmc_v8_0_sw_init(void *handle)
  		pr_warn("No suitable DMA available\n");
  		return r;
  	}
-	adev->need_swiotlb = drm_need_swiotlb(40);
  
  	r = gmc_v8_0_init_microcode(adev);
  	if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 455bb91060d0bc..f74784b3423740 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1548,7 +1548,6 @@ static int gmc_v9_0_sw_init(void *handle)
  		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
  		return r;
  	}
-	adev->need_swiotlb = drm_need_swiotlb(44);
  
  	if (adev->gmc.xgmi.supported) {
  		r = adev->gfxhub.funcs->get_xgmi_info(adev);
diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index 79a50ef1250fd1..7e6eb4b33d0069 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -178,34 +178,3 @@ drm_clflush_virt_range(void *addr, unsigned long length)
  #endif
  }
  EXPORT_SYMBOL(drm_clflush_virt_range);
-
-bool drm_need_swiotlb(int dma_bits)
-{
-	struct resource *tmp;
-	resource_size_t max_iomem = 0;
-
-	/*
-	 * Xen paravirtual hosts require swiotlb regardless of requested dma
-	 * transfer size.
-	 *
-	 * NOTE: Really, what it requires is use of the dma_alloc_coherent
-	 *       allocator used in ttm_dma_populate() instead of
-	 *       ttm_populate_and_map_pages(), which bounce buffers so much in
-	 *       Xen it leads to swiotlb buffer exhaustion.
-	 */
-	if (xen_pv_domain())
-		return true;
-
-	/*
-	 * Enforce dma_alloc_coherent when memory encryption is active as well
-	 * for the same reasons as for Xen paravirtual hosts.
-	 */
-	if (mem_encrypt_active())
-		return true;
-
-	for (tmp = iomem_resource.child; tmp; tmp = tmp->sibling)
-		max_iomem = max(max_iomem,  tmp->end);
-
-	return max_iomem > ((u64)1 << dma_bits);
-}
-EXPORT_SYMBOL(drm_need_swiotlb);
diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
index 2b7c3a07956d59..6ce93d2d84d0e9 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -1034,8 +1034,7 @@ static int drm_vram_mm_init(struct drm_vram_mm *vmm, struct drm_device *dev,
  
  	ret = ttm_device_init(&vmm->bdev, &bo_driver, dev->dev,
  				 dev->anon_inode->i_mapping,
-				 dev->vma_offset_manager,
-				 false, true);
+				 dev->vma_offset_manager);
  	if (ret)
  		return ret;
  
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index e8b506a6685b56..4876a637933980 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -285,7 +285,6 @@ nouveau_ttm_init(struct nouveau_drm *drm)
  	struct nvkm_pci *pci = device->pci;
  	struct nvif_mmu *mmu = &drm->client.mmu;
  	struct drm_device *dev = drm->dev;
-	bool need_swiotlb = false;
  	int typei, ret;
  
  	ret = nouveau_ttm_init_host(drm, 0);
@@ -320,14 +319,9 @@ nouveau_ttm_init(struct nouveau_drm *drm)
  		drm->agp.cma = pci->agp.cma;
  	}
  
-#if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86)
-	need_swiotlb = is_swiotlb_active();
-#endif
-
  	ret = ttm_device_init(&drm->ttm.bdev, &nouveau_bo_driver, drm->dev->dev,
  				  dev->anon_inode->i_mapping,
-				  dev->vma_offset_manager, need_swiotlb,
-				  drm->client.mmu.dmabits <= 32);
+				  dev->vma_offset_manager);
  	if (ret) {
  		NV_ERROR(drm, "error initialising bo driver, %d\n", ret);
  		return ret;
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 47afe95d04a1a7..823eb562ba3d53 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -186,8 +186,7 @@ int qxl_ttm_init(struct qxl_device *qdev)
  	/* No others user of address space so set it to 0 */
  	r = ttm_device_init(&qdev->mman.bdev, &qxl_bo_driver, NULL,
  			    qdev->ddev.anon_inode->i_mapping,
-			    qdev->ddev.vma_offset_manager,
-			    false, false);
+			    qdev->ddev.vma_offset_manager);
  	if (r) {
  		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
  		return r;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 42281fce552e6e..7d41302c55886d 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2379,7 +2379,6 @@ struct radeon_device {
  	struct radeon_wb		wb;
  	struct radeon_dummy_page	dummy_page;
  	bool				shutdown;
-	bool				need_swiotlb;
  	bool				accel_working;
  	bool				fastfb_working; /* IGP feature*/
  	bool				needs_reset, in_reset;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index cc445c4cba2e3d..3efdb5fb55ad57 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1385,7 +1385,6 @@ int radeon_device_init(struct radeon_device *rdev,
  		pr_warn("radeon: No suitable DMA available\n");
  		return r;
  	}
-	rdev->need_swiotlb = drm_need_swiotlb(dma_bits);
  
  	/* Registers mapping */
  	/* TODO: block userspace mapping of io register */
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 380b3007fd0b0d..28334f86eaa24a 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -717,9 +717,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
  	/* No others user of address space so set it to 0 */
  	r = ttm_device_init(&rdev->mman.bdev, &radeon_bo_driver, rdev->dev,
  			       rdev->ddev->anon_inode->i_mapping,
-			       rdev->ddev->vma_offset_manager,
-			       rdev->need_swiotlb,
-			       dma_addressing_limited(&rdev->pdev->dev));
+			       rdev->ddev->vma_offset_manager);
  	if (r) {
  		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
  		return r;
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 510e3e001dabe0..289aab652486ef 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -198,8 +198,6 @@ static void ttm_device_delayed_workqueue(struct work_struct *work)
   * @dev: The core kernel device pointer for DMA mappings and allocations.
   * @mapping: The address space to use for this bo.
   * @vma_manager: A pointer to a vma manager.
- * @use_dma_alloc: If coherent DMA allocation API should be used.
- * @use_dma32: If we should use GFP_DMA32 for device memory allocations.
   *
   * Initializes a struct ttm_device:
   * Returns:
@@ -207,8 +205,7 @@ static void ttm_device_delayed_workqueue(struct work_struct *work)
   */
  int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
  		    struct device *dev, struct address_space *mapping,
-		    struct drm_vma_offset_manager *vma_manager,
-		    bool use_dma_alloc, bool use_dma32)
+		    struct drm_vma_offset_manager *vma_manager)
  {
  	struct ttm_global *glob = &ttm_glob;
  	int ret;
@@ -223,7 +220,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
  	bdev->funcs = funcs;
  
  	ttm_init_sysman(bdev);
-	ttm_pool_init(&bdev->pool, dev, use_dma_alloc, use_dma32);
+	ttm_pool_init(&bdev->pool, dev);
  
  	bdev->vma_manager = vma_manager;
  	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index cb38b1a17b0985..51b6585e54470c 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -50,11 +50,10 @@
   * struct ttm_pool_dma - Helper object for coherent DMA mappings
   *
   * @addr: original DMA address returned for the mapping
- * @vaddr: original vaddr return for the mapping and order in the lower bits
   */
  struct ttm_pool_dma {
  	dma_addr_t addr;
-	unsigned long vaddr;
+	unsigned int order;
  };
  
  static unsigned long page_pool_size;
@@ -64,12 +63,6 @@ module_param(page_pool_size, ulong, 0644);
  
  static atomic_long_t allocated_pages;
  
-static struct ttm_pool_type global_write_combined[MAX_ORDER];
-static struct ttm_pool_type global_uncached[MAX_ORDER];
-
-static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
-static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
-
  static struct mutex shrinker_lock;
  static struct list_head shrinker_list;
  static struct shrinker mm_shrinker;
@@ -78,10 +71,8 @@ static struct shrinker mm_shrinker;
  static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
  					unsigned int order)
  {
-	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
  	struct ttm_pool_dma *dma;
  	struct page *p;
-	void *vaddr;
  
  	/* Don't set the __GFP_COMP flag for higher order allocations.
  	 * Mapping pages directly into an userspace process and calling
@@ -91,34 +82,16 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
  		gfp_flags |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN |
  			__GFP_KSWAPD_RECLAIM;
  
-	if (!pool->use_dma_alloc) {
-		p = alloc_pages(gfp_flags, order);
-		if (p)
-			p->private = order;
-		return p;
-	}
-
  	dma = kmalloc(sizeof(*dma), GFP_KERNEL);
  	if (!dma)
  		return NULL;
  
-	if (order)
-		attr |= DMA_ATTR_NO_WARN;
-
-	vaddr = dma_alloc_attrs(pool->dev, (1ULL << order) * PAGE_SIZE,
-				&dma->addr, gfp_flags, attr);
-	if (!vaddr)
+	p = dma_alloc_pages(pool->dev, (1ULL << order) * PAGE_SIZE,
+				&dma->addr, DMA_BIDIRECTIONAL, gfp_flags);
+	if (!p)
  		goto error_free;
  
-	/* TODO: This is an illegal abuse of the DMA API, but we need to rework
-	 * TTM page fault handling and extend the DMA API to clean this up.
-	 */
-	if (is_vmalloc_addr(vaddr))
-		p = vmalloc_to_page(vaddr);
-	else
-		p = virt_to_page(vaddr);
-
-	dma->vaddr = (unsigned long)vaddr | order;
+	dma->order = order;
  	p->private = (unsigned long)dma;
  	return p;
  
@@ -131,9 +104,7 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
  static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
  			       unsigned int order, struct page *p)
  {
-	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
  	struct ttm_pool_dma *dma;
-	void *vaddr;
  
  #ifdef CONFIG_X86
  	/* We don't care that set_pages_wb is inefficient here. This is only
@@ -143,18 +114,14 @@ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
  		set_pages_wb(p, 1 << order);
  #endif
  
-	if (!pool || !pool->use_dma_alloc) {
+	if (!pool) {
  		__free_pages(p, order);
  		return;
  	}
  
-	if (order)
-		attr |= DMA_ATTR_NO_WARN;
-
  	dma = (void *)p->private;
-	vaddr = (void *)(dma->vaddr & PAGE_MASK);
-	dma_free_attrs(pool->dev, (1UL << order) * PAGE_SIZE, vaddr, dma->addr,
-		       attr);
+	dma_free_pages(pool->dev, (1UL << order) * PAGE_SIZE, p, dma->addr,
+			DMA_BIDIRECTIONAL);
  	kfree(dma);
  }
  
@@ -184,20 +151,12 @@ static int ttm_pool_apply_caching(struct page **first, struct page **last,
  static int ttm_pool_map(struct ttm_pool *pool, unsigned int order,
  			struct page *p, dma_addr_t **dma_addr)
  {
-	dma_addr_t addr;
+	struct ttm_pool_dma *dma = (void *)p->private;
+	dma_addr_t addr = dma->addr;
  	unsigned int i;
  
-	if (pool->use_dma_alloc) {
-		struct ttm_pool_dma *dma = (void *)p->private;
-
-		addr = dma->addr;
-	} else {
-		size_t size = (1ULL << order) * PAGE_SIZE;
-
-		addr = dma_map_page(pool->dev, p, 0, size, DMA_BIDIRECTIONAL);
-		if (dma_mapping_error(pool->dev, addr))
-			return -EFAULT;
-	}
+	dma_sync_single_for_device(pool->dev, addr,
+			(1ULL << dma->order) * PAGE_SIZE, DMA_BIDIRECTIONAL);
  
  	for (i = 1 << order; i ; --i) {
  		*(*dma_addr)++ = addr;
@@ -211,12 +170,8 @@ static int ttm_pool_map(struct ttm_pool *pool, unsigned int order,
  static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr,
  			   unsigned int num_pages)
  {
-	/* Unmapped while freeing the page */
-	if (pool->use_dma_alloc)
-		return;
-
-	dma_unmap_page(pool->dev, dma_addr, (long)num_pages << PAGE_SHIFT,
-		       DMA_BIDIRECTIONAL);
+	dma_sync_single_for_cpu(pool->dev, dma_addr, num_pages * PAGE_SIZE,
+				DMA_BIDIRECTIONAL);
  }
  
  /* Give pages into a specific pool_type */
@@ -286,27 +241,7 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
  						  enum ttm_caching caching,
  						  unsigned int order)
  {
-	if (pool->use_dma_alloc)
-		return &pool->caching[caching].orders[order];
-
-#ifdef CONFIG_X86
-	switch (caching) {
-	case ttm_write_combined:
-		if (pool->use_dma32)
-			return &global_dma32_write_combined[order];
-
-		return &global_write_combined[order];
-	case ttm_uncached:
-		if (pool->use_dma32)
-			return &global_dma32_uncached[order];
-
-		return &global_uncached[order];
-	default:
-		break;
-	}
-#endif
-
-	return NULL;
+	return &pool->caching[caching].orders[order];
  }
  
  /* Free pages using the global shrinker list */
@@ -336,13 +271,9 @@ static unsigned int ttm_pool_shrink(void)
  /* Return the allocation order based for a page */
  static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
  {
-	if (pool->use_dma_alloc) {
-		struct ttm_pool_dma *dma = (void *)p->private;
+	struct ttm_pool_dma *dma = (void *)p->private;
  
-		return dma->vaddr & ~PAGE_MASK;
-	}
-
-	return p->private;
+	return dma->order;
  }
  
  /**
@@ -364,7 +295,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
  	dma_addr_t *dma_addr = tt->dma_address;
  	struct page **caching = tt->pages;
  	struct page **pages = tt->pages;
-	gfp_t gfp_flags = GFP_USER;
+	gfp_t gfp_flags = GFP_HIGHUSER;
  	unsigned int i, order;
  	struct page *p;
  	int r;
@@ -378,11 +309,6 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
  	if (ctx->gfp_retry_mayfail)
  		gfp_flags |= __GFP_RETRY_MAYFAIL;
  
-	if (pool->use_dma32)
-		gfp_flags |= GFP_DMA32;
-	else
-		gfp_flags |= GFP_HIGHUSER;
-
  	for (order = min(MAX_ORDER - 1UL, __fls(num_pages)); num_pages;
  	     order = min_t(unsigned int, order, __fls(num_pages))) {
  		bool apply_caching = false;
@@ -489,28 +415,20 @@ EXPORT_SYMBOL(ttm_pool_free);
   *
   * @pool: the pool to initialize
   * @dev: device for DMA allocations and mappings
- * @use_dma_alloc: true if coherent DMA alloc should be used
- * @use_dma32: true if GFP_DMA32 should be used
   *
   * Initialize the pool and its pool types.
   */
-void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
-		   bool use_dma_alloc, bool use_dma32)
+void ttm_pool_init(struct ttm_pool *pool, struct device *dev)
  {
  	unsigned int i, j;
  
-	WARN_ON(!dev && use_dma_alloc);
+	WARN_ON(!dev);
  
  	pool->dev = dev;
-	pool->use_dma_alloc = use_dma_alloc;
-	pool->use_dma32 = use_dma32;
-
-	if (use_dma_alloc) {
-		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-			for (j = 0; j < MAX_ORDER; ++j)
-				ttm_pool_type_init(&pool->caching[i].orders[j],
-						   pool, i, j);
-	}
+	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
+		for (j = 0; j < MAX_ORDER; ++j)
+			ttm_pool_type_init(&pool->caching[i].orders[j], pool, i,
+					   j);
  }
  
  /**
@@ -525,11 +443,9 @@ void ttm_pool_fini(struct ttm_pool *pool)
  {
  	unsigned int i, j;
  
-	if (pool->use_dma_alloc) {
-		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-			for (j = 0; j < MAX_ORDER; ++j)
-				ttm_pool_type_fini(&pool->caching[i].orders[j]);
-	}
+	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
+		for (j = 0; j < MAX_ORDER; ++j)
+			ttm_pool_type_fini(&pool->caching[i].orders[j]);
  }
  
  /* As long as pages are available make sure to release at least one */
@@ -603,18 +519,6 @@ static void ttm_pool_debugfs_footer(struct seq_file *m)
  static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data)
  {
  	ttm_pool_debugfs_header(m);
-
-	mutex_lock(&shrinker_lock);
-	seq_puts(m, "wc\t:");
-	ttm_pool_debugfs_orders(global_write_combined, m);
-	seq_puts(m, "uc\t:");
-	ttm_pool_debugfs_orders(global_uncached, m);
-	seq_puts(m, "wc 32\t:");
-	ttm_pool_debugfs_orders(global_dma32_write_combined, m);
-	seq_puts(m, "uc 32\t:");
-	ttm_pool_debugfs_orders(global_dma32_uncached, m);
-	mutex_unlock(&shrinker_lock);
-
  	ttm_pool_debugfs_footer(m);
  
  	return 0;
@@ -633,11 +537,6 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
  {
  	unsigned int i;
  
-	if (!pool->use_dma_alloc) {
-		seq_puts(m, "unused\n");
-		return 0;
-	}
-
  	ttm_pool_debugfs_header(m);
  
  	mutex_lock(&shrinker_lock);
@@ -688,25 +587,12 @@ DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_shrink);
   */
  int ttm_pool_mgr_init(unsigned long num_pages)
  {
-	unsigned int i;
-
  	if (!page_pool_size)
  		page_pool_size = num_pages;
  
  	mutex_init(&shrinker_lock);
  	INIT_LIST_HEAD(&shrinker_list);
  
-	for (i = 0; i < MAX_ORDER; ++i) {
-		ttm_pool_type_init(&global_write_combined[i], NULL,
-				   ttm_write_combined, i);
-		ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
-
-		ttm_pool_type_init(&global_dma32_write_combined[i], NULL,
-				   ttm_write_combined, i);
-		ttm_pool_type_init(&global_dma32_uncached[i], NULL,
-				   ttm_uncached, i);
-	}
-
  #ifdef CONFIG_DEBUG_FS
  	debugfs_create_file("page_pool", 0444, ttm_debugfs_root, NULL,
  			    &ttm_pool_debugfs_globals_fops);
@@ -727,16 +613,6 @@ int ttm_pool_mgr_init(unsigned long num_pages)
   */
  void ttm_pool_mgr_fini(void)
  {
-	unsigned int i;
-
-	for (i = 0; i < MAX_ORDER; ++i) {
-		ttm_pool_type_fini(&global_write_combined[i]);
-		ttm_pool_type_fini(&global_uncached[i]);
-
-		ttm_pool_type_fini(&global_dma32_write_combined[i]);
-		ttm_pool_type_fini(&global_dma32_uncached[i]);
-	}
-
  	unregister_shrinker(&mm_shrinker);
  	WARN_ON(!list_empty(&shrinker_list));
  }
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index a1a25410ec742d..d8f30ce4c65655 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -51,7 +51,6 @@ MODULE_PARM_DESC(dma32_pages_limit, "Limit for the allocated DMA32 pages");
  module_param_named(dma32_pages_limit, ttm_dma32_pages_limit, ulong, 0644);
  
  static atomic_long_t ttm_pages_allocated;
-static atomic_long_t ttm_dma32_pages_allocated;
  
  /*
   * Allocates a ttm structure for the given BO.
@@ -317,17 +316,10 @@ int ttm_tt_populate(struct ttm_device *bdev,
  	if (ttm_tt_is_populated(ttm))
  		return 0;
  
-	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
+	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG))
  		atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
-		if (bdev->pool.use_dma32)
-			atomic_long_add(ttm->num_pages,
-					&ttm_dma32_pages_allocated);
-	}
-
-	while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit ||
-	       atomic_long_read(&ttm_dma32_pages_allocated) >
-	       ttm_dma32_pages_limit) {
  
+	while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit) {
  		ret = ttm_global_swapout(ctx, GFP_KERNEL);
  		if (ret == 0)
  			break;
@@ -355,12 +347,8 @@ int ttm_tt_populate(struct ttm_device *bdev,
  	return 0;
  
  error:
-	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
+	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG))
  		atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
-		if (bdev->pool.use_dma32)
-			atomic_long_sub(ttm->num_pages,
-					&ttm_dma32_pages_allocated);
-	}
  	return ret;
  }
  EXPORT_SYMBOL(ttm_tt_populate);
@@ -390,13 +378,8 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
  	else
  		ttm_pool_free(&bdev->pool, ttm);
  
-	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
+	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG))
  		atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
-		if (bdev->pool.use_dma32)
-			atomic_long_sub(ttm->num_pages,
-					&ttm_dma32_pages_allocated);
-	}
-
  	ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
  }
  
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 399f70d340eb5b..0c5783651ae187 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -881,9 +881,7 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id)
  	ret = ttm_device_init(&dev_priv->bdev, &vmw_bo_driver,
  			      dev_priv->drm.dev,
  			      dev_priv->drm.anon_inode->i_mapping,
-			      &dev_priv->vma_manager,
-			      dev_priv->map_mode == vmw_dma_alloc_coherent,
-			      false);
+			      &dev_priv->vma_manager);
  	if (unlikely(ret != 0)) {
  		DRM_ERROR("Failed initializing TTM buffer object driver.\n");
  		goto out_no_bdev;
diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h
index e9ad4863d91568..c3122588811731 100644
--- a/include/drm/drm_cache.h
+++ b/include/drm/drm_cache.h
@@ -38,7 +38,6 @@
  void drm_clflush_pages(struct page *pages[], unsigned long num_pages);
  void drm_clflush_sg(struct sg_table *st);
  void drm_clflush_virt_range(void *addr, unsigned long length);
-bool drm_need_swiotlb(int dma_bits);
  
  
  static inline bool drm_arch_can_wc_memory(void)
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 7c8f87bd52d383..8b5281b982b7da 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -310,8 +310,7 @@ static inline void ttm_set_driver_manager(struct ttm_device *bdev, int type,
  
  int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
  		    struct device *dev, struct address_space *mapping,
-		    struct drm_vma_offset_manager *vma_manager,
-		    bool use_dma_alloc, bool use_dma32);
+		    struct drm_vma_offset_manager *vma_manager);
  void ttm_device_fini(struct ttm_device *bdev);
  
  #endif
diff --git a/include/drm/ttm/ttm_pool.h b/include/drm/ttm/ttm_pool.h
index 4321728bdd11f8..7c7800c934e272 100644
--- a/include/drm/ttm/ttm_pool.h
+++ b/include/drm/ttm/ttm_pool.h
@@ -60,16 +60,10 @@ struct ttm_pool_type {
  /**
   * ttm_pool - Pool for all caching and orders
   *
- * @use_dma_alloc: if coherent DMA allocations should be used
- * @use_dma32: if GFP_DMA32 should be used
   * @caching: pools for each caching/order
   */
  struct ttm_pool {
  	struct device *dev;
-
-	bool use_dma_alloc;
-	bool use_dma32;
-
  	struct {
  		struct ttm_pool_type orders[MAX_ORDER];
  	} caching[TTM_NUM_CACHING_TYPES];
@@ -79,8 +73,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
  		   struct ttm_operation_ctx *ctx);
  void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt);
  
-void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
-		   bool use_dma_alloc, bool use_dma32);
+void ttm_pool_init(struct ttm_pool *pool, struct device *dev);
  void ttm_pool_fini(struct ttm_pool *pool);
  
  int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m);