The GTT handling has changed from generation to generation. There was a particularly large shift around the gen5 time frame. The GTT itself, though, provides a few core functions which make it a nice candidate for a dispatch table. A similar concept exists in the AGP layer, and were it not for design constraints in those functions and code duplication in i915, that would have been sufficient. In any case, it's the same idea. One immediately obvious thing to implement is our gmch probing. The init function was getting massively bloated. Fundamentally, all that's needed from GMCH probing is the GTT size, and the stolen size. It makes design sense to put the mappable calculation in there as well, but the code turns out a bit nicer without it (IMO). Some examples, though, of things which are nice for this table and which will come up: PTE encoding, range clearing, binding objects, entry updates, etc. The intel_gtt bridge thing is still here, but the subsequent patches will finish ripping that out. A couple of things worth noting: + There are some additional cleanups in this patch to gem_gtt_init. Those can be extracted if requested. + The DRM_INFO and DRM_DEBUG_DRIVER displaying the GTT info wasn't done on pre-GEN6 beforehand. Now it is. - There is a pretty ugly hack regarding the kfree of the gtt struct. It will be gone soon, I promise. I only left it because it shows reviewers what the patch is striving to achieve with regard to the cleanup of the gtt init function. I can fix it up if requested. v2: * Pass in the gtt struct instead of drm_device. This doesn't quite match the typical dispatch table style, but it's much easier to get at the things we care about this way, and a gtt->gtt_ops will always be 1:1. * Drop unnecessary BUG_ON for new platforms... It's silly. 
Signed-off-by: Ben Widawsky <ben at bwidawsk.net> --- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 11 ++- drivers/gpu/drm/i915/i915_gem_gtt.c | 184 ++++++++++++++++++++++-------------- 3 files changed, 124 insertions(+), 73 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 11c7aa8..fdfd6be 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1665,7 +1665,7 @@ out_mtrrfree: out_rmmap: pci_iounmap(dev->pdev, dev_priv->regs); put_gmch: - i915_gem_gtt_fini(dev); + dev_priv->gtt.gtt_ops->gmch_remove(&dev_priv->gtt); put_bridge: pci_dev_put(dev_priv->bridge_dev); free_priv: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c84743b..006caaa 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -364,6 +364,13 @@ struct intel_device_info { u8 has_llc:1; }; +struct i915_gtt; +struct i915_gtt_operations { + int (*gmch_probe)(struct i915_gtt *gtt, + size_t *gtt_total, size_t *stolen); + void (*gmch_remove)(struct i915_gtt *gtt); +}; + /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. 
In addition to the normal * collateral associated with any va->pa translations GEN hardware also has a @@ -374,6 +381,7 @@ struct intel_device_info { struct i915_gtt { unsigned long start; /* Start offset of used GTT */ size_t total; /* Total size GTT can map */ + size_t stolen_size; /* Total size of stolen memory */ unsigned long mappable_end; /* End offset that we can CPU map */ struct io_mapping *mappable; /* Mapping to our CPU mappable region */ @@ -385,6 +393,8 @@ struct i915_gtt { bool do_idle_maps; dma_addr_t scratch_page_dma; struct page *scratch_page; + + const struct i915_gtt_operations *gtt_ops; }; #define I915_PPGTT_PD_ENTRIES 512 @@ -1663,7 +1673,6 @@ void i915_gem_init_global_gtt(struct drm_device *dev); void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, unsigned long mappable_end, unsigned long end); int i915_gem_gtt_init(struct drm_device *dev); -void i915_gem_gtt_fini(struct drm_device *dev); static inline void i915_gem_chipset_flush(struct drm_device *dev) { if (INTEL_INFO(dev)->gen < 6) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a0ba4a9..11fb82c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -673,14 +673,14 @@ static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) return snb_gmch_ctl << 20; } -static inline unsigned int gen6_get_stolen_size(u16 snb_gmch_ctl) +static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl) { snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; snb_gmch_ctl &= SNB_GMCH_GMS_MASK; return snb_gmch_ctl << 25; /* 32 MB units */ } -static inline unsigned int gen7_get_stolen_size(u16 snb_gmch_ctl) +static inline size_t gen7_get_stolen_size(u16 snb_gmch_ctl) { static const int stolen_decoder[] = { 0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352}; @@ -689,103 +689,145 @@ static inline unsigned int gen7_get_stolen_size(u16 snb_gmch_ctl) return stolen_decoder[snb_gmch_ctl] << 20; } -int 
i915_gem_gtt_init(struct drm_device *dev) +static int gen6_gmch_probe(struct i915_gtt *gtt, + size_t *gtt_total, + size_t *stolen) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = + container_of(gtt, struct drm_i915_private, gtt); + struct drm_device *dev = dev_priv->dev; phys_addr_t gtt_bus_addr; + unsigned int gtt_size; u16 snb_gmch_ctl; int ret; - dev_priv->gtt.mappable_base = pci_resource_start(dev->pdev, 2); - dev_priv->gtt.mappable_end = pci_resource_len(dev->pdev, 2); + if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40))) + pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40)); + pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl); - /* On modern platforms we need not worry ourself with the legacy - * hostbridge query stuff. Skip it entirely - */ - if (INTEL_INFO(dev)->gen < 6) { - ret = intel_gmch_probe(dev_priv->bridge_dev, dev->pdev, NULL); - if (!ret) { - DRM_ERROR("failed to set up gmch\n"); - return -EIO; - } + if (IS_GEN7(dev)) + *stolen = gen7_get_stolen_size(snb_gmch_ctl); + else + *stolen = gen6_get_stolen_size(snb_gmch_ctl); - dev_priv->mm.gtt = intel_gtt_get(); - if (!dev_priv->mm.gtt) { - DRM_ERROR("Failed to initialize GTT\n"); - intel_gmch_remove(); - return -ENODEV; - } + *gtt_total = (gtt_size / sizeof(gtt_pte_t)) << PAGE_SHIFT; - dev_priv->gtt.do_idle_maps = needs_idle_maps(dev); + /* For GEN6+ the PTEs for the ggtt live at 2MB + BAR0 */ + gtt_bus_addr = pci_resource_start(dev->pdev, 0) + (2<<20); + dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, gtt_size); + if (!dev_priv->gtt.gsm) { + DRM_ERROR("Failed to map the gtt page table\n"); + return -ENOMEM; + } - return 0; + ret = setup_scratch_page(dev); + if (ret) + DRM_ERROR("Scratch setup failed\n"); + + return ret; +} + +void gen6_gmch_remove(struct i915_gtt *gtt) +{ + struct drm_i915_private *dev_priv = + container_of(gtt, struct drm_i915_private, gtt); + iounmap(gtt->gsm); + 
teardown_scratch_page(dev_priv->dev); + kfree(dev_priv->mm.gtt); +} + +static const struct i915_gtt_operations gen6_gtt_ops = { + .gmch_probe = gen6_gmch_probe, + .gmch_remove = gen6_gmch_remove, +}; + +static int i915_gmch_probe(struct i915_gtt *gtt, + size_t *gtt_total, + size_t *stolen) +{ + struct drm_i915_private *dev_priv = + container_of(gtt, struct drm_i915_private, gtt); + int ret; + + /* This is a temporary hack to make the code cleaner in + * i915_gem_gtt_init. I promise it will go away very shortly. */ + kfree(dev_priv->mm.gtt); + + ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL); + if (!ret) { + DRM_ERROR("failed to set up gmch\n"); + return -EIO; } - if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40))) - pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40)); + dev_priv->mm.gtt = intel_gtt_get(); + if (!dev_priv->mm.gtt) { + DRM_ERROR("Failed to initialize GTT\n"); + intel_gmch_remove(); + return -ENODEV; + } + + dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev); + + *gtt_total = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT; + *stolen = dev_priv->mm.gtt->stolen_size << PAGE_SHIFT; + + return 0; +} + +static void i915_gmch_remove(struct i915_gtt *gtt) +{ + intel_gmch_remove(); +} + +static const struct i915_gtt_operations legacy_gtt_ops = { + .gmch_probe = i915_gmch_probe, + .gmch_remove = i915_gmch_remove, +}; + +int i915_gem_gtt_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_gtt *gtt = &dev_priv->gtt; + unsigned long gtt_size; + int ret; dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL); if (!dev_priv->mm.gtt) return -ENOMEM; - /* For GEN6+ the PTEs for the ggtt live at 2MB + BAR0 */ - gtt_bus_addr = pci_resource_start(dev->pdev, 0) + (2<<20); - - /* i9xx_setup */ - pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - dev_priv->mm.gtt->gtt_total_entries = - gen6_get_total_gtt_size(snb_gmch_ctl) / sizeof(gtt_pte_t); - if 
(INTEL_INFO(dev)->gen < 7) - dev_priv->mm.gtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); - else - dev_priv->mm.gtt->stolen_size = gen7_get_stolen_size(snb_gmch_ctl); + gtt->mappable_base = pci_resource_start(dev->pdev, 2); + gtt->mappable_end = pci_resource_len(dev->pdev, 2); /* 64/512MB is the current min/max we actually know of, but this is just a * coarse sanity check. */ - if ((dev_priv->gtt.mappable_end < (64<<20) || - (dev_priv->gtt.mappable_end > (512<<20)))) { - DRM_ERROR("Unknown GMADR size (%lx)\n", - dev_priv->gtt.mappable_end); - ret = -ENXIO; - goto err_out; + if ((gtt->mappable_end < (64<<20) || (gtt->mappable_end > (512<<20)))) { + DRM_ERROR("Unknown GMADR size (%lx)\n", gtt->mappable_end); + return -ENXIO; } - ret = setup_scratch_page(dev); + if (INTEL_INFO(dev)->gen <= 5) + gtt->gtt_ops = &legacy_gtt_ops; + else + gtt->gtt_ops = &gen6_gtt_ops; + + ret = gtt->gtt_ops->gmch_probe(gtt, &dev_priv->gtt.total, + &dev_priv->gtt.stolen_size); if (ret) { - DRM_ERROR("Scratch setup failed\n"); - goto err_out; + kfree(dev_priv->mm.gtt); + return ret; } - dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, - dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t)); - if (!dev_priv->gtt.gsm) { - DRM_ERROR("Failed to map the gtt page table\n"); - teardown_scratch_page(dev); - ret = -ENOMEM; - goto err_out; - } + dev_priv->mm.gtt->gtt_total_entries = dev_priv->gtt.total >> PAGE_SHIFT; + dev_priv->mm.gtt->stolen_size = dev_priv->gtt.stolen_size; + + gtt_size = (dev_priv->gtt.total >> PAGE_SHIFT) * sizeof(gtt_pte_t); /* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. 
*/ - DRM_INFO("Memory usable by graphics device = %dM\n", dev_priv->mm.gtt->gtt_total_entries >> 8); + DRM_INFO("Memory usable by graphics device = %zdM\n", dev_priv->gtt.total >> 20); DRM_DEBUG_DRIVER("GMADR size = %ldM\n", dev_priv->gtt.mappable_end >> 20); - DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20); + DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", dev_priv->gtt.stolen_size >> 20); return 0; - -err_out: - kfree(dev_priv->mm.gtt); - if (INTEL_INFO(dev)->gen < 6) - intel_gmch_remove(); - return ret; -} - -void i915_gem_gtt_fini(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - iounmap(dev_priv->gtt.gsm); - teardown_scratch_page(dev); - if (INTEL_INFO(dev)->gen < 6) - intel_gmch_remove(); - kfree(dev_priv->mm.gtt); } -- 1.8.1.1