Before going further I wanted to run these 3 patches by the ML... The GTT handling has changed from generation to generation. There was a particularly large shift around the gen5 time frame. The GTT itself, though, provides a few core functions which make it a nice candidate for a dispatch table. A similar concept exists in the AGP layer, and were it not for design constraints in those functions and code duplication in i915, that would have been sufficient. In any case, it's the same idea. One immediately obvious thing to implement is our gmch probing. The init function was getting massively bloated. Fundamentally, all that's needed from GMCH probing is the GTT size and the stolen size. Some examples of other things which will come up and are nice fits for this table: pte encoding, range clearing, entry updates, etc. I've been going back and forth regarding whether or not to put the stolen size calculation into the dispatch table... The intel_gtt bridge thing is still here, but the subsequent patches will finish ripping that out. A couple of things worth noting: + The DRM_INFO and DRM_DEBUG_DRIVER displaying the GTT info wasn't done on pre-GEN6 beforehand. Now it is. - There is a pretty ugly hack regarding the kfree of the gtt struct. That will be gone soon, I promise. I only left it because it shows reviewers what the patch is striving to achieve with regard to the cleanup of the gtt init function. I can fix it up if requested. 
Signed-off-by: Ben Widawsky <ben at bwidawsk.net> --- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 9 ++ drivers/gpu/drm/i915/i915_gem_gtt.c | 174 ++++++++++++++++++++++-------------- 3 files changed, 119 insertions(+), 66 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 11c7aa8..34bf128 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1665,7 +1665,7 @@ out_mtrrfree: out_rmmap: pci_iounmap(dev->pdev, dev_priv->regs); put_gmch: - i915_gem_gtt_fini(dev); + dev_priv->gtt.gtt_ops->gmch_remove(dev); put_bridge: pci_dev_put(dev_priv->bridge_dev); free_priv: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c84743b..c8d6bf98 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -364,6 +364,12 @@ struct intel_device_info { u8 has_llc:1; }; +struct i915_gtt_operations { + int (*gmch_probe)(struct drm_device *dev, size_t *gtt_total, + size_t *stolen); + void (*gmch_remove)(struct drm_device *dev); +}; + /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. 
In addition to the normal * collateral associated with any va->pa translations GEN hardware also has a @@ -374,6 +380,7 @@ struct intel_device_info { struct i915_gtt { unsigned long start; /* Start offset of used GTT */ size_t total; /* Total size GTT can map */ + size_t stolen_size; /* Total size of stolen memory */ unsigned long mappable_end; /* End offset that we can CPU map */ struct io_mapping *mappable; /* Mapping to our CPU mappable region */ @@ -385,6 +392,8 @@ struct i915_gtt { bool do_idle_maps; dma_addr_t scratch_page_dma; struct page *scratch_page; + + const struct i915_gtt_operations *gtt_ops; }; #define I915_PPGTT_PD_ENTRIES 512 diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a0ba4a9..6266606 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -673,14 +673,14 @@ static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) return snb_gmch_ctl << 20; } -static inline unsigned int gen6_get_stolen_size(u16 snb_gmch_ctl) +static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl) { snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; snb_gmch_ctl &= SNB_GMCH_GMS_MASK; return snb_gmch_ctl << 25; /* 32 MB units */ } -static inline unsigned int gen7_get_stolen_size(u16 snb_gmch_ctl) +static inline size_t gen7_get_stolen_size(u16 snb_gmch_ctl) { static const int stolen_decoder[] = { 0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352}; @@ -689,56 +689,127 @@ static inline unsigned int gen7_get_stolen_size(u16 snb_gmch_ctl) return stolen_decoder[snb_gmch_ctl] << 20; } -int i915_gem_gtt_init(struct drm_device *dev) +static int gen6_gmch_probe(struct drm_device *dev, size_t *gtt_total, + size_t *stolen) { - struct drm_i915_private *dev_priv = dev->dev_private; - phys_addr_t gtt_bus_addr; + unsigned int gtt_size; u16 snb_gmch_ctl; int ret; - dev_priv->gtt.mappable_base = pci_resource_start(dev->pdev, 2); - dev_priv->gtt.mappable_end = pci_resource_len(dev->pdev, 2); + if 
(!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40))) + pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40)); + pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl); - /* On modern platforms we need not worry ourself with the legacy - * hostbridge query stuff. Skip it entirely - */ - if (INTEL_INFO(dev)->gen < 6) { - ret = intel_gmch_probe(dev_priv->bridge_dev, dev->pdev, NULL); - if (!ret) { - DRM_ERROR("failed to set up gmch\n"); - return -EIO; - } + if (IS_GEN7(dev)) + *stolen = gen7_get_stolen_size(snb_gmch_ctl); + else + *stolen = gen6_get_stolen_size(snb_gmch_ctl); - dev_priv->mm.gtt = intel_gtt_get(); - if (!dev_priv->mm.gtt) { - DRM_ERROR("Failed to initialize GTT\n"); - intel_gmch_remove(); - return -ENODEV; - } + *gtt_total = (gtt_size / sizeof(gtt_pte_t)) << PAGE_SHIFT; - dev_priv->gtt.do_idle_maps = needs_idle_maps(dev); + ret = setup_scratch_page(dev); + if (ret) + DRM_ERROR("Scratch setup failed\n"); - return 0; + return ret; +} + +void gen6_gmch_remove(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + iounmap(dev_priv->gtt.gsm); + teardown_scratch_page(dev); + kfree(dev_priv->mm.gtt); +} + +static const struct i915_gtt_operations gen6_gtt_ops = { + .gmch_probe = gen6_gmch_probe, + .gmch_remove = gen6_gmch_remove, +}; + +static int i915_gmch_probe(struct drm_device *dev, size_t *gtt_total, + size_t *stolen) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + /* This is a temporary hack to make the code cleaner in + * i915_gem_gtt_init. I promise it will go away very shortly. 
*/ + kfree(dev_priv->mm.gtt); + + BUG_ON(INTEL_INFO(dev)->gen >5); + + ret = intel_gmch_probe(dev_priv->bridge_dev, dev->pdev, NULL); + if (!ret) { + DRM_ERROR("failed to set up gmch\n"); + return -EIO; } - if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40))) - pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40)); + dev_priv->mm.gtt = intel_gtt_get(); + if (!dev_priv->mm.gtt) { + DRM_ERROR("Failed to initialize GTT\n"); + intel_gmch_remove(); + return -ENODEV; + } + + dev_priv->gtt.do_idle_maps = needs_idle_maps(dev); + + *gtt_total = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT; + *stolen = dev_priv->mm.gtt->stolen_size << PAGE_SHIFT; + + return 0; +} + +static void i915_gmch_remove(struct drm_device *dev) +{ + BUG_ON(INTEL_INFO(dev)->gen > 5); + intel_gmch_remove(); +} + +static const struct i915_gtt_operations legacy_gtt_ops = { + .gmch_probe = i915_gmch_probe, + .gmch_remove = i915_gmch_remove, +}; + +int i915_gem_gtt_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + phys_addr_t gtt_bus_addr; + unsigned long gtt_size; + int ret; + + if (INTEL_INFO(dev)->gen <= 5) + dev_priv->gtt.gtt_ops = &legacy_gtt_ops; + else + dev_priv->gtt.gtt_ops = &gen6_gtt_ops; + + dev_priv->gtt.mappable_base = pci_resource_start(dev->pdev, 2); + dev_priv->gtt.mappable_end = pci_resource_len(dev->pdev, 2); dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL); if (!dev_priv->mm.gtt) return -ENOMEM; + ret = dev_priv->gtt.gtt_ops->gmch_probe(dev, &dev_priv->gtt.total, + &dev_priv->gtt.stolen_size); + if (ret) { + kfree(dev_priv->mm.gtt); + return ret; + } + + dev_priv->mm.gtt->gtt_total_entries = dev_priv->gtt.total >> PAGE_SHIFT; + dev_priv->mm.gtt->stolen_size = dev_priv->gtt.stolen_size; + /* For GEN6+ the PTEs for the ggtt live at 2MB + BAR0 */ gtt_bus_addr = pci_resource_start(dev->pdev, 0) + (2<<20); - - /* i9xx_setup */ - pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - dev_priv->mm.gtt->gtt_total_entries = 
- gen6_get_total_gtt_size(snb_gmch_ctl) / sizeof(gtt_pte_t); - if (INTEL_INFO(dev)->gen < 7) - dev_priv->mm.gtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); - else - dev_priv->mm.gtt->stolen_size = gen7_get_stolen_size(snb_gmch_ctl); + gtt_size = (dev_priv->gtt.total >> PAGE_SHIFT) * sizeof(gtt_pte_t); + dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, gtt_size); + if (!dev_priv->gtt.gsm) { + DRM_ERROR("Failed to map the gtt page table\n"); + ret = -ENOMEM; + goto err_out; + } /* 64/512MB is the current min/max we actually know of, but this is just a * coarse sanity check. @@ -751,41 +822,14 @@ int i915_gem_gtt_init(struct drm_device *dev) goto err_out; } - ret = setup_scratch_page(dev); - if (ret) { - DRM_ERROR("Scratch setup failed\n"); - goto err_out; - } - - dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, - dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t)); - if (!dev_priv->gtt.gsm) { - DRM_ERROR("Failed to map the gtt page table\n"); - teardown_scratch_page(dev); - ret = -ENOMEM; - goto err_out; - } - /* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */ - DRM_INFO("Memory usable by graphics device = %dM\n", dev_priv->mm.gtt->gtt_total_entries >> 8); + DRM_INFO("Memory usable by graphics device = %zdM\n", dev_priv->gtt.total >> 20); DRM_DEBUG_DRIVER("GMADR size = %ldM\n", dev_priv->gtt.mappable_end >> 20); - DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20); + DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", dev_priv->gtt.stolen_size >> 20); return 0; err_out: - kfree(dev_priv->mm.gtt); - if (INTEL_INFO(dev)->gen < 6) - intel_gmch_remove(); + dev_priv->gtt.gtt_ops->gmch_remove(dev); return ret; } - -void i915_gem_gtt_fini(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - iounmap(dev_priv->gtt.gsm); - teardown_scratch_page(dev); - if (INTEL_INFO(dev)->gen < 6) - intel_gmch_remove(); - kfree(dev_priv->mm.gtt); -} -- 1.8.1.1