Periodically check the scratch page to see if it changes. Scratch page
changes almost always indicate that something is wrong.

We never expect a non-zero-filled page, so we could potentially hard-code
the md5 of a zeroed page and check against that. I think the code as it is
will be a bit more robust.

This is very much an RFC, since I've only compiled it and run it for a few
seconds.

Recommended-by: Stephane Marchesin <marcheu at chromium.org>
Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
 drivers/gpu/drm/Kconfig             |  2 +
 drivers/gpu/drm/i915/i915_drv.c     |  5 ++
 drivers/gpu/drm/i915/i915_drv.h     | 14 ++++++
 drivers/gpu/drm/i915/i915_gem_gtt.c | 91 +++++++++++++++++++++++++++++++++++++
 4 files changed, 112 insertions(+)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 19b8e0d..44efe74 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -125,6 +125,8 @@ config DRM_I915
 	depends on DRM
 	depends on AGP
 	depends on AGP_INTEL
+	select CRYPTO
+	select CRYPTO_MD5
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 9ebe895..a1f6142 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -128,6 +128,11 @@ module_param_named(disable_power_well, i915_disable_power_well, int, 0600);
 MODULE_PARM_DESC(disable_power_well,
 		 "Disable the power well when possible (default: false)");
 
+int i915_enable_scratch_checker __read_mostly = 0;
+module_param_named(enable_scratch_checker, i915_enable_scratch_checker, int, 0600);
+MODULE_PARM_DESC(enable_scratch_checker,
+		 "Enable periodic timer to find stray writes to the scratch page (default: false)");
+
 static struct drm_driver driver;
 extern int intel_agp_enabled;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d5dcf7f..c1f4ef7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -43,6 +43,8 @@
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
+#include <linux/crypto.h>
+#include <crypto/md5.h>
 
 /* General customization:
  */
@@ -869,6 +871,16 @@ struct i915_gpu_error {
 	unsigned int stop_rings;
 };
 
+struct i915_scratch_checker {
+	struct scatterlist sg; /* sg for scratch page */
+	struct hash_desc hash;
+	char last_hash[MD5_DIGEST_SIZE];
+
+#define I915_SCRATCH_DIRT_PERIOD 1000 /* in ms */
+#define I915_SCRATCH_DIRT_JIFFIES msecs_to_jiffies(I915_SCRATCH_DIRT_PERIOD)
+	struct timer_list timer;
+};
+
 enum modeset_restore {
 	MODESET_ON_LID_OPEN,
 	MODESET_DONE,
@@ -1058,6 +1070,7 @@ typedef struct drm_i915_private {
 	struct drm_mm_node *compressed_llb;
 
 	struct i915_gpu_error gpu_error;
+	struct i915_scratch_checker scratch_checker;
 
 	/* list of fbdev register on this device */
 	struct intel_fbdev *fbdev;
@@ -1435,6 +1448,7 @@ extern bool i915_enable_hangcheck __read_mostly;
 extern int i915_enable_ppgtt __read_mostly;
 extern unsigned int i915_preliminary_hw_support __read_mostly;
 extern int i915_disable_power_well __read_mostly;
+extern int i915_enable_scratch_checker __read_mostly;
 
 extern int i915_suspend(struct drm_device *dev, pm_message_t state);
 extern int i915_resume(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 50df194..df7a3a8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -660,6 +660,90 @@ void i915_gem_init_global_gtt(struct drm_device *dev)
 	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
 }
 
+/*
+ * Timer callback: re-hash the scratch page and log when the digest differs
+ * from the last one recorded. The new digest then becomes the reference, so
+ * each change is reported once. The timer re-arms itself on every path.
+ */
+static void scratch_checker(unsigned long data)
+{
+	struct drm_i915_private *dev_priv = (struct drm_i915_private *)data;
+	char hash[MD5_DIGEST_SIZE];
+	int ret;
+
+	ret = crypto_hash_digest(&dev_priv->scratch_checker.hash,
+				 &dev_priv->scratch_checker.sg, PAGE_SIZE,
+				 hash);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Couldn't hash scratch\n");
+		goto out;
+	}
+
+	if (!memcmp(hash, dev_priv->scratch_checker.last_hash, MD5_DIGEST_SIZE))
+		goto out;
+
+	DRM_DEBUG("Scratch page contents changed\n");
+	memcpy(dev_priv->scratch_checker.last_hash, hash, MD5_DIGEST_SIZE);
+
+out:
+	mod_timer(&dev_priv->scratch_checker.timer,
+		  round_jiffies_up(jiffies + I915_SCRATCH_DIRT_JIFFIES));
+}
+
+/*
+ * Allocate the md5 transform, record the initial digest of the scratch page
+ * and set up (but do not arm) the timer. On any failure hash.tfm is left NULL,
+ * which is what the fini and arming paths test before touching the timer.
+ */
+static void scratch_checker_init(struct drm_i915_private *dev_priv)
+{
+	int ret;
+
+	sg_init_table(&dev_priv->scratch_checker.sg, 1);
+	sg_set_page(&dev_priv->scratch_checker.sg, dev_priv->gtt.scratch_page,
+		    PAGE_SIZE, 0);
+
+	dev_priv->scratch_checker.hash.tfm = crypto_alloc_hash("md5", 0,
+							       CRYPTO_ALG_ASYNC);
+	if (IS_ERR_OR_NULL(dev_priv->scratch_checker.hash.tfm)) {
+		/* Don't leave an ERR_PTR behind for the NULL checks. */
+		dev_priv->scratch_checker.hash.tfm = NULL;
+		return;
+	}
+
+	ret = crypto_hash_init(&dev_priv->scratch_checker.hash);
+	if (ret)
+		goto err;
+
+	ret = crypto_hash_digest(&dev_priv->scratch_checker.hash,
+				 &dev_priv->scratch_checker.sg, PAGE_SIZE,
+				 dev_priv->scratch_checker.last_hash);
+	if (ret)
+		goto err;
+
+	setup_timer(&dev_priv->scratch_checker.timer, scratch_checker,
+		    (unsigned long) dev_priv);
+	return;
+
+err:
+	crypto_free_hash(dev_priv->scratch_checker.hash.tfm);
+	dev_priv->scratch_checker.hash.tfm = NULL;
+}
+
+/*
+ * Stop the timer and release the transform. Does nothing when the checker
+ * was never successfully initialised (hash.tfm is NULL).
+ */
+static void scratch_checker_fini(struct drm_i915_private *dev_priv)
+{
+	if (!dev_priv->scratch_checker.hash.tfm)
+		return;
+
+	del_timer_sync(&dev_priv->scratch_checker.timer);
+	crypto_free_hash(dev_priv->scratch_checker.hash.tfm);
+	dev_priv->scratch_checker.hash.tfm = NULL;
+}
+
 static int setup_scratch_page(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -682,6 +766,7 @@ static int setup_scratch_page(struct drm_device *dev)
 #endif
 	dev_priv->gtt.scratch_page = page;
 	dev_priv->gtt.scratch_page_dma = dma_addr;
+	scratch_checker_init(dev_priv);
 
 	return 0;
 }
@@ -689,6 +774,7 @@ static int setup_scratch_page(struct drm_device *dev)
 static void teardown_scratch_page(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	scratch_checker_fini(dev_priv);
 	set_pages_wb(dev_priv->gtt.scratch_page, 1);
 	pci_unmap_page(dev->pdev, dev_priv->gtt.scratch_page_dma,
 		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
@@ -840,5 +926,10 @@ int i915_gem_gtt_init(struct drm_device *dev)
 	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n",
 			 dev_priv->gtt.stolen_size >> 20);
 
+	/* Only arm the timer if scratch_checker_init() set everything up. */
+	if (i915_enable_scratch_checker && dev_priv->scratch_checker.hash.tfm)
+		mod_timer(&dev_priv->scratch_checker.timer,
+			  jiffies + I915_SCRATCH_DIRT_JIFFIES);
+
 	return 0;
 }
-- 
1.8.2.1