Re: [PATCH v2 1/5] drm/i915/guc: add enable_guc_loading parameter

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




Hi,

On 27/04/16 19:03, Dave Gordon wrote:
Split the function of "enable_guc_submission" into two separate
options.  The new one ("enable_guc_loading") controls only the
*fetching and loading* of the GuC firmware image. The existing
one is redefined to control only the *use* of the GuC for batch
submission once the firmware is loaded.

In addition, the degree of control has been refined from a simple
bool to an integer key, allowing several options:
  -1 (default)     whatever the platform default is
   0  DISABLE      don't load/use the GuC
   1  BEST EFFORT  try to load/use the GuC, fallback if not available
   2  REQUIRE      must load/use the GuC, else leave the GPU wedged

The new platform default (as coded here) will be to attempt to
load the GuC iff the device has a GuC that requires firmware,
but not yet to use it for submission. A later patch will change
to enable it if appropriate.

Signed-off-by: Dave Gordon <david.s.gordon@xxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_gem.c            |  1 -
  drivers/gpu/drm/i915/i915_guc_submission.c |  4 +-
  drivers/gpu/drm/i915/i915_params.c         | 14 ++++-
  drivers/gpu/drm/i915/i915_params.h         |  3 +-
  drivers/gpu/drm/i915/intel_guc_loader.c    | 98 ++++++++++++++++--------------
  drivers/gpu/drm/i915/intel_uncore.c        |  2 +-
  6 files changed, 70 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d493e79..b04effc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4910,7 +4910,6 @@ int i915_gem_init_engines(struct drm_device *dev)
  		ret = intel_guc_ucode_load(dev);
  		if (ret) {
  			DRM_ERROR("Failed to initialize GuC, error %d\n", ret);
-			ret = -EIO;
  			goto out;
  		}
  	}
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 72d6665..42d2efa 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -970,7 +970,7 @@ int intel_guc_suspend(struct drm_device *dev)
  	struct intel_context *ctx;
  	u32 data[3];

-	if (!i915.enable_guc_submission)
+	if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS)
  		return 0;

  	ctx = dev_priv->kernel_context;
@@ -996,7 +996,7 @@ int intel_guc_resume(struct drm_device *dev)
  	struct intel_context *ctx;
  	u32 data[3];

-	if (!i915.enable_guc_submission)
+	if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS)
  		return 0;

Not terribly important and probably predates your work, but just spotted how this reads very redundant - guc->guc_fw.guc_fw_something, while it could be much more readable as guc->fw.load_status. Observation only.


  	ctx = dev_priv->kernel_context;
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 383c076..6a5578c 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -54,7 +54,8 @@ struct i915_params i915 __read_mostly = {
  	.verbose_state_checks = 1,
  	.nuclear_pageflip = 0,
  	.edp_vswing = 0,
-	.enable_guc_submission = false,
+	.enable_guc_loading = -1,
+	.enable_guc_submission = 0,
  	.guc_log_level = -1,
  	.enable_dp_mst = true,
  	.inject_load_failure = 0,
@@ -198,8 +199,15 @@ struct i915_params i915 __read_mostly = {
  		 "(0=use value from vbt [default], 1=low power swing(200mV),"
  		 "2=default swing(400mV))");

-module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, bool, 0400);
-MODULE_PARM_DESC(enable_guc_submission, "Enable GuC submission (default:false)");
+module_param_named_unsafe(enable_guc_loading, i915.enable_guc_loading, int, 0400);
+MODULE_PARM_DESC(enable_guc_loading,
+		"Enable GuC firmware loading "
+		"(-1=auto [default], 0=never, 1=if available, 2=required)");
+
+module_param_named_unsafe(enable_guc_submission, i915.enable_guc_submission, int, 0400);
+MODULE_PARM_DESC(enable_guc_submission,
+		"Enable GuC submission "
+		"(-1=auto, 0=never [default], 1=if available, 2=required)");

  module_param_named(guc_log_level, i915.guc_log_level, int, 0400);
  MODULE_PARM_DESC(guc_log_level,
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 65e73dd..1323261 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -45,6 +45,8 @@ struct i915_params {
  	int enable_ips;
  	int invert_brightness;
  	int enable_cmd_parser;
+	int enable_guc_loading;
+	int enable_guc_submission;
  	int guc_log_level;
  	int use_mmio_flip;
  	int mmio_debug;
@@ -57,7 +59,6 @@ struct i915_params {
  	bool load_detect_test;
  	bool reset;
  	bool disable_display;
-	bool enable_guc_submission;
  	bool verbose_state_checks;
  	bool nuclear_pageflip;
  	bool enable_dp_mst;
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 876e5da..2ec9cf1 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -387,49 +387,37 @@ int intel_guc_ucode_load(struct drm_device *dev)
  {
  	struct drm_i915_private *dev_priv = dev->dev_private;
  	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
+	const char *fw_path = guc_fw->guc_fw_path;
  	int retries, err = 0;

-	if (!i915.enable_guc_submission)
-		return 0;
-
-	DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n",
+	DRM_DEBUG_DRIVER("GuC fw status: path %s, fetch %s, load %s\n",
+		fw_path,
  		intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
  		intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));

Should load status be anything other than GUC_FIRMWARE_NONE here?

-	direct_interrupts_to_host(dev_priv);
+	/* Loading forbidden, or no firmware to load? */
+	if (!i915.enable_guc_loading)

Nitpick, == 0 would perhaps make it more obvious this is not a boolean.

+		goto fail;
+	if (fw_path == NULL)
+		goto fail;
+	if (*fw_path == '\0') {
+		DRM_ERROR("No GuC firmware known for this platform\n");

It is not an error unless i915.enable_guc_loading == 2, no? And if best effort then it is probably debug or informational.

Also, don't the checks against fw_path (together with the error or debug message) belong in the fw fetch function? If they are invalid fw fetch would have failed and this function would be able to inspect the high level status of that step here, no?

+		goto fail;
+	}

-	if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_NONE)
-		return 0;
+	/* Fetch failed, or already fetched but failed to load? */
+	if (guc_fw->guc_fw_fetch_status != GUC_FIRMWARE_SUCCESS)
+		goto fail;
+	if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_FAIL)
+		goto fail;

Leads back to the question of load status in this function. So it is expected we always enter here with load status of none? Is it possible to get here with the firmware already loaded already?


-	if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_SUCCESS &&
-	    guc_fw->guc_fw_load_status == GUC_FIRMWARE_FAIL)
-		return -ENOEXEC;
+	direct_interrupts_to_host(dev_priv);

  	guc_fw->guc_fw_load_status = GUC_FIRMWARE_PENDING;

-	DRM_DEBUG_DRIVER("GuC fw fetch status %s\n",
-		intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status));
-
-	switch (guc_fw->guc_fw_fetch_status) {
-	case GUC_FIRMWARE_FAIL:
-		/* something went wrong :( */
-		err = -EIO;
-		goto fail;
-
-	case GUC_FIRMWARE_NONE:
-	case GUC_FIRMWARE_PENDING:
-	default:
-		/* "can't happen" */
-		WARN_ONCE(1, "GuC fw %s invalid guc_fw_fetch_status %s [%d]\n",
-			guc_fw->guc_fw_path,
-			intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
-			guc_fw->guc_fw_fetch_status);
-		err = -ENXIO;
-		goto fail;
-
-	case GUC_FIRMWARE_SUCCESS:
-		break;
-	}
+	DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n",
+		intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
+		intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));

  	err = i915_guc_submission_init(dev);
  	if (err)
@@ -483,6 +471,7 @@ int intel_guc_ucode_load(struct drm_device *dev)

  fail:
  	DRM_ERROR("GuC firmware load failed, err %d\n", err);

Same as above I think error must be dependent on the requested mode. Some customers are very sensitive to errors which are not really errors so it is bad to log them when they are not.

+
  	if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_PENDING)
  		guc_fw->guc_fw_load_status = GUC_FIRMWARE_FAIL;

@@ -490,6 +479,29 @@ int intel_guc_ucode_load(struct drm_device *dev)
  	i915_guc_submission_disable(dev);
  	i915_guc_submission_fini(dev);

+	/*
+	 * We've failed to load the firmware :(
+	 *
+	 * Decide whether to disable GuC submission and fall back to
+	 * execlist mode, and whether to hide the error by returning
+	 * zero or to return -EIO, which the caller will treat as a
+	 * nonfatal error (i.e. it doesn't prevent driver load, but
+	 * marks the GPU as wedged until reset).
+	 */
+	if (i915.enable_guc_loading > 1) {
+		err = -EIO;
+	} else if (HAS_GUC_SCHED(dev) && !HAS_GUC_UCODE(dev)) {
+		return 0;

i915_gem_init_hw already guards the call to intel_guc_ucode_load with HAS_GUC_UCODE so at the moment at least this is a dead branch.

I don't even understand what is this branch supposed to do? How can there be a platform with no guc fw but guc scheduling?

+	} else if (i915.enable_guc_submission > 1) {
+		err = -EIO;
+	} else {
+		err = 0;
+	}
+
+	i915.enable_guc_submission = 0;
+
+	DRM_DEBUG_DRIVER("falling back to execlist mode, err %d\n", err);
+

This would log when i915.enable_guc_loading is set to 0 which would be confusing. I think in this case the function should bail out much earlier.

  	return err;
  }

@@ -631,8 +643,11 @@ void intel_guc_ucode_init(struct drm_device *dev)
  	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
  	const char *fw_path;

-	if (!HAS_GUC_SCHED(dev))
-		i915.enable_guc_submission = false;
+	/* A negative value means "use platform default" */
+	if (i915.enable_guc_loading < 0)
+		i915.enable_guc_loading = HAS_GUC_UCODE(dev);
+	if (i915.enable_guc_submission < 0)
+		i915.enable_guc_submission = HAS_GUC_SCHED(dev);

With this setup currently there is no difference between -1 and 1. But I can assume maybe in the future we could have -1 mean 2 on some platform which would then justify having four possible values for each?


  	if (!HAS_GUC_UCODE(dev)) {
  		fw_path = NULL;
@@ -641,26 +656,21 @@ void intel_guc_ucode_init(struct drm_device *dev)
  		guc_fw->guc_fw_major_wanted = 6;
  		guc_fw->guc_fw_minor_wanted = 1;
  	} else {
-		i915.enable_guc_submission = false;
  		fw_path = "";	/* unknown device */
  	}

Confusing block, HAS_GUC_UCODE is defined as (IS_GEN9(dev) && !IS_KABYLAKE(dev)) but then here we only support SKL here. Why the former is then not just IS_SKYLAKE?

When BXT support is added this still needs to be modified and would only save touching HAS_GUC_UCODE in the header. But it must be a better reason?


-	if (!i915.enable_guc_submission)
-		return;
-
  	guc_fw->guc_dev = dev;
  	guc_fw->guc_fw_path = fw_path;
  	guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_NONE;
  	guc_fw->guc_fw_load_status = GUC_FIRMWARE_NONE;

+	/* Early (and silent) return if GuC loading is disabled */
+	if (!i915.enable_guc_loading)
+		return;
  	if (fw_path == NULL)
  		return;
-
-	if (*fw_path == '\0') {
-		DRM_ERROR("No GuC firmware known for this platform\n");
-		guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_FAIL;
+	if (*fw_path == '\0')
  		return;
-	}

I also do not understand the complications with fw_path (either NULL or ""). In the two cases fetch status will be either none or fail, respectively, which will equally cause intel_guc_ucode_load to hit the failure path (fw_fetch_status != success).

So Is it really required for the fw_path to can either be NULL or ""?


  	guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_PENDING;
  	DRM_DEBUG_DRIVER("GuC firmware pending, path %s\n", fw_path);
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 4f1dfe6..df698d7 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1758,7 +1758,7 @@ int intel_guc_reset(struct drm_i915_private *dev_priv)
  	int ret;
  	unsigned long irqflags;

-	if (!i915.enable_guc_submission)
+	if (!HAS_GUC_UCODE(dev_priv))
  		return -EINVAL;

What if HAS_GUC_UCODE is true but the i915.load_guc_firmware has been set to zero? Should it skip the reset in that case as well?


  	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);


Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux