On 09/11/17 17:34, Tvrtko Ursulin wrote:
On 08/11/2017 16:22, Lionel Landwerlin wrote:
With the introduction of asymetric slices in CNL, we cannot rely on
the previous SUBSLICE_MASK getparam. Here we introduce a more detailed
way of querying the Gen's GPU topology that doesn't aggregate numbers.
This is essential for monitoring parts of the GPU with the OA unit,
because counters need to be normalized to the number of
EUs/subslices/slices. The current aggregated numbers like EU_TOTAL do
not gives us sufficient information.
This change introduce a new way to query properties of the GPU, making
room for new queries (some media related topology could be exposed in
the future).
As a bonus we can draw representations of the GPU :
https://imgur.com/a/vuqpa
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
---
drivers/gpu/drm/i915/intel_query_info.c | 64
+++++++++++++++++++++++++++++++++
include/uapi/drm/i915_drm.h | 55
++++++++++++++++++++++++++++
2 files changed, 119 insertions(+)
diff --git a/drivers/gpu/drm/i915/intel_query_info.c
b/drivers/gpu/drm/i915/intel_query_info.c
index 21434c393582..45c5b29e0988 100644
--- a/drivers/gpu/drm/i915/intel_query_info.c
+++ b/drivers/gpu/drm/i915/intel_query_info.c
@@ -25,6 +25,68 @@
#include "i915_drv.h"
#include <uapi/drm/i915_drm.h>
+static int query_info_rcs_topology(struct drm_i915_private *dev_priv,
+ struct drm_i915_query_info *args)
+{
+ const struct sseu_dev_info *sseu = &INTEL_INFO(dev_priv)->sseu;
+ struct drm_i915_rcs_topology_info __user *user_topology =
+ u64_to_user_ptr(args->info_ptr);
+ struct drm_i915_rcs_topology_info topology;
+ u32 data_size, total_size;
+ const u8 *data = NULL;
+ int ret;
+
+ /* Not supported on gen < 8. */
+ if (sseu->max_slices == 0)
+ return -ENODEV;
+
+ switch (args->query_params[0]) {
+ case I915_RCS_TOPOLOGY_SLICE:
+ topology.params[0] = sseu->max_slices;
+ data_size = sizeof(sseu->slice_mask);
+ data = &sseu->slice_mask;
Regardless of the solution, you will need to translate the internal
kernel data into something strictly defined in the uAPI headers,
otherwise we leak internal organization into ABI.
Sure, I'll add an assert to make sure that it is u8, so if it does
switch to u16/u32 at some point, people will remember to maintain the API.
+ break;
+
+ case I915_RCS_TOPOLOGY_SUBSLICE:
+ topology.params[0] = sseu->max_slices;
+ topology.params[1] = ALIGN(sseu->max_subslices, 8) / 8;
+ data_size = sseu->max_slices * topology.params[1];
+ data = sseu->subslices_mask;
+ break;
+
+ case I915_RCS_TOPOLOGY_EU:
+ topology.params[2] = ALIGN(sseu->max_eus_per_subslice, 8) / 8;
+ topology.params[1] = sseu->max_subslices * topology.params[2];
+ topology.params[0] = sseu->max_slices;
+ data_size = sseu->max_slices * topology.params[1];
+ data = sseu->eu_mask;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ total_size = sizeof(topology) + data_size;
+
+ if (args->info_ptr_len == 0) {
+ args->info_ptr_len = total_size;
+ return 0;
+ }
+
+ if (args->info_ptr_len < total_size)
+ return -EINVAL;
+
+ ret = copy_to_user(user_topology, &topology, sizeof(topology));
+ if (ret)
+ return -EFAULT;
+
+ ret = copy_to_user(user_topology + 1, data, data_size);
+ if (ret)
+ return -EFAULT;
+
+ return 0;
+}
+
static u8 user_class_map[I915_ENGINE_CLASS_MAX] = {
[I915_ENGINE_CLASS_OTHER] = OTHER_CLASS,
[I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
@@ -112,6 +174,8 @@ int i915_query_info_ioctl(struct drm_device *dev,
void *data,
switch (args->query) {
case I915_QUERY_INFO_ENGINE:
return query_info_engine(dev_priv, args);
+ case I915_QUERY_INFO_RCS_TOPOLOGY:
+ return query_info_rcs_topology(dev_priv, args);
default:
return -EINVAL;
}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index c6026616300e..5b6a8995a948 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1569,6 +1569,61 @@ struct drm_i915_engine_info {
__u8 rsvd2[6];
};
+/* Query RCS topology.
+ *
+ * drm_i915_query_info.query_params[0] should be set to one of the
+ * I915_RCS_TOPOLOGY_* define.
+ *
+ * drm_i915_gem_query_info.info_ptr will be written to with
+ * drm_i915_rcs_topology_info.
+ */
+#define I915_QUERY_INFO_RCS_TOPOLOGY 1 /* version 1 */
+
+/* Query RCS slice topology
+ *
+ * The meaning of the drm_i915_rcs_topology_info fields is :
+ *
+ * params[0]: number of slices
+ *
+ * data: Each bit indicates whether a slice is available (1) or
fused off (0).
+ * Formula to tell if slice X is available :
+ *
+ * (data[X / 8] >> (X % 8)) & 1
+ */
+#define I915_RCS_TOPOLOGY_SLICE 0 /* version 1 */
+/* Query RCS subslice topology
+ *
+ * The meaning of the drm_i915_rcs_topology_info fields is :
+ *
+ * params[0]: number of slices
+ * params[1]: slice stride
+ *
+ * data: each bit indicates whether a subslice is available (1) or
fused off
+ * (0). Formula to tell if slice X subslice Y is available :
+ *
+ * (data[(X * params[1]) + Y / 8] >> (Y % 8)) & 1
+ */
+#define I915_RCS_TOPOLOGY_SUBSLICE 1 /* version 1 */
+/* Query RCS EU topology
+ *
+ * The meaning of the drm_i915_rcs_topology_info fields is :
+ *
+ * params[0]: number of slices
+ * params[1]: slice stride
+ * params[2]: subslice stride
+ *
+ * data: Each bit indicates whether a subslice is available (1) or
fused off
+ * (0). Formula to tell if slice X subslice Y eu Z is available :
+ *
+ * (data[X * params[1] + Y * params[2] + Z / 8] >> (Z % 8)) & 1
+ */
+#define I915_RCS_TOPOLOGY_EU 2 /* version 1 */
+
+struct drm_i915_rcs_topology_info {
+ __u32 params[6];
+
+ __u8 data[];
You don't use this marker in the patch.
But in general would it be feasible to define and name the returned
data more precisely? Like:
struct drm_engine_rcs_engine_info {
...
/existing_stuff/
...
#define HAS_TOPOLOGY
u32 flags;
struct {
u32 this;
u32 that;
...
u8 mask[SOME_FUTURE_PROOF_NUMBER];
} slice_topology;
struct {
u32 this;
u32 that;
...
u8 mask[SOME_FUTURE_PROOF_NUMBER];
} subslice_topology;
struct {
u32 this;
u32 that;
...
u8 mask[SOME_FUTURE_PROOF_NUMBER];
} eu_topology;
};
I'm pretty sure we'll need to expose more than these 3 properties here
(slice/subslice/eus) soon.
Some of the components residing in the subslice could be of interest.
So I'm reluctant to have all of this within a single struct which we
can't change the size of.
Having an enum for each property and possibly an associated struct for
each is fine though.
That said, perhaps RCS topology belongs more to the GPU than the
render engine.
Regards,
Tvrtko
+};
struct drm_i915_query_info {
/* in/out: Protocol version requested/supported. When set to 0,
the
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx