Re: [PATCH v4 3/3] drm/ingenic: Add option to alloc cached GEM buffers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi

Am 15.05.21 um 16:53 schrieb Paul Cercueil:
Alloc GEM buffers backed by noncoherent memory on SoCs where it is
actually faster than write-combine.

This dramatically speeds up software rendering on these SoCs, even for
tasks where write-combine memory should in theory be faster (e.g. simple
blits).

v3: The option is now selected per-SoC instead of being a module
     parameter.

Signed-off-by: Paul Cercueil <paul@xxxxxxxxxxxxxxx>
---
  drivers/gpu/drm/ingenic/ingenic-drm-drv.c | 56 ++++++++++++++++++++++-
  drivers/gpu/drm/ingenic/ingenic-ipu.c     | 18 ++++++--
  2 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
index 09225b770bb8..5f64e8583eec 100644
--- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
+++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
@@ -9,6 +9,7 @@
  #include <linux/component.h>
  #include <linux/clk.h>
  #include <linux/dma-mapping.h>
+#include <linux/io.h>
  #include <linux/module.h>
  #include <linux/mutex.h>
  #include <linux/of_device.h>
@@ -23,6 +24,7 @@
  #include <drm/drm_color_mgmt.h>
  #include <drm/drm_crtc.h>
  #include <drm/drm_crtc_helper.h>
+#include <drm/drm_damage_helper.h>
  #include <drm/drm_drv.h>
  #include <drm/drm_encoder.h>
  #include <drm/drm_gem_cma_helper.h>
@@ -57,6 +59,7 @@ struct ingenic_dma_hwdescs {
  struct jz_soc_info {
  	bool needs_dev_clk;
  	bool has_osd;
+	bool map_noncoherent;
  	unsigned int max_width, max_height;
  	const u32 *formats_f0, *formats_f1;
  	unsigned int num_formats_f0, num_formats_f1;
@@ -410,6 +413,8 @@ static int ingenic_drm_plane_atomic_check(struct drm_plane *plane,
  	     old_plane_state->fb->format->format != new_plane_state->fb->format->format))
  		crtc_state->mode_changed = true;
+ drm_atomic_helper_check_plane_damage(state, new_plane_state);
+
  	return 0;
  }
@@ -544,8 +549,8 @@ static void ingenic_drm_plane_atomic_update(struct drm_plane *plane,
  					    struct drm_atomic_state *state)
  {
  	struct ingenic_drm *priv = drm_device_get_priv(plane->dev);
-	struct drm_plane_state *newstate = drm_atomic_get_new_plane_state(state,
-									  plane);
+	struct drm_plane_state *newstate = drm_atomic_get_new_plane_state(state, plane);
+	struct drm_plane_state *oldstate = drm_atomic_get_old_plane_state(state, plane);
  	struct drm_crtc_state *crtc_state;
  	struct ingenic_dma_hwdesc *hwdesc;
  	unsigned int width, height, cpp, offset;
@@ -553,6 +558,8 @@ static void ingenic_drm_plane_atomic_update(struct drm_plane *plane,
  	u32 fourcc;
if (newstate && newstate->fb) {
+		drm_gem_cma_sync_data(&priv->drm, oldstate, newstate);
+
  		crtc_state = newstate->crtc->state;
addr = drm_fb_cma_get_gem_addr(newstate->fb, newstate, 0);
@@ -742,6 +749,43 @@ static void ingenic_drm_disable_vblank(struct drm_crtc *crtc)
  	regmap_update_bits(priv->map, JZ_REG_LCD_CTRL, JZ_LCD_CTRL_EOF_IRQ, 0);
  }
+static int ingenic_drm_atomic_helper_dirtyfb(struct drm_framebuffer *fb,
+					     struct drm_file *file_priv,
+					     unsigned int flags,
+					     unsigned int color,
+					     struct drm_clip_rect *clips,
+					     unsigned int num_clips)
+{
+	struct ingenic_drm *priv = drm_device_get_priv(fb->dev);
+
+	if (!priv->soc_info->map_noncoherent)
+		return 0;

I'm not sure you can get away without calling drm_atomic_helper_dirtyfb(). The function does some things with the plane's damage-clips property. If you don't call it here, the plane might pile up unhandled clipping areas. It's better to call it unconditionally and rely on the test in drm_gem_cma_sync_data(). See below for how to optimize this.

+
+	return drm_atomic_helper_dirtyfb(fb, file_priv, flags,
+					 color, clips, num_clips);
+}
+
+static const struct drm_framebuffer_funcs ingenic_drm_gem_fb_funcs =
{
+	.destroy	= drm_gem_fb_destroy,
+	.create_handle	= drm_gem_fb_create_handle,
+	.dirty          = ingenic_drm_atomic_helper_dirtyfb,
+};

You don't seem to be using this anywhere. You have to implement a custom fb_create for drm_mode_config_funcs. [1]

BUT: I think the overall approach should be to only use this on SoCs with the non-coherency setting. Use drm_gem_fb_create() on systems without non-coherency and drm_gem_fb_create_with_dirty() on systems with non-coherency (i.e., have two instances of drm_mode_config_funcs). Only call drm_plane_enable_fb_damage_clips() on systems with non-coherency.

+
+static struct drm_gem_object *
+ingenic_drm_gem_create_object(struct drm_device *drm, size_t size)
+{
+	struct ingenic_drm *priv = drm_device_get_priv(drm);
+	struct drm_gem_cma_object *obj;
+
+	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	obj->map_noncoherent = priv->soc_info->map_noncoherent;
+
+	return &obj->base;
+}
+
  DEFINE_DRM_GEM_CMA_FOPS(ingenic_drm_fops);
static const struct drm_driver ingenic_drm_driver_data = {
@@ -754,6 +798,7 @@ static const struct drm_driver ingenic_drm_driver_data = {
  	.patchlevel		= 0,
.fops = &ingenic_drm_fops,
+	.gem_create_object	= ingenic_drm_gem_create_object,
  	DRM_GEM_CMA_DRIVER_OPS,
.irq_handler = ingenic_drm_irq_handler,
@@ -961,6 +1006,8 @@ static int ingenic_drm_bind(struct device *dev, bool has_components)
  		return ret;
  	}
+ drm_plane_enable_fb_damage_clips(&priv->f1);
+
  	drm_crtc_helper_add(&priv->crtc, &ingenic_drm_crtc_helper_funcs);
ret = drm_crtc_init_with_planes(drm, &priv->crtc, primary,
@@ -989,6 +1036,8 @@ static int ingenic_drm_bind(struct device *dev, bool has_components)
  			return ret;
  		}
+ drm_plane_enable_fb_damage_clips(&priv->f0);
+
  		if (IS_ENABLED(CONFIG_DRM_INGENIC_IPU) && has_components) {
  			ret = component_bind_all(dev, drm);
  			if (ret) {
@@ -1245,6 +1294,7 @@ static const u32 jz4770_formats_f0[] = {
  static const struct jz_soc_info jz4740_soc_info = {
  	.needs_dev_clk = true,
  	.has_osd = false,
+	.map_noncoherent = false,
  	.max_width = 800,
  	.max_height = 600,
  	.formats_f1 = jz4740_formats,
@@ -1255,6 +1305,7 @@ static const struct jz_soc_info jz4740_soc_info =
{
  static const struct jz_soc_info jz4725b_soc_info = {
  	.needs_dev_clk = false,
  	.has_osd = true,
+	.map_noncoherent = false,
  	.max_width = 800,
  	.max_height = 600,
  	.formats_f1 = jz4725b_formats_f1,
@@ -1266,6 +1317,7 @@ static const struct jz_soc_info jz4725b_soc_info = {
  static const struct jz_soc_info jz4770_soc_info = {
  	.needs_dev_clk = false,
  	.has_osd = true,
+	.map_noncoherent = true,
  	.max_width = 1280,
  	.max_height = 720,
  	.formats_f1 = jz4770_formats_f1,
diff --git a/drivers/gpu/drm/ingenic/ingenic-ipu.c b/drivers/gpu/drm/ingenic/ingenic-ipu.c
index 3b1091e7c0cd..a4d1b500c3ad 100644
--- a/drivers/gpu/drm/ingenic/ingenic-ipu.c
+++ b/drivers/gpu/drm/ingenic/ingenic-ipu.c
@@ -20,10 +20,13 @@
#include <drm/drm_atomic.h>
  #include <drm/drm_atomic_helper.h>
+#include <drm/drm_damage_helper.h>
  #include <drm/drm_drv.h>
  #include <drm/drm_fb_cma_helper.h>
  #include <drm/drm_fourcc.h>
  #include <drm/drm_gem_atomic_helper.h>
+#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
  #include <drm/drm_plane.h>
  #include <drm/drm_plane_helper.h>
  #include <drm/drm_property.h>
@@ -285,8 +288,8 @@ static void ingenic_ipu_plane_atomic_update(struct drm_plane *plane,
  					    struct drm_atomic_state *state)
  {
  	struct ingenic_ipu *ipu = plane_to_ingenic_ipu(plane);
-	struct drm_plane_state *newstate = drm_atomic_get_new_plane_state(state,
-									  plane);
+	struct drm_plane_state *newstate = drm_atomic_get_new_plane_state(state, plane);
+	struct drm_plane_state *oldstate = drm_atomic_get_new_plane_state(state, plane);

Shouldn't this be drm_atomic_get_old_plane_state()? As written, oldstate is just a second copy of newstate.

  	const struct drm_format_info *finfo;
  	u32 ctrl, stride = 0, coef_index = 0, format = 0;
  	bool needs_modeset, upscaling_w, upscaling_h;
@@ -317,6 +320,8 @@ static void ingenic_ipu_plane_atomic_update(struct drm_plane *plane,
  				JZ_IPU_CTRL_CHIP_EN | JZ_IPU_CTRL_LCDC_SEL);
  	}
+ drm_gem_cma_sync_data(ipu->drm, oldstate, newstate);
+

If you want to optimize, maybe put this line behind

  if (priv->soc_info->map_noncoherent)

  	/* New addresses will be committed in vblank handler... */
  	ipu->addr_y = drm_fb_cma_get_gem_addr(newstate->fb, newstate, 0);
  	if (finfo->num_planes > 1)
@@ -541,7 +546,7 @@ static int ingenic_ipu_plane_atomic_check(struct drm_plane *plane,
if (!new_plane_state->crtc ||
  	    !crtc_state->mode.hdisplay || !crtc_state->mode.vdisplay)
-		return 0;
+		goto out_check_damage;
/* Plane must be fully visible */
  	if (new_plane_state->crtc_x < 0 || new_plane_state->crtc_y < 0 ||
@@ -558,7 +563,7 @@ static int ingenic_ipu_plane_atomic_check(struct drm_plane *plane,
  		return -EINVAL;
if (!osd_changed(new_plane_state, old_plane_state))
-		return 0;
+		goto out_check_damage;
crtc_state->mode_changed = true;
@@ -592,6 +597,9 @@ static int ingenic_ipu_plane_atomic_check(struct drm_plane *plane,
  	ipu->denom_w = denom_w;
  	ipu->denom_h = denom_h;
+out_check_damage:
+	drm_atomic_helper_check_plane_damage(state, new_plane_state);
+

If you implement my suggestion above, this line could also be behind

  if (priv->soc_info->map_noncoherent)

Best regards

Thomas



[1] https://elixir.bootlin.com/linux/v5.13-rc1/source/drivers/gpu/drm/ingenic/ingenic-drm-drv.c#L808



--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Felix Imendörffer

Attachment: OpenPGP_signature
Description: OpenPGP digital signature


[Index of Archives]     [LKML Archive]     [Linux ARM Kernel]     [Linux ARM]     [Git]     [Yosemite News]     [Linux SCSI]     [Linux Hams]

  Powered by Linux