Re: [Nouveau] [PATCH v3 2/2] fb/nvaa: Enable non-isometric poller on NVAA/NVAC

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Dec 10, 2014 at 5:53 PM, Pierre Moreau <pierre.morrow@xxxxxxx> wrote:
> (This is a v3 of patch "drm/nouveau/fb/nv50: Add PFB writes")
>
> This fixes a GPU lockup on 9400M (NVAC) when using acceleration, see
> https://bugs.freedesktop.org/show_bug.cgi?id=27501
>
> v2:
> - Move code to subdev/fb/nv50.c as suggested by Roy Spliet;
> - Remove arbitrary writes to 100c18/100c24
> - Replace write to 100c1c of arbitrary value by the address of a scratch page
>   as proposed by Ilia Mirkin;
> - Remove enabling of bits 16 and 0 as they don't yield any changes.
>
> v3:
> - Move code to subdev/fb/nvaa.c as suggested by Ilia Mirkin.
> The following changes were made thanks to information provided by Robert Morell
> from NVidia:
> - Allocate a dma page for use by the pollers;
> - Re-enable pollers at bits 16 and 0;
> - Set pollers address to a proper value.
Hey Pierre,

This patch is incorrect.  As Robert pointed out in an older thread,
the registers don't take the physical address of a page, but a
somewhat odd "negative offset from the end of carveout".

See this example (referring to Robert's last email):

220.926392    read32 #3 +0x00100e10 -> 0x00070000
220.926406    read32 #3 +0x00100e14 -> 0x00010000

carveout_base = 0x70000000
carveout_size = 0x10000000

-- snip --

223.300495    read32 #3 +0x00100c14 -> 0x00000000
223.300521    read32 #3 +0x00100c18 -> 0x00000000
223.300547   write32 #3 +0x00100c18 <- 0x000027ff

base = (0x70000000 + 0x10000000) - ((0x27ff + 1) << 5)
base = 0x7ffb0000 << in carveout

223.300573    read32 #3 +0x00100c14 -> 0x00000000
223.300599   write32 #3 +0x00100c14 <- 0x00000001
223.300625    read32 #3 +0x00100c1c -> 0x00000002
223.300651   write32 #3 +0x00100c1c <- 0x000027fe

base = (0x70000000 + 0x10000000) - ((0x27fe + 1) << 5)
base = 0x7ffb0020 << in carveout

223.300677    read32 #3 +0x00100c14 -> 0x00000001
223.300702   write32 #3 +0x00100c14 <- 0x00000003
223.300728    read32 #3 +0x00100c24 -> 0x00000004
223.300754   write32 #3 +0x00100c24 <- 0x000027fd

base = (0x70000000 + 0x10000000) - ((0x27fd + 1) << 5)
base = 0x7ffb0040 << in carveout

223.300780    read32 #3 +0x00100c14 -> 0x00000003
223.300806   write32 #3 +0x00100c14 <- 0x00010003

It's a bit tricky at that point in the nouveau init process to
allocate device memory, but, we already reserve 1MiB of memory at the
end of "VRAM" (carveout / stolen memory) so we can make use of that.

I've attached 3 patches.  The first two of them, I'll probably merge as-is.

The third patch is an example of how this should probably be done, in
case you want to continue tackling this some more :)

Cheers,
Ben.

>
> Signed-off-by: Pierre Moreau <pierre.morrow@xxxxxxx>
> ---
>  drm/core/subdev/fb/nvaa.h |  1 +
>  nvkm/subdev/fb/nv50.h     |  4 +++
>  nvkm/subdev/fb/nvaa.c     | 74 ++++++++++++++++++++++++++++++++++++++++++++---
>  nvkm/subdev/fb/nvaa.h     | 19 ++++++++++++
>  4 files changed, 94 insertions(+), 4 deletions(-)
>  create mode 120000 drm/core/subdev/fb/nvaa.h
>  create mode 100644 nvkm/subdev/fb/nvaa.h
>
> diff --git a/drm/core/subdev/fb/nvaa.h b/drm/core/subdev/fb/nvaa.h
> new file mode 120000
> index 0000000..b450e8c
> --- /dev/null
> +++ b/drm/core/subdev/fb/nvaa.h
> @@ -0,0 +1 @@
> +../../../../nvkm/subdev/fb/nvaa.h
> \ No newline at end of file
> diff --git a/nvkm/subdev/fb/nv50.h b/nvkm/subdev/fb/nv50.h
> index c5e5a88..0b20975 100644
> --- a/nvkm/subdev/fb/nv50.h
> +++ b/nvkm/subdev/fb/nv50.h
> @@ -9,6 +9,10 @@ struct nv50_fb_priv {
>         dma_addr_t r100c08;
>  };
>
> +#define nv50_fb_create(p,e,c,d,o)                                              \
> +       nv50_fb_ctor((p), (e), (c), (d), sizeof(**o),                          \
> +                       (struct nouveau_object **)o)
> +
>  int  nv50_fb_ctor(struct nouveau_object *, struct nouveau_object *,
>                   struct nouveau_oclass *, void *, u32,
>                   struct nouveau_object **);
> diff --git a/nvkm/subdev/fb/nvaa.c b/nvkm/subdev/fb/nvaa.c
> index cba8e68..b70ab2f 100644
> --- a/nvkm/subdev/fb/nvaa.c
> +++ b/nvkm/subdev/fb/nvaa.c
> @@ -22,15 +22,81 @@
>   * Authors: Ben Skeggs
>   */
>
> -#include "nv50.h"
> +#include "nvaa.h"
> +
> +int
> +nvaa_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
> +            struct nouveau_oclass *oclass, void *data, u32 size,
> +            struct nouveau_object **pobject)
> +{
> +       struct nouveau_device *device = nv_device(parent);
> +       struct nvaa_fb_priv *priv;
> +       int ret;
> +
> +       ret = nv50_fb_create(parent, engine, oclass, data, &priv);
> +       *pobject = nv_object(priv);
> +       if (ret)
> +               return ret;
> +
> +       priv = (struct nvaa_fb_priv *)(*pobject);
> +
> +       priv->r100c18_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +       if (priv->r100c18_page) {
> +               priv->r100c18 = dma_map_page(nv_device_base(device),
> +                                            priv->r100c18_page, 0, PAGE_SIZE,
> +                                            DMA_BIDIRECTIONAL);
> +               if (dma_mapping_error(nv_device_base(device), priv->r100c18))
> +                       return -EFAULT;
> +       } else {
> +               nv_warn(priv, "failed 0x100c18 page alloc\n");
> +       }
> +       return 0;
> +}
> +
> +void
> +nvaa_fb_dtor(struct nouveau_object *object)
> +{
> +       struct nouveau_device *device = nv_device(object);
> +       struct nvaa_fb_priv *priv = (void *)object;
> +
> +       if (priv->r100c18_page) {
> +               dma_unmap_page(nv_device_base(device), priv->r100c18, PAGE_SIZE,
> +                              DMA_BIDIRECTIONAL);
> +               __free_page(priv->r100c18_page);
> +       }
> +
> +       nv50_fb_dtor(object);
> +}
> +
> +int
> +nvaa_fb_init(struct nouveau_object *object)
> +{
> +       struct nvaa_fb_priv *priv = (void *)object;
> +       int ret;
> +
> +       ret = nv50_fb_init(object);
> +       if (ret)
> +               return ret;
> +
> +       /* Enable NISO poller for various clients and set their associated
> +        * read address, only for MCP77/78 and MCP79/7A. (fd#27501)
> +        */
> +       nv_wr32(priv, 0x100c18, priv->r100c18 >> 8);
> +       nv_mask(priv, 0x100c14, 0x00000000, 0x00000001);
> +       nv_wr32(priv, 0x100c1c, (priv->r100c18 >> 8) + 1);
> +       nv_mask(priv, 0x100c14, 0x00000000, 0x00000002);
> +       nv_wr32(priv, 0x100c24, (priv->r100c18 >> 8) + 2);
> +       nv_mask(priv, 0x100c14, 0x00000000, 0x00010000);
> +       return 0;
> +}
>
>  struct nouveau_oclass *
>  nvaa_fb_oclass = &(struct nv50_fb_impl) {
>         .base.base.handle = NV_SUBDEV(FB, 0xaa),
>         .base.base.ofuncs = &(struct nouveau_ofuncs) {
> -               .ctor = nv50_fb_ctor,
> -               .dtor = nv50_fb_dtor,
> -               .init = nv50_fb_init,
> +               .ctor = nvaa_fb_ctor,
> +               .dtor = nvaa_fb_dtor,
> +               .init = nvaa_fb_init,
>                 .fini = _nouveau_fb_fini,
>         },
>         .base.memtype = nv50_fb_memtype_valid,
> diff --git a/nvkm/subdev/fb/nvaa.h b/nvkm/subdev/fb/nvaa.h
> new file mode 100644
> index 0000000..84e1eca
> --- /dev/null
> +++ b/nvkm/subdev/fb/nvaa.h
> @@ -0,0 +1,19 @@
> +#ifndef __NVKM_FB_NVAA_H__
> +#define __NVKM_FB_NVAA_H__
> +
> +#include "nv50.h"
> +
> +struct nvaa_fb_priv {
> +       struct nv50_fb_priv base;
> +       struct page *r100c18_page;
> +       dma_addr_t r100c18;
> +};
> +
> +int  nvaa_fb_ctor(struct nouveau_object *, struct nouveau_object *,
> +                 struct nouveau_oclass *, void *, u32,
> +                 struct nouveau_object **);
> +void nvaa_fb_dtor(struct nouveau_object *);
> +int  nvaa_fb_init(struct nouveau_object *);
> +
> +
> +#endif
> --
> 2.1.3
>
> _______________________________________________
> Nouveau mailing list
> Nouveau@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/nouveau
From 12b3ad50288ceaba3feae5a11772014a3a334953 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@xxxxxxxxxx>
Date: Thu, 11 Dec 2014 13:19:31 +1000
Subject: [PATCH 1/3] fb/ram/mcp77: subclass nouveau_ram

Signed-off-by: Ben Skeggs <bskeggs@xxxxxxxxxx>
---
 nvkm/subdev/fb/ramnvaa.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/nvkm/subdev/fb/ramnvaa.c b/nvkm/subdev/fb/ramnvaa.c
index 00f2ca7..8ee3d37 100644
--- a/nvkm/subdev/fb/ramnvaa.c
+++ b/nvkm/subdev/fb/ramnvaa.c
@@ -24,6 +24,10 @@
 
 #include "nv50.h"
 
+struct nvaa_ram_priv {
+	struct nouveau_ram base;
+};
+
 static int
 nvaa_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	      struct nouveau_oclass *oclass, void *data, u32 datasize,
@@ -32,26 +36,26 @@ nvaa_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */
 	const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */
 	struct nouveau_fb *pfb = nouveau_fb(parent);
-	struct nouveau_ram *ram;
+	struct nvaa_ram_priv *priv;
 	int ret;
 
-	ret = nouveau_ram_create(parent, engine, oclass, &ram);
-	*pobject = nv_object(ram);
+	ret = nouveau_ram_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
-	ram->size = nv_rd32(pfb, 0x10020c);
-	ram->size = (ram->size & 0xffffff00) | ((ram->size & 0x000000ff) << 32);
+	priv->base.size = nv_rd32(pfb, 0x10020c);
+	priv->base.size = (priv->base.size & 0xffffff00) | ((priv->base.size & 0x000000ff) << 32);
 
-	ret = nouveau_mm_init(&pfb->vram, rsvd_head, (ram->size >> 12) -
+	ret = nouveau_mm_init(&pfb->vram, rsvd_head, (priv->base.size >> 12) -
 			      (rsvd_head + rsvd_tail), 1);
 	if (ret)
 		return ret;
 
-	ram->type   = NV_MEM_TYPE_STOLEN;
-	ram->stolen = (u64)nv_rd32(pfb, 0x100e10) << 12;
-	ram->get = nv50_ram_get;
-	ram->put = nv50_ram_put;
+	priv->base.type   = NV_MEM_TYPE_STOLEN;
+	priv->base.stolen = (u64)nv_rd32(pfb, 0x100e10) << 12;
+	priv->base.get = nv50_ram_get;
+	priv->base.put = nv50_ram_put;
 	return 0;
 }
 
-- 
2.2.0

From 85ca5098d65f31362024bfd96c0b04b571d10cfb Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@xxxxxxxxxx>
Date: Thu, 11 Dec 2014 13:21:24 +1000
Subject: [PATCH 2/3] fb/ram/mcp77: use carveout reg to determine size

Signed-off-by: Ben Skeggs <bskeggs@xxxxxxxxxx>
---
 nvkm/subdev/fb/ramnvaa.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/nvkm/subdev/fb/ramnvaa.c b/nvkm/subdev/fb/ramnvaa.c
index 8ee3d37..f56ee55 100644
--- a/nvkm/subdev/fb/ramnvaa.c
+++ b/nvkm/subdev/fb/ramnvaa.c
@@ -44,16 +44,15 @@ nvaa_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
-	priv->base.size = nv_rd32(pfb, 0x10020c);
-	priv->base.size = (priv->base.size & 0xffffff00) | ((priv->base.size & 0x000000ff) << 32);
+	priv->base.type   = NV_MEM_TYPE_STOLEN;
+	priv->base.stolen = (u64)nv_rd32(pfb, 0x100e10) << 12;
+	priv->base.size   = (u64)nv_rd32(pfb, 0x100e14) << 12;
 
 	ret = nouveau_mm_init(&pfb->vram, rsvd_head, (priv->base.size >> 12) -
 			      (rsvd_head + rsvd_tail), 1);
 	if (ret)
 		return ret;
 
-	priv->base.type   = NV_MEM_TYPE_STOLEN;
-	priv->base.stolen = (u64)nv_rd32(pfb, 0x100e10) << 12;
 	priv->base.get = nv50_ram_get;
 	priv->base.put = nv50_ram_put;
 	return 0;
-- 
2.2.0

From 3b2a91eb59d308bed98637d4bcdfe29a2c9434ad Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@xxxxxxxxxx>
Date: Thu, 11 Dec 2014 13:39:18 +1000
Subject: [PATCH 3/3] EXAMPLE: fb/ram/mcp77: niso poller setup

---
 nvkm/subdev/fb/ramnvaa.c | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/nvkm/subdev/fb/ramnvaa.c b/nvkm/subdev/fb/ramnvaa.c
index f56ee55..9cac747 100644
--- a/nvkm/subdev/fb/ramnvaa.c
+++ b/nvkm/subdev/fb/ramnvaa.c
@@ -26,6 +26,7 @@
 
 struct nvaa_ram_priv {
 	struct nouveau_ram base;
+	u64 poller_base;
 };
 
 static int
@@ -33,8 +34,8 @@ nvaa_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	      struct nouveau_oclass *oclass, void *data, u32 datasize,
 	      struct nouveau_object **pobject)
 {
-	const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */
-	const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */
+	u32 rsvd_head = ( 256 * 1024); /* vga memory */
+	u32 rsvd_tail = (1024 * 1024); /* vbios etc */
 	struct nouveau_fb *pfb = nouveau_fb(parent);
 	struct nvaa_ram_priv *priv;
 	int ret;
@@ -47,9 +48,16 @@ nvaa_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	priv->base.type   = NV_MEM_TYPE_STOLEN;
 	priv->base.stolen = (u64)nv_rd32(pfb, 0x100e10) << 12;
 	priv->base.size   = (u64)nv_rd32(pfb, 0x100e14) << 12;
+	// 0x10000000
 
-	ret = nouveau_mm_init(&pfb->vram, rsvd_head, (priv->base.size >> 12) -
-			      (rsvd_head + rsvd_tail), 1);
+	rsvd_tail += 0x1000;
+	priv->poller_base = priv->base.size - rsvd_tail;
+	// 0x10000000 - 0x101000
+	// 0x0feff000 << offset from start of carveout
+
+	ret = nouveau_mm_init(&pfb->vram, rsvd_head >> 12,
+			      (priv->base.size >> 12) -
+			      (rsvd_head + rsvd_tail) >> 12, 1);
 	if (ret)
 		return ret;
 
@@ -58,6 +66,26 @@ nvaa_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	return 0;
 }
 
+static int
+nvaa_ram_init(struct nouveau_object *object)
+{
+	struct nvaa_ram_priv *priv = (void *)object;
+	int ret;
+
+	ret = nouveau_ram_init(&priv->base);
+	if (ret)
+		return ret;
+
+	dniso  = ((priv->base.size - (priv->poller_base + 0x00)) >> 5) - 1;
+	// 0x807f
+	hostnb = ((priv->base.size - (priv->poller_base + 0x20)) >> 5) - 1;
+	// 0x807e
+	flush  = ((priv->base.size - (priv->poller_base + 0x40)) >> 5) - 1;
+	// 0x807d
+
+	return 0;
+}
+
 struct nouveau_oclass
 nvaa_ram_oclass = {
 	.ofuncs = &(struct nouveau_ofuncs) {
-- 
2.2.0

_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/dri-devel

[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux