Re: [PATCH] RFC: drm/nouveau: Make BAR1 support optional

Thierry Reding <thierry.reding@xxxxxxxxx> · Fri, 8 Nov 2019 19:07:40 +0100

On Fri, Nov 08, 2019 at 05:02:07PM +0100, Thierry Reding wrote:
> From: Thierry Reding <treding@xxxxxxxxxx>
> 
> The purpose of BAR1 is primarily to make memory accesses coherent.
> However, some GPUs do not have BAR1 functionality. For example, the
> GV11B found on the Xavier SoC is DMA coherent and therefore doesn't
> need BAR1.
> 
> Implement a variant of FIFO channels that work without a mapping of
> instance memory through BAR1.
> 
> XXX ensure memory barriers are in place for writes
> 
> Signed-off-by: Thierry Reding <treding@xxxxxxxxxx>
> ---
> Hi Ben,
> 
> I'm sending this a bit out of context (it's part of the larger series to
> enable basic GV11B support) because I wanted to get some early feedback
> from you on this.
> 
> For a bit of background: GV11B as it turns out doesn't really have BAR1
> anymore. The SoC has a coherency fabric which means that basically the
> GPU's system memory accesses are already coherent and hence we no longer
> need to go via BAR1 to ensure that. Functionally the same can be done by
> just writing to the memory via the CPU's virtual mapping.
> 
> So this patch implement basically a second variant of the FIFO channel
> which, instead of taking a physical address and then ioremapping that,
> takes a struct nvkm_memory object. This seems to work, though since I'm
> not really doing much yet (firmware loading fails, etc.) I wouldn't call
> this "done" just yet.
> 
> In fact there are a few things that I'm not happy about. For example I
> think we'll eventually need to have barriers to ensure that the CPU
> write buffers are flushed, etc. It also seems like most users of the
> FIFO channel object will just go and map its buffer once and then only
> access it via the virtual mapping only, without going through the
> ->rd32()/->wr32() callbacks nor unmapping via ->unmap(). That means we
> effectively don't have a good point where we could emit the memory
> barriers.
> 
> I see two possibilities here: 1) make all accesses go through the
> accessors or 2) guard each series of accesses with a pair of nvkm_map()
> and nvkm_done() calls. Both of those would mean that all code paths need
> to be carefully audited.

Actually it looks like this is already working if I return 0 as the
address from the ->unmap() callback. That seems to result in the
->wr32() and ->rd32() callbacks getting called instead of the callers
trying to directly dereference the address, which obviously they now
can't.

So this seems like it could give me exactly what I need to make this
work. Again, this seems to get me past probe, but I see only a single
write at this point, so that's not saying much.

Thierry

> 
> One other thing I'm wondering is if it's okay to put all of this into
> the gk104_fifo implementation. I think the result of parameterizing on
> device->bar is pretty neat. Also, it seems like most of the rest of the
> code would have to be duplicated, or a lot of the gk104_*() function
> exported to a new implementation. So I'm not sure that it's really worth
> it.
> 
> Thierry
> 
>  .../drm/nouveau/include/nvkm/engine/fifo.h    |   7 +-
>  .../gpu/drm/nouveau/nvkm/engine/fifo/chan.c   | 157 ++++++++++++++++--
>  .../gpu/drm/nouveau/nvkm/engine/fifo/chan.h   |   6 +
>  .../gpu/drm/nouveau/nvkm/engine/fifo/gk104.c  |  29 +++-
>  4 files changed, 180 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
> index 4bd6e1e7c413..c0fb545efb2b 100644
> --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
> +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
> @@ -25,7 +25,12 @@ struct nvkm_fifo_chan {
>  	struct nvkm_gpuobj *inst;
>  	struct nvkm_gpuobj *push;
>  	struct nvkm_vmm *vmm;
> -	void __iomem *user;
> +
> +	union {
> +		struct nvkm_memory *mem;
> +		void __iomem *user;
> +	};
> +
>  	u64 addr;
>  	u32 size;
>  
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
> index d83485385934..f47bc96bbb6d 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
> @@ -310,7 +310,7 @@ nvkm_fifo_chan_init(struct nvkm_object *object)
>  }
>  
>  static void *
> -nvkm_fifo_chan_dtor(struct nvkm_object *object)
> +__nvkm_fifo_chan_dtor(struct nvkm_object *object)
>  {
>  	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
>  	struct nvkm_fifo *fifo = chan->fifo;
> @@ -324,9 +324,6 @@ nvkm_fifo_chan_dtor(struct nvkm_object *object)
>  	}
>  	spin_unlock_irqrestore(&fifo->lock, flags);
>  
> -	if (chan->user)
> -		iounmap(chan->user);
> -
>  	if (chan->vmm) {
>  		nvkm_vmm_part(chan->vmm, chan->inst->memory);
>  		nvkm_vmm_unref(&chan->vmm);
> @@ -337,6 +334,17 @@ nvkm_fifo_chan_dtor(struct nvkm_object *object)
>  	return data;
>  }
>  
> +static void *
> +nvkm_fifo_chan_dtor(struct nvkm_object *object)
> +{
> +	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
> +
> +	if (chan->user)
> +		iounmap(chan->user);
> +
> +	return __nvkm_fifo_chan_dtor(object);
> +}
> +
>  static const struct nvkm_object_func
>  nvkm_fifo_chan_func = {
>  	.dtor = nvkm_fifo_chan_dtor,
> @@ -349,12 +357,98 @@ nvkm_fifo_chan_func = {
>  	.sclass = nvkm_fifo_chan_child_get,
>  };
>  
> +static void *
> +nvkm_fifo_chan_mem_dtor(struct nvkm_object *object)
> +{
> +	return __nvkm_fifo_chan_dtor(object);
> +}
> +
> +static int
> +nvkm_fifo_chan_mem_map(struct nvkm_object *object, void *argv, u32 argc,
> +		       enum nvkm_object_map *type, u64 *addr, u64 *size)
> +{
> +	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
> +
> +	pr_info("> %s(object=%px, argv=%px, argc=%u, type=%px, addr=%px, size=%px)\n", __func__, object, argv, argc, type, addr, size);
> +
> +	*type = NVKM_OBJECT_MAP_VA;
> +	*addr = (u64)nvkm_kmap(chan->mem);
> +	*size = chan->size;
> +
> +	pr_info("  type: %d\n", *type);
> +	pr_info("  addr: %016llx\n", *addr);
> +	pr_info("  size: %016llx\n", *size);
> +	pr_info("< %s()\n", __func__);
> +	return 0;
> +}
> +
> +static int
> +nvkm_fifo_chan_mem_unmap(struct nvkm_object *object)
> +{
> +	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
> +
> +	pr_info("> %s(object=%px)\n", __func__, object);
> +
> +	nvkm_done(chan->mem);
> +
> +	pr_info("< %s()\n", __func__);
> +	return 0;
> +}
> +
> +static int
> +nvkm_fifo_chan_mem_rd32(struct nvkm_object *object, u64 addr, u32 *data)
> +{
> +	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
> +
> +	pr_info("> %s(object=%px, addr=%016llx, data=%px)\n", __func__, object, addr, data);
> +
> +	if (unlikely(addr + 4 > chan->size))
> +		return -EINVAL;
> +
> +	*data = nvkm_ro32(chan->mem, addr);
> +
> +	pr_info("  data: %08x\n", *data);
> +	pr_info("< %s()\n", __func__);
> +	return 0;
> +}
> +
> +static int
> +nvkm_fifo_chan_mem_wr32(struct nvkm_object *object, u64 addr, u32 data)
> +{
> +	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
> +
> +	pr_info("> %s(object=%px, addr=%016llx, data=%08x)\n", __func__, object, addr, data);
> +
> +	if (unlikely(addr + 4 > chan->size))
> +		return -EINVAL;
> +
> +	nvkm_wo32(chan->mem, addr, data);
> +
> +	/* XXX add barrier */
> +
> +	pr_info("< %s()\n", __func__);
> +	return 0;
> +}
> +
> +static const struct nvkm_object_func
> +nvkm_fifo_chan_mem_func = {
> +	.dtor = nvkm_fifo_chan_mem_dtor,
> +	.init = nvkm_fifo_chan_init,
> +	.fini = nvkm_fifo_chan_fini,
> +	.ntfy = nvkm_fifo_chan_ntfy,
> +	.map = nvkm_fifo_chan_mem_map,
> +	.unmap = nvkm_fifo_chan_mem_unmap,
> +	.rd32 = nvkm_fifo_chan_mem_rd32,
> +	.wr32 = nvkm_fifo_chan_mem_wr32,
> +	.sclass = nvkm_fifo_chan_child_get,
> +};
> +
>  int
> -nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
> -		    struct nvkm_fifo *fifo, u32 size, u32 align, bool zero,
> -		    u64 hvmm, u64 push, u64 engines, int bar, u32 base,
> -		    u32 user, const struct nvkm_oclass *oclass,
> -		    struct nvkm_fifo_chan *chan)
> +__nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
> +		      struct nvkm_fifo *fifo, u32 size, u32 align, bool zero,
> +		      u64 hvmm, u64 push, u64 engines,
> +		      const struct nvkm_oclass *oclass,
> +		      struct nvkm_fifo_chan *chan)
>  {
>  	struct nvkm_client *client = oclass->client;
>  	struct nvkm_device *device = fifo->engine.subdev.device;
> @@ -362,7 +456,6 @@ nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
>  	unsigned long flags;
>  	int ret;
>  
> -	nvkm_object_ctor(&nvkm_fifo_chan_func, oclass, &chan->object);
>  	chan->func = func;
>  	chan->fifo = fifo;
>  	chan->engines = engines;
> @@ -412,6 +505,26 @@ nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
>  	__set_bit(chan->chid, fifo->mask);
>  	spin_unlock_irqrestore(&fifo->lock, flags);
>  
> +	return 0;
> +}
> +
> +int
> +nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
> +		    struct nvkm_fifo *fifo, u32 size, u32 align, bool zero,
> +		    u64 hvmm, u64 push, u64 engines, int bar, u32 base,
> +		    u32 user, const struct nvkm_oclass *oclass,
> +		    struct nvkm_fifo_chan *chan)
> +{
> +	struct nvkm_device *device = fifo->engine.subdev.device;
> +	int ret;
> +
> +	nvkm_object_ctor(&nvkm_fifo_chan_func, oclass, &chan->object);
> +
> +	ret = __nvkm_fifo_chan_ctor(func, fifo, size, align, zero, hvmm, push,
> +				    engines, oclass, chan);
> +	if (ret)
> +		return ret;
> +
>  	/* determine address of this channel's user registers */
>  	chan->addr = device->func->resource_addr(device, bar) +
>  		     base + user * chan->chid;
> @@ -420,3 +533,27 @@ nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
>  	nvkm_fifo_cevent(fifo);
>  	return 0;
>  }
> +
> +int nvkm_fifo_chan_mem_ctor(const struct nvkm_fifo_chan_func *func,
> +			    struct nvkm_fifo *fifo, u32 size, u32 align,
> +			    bool zero, u64 hvmm, u64 push, u64 engines,
> +			    struct nvkm_memory *mem, u32 user,
> +			    const struct nvkm_oclass *oclass,
> +			    struct nvkm_fifo_chan *chan)
> +{
> +	int ret;
> +
> +	nvkm_object_ctor(&nvkm_fifo_chan_mem_func, oclass, &chan->object);
> +
> +	ret = __nvkm_fifo_chan_ctor(func, fifo, size, align, zero, hvmm, push,
> +				    engines, oclass, chan);
> +	if (ret)
> +		return ret;
> +
> +	chan->mem = mem;
> +	chan->addr = user * chan->chid;
> +	chan->size = user;
> +
> +	nvkm_fifo_cevent(fifo);
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
> index 177e10562600..71f32b1ebba0 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
> @@ -24,6 +24,12 @@ int nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *, struct nvkm_fifo *,
>  			u32 size, u32 align, bool zero, u64 vm, u64 push,
>  			u64 engines, int bar, u32 base, u32 user,
>  			const struct nvkm_oclass *, struct nvkm_fifo_chan *);
> +int nvkm_fifo_chan_mem_ctor(const struct nvkm_fifo_chan_func *,
> +			    struct nvkm_fifo *, u32 size, u32 align,
> +			    bool zero, u64 vm, u64 push, u64 engines,
> +			    struct nvkm_memory *mem, u32 user,
> +			    const struct nvkm_oclass *,
> +			    struct nvkm_fifo_chan *);
>  
>  struct nvkm_fifo_chan_oclass {
>  	int (*ctor)(struct nvkm_fifo *, const struct nvkm_oclass *,
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
> index 81cbe1cc4804..5404a182eb0a 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
> @@ -906,7 +906,6 @@ gk104_fifo_oneinit(struct nvkm_fifo *base)
>  	struct gk104_fifo *fifo = gk104_fifo(base);
>  	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
>  	struct nvkm_device *device = subdev->device;
> -	struct nvkm_vmm *bar = nvkm_bar_bar1_vmm(device);
>  	int engn, runl, pbid, ret, i, j;
>  	enum nvkm_devidx engidx;
>  	u32 *map;
> @@ -967,12 +966,19 @@ gk104_fifo_oneinit(struct nvkm_fifo *base)
>  	if (ret)
>  		return ret;
>  
> -	ret = nvkm_vmm_get(bar, 12, nvkm_memory_size(fifo->user.mem),
> -			   &fifo->user.bar);
> -	if (ret)
> -		return ret;
> +	if (device->bar) {
> +		struct nvkm_vmm *bar = nvkm_bar_bar1_vmm(device);
> +
> +		ret = nvkm_vmm_get(bar, 12, nvkm_memory_size(fifo->user.mem),
> +				   &fifo->user.bar);
> +		if (ret)
> +			return ret;
> +
> +		return nvkm_memory_map(fifo->user.mem, 0, bar, fifo->user.bar,
> +				       NULL, 0);
> +	}
>  
> -	return nvkm_memory_map(fifo->user.mem, 0, bar, fifo->user.bar, NULL, 0);
> +	return 0;
>  }
>  
>  static void
> @@ -998,7 +1004,12 @@ gk104_fifo_init(struct nvkm_fifo *base)
>  		nvkm_wr32(device, 0x04014c + (i * 0x2000), 0xffffffff); /* INTREN */
>  	}
>  
> -	nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar->addr >> 12);
> +	/* obsolete on GV100 and later */
> +	if (fifo->user.bar) {
> +		u32 value = 0x10000000 | fifo->user.bar->addr >> 12;
> +
> +		nvkm_wr32(device, 0x002254, value);
> +	}
>  
>  	if (fifo->func->pbdma->init_timeout)
>  		fifo->func->pbdma->init_timeout(fifo);
> @@ -1014,7 +1025,9 @@ gk104_fifo_dtor(struct nvkm_fifo *base)
>  	struct nvkm_device *device = fifo->base.engine.subdev.device;
>  	int i;
>  
> -	nvkm_vmm_put(nvkm_bar_bar1_vmm(device), &fifo->user.bar);
> +	if (fifo->user.bar)
> +		nvkm_vmm_put(nvkm_bar_bar1_vmm(device), &fifo->user.bar);
> +
>  	nvkm_memory_unref(&fifo->user.mem);
>  
>  	for (i = 0; i < fifo->runlist_nr; i++) {
> -- 
> 2.23.0
> 
Attachment:
signature.asc

Description: PGP signature
_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/dri-devel