On Tue, Nov 20, 2018 at 05:07:31PM +0530, Sharat Masetty wrote: > When the userspace tries to read the crashstate dump, the read side > implementation in the driver currently ascii85 encodes all the binary > buffers and it does this each time the read system call is called. > A userspace tool like cat typically does a page by page read and the > number of read calls depends on the size of the data captured by the > driver. This is certainly not desirable and does not scale well with > large captures. > > This patch encodes the buffer only once in the read path. With this there > is an immediate >10X speed improvement in crashstate save time. > > Signed-off-by: Sharat Masetty <smasetty@xxxxxxxxxxxxxx> Reviewed-by: Jordan Crouse <jcrouse@xxxxxxxxxxxxxx> I like this a lot. > --- > Changes from v1: > Addressed comments from Jordan Crouse > > drivers/gpu/drm/msm/adreno/adreno_gpu.c | 80 ++++++++++++++++++++++++--------- > drivers/gpu/drm/msm/msm_gpu.h | 1 + > 2 files changed, 60 insertions(+), 21 deletions(-) > > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c > index bbf8d3e..7749967 100644 > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c > @@ -441,11 +441,15 @@ void adreno_gpu_state_destroy(struct msm_gpu_state *state) > { > int i; > > - for (i = 0; i < ARRAY_SIZE(state->ring); i++) > + for (i = 0; i < ARRAY_SIZE(state->ring); i++) { > kvfree(state->ring[i].bo.data); > + kvfree(state->ring[i].bo.encoded); > + } > > - for (i = 0; state->bos && i < state->nr_bos; i++) > + for (i = 0; state->bos && i < state->nr_bos; i++) { > kvfree(state->bos[i].data); > + kvfree(state->bos[i].encoded); > + } > > kfree(state->bos); > kfree(state->comm); > @@ -472,34 +476,70 @@ int adreno_gpu_state_put(struct msm_gpu_state *state) > > #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) > > -static void adreno_show_object(struct drm_printer *p, u32 *ptr, int len) > +static char *adreno_gpu_ascii85_encode(u32 *src, size_t len) > { > - char out[ASCII85_BUFSZ]; > - long l, datalen, i; > + void *buf; > + size_t buf_itr = 0; > + long i, l; > > - if (!ptr || !len) > - return; > + if (!len) > + return NULL; > + > + l = ascii85_encode_len(len); > > /* > - * Only dump the non-zero part of the buffer - rarely will any data > - * completely fill the entire allocated size of the buffer > + * ascii85 outputs either a 5 byte string or a 1 byte string. So we > + * account for the worst case of 5 bytes per dword plus the 1 for '\0' > */ > - for (datalen = 0, i = 0; i < len >> 2; i++) { > - if (ptr[i]) > - datalen = (i << 2) + 1; > + buf = kvmalloc((l * 5) + 1, GFP_KERNEL); > + if (!buf) > + return NULL; > + > + for (i = 0; i < l; i++) { > + ascii85_encode(src[i], buf + buf_itr); > + > + if (src[i] == 0) > + buf_itr += 1; > + else > + buf_itr += 5; > } > > - /* Skip printing the object if it is empty */ > - if (datalen == 0) > + return buf; > +} > + > +static void adreno_show_object(struct drm_printer *p, > + struct msm_gpu_state_bo *bo) > +{ > + if ((!bo->data && !bo->encoded) || !bo->size) > return; > > - l = ascii85_encode_len(datalen); > + if (!bo->encoded) { > + long datalen, i; > + u32 *buf = bo->data; > + > + /* > + * Only dump the non-zero part of the buffer - rarely will > + * any data completely fill the entire allocated size of > + * the buffer. > + */ > + for (datalen = 0, i = 0; i < (bo->size) >> 2; i++) { > + if (buf[i]) > + datalen = ((i + 1) << 2); > + } > + > + bo->encoded = adreno_gpu_ascii85_encode(buf, datalen); > + > + kvfree(bo->data); > + bo->data = NULL; > + > + if (!bo->encoded) > + return; > + } > > drm_puts(p, " data: !!ascii85 |\n"); > drm_puts(p, " "); > > - for (i = 0; i < l; i++) > - drm_puts(p, ascii85_encode(ptr[i], out)); > + drm_puts(p, bo->encoded); > > drm_puts(p, "\n"); > } > @@ -531,8 +571,7 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state, > drm_printf(p, " wptr: %d\n", state->ring[i].wptr); > drm_printf(p, " size: %d\n", MSM_GPU_RINGBUFFER_SZ); > > - adreno_show_object(p, state->ring[i].bo.data, > - state->ring[i].bo.size); > + adreno_show_object(p, &(state->ring[i].bo)); > } > > if (state->bos) { > @@ -543,8 +582,7 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state, > state->bos[i].iova); > drm_printf(p, " size: %zd\n", state->bos[i].size); > > - adreno_show_object(p, state->bos[i].data, > - state->bos[i].size); > + adreno_show_object(p, &(state->bos[i])); > } > } > > diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h > index a3a6371..737bf45 100644 > --- a/drivers/gpu/drm/msm/msm_gpu.h > +++ b/drivers/gpu/drm/msm/msm_gpu.h > @@ -191,6 +191,7 @@ struct msm_gpu_state_bo { > u64 iova; > size_t size; > void *data; > + char *encoded; > }; > > struct msm_gpu_state { > -- > 1.9.1 > -- The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project