On Tue, Nov 06, 2018 at 11:40:06AM +0530, Sharat Masetty wrote: > When the userspace tries to read the crashstate dump, the read side > implementation in the driver currently ascii85 encodes all the binary > buffers and it does this each time the read system call is called. > A userspace tool like cat typically does a page by page read and the > number of read calls depends on the size of the data captured by the > driver. This is certainly not desirable and does not scale well with > large captures. > > This patch encodes the buffer only once in the read path. With this there > is an immediate >10X speed improvement in crashstate save time. > > Signed-off-by: Sharat Masetty <smasetty@xxxxxxxxxxxxxx> > --- > drivers/gpu/drm/msm/adreno/adreno_gpu.c | 76 ++++++++++++++++++++++++--------- > drivers/gpu/drm/msm/msm_gpu.h | 2 + > 2 files changed, 58 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c > index c93702d..e29093e 100644 > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c > @@ -475,34 +475,70 @@ int adreno_gpu_state_put(struct msm_gpu_state *state) > > #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) > > -static void adreno_show_object(struct drm_printer *p, u32 *ptr, int len) > +static char *adreno_gpu_ascii85_encode(u32 *src, size_t len) > { > - char out[ASCII85_BUFSZ]; > - long l, datalen, i; > + void *buf; > + size_t buf_itr = 0; > + long i, l; > > - if (!ptr || !len) > - return; > + if (!len) > + return NULL; > + > + l = ascii85_encode_len(len); > > /* > - * Only dump the non-zero part of the buffer - rarely will any data > - * completely fill the entire allocated size of the buffer > + * ascii85 outputs either a 5 byte string or a 1 byte string. So we > + * account for the worst case of 5 bytes per dword plus the 1 for '\0' > */ > - for (datalen = 0, i = 0; i < len >> 2; i++) { > - if (ptr[i]) > - datalen = (i << 2) + 1; > - } > + buf = kvmalloc((l * 5) + 1, GFP_KERNEL); > + if (!buf) > + return NULL; > > - /* Skip printing the object if it is empty */ > - if (datalen == 0) > + for (i = 0; i < l; i++) > + buf_itr += ascii85_encode_to_buf(src[i], buf + buf_itr); If this is the only use of ascii85_encode_to_buf I don't think we need it in the common header - the same functionality can just be built in here. > + > + return buf; > +} > + > +/* len is expected to be in bytes */ > +static void adreno_show_object(struct drm_printer *p, void **ptr, int len, > + bool *encoded) > +{ > + if (!*ptr || !len) > return; > > - l = ascii85_encode_len(datalen); > + if (!*encoded) { We wouldn't need the encoded bool if you used different pointers for the non-encoded and encoded objects - if (!encoded) { encoded = adreno_gpu_ascii85_encode(raw); kvfree(raw); raw = NULL; } And then just blindly kvfree() both in the destroy function - I think that would be a bit clearer than trying to reuse the buffer. > + long datalen, i; > + u32 *buf = *ptr; > + > + /* > + * Only dump the non-zero part of the buffer - rarely will > + * any data completely fill the entire allocated size of > + * the buffer. > + */ > + for (datalen = 0, i = 0; i < len >> 2; i++) { > + if (buf[i]) > + datalen = ((i + 1) << 2); > + } > + > + /* > + * If we reach here, then the originally captured binary buffer > + * will be replaced with the ascii85 encoded string > + */ > + *ptr = adreno_gpu_ascii85_encode(buf, datalen); > + > + kvfree(buf); > + > + *encoded = true; > + } > + > + if (!*ptr) > + return; > > drm_puts(p, " data: !!ascii85 |\n"); > drm_puts(p, " "); > > - for (i = 0; i < l; i++) > - drm_puts(p, ascii85_encode(ptr[i], out)); > + drm_puts(p, *ptr); > > drm_puts(p, "\n"); > } > @@ -534,8 +570,8 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state, > drm_printf(p, " wptr: %d\n", state->ring[i].wptr); > drm_printf(p, " size: %d\n", MSM_GPU_RINGBUFFER_SZ); > > - adreno_show_object(p, state->ring[i].data, > - state->ring[i].data_size); > + adreno_show_object(p, &state->ring[i].data, > + state->ring[i].data_size, &state->ring[i].encoded); This implies we should make a sub struct to store the data / size / state of the buffers so this can turn into: adreno_show_object(&(state->ring[i].object)) We could repurpose msm_gpu_state_bo for this task - this would dovetail nicely into the suggestion above to use individual pointers for the encoded and non-encoded buffers. > } > > if (state->bos) { > @@ -546,8 +582,8 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state, > state->bos[i].iova); > drm_printf(p, " size: %zd\n", state->bos[i].size); > > - adreno_show_object(p, state->bos[i].data, > - state->bos[i].size); > + adreno_show_object(p, &state->bos[i].data, > + state->bos[i].size, &state->bos[i].encoded); And then this would turn into just adreno_show_object(p, &state->bos[i]); > } > } > > diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h > index f82bac0..efb49bb 100644 > --- a/drivers/gpu/drm/msm/msm_gpu.h > +++ b/drivers/gpu/drm/msm/msm_gpu.h > @@ -187,6 +187,7 @@ struct msm_gpu_state_bo { > u64 iova; > size_t size; > void *data; > + bool encoded; > }; > > struct msm_gpu_state { > @@ -201,6 +202,7 @@ struct msm_gpu_state { > u32 wptr; > void *data; > int data_size; > + bool encoded; > } ring[MSM_GPU_MAX_RINGS]; > > int nr_registers; > -- > 1.9.1 > -- The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project