Add support to capture kernel performance counters for different
kernel-level operations. These counters collect information for a
remote call and copy it to a buffer shared with the user.

Collection of DSP performance counters is also added as part of this
change. The DSP updates the performance information in the metadata,
which is then copied to a buffer passed by the user.

Signed-off-by: Ekansh Gupta <quic_ekangupt@xxxxxxxxxxx>
---
 drivers/misc/fastrpc.c      | 143 ++++++++++++++++++++++++++++++++++--
 include/uapi/misc/fastrpc.h |   4 +-
 2 files changed, 139 insertions(+), 8 deletions(-)
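
Note for reviewers (not part of the commit): below is a rough, untested
userspace sketch of how the new perf fields in struct fastrpc_invoke_v2
are expected to be used. The FASTRPC_IOCTL_INVOKE_V2 ioctl name and the
uapi header install path are assumptions based on earlier patches in
this series; the array sizes follow FASTRPC_KERNEL_PERF_LIST
(PERF_KEY_MAX == 9) and FASTRPC_DSP_PERF_LIST (12) introduced below.

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <misc/fastrpc.h>	/* uapi header from this series (assumed install path) */

	static int invoke_with_perf(int fd, const struct fastrpc_invoke *inv)
	{
		uint64_t perf_kernel[9] = { 0 };	/* FASTRPC_KERNEL_PERF_LIST entries */
		uint64_t perf_dsp[12] = { 0 };		/* FASTRPC_DSP_PERF_LIST entries */
		struct fastrpc_invoke_v2 inv2;
		int ret;

		memset(&inv2, 0, sizeof(inv2));
		inv2.inv = *inv;
		/* Non-zero buffer pointers ask the driver to collect counters. */
		inv2.perf_kernel = (uint64_t)(uintptr_t)perf_kernel;
		inv2.perf_dsp = (uint64_t)(uintptr_t)perf_dsp;

		ret = ioctl(fd, FASTRPC_IOCTL_INVOKE_V2, &inv2);	/* assumed ioctl name */
		if (ret)
			return ret;

		/*
		 * perf_kernel[] follows the kernel's enum fastrpc_perfkeys
		 * layout, e.g. perf_kernel[8] (PERF_INVOKE) holds accumulated
		 * invoke time in ns and perf_kernel[0] (PERF_COUNT) the number
		 * of counted invocations; perf_dsp[] holds the 12 DSP-side
		 * counters copied from the invoke metadata.
		 */
		return 0;
	}
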
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 8e817a763d1d..54a562fc94fb 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -19,6 +19,7 @@
 #include <linux/rpmsg.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
+#include <linux/delay.h>
 #include <linux/firmware/qcom/qcom_scm.h>
 #include <uapi/misc/fastrpc.h>
 #include <linux/of_reserved_mem.h>
@@ -38,6 +39,7 @@
 #define FASTRPC_CTX_MAX (256)
 #define FASTRPC_INIT_HANDLE	1
 #define FASTRPC_DSP_UTILITIES_HANDLE	2
+#define FASTRPC_MAX_STATIC_HANDLE (20)
 #define FASTRPC_CTXID_MASK (0xFF0)
 #define INIT_FILELEN_MAX (2 * 1024 * 1024)
 #define INIT_FILE_NAMELEN_MAX (128)
@@ -106,6 +108,9 @@
 
 #define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev)
 
+#define FASTRPC_KERNEL_PERF_LIST (PERF_KEY_MAX)
+#define FASTRPC_DSP_PERF_LIST 12
+
 static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
 						"sdsp", "cdsp", "cdsp1" };
 struct fastrpc_phy_page {
@@ -229,6 +234,18 @@ struct fastrpc_map {
 	struct kref refcount;
 };
 
+struct fastrpc_perf {
+	u64 count;
+	u64 flush;
+	u64 map;
+	u64 copy;
+	u64 link;
+	u64 getargs;
+	u64 putargs;
+	u64 invargs;
+	u64 invoke;
+};
+
 struct fastrpc_invoke_ctx {
 	int nscalars;
 	int nbufs;
@@ -237,6 +254,8 @@ struct fastrpc_invoke_ctx {
 	int tgid;
 	u32 sc;
 	u32 *crc;
+	u64 *perf_kernel;
+	u64 *perf_dsp;
 	u64 ctxid;
 	u64 msg_sz;
 	struct kref refcount;
@@ -251,6 +270,7 @@ struct fastrpc_invoke_ctx {
 	struct fastrpc_invoke_args *args;
 	struct fastrpc_buf_overlap *olaps;
 	struct fastrpc_channel_ctx *cctx;
+	struct fastrpc_perf *perf;
 };
 
 struct fastrpc_session_ctx {
@@ -308,6 +328,51 @@ struct fastrpc_user {
 	struct mutex mutex;
 };
 
+enum fastrpc_perfkeys {
+	PERF_COUNT = 0,
+	PERF_FLUSH = 1,
+	PERF_MAP = 2,
+	PERF_COPY = 3,
+	PERF_LINK = 4,
+	PERF_GETARGS = 5,
+	PERF_PUTARGS = 6,
+	PERF_INVARGS = 7,
+	PERF_INVOKE = 8,
+	PERF_KEY_MAX = 9,
+};
+
+#define PERF_END ((void)0)
+
+#define PERF(enb, cnt, ff) \
+	{\
+		struct timespec64 startT = {0};\
+		u64 *counter = cnt;\
+		if (enb && counter) {\
+			ktime_get_boottime_ts64(&startT);\
+		} \
+		ff ;\
+		if (enb && counter) {\
+			*counter += getnstimediff(&startT);\
+		} \
+	}
+
+#define GET_COUNTER(perf_ptr, offset) \
+	(perf_ptr != NULL ?\
+		(((offset >= 0) && (offset < PERF_KEY_MAX)) ?\
+			(u64 *)(perf_ptr + offset)\
+				: (u64 *)NULL) : (u64 *)NULL)
+
+static inline s64 getnstimediff(struct timespec64 *start)
+{
+	s64 ns;
+	struct timespec64 ts, b;
+
+	ktime_get_boottime_ts64(&ts);
+	b = timespec64_sub(ts, *start);
+	ns = timespec64_to_ns(&b);
+	return ns;
+}
+
 static void fastrpc_free_map(struct kref *ref)
 {
 	struct fastrpc_map *map;
@@ -497,6 +562,7 @@ static void fastrpc_context_free(struct kref *ref)
 	idr_remove(&cctx->ctx_idr, ctx->ctxid >> 4);
 	spin_unlock_irqrestore(&cctx->lock, flags);
 
+	kfree(ctx->perf);
 	kfree(ctx->maps);
 	kfree(ctx->olaps);
 	kfree(ctx);
@@ -614,6 +680,13 @@ static struct fastrpc_invoke_ctx *fastrpc_context_alloc(
 	fastrpc_channel_ctx_get(cctx);
 
 	ctx->crc = (u32 *)(uintptr_t)inv2->crc;
+	ctx->perf_dsp = (u64 *)(uintptr_t)inv2->perf_dsp;
+	ctx->perf_kernel = (u64 *)(uintptr_t)inv2->perf_kernel;
+	if (ctx->perf_kernel) {
+		ctx->perf = kzalloc(sizeof(*(ctx->perf)), GFP_KERNEL);
+		if (!ctx->perf)
+			return ERR_PTR(-ENOMEM);
+	}
 	ctx->sc = sc;
 	ctx->retval = -1;
 	ctx->pid = current->pid;
@@ -877,7 +950,8 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx)
 		sizeof(struct fastrpc_invoke_buf) +
 		sizeof(struct fastrpc_phy_page)) * ctx->nscalars +
 		sizeof(u64) * FASTRPC_MAX_FDLIST +
-		sizeof(u32) * FASTRPC_MAX_CRCLIST;
+		sizeof(u32) * FASTRPC_MAX_CRCLIST +
+		sizeof(u32) + sizeof(u64) * FASTRPC_DSP_PERF_LIST;
 
 	return size;
 }
@@ -951,7 +1025,9 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
 	metalen = fastrpc_get_meta_size(ctx);
 	pkt_size = fastrpc_get_payload_size(ctx, metalen);
 
+	PERF(ctx->perf_kernel, GET_COUNTER((u64 *)ctx->perf, PERF_MAP),
 	err = fastrpc_create_maps(ctx);
+	PERF_END);
 	if (err)
 		return err;
 
@@ -987,6 +1063,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
 			continue;
 
 		if (ctx->maps[i]) {
+			PERF(ctx->perf_kernel, GET_COUNTER((u64 *)ctx->perf, PERF_MAP),
 			struct vm_area_struct *vma = NULL;
 
 			rpra[i].buf.pv = (u64) ctx->args[i].ptr;
@@ -1003,9 +1080,9 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
 			pg_end = ((ctx->args[i].ptr + len - 1) & PAGE_MASK) >>
 				  PAGE_SHIFT;
 			pages[i].size = (pg_end - pg_start + 1) * PAGE_SIZE;
-
+			PERF_END);
 		} else {
-
+			PERF(ctx->perf_kernel, GET_COUNTER((u64 *)ctx->perf, PERF_COPY),
 			if (ctx->olaps[oix].offset == 0) {
 				rlen -= ALIGN(args, FASTRPC_ALIGN) - args;
 				args = ALIGN(args, FASTRPC_ALIGN);
@@ -1027,12 +1104,14 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
 			pages[i].size = (pg_end - pg_start + 1) * PAGE_SIZE;
 			args = args + mlen;
 			rlen -= mlen;
+			PERF_END);
 		}
 
 		if (i < inbufs && !ctx->maps[i]) {
 			void *dst = (void *)(uintptr_t)rpra[i].buf.pv;
 			void *src = (void *)(uintptr_t)ctx->args[i].ptr;
 
+			PERF(ctx->perf_kernel, GET_COUNTER((u64 *)ctx->perf, PERF_COPY),
 			if (!kernel) {
 				if (copy_from_user(dst, (void __user *)src,
 						   len)) {
@@ -1042,6 +1121,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
 			} else {
 				memcpy(dst, src, len);
 			}
+			PERF_END);
 		}
 	}
 
@@ -1072,9 +1152,9 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
 	struct fastrpc_map *mmap = NULL;
 	struct fastrpc_invoke_buf *list;
 	struct fastrpc_phy_page *pages;
-	u64 *fdlist;
-	u32 *crclist;
-	int i, inbufs, outbufs, handles;
+	u64 *fdlist, *perf_dsp_list;
+	u32 *crclist, *poll;
+	int i, inbufs, outbufs, handles, perferr;
 
 	inbufs = REMOTE_SCALARS_INBUFS(ctx->sc);
 	outbufs = REMOTE_SCALARS_OUTBUFS(ctx->sc);
@@ -1083,6 +1163,8 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
 	pages = fastrpc_phy_page_start(list, ctx->nscalars);
 	fdlist = (uint64_t *)(pages + inbufs + outbufs + handles);
 	crclist = (u32 *)(fdlist + FASTRPC_MAX_FDLIST);
+	poll = (u32 *)(crclist + FASTRPC_MAX_CRCLIST);
+	perf_dsp_list = (u64 *)(poll + 1);
 
 	for (i = inbufs; i < ctx->nbufs; ++i) {
 		if (!ctx->maps[i]) {
@@ -1113,6 +1195,14 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
 			return -EFAULT;
 	}
 
+	if (ctx->perf_dsp && perf_dsp_list) {
+		perferr = copy_to_user((void __user *)ctx->perf_dsp,
+				       perf_dsp_list,
+				       FASTRPC_DSP_PERF_LIST * sizeof(u64));
+		if (perferr)
+			dev_dbg(fl->sctx->dev, "Warning: failed to copy perf data %d\n", perferr);
+	}
+
 	return 0;
 }
 
@@ -1148,13 +1238,35 @@ static int fastrpc_invoke_send(struct fastrpc_session_ctx *sctx,
 }
 
+static void fastrpc_update_invoke_count(u32 handle, u64 *perf_counter,
+					struct timespec64 *invoket)
+{
+	/* update invoke count for dynamic handles */
+	u64 *invcount, *count;
+
+	if (handle <= FASTRPC_MAX_STATIC_HANDLE)
+		return;
+
+	invcount = GET_COUNTER(perf_counter, PERF_INVOKE);
+	if (invcount)
+		*invcount += getnstimediff(invoket);
+
+	count = GET_COUNTER(perf_counter, PERF_COUNT);
+	if (count)
+		(*count)++;
+}
+
 static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, struct
 				   fastrpc_invoke_v2 *inv2)
 {
 	struct fastrpc_invoke_ctx *ctx = NULL;
 	struct fastrpc_buf *buf, *b;
 	struct fastrpc_invoke inv;
 	u32 handle, sc;
-	int err = 0;
+	int err = 0, perferr = 0;
+	struct timespec64 invoket = {0};
+
+	if (inv2->perf_kernel)
+		ktime_get_boottime_ts64(&invoket);
 
 	if (!fl->sctx)
 		return -EINVAL;
@@ -1174,14 +1286,18 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, struct
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
+	PERF(ctx->perf_kernel, GET_COUNTER((u64 *)ctx->perf, PERF_GETARGS),
 	err = fastrpc_get_args(kernel, ctx);
+	PERF_END);
 	if (err)
 		goto bail;
 
 	/* make sure that all CPU memory writes are seen by DSP */
 	dma_wmb();
+	PERF(ctx->perf_kernel, GET_COUNTER((u64 *)ctx->perf, PERF_LINK),
 	/* Send invoke buffer to remote dsp */
 	err = fastrpc_invoke_send(fl->sctx, ctx, kernel, handle);
+	PERF_END);
 	if (err)
 		goto bail;
 
@@ -1197,8 +1313,10 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, struct
 	/* make sure that all memory writes by DSP are seen by CPU */
 	dma_rmb();
+	PERF(ctx->perf_kernel, GET_COUNTER((u64 *)ctx->perf, PERF_PUTARGS),
 	/* populate all the output buffers with results */
 	err = fastrpc_put_args(ctx, kernel);
+	PERF_END);
 	if (err)
 		goto bail;
 
@@ -1209,6 +1327,17 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, struct
 
 bail:
 	if (err != -ERESTARTSYS && err != -ETIMEDOUT) {
+		if (ctx) {
+			if (ctx->perf_kernel && !err)
+				fastrpc_update_invoke_count(handle, (u64 *)ctx->perf, &invoket);
+			if (ctx->perf_kernel && ctx->perf) {
+				perferr = copy_to_user((void __user *)ctx->perf_kernel,
+						       ctx->perf,
+						       FASTRPC_KERNEL_PERF_LIST * sizeof(u64));
+				if (perferr)
+					dev_dbg(fl->sctx->dev, "Warning: failed to copy perf data %d\n", perferr);
+			}
+		}
 		/* We are done with this compute context */
 		spin_lock(&fl->lock);
 		list_del(&ctx->node);
diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
index 406b80555d41..1edc7c04b171 100644
--- a/include/uapi/misc/fastrpc.h
+++ b/include/uapi/misc/fastrpc.h
@@ -84,7 +84,9 @@ struct fastrpc_invoke {
 struct fastrpc_invoke_v2 {
 	struct fastrpc_invoke inv;
 	__u64 crc;
-	__u32 reserved[16];
+	__u64 perf_kernel;
+	__u64 perf_dsp;
+	__u32 reserved[12];
 };
 
 struct fastrpc_init_create {
-- 
2.34.1
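
Note (not part of the patch): for reference, a small hypothetical
userspace helper that prints the counters returned in the perf_kernel
buffer. The index-to-name mapping mirrors enum fastrpc_perfkeys and
struct fastrpc_perf above; all values except "count" are nanoseconds
accumulated via getnstimediff().

	#include <stdint.h>
	#include <stdio.h>

	static void print_kernel_perf(const uint64_t perf[9])
	{
		/* Same order as enum fastrpc_perfkeys in the driver. */
		static const char * const keys[9] = {
			"count", "flush", "map", "copy", "link",
			"getargs", "putargs", "invargs", "invoke",
		};
		int i;

		for (i = 0; i < 9; i++)
			printf("%-8s: %llu%s\n", keys[i],
			       (unsigned long long)perf[i],
			       i == 0 ? "" : " ns");
	}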