On Mon, Jan 27, 2025 at 10:12:38AM +0530, Ekansh Gupta wrote: > For any remote call to DSP, after sending an invocation message, > fastRPC driver waits for glink response and during this time the > CPU can go into low power modes. Adding a polling mode support > with which fastRPC driver will poll continuously on a memory > after sending a message to remote subsystem which will eliminate > CPU wakeup and scheduling latencies and reduce fastRPC overhead. > With this change, DSP always sends a glink response which will > get ignored if polling mode didn't time out. Is there a chance to implement an actual async I/O protocol with the help of the poll() call instead of hiding the polling / wait inside invoke2? > > Signed-off-by: Ekansh Gupta <quic_ekangupt@xxxxxxxxxxx> > --- > drivers/misc/fastrpc.c | 122 +++++++++++++++++++++++++++++++++--- > include/uapi/misc/fastrpc.h | 3 +- > 2 files changed, 114 insertions(+), 11 deletions(-) > > diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c > index cfacee0dded5..257a741af115 100644 > --- a/drivers/misc/fastrpc.c > +++ b/drivers/misc/fastrpc.c > @@ -19,6 +19,7 @@ > #include <linux/rpmsg.h> > #include <linux/scatterlist.h> > #include <linux/slab.h> > +#include <linux/delay.h> > #include <linux/firmware/qcom/qcom_scm.h> > #include <uapi/misc/fastrpc.h> > #include <linux/of_reserved_mem.h> > @@ -38,6 +39,7 @@ > #define FASTRPC_CTX_MAX (256) > #define FASTRPC_INIT_HANDLE 1 > #define FASTRPC_DSP_UTILITIES_HANDLE 2 > +#define FASTRPC_MAX_STATIC_HANDLE (20) > #define FASTRPC_CTXID_MASK (0xFF0) > #define INIT_FILELEN_MAX (2 * 1024 * 1024) > #define INIT_FILE_NAMELEN_MAX (128) > @@ -106,6 +108,19 @@ > > #define miscdev_to_fdevice(d) container_of(d, struct fastrpc_device, miscdev) > > +/* Poll response number from remote processor for call completion */ > +#define FASTRPC_POLL_RESPONSE (0xdecaf) > +/* timeout in us for polling until memory barrier */ > +#define FASTRPC_POLL_TIME_MEM_UPDATE (500) > + > +/* Response types 
supported for RPC calls */ > +enum fastrpc_response_flags { > + /* normal job completion glink response */ > + NORMAL_RESPONSE = 0, > + /* process updates poll memory instead of glink response */ > + POLL_MODE = 1, > +}; > + > static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp", > "sdsp", "cdsp", "cdsp1" }; > struct fastrpc_phy_page { > @@ -238,9 +253,16 @@ struct fastrpc_invoke_ctx { > u32 sc; > u64 *fdlist; > u32 *crclist; > + u32 *poll; > void __user *crc; > u64 ctxid; > u64 msg_sz; > + /* Threads poll for specified timeout and fall back to glink wait */ > + u64 poll_timeout; > + /* work done status flag */ > + bool is_work_done; > + /* response flags from remote processor */ > + enum fastrpc_response_flags rsp_flags; > struct kref refcount; > struct list_head node; /* list of ctxs */ > struct completion work; > @@ -258,6 +280,7 @@ struct fastrpc_invoke_ctx { > struct fastrpc_ctx_args { > struct fastrpc_invoke_args *args; > void __user *crc; > + u64 poll_timeout; > }; > > struct fastrpc_session_ctx { > @@ -619,11 +642,14 @@ static struct fastrpc_invoke_ctx *fastrpc_context_alloc( > fastrpc_channel_ctx_get(cctx); > > ctx->crc = cargs->crc; > + ctx->poll_timeout = cargs->poll_timeout; > ctx->sc = sc; > ctx->retval = -1; > ctx->pid = current->pid; > ctx->client_id = user->client_id; > ctx->cctx = cctx; > + ctx->rsp_flags = NORMAL_RESPONSE; > + ctx->is_work_done = false; > init_completion(&ctx->work); > INIT_WORK(&ctx->put_work, fastrpc_context_put_wq); > > @@ -882,7 +908,8 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx) > sizeof(struct fastrpc_invoke_buf) + > sizeof(struct fastrpc_phy_page)) * ctx->nscalars + > sizeof(u64) * FASTRPC_MAX_FDLIST + > - sizeof(u32) * FASTRPC_MAX_CRCLIST; > + sizeof(u32) * FASTRPC_MAX_CRCLIST + > + sizeof(u32); > > return size; > } > @@ -975,6 +1002,8 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) > pages = fastrpc_phy_page_start(list, ctx->nscalars); > ctx->fdlist = (u64 
*)(pages + ctx->nscalars); > ctx->crclist = (u32 *)(ctx->fdlist + FASTRPC_MAX_FDLIST); > + ctx->poll = (u32 *)(ctx->crclist + FASTRPC_MAX_CRCLIST); > + > args = (uintptr_t)ctx->buf->virt + metalen; > rlen = pkt_size - metalen; > ctx->rpra = rpra; > @@ -1145,6 +1174,72 @@ static int fastrpc_invoke_send(struct fastrpc_session_ctx *sctx, > > } > > +static int poll_for_remote_response(struct fastrpc_invoke_ctx *ctx, u64 timeout) > +{ > + int err = -EIO, i, j; > + > + /* poll on memory for DSP response. Return failure on timeout */ > + for (i = 0, j = 0; i < timeout; i++, j++) { > + if (*ctx->poll == FASTRPC_POLL_RESPONSE) { > + err = 0; > + ctx->is_work_done = true; > + ctx->retval = 0; > + break; > + } > + if (j == FASTRPC_POLL_TIME_MEM_UPDATE) { > + /* make sure that all poll memory writes by DSP are seen by CPU */ > + dma_rmb(); > + j = 0; > + } > + udelay(1); > + } > + return err; > +} > + > +static inline int fastrpc_wait_for_response(struct fastrpc_invoke_ctx *ctx, > + u32 kernel) > +{ > + int err = 0; > + > + if (kernel) { > + if (!wait_for_completion_timeout(&ctx->work, 10 * HZ)) > + err = -ETIMEDOUT; > + } else { > + err = wait_for_completion_interruptible(&ctx->work); > + } > + > + return err; > +} > + > +static int fastrpc_wait_for_completion(struct fastrpc_invoke_ctx *ctx, > + u32 kernel) > +{ > + int err; > + > + do { > + switch (ctx->rsp_flags) { > + case NORMAL_RESPONSE: > + err = fastrpc_wait_for_response(ctx, kernel); > + if (err || ctx->is_work_done) > + return err; > + break; > + case POLL_MODE: > + err = poll_for_remote_response(ctx, ctx->poll_timeout); > + /* If polling timed out, move to normal response mode */ > + if (err) > + ctx->rsp_flags = NORMAL_RESPONSE; > + break; > + default: > + err = -EBADR; > + dev_dbg(ctx->fl->sctx->dev, > + "unsupported response type:0x%x\n", ctx->rsp_flags); > + break; > + } > + } while (!ctx->is_work_done); > + > + return err; > +} > + > static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, > u32 
handle, u32 sc, > struct fastrpc_ctx_args *cargs) > @@ -1180,16 +1275,20 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, > if (err) > goto bail; > > - if (kernel) { > - if (!wait_for_completion_timeout(&ctx->work, 10 * HZ)) > - err = -ETIMEDOUT; > - } else { > - err = wait_for_completion_interruptible(&ctx->work); > - } > + if (ctx->poll_timeout != 0 && handle > FASTRPC_MAX_STATIC_HANDLE > + && fl->pd == USER_PD) > + ctx->rsp_flags = POLL_MODE; > > + err = fastrpc_wait_for_completion(ctx, kernel); > if (err) > goto bail; > > + if (!ctx->is_work_done) { > + err = -ETIMEDOUT; > + dev_dbg(fl->sctx->dev, "Invalid workdone state for handle 0x%x, sc 0x%x\n", > + handle, sc); > + goto bail; > + } > /* make sure that all memory writes by DSP are seen by CPU */ > dma_rmb(); > /* populate all the output buffers with results */ > @@ -1769,7 +1868,7 @@ static int fastrpc_invokev2(struct fastrpc_user *fl, char __user *argp) > return -EFAULT; > > /* Check if all reserved fields are zero */ > - for (i = 0; i < 16; i++) { > + for (i = 0; i < 14; i++) { > if (inv2.reserved[i] != 0) > return -EINVAL; > } > @@ -1779,6 +1878,7 @@ static int fastrpc_invokev2(struct fastrpc_user *fl, char __user *argp) > return -ENOMEM; > > cargs->crc = (void __user *)(uintptr_t)inv2.crc; > + cargs->poll_timeout = inv2.poll_timeout; > > err = fastrpc_remote_invoke(fl, &inv2.inv, cargs); > kfree(cargs); > @@ -2581,12 +2681,14 @@ static int fastrpc_rpmsg_callback(struct rpmsg_device *rpdev, void *data, > ctx = idr_find(&cctx->ctx_idr, ctxid); > spin_unlock_irqrestore(&cctx->lock, flags); > > + /* Ignore this failure as context returned will be NULL for polling mode */ > if (!ctx) { > - dev_err(&rpdev->dev, "No context ID matches response\n"); > - return -ENOENT; > + dev_dbg(&rpdev->dev, "No context ID matches response\n"); > + return 0; > } > > ctx->retval = rsp->retval; > + ctx->is_work_done = true; > complete(&ctx->work); > > /* > diff --git a/include/uapi/misc/fastrpc.h 
b/include/uapi/misc/fastrpc.h > index 406b80555d41..1920c537bbbf 100644 > --- a/include/uapi/misc/fastrpc.h > +++ b/include/uapi/misc/fastrpc.h > @@ -84,7 +84,8 @@ struct fastrpc_invoke { > struct fastrpc_invoke_v2 { > struct fastrpc_invoke inv; > __u64 crc; > - __u32 reserved[16]; > + __u64 poll_timeout; > + __u32 reserved[14]; > }; > > struct fastrpc_init_create { > -- > 2.34.1 > -- With best wishes Dmitry