gadgetfs was the sole mainline user of the retry mechanism offered by fs/aio.c. It would set the iocb's ki_retry to a tiny function that is executed once an async read completed. The function would simply copy data that was dmaed into a kernel buffer out to the userspace buffer for the read. This use was not worth the complexity and volume of code needed to support retrying iocbs in the aio core. Many other file systems already do this kind of post processing work in their own threads today. This work that gadgetfs does pales in comparison to the work that btrfs and xfs do. This patch moves this trivial copying into a work struct that is processed by keventd. It uses the {un,}use_mm() calls which were recently promoted out of aio into a proper API. This paves the way for removing the dangerous and slow retry functionality from the aio core. Signed-off-by: Zach Brown <zach.brown@xxxxxxxxxx> --- drivers/usb/gadget/inode.c | 41 +++++++++++++++++++++++++++++++++-------- 1 files changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index bf0f652..694efc8 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -34,6 +34,7 @@ #include <linux/slab.h> #include <linux/poll.h> #include <linux/smp_lock.h> +#include <linux/mmu_context.h> #include <linux/device.h> #include <linux/moduleparam.h> @@ -522,6 +523,7 @@ struct kiocb_priv { const struct iovec *iv; unsigned long nr_segs; unsigned actual; + struct work_struct work; }; static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e) @@ -545,14 +547,31 @@ static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e) return value; } -static ssize_t ep_aio_read_retry(struct kiocb *iocb) +/* + * This runs in keventd. It assumes the mm context of the aio context that + * submitted the read and copies the data from the allocated kernel result + * buffer back to the userspace buffer. + * + * This used to be done by setting ki_retry to this function once the aio read + * was submitted. The caller would assume the aio context's mm and call this + * function to perform the copy. As ki_retry this function could only return + * len which would be then be given to aio_complete(). To maintain this + * behaviour we call aio_complete() with a 0 third argument. This could now + * return req->status to userspace as the third argument but I opted to + * maintain the previously existing behaviour. + */ +static void ep_aio_read_copy(struct work_struct *work) { - struct kiocb_priv *priv = iocb->private; + struct kiocb_priv *priv = container_of(work, struct kiocb_priv, work); + struct kiocb *iocb = priv->req->context; + mm_segment_t oldfs = get_fs(); ssize_t len, total; void *to_copy; int i; - /* we "retry" to get the right mm context for this: */ + /* get the right mm context for this */ + set_fs(USER_DS); + use_mm(iocb->ki_ctx->mm); /* copy stuff into user buffers */ total = priv->actual; @@ -573,9 +592,12 @@ static ssize_t ep_aio_read_retry(struct kiocb *iocb) if (total == 0) break; } + + unuse_mm(iocb->ki_ctx->mm); + set_fs(oldfs); kfree(priv->buf); kfree(priv); - return len; + aio_complete(iocb, len, 0); } static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) @@ -601,14 +623,18 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) aio_complete(iocb, req->actual ? req->actual : req->status, req->status); } else { - /* retry() won't report both; so we hide some faults */ + /* + * ep_aio_read_copy() doesn't report both; so we hide some + * faults. + */ if (unlikely(0 != req->status)) DBG(epdata->dev, "%s fault %d len %d\n", ep->name, req->status, req->actual); priv->buf = req->buf; priv->actual = req->actual; - kick_iocb(iocb); + INIT_WORK(&priv->work, ep_aio_read_copy); + schedule_work(&priv->work); } spin_unlock(&epdata->dev->lock); @@ -679,7 +705,7 @@ fail: kfree(priv); put_ep(epdata); } else - value = (iv ? -EIOCBRETRY : -EIOCBQUEUED); + value = -EIOCBQUEUED; return value; } @@ -697,7 +723,6 @@ ep_aio_read(struct kiocb *iocb, const struct iovec *iov, if (unlikely(!buf)) return -ENOMEM; - iocb->ki_retry = ep_aio_read_retry; return ep_aio_rwtail(iocb, buf, iocb->ki_left, epdata, iov, nr_segs); } -- 1.6.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html