Convert the 9p filesystem to use iov_iter_extract_pages() instead of
iov_iter_get_pages(). This will pin pages or leave them unaltered rather
than getting a ref on them as appropriate to the iterator.

The pages need to be pinned for DIO-read rather than having refs taken on
them to prevent VM copy-on-write from malfunctioning during a concurrent
fork() (the result of the I/O would otherwise end up only visible to the
child process and not the parent).

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: Dominique Martinet <asmadeus@xxxxxxxxxxxxx>
cc: Eric Van Hensbergen <ericvh@xxxxxxxxx>
cc: Latchesar Ionkov <lucho@xxxxxxxxxx>
cc: Christian Schoenebeck <linux_oss@xxxxxxxxxxxxx>
cc: v9fs-developer@xxxxxxxxxxxxxxxxxxxxx
---
 net/9p/trans_common.c |  6 ++-
 net/9p/trans_common.h |  3 +-
 net/9p/trans_virtio.c | 89 ++++++++++++++-----------------------------------
 3 files changed, 31 insertions(+), 67 deletions(-)

diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index c827f694551c..31d133412677 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -12,13 +12,15 @@
  * p9_release_pages - Release pages after the transaction.
  * @pages: array of pages to be put
  * @nr_pages: size of array
+ * @cleanup_mode: How to clean up the pages.
  */
-void p9_release_pages(struct page **pages, int nr_pages)
+void p9_release_pages(struct page **pages, int nr_pages,
+		      unsigned int cleanup_mode)
 {
 	int i;
 
 	for (i = 0; i < nr_pages; i++)
 		if (pages[i])
-			put_page(pages[i]);
+			page_put_unpin(pages[i], cleanup_mode);
 }
 EXPORT_SYMBOL(p9_release_pages);
diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h
index 32134db6abf3..9b20eb4f2359 100644
--- a/net/9p/trans_common.h
+++ b/net/9p/trans_common.h
@@ -4,4 +4,5 @@
  * Author Venkateswararao Jujjuri <jvrao@xxxxxxxxxxxxxxxxxx>
  */
 
-void p9_release_pages(struct page **pages, int nr_pages);
+void p9_release_pages(struct page **pages, int nr_pages,
+		      unsigned int cleanup_mode);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index eb28b54fe5f6..561f7cbd79da 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -310,73 +310,34 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
 			       struct iov_iter *data,
 			       int count,
 			       size_t *offs,
-			       int *need_drop,
+			       int *cleanup_mode,
 			       unsigned int gup_flags)
 {
 	int nr_pages;
 	int err;
+	int n;
 
 	if (!iov_iter_count(data))
 		return 0;
 
-	if (!iov_iter_is_kvec(data)) {
-		int n;
-		/*
-		 * We allow only p9_max_pages pinned. We wait for the
-		 * Other zc request to finish here
-		 */
-		if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
-			err = wait_event_killable(vp_wq,
-			      (atomic_read(&vp_pinned) < chan->p9_max_pages));
-			if (err == -ERESTARTSYS)
-				return err;
-		}
-		n = iov_iter_get_pages_alloc(data, pages, count, offs,
-					     gup_flags);
-		if (n < 0)
-			return n;
-		*need_drop = 1;
-		nr_pages = DIV_ROUND_UP(n + *offs, PAGE_SIZE);
-		atomic_add(nr_pages, &vp_pinned);
-		return n;
-	} else {
-		/* kernel buffer, no need to pin pages */
-		int index;
-		size_t len;
-		void *p;
-
-		/* we'd already checked that it's non-empty */
-		while (1) {
-			len = iov_iter_single_seg_count(data);
-			if (likely(len)) {
-				p = data->kvec->iov_base + data->iov_offset;
-				break;
-			}
-			iov_iter_advance(data, 0);
-		}
-		if (len > count)
-			len = count;
-
-		nr_pages = DIV_ROUND_UP((unsigned long)p + len, PAGE_SIZE) -
-			   (unsigned long)p / PAGE_SIZE;
-
-		*pages = kmalloc_array(nr_pages, sizeof(struct page *),
-				       GFP_NOFS);
-		if (!*pages)
-			return -ENOMEM;
-
-		*need_drop = 0;
-		p -= (*offs = offset_in_page(p));
-		for (index = 0; index < nr_pages; index++) {
-			if (is_vmalloc_addr(p))
-				(*pages)[index] = vmalloc_to_page(p);
-			else
-				(*pages)[index] = kmap_to_page(p);
-			p += PAGE_SIZE;
-		}
-		iov_iter_advance(data, len);
-		return len;
+	/*
+	 * We allow only p9_max_pages pinned. We wait for the
+	 * Other zc request to finish here
+	 */
+	if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
+		err = wait_event_killable(vp_wq,
+		      (atomic_read(&vp_pinned) < chan->p9_max_pages));
+		if (err == -ERESTARTSYS)
+			return err;
 	}
+
+	n = iov_iter_extract_pages(data, pages, count, offs, gup_flags);
+	if (n < 0)
+		return n;
+	*cleanup_mode = iov_iter_extract_mode(data, gup_flags);
+	nr_pages = DIV_ROUND_UP(n + *offs, PAGE_SIZE);
+	atomic_add(nr_pages, &vp_pinned);
+	return n;
 }
 
 static void handle_rerror(struct p9_req_t *req, int in_hdr_len,
@@ -431,7 +392,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 	struct virtio_chan *chan = client->trans;
 	struct scatterlist *sgs[4];
 	size_t offs;
-	int need_drop = 0;
+	int cleanup_mode = 0;
 	int kicked = 0;
 
 	p9_debug(P9_DEBUG_TRANS, "virtio request\n");
@@ -439,7 +400,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 	if (uodata) {
 		__le32 sz;
 		int n = p9_get_mapped_pages(chan, &out_pages, uodata,
-					    outlen, &offs, &need_drop,
+					    outlen, &offs, &cleanup_mode,
 					    FOLL_DEST_BUF);
 		if (n < 0) {
 			err = n;
@@ -459,7 +420,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 		memcpy(&req->tc.sdata[0], &sz, sizeof(sz));
 	} else if (uidata) {
 		int n = p9_get_mapped_pages(chan, &in_pages, uidata,
-					    inlen, &offs, &need_drop,
+					    inlen, &offs, &cleanup_mode,
 					    FOLL_SOURCE_BUF);
 		if (n < 0) {
 			err = n;
@@ -546,14 +507,14 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 	 * Non kernel buffers are pinned, unpin them
 	 */
 err_out:
-	if (need_drop) {
+	if (cleanup_mode) {
 		if (in_pages) {
-			p9_release_pages(in_pages, in_nr_pages);
+			p9_release_pages(in_pages, in_nr_pages, cleanup_mode);
 			atomic_sub(in_nr_pages, &vp_pinned);
 		}
 		if (out_pages) {
-			p9_release_pages(out_pages, out_nr_pages);
+			p9_release_pages(out_pages, out_nr_pages, cleanup_mode);
 			atomic_sub(out_nr_pages, &vp_pinned);
 		}
 		/* wakeup anybody waiting for slots to pin pages */
 		wake_up(&vp_wq);
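
For reference, the calling pattern this leaves p9_get_mapped_pages() and
p9_virtio_zc_request() with boils down to the sketch below. It is
illustrative only and not part of the patch: the helper name is made up,
the vp_pinned throttling, error unwinding and scatterlist setup are
elided, and it assumes the extraction helpers introduced earlier in this
series (iov_iter_extract_pages(), iov_iter_extract_mode(),
FOLL_SOURCE_BUF) plus the three-argument p9_release_pages() added above.

/*
 * Illustrative sketch only (hypothetical helper, not added by this patch).
 * See p9_get_mapped_pages()/p9_virtio_zc_request() for the real thing.
 */
static int p9_zc_extract_example(struct iov_iter *data, int count)
{
	struct page **pages = NULL;
	size_t offs;
	int cleanup_mode;
	int nr_pages;
	int n;

	/* List the pages backing the iterator; a user-backed iterator gets
	 * its pages pinned, a kernel-backed one is merely listed. */
	n = iov_iter_extract_pages(data, &pages, count, &offs, FOLL_SOURCE_BUF);
	if (n < 0)
		return n;

	/* Record what the extraction did so we know how to undo it later. */
	cleanup_mode = iov_iter_extract_mode(data, FOLL_SOURCE_BUF);
	nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);

	/* ... map the pages into the scatterlist and run the request ... */

	/* Undo exactly what the extraction did - unpin pinned pages, do
	 * nothing for pages that were only listed - instead of the old
	 * unconditional put_page(). */
	if (cleanup_mode)
		p9_release_pages(pages, nr_pages, cleanup_mode);

	/* The page array itself was allocated by the extraction; free it
	 * as the existing err_out path does. */
	kvfree(pages);
	return n;
}

The point of the pattern is that cleanup is driven by what the extraction
actually did to the pages rather than by an unconditional put_page(), which
is what makes the DIO-read case safe against a concurrent fork().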