On Tue, Nov 08, 2022 at 10:29:33AM -0500, Eric Farman wrote: > On Tue, 2022-11-08 at 10:37 -0400, Jason Gunthorpe wrote: > > On Tue, Nov 08, 2022 at 09:19:17AM -0500, Eric Farman wrote: > > > On Tue, 2022-11-08 at 09:54 -0400, Jason Gunthorpe wrote: > > > > On Tue, Nov 08, 2022 at 08:50:53AM -0500, Matthew Rosato wrote: > > > > > > > > > FWIW, vfio-pci via s390 is working fine so far, though I'll put > > > > > it > > > > > through more paces over the next few weeks and report if I find > > > > > anything. > > > > > > > > OK great > > > > > > > > > As far as mdev drivers... > > > > > > > > > > -ccw: Sounds like Eric is already aware there is an issue and > > > > > is > > > > > investigating (I see errors as well). > > > > > > I -think- the problem for -ccw is that the new vfio_pin_pages > > > requires > > > the input addresses to be page-aligned, and while most of ours are, > > > the > > > first one in any given transaction may not be. We never bothered to > > > mask off the addresses since it was handled for us, and we needed > > > to > > > keep the offsets anyway. > > > > > > By happenstance, I had some code that would do the masking > > > ourselves > > > (for an unrelated reason); I'll see if I can get that fit on top > > > and if > > > it helps matters. After coffee. > > > > Oh, yes, that makes alot of sense. > > > > Ah, if that is how VFIO worked we could match it like below: > > That's a start. The pin appears to have worked, but the unpin fails at > the bottom of iommufd_access_unpin_pages: > > WARN_ON(!iopt_area_contig_done(&iter)); This seems like a different bug, probably a ccw driver bug. The WARN_ON is designed to detect cases where the driver is unpinning an IOVA range that is not exactly what it pinned. The pin side already does this validation, so if it fails it means pin/unpin did not have identical iova ranges. Some debugging prints should confirm this. I looked at CCW and came up with the following two things, can you look at them and finish them off? It will probably help. Thanks, Jason
>From b6884847ece19733065fa246c3bbea63cec474c3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe <jgg@xxxxxxxxxx> Date: Tue, 8 Nov 2022 15:21:04 -0400 Subject: [PATCH 1/2] vfio/ccw: Convert to use vfio_dma_rw() Do not open code a slow version of vfio_dma_rw() as copy_from_iova(). The core code provides this function now, just call it directly. Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx> --- drivers/s390/cio/vfio_ccw_cp.c | 57 ++++------------------------------ 1 file changed, 6 insertions(+), 51 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 7b02e97f4b2914..f5f6eff005b99f 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -228,51 +228,6 @@ static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len) } } -/* - * Within the domain (@mdev), copy @n bytes from a guest physical - * address (@iova) to a host physical address (@to). - */ -static long copy_from_iova(struct vfio_device *vdev, void *to, u64 iova, - unsigned long n) -{ - struct page_array pa = {0}; - int i, ret; - unsigned long l, m; - - ret = page_array_alloc(&pa, iova, n); - if (ret < 0) - return ret; - - ret = page_array_pin(&pa, vdev); - if (ret < 0) { - page_array_unpin_free(&pa, vdev); - return ret; - } - - l = n; - for (i = 0; i < pa.pa_nr; i++) { - void *from = kmap_local_page(pa.pa_page[i]); - - m = PAGE_SIZE; - if (i == 0) { - from += iova & (PAGE_SIZE - 1); - m -= iova & (PAGE_SIZE - 1); - } - - m = min(l, m); - memcpy(to + (n - l), from, m); - kunmap_local(from); - - l -= m; - if (l == 0) - break; - } - - page_array_unpin_free(&pa, vdev); - - return l; -} - /* * Helpers to operate ccwchain. */ @@ -471,10 +426,10 @@ static int ccwchain_handle_ccw(u32 cda, struct channel_program *cp) int len, ret; /* Copy 2K (the most we support today) of possible CCWs */ - len = copy_from_iova(vdev, cp->guest_cp, cda, - CCWCHAIN_LEN_MAX * sizeof(struct ccw1)); - if (len) - return len; + ret = vfio_dma_rw(vdev, cda, cp->guest_cp, + CWCHAIN_LEN_MAX * sizeof(struct ccw1)); + if (ret) + return ret; /* Convert any Format-0 CCWs to Format-1 */ if (!cp->orb.cmd.fmt) @@ -572,7 +527,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, if (ccw_is_idal(ccw)) { /* Read first IDAW to see if it's 4K-aligned or not. */ /* All subsequent IDAws will be 4K-aligned. */ - ret = copy_from_iova(vdev, &iova, ccw->cda, sizeof(iova)); + ret = vfio_dma_rw(vdev, ccw->cda, &iova, sizeof(iova)); if (ret) return ret; } else { @@ -601,7 +556,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, if (ccw_is_idal(ccw)) { /* Copy guest IDAL into host IDAL */ - ret = copy_from_iova(vdev, idaws, ccw->cda, idal_len); + ret = vfio_dma_rw(vdev, ccw->cda, idaws, idal_len); if (ret) goto out_unpin; -- 2.38.1
>From 7cd2cccf37db91d18da9d041826f0460a56fc95c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe <jgg@xxxxxxxxxx> Date: Tue, 8 Nov 2022 15:31:08 -0400 Subject: [PATCH 2/2] vfio/ccw: Fix error unwinding around page_array_unpin_free() We should only call page_array_unpin() if page_array_pin() has succeeded. If page_array_pin() fails then it undoes all its changes internally. Split free and unpin into two functions and only call unpin in the one case where everything has succeeded. Add missing pa_nr = idaw_nr assignment Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx> --- drivers/s390/cio/vfio_ccw_cp.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index f5f6eff005b99f..4eab1b2fb32dd2 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -164,9 +164,8 @@ static int page_array_pin(struct page_array *pa, struct vfio_device *vdev) } /* Unpin the pages before releasing the memory. */ -static void page_array_unpin_free(struct page_array *pa, struct vfio_device *vdev) +static void page_array_free(struct page_array *pa) { - page_array_unpin(pa, vdev, pa->pa_nr); kfree(pa->pa_iova); } @@ -558,7 +557,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, /* Copy guest IDAL into host IDAL */ ret = vfio_dma_rw(vdev, ccw->cda, idaws, idal_len); if (ret) - goto out_unpin; + goto out_free_pa; /* * Copy guest IDAWs into page_array, in case the memory they @@ -566,6 +565,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, */ for (i = 0; i < idaw_nr; i++) pa->pa_iova[i] = idaws[i]; + pa->pa_nr = idaw_nr; } else { /* * No action is required here; the iova addresses in page_array @@ -577,7 +577,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, if (ccw_does_data_transfer(ccw)) { ret = page_array_pin(pa, vdev); if (ret < 0) - goto out_unpin; + goto out_free_pa; } else { pa->pa_nr = 0; } @@ -590,8 +590,8 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, return 0; -out_unpin: - page_array_unpin_free(pa, vdev); +out_free_pa: + page_array_free(pa); out_free_idaws: kfree(idaws); out_init: @@ -697,7 +697,8 @@ void cp_free(struct channel_program *cp) cp->initialized = false; list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) { for (i = 0; i < chain->ch_len; i++) { - page_array_unpin_free(chain->ch_pa + i, vdev); + page_array_unpin(pa, vdev, pa->pa_nr); + page_array_free(chain->ch_pa + i); ccwchain_cda_free(chain, i); } ccwchain_free(chain); -- 2.38.1