On 23/11/16 12:35, David Gibson wrote: > On Tue, Nov 22, 2016 at 06:29:39PM +1100, Alexey Kardashevskiy wrote: >> On 22/11/16 13:50, David Gibson wrote: >>> On Fri, Nov 11, 2016 at 11:32:15PM +1100, Alexey Kardashevskiy wrote: >>>> As mentioned in the previous patch, we are going to allow the userspace >>>> to configure container in one memory context and pass container fd to >>>> another so we are postponing memory allocations accounted against >>>> the locked memory limit. The previous patch took care of it_userspace. >>>> >>>> At the moment we create the default DMA window when the first group is >>>> attached to a container; this is done for the userspace which is not >>>> DDW-aware but familiar with the SPAPR TCE IOMMU v2 in the part of memory >>>> pre-registration - such client expects the default DMA window to exist. >>>> >>>> This postpones the default DMA window allocation till first map/unmap >>>> request happens. >>>> >>>> Signed-off-by: Alexey Kardashevskiy <aik@xxxxxxxxx> >>>> --- >>>> drivers/vfio/vfio_iommu_spapr_tce.c | 98 ++++++++++++++++++------------------- >>>> 1 file changed, 47 insertions(+), 51 deletions(-) >>>> >>>> diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c >>>> index 442baac..1c02498 100644 >>>> --- a/drivers/vfio/vfio_iommu_spapr_tce.c >>>> +++ b/drivers/vfio/vfio_iommu_spapr_tce.c >>>> @@ -97,6 +97,7 @@ struct tce_container { >>>> struct mutex lock; >>>> bool enabled; >>>> bool v2; >>>> + bool def_window_pending; >>>> unsigned long locked_pages; >>>> struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; >>>> struct list_head group_list; >>>> @@ -594,15 +595,6 @@ static long tce_iommu_create_table(struct tce_container *container, >>>> WARN_ON(!ret && !(*ptbl)->it_ops->free); >>>> WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size)); >>>> >>>> - if (!ret && container->v2) { >>>> - ret = tce_iommu_userspace_view_alloc(*ptbl); >>>> - if (ret) >>>> - (*ptbl)->it_ops->free(*ptbl); >>>> - } >>> >>> Does this stuff for the user view belong in the previous patch? >> >> Yes it does, my mistake, will fix. >> >> >>> >>>> - >>>> - if (ret) >>>> - decrement_locked_vm(table_size >> PAGE_SHIFT); >>>> - >>>> return ret; >>>> } >>>> >>>> @@ -719,6 +711,29 @@ static long tce_iommu_remove_window(struct tce_container *container, >>>> return 0; >>>> } >>>> >>>> +static long tce_iommu_create_default_window(struct tce_container *container) >>>> +{ >>>> + long ret; >>>> + __u64 start_addr = 0; >>>> + struct tce_iommu_group *tcegrp; >>>> + struct iommu_table_group *table_group; >>>> + >>>> + if (!tce_groups_attached(container)) >>>> + return -ENODEV; >>>> + >>>> + tcegrp = list_first_entry(&container->group_list, >>>> + struct tce_iommu_group, next); >>>> + table_group = iommu_group_get_iommudata(tcegrp->grp); >>>> + if (!table_group) >>>> + return -ENODEV; >>>> + >>>> + ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K, >>>> + table_group->tce32_size, 1, &start_addr); >>>> + WARN_ON_ONCE(!ret && start_addr); >>>> + >>>> + return ret; >>>> +} >>>> + >>>> static long tce_iommu_ioctl(void *iommu_data, >>>> unsigned int cmd, unsigned long arg) >>>> { >>>> @@ -809,6 +824,13 @@ static long tce_iommu_ioctl(void *iommu_data, >>>> VFIO_DMA_MAP_FLAG_WRITE)) >>>> return -EINVAL; >>>> >>>> + if (container->def_window_pending) { >>>> + ret = tce_iommu_create_default_window(container); >>>> + if (ret) >>>> + return ret; >>>> + container->def_window_pending = false; >>> >>> Would it make sense to clear (and maybe test) def_window_pending >>> within create_default_window()? >> >> Dunno, matter of taste I suppose. I'll move it there. >> >> >>> >>>> + } >>>> + >>>> num = tce_iommu_find_table(container, param.iova, &tbl); >>>> if (num < 0) >>>> return -ENXIO; >>>> @@ -872,6 +894,13 @@ static long tce_iommu_ioctl(void *iommu_data, >>>> if (param.flags) >>>> return -EINVAL; >>>> >>>> + if (container->def_window_pending) { >>>> + ret = tce_iommu_create_default_window(container); >>>> + if (ret) >>>> + return ret; >>>> + container->def_window_pending = false; >>>> + } >>>> + >>>> num = tce_iommu_find_table(container, param.iova, &tbl); >>>> if (num < 0) >>>> return -ENXIO; >>>> @@ -998,6 +1027,8 @@ static long tce_iommu_ioctl(void *iommu_data, >>>> >>>> mutex_lock(&container->lock); >>>> >>>> + container->def_window_pending = false; >>> >>> Uh.. why is it cleared here, without calling >>> tce_iommu_create_default_window() AFAICT? >> >> >> It is a branch which creates new window, if we do not have a default >> window, then it will be created as the result of this ioctl(), if there is >> a default window, then the flag should be false already. > > Um.. it will create *a* window, but not necessarily the default one. > Consider this scenario: > > 1. Container is opened > 2. A group is attached > 3. Userspace, expecting the default window to be in place, creates a > second window > 4. Mapping starts > > Won't the above code mean that we create what userspace expected to be > the second window as the first window replacing the default one > instead? Uff. Yes, this is correct. This is a bug in my initial design - I should not have created a "default" window in the first place - it does not make sense at this level; and the userspace (i.e. QEMU) removes all windows on reset anyway. The only way QEMU will use this default window is when it is at 318f67ce1 "vfio: spapr: Add DMA memory preregistering (SPAPR IOMMU v2)" - memory preregistration is there, but ddw is not yet but it is 2 patches later where vfio_connect_container() unconditionally removes the default window. Which is unlikely scenario. Taking into account small number of DDW users (basically - QEMU v2.7.0), does it still make sense to keep this default window code? The only immediate problem I see here is QEMU will receive error from vfio_spapr_remove_window() in vfio_connect_container() as there will be no default window but I could workaround this. > >> >> >> >> >>> >>>> ret = tce_iommu_create_window(container, create.page_shift, >>>> create.window_size, create.levels, >>>> &create.start_addr); >>>> @@ -1030,6 +1061,12 @@ static long tce_iommu_ioctl(void *iommu_data, >>>> if (remove.flags) >>>> return -EINVAL; >>>> >>>> + if (container->def_window_pending && !remove.start_addr) { >>>> + container->def_window_pending = false; >>>> + return 0; >>>> + } >>>> + container->def_window_pending = false; >>>> + >>>> mutex_lock(&container->lock); >>>> >>>> ret = tce_iommu_remove_window(container, remove.start_addr); >>>> @@ -1109,9 +1146,6 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container, >>>> static long tce_iommu_take_ownership_ddw(struct tce_container *container, >>>> struct iommu_table_group *table_group) >>>> { >>>> - long i, ret = 0; >>>> - struct iommu_table *tbl = NULL; >>>> - >>>> if (!table_group->ops->create_table || !table_group->ops->set_window || >>>> !table_group->ops->release_ownership) { >>>> WARN_ON_ONCE(1); >>>> @@ -1120,47 +1154,9 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container, >>>> >>>> table_group->ops->take_ownership(table_group); >>>> >>>> - /* >>>> - * If it the first group attached, check if there is >>>> - * a default DMA window and create one if none as >>>> - * the userspace expects it to exist. >>>> - */ >>>> - if (!tce_groups_attached(container) && !container->tables[0]) { >>>> - ret = tce_iommu_create_table(container, >>>> - table_group, >>>> - 0, /* window number */ >>>> - IOMMU_PAGE_SHIFT_4K, >>>> - table_group->tce32_size, >>>> - 1, /* default levels */ >>>> - &tbl); >>>> - if (ret) >>>> - goto release_exit; >>>> - else >>>> - container->tables[0] = tbl; >>>> - } >>>> - >>>> - /* Set all windows to the new group */ >>>> - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { >>>> - tbl = container->tables[i]; >>>> - >>>> - if (!tbl) >>>> - continue; >>>> - >>>> - /* Set the default window to a new group */ >>>> - ret = table_group->ops->set_window(table_group, i, tbl); >>> >>> Uh... nothing in the new code seems to replace these set_window (and >>> unset_window) calls. What's up with that? >> >> >> tce_iommu_create_table() + set_window() is replaced with >> tce_iommu_create_default_window() which calls tce_iommu_create_window() >> which calls tce_iommu_create_table() + set_window(). >> >> I'll split this patch into 2 patches, one with the change I just explained >> and one to postpone the default window creation. > > Ah, right, I see. > > >> >> >>> >>>> - if (ret) >>>> - goto release_exit; >>>> - } >>>> + container->def_window_pending = true; >>>> >>>> return 0; >>>> - >>>> -release_exit: >>>> - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) >>>> - table_group->ops->unset_window(table_group, i); >>>> - >>>> - table_group->ops->release_ownership(table_group); >>>> - >>>> - return ret; >>>> } >>>> >>>> static int tce_iommu_attach_group(void *iommu_data, >>> >> >> > > > > -- Alexey
Attachment:
signature.asc
Description: OpenPGP digital signature