On Thu, Feb 22, 2024 at 04:34:10PM +0800, Yi Liu wrote: > > It doesn't mean that the S2 is globally shared across all the nesting > > translations (like ARM does), and you still have to iterate over every > > nesting DID. > > > > In light of that this design seems to have gone a bit off.. > > > > A domain should have a list of places that need invalidation, > > specifically a list of DIDs and ATCs that need an invalidation to be > > issued. > > > > Instead we now somehow have 4 different lists in the domain the > > invalidation code iterates over? > > > > So I would think this: > > > > struct dmar_domain { > > struct xarray iommu_array; /* Attached IOMMU array */ > > struct list_head devices; /* all devices' list */ > > struct list_head dev_pasids; /* all attached pasids */ > > struct list_head s1_domains; > > > > Would make sense to be collapsed into one logical list of attached > > devices: > > > > struct intel_iommu_domain_attachment { > > unsigned int did; > > ioasid_t pasid; > > struct device_domain_info *info; > > list_head item; > > }; > > > > When you attach a S1/S2 nest you allocate two of the above structs and > > one is linked on the S1 and one is linked on the S2.. > > yes, this shall work. ATC flushing should be fine. But there can be a > drawback when flushing DIDs. VT-d side, if there are multiple devices > behind the same iommu unit, just need one DID flushing is enough. But > after the above change, the number of DID flushing would increase per > the number of devices. Although no functional issue, but it submits > duplicated invalidation. At least the three server drivers all have this same problem, I would like to seem some core code to help solve it, since it is tricky and the RCU idea is good.. Collapsing invalidations can be done by sorting, I think this was Robin's suggestion. But we could also easially maintain multiple list threads hidden inside the API, or allocate a multi-level list. Something very approximately like this: Jason diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d14413916f93a0..7b2de139e7c437 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -38,6 +38,8 @@ #include "iommu-sva.h" +#include <linux/iommu-alist.h> + static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); static DEFINE_IDA(iommu_global_pasid_ida); diff --git a/include/linux/iommu-alist.h b/include/linux/iommu-alist.h new file mode 100644 index 00000000000000..7a9243f8b2b5f8 --- /dev/null +++ b/include/linux/iommu-alist.h @@ -0,0 +1,126 @@ +#include <linux/list.h> +#include <linux/rculist.h> +#include <linux/iommu.h> + +/* + * Place in an iommu_domain to keep track of what devices have been attached to + * it. + */ +struct iommu_attach_list { + spinlock_t lock; + struct list_head all; +}; + +/* + * Allocate for every time a device is attached to a domain + */ +struct iommu_attach_elm { + /* + * Note that this pointer is accessed under RCU, the driver has to be + * careful to rcu free it. + */ + struct iommu_device *iommu_device; + ioasid_t pasid; + u8 using_atc; + struct list_head all_item; + + /* May not be accessed under RCU! */ + struct device *device; +}; + +void iommu_attach_init(struct iommu_attach_list *alist) +{ + spin_lock_init(&alist->lock); +} + +int iommu_attach_add(struct iommu_attach_list *alist, + struct iommu_attach_elm *elm) +{ + struct list_head *insert_after; + + spin_lock(&alist->lock); + insert_after = list_find_sorted_insertion(alist, elm, cmp); + list_add_rcu(&elm->all_item, insert_after); + spin_unlock(&alist->lock); +} + +void iommu_attach_del_free(struct iommu_attach_list *alist, struct iommu_attach_elm *elm) +{ + spin_lock(&alist->lock); + list_del_rcu(&elm->all_item); + spin_unlock(&alist->lock); + /* assumes 0 offset */ + kfree_rcu_mightsleep(elm); +} + +static struct iommu_attach_elm * +__iommu_attach_next_iommu(struct iommu_attach_list *alist, + struct iommu_attach_elm *pos, + struct iommu_attach_elm *last) +{ + struct iommu_attach_elm *next; + + do { + next = list_next_or_null_rcu(&alist->all, &pos->all_item, + struct iommu_attach_elm, all_item); + if (!next) + return NULL; + if (!last) + return next; + } while (pos->iommu_device == next->iommu_device); + return next; +} + +/* assumes 0 offset */ +#define iommu_attach_next_iommu(alist, pos, last, member) \ + container_of(__iommu_attach_next_iommu(alist, &(pos)->member, \ + &(last)->member), \ + typeof(*pos), member) + +#define iommu_attach_for_each_iommu(pos, last, alist, member) \ + for (({ \ + RCU_LOCKDEP_WARN( \ + !rcu_read_lock_any_held(), \ + "RCU-list traversed in non-reader section!"); \ + }), \ + pos = list_first_or_null_rcu(&(alist)->all, typeof(*pos), \ + member.all_item), \ + last = NULL; \ + pos; pos = iommu_attach_next_iommu(alist, pos, last, member)) + +/* Example */ +struct driver_domain { + struct iommu_domain domain; + struct iommu_attach_list alist; +}; + +struct driver_attach_elm { + struct iommu_attach_elm aelm; + unsigned int cache_tag; +}; + +static void example(struct driver_domain *domain) +{ + struct driver_attach_elm *elm; + struct driver_attach_elm *pos, *last; + bool need_atc; + + iommu_attach_init(&domain->alist); + + elm = kzalloc(sizeof(*elm), GFP_KERNEL); + iommu_attach_add(&domain->alist, &elm->aelm); + + rcu_read_lock(); + iommu_attach_for_each_iommu(pos, last, &domain->alist, aelm) { + invalidate_iommu(elm); + need_atc |= elm->aelm.using_atc; + } + if (need_atc) { + list_for_each_entry_rcu(pos, &domain->alist.all, + aelm.all_item) { + if (pos->aelm.using_atc) + invalidate_atc(elm); + } + } + rcu_read_unlock(); +}