On Tue, Nov 05, 2013 at 04:24:58PM +0800, Yijing Wang wrote: > Currently, DMAR driver save target pci devices pointers for drhd/rmrr/atsr > in (pci_dev *) array. This is not safe, because pci devices maybe > hot added or removed during system running. They will have new pci_dev * > pointer. So if there have two IOMMUs or more in system, these devices > will find a wrong drhd during DMA mapping. And DMAR faults will occur. > This patch save pci device id insted of (pci_dev *) to fix this issue, > Because DMAR table just provide pci device id under a specific IOMMU, > so there is no reason to bind IOMMU with the (pci_dev *). Other, here > use list to manage devices' id for IOMMU, we can easily use list helper > to manage device id. > > after remove and rescan a pci device > [ 611.857095] dmar: DRHD: handling fault status reg 2 > [ 611.857109] dmar: DMAR:[DMA Read] Request device [86:00.3] fault addr ffff7000 > [ 611.857109] DMAR:[fault reason 02] Present bit in context entry is clear > [ 611.857524] dmar: DRHD: handling fault status reg 102 > [ 611.857534] dmar: DMAR:[DMA Read] Request device [86:00.3] fault addr ffff6000 > [ 611.857534] DMAR:[fault reason 02] Present bit in context entry is clear > [ 611.857936] dmar: DRHD: handling fault status reg 202 > [ 611.857947] dmar: DMAR:[DMA Read] Request device [86:00.3] fault addr ffff5000 > [ 611.857947] DMAR:[fault reason 02] Present bit in context entry is clear > [ 611.858351] dmar: DRHD: handling fault status reg 302 > [ 611.858362] dmar: DMAR:[DMA Read] Request device [86:00.3] fault addr ffff4000 > [ 611.858362] DMAR:[fault reason 02] Present bit in context entry is clear > [ 611.860819] IPv6: ADDRCONF(NETDEV_UP): eth3: link is not ready > [ 611.860983] dmar: DRHD: handling fault status reg 402 > [ 611.860995] dmar: INTR-REMAP: Request device [[86:00.3] fault index a4 > [ 611.860995] INTR-REMAP:[fault reason 34] Present field in the IRTE entry is clear > > Signed-off-by: Yijing Wang <wangyijing@xxxxxxxxxx> > --- > drivers/iommu/dmar.c | 93 +++++++++++++------------- > drivers/iommu/intel-iommu.c | 155 ++++++++++++++++++++++++++++--------------- > include/linux/dmar.h | 20 ++++-- > 3 files changed, 159 insertions(+), 109 deletions(-) > > diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c > index 785675a..9aa65a3 100644 > --- a/drivers/iommu/dmar.c > +++ b/drivers/iommu/dmar.c > @@ -65,12 +65,13 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) > } > > static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, > - struct pci_dev **dev, u16 segment) > + u16 segment, struct list_head *head) > { > struct pci_bus *bus; > struct pci_dev *pdev = NULL; > struct acpi_dmar_pci_path *path; > int count; > + struct dmar_device *dmar_dev; > > bus = pci_find_bus(segment, scope->bus); > path = (struct acpi_dmar_pci_path *)(scope + 1); > @@ -100,7 +101,6 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, > if (!pdev) { > pr_warn("Device scope device [%04x:%02x:%02x.%02x] not found\n", > segment, scope->bus, path->dev, path->fn); > - *dev = NULL; > return 0; > } > if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \ > @@ -111,54 +111,39 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, > pci_name(pdev)); > return -EINVAL; > } > - *dev = pdev; > + > + dmar_dev = kzalloc(sizeof(struct dmar_device), GFP_KERNEL); > + if (!dmar_dev) { > + pci_dev_put(pdev); > + return -ENOMEM; > + } > + > + dmar_dev->segment = segment; > + dmar_dev->bus = pdev->bus->number; > + dmar_dev->devfn = pdev->devfn; > + list_add_tail(&dmar_dev->list, head); > + > + pci_dev_put(pdev); > return 0; > } > > -int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, > - struct pci_dev ***devices, u16 segment) > +int __init dmar_parse_dev_scope(void *start, void *end, u16 segment, > + struct list_head *head) > { > struct acpi_dmar_device_scope *scope; > - void * tmp = start; > - int index; > int ret; > > - *cnt = 0; > - while (start < end) { > - scope = start; > - if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || > - scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) > - (*cnt)++; > - else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC && > - scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) { > - pr_warn("Unsupported device scope\n"); > - } > - start += scope->length; > - } > - if (*cnt == 0) > - return 0; > - > - *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL); > - if (!*devices) > - return -ENOMEM; > - > - start = tmp; > - index = 0; > while (start < end) { > scope = start; > if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || > scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) { > - ret = dmar_parse_one_dev_scope(scope, > - &(*devices)[index], segment); > - if (ret) { > - kfree(*devices); > + ret = dmar_parse_one_dev_scope(scope, segment, head); > + if (ret) > return ret; > - } > - index ++; > } > start += scope->length; > } > - > + > return 0; > } > > @@ -183,6 +168,7 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) > dmaru->reg_base_addr = drhd->address; > dmaru->segment = drhd->segment; > dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ > + INIT_LIST_HEAD(&dmaru->head); > > ret = alloc_iommu(dmaru); > if (ret) { > @@ -193,6 +179,19 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) > return 0; > } > > +static void drhd_free(struct dmar_drhd_unit *dmaru) > +{ > + struct dmar_device *dev, *tmp; > + > + list_for_each_entry_safe(dev, tmp, &dmaru->head, > + list) { > + list_del(&dev->list); > + kfree(dev); > + } > + > + kfree(dmaru); > +} > + > static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru) > { > struct acpi_dmar_hardware_unit *drhd; > @@ -205,11 +204,10 @@ static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru) > > ret = dmar_parse_dev_scope((void *)(drhd + 1), > ((void *)drhd) + drhd->header.length, > - &dmaru->devices_cnt, &dmaru->devices, > - drhd->segment); > + drhd->segment, &dmaru->head); > if (ret) { > list_del(&dmaru->list); > - kfree(dmaru); > + drhd_free(dmaru); > } > return ret; > } > @@ -378,16 +376,18 @@ parse_dmar_table(void) > return ret; > } > > -static int dmar_pci_device_match(struct pci_dev *devices[], int cnt, > - struct pci_dev *dev) > +static int dmar_pci_device_match(struct pci_dev *dev, > + struct list_head *head) > { > - int index; > + struct dmar_device *dmar_dev; > > while (dev) { > - for (index = 0; index < cnt; index++) > - if (dev == devices[index]) > - return 1; > - > + list_for_each_entry(dmar_dev, head, list) > + if (dmar_dev->segment == pci_domain_nr(dev->bus) > + && dmar_dev->bus == dev->bus->number > + && dmar_dev->devfn == dev->devfn) > + return 1; > + > /* Check our parent */ > dev = dev->bus->self; You didn't change this, but it looks like this may have the same problem we've been talking about here: http://lkml.kernel.org/r/20131105232903.3790.8738.stgit@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx Namely, if "dev" is a VF on a virtual bus, "dev->bus->self == NULL", so we won't search for any of the bridges leading to the VF. I proposed a pci_upstream_bridge() interface that could be used like this: /* Check our parent */ dev = pci_upstream_bridge(dev); > } > @@ -412,8 +412,7 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev) > drhd->segment == pci_domain_nr(dev->bus)) > return dmaru; > > - if (dmar_pci_device_match(dmaru->devices, > - dmaru->devices_cnt, dev)) > + if (dmar_pci_device_match(dev, &dmaru->head)) > return dmaru; > } > > diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c > index 15e9b57..b33fe0e 100644 > --- a/drivers/iommu/intel-iommu.c > +++ b/drivers/iommu/intel-iommu.c > @@ -650,7 +650,8 @@ static void domain_update_iommu_cap(struct dmar_domain *domain) > static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) > { > struct dmar_drhd_unit *drhd = NULL; > - int i; > + struct dmar_device *dmar_dev; > + struct pci_dev *pdev; > > for_each_drhd_unit(drhd) { > if (drhd->ignored) > @@ -658,16 +659,22 @@ static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) > if (segment != drhd->segment) > continue; > > - for (i = 0; i < drhd->devices_cnt; i++) { > - if (drhd->devices[i] && > - drhd->devices[i]->bus->number == bus && > - drhd->devices[i]->devfn == devfn) > - return drhd->iommu; > - if (drhd->devices[i] && > - drhd->devices[i]->subordinate && > - drhd->devices[i]->subordinate->number <= bus && > - drhd->devices[i]->subordinate->busn_res.end >= bus) > - return drhd->iommu; > + list_for_each_entry(dmar_dev, &drhd->head, list) { > + if (dmar_dev->bus == bus && > + dmar_dev->devfn == devfn) > + return drhd->iommu; > + > + pdev = pci_get_domain_bus_and_slot(dmar_dev->segment, > + dmar_dev->bus, dmar_dev->devfn); > + if (pdev->subordinate && > + pdev->subordinate->number <= bus && > + pdev->subordinate->busn_res.end >= bus) { > + pci_dev_put(pdev); > + return drhd->iommu; I don't know the details of how device_to_iommu() is used, but this style (acquire ref to pci_dev, match it to some other object, drop pci_dev ref, return object) makes me nervous. How do we know the caller isn't depending on pci_dev to remain attached to the object? What happens if the pci_dev disappears when we do the pci_dev_put() here? > + } > + > + if (pdev) > + pci_dev_put(pdev); > } > > if (drhd->include_all) > @@ -2331,18 +2338,20 @@ static int domain_add_dev_info(struct dmar_domain *domain, > static bool device_has_rmrr(struct pci_dev *dev) > { > struct dmar_rmrr_unit *rmrr; > - int i; > + struct dmar_device *dmar_dev; > > for_each_rmrr_units(rmrr) { > - for (i = 0; i < rmrr->devices_cnt; i++) { > - /* > - * Return TRUE if this RMRR contains the device that > - * is passed in. > - */ > - if (rmrr->devices[i] == dev) > - return true; > - } > + list_for_each_entry(dmar_dev, &rmrr->head, list) > + /* > + * Return TRUE if this RMRR contains the device that > + * is passed in. > + */ > + if (dmar_dev->segment == pci_domain_nr(dev->bus) && > + dmar_dev->bus == dev->bus->number && > + dmar_dev->devfn == dev->devfn) > + return true; > } > + > return false; > } > > @@ -2451,7 +2460,7 @@ static int __init init_dmars(void) > struct dmar_rmrr_unit *rmrr; > struct pci_dev *pdev; > struct intel_iommu *iommu; > - int i, ret; > + int ret; > > /* > * for each drhd > @@ -2605,8 +2614,10 @@ static int __init init_dmars(void) > */ > printk(KERN_INFO "IOMMU: Setting RMRR:\n"); > for_each_rmrr_units(rmrr) { > - for (i = 0; i < rmrr->devices_cnt; i++) { > - pdev = rmrr->devices[i]; > + struct dmar_device *dmar_dev; > + list_for_each_entry(dmar_dev, &rmrr->head, list) { > + pdev = pci_get_domain_bus_and_slot(dmar_dev->segment, > + dmar_dev->bus, dmar_dev->devfn); > /* > * some BIOS lists non-exist devices in DMAR > * table. > @@ -2615,9 +2626,11 @@ static int __init init_dmars(void) > continue; > ret = iommu_prepare_rmrr_dev(rmrr, pdev); > if (ret) > - printk(KERN_ERR > - "IOMMU: mapping reserved region failed\n"); > - } > + printk(KERN_ERR > + "IOMMU: mapping reserved region failed\n"); > + > + pci_dev_put(pdev); > + } > } > > iommu_prepare_isa(); > @@ -3301,30 +3314,30 @@ DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quir > static void __init init_no_remapping_devices(void) > { > struct dmar_drhd_unit *drhd; > + struct dmar_device *dmar_dev; > + struct pci_dev *pdev = NULL; > > - for_each_drhd_unit(drhd) { > - if (!drhd->include_all) { > - int i; > - for (i = 0; i < drhd->devices_cnt; i++) > - if (drhd->devices[i] != NULL) > - break; > + for_each_drhd_unit(drhd) > + if (!drhd->include_all) > /* ignore DMAR unit if no pci devices exist */ > - if (i == drhd->devices_cnt) > + if (list_empty(&drhd->head)) > drhd->ignored = 1; > - } > - } > - > + > for_each_drhd_unit(drhd) { > - int i; > if (drhd->ignored || drhd->include_all) > continue; > > - for (i = 0; i < drhd->devices_cnt; i++) > - if (drhd->devices[i] && > - !IS_GFX_DEVICE(drhd->devices[i])) > + list_for_each_entry(dmar_dev, &drhd->head, list) { > + pdev = pci_get_domain_bus_and_slot(dmar_dev->segment, > + dmar_dev->bus, dmar_dev->devfn); > + if (!IS_GFX_DEVICE(pdev)) { > + pci_dev_put(pdev); > break; > + } > + pci_dev_put(pdev); > + } > > - if (i < drhd->devices_cnt) > + if (!IS_GFX_DEVICE(pdev)) I think this is clearly wrong. You acquire a pdev reference, drop the reference, then look at pdev again after dropping the reference. But as soon as you do the pci_dev_put(), you have to assume pdev is no longer valid. > continue; > > /* This IOMMU has *only* gfx devices. Either bypass it or > @@ -3333,10 +3346,15 @@ static void __init init_no_remapping_devices(void) > intel_iommu_gfx_mapped = 1; > } else { > drhd->ignored = 1; > - for (i = 0; i < drhd->devices_cnt; i++) { > - if (!drhd->devices[i]) > + list_for_each_entry(dmar_dev, &drhd->head, list) { > + pdev = pci_get_domain_bus_and_slot( > + dmar_dev->segment, > + dmar_dev->bus, > + dmar_dev->devfn); > + if (!pdev) > continue; > - drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; > + pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; > + pci_dev_put(pdev); > } > } > } > @@ -3501,11 +3519,25 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header) > rmrr = (struct acpi_dmar_reserved_memory *)header; > rmrru->base_address = rmrr->base_address; > rmrru->end_address = rmrr->end_address; > + INIT_LIST_HEAD(&rmrru->head); > > dmar_register_rmrr_unit(rmrru); > return 0; > } > > +static void rmrr_free(struct dmar_rmrr_unit *rmrru) > +{ > + struct dmar_device *dev, *tmp; > + > + list_for_each_entry_safe(dev, tmp, &rmrru->head, > + list) { > + list_del(&dev->list); > + kfree(dev); > + } > + > + kfree(rmrru); > +} > + > static int __init > rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) > { > @@ -3515,11 +3547,11 @@ rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) > rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr; > ret = dmar_parse_dev_scope((void *)(rmrr + 1), > ((void *)rmrr) + rmrr->header.length, > - &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); > + rmrr->segment, &rmrru->head); > > - if (ret || (rmrru->devices_cnt == 0)) { > + if (ret || list_empty(&rmrru->head)) { > list_del(&rmrru->list); > - kfree(rmrru); > + rmrr_free(rmrru); > } > return ret; > } > @@ -3538,12 +3570,25 @@ int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr) > > atsru->hdr = hdr; > atsru->include_all = atsr->flags & 0x1; > + INIT_LIST_HEAD(&atsru->head); > > list_add(&atsru->list, &dmar_atsr_units); > > return 0; > } > > +static void atsr_free(struct dmar_atsr_unit *atsr) > +{ > + struct dmar_device *dev, *tmp; > + > + list_for_each_entry_safe(dev, tmp, &atsr->head, list) { > + list_del(&dev->list); > + kfree(dev); > + } > + > + kfree(atsr); > +} > + > static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru) > { > int rc; > @@ -3555,11 +3600,10 @@ static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru) > atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); > rc = dmar_parse_dev_scope((void *)(atsr + 1), > (void *)atsr + atsr->header.length, > - &atsru->devices_cnt, &atsru->devices, > - atsr->segment); > - if (rc || !atsru->devices_cnt) { > + atsr->segment, &atsru->head); > + if (rc || list_empty(&atsru->head)) { > list_del(&atsru->list); > - kfree(atsru); > + atsr_free(atsru); > } > > return rc; > @@ -3567,7 +3611,6 @@ static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru) > > int dmar_find_matched_atsr_unit(struct pci_dev *dev) > { > - int i; > struct pci_bus *bus; > struct acpi_dmar_atsr *atsr; > struct dmar_atsr_unit *atsru; > @@ -3584,6 +3627,7 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev) > > found: > for (bus = dev->bus; bus; bus = bus->parent) { > + struct dmar_device *dmar_dev; > struct pci_dev *bridge = bus->self; > > if (!bridge || !pci_is_pcie(bridge) || > @@ -3591,8 +3635,11 @@ found: > return 0; > > if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) { > - for (i = 0; i < atsru->devices_cnt; i++) > - if (atsru->devices[i] == bridge) > + list_for_each_entry(dmar_dev, &atsru->head, list) > + if (dmar_dev->segment == > + pci_domain_nr(bridge->bus) && > + dmar_dev->bus == bridge->bus->number && > + dmar_dev->devfn == bridge->devfn) > return 1; > break; > } > diff --git a/include/linux/dmar.h b/include/linux/dmar.h > index b029d1a..5317cb0 100644 > --- a/include/linux/dmar.h > +++ b/include/linux/dmar.h > @@ -32,6 +32,13 @@ struct acpi_dmar_header; > #define DMAR_INTR_REMAP 0x1 > #define DMAR_X2APIC_OPT_OUT 0x2 > > +struct dmar_device { > + struct list_head list; > + u8 segment; I think this should be u16. I didn't chase down how you're using it, but Table 8.3 in the Intel VT-d spec shows Segment Number in a DRHD structure as 16 bits. > + u8 bus; > + u8 devfn; > +}; > + > struct intel_iommu; > #ifdef CONFIG_DMAR_TABLE > extern struct acpi_table_header *dmar_tbl; > @@ -39,8 +46,7 @@ struct dmar_drhd_unit { > struct list_head list; /* list of drhd units */ > struct acpi_dmar_header *hdr; /* ACPI header */ > u64 reg_base_addr; /* register base address*/ > - struct pci_dev **devices; /* target device array */ > - int devices_cnt; /* target device count */ > + struct list_head head; /* target devices' list */ s/devices'/device/ (also below). This is not a contraction or a possessive construct, so no apostrophe is needed. > u16 segment; /* PCI domain */ > u8 ignored:1; /* ignore drhd */ > u8 include_all:1; > @@ -139,8 +145,7 @@ struct dmar_rmrr_unit { > struct acpi_dmar_header *hdr; /* ACPI header */ > u64 base_address; /* reserved base address*/ > u64 end_address; /* reserved end address */ > - struct pci_dev **devices; /* target devices */ > - int devices_cnt; /* target device count */ > + struct list_head head; /* target devices' list */ > }; > > #define for_each_rmrr_units(rmrr) \ > @@ -149,16 +154,15 @@ struct dmar_rmrr_unit { > struct dmar_atsr_unit { > struct list_head list; /* list of ATSR units */ > struct acpi_dmar_header *hdr; /* ACPI header */ > - struct pci_dev **devices; /* target devices */ > - int devices_cnt; /* target device count */ > u8 include_all:1; /* include all ports */ > + struct list_head head; /* target devices' list */ > }; > > int dmar_parse_rmrr_atsr_dev(void); > extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header); > extern int dmar_parse_one_atsr(struct acpi_dmar_header *header); > -extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, > - struct pci_dev ***devices, u16 segment); > +extern int dmar_parse_dev_scope(void *start, void *end, u16 segment, > + struct list_head *head); > extern int intel_iommu_init(void); > #else /* !CONFIG_INTEL_IOMMU: */ > static inline int intel_iommu_init(void) { return -ENODEV; } > -- > 1.7.1 > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-pci" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html