At a high level, this code operates primarily during iommu initialization and device-driver initialization. During intel-iommu hardware initialization: In intel_iommu_init(void) * If (This is the crash kernel) . Set flag: crashdump_accepting_active_iommu (all changes below check this) . Skip disabling the iommu hardware translations In init_dmars() * Duplicate the intel iommu translation tables from the old kernel in the new kernel . The root-entry table, all context-entry tables, and all page-translation-entry tables . The duplicate tables contain updated physical addresses to link them together. . The duplicate tables are mapped into kernel virtual addresses in the new kernel, which allows most of the existing iommu code to operate without change. . Do some minimal sanity-checks during the copy . Place the address of the new root-entry structure into "struct intel_iommu" * Skip setting-up new domains for 'si', 'rmrr', 'isa' . Translations for 'rmrr' and 'isa' ranges have been copied from the old kernel . This patch has not yet been tested with iommu pass-through enabled * Existing (unchanged) code near the end of init_dmars(): . Loads the address of the (now new) root-entry structure from "struct intel_iommu" into the iommu hardware and does the hardware flushes. This changes the active translation tables from the ones in the old kernel to the copies in the new kernel. . This is legal because the translations in the two sets of tables are currently identical: Intel Virtualization Technology for Directed I/O, Architecture Specification, February 2011, Rev. 1.3 (section 11.2, paragraph 2) In iommu_init_domains() * Mark as in-use all domain-ids from the old kernel . In case the new kernel contains a device that was not in the old kernel and a new, unused domain-id is actually needed, the bitmap will give us one. When a new domain is created for a device: * If (this device has a context in the old kernel) . 
Get domain-id, address-width, and IOVA ranges from the old kernel context; . Get address(page-entry-tables) from the copy in the new kernel; . And apply all of the above values to the new domain structure. * Else . Create a new domain as normal v1->v2: Updated patch description Signed-off-by: Bill Sumner <bill.sumner at hp.com> --- drivers/iommu/intel-iommu.c | 272 +++++++++++++++++++++++++++++++++----------- 1 file changed, 204 insertions(+), 68 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 3b357e2..58f6d87 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -21,6 +21,8 @@ * Author: Fenghua Yu <fenghua.yu at intel.com> */ +#define DEBUG 1 /* TEMPORARY */ + #include <linux/init.h> #include <linux/bitmap.h> #include <linux/debugfs.h> @@ -1357,6 +1359,12 @@ static int iommu_init_domains(struct intel_iommu *iommu) */ if (cap_caching_mode(iommu->cap)) set_bit(0, iommu->domain_ids); + +#ifdef CONFIG_CRASH_DUMP + if (crashdump_accepting_active_iommu) + intel_iommu_get_dids_from_old_kernel(iommu); +#endif /* CONFIG_CRASH_DUMP */ + return 0; } @@ -1430,7 +1438,8 @@ static struct dmar_domain *alloc_domain(void) } static int iommu_attach_domain(struct dmar_domain *domain, - struct intel_iommu *iommu) + struct intel_iommu *iommu, + int domain_number) { int num; unsigned long ndomains; @@ -1440,12 +1449,15 @@ static int iommu_attach_domain(struct dmar_domain *domain, spin_lock_irqsave(&iommu->lock, flags); - num = find_first_zero_bit(iommu->domain_ids, ndomains); - if (num >= ndomains) { - spin_unlock_irqrestore(&iommu->lock, flags); - printk(KERN_ERR "IOMMU: no free domain ids\n"); - return -ENOMEM; - } + if (domain_number < 0) { + num = find_first_zero_bit(iommu->domain_ids, ndomains); + if (num >= ndomains) { + spin_unlock_irqrestore(&iommu->lock, flags); + printk(KERN_ERR "IOMMU: no free domain ids\n"); + return -ENOMEM; + } + } else + num = domain_number; domain->id = num; set_bit(num, 
iommu->domain_ids); @@ -2056,8 +2068,17 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) int bus = 0, devfn = 0; int segment; int ret; + int did = -1; /* Default to "no domain_id supplied" */ domain = find_domain(pdev); + +#ifdef CONFIG_CRASH_DUMP + if (domain) + if (pr_dbg.in_crashdump && crashdump_accepting_active_iommu) + pr_debug("IOMMU: Found domain (%d) for device %s\n", + domain->id, pci_name(pdev)); +#endif /* CONFIG_CRASH_DUMP */ + if (domain) return domain; @@ -2088,6 +2109,12 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) } } +#ifdef CONFIG_CRASH_DUMP + if (pr_dbg.in_crashdump && crashdump_accepting_active_iommu) + pr_debug("IOMMU: Allocating new domain for device %s\n", + pci_name(pdev)); +#endif /* CONFIG_CRASH_DUMP */ + domain = alloc_domain(); if (!domain) goto error; @@ -2102,7 +2129,26 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) } iommu = drhd->iommu; - ret = iommu_attach_domain(domain, iommu); +#ifdef CONFIG_CRASH_DUMP + /* See if this device had a did & gaw in the old kernel */ + if (crashdump_accepting_active_iommu) { + did = domain_get_did_from_old_kernel(iommu, pdev); + if (did > 0 || (did == 0 && !cap_caching_mode(iommu->cap))) { + ret = domain_get_gaw_from_old_kernel(iommu, pdev); + if (ret > 0) + gaw = ret; + else + did = -1; + } else + did = -1; + } + + if (pr_dbg.in_crashdump && crashdump_accepting_active_iommu) + pr_debug("IOMMU: new domain for device %s: gaw(%d) did(%d)\n", + pci_name(pdev), gaw, did); +#endif /* CONFIG_CRASH_DUMP */ + + ret = iommu_attach_domain(domain, iommu, did); if (ret) { free_domain_mem(domain); goto error; @@ -2113,6 +2159,23 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) goto error; } +#ifdef CONFIG_CRASH_DUMP + if (crashdump_accepting_active_iommu && did >= 0) { + u64 temp_pgd; /* Top page-translation-table */ + + domain_get_ranges_from_old_kernel(domain, iommu, pdev); + + 
temp_pgd = domain_get_pgd_from_old_kernel(iommu, pdev); + if (temp_pgd) { + if (domain->pgd) + free_pgtable_page(domain->pgd); + domain->pgd = (struct dma_pte *)temp_pgd; + } + pr_debug("IOMMU: New Domain for device %s Did:%d Pgd: 0x%12.12llx\n", + pci_name(pdev), did, temp_pgd); + } +#endif /* CONFIG_CRASH_DUMP */ + /* register pcie-to-pci device */ if (dev_tmp) { info = alloc_devinfo_mem(); @@ -2323,7 +2386,7 @@ static int __init si_domain_init(int hw) pr_debug("Identity mapping domain is domain %d\n", si_domain->id); for_each_active_iommu(iommu, drhd) { - ret = iommu_attach_domain(si_domain, iommu); + ret = iommu_attach_domain(si_domain, iommu, (int) -1); if (ret) { domain_exit(si_domain); return -EFAULT; @@ -2531,6 +2594,10 @@ static int __init init_dmars(void) struct pci_dev *pdev; struct intel_iommu *iommu; int i, ret; +#ifdef CONFIG_CRASH_DUMP + struct root_entry *root_old_phys; + struct root_entry *root_new_virt; +#endif /* CONFIG_CRASH_DUMP */ /* * for each drhd @@ -2578,16 +2645,41 @@ static int __init init_dmars(void) if (ret) goto error; - /* - * TBD: - * we could share the same root & context tables - * among all IOMMU's. Need to Split it later. 
- */ - ret = iommu_alloc_root_entry(iommu); - if (ret) { - printk(KERN_ERR "IOMMU: allocate root entry failed\n"); - goto error; +#ifdef CONFIG_CRASH_DUMP + if (crashdump_accepting_active_iommu) { + print_intel_iommu_registers(drhd); + + pr_debug("Calling copy_intel_iommu_translation_tables\n"); + pr_debug("(lists tables in OLD KERNEL during copy)\n"); + ret = copy_intel_iommu_translation_tables(drhd, + &root_old_phys, &root_new_virt); + if (ret) { + pr_err("IOMMU: Copy translate tables failed\n"); + + /* Best to stop trying */ + crashdump_accepting_active_iommu = false; + goto error; + } + iommu->root_entry = root_new_virt; + pr_debug("IOMMU: root_new_virt:0x%12.12llx phys:0x%12.12llx\n", + (u64)root_new_virt, + virt_to_phys(root_new_virt)); + } else { +#endif /* CONFIG_CRASH_DUMP */ + /* + * TBD: + * we could share the same root & context tables + * among all IOMMU's. Need to Split it later. + */ + ret = iommu_alloc_root_entry(iommu); + if (ret) { + printk(KERN_ERR "IOMMU: allocate root entry failed\n"); + goto error; + } +#ifdef CONFIG_CRASH_DUMP } +#endif /* CONFIG_CRASH_DUMP */ + if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; } @@ -2656,50 +2748,69 @@ static int __init init_dmars(void) check_tylersburg_isoch(); - /* - * If pass through is not set or not enabled, setup context entries for - * identity mappings for rmrr, gfx, and isa and may fall back to static - * identity mapping if iommu_identity_mapping is set. - */ - if (iommu_identity_mapping) { - ret = iommu_prepare_static_identity_mapping(hw_pass_through); - if (ret) { - printk(KERN_CRIT "Failed to setup IOMMU pass-through\n"); - goto error; +#ifdef CONFIG_CRASH_DUMP + if (!crashdump_accepting_active_iommu) { + /* Skip setting-up new domains for si, rmrr, and the isa bus + * on the expectation that these translations + * were copied from the old kernel. + * + * NOTE: Indented the existing code below because it is now + * conditional upon the 'if' statement above. 
+ * This pushed many of the lines over 80 characters. + * Chose to leave them and live with the 'checkpatch' warnings + * about "over 80 characters". + */ +#endif /* CONFIG_CRASH_DUMP */ + /* + * If pass through is not set or not enabled, setup context entries for + * identity mappings for rmrr, gfx, and isa and may fall back to static + * identity mapping if iommu_identity_mapping is set. + */ + if (iommu_identity_mapping) { + ret = iommu_prepare_static_identity_mapping(hw_pass_through); + if (ret) { + printk(KERN_CRIT "Failed to setup IOMMU pass-through\n"); + goto error; + } } - } - /* - * For each rmrr - * for each dev attached to rmrr - * do - * locate drhd for dev, alloc domain for dev - * allocate free domain - * allocate page table entries for rmrr - * if context not allocated for bus - * allocate and init context - * set present in root table for this bus - * init context with domain, translation etc - * endfor - * endfor - */ - printk(KERN_INFO "IOMMU: Setting RMRR:\n"); - for_each_rmrr_units(rmrr) { - for (i = 0; i < rmrr->devices_cnt; i++) { - pdev = rmrr->devices[i]; - /* - * some BIOS lists non-exist devices in DMAR - * table. - */ - if (!pdev) - continue; - ret = iommu_prepare_rmrr_dev(rmrr, pdev); - if (ret) - printk(KERN_ERR - "IOMMU: mapping reserved region failed\n"); + /* + * For each rmrr + * for each dev attached to rmrr + * do + * locate drhd for dev, alloc domain for dev + * allocate free domain + * allocate page table entries for rmrr + * if context not allocated for bus + * allocate and init context + * set present in root table for this bus + * init context with domain, translation etc + * endfor + * endfor + */ + printk(KERN_INFO "IOMMU: Setting RMRR:\n"); + for_each_rmrr_units(rmrr) { + for (i = 0; i < rmrr->devices_cnt; i++) { + pdev = rmrr->devices[i]; + /* + * some BIOS lists non-exist devices in DMAR + * table. 
+ */ + if (!pdev) + continue; + ret = iommu_prepare_rmrr_dev(rmrr, pdev); + if (ret) + printk(KERN_ERR + "IOMMU: mapping reserved region failed\n"); + } } - } - iommu_prepare_isa(); + iommu_prepare_isa(); +#ifdef CONFIG_CRASH_DUMP + } else { + intel_iommu_translation_tables_are_mapped = true; + pr_debug("intel_iommu_translation_tables_are_mapped = true\n"); + } +#endif /* CONFIG_CRASH_DUMP */ /* * for each drhd @@ -2893,6 +3004,12 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, BUG_ON(dir == DMA_NONE); +#ifdef CONFIG_CRASH_DUMP + if (pr_dbg.in_crashdump && crashdump_accepting_active_iommu) + pr_debug("ENTER %s paddr(0x%12.12llx) size(0x%12.12lx)\n", + __func__, paddr, size); +#endif /* CONFIG_CRASH_DUMP */ + if (iommu_no_mapping(hwdev)) return paddr; @@ -2935,6 +3052,12 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; start_paddr += paddr & ~PAGE_MASK; + +#ifdef CONFIG_CRASH_DUMP + if (pr_dbg.in_crashdump && crashdump_accepting_active_iommu) + pr_debug("LEAVE %s dma_addr_t(0x%16.16llx)\n", + __func__, start_paddr); +#endif /* CONFIG_CRASH_DUMP */ return start_paddr; error: @@ -3754,19 +3877,32 @@ int __init intel_iommu_init(void) return -ENODEV; } +#ifdef CONFIG_CRASH_DUMP /* - * Disable translation if already enabled prior to OS handover. + * If (This is the crash kernel) + * Set: copy iommu translate tables from old kernel + * Skip disabling the iommu hardware translations */ - for_each_drhd_unit(drhd) { - struct intel_iommu *iommu; + if (is_kdump_kernel()) { + crashdump_accepting_active_iommu = true; + pr_info("IOMMU crashdump_accepting_active_iommu = true\n"); + pr_info("IOMMU Skip disabling iommu hardware translations\n"); + } else +#endif /* CONFIG_CRASH_DUMP */ + /* + * Disable translation if already enabled prior to OS handover. 
+ */ + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu; - if (drhd->ignored) - continue; + if (drhd->ignored) + continue; + + iommu = drhd->iommu; + if (iommu->gcmd & DMA_GCMD_TE) + iommu_disable_translation(iommu); + } - iommu = drhd->iommu; - if (iommu->gcmd & DMA_GCMD_TE) - iommu_disable_translation(iommu); - } if (dmar_dev_scope_init() < 0) { if (force_on) -- Bill Sumner <bill.sumner at hp.com>