Hi Baolu, Liang

Thanks for the reply.

On Thu, Oct 10, 2024 at 4:01 PM Baolu Lu <baolu.lu@xxxxxxxxxxxxxxx> wrote:
>
> On 2024/10/10 21:25, Liang, Kan wrote:
> > On 2024-10-10 6:10 a.m., Jinpu Wang wrote:
> >> Hi Greg,
> >>
> >>
> >> On Thu, Oct 10, 2024 at 11:31 AM Greg KH<gregkh@xxxxxxxxxxxxxxxxxxx> wrote:
> >>> On Thu, Oct 10, 2024 at 11:13:42AM +0200, Jinpu Wang wrote:
> >>>> Hi Greg,
> >>>>
> >>>> On Thu, Oct 10, 2024 at 11:07 AM Greg KH<gregkh@xxxxxxxxxxxxxxxxxxx> wrote:
> >>>>> On Thu, Oct 10, 2024 at 09:31:37AM +0200, Jinpu Wang wrote:
> >>>>>> Hello all,
> >>>>>>
> >>>>>> We are experiencing a boot hang issue when booting kernel version
> >>>>>> 6.1.83+ on a Dell Inc. PowerEdge R770 equipped with an Intel Xeon
> >>>>>> 6710E processor. After extensive testing and use of `git bisect`, we
> >>>>>> have traced the issue to commit:
> >>>>>>
> >>>>>> `586e19c88a0c ("iommu/vt-d: Retrieve IOMMU perfmon capability information")`
> >>>>>>
> >>>>>> This commit appears to be part of a larger patchset, which can be found here:
> >>>>>> [Patchset on lore.kernel.org](https://lore.kernel.org/
> >>>>>> lkml/7c4b3e4e-1c5d-04f1-1891-84f686c94736@xxxxxxxxxxxxxxx/T/)
> >>>>>>
> >>>>>> We attempted to boot with the `intel_iommu=off` option, but the system
> >>>>>> hangs in the same manner. However, the system boots successfully after
> >>>>>> disabling `CONFIG_INTEL_IOMMU_PERF_EVENTS`.
> >>>>> Is there any error messages? Does the latest 6.6.y tree work properly?
> >>>>> If so, why not just use that, no new hardware should be using older
> >>>>> kernel trees anyway 🙂
> >>>> No error, just hang, I've removed "quiet" and added "debug".
> >>>> Yes, the latest 6.6.y tree works for this, but there are other
> >>>> problems/dependency we have to solve.
> >>> Ok, that implies that we need to add some other patch to 6.1.y, OR we
> >>> can revert it from 6.1.y. Let me know what you think is the better
> >>> thing to do.
> >>>
> >> I think better to revert both:
> >> 8c91a4bfc7f8 ("iommu: Fix compilation without CONFIG_IOMMU_INTEL")
> > I'm not sure about this one. May need baolu's comments.
>
> I can't find this commit in the mainline kernel. I guess it fixes a
> compilation issue in the stable tree? If so, it depends on whether the
> issue is still there.
>
> Thanks,
> baolu

Both commit hashes are from the stable tree (the linux-6.1.y branch).

I tried reverting only 586e19c88a0c ("iommu/vt-d: Retrieve IOMMU perfmon
capability information"). There is a minor conflict in the Makefile, but
it is easy to fix. I have attached the patch below; Greg, please consider
including it. Thanks!
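For reference, the revert can be regenerated on a linux-6.1.y checkout
roughly as follows (a sketch, not necessarily the exact commands used;
the commit hash is the stable-tree hash mentioned above):

  # Revert the stable-tree backport; git stops on the Makefile conflict.
  git revert 586e19c88a0c
  # Resolve the conflict in drivers/iommu/intel/Makefile by dropping the
  #   obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o
  # line, then finish the revert:
  git add drivers/iommu/intel/Makefile
  git revert --continue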
From 286b2d88de310423b2cda454f327b3fd3600b56d Mon Sep 17 00:00:00 2001
From: Jack Wang <jinpu.wang@xxxxxxxxx>
Date: Fri, 11 Oct 2024 07:10:34 +0200
Subject: [PATCH] Revert "iommu/vt-d: Retrieve IOMMU perfmon capability information"

This reverts commit 586e19c88a0cb58b6ff45ae085b3dd200d862153.

The commit was pulled into stable as a dependency for:
8c91a4bfc7f8 ("iommu: Fix compilation without CONFIG_IOMMU_INTEL")

However, it is part of a larger patchset and should not be backported on
its own, and it leads to a boot hang on kernel 6.1.83+ with a Dell
PowerEdge R770 and an Intel Xeon 6710E, so revert it for stable 6.1.112.

Signed-off-by: Jack Wang <jinpu.wang@xxxxxxxxx>
---
 drivers/iommu/intel/Kconfig   |  11 ---
 drivers/iommu/intel/Makefile  |   1 -
 drivers/iommu/intel/dmar.c    |   7 --
 drivers/iommu/intel/iommu.h   |  43 +--------
 drivers/iommu/intel/perfmon.c | 172 ----------------------------------
 drivers/iommu/intel/perfmon.h |  40 --------
 6 files changed, 1 insertion(+), 273 deletions(-)
 delete mode 100644 drivers/iommu/intel/perfmon.c
 delete mode 100644 drivers/iommu/intel/perfmon.h

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 12e1e90fdae1..b7dff5092fd2 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -96,15 +96,4 @@ config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
 	  passing intel_iommu=sm_on to the kernel.
 	  If not sure, please use the default value.
 
-config INTEL_IOMMU_PERF_EVENTS
-	def_bool y
-	bool "Intel IOMMU performance events"
-	depends on INTEL_IOMMU && PERF_EVENTS
-	help
-	  Selecting this option will enable the performance monitoring
-	  infrastructure in the Intel IOMMU. It collects information about
-	  key events occurring during operation of the remapping hardware,
-	  to aid performance tuning and debug. These are available on modern
-	  processors which support Intel VT-d 4.0 and later.
-
 endif # INTEL_IOMMU
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index 29d26a437132..1e592655759d 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -8,4 +8,3 @@ obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
 ifdef CONFIG_INTEL_IOMMU
 obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
 endif
-obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 345c161ffb27..9c33f1d62ed9 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -34,7 +34,6 @@
 #include "../irq_remapping.h"
 #include "perf.h"
 #include "trace.h"
-#include "perfmon.h"
 
 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
 struct dmar_res_callback {
@@ -1105,9 +1104,6 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
 	if (sts & DMA_GSTS_QIES)
 		iommu->gcmd |= DMA_GCMD_QIE;
 
-	if (alloc_iommu_pmu(iommu))
-		pr_debug("Cannot alloc PMU for iommu (seq_id = %d)\n", iommu->seq_id);
-
 	raw_spin_lock_init(&iommu->register_lock);
 
 	/*
@@ -1135,7 +1131,6 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
 err_sysfs:
 	iommu_device_sysfs_remove(&iommu->iommu);
 err_unmap:
-	free_iommu_pmu(iommu);
 	unmap_iommu(iommu);
 error_free_seq_id:
 	ida_free(&dmar_seq_ids, iommu->seq_id);
@@ -1151,8 +1146,6 @@ static void free_iommu(struct intel_iommu *iommu)
 		iommu_device_sysfs_remove(&iommu->iommu);
 	}
 
-	free_iommu_pmu(iommu);
-
 	if (iommu->irq) {
 		if (iommu->pr_irq) {
 			free_irq(iommu->pr_irq, iommu);
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index c1348bedab3b..c99cb715bd9a 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -125,11 +125,6 @@
 #define DMAR_MTRR_PHYSMASK8_REG 0x208
 #define DMAR_MTRR_PHYSBASE9_REG 0x210
 #define DMAR_MTRR_PHYSMASK9_REG 0x218
-#define DMAR_PERFCAP_REG	0x300
-#define DMAR_PERFCFGOFF_REG	0x310
-#define DMAR_PERFOVFOFF_REG	0x318
-#define DMAR_PERFCNTROFF_REG	0x31c
-#define DMAR_PERFEVNTCAP_REG	0x380
 #define DMAR_VCCAP_REG		0xe30 /* Virtual command capability register */
 #define DMAR_VCMD_REG		0xe00 /* Virtual command register */
 #define DMAR_VCRSP_REG		0xe10 /* Virtual command response register */
@@ -153,7 +148,6 @@
  */
 #define cap_esrtps(c)		(((c) >> 63) & 1)
 #define cap_esirtps(c)		(((c) >> 62) & 1)
-#define cap_ecmds(c)		(((c) >> 61) & 1)
 #define cap_fl5lp_support(c)	(((c) >> 60) & 1)
 #define cap_pi_support(c)	(((c) >> 59) & 1)
 #define cap_fl1gp_support(c)	(((c) >> 56) & 1)
@@ -185,8 +179,7 @@
  * Extended Capability Register
  */
 
-#define ecap_pms(e)		(((e) >> 51) & 0x1)
-#define ecap_rps(e)		(((e) >> 49) & 0x1)
+#define ecap_rps(e)		(((e) >> 49) & 0x1)
 #define ecap_smpwc(e)		(((e) >> 48) & 0x1)
 #define ecap_flts(e)		(((e) >> 47) & 0x1)
 #define ecap_slts(e)		(((e) >> 46) & 0x1)
@@ -217,22 +210,6 @@
 #define ecap_max_handle_mask(e) (((e) >> 20) & 0xf)
 #define ecap_sc_support(e)	(((e) >> 7) & 0x1) /* Snooping Control */
 
-/*
- * Decoding Perf Capability Register
- */
-#define pcap_num_cntr(p)	((p) & 0xffff)
-#define pcap_cntr_width(p)	(((p) >> 16) & 0x7f)
-#define pcap_num_event_group(p)	(((p) >> 24) & 0x1f)
-#define pcap_filters_mask(p)	(((p) >> 32) & 0x1f)
-#define pcap_interrupt(p)	(((p) >> 50) & 0x1)
-/* The counter stride is calculated as 2 ^ (x+10) bytes */
-#define pcap_cntr_stride(p)	(1ULL << ((((p) >> 52) & 0x7) + 10))
-
-/*
- * Decoding Perf Event Capability Register
- */
-#define pecap_es(p)		((p) & 0xfffffff)
-
 /* Virtual command interface capability */
 #define vccap_pasid(v)		(((v) & DMA_VCS_PAS)) /* PASID allocation */
 
@@ -584,22 +561,6 @@ struct dmar_domain {
 					   iommu core */
 };
 
-struct iommu_pmu {
-	struct intel_iommu	*iommu;
-	u32			num_cntr;	/* Number of counters */
-	u32			num_eg;		/* Number of event group */
-	u32			cntr_width;	/* Counter width */
-	u32			cntr_stride;	/* Counter Stride */
-	u32			filter;		/* Bitmask of filter support */
-	void __iomem		*base;		/* the PerfMon base address */
-	void __iomem		*cfg_reg;	/* counter configuration base address */
-	void __iomem		*cntr_reg;	/* counter 0 address*/
-	void __iomem		*overflow;	/* overflow status register */
-
-	u64			*evcap;		/* Indicates all supported events */
-	u32			**cntr_evcap;	/* Supported events of each counter. */
-};
-
 struct intel_iommu {
 	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
 	u64		reg_phys; /* physical address of hw register set */
@@ -647,8 +608,6 @@ struct intel_iommu {
 
 	struct dmar_drhd_unit *drhd;
 	void *perf_statistic;
-
-	struct iommu_pmu *pmu;
 };
 
 /* PCI domain-device relationship */
diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c
deleted file mode 100644
index db5791a54455..000000000000
--- a/drivers/iommu/intel/perfmon.c
+++ /dev/null
@@ -1,172 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Support Intel IOMMU PerfMon
- * Copyright(c) 2023 Intel Corporation.
- */
-#define pr_fmt(fmt)	"DMAR: " fmt
-#define dev_fmt(fmt)	pr_fmt(fmt)
-
-#include <linux/dmar.h>
-#include "iommu.h"
-#include "perfmon.h"
-
-static inline void __iomem *
-get_perf_reg_address(struct intel_iommu *iommu, u32 offset)
-{
-	u32 off = dmar_readl(iommu->reg + offset);
-
-	return iommu->reg + off;
-}
-
-int alloc_iommu_pmu(struct intel_iommu *iommu)
-{
-	struct iommu_pmu *iommu_pmu;
-	int i, j, ret;
-	u64 perfcap;
-	u32 cap;
-
-	if (!ecap_pms(iommu->ecap))
-		return 0;
-
-	/* The IOMMU PMU requires the ECMD support as well */
-	if (!cap_ecmds(iommu->cap))
-		return -ENODEV;
-
-	perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG);
-	/* The performance monitoring is not supported. */
-	if (!perfcap)
-		return -ENODEV;
-
-	/* Sanity check for the number of the counters and event groups */
-	if (!pcap_num_cntr(perfcap) || !pcap_num_event_group(perfcap))
-		return -ENODEV;
-
-	/* The interrupt on overflow is required */
-	if (!pcap_interrupt(perfcap))
-		return -ENODEV;
-
-	iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL);
-	if (!iommu_pmu)
-		return -ENOMEM;
-
-	iommu_pmu->num_cntr = pcap_num_cntr(perfcap);
-	iommu_pmu->cntr_width = pcap_cntr_width(perfcap);
-	iommu_pmu->filter = pcap_filters_mask(perfcap);
-	iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap);
-	iommu_pmu->num_eg = pcap_num_event_group(perfcap);
-
-	iommu_pmu->evcap = kcalloc(iommu_pmu->num_eg, sizeof(u64), GFP_KERNEL);
-	if (!iommu_pmu->evcap) {
-		ret = -ENOMEM;
-		goto free_pmu;
-	}
-
-	/* Parse event group capabilities */
-	for (i = 0; i < iommu_pmu->num_eg; i++) {
-		u64 pcap;
-
-		pcap = dmar_readq(iommu->reg + DMAR_PERFEVNTCAP_REG +
-				  i * IOMMU_PMU_CAP_REGS_STEP);
-		iommu_pmu->evcap[i] = pecap_es(pcap);
-	}
-
-	iommu_pmu->cntr_evcap = kcalloc(iommu_pmu->num_cntr, sizeof(u32 *), GFP_KERNEL);
-	if (!iommu_pmu->cntr_evcap) {
-		ret = -ENOMEM;
-		goto free_pmu_evcap;
-	}
-	for (i = 0; i < iommu_pmu->num_cntr; i++) {
-		iommu_pmu->cntr_evcap[i] = kcalloc(iommu_pmu->num_eg, sizeof(u32), GFP_KERNEL);
-		if (!iommu_pmu->cntr_evcap[i]) {
-			ret = -ENOMEM;
-			goto free_pmu_cntr_evcap;
-		}
-		/*
-		 * Set to the global capabilities, will adjust according
-		 * to per-counter capabilities later.
-		 */
-		for (j = 0; j < iommu_pmu->num_eg; j++)
-			iommu_pmu->cntr_evcap[i][j] = (u32)iommu_pmu->evcap[j];
-	}
-
-	iommu_pmu->cfg_reg = get_perf_reg_address(iommu, DMAR_PERFCFGOFF_REG);
-	iommu_pmu->cntr_reg = get_perf_reg_address(iommu, DMAR_PERFCNTROFF_REG);
-	iommu_pmu->overflow = get_perf_reg_address(iommu, DMAR_PERFOVFOFF_REG);
-
-	/*
-	 * Check per-counter capabilities. All counters should have the
-	 * same capabilities on Interrupt on Overflow Support and Counter
-	 * Width.
-	 */
-	for (i = 0; i < iommu_pmu->num_cntr; i++) {
-		cap = dmar_readl(iommu_pmu->cfg_reg +
-				 i * IOMMU_PMU_CFG_OFFSET +
-				 IOMMU_PMU_CFG_CNTRCAP_OFFSET);
-		if (!iommu_cntrcap_pcc(cap))
-			continue;
-
-		/*
-		 * It's possible that some counters have a different
-		 * capability because of e.g., HW bug. Check the corner
-		 * case here and simply drop those counters.
-		 */
-		if ((iommu_cntrcap_cw(cap) != iommu_pmu->cntr_width) ||
-		    !iommu_cntrcap_ios(cap)) {
-			iommu_pmu->num_cntr = i;
-			pr_warn("PMU counter capability inconsistent, counter number reduced to %d\n",
-				iommu_pmu->num_cntr);
-		}
-
-		/* Clear the pre-defined events group */
-		for (j = 0; j < iommu_pmu->num_eg; j++)
-			iommu_pmu->cntr_evcap[i][j] = 0;
-
-		/* Override with per-counter event capabilities */
-		for (j = 0; j < iommu_cntrcap_egcnt(cap); j++) {
-			cap = dmar_readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET +
-					 IOMMU_PMU_CFG_CNTREVCAP_OFFSET +
-					 (j * IOMMU_PMU_OFF_REGS_STEP));
-			iommu_pmu->cntr_evcap[i][iommu_event_group(cap)] = iommu_event_select(cap);
-			/*
-			 * Some events may only be supported by a specific counter.
-			 * Track them in the evcap as well.
-			 */
-			iommu_pmu->evcap[iommu_event_group(cap)] |= iommu_event_select(cap);
-		}
-	}
-
-	iommu_pmu->iommu = iommu;
-	iommu->pmu = iommu_pmu;
-
-	return 0;
-
-free_pmu_cntr_evcap:
-	for (i = 0; i < iommu_pmu->num_cntr; i++)
-		kfree(iommu_pmu->cntr_evcap[i]);
-	kfree(iommu_pmu->cntr_evcap);
-free_pmu_evcap:
-	kfree(iommu_pmu->evcap);
-free_pmu:
-	kfree(iommu_pmu);
-
-	return ret;
-}
-
-void free_iommu_pmu(struct intel_iommu *iommu)
-{
-	struct iommu_pmu *iommu_pmu = iommu->pmu;
-
-	if (!iommu_pmu)
-		return;
-
-	if (iommu_pmu->evcap) {
-		int i;
-
-		for (i = 0; i < iommu_pmu->num_cntr; i++)
-			kfree(iommu_pmu->cntr_evcap[i]);
-		kfree(iommu_pmu->cntr_evcap);
-	}
-	kfree(iommu_pmu->evcap);
-	kfree(iommu_pmu);
-	iommu->pmu = NULL;
-}
diff --git a/drivers/iommu/intel/perfmon.h b/drivers/iommu/intel/perfmon.h
deleted file mode 100644
index 4b0d9c1fea6f..000000000000
--- a/drivers/iommu/intel/perfmon.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/*
- * PERFCFGOFF_REG, PERFFRZOFF_REG
- * PERFOVFOFF_REG, PERFCNTROFF_REG
- */
-#define IOMMU_PMU_NUM_OFF_REGS		4
-#define IOMMU_PMU_OFF_REGS_STEP		4
-
-#define IOMMU_PMU_CFG_OFFSET		0x100
-#define IOMMU_PMU_CFG_CNTRCAP_OFFSET	0x80
-#define IOMMU_PMU_CFG_CNTREVCAP_OFFSET	0x84
-#define IOMMU_PMU_CFG_SIZE		0x8
-#define IOMMU_PMU_CFG_FILTERS_OFFSET	0x4
-
-#define IOMMU_PMU_CAP_REGS_STEP		8
-
-#define iommu_cntrcap_pcc(p)		((p) & 0x1)
-#define iommu_cntrcap_cw(p)		(((p) >> 8) & 0xff)
-#define iommu_cntrcap_ios(p)		(((p) >> 16) & 0x1)
-#define iommu_cntrcap_egcnt(p)		(((p) >> 28) & 0xf)
-
-#define iommu_event_select(p)		((p) & 0xfffffff)
-#define iommu_event_group(p)		(((p) >> 28) & 0xf)
-
-#ifdef CONFIG_INTEL_IOMMU_PERF_EVENTS
-int alloc_iommu_pmu(struct intel_iommu *iommu);
-void free_iommu_pmu(struct intel_iommu *iommu);
-#else
-static inline int
-alloc_iommu_pmu(struct intel_iommu *iommu)
-{
-	return 0;
-}
-
-static inline void
-free_iommu_pmu(struct intel_iommu *iommu)
-{
-}
-#endif /* CONFIG_INTEL_IOMMU_PERF_EVENTS */
-- 
2.43.0