---
 arch/x86/Kconfig | 2 -
 arch/x86/include/asm/apic.h | 270 +++--
 arch/x86/include/asm/desc.h | 2 +-
 arch/x86/include/asm/hw_irq.h | 6 +-
 arch/x86/include/asm/io_apic.h | 2 +
 arch/x86/include/asm/irq.h | 4 +
 arch/x86/include/asm/irq_vectors.h | 8 +-
 arch/x86/include/asm/irqdomain.h | 1 -
 arch/x86/include/asm/kvm_host.h | 2 +-
 arch/x86/include/asm/trace/irq_vectors.h | 248 -------
 arch/x86/include/asm/x2apic.h | 50 ++
 arch/x86/include/asm/x86_init.h | 2 -
 arch/x86/kernel/apic/Makefile | 2 +-
 arch/x86/kernel/apic/apic.c | 239 ++++---
 arch/x86/kernel/apic/apic_common.c | 46 --
 arch/x86/kernel/apic/apic_flat_64.c | 10 +-
 arch/x86/kernel/apic/apic_noop.c | 25 +-
 arch/x86/kernel/apic/apic_numachip.c | 12 +-
 arch/x86/kernel/apic/bigsmp_32.c | 8 +-
 arch/x86/kernel/apic/io_apic.c | 130 ++--
 arch/x86/kernel/apic/probe_32.c | 29 +-
 arch/x86/kernel/apic/vector.c | 1099 ++++++++++++------------------
 arch/x86/kernel/apic/x2apic.h | 9 -
 arch/x86/kernel/apic/x2apic_cluster.c | 196 ++++--
 arch/x86/kernel/apic/x2apic_phys.c | 44 +-
 arch/x86/kernel/apic/x2apic_uv_x.c | 17 +-
 arch/x86/kernel/i8259.c | 1 -
 arch/x86/kernel/idt.c | 12 +-
 arch/x86/kernel/irq.c | 101 ++-
 arch/x86/kernel/irqinit.c | 4 +-
 arch/x86/kernel/setup.c | 12 +
 arch/x86/kernel/smpboot.c | 99 ++-
 arch/x86/kernel/time.c | 5 -
 arch/x86/kernel/traps.c | 2 +-
 arch/x86/kernel/vsmp_64.c | 19 +
 arch/x86/kernel/x86_init.c | 1 -
 arch/x86/xen/apic.c | 6 +-
 arch/x86/xen/enlighten_pv.c | 1 -
 drivers/iommu/amd_iommu.c | 39 +-
 drivers/iommu/intel_irq_remapping.c | 38 +-
 drivers/pci/msi.c | 2 -
 init/main.c | 2 +-
 kernel/irq/Kconfig | 3 -
 43 files changed, 1317 insertions(+), 1493 deletions(-)
 create mode 100644 arch/x86/include/asm/x2apic.h
 delete mode 100644 arch/x86/kernel/apic/apic_common.c
 delete mode 100644 arch/x86/kernel/apic/x2apic.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8eed3f94bfc7..1a060afd9913 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -93,10 +93,8 @@ config X86
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_IOMAP
 	select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
-	select GENERIC_IRQ_MATRIX_ALLOCATOR if X86_LOCAL_APIC
 	select GENERIC_IRQ_MIGRATION if SMP
 	select GENERIC_IRQ_PROBE
-	select GENERIC_IRQ_RESERVATION_MODE
 	select GENERIC_IRQ_SHOW
 	select GENERIC_PENDING_IRQ if SMP
 	select GENERIC_SMP_IDLE_THREAD
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index a9e57f08bfa6..5f01671c68f2 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -53,15 +53,6 @@ extern int local_apic_timer_c2_ok;
 extern int disable_apic;
 extern unsigned int lapic_timer_frequency;
 
-extern enum apic_intr_mode_id apic_intr_mode;
-enum apic_intr_mode_id {
-	APIC_PIC,
-	APIC_VIRTUAL_WIRE,
-	APIC_VIRTUAL_WIRE_NO_CONFIG,
-	APIC_SYMMETRIC_IO,
-	APIC_SYMMETRIC_IO_NO_ROUTING
-};
-
 #ifdef CONFIG_SMP
 extern void __inquire_remote_apic(int apicid);
 #else /* CONFIG_SMP */
@@ -136,13 +127,14 @@ extern void disconnect_bsp_APIC(int virt_wire_setup);
 extern void disable_local_APIC(void);
 extern void lapic_shutdown(void);
 extern void sync_Arb_IDs(void);
-extern void apic_intr_mode_init(void);
+extern void init_bsp_APIC(void);
 extern void setup_local_APIC(void);
 extern void init_apic_mappings(void);
 void register_lapic_address(unsigned long address);
 extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);
 extern void lapic_update_tsc_freq(void);
+extern int APIC_init_uniprocessor(void);
 
 #ifdef CONFIG_X86_64
 static inline int apic_force_enable(unsigned long addr)
@@ -153,7 +145,7 @@ static
inline int apic_force_enable(unsigned long addr) extern int apic_force_enable(unsigned long addr); #endif -extern void apic_bsp_setup(bool upmode); +extern int apic_bsp_setup(bool upmode); extern void apic_ap_setup(void); /* @@ -169,10 +161,6 @@ static inline int apic_is_clustered_box(void) #endif extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); -extern void lapic_assign_system_vectors(void); -extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); -extern void lapic_online(void); -extern void lapic_offline(void); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } @@ -182,9 +170,6 @@ static inline void disable_local_APIC(void) { } # define setup_boot_APIC_clock x86_init_noop # define setup_secondary_APIC_clock x86_init_noop static inline void lapic_update_tsc_freq(void) { } -static inline void apic_intr_mode_init(void) { } -static inline void lapic_assign_system_vectors(void) { } -static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC @@ -280,63 +265,73 @@ struct irq_data; * James Cleverdon. */ struct apic { - /* Hotpath functions first */ - void (*eoi_write)(u32 reg, u32 v); - void (*native_eoi_write)(u32 reg, u32 v); - void (*write)(u32 reg, u32 v); - u32 (*read)(u32 reg); - - /* IPI related functions */ - void (*wait_icr_idle)(void); - u32 (*safe_wait_icr_idle)(void); - - void (*send_IPI)(int cpu, int vector); - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *msk, int vec); - void (*send_IPI_allbutself)(int vector); - void (*send_IPI_all)(int vector); - void (*send_IPI_self)(int vector); - - /* dest_logical is used by the IPI functions */ - u32 dest_logical; - u32 disable_esr; - u32 irq_delivery_mode; - u32 irq_dest_mode; - - /* Functions and data related to vector allocation */ - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, - const struct cpumask *mask); - int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, - struct irq_data *irqdata, - unsigned int *apicid); - u32 (*calc_dest_apicid)(unsigned int cpu); - - /* ICR related functions */ - u64 (*icr_read)(void); - void (*icr_write)(u32 low, u32 high); - - /* Probe, setup and smpboot functions */ - int (*probe)(void); - int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - int (*apic_id_valid)(int apicid); - int (*apic_id_registered)(void); - - bool (*check_apicid_used)(physid_mask_t *map, int apicid); - void (*init_apic_ldr)(void); - void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); - void (*setup_apic_routing)(void); - int (*cpu_present_to_apicid)(int mps_cpu); - void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); - int (*check_phys_apicid_present)(int phys_apicid); - int (*phys_pkg_id)(int cpuid_apic, int index_msb); - - u32 (*get_apic_id)(unsigned long x); - u32 (*set_apic_id)(unsigned int id); + char *name; + + int (*probe)(void); + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); + int (*apic_id_valid)(int apicid); + int (*apic_id_registered)(void); + + u32 irq_delivery_mode; + u32 irq_dest_mode; + + const struct cpumask *(*target_cpus)(void); + + int disable_esr; + + int dest_logical; + unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); + + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, + const struct cpumask *mask); + void (*init_apic_ldr)(void); + + void 
(*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); + + void (*setup_apic_routing)(void); + int (*cpu_present_to_apicid)(int mps_cpu); + void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); + int (*check_phys_apicid_present)(int phys_apicid); + int (*phys_pkg_id)(int cpuid_apic, int index_msb); + + unsigned int (*get_apic_id)(unsigned long x); + /* Can't be NULL on 64-bit */ + unsigned long (*set_apic_id)(unsigned int id); + + int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, + struct irq_data *irqdata, + unsigned int *apicid); + + /* ipi */ + void (*send_IPI)(int cpu, int vector); + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *mask, + int vector); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); /* wakeup_secondary_cpu */ - int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); + int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); - void (*inquire_remote_apic)(int apicid); + void (*inquire_remote_apic)(int apicid); + + /* apic ops */ + u32 (*read)(u32 reg); + void (*write)(u32 reg, u32 v); + /* + * ->eoi_write() has the same signature as ->write(). + * + * Drivers can support both ->eoi_write() and ->write() by passing the same + * callback value. Kernel can override ->eoi_write() and fall back + * on write for EOI. + */ + void (*eoi_write)(u32 reg, u32 v); + void (*native_eoi_write)(u32 reg, u32 v); + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); + void (*wait_icr_idle)(void); + u32 (*safe_wait_icr_idle)(void); #ifdef CONFIG_X86_32 /* @@ -351,7 +346,6 @@ struct apic { */ int (*x86_32_early_logical_apicid)(int cpu); #endif - char *name; }; /* @@ -386,7 +380,6 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[]; */ #ifdef CONFIG_SMP extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); -extern int lapic_can_unplug_cpu(void); #endif #ifdef CONFIG_X86_LOCAL_APIC @@ -470,33 +463,84 @@ static inline unsigned default_get_apic_id(unsigned long x) extern void apic_send_IPI_self(int vector); DECLARE_PER_CPU(int, x2apic_extra_bits); + +extern int default_cpu_present_to_apicid(int mps_cpu); +extern int default_check_phys_apicid_present(int phys_apicid); #endif extern void generic_bigsmp_probe(void); + #ifdef CONFIG_X86_LOCAL_APIC #include <asm/smp.h> #define APIC_DFR_VALUE (APIC_DFR_FLAT) +static inline const struct cpumask *default_target_cpus(void) +{ +#ifdef CONFIG_SMP + return cpu_online_mask; +#else + return cpumask_of(0); +#endif +} + +static inline const struct cpumask *online_target_cpus(void) +{ + return cpu_online_mask; +} + DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); -extern struct apic apic_noop; static inline unsigned int read_apic_id(void) { - unsigned int reg = apic_read(APIC_ID); + unsigned int reg; + + reg = apic_read(APIC_ID); return apic->get_apic_id(reg); } -extern int default_apic_id_valid(int apicid); +static inline int default_apic_id_valid(int apicid) +{ + return (apicid < 255); +} + extern int default_acpi_madt_oem_check(char *, char *); + extern void default_setup_apic_routing(void); -extern u32 apic_default_calc_apicid(unsigned int cpu); -extern u32 apic_flat_calc_apicid(unsigned int cpu); +extern struct apic apic_noop; + +#ifdef CONFIG_X86_32 + +static inline int noop_x86_32_early_logical_apicid(int cpu) +{ + return BAD_APICID; +} + +/* + * Set up the logical destination ID. 
+ *
+ * Intel recommends to set DFR, LDR and TPR before enabling
+ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116). So here it goes...
+ */
+extern void default_init_apic_ldr(void);
+
+static inline int default_apic_id_registered(void)
+{
+	return physid_isset(read_apic_id(), phys_cpu_present_map);
+}
+
+static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+	return cpuid_apic >> index_msb;
+}
+
+#endif
 
 extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask,
 				   struct irq_data *irqdata,
@@ -504,17 +548,71 @@ extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask,
 extern int default_cpu_mask_to_apicid(const struct cpumask *cpumask,
 				      struct irq_data *irqdata,
 				      unsigned int *apicid);
-extern bool default_check_apicid_used(physid_mask_t *map, int apicid);
-extern void flat_vector_allocation_domain(int cpu, struct cpumask *retmask,
-					  const struct cpumask *mask);
-extern void default_vector_allocation_domain(int cpu, struct cpumask *retmask,
-					     const struct cpumask *mask);
-extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap);
+
+static inline void
+flat_vector_allocation_domain(int cpu, struct cpumask *retmask,
+			      const struct cpumask *mask)
+{
+	/* Careful. Some cpus do not strictly honor the set of cpus
+	 * specified in the interrupt destination when using lowest
+	 * priority interrupt delivery mode.
+	 *
+	 * In particular there was a hyperthreading cpu observed to
+	 * deliver interrupts to the wrong hyperthread when only one
+	 * hyperthread was specified in the interrupt destination.
+	 */
+	cpumask_clear(retmask);
+	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
+}
+
+static inline void
+default_vector_allocation_domain(int cpu, struct cpumask *retmask,
+				 const struct cpumask *mask)
+{
+	cpumask_copy(retmask, cpumask_of(cpu));
+}
+
+static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid)
+{
+	return physid_isset(apicid, *map);
+}
+
+static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
+{
+	*retmap = *phys_map;
+}
+
+static inline int __default_cpu_present_to_apicid(int mps_cpu)
+{
+	if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
+		return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
+	else
+		return BAD_APICID;
+}
+
+static inline int
+__default_check_phys_apicid_present(int phys_apicid)
+{
+	return physid_isset(phys_apicid, phys_cpu_present_map);
+}
+
+#ifdef CONFIG_X86_32
+static inline int default_cpu_present_to_apicid(int mps_cpu)
+{
+	return __default_cpu_present_to_apicid(mps_cpu);
+}
+
+static inline int
+default_check_phys_apicid_present(int phys_apicid)
+{
+	return __default_check_phys_apicid_present(phys_apicid);
+}
+#else
 extern int default_cpu_present_to_apicid(int mps_cpu);
 extern int default_check_phys_apicid_present(int phys_apicid);
+#endif
 
 #endif /* CONFIG_X86_LOCAL_APIC */
-
 extern void irq_enter(void);
 extern void irq_exit(void);
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4011cb03ef08..0a3e808b9123 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -393,7 +393,7 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
 void update_intr_gate(unsigned int n, const void *addr);
 void alloc_intr_gate(unsigned int n, const void *addr);
 
-extern unsigned long system_vectors[];
+extern unsigned long used_vectors[];
 
 #ifdef CONFIG_X86_64
 DECLARE_PER_CPU(u32, debug_idt_ctr);
diff --git a/arch/x86/include/asm/hw_irq.h
b/arch/x86/include/asm/hw_irq.h index 2851077b6051..16d48a18c069 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -16,8 +16,6 @@ #include <asm/irq_vectors.h> -#define IRQ_MATRIX_BITS NR_VECTORS - #ifndef __ASSEMBLY__ #include <linux/percpu.h> @@ -117,13 +115,15 @@ struct irq_alloc_info { struct irq_cfg { unsigned int dest_apicid; - unsigned int vector; + u8 vector; + u8 old_vector; }; extern struct irq_cfg *irq_cfg(unsigned int irq); extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); extern void lock_vector_lock(void); extern void unlock_vector_lock(void); +extern void setup_vector_irq(int cpu); #ifdef CONFIG_SMP extern void send_cleanup_vector(struct irq_cfg *); extern void irq_complete_move(struct irq_cfg *cfg); diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index a8834dd546cd..5c27e146a166 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -193,6 +193,7 @@ static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) extern void setup_IO_APIC(void); extern void enable_IO_APIC(void); extern void disable_IO_APIC(void); +extern void setup_ioapic_dest(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin); extern void print_IO_APICs(void); #else /* !CONFIG_X86_IO_APIC */ @@ -232,6 +233,7 @@ static inline void io_apic_init_mappings(void) { } static inline void setup_IO_APIC(void) { } static inline void enable_IO_APIC(void) { } +static inline void setup_ioapic_dest(void) { } #endif diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 2395bb794c7b..d8632f8fa17d 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -26,7 +26,11 @@ extern void irq_ctx_init(int cpu); struct irq_desc; +#ifdef CONFIG_HOTPLUG_CPU +#include <linux/cpumask.h> +extern int check_irq_vectors_for_cpu_disable(void); extern void fixup_irqs(void); +#endif #ifdef CONFIG_HAVE_KVM extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 67421f649cfa..c20ffca8fef1 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -102,8 +102,12 @@ #define POSTED_INTR_NESTED_VECTOR 0xf0 #endif -#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef -#define LOCAL_TIMER_VECTOR 0xee +/* + * Local APIC timer IRQ vector is on a different priority level, + * to work around the 'lost local interrupt if more than 2 IRQ + * sources per level' errata. 
+ */ +#define LOCAL_TIMER_VECTOR 0xef #define NR_VECTORS 256 diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h index 139feef467f7..4e5638539846 100644 --- a/arch/x86/include/asm/irqdomain.h +++ b/arch/x86/include/asm/irqdomain.h @@ -9,7 +9,6 @@ enum { /* Allocate contiguous CPU vectors */ X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1, - X86_IRQ_ALLOC_LEGACY = 0x2, }; extern struct irq_domain *x86_vector_domain; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1bfb99770c34..7233445a20bd 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1424,7 +1424,7 @@ static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} static inline int kvm_cpu_get_apicid(int mps_cpu) { #ifdef CONFIG_X86_LOCAL_APIC - return default_cpu_present_to_apicid(mps_cpu); + return __default_cpu_present_to_apicid(mps_cpu); #else WARN_ON_ONCE(1); return BAD_APICID; diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 84b9ec0c1bc0..8eb139ed1a03 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -138,254 +138,6 @@ DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic); DEFINE_IRQ_VECTOR_EVENT(thermal_apic); #endif -TRACE_EVENT(vector_config, - - TP_PROTO(unsigned int irq, unsigned int vector, - unsigned int cpu, unsigned int apicdest), - - TP_ARGS(irq, vector, cpu, apicdest), - - TP_STRUCT__entry( - __field( unsigned int, irq ) - __field( unsigned int, vector ) - __field( unsigned int, cpu ) - __field( unsigned int, apicdest ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->vector = vector; - __entry->cpu = cpu; - __entry->apicdest = apicdest; - ), - - TP_printk("irq=%u vector=%u cpu=%u apicdest=0x%08x", - __entry->irq, __entry->vector, __entry->cpu, - __entry->apicdest) -); - -DECLARE_EVENT_CLASS(vector_mod, - - TP_PROTO(unsigned int irq, unsigned int vector, - unsigned int cpu, unsigned int prev_vector, - unsigned int prev_cpu), - - TP_ARGS(irq, vector, cpu, prev_vector, prev_cpu), - - TP_STRUCT__entry( - __field( unsigned int, irq ) - __field( unsigned int, vector ) - __field( unsigned int, cpu ) - __field( unsigned int, prev_vector ) - __field( unsigned int, prev_cpu ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->vector = vector; - __entry->cpu = cpu; - __entry->prev_vector = prev_vector; - __entry->prev_cpu = prev_cpu; - - ), - - TP_printk("irq=%u vector=%u cpu=%u prev_vector=%u prev_cpu=%u", - __entry->irq, __entry->vector, __entry->cpu, - __entry->prev_vector, __entry->prev_cpu) -); - -#define DEFINE_IRQ_VECTOR_MOD_EVENT(name) \ -DEFINE_EVENT_FN(vector_mod, name, \ - TP_PROTO(unsigned int irq, unsigned int vector, \ - unsigned int cpu, unsigned int prev_vector, \ - unsigned int prev_cpu), \ - TP_ARGS(irq, vector, cpu, prev_vector, prev_cpu), NULL, NULL); \ - -DEFINE_IRQ_VECTOR_MOD_EVENT(vector_update); -DEFINE_IRQ_VECTOR_MOD_EVENT(vector_clear); - -DECLARE_EVENT_CLASS(vector_reserve, - - TP_PROTO(unsigned int irq, int ret), - - TP_ARGS(irq, ret), - - TP_STRUCT__entry( - __field( unsigned int, irq ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->ret = ret; - ), - - TP_printk("irq=%u ret=%d", __entry->irq, __entry->ret) -); - -#define DEFINE_IRQ_VECTOR_RESERVE_EVENT(name) \ -DEFINE_EVENT_FN(vector_reserve, name, \ - TP_PROTO(unsigned int irq, int ret), \ - TP_ARGS(irq, ret), NULL, NULL); \ - -DEFINE_IRQ_VECTOR_RESERVE_EVENT(vector_reserve_managed); 
-DEFINE_IRQ_VECTOR_RESERVE_EVENT(vector_reserve); - -TRACE_EVENT(vector_alloc, - - TP_PROTO(unsigned int irq, unsigned int vector, bool reserved, - int ret), - - TP_ARGS(irq, vector, ret, reserved), - - TP_STRUCT__entry( - __field( unsigned int, irq ) - __field( unsigned int, vector ) - __field( bool, reserved ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->vector = ret < 0 ? 0 : vector; - __entry->reserved = reserved; - __entry->ret = ret > 0 ? 0 : ret; - ), - - TP_printk("irq=%u vector=%u reserved=%d ret=%d", - __entry->irq, __entry->vector, - __entry->reserved, __entry->ret) -); - -TRACE_EVENT(vector_alloc_managed, - - TP_PROTO(unsigned int irq, unsigned int vector, - int ret), - - TP_ARGS(irq, vector, ret), - - TP_STRUCT__entry( - __field( unsigned int, irq ) - __field( unsigned int, vector ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->vector = ret < 0 ? 0 : vector; - __entry->ret = ret > 0 ? 0 : ret; - ), - - TP_printk("irq=%u vector=%u ret=%d", - __entry->irq, __entry->vector, __entry->ret) -); - -DECLARE_EVENT_CLASS(vector_activate, - - TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve, - bool early), - - TP_ARGS(irq, is_managed, can_reserve, early), - - TP_STRUCT__entry( - __field( unsigned int, irq ) - __field( bool, is_managed ) - __field( bool, can_reserve ) - __field( bool, early ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->is_managed = is_managed; - __entry->can_reserve = can_reserve; - __entry->early = early; - ), - - TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d", - __entry->irq, __entry->is_managed, __entry->can_reserve, - __entry->early) -); - -#define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \ -DEFINE_EVENT_FN(vector_activate, name, \ - TP_PROTO(unsigned int irq, bool is_managed, \ - bool can_reserve, bool early), \ - TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL); \ - -DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate); -DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate); - -TRACE_EVENT(vector_teardown, - - TP_PROTO(unsigned int irq, bool is_managed, bool has_reserved), - - TP_ARGS(irq, is_managed, has_reserved), - - TP_STRUCT__entry( - __field( unsigned int, irq ) - __field( bool, is_managed ) - __field( bool, has_reserved ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->is_managed = is_managed; - __entry->has_reserved = has_reserved; - ), - - TP_printk("irq=%u is_managed=%d has_reserved=%d", - __entry->irq, __entry->is_managed, __entry->has_reserved) -); - -TRACE_EVENT(vector_setup, - - TP_PROTO(unsigned int irq, bool is_legacy, int ret), - - TP_ARGS(irq, is_legacy, ret), - - TP_STRUCT__entry( - __field( unsigned int, irq ) - __field( bool, is_legacy ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->is_legacy = is_legacy; - __entry->ret = ret; - ), - - TP_printk("irq=%u is_legacy=%d ret=%d", - __entry->irq, __entry->is_legacy, __entry->ret) -); - -TRACE_EVENT(vector_free_moved, - - TP_PROTO(unsigned int irq, unsigned int cpu, unsigned int vector, - bool is_managed), - - TP_ARGS(irq, cpu, vector, is_managed), - - TP_STRUCT__entry( - __field( unsigned int, irq ) - __field( unsigned int, cpu ) - __field( unsigned int, vector ) - __field( bool, is_managed ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->cpu = cpu; - __entry->vector = vector; - __entry->is_managed = is_managed; - ), - - TP_printk("irq=%u cpu=%u vector=%u is_managed=%d", - __entry->irq, __entry->cpu, __entry->vector, - __entry->is_managed) 
-); - - #endif /* CONFIG_X86_LOCAL_APIC */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h new file mode 100644 index 000000000000..78ccf28d17db --- /dev/null +++ b/arch/x86/include/asm/x2apic.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common bits for X2APIC cluster/physical modes. + */ + +#ifndef _ASM_X86_X2APIC_H +#define _ASM_X86_X2APIC_H + +#include <asm/apic.h> +#include <asm/ipi.h> +#include <linux/cpumask.h> + +static int x2apic_apic_id_valid(int apicid) +{ + return 1; +} + +static int x2apic_apic_id_registered(void) +{ + return 1; +} + +static void +__x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) +{ + unsigned long cfg = __prepare_ICR(0, vector, dest); + native_x2apic_icr_write(cfg, apicid); +} + +static unsigned int x2apic_get_apic_id(unsigned long id) +{ + return id; +} + +static unsigned long x2apic_set_apic_id(unsigned int id) +{ + return id; +} + +static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) +{ + return initial_apicid >> index_msb; +} + +static void x2apic_send_IPI_self(int vector) +{ + apic_write(APIC_SELF_IPI, vector); +} + +#endif /* _ASM_X86_X2APIC_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index aa4747569e23..5dd011a8b560 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -51,13 +51,11 @@ struct x86_init_resources { * are set up. * @intr_init: interrupt init code * @trap_init: platform specific trap setup - * @intr_mode_init: interrupt delivery mode setup */ struct x86_init_irqs { void (*pre_vector_init)(void); void (*intr_init)(void); void (*trap_init)(void); - void (*intr_mode_init)(void); }; /** diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index a6fcaf16cdbf..e59e341f9921 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -7,7 +7,7 @@ # In particualr, smp_apic_timer_interrupt() is called in random places. KCOV_INSTRUMENT := n -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_common.o apic_noop.o ipi.o vector.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o vector.o obj-y += hw_nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 6e272f3ea984..f72ecd5c39a7 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -211,7 +211,11 @@ static inline int lapic_get_version(void) */ static inline int lapic_is_integrated(void) { +#ifdef CONFIG_X86_64 + return 1; +#else return APIC_INTEGRATED(lapic_get_version()); +#endif } /* @@ -294,11 +298,14 @@ int get_physical_broadcast(void) */ int lapic_get_maxlvt(void) { + unsigned int v; + + v = apic_read(APIC_LVR); /* * - we always have APIC integrated on 64bit mode * - 82489DXs do not report # of LVT entries */ - return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2; + return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } /* @@ -1222,100 +1229,53 @@ void __init sync_Arb_IDs(void) APIC_INT_LEVELTRIG | APIC_DM_INIT); } -enum apic_intr_mode_id apic_intr_mode; - -static int __init apic_intr_mode_select(void) +/* + * An initial setup of the virtual wire mode. 
+ */ +void __init init_bsp_APIC(void) { - /* Check kernel option */ - if (disable_apic) { - pr_info("APIC disabled via kernel command line\n"); - return APIC_PIC; - } - - /* Check BIOS */ -#ifdef CONFIG_X86_64 - /* On 64-bit, the APIC must be integrated, Check local APIC only */ - if (!boot_cpu_has(X86_FEATURE_APIC)) { - disable_apic = 1; - pr_info("APIC disabled by BIOS\n"); - return APIC_PIC; - } -#else - /* On 32-bit, the APIC may be integrated APIC or 82489DX */ - - /* Neither 82489DX nor integrated APIC ? */ - if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) { - disable_apic = 1; - return APIC_PIC; - } + unsigned int value; - /* If the BIOS pretends there is an integrated APIC ? */ - if (!boot_cpu_has(X86_FEATURE_APIC) && - APIC_INTEGRATED(boot_cpu_apic_version)) { - disable_apic = 1; - pr_err(FW_BUG "Local APIC %d not detected, force emulation\n", - boot_cpu_physical_apicid); - return APIC_PIC; - } -#endif + /* + * Don't do the setup now if we have a SMP BIOS as the + * through-I/O-APIC virtual wire mode might be active. + */ + if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC)) + return; - /* Check MP table or ACPI MADT configuration */ - if (!smp_found_config) { - disable_ioapic_support(); - if (!acpi_lapic) { - pr_info("APIC: ACPI MADT or MP tables are not detected\n"); - return APIC_VIRTUAL_WIRE_NO_CONFIG; - } - return APIC_VIRTUAL_WIRE; - } + /* + * Do not trust the local APIC being empty at bootup. + */ + clear_local_APIC(); -#ifdef CONFIG_SMP - /* If SMP should be disabled, then really disable it! */ - if (!setup_max_cpus) { - pr_info("APIC: SMP mode deactivated\n"); - return APIC_SYMMETRIC_IO_NO_ROUTING; - } + /* + * Enable APIC. + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; - if (read_apic_id() != boot_cpu_physical_apicid) { - panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - read_apic_id(), boot_cpu_physical_apicid); - /* Or can we switch back to PIC here? */ - } +#ifdef CONFIG_X86_32 + /* This bit is reserved on P4/Xeon and should be cleared */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + (boot_cpu_data.x86 == 15)) + value &= ~APIC_SPIV_FOCUS_DISABLED; + else #endif + value |= APIC_SPIV_FOCUS_DISABLED; + value |= SPURIOUS_APIC_VECTOR; + apic_write(APIC_SPIV, value); - return APIC_SYMMETRIC_IO; -} - -/* Init the interrupt delivery mode for the BSP */ -void __init apic_intr_mode_init(void) -{ - bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT); - - apic_intr_mode = apic_intr_mode_select(); - - switch (apic_intr_mode) { - case APIC_PIC: - pr_info("APIC: Keep in PIC mode(8259)\n"); - return; - case APIC_VIRTUAL_WIRE: - pr_info("APIC: Switch to virtual wire mode setup\n"); - default_setup_apic_routing(); - break; - case APIC_VIRTUAL_WIRE_NO_CONFIG: - pr_info("APIC: Switch to virtual wire mode setup with no configuration\n"); - upmode = true; - default_setup_apic_routing(); - break; - case APIC_SYMMETRIC_IO: - pr_info("APIC: Switch to symmetric I/O mode setup\n"); - default_setup_apic_routing(); - break; - case APIC_SYMMETRIC_IO_NO_ROUTING: - pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n"); - break; - } - - apic_bsp_setup(upmode); + /* + * Set up the virtual wire mode. 
+ */ + apic_write(APIC_LVT0, APIC_DM_EXTINT); + value = APIC_DM_NMI; + if (!lapic_is_integrated()) /* 82489DX */ + value |= APIC_LVT_LEVEL_TRIGGER; + if (apic_extnmi == APIC_EXTNMI_NONE) + value |= APIC_LVT_MASKED; + apic_write(APIC_LVT1, value); } static void lapic_setup_esr(void) @@ -1539,9 +1499,7 @@ void setup_local_APIC(void) value = APIC_DM_NMI; else value = APIC_DM_NMI | APIC_LVT_MASKED; - - /* Is 82489DX ? */ - if (!lapic_is_integrated()) + if (!lapic_is_integrated()) /* 82489DX */ value |= APIC_LVT_LEVEL_TRIGGER; apic_write(APIC_LVT1, value); @@ -1927,8 +1885,8 @@ void __init init_apic_mappings(void) * yeah -- we lie about apic_version * in case if apic was disabled via boot option * but it's not a problem for SMP compiled kernel - * since apic_intr_mode_select is prepared for such - * a case and disable smp mode + * since smp_sanity_check is prepared for such a case + * and disable smp mode */ boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); } @@ -2284,6 +2242,44 @@ int hard_smp_processor_id(void) return read_apic_id(); } +void default_init_apic_ldr(void) +{ + unsigned long val; + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); + apic_write(APIC_LDR, val); +} + +int default_cpu_mask_to_apicid(const struct cpumask *mask, + struct irq_data *irqdata, + unsigned int *apicid) +{ + unsigned int cpu = cpumask_first(mask); + + if (cpu >= nr_cpu_ids) + return -EINVAL; + *apicid = per_cpu(x86_cpu_to_apicid, cpu); + irq_data_update_effective_affinity(irqdata, cpumask_of(cpu)); + return 0; +} + +int flat_cpu_mask_to_apicid(const struct cpumask *mask, + struct irq_data *irqdata, + unsigned int *apicid) + +{ + struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata); + unsigned long cpu_mask = cpumask_bits(mask)[0] & APIC_ALL_CPUS; + + if (!cpu_mask) + return -EINVAL; + *apicid = (unsigned int)cpu_mask; + cpumask_bits(effmsk)[0] = cpu_mask; + return 0; +} + /* * Override the generic EOI implementation with an optimized version. * Only called during early boot when only one CPU is active and with @@ -2326,27 +2322,72 @@ static void __init apic_bsp_up_setup(void) * Returns: * apic_id of BSP APIC */ -void __init apic_bsp_setup(bool upmode) +int __init apic_bsp_setup(bool upmode) { + int id; + connect_bsp_APIC(); if (upmode) apic_bsp_up_setup(); setup_local_APIC(); + if (x2apic_mode) + id = apic_read(APIC_LDR); + else + id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); + enable_IO_APIC(); end_local_APIC_setup(); irq_remap_enable_fault_handling(); setup_IO_APIC(); + /* Setup local timer */ + x86_init.timers.setup_percpu_clockev(); + return id; +} + +/* + * This initializes the IO-APIC and APIC hardware if this is + * a UP kernel. + */ +int __init APIC_init_uniprocessor(void) +{ + if (disable_apic) { + pr_info("Apic disabled\n"); + return -1; + } +#ifdef CONFIG_X86_64 + if (!boot_cpu_has(X86_FEATURE_APIC)) { + disable_apic = 1; + pr_info("Apic disabled by BIOS\n"); + return -1; + } +#else + if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC)) + return -1; + + /* + * Complain if the BIOS pretends there is one. 
+ */ + if (!boot_cpu_has(X86_FEATURE_APIC) && + APIC_INTEGRATED(boot_cpu_apic_version)) { + pr_err("BIOS bug, local APIC 0x%x not detected!...\n", + boot_cpu_physical_apicid); + return -1; + } +#endif + + if (!smp_found_config) + disable_ioapic_support(); + + default_setup_apic_routing(); + apic_bsp_setup(true); + return 0; } #ifdef CONFIG_UP_LATE_INIT void __init up_late_init(void) { - if (apic_intr_mode == APIC_PIC) - return; - - /* Setup local timer */ - x86_init.timers.setup_percpu_clockev(); + APIC_init_uniprocessor(); } #endif diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c deleted file mode 100644 index a360801779ae..000000000000 --- a/arch/x86/kernel/apic/apic_common.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Common functions shared between the various APIC flavours - * - * SPDX-License-Identifier: GPL-2.0 - */ -#include <linux/irq.h> -#include <asm/apic.h> - -u32 apic_default_calc_apicid(unsigned int cpu) -{ - return per_cpu(x86_cpu_to_apicid, cpu); -} - -u32 apic_flat_calc_apicid(unsigned int cpu) -{ - return 1U << cpu; -} - -bool default_check_apicid_used(physid_mask_t *map, int apicid) -{ - return physid_isset(apicid, *map); -} - -void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ - *retmap = *phys_map; -} - -int default_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} -EXPORT_SYMBOL_GPL(default_cpu_present_to_apicid); - -int default_check_phys_apicid_present(int phys_apicid) -{ - return physid_isset(phys_apicid, phys_cpu_present_map); -} - -int default_apic_id_valid(int apicid) -{ - return (apicid < 255); -} diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index aa85690e9b64..dedd5a41ba48 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -119,7 +119,7 @@ static unsigned int flat_get_apic_id(unsigned long x) return (x >> 24) & 0xFF; } -static u32 set_apic_id(unsigned int id) +static unsigned long set_apic_id(unsigned int id) { return (id & 0xFF) << 24; } @@ -154,10 +154,12 @@ static struct apic apic_flat __ro_after_init = { .irq_delivery_mode = dest_LowestPrio, .irq_dest_mode = 1, /* logical */ + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, + .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -170,7 +172,7 @@ static struct apic apic_flat __ro_after_init = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .calc_dest_apicid = apic_flat_calc_apicid, + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .send_IPI = default_send_IPI_single, .send_IPI_mask = flat_send_IPI_mask, @@ -247,10 +249,12 @@ static struct apic apic_physflat __ro_after_init = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, + .vector_allocation_domain = default_vector_allocation_domain, /* not needed, but shouldn't hurt: */ .init_apic_ldr = flat_init_apic_ldr, @@ -264,7 +268,7 @@ static struct apic apic_physflat __ro_after_init = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .calc_dest_apicid = apic_default_calc_apicid, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .send_IPI = default_send_IPI_single_phys, .send_IPI_mask = 
default_send_IPI_mask_sequence_phys, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 7b659c4480c9..c8d211277315 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -84,6 +84,20 @@ static int noop_apic_id_registered(void) return physid_isset(0, phys_cpu_present_map); } +static const struct cpumask *noop_target_cpus(void) +{ + /* only BSP here */ + return cpumask_of(0); +} + +static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask) +{ + if (cpu != 0) + pr_warning("APIC: Vector allocated for non-BSP cpu\n"); + cpumask_copy(retmask, cpumask_of(cpu)); +} + static u32 noop_apic_read(u32 reg) { WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); @@ -95,13 +109,6 @@ static void noop_apic_write(u32 reg, u32 v) WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic); } -#ifdef CONFIG_X86_32 -static int noop_x86_32_early_logical_apicid(int cpu) -{ - return BAD_APICID; -} -#endif - struct apic apic_noop __ro_after_init = { .name = "noop", .probe = noop_probe, @@ -114,10 +121,12 @@ struct apic apic_noop __ro_after_init = { /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, + .target_cpus = noop_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = default_check_apicid_used, + .vector_allocation_domain = noop_vector_allocation_domain, .init_apic_ldr = noop_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, @@ -133,7 +142,7 @@ struct apic apic_noop __ro_after_init = { .get_apic_id = noop_get_apic_id, .set_apic_id = NULL, - .calc_dest_apicid = apic_flat_calc_apicid, + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .send_IPI = noop_send_IPI, .send_IPI_mask = noop_send_IPI_mask, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 134e04506ab4..2fda912219a6 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -38,7 +38,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x) return id; } -static u32 numachip1_set_apic_id(unsigned int id) +static unsigned long numachip1_set_apic_id(unsigned int id) { return (id & 0xff) << 24; } @@ -51,7 +51,7 @@ static unsigned int numachip2_get_apic_id(unsigned long x) return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24); } -static u32 numachip2_set_apic_id(unsigned int id) +static unsigned long numachip2_set_apic_id(unsigned int id) { return id << 24; } @@ -249,10 +249,12 @@ static const struct apic apic_numachip1 __refconst = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -265,7 +267,7 @@ static const struct apic apic_numachip1 __refconst = { .get_apic_id = numachip1_get_apic_id, .set_apic_id = numachip1_set_apic_id, - .calc_dest_apicid = apic_default_calc_apicid, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .send_IPI = numachip_send_IPI_one, .send_IPI_mask = numachip_send_IPI_mask, @@ -298,10 +300,12 @@ static const struct apic apic_numachip2 __refconst = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, 
.ioapic_phys_id_map = NULL, @@ -314,7 +318,7 @@ static const struct apic apic_numachip2 __refconst = { .get_apic_id = numachip2_get_apic_id, .set_apic_id = numachip2_set_apic_id, - .calc_dest_apicid = apic_default_calc_apicid, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .send_IPI = numachip_send_IPI_one, .send_IPI_mask = numachip_send_IPI_mask, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index afee386ff711..e12fbcfc9571 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -27,9 +27,9 @@ static int bigsmp_apic_id_registered(void) return 1; } -static bool bigsmp_check_apicid_used(physid_mask_t *map, int apicid) +static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) { - return false; + return 0; } static int bigsmp_early_logical_apicid(int cpu) @@ -155,10 +155,12 @@ static struct apic apic_bigsmp __ro_after_init = { /* phys delivery to target CPU: */ .irq_dest_mode = 0, + .target_cpus = default_target_cpus, .disable_esr = 1, .dest_logical = 0, .check_apicid_used = bigsmp_check_apicid_used, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = bigsmp_init_apic_ldr, .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, @@ -171,7 +173,7 @@ static struct apic apic_bigsmp __ro_after_init = { .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, - .calc_dest_apicid = apic_default_calc_apicid, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .send_IPI = default_send_IPI_single_phys, .send_IPI_mask = default_send_IPI_mask_sequence_phys, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 201579dc5242..18c8aca5bae7 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1014,7 +1014,6 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain, info->ioapic_pin)) return -ENOMEM; } else { - info->flags |= X86_IRQ_ALLOC_LEGACY; irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, NULL); if (irq >= 0) { @@ -1587,43 +1586,6 @@ static int __init notimercheck(char *s) } __setup("no_timer_check", notimercheck); -static void __init delay_with_tsc(void) -{ - unsigned long long start, now; - unsigned long end = jiffies + 4; - - start = rdtsc(); - - /* - * We don't know the TSC frequency yet, but waiting for - * 40000000000/HZ TSC cycles is safe: - * 4 GHz == 10 jiffies - * 1 GHz == 40 jiffies - */ - do { - rep_nop(); - now = rdtsc(); - } while ((now - start) < 40000000000UL / HZ && - time_before_eq(jiffies, end)); -} - -static void __init delay_without_tsc(void) -{ - unsigned long end = jiffies + 4; - int band = 1; - - /* - * We don't know any frequency yet, but waiting for - * 40940000000/HZ cycles is safe: - * 4 GHz == 10 jiffies - * 1 GHz == 40 jiffies - * 1 << 1 + 1 << 2 +...+ 1 << 11 = 4094 - */ - do { - __delay(((1U << band++) * 10000000UL) / HZ); - } while (band < 12 && time_before_eq(jiffies, end)); -} - /* * There is a nasty bug in some older SMP boards, their mptable lies * about the timer IRQ. We do the following to work around the situation: @@ -1642,12 +1604,8 @@ static int __init timer_irq_works(void) local_save_flags(flags); local_irq_enable(); - - if (boot_cpu_has(X86_FEATURE_TSC)) - delay_with_tsc(); - else - delay_without_tsc(); - + /* Let ten ticks pass... 
*/ + mdelay((10 * 1000) / HZ); local_irq_restore(flags); /* @@ -1863,36 +1821,26 @@ static void ioapic_ir_ack_level(struct irq_data *irq_data) eoi_ioapic_pin(data->entry.vector, data); } -static void ioapic_configure_entry(struct irq_data *irqd) -{ - struct mp_chip_data *mpd = irqd->chip_data; - struct irq_cfg *cfg = irqd_cfg(irqd); - struct irq_pin_list *entry; - - /* - * Only update when the parent is the vector domain, don't touch it - * if the parent is the remapping domain. Check the installed - * ioapic chip to verify that. - */ - if (irqd->chip == &ioapic_chip) { - mpd->entry.dest = cfg->dest_apicid; - mpd->entry.vector = cfg->vector; - } - for_each_irq_pin(entry, mpd->irq_2_pin) - __ioapic_write_entry(entry->apic, entry->pin, mpd->entry); -} - static int ioapic_set_affinity(struct irq_data *irq_data, const struct cpumask *mask, bool force) { struct irq_data *parent = irq_data->parent_data; + struct mp_chip_data *data = irq_data->chip_data; + struct irq_pin_list *entry; + struct irq_cfg *cfg; unsigned long flags; int ret; ret = parent->chip->irq_set_affinity(parent, mask, force); raw_spin_lock_irqsave(&ioapic_lock, flags); - if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) - ioapic_configure_entry(irq_data); + if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) { + cfg = irqd_cfg(irq_data); + data->entry.dest = cfg->dest_apicid; + data->entry.vector = cfg->vector; + for_each_irq_pin(entry, data->irq_2_pin) + __ioapic_write_entry(entry->apic, entry->pin, + data->entry); + } raw_spin_unlock_irqrestore(&ioapic_lock, flags); return ret; @@ -2565,9 +2513,52 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) } /* - * This function updates target affinity of IOAPIC interrupts to include - * the CPUs which came online during SMP bringup. + * This function currently is only a helper for the i386 smp boot process where + * we need to reprogram the ioredtbls to cater for the cpus which have come online + * so mask in all cases should simply be apic->target_cpus() */ +#ifdef CONFIG_SMP +void __init setup_ioapic_dest(void) +{ + int pin, ioapic, irq, irq_entry; + const struct cpumask *mask; + struct irq_desc *desc; + struct irq_data *idata; + struct irq_chip *chip; + + if (skip_ioapic_setup == 1) + return; + + for_each_ioapic_pin(ioapic, pin) { + irq_entry = find_irq_entry(ioapic, pin, mp_INT); + if (irq_entry == -1) + continue; + + irq = pin_2_irq(irq_entry, ioapic, pin, 0); + if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq)) + continue; + + desc = irq_to_desc(irq); + raw_spin_lock_irq(&desc->lock); + idata = irq_desc_get_irq_data(desc); + + /* + * Honour affinities which have been set in early boot + */ + if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata)) + mask = irq_data_get_affinity_mask(idata); + else + mask = apic->target_cpus(); + + chip = irq_data_get_irq_chip(idata); + /* Might be lapic_chip for irq 0 */ + if (chip->irq_set_affinity) + chip->irq_set_affinity(idata, mask, false); + raw_spin_unlock_irq(&desc->lock); + } +} +#endif + #define IOAPIC_RESOURCE_NAME_SIZE 11 static struct resource *ioapic_resources; @@ -2991,9 +2982,12 @@ int mp_irqdomain_activate(struct irq_domain *domain, struct irq_data *irq_data, bool early) { unsigned long flags; + struct irq_pin_list *entry; + struct mp_chip_data *data = irq_data->chip_data; raw_spin_lock_irqsave(&ioapic_lock, flags); - ioapic_configure_entry(irq_data); + for_each_irq_pin(entry, data->irq_2_pin) + __ioapic_write_entry(entry->apic, entry->pin, data->entry); raw_spin_unlock_irqrestore(&ioapic_lock, flags); return 0; } diff 
--git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index fa22017de806..63287659adb6 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -66,31 +66,6 @@ static void setup_apic_flat_routing(void) #endif } -static int default_apic_id_registered(void) -{ - return physid_isset(read_apic_id(), phys_cpu_present_map); -} - -/* - * Set up the logical destination ID. Intel recommends to set DFR, LDR and - * TPR before enabling an APIC. See e.g. "AP-388 82489DX User's Manual" - * (Intel document number 292116). - */ -static void default_init_apic_ldr(void) -{ - unsigned long val; - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); - apic_write(APIC_LDR, val); -} - -static int default_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - /* should be called last. */ static int probe_default(void) { @@ -109,10 +84,12 @@ static struct apic apic_default __ro_after_init = { /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, + .target_cpus = default_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = default_check_apicid_used, + .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, @@ -125,7 +102,7 @@ static struct apic apic_default __ro_after_init = { .get_apic_id = default_get_apic_id, .set_apic_id = NULL, - .calc_dest_apicid = apic_flat_calc_apicid, + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .send_IPI = default_send_IPI_single, .send_IPI_mask = default_send_IPI_mask_logical, diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6a823a25eaff..e42fdd44972f 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -11,7 +11,6 @@ * published by the Free Software Foundation. 
*/ #include <linux/interrupt.h> -#include <linux/seq_file.h> #include <linux/init.h> #include <linux/compiler.h> #include <linux/slab.h> @@ -22,30 +21,20 @@ #include <asm/desc.h> #include <asm/irq_remapping.h> -#include <asm/trace/irq_vectors.h> - struct apic_chip_data { - struct irq_cfg hw_irq_cfg; - unsigned int vector; - unsigned int prev_vector; - unsigned int cpu; - unsigned int prev_cpu; - unsigned int irq; - struct hlist_node clist; - unsigned int move_in_progress : 1, - is_managed : 1, - can_reserve : 1, - has_reserved : 1; + struct irq_cfg cfg; + cpumask_var_t domain; + cpumask_var_t old_domain; + u8 move_in_progress : 1; }; struct irq_domain *x86_vector_domain; EXPORT_SYMBOL_GPL(x86_vector_domain); static DEFINE_RAW_SPINLOCK(vector_lock); -static cpumask_var_t vector_searchmask; +static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask; static struct irq_chip lapic_controller; -static struct irq_matrix *vector_matrix; -#ifdef CONFIG_SMP -static DEFINE_PER_CPU(struct hlist_head, cleanup_list); +#ifdef CONFIG_X86_IO_APIC +static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY]; #endif void lock_vector_lock(void) @@ -61,37 +50,22 @@ void unlock_vector_lock(void) raw_spin_unlock(&vector_lock); } -void init_irq_alloc_info(struct irq_alloc_info *info, - const struct cpumask *mask) -{ - memset(info, 0, sizeof(*info)); - info->mask = mask; -} - -void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) +static struct apic_chip_data *apic_chip_data(struct irq_data *irq_data) { - if (src) - *dst = *src; - else - memset(dst, 0, sizeof(*dst)); -} - -static struct apic_chip_data *apic_chip_data(struct irq_data *irqd) -{ - if (!irqd) + if (!irq_data) return NULL; - while (irqd->parent_data) - irqd = irqd->parent_data; + while (irq_data->parent_data) + irq_data = irq_data->parent_data; - return irqd->chip_data; + return irq_data->chip_data; } -struct irq_cfg *irqd_cfg(struct irq_data *irqd) +struct irq_cfg *irqd_cfg(struct irq_data *irq_data) { - struct apic_chip_data *apicd = apic_chip_data(irqd); + struct apic_chip_data *data = apic_chip_data(irq_data); - return apicd ? &apicd->hw_irq_cfg : NULL; + return data ? 
&data->cfg : NULL; } EXPORT_SYMBOL_GPL(irqd_cfg); @@ -102,395 +76,270 @@ struct irq_cfg *irq_cfg(unsigned int irq) static struct apic_chip_data *alloc_apic_chip_data(int node) { - struct apic_chip_data *apicd; - - apicd = kzalloc_node(sizeof(*apicd), GFP_KERNEL, node); - if (apicd) - INIT_HLIST_NODE(&apicd->clist); - return apicd; -} - -static void free_apic_chip_data(struct apic_chip_data *apicd) -{ - kfree(apicd); -} - -static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector, - unsigned int cpu) -{ - struct apic_chip_data *apicd = apic_chip_data(irqd); + struct apic_chip_data *data; - lockdep_assert_held(&vector_lock); - - apicd->hw_irq_cfg.vector = vector; - apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu); - irq_data_update_effective_affinity(irqd, cpumask_of(cpu)); - trace_vector_config(irqd->irq, vector, cpu, - apicd->hw_irq_cfg.dest_apicid); -} - -static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, - unsigned int newcpu) -{ - struct apic_chip_data *apicd = apic_chip_data(irqd); - struct irq_desc *desc = irq_data_to_desc(irqd); - - lockdep_assert_held(&vector_lock); - - trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector, - apicd->cpu); - - /* Setup the vector move, if required */ - if (apicd->vector && cpu_online(apicd->cpu)) { - apicd->move_in_progress = true; - apicd->prev_vector = apicd->vector; - apicd->prev_cpu = apicd->cpu; - } else { - apicd->prev_vector = 0; + data = kzalloc_node(sizeof(*data), GFP_KERNEL, node); + if (!data) + return NULL; + if (!zalloc_cpumask_var_node(&data->domain, GFP_KERNEL, node)) + goto out_data; + if (!zalloc_cpumask_var_node(&data->old_domain, GFP_KERNEL, node)) + goto out_domain; + return data; +out_domain: + free_cpumask_var(data->domain); +out_data: + kfree(data); + return NULL; +} + +static void free_apic_chip_data(struct apic_chip_data *data) +{ + if (data) { + free_cpumask_var(data->domain); + free_cpumask_var(data->old_domain); + kfree(data); } - - apicd->vector = newvec; - apicd->cpu = newcpu; - BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); - per_cpu(vector_irq, newcpu)[newvec] = desc; } -static void vector_assign_managed_shutdown(struct irq_data *irqd) +static int __assign_irq_vector(int irq, struct apic_chip_data *d, + const struct cpumask *mask, + struct irq_data *irqdata) { - unsigned int cpu = cpumask_first(cpu_online_mask); - - apic_update_irq_cfg(irqd, MANAGED_IRQ_SHUTDOWN_VECTOR, cpu); -} + /* + * NOTE! The local APIC isn't very good at handling + * multiple interrupts at the same interrupt level. + * As the interrupt level is determined by taking the + * vector number and shifting that right by 4, we + * want to spread these out a bit so that they don't + * all fall in the same interrupt level. + * + * Also, we've got to be careful not to trash gate + * 0x80, because int 0x80 is hm, kind of importantish. ;) + */ + static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; + static int current_offset = VECTOR_OFFSET_START % 16; + int cpu, vector; -static int reserve_managed_vector(struct irq_data *irqd) -{ - const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); - struct apic_chip_data *apicd = apic_chip_data(irqd); - unsigned long flags; - int ret; + /* + * If there is still a move in progress or the previous move has not + * been cleaned up completely, tell the caller to come back later. 
+	 */
+	if (d->move_in_progress ||
+	    cpumask_intersects(d->old_domain, cpu_online_mask))
+		return -EBUSY;
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	apicd->is_managed = true;
-	ret = irq_matrix_reserve_managed(vector_matrix, affmsk);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-	trace_vector_reserve_managed(irqd->irq, ret);
-	return ret;
-}
+	/* Only try and allocate irqs on cpus that are present */
+	cpumask_clear(d->old_domain);
+	cpumask_clear(searched_cpumask);
+	cpu = cpumask_first_and(mask, cpu_online_mask);
+	while (cpu < nr_cpu_ids) {
+		int new_cpu, offset;
 
-static void reserve_irq_vector_locked(struct irq_data *irqd)
-{
-	struct apic_chip_data *apicd = apic_chip_data(irqd);
+		/* Get the possible target cpus for @mask/@cpu from the apic */
+		apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
-	irq_matrix_reserve(vector_matrix);
-	apicd->can_reserve = true;
-	apicd->has_reserved = true;
-	trace_vector_reserve(irqd->irq, 0);
-	vector_assign_managed_shutdown(irqd);
-}
+		/*
+		 * Clear the offline cpus from @vector_cpumask for searching
+		 * and verify whether the result overlaps with @mask. If true,
+		 * then the call to apic->cpu_mask_to_apicid() will
+		 * succeed as well. If not, no point in trying to find a
+		 * vector in this mask.
+		 */
+		cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
+		if (!cpumask_intersects(vector_searchmask, mask))
+			goto next_cpu;
+
+		if (cpumask_subset(vector_cpumask, d->domain)) {
+			if (cpumask_equal(vector_cpumask, d->domain))
+				goto success;
+			/*
+			 * Mark the cpus which are no longer in the mask for
+			 * cleanup.
+			 */
+			cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
+			vector = d->cfg.vector;
+			goto update;
+		}
 
-static int reserve_irq_vector(struct irq_data *irqd)
-{
-	unsigned long flags;
+		vector = current_vector;
+		offset = current_offset;
+next:
+		vector += 16;
+		if (vector >= FIRST_SYSTEM_VECTOR) {
+			offset = (offset + 1) % 16;
+			vector = FIRST_EXTERNAL_VECTOR + offset;
+		}
 
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	reserve_irq_vector_locked(irqd);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-	return 0;
-}
+		/* If the search wrapped around, try the next cpu */
+		if (unlikely(current_vector == vector))
+			goto next_cpu;
 
-static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest)
-{
-	struct apic_chip_data *apicd = apic_chip_data(irqd);
-	bool resvd = apicd->has_reserved;
-	unsigned int cpu = apicd->cpu;
-	int vector = apicd->vector;
+		if (test_bit(vector, used_vectors))
+			goto next;
 
-	lockdep_assert_held(&vector_lock);
+		for_each_cpu(new_cpu, vector_searchmask) {
+			if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
+				goto next;
+		}
+		/* Found one! */
+		current_vector = vector;
+		current_offset = offset;
+		/* Schedule the old vector for cleanup on all cpus */
+		if (d->cfg.vector)
+			cpumask_copy(d->old_domain, d->domain);
+		for_each_cpu(new_cpu, vector_searchmask)
+			per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
+		goto update;
+
+next_cpu:
+		/*
+		 * We exclude the current @vector_cpumask from the requested
+		 * @mask and try again with the next online cpu in the
+		 * result. We cannot modify @mask, so we use @vector_cpumask
+		 * as a temporary buffer here as it will be reassigned when
+		 * calling apic->vector_allocation_domain() above.
+ */ + cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask); + cpumask_andnot(vector_cpumask, mask, searched_cpumask); + cpu = cpumask_first_and(vector_cpumask, cpu_online_mask); + continue; + } + return -ENOSPC; +update: /* - * If the current target CPU is online and in the new requested - * affinity mask, there is no point in moving the interrupt from - * one CPU to another. + * Exclude offline cpus from the cleanup mask and set the + * move_in_progress flag when the result is not empty. */ - if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest)) - return 0; - - vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu); - if (vector > 0) - apic_update_vector(irqd, vector, cpu); - trace_vector_alloc(irqd->irq, vector, resvd, vector); - return vector; -} - -static int assign_vector_locked(struct irq_data *irqd, - const struct cpumask *dest) -{ - struct apic_chip_data *apicd = apic_chip_data(irqd); - int vector = allocate_vector(irqd, dest); - - if (vector < 0) - return vector; - - apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); + cpumask_and(d->old_domain, d->old_domain, cpu_online_mask); + d->move_in_progress = !cpumask_empty(d->old_domain); + d->cfg.old_vector = d->move_in_progress ? d->cfg.vector : 0; + d->cfg.vector = vector; + cpumask_copy(d->domain, vector_cpumask); +success: + /* + * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail + * as we already established, that mask & d->domain & cpu_online_mask + * is not empty. + * + * vector_searchmask is a subset of d->domain and has the offline + * cpus masked out. + */ + cpumask_and(vector_searchmask, vector_searchmask, mask); + BUG_ON(apic->cpu_mask_to_apicid(vector_searchmask, irqdata, + &d->cfg.dest_apicid)); return 0; } -static int assign_irq_vector(struct irq_data *irqd, const struct cpumask *dest) +static int assign_irq_vector(int irq, struct apic_chip_data *data, + const struct cpumask *mask, + struct irq_data *irqdata) { + int err; unsigned long flags; - int ret; raw_spin_lock_irqsave(&vector_lock, flags); - cpumask_and(vector_searchmask, dest, cpu_online_mask); - ret = assign_vector_locked(irqd, vector_searchmask); + err = __assign_irq_vector(irq, data, mask, irqdata); raw_spin_unlock_irqrestore(&vector_lock, flags); - return ret; -} - -static int assign_irq_vector_any_locked(struct irq_data *irqd) -{ - /* Get the affinity mask - either irq_default_affinity or (user) set */ - const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); - int node = irq_data_get_node(irqd); - - if (node == NUMA_NO_NODE) - goto all; - /* Try the intersection of @affmsk and node mask */ - cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk); - if (!assign_vector_locked(irqd, vector_searchmask)) - return 0; - /* Try the node mask */ - if (!assign_vector_locked(irqd, cpumask_of_node(node))) - return 0; -all: - /* Try the full affinity mask */ - cpumask_and(vector_searchmask, affmsk, cpu_online_mask); - if (!assign_vector_locked(irqd, vector_searchmask)) - return 0; - /* Try the full online mask */ - return assign_vector_locked(irqd, cpu_online_mask); -} - -static int -assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info) -{ - if (irqd_affinity_is_managed(irqd)) - return reserve_managed_vector(irqd); - if (info->mask) - return assign_irq_vector(irqd, info->mask); - /* - * Make only a global reservation with no guarantee. A real vector - * is associated at activation time. 
- */ - return reserve_irq_vector(irqd); + return err; } -static int -assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest) +static int assign_irq_vector_policy(int irq, int node, + struct apic_chip_data *data, + struct irq_alloc_info *info, + struct irq_data *irqdata) { - const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); - struct apic_chip_data *apicd = apic_chip_data(irqd); - int vector, cpu; - - cpumask_and(vector_searchmask, vector_searchmask, affmsk); - cpu = cpumask_first(vector_searchmask); - if (cpu >= nr_cpu_ids) - return -EINVAL; - /* set_affinity might call here for nothing */ - if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask)) + if (info && info->mask) + return assign_irq_vector(irq, data, info->mask, irqdata); + if (node != NUMA_NO_NODE && + assign_irq_vector(irq, data, cpumask_of_node(node), irqdata) == 0) return 0; - vector = irq_matrix_alloc_managed(vector_matrix, cpu); - trace_vector_alloc_managed(irqd->irq, vector, vector); - if (vector < 0) - return vector; - apic_update_vector(irqd, vector, cpu); - apic_update_irq_cfg(irqd, vector, cpu); - return 0; -} - -static void clear_irq_vector(struct irq_data *irqd) -{ - struct apic_chip_data *apicd = apic_chip_data(irqd); - bool managed = irqd_affinity_is_managed(irqd); - unsigned int vector = apicd->vector; - - lockdep_assert_held(&vector_lock); - - if (!vector) - return; - - trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector, - apicd->prev_cpu); - - per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED; - irq_matrix_free(vector_matrix, apicd->cpu, vector, managed); - apicd->vector = 0; - - /* Clean up move in progress */ - vector = apicd->prev_vector; - if (!vector) - return; - - per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED; - irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed); - apicd->prev_vector = 0; - apicd->move_in_progress = 0; - hlist_del_init(&apicd->clist); + return assign_irq_vector(irq, data, apic->target_cpus(), irqdata); } -static void x86_vector_deactivate(struct irq_domain *dom, struct irq_data *irqd) +static void clear_irq_vector(int irq, struct apic_chip_data *data) { - struct apic_chip_data *apicd = apic_chip_data(irqd); - unsigned long flags; + struct irq_desc *desc; + int cpu, vector; - trace_vector_deactivate(irqd->irq, apicd->is_managed, - apicd->can_reserve, false); - - /* Regular fixed assigned interrupt */ - if (!apicd->is_managed && !apicd->can_reserve) - return; - /* If the interrupt has a global reservation, nothing to do */ - if (apicd->has_reserved) + if (!data->cfg.vector) return; - raw_spin_lock_irqsave(&vector_lock, flags); - clear_irq_vector(irqd); - if (apicd->can_reserve) - reserve_irq_vector_locked(irqd); - else - vector_assign_managed_shutdown(irqd); - raw_spin_unlock_irqrestore(&vector_lock, flags); -} - -static int activate_reserved(struct irq_data *irqd) -{ - struct apic_chip_data *apicd = apic_chip_data(irqd); - int ret; - - ret = assign_irq_vector_any_locked(irqd); - if (!ret) - apicd->has_reserved = false; - return ret; -} + vector = data->cfg.vector; + for_each_cpu_and(cpu, data->domain, cpu_online_mask) + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; -static int activate_managed(struct irq_data *irqd) -{ - const struct cpumask *dest = irq_data_get_affinity_mask(irqd); - int ret; - - cpumask_and(vector_searchmask, dest, cpu_online_mask); - if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) { - /* Something in the core code broke! 
Survive gracefully */ - pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq); - return EINVAL; - } + data->cfg.vector = 0; + cpumask_clear(data->domain); - ret = assign_managed_vector(irqd, vector_searchmask); /* - * This should not happen. The vector reservation got buggered. Handle - * it gracefully. + * If move is in progress or the old_domain mask is not empty, + * i.e. the cleanup IPI has not been processed yet, we need to remove + * the old references to desc from all cpus vector tables. */ - if (WARN_ON_ONCE(ret < 0)) { - pr_err("Managed startup irq %u, no vector available\n", - irqd->irq); + if (!data->move_in_progress && cpumask_empty(data->old_domain)) + return; + + desc = irq_to_desc(irq); + for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) { + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; + vector++) { + if (per_cpu(vector_irq, cpu)[vector] != desc) + continue; + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; + break; + } } - return ret; + data->move_in_progress = 0; } -static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, - bool early) +void init_irq_alloc_info(struct irq_alloc_info *info, + const struct cpumask *mask) { - struct apic_chip_data *apicd = apic_chip_data(irqd); - unsigned long flags; - int ret = 0; - - trace_vector_activate(irqd->irq, apicd->is_managed, - apicd->can_reserve, early); - - /* Nothing to do for fixed assigned vectors */ - if (!apicd->can_reserve && !apicd->is_managed) - return 0; - - raw_spin_lock_irqsave(&vector_lock, flags); - if (early || irqd_is_managed_and_shutdown(irqd)) - vector_assign_managed_shutdown(irqd); - else if (apicd->is_managed) - ret = activate_managed(irqd); - else if (apicd->has_reserved) - ret = activate_reserved(irqd); - raw_spin_unlock_irqrestore(&vector_lock, flags); - return ret; + memset(info, 0, sizeof(*info)); + info->mask = mask; } -static void vector_free_reserved_and_managed(struct irq_data *irqd) +void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) { - const struct cpumask *dest = irq_data_get_affinity_mask(irqd); - struct apic_chip_data *apicd = apic_chip_data(irqd); - - trace_vector_teardown(irqd->irq, apicd->is_managed, - apicd->has_reserved); - - if (apicd->has_reserved) - irq_matrix_remove_reserved(vector_matrix); - if (apicd->is_managed) - irq_matrix_remove_managed(vector_matrix, dest); + if (src) + *dst = *src; + else + memset(dst, 0, sizeof(*dst)); } static void x86_vector_free_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { - struct apic_chip_data *apicd; - struct irq_data *irqd; + struct apic_chip_data *apic_data; + struct irq_data *irq_data; unsigned long flags; int i; for (i = 0; i < nr_irqs; i++) { - irqd = irq_domain_get_irq_data(x86_vector_domain, virq + i); - if (irqd && irqd->chip_data) { + irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i); + if (irq_data && irq_data->chip_data) { raw_spin_lock_irqsave(&vector_lock, flags); - clear_irq_vector(irqd); - vector_free_reserved_and_managed(irqd); - apicd = irqd->chip_data; - irq_domain_reset_irq_data(irqd); + clear_irq_vector(virq + i, irq_data->chip_data); + apic_data = irq_data->chip_data; + irq_domain_reset_irq_data(irq_data); raw_spin_unlock_irqrestore(&vector_lock, flags); - free_apic_chip_data(apicd); + free_apic_chip_data(apic_data); +#ifdef CONFIG_X86_IO_APIC + if (virq + i < nr_legacy_irqs()) + legacy_irq_data[virq + i] = NULL; +#endif } } } -static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd, - 
struct apic_chip_data *apicd) -{ - unsigned long flags; - bool realloc = false; - - apicd->vector = ISA_IRQ_VECTOR(virq); - apicd->cpu = 0; - - raw_spin_lock_irqsave(&vector_lock, flags); - /* - * If the interrupt is activated, then it must stay at this vector - * position. That's usually the timer interrupt (0). - */ - if (irqd_is_activated(irqd)) { - trace_vector_setup(virq, true, 0); - apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); - } else { - /* Release the vector */ - apicd->can_reserve = true; - clear_irq_vector(irqd); - realloc = true; - } - raw_spin_unlock_irqrestore(&vector_lock, flags); - return realloc; -} - static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg) { struct irq_alloc_info *info = arg; - struct apic_chip_data *apicd; - struct irq_data *irqd; + struct apic_chip_data *data; + struct irq_data *irq_data; int i, err, node; if (disable_apic) @@ -501,37 +350,34 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, return -ENOSYS; for (i = 0; i < nr_irqs; i++) { - irqd = irq_domain_get_irq_data(domain, virq + i); - BUG_ON(!irqd); - node = irq_data_get_node(irqd); - WARN_ON_ONCE(irqd->chip_data); - apicd = alloc_apic_chip_data(node); - if (!apicd) { + irq_data = irq_domain_get_irq_data(domain, virq + i); + BUG_ON(!irq_data); + node = irq_data_get_node(irq_data); +#ifdef CONFIG_X86_IO_APIC + if (virq + i < nr_legacy_irqs() && legacy_irq_data[virq + i]) + data = legacy_irq_data[virq + i]; + else +#endif + data = alloc_apic_chip_data(node); + if (!data) { err = -ENOMEM; goto error; } - apicd->irq = virq + i; - irqd->chip = &lapic_controller; - irqd->chip_data = apicd; - irqd->hwirq = virq + i; - irqd_set_single_target(irqd); - /* - * Legacy vectors are already assigned when the IOAPIC - * takes them over. They stay on the same vector. This is - * required for check_timer() to work correctly as it might - * switch back to legacy mode. Only update the hardware - * config. - */ - if (info->flags & X86_IRQ_ALLOC_LEGACY) { - if (!vector_configure_legacy(virq + i, irqd, apicd)) - continue; - } - - err = assign_irq_vector_policy(irqd, info); - trace_vector_setup(virq + i, false, err); + irq_data->chip = &lapic_controller; + irq_data->chip_data = data; + irq_data->hwirq = virq + i; + err = assign_irq_vector_policy(virq + i, node, data, info, + irq_data); if (err) goto error; + /* + * If the apic destination mode is physical, then the + * effective affinity is restricted to a single target + * CPU. Mark the interrupt accordingly. 
+ */ + if (!apic->irq_dest_mode) + irqd_set_single_target(irq_data); } return 0; @@ -541,56 +387,9 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, return err; } -#ifdef CONFIG_GENERIC_IRQ_DEBUGFS -void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, - struct irq_data *irqd, int ind) -{ - unsigned int cpu, vector, prev_cpu, prev_vector; - struct apic_chip_data *apicd; - unsigned long flags; - int irq; - - if (!irqd) { - irq_matrix_debug_show(m, vector_matrix, ind); - return; - } - - irq = irqd->irq; - if (irq < nr_legacy_irqs() && !test_bit(irq, &io_apic_irqs)) { - seq_printf(m, "%*sVector: %5d\n", ind, "", ISA_IRQ_VECTOR(irq)); - seq_printf(m, "%*sTarget: Legacy PIC all CPUs\n", ind, ""); - return; - } - - apicd = irqd->chip_data; - if (!apicd) { - seq_printf(m, "%*sVector: Not assigned\n", ind, ""); - return; - } - - raw_spin_lock_irqsave(&vector_lock, flags); - cpu = apicd->cpu; - vector = apicd->vector; - prev_cpu = apicd->prev_cpu; - prev_vector = apicd->prev_vector; - raw_spin_unlock_irqrestore(&vector_lock, flags); - seq_printf(m, "%*sVector: %5u\n", ind, "", vector); - seq_printf(m, "%*sTarget: %5u\n", ind, "", cpu); - if (prev_vector) { - seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", prev_vector); - seq_printf(m, "%*sPrevious target: %5u\n", ind, "", prev_cpu); - } -} -#endif - static const struct irq_domain_ops x86_vector_domain_ops = { - .alloc = x86_vector_alloc_irqs, - .free = x86_vector_free_irqs, - .activate = x86_vector_activate, - .deactivate = x86_vector_deactivate, -#ifdef CONFIG_GENERIC_IRQ_DEBUGFS - .debug_show = x86_vector_debug_show, -#endif + .alloc = x86_vector_alloc_irqs, + .free = x86_vector_free_irqs, }; int __init arch_probe_nr_irqs(void) @@ -620,40 +419,35 @@ int __init arch_probe_nr_irqs(void) return legacy_pic->probe(); } -void lapic_assign_legacy_vector(unsigned int irq, bool replace) +#ifdef CONFIG_X86_IO_APIC +static void __init init_legacy_irqs(void) { + int i, node = cpu_to_node(0); + struct apic_chip_data *data; + /* - * Use assign system here so it wont get accounted as allocated - * and moveable in the cpu hotplug check and it prevents managed - * irq reservation from touching it. + * For legacy IRQ's, start with assigning irq0 to irq15 to + * ISA_IRQ_VECTOR(i) for all cpu's. 
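ISA_IRQ_VECTOR() is a fixed translation; on this series it is assumed to expand to FIRST_EXTERNAL_VECTOR + 16 + irq per <asm/irq_vectors.h>, putting the sixteen legacy irqs at vectors 0x30-0x3f on every cpu. A throwaway program printing the table the loop below fills in:

        #include <stdio.h>

        #define FIRST_EXTERNAL_VECTOR   0x20
        /* assumed expansion of the kernel macro */
        #define ISA_IRQ_VECTOR(irq)     ((FIRST_EXTERNAL_VECTOR + 16) + (irq))

        int main(void)
        {
                int irq;

                for (irq = 0; irq < 16; irq++)
                        printf("ISA irq %2d -> vector 0x%02x\n",
                               irq, ISA_IRQ_VECTOR(irq));
                return 0;
        }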
*/ - irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace); -} - -void __init lapic_assign_system_vectors(void) -{ - unsigned int i, vector = 0; - - for_each_set_bit_from(vector, system_vectors, NR_VECTORS) - irq_matrix_assign_system(vector_matrix, vector, false); - - if (nr_legacy_irqs() > 1) - lapic_assign_legacy_vector(PIC_CASCADE_IR, false); - - /* System vectors are reserved, online it */ - irq_matrix_online(vector_matrix); - - /* Mark the preallocated legacy interrupts */ for (i = 0; i < nr_legacy_irqs(); i++) { - if (i != PIC_CASCADE_IR) - irq_matrix_assign(vector_matrix, ISA_IRQ_VECTOR(i)); + data = legacy_irq_data[i] = alloc_apic_chip_data(node); + BUG_ON(!data); + + data->cfg.vector = ISA_IRQ_VECTOR(i); + cpumask_setall(data->domain); + irq_set_chip_data(i, data); } } +#else +static inline void init_legacy_irqs(void) { } +#endif int __init arch_early_irq_init(void) { struct fwnode_handle *fn; + init_legacy_irqs(); + fn = irq_domain_alloc_named_fwnode("VECTOR"); BUG_ON(!fn); x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops, @@ -664,115 +458,100 @@ int __init arch_early_irq_init(void) arch_init_msi_domain(x86_vector_domain); + BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); - - /* - * Allocate the vector matrix allocator data structure and limit the - * search area. - */ - vector_matrix = irq_alloc_matrix(NR_VECTORS, FIRST_EXTERNAL_VECTOR, - FIRST_SYSTEM_VECTOR); - BUG_ON(!vector_matrix); + BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL)); return arch_early_ioapic_init(); } -#ifdef CONFIG_SMP - -static struct irq_desc *__setup_vector_irq(int vector) +/* Initialize vector_irq on a new cpu */ +static void __setup_vector_irq(int cpu) { - int isairq = vector - ISA_IRQ_VECTOR(0); - - /* Check whether the irq is in the legacy space */ - if (isairq < 0 || isairq >= nr_legacy_irqs()) - return VECTOR_UNUSED; - /* Check whether the irq is handled by the IOAPIC */ - if (test_bit(isairq, &io_apic_irqs)) - return VECTOR_UNUSED; - return irq_to_desc(isairq); -} + struct apic_chip_data *data; + struct irq_desc *desc; + int irq, vector; -/* Online the local APIC infrastructure and initialize the vectors */ -void lapic_online(void) -{ - unsigned int vector; + /* Mark the inuse vectors */ + for_each_irq_desc(irq, desc) { + struct irq_data *idata = irq_desc_get_irq_data(desc); - lockdep_assert_held(&vector_lock); - - /* Online the vector matrix array for this CPU */ - irq_matrix_online(vector_matrix); - - /* - * The interrupt affinity logic never targets interrupts to offline - * CPUs. The exception are the legacy PIC interrupts. In general - * they are only targeted to CPU0, but depending on the platform - * they can be distributed to any online CPU in hardware. The - * kernel has no influence on that. So all active legacy vectors - * must be installed on all CPUs. All non legacy interrupts can be - * cleared. 
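Both allocator variants key their bookkeeping off the per-cpu vector_irq[] table, whose slots hold either a struct irq_desc pointer or a small out-of-band sentinel, which is why every lookup is guarded by IS_ERR_OR_NULL(). A userspace model of that encoding; the sentinel values here are assumptions for illustration (the kernel's live in <asm/hw_irq.h>):

        #include <stdio.h>

        #define VECTOR_UNUSED           NULL            /* assumed encoding */
        #define VECTOR_RETRIGGERED      ((void *)~0UL)  /* assumed encoding */

        /* mirrors IS_ERR_OR_NULL(): NULL or a value in the top error range */
        static int is_err_or_null(const void *p)
        {
                return !p || (unsigned long)p >= (unsigned long)-4095;
        }

        int main(void)
        {
                static int fake_desc;
                void *slot[] = { VECTOR_UNUSED, VECTOR_RETRIGGERED, &fake_desc };
                int i;

                for (i = 0; i < 3; i++)
                        printf("slot %d: %s\n", i, is_err_or_null(slot[i]) ?
                               "free or sentinel" : "live irq_desc");
                return 0;
        }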
- */ - for (vector = 0; vector < NR_VECTORS; vector++) - this_cpu_write(vector_irq[vector], __setup_vector_irq(vector)); -} + data = apic_chip_data(idata); + if (!data || !cpumask_test_cpu(cpu, data->domain)) + continue; + vector = data->cfg.vector; + per_cpu(vector_irq, cpu)[vector] = desc; + } + /* Mark the free vectors */ + for (vector = 0; vector < NR_VECTORS; ++vector) { + desc = per_cpu(vector_irq, cpu)[vector]; + if (IS_ERR_OR_NULL(desc)) + continue; -void lapic_offline(void) -{ - lock_vector_lock(); - irq_matrix_offline(vector_matrix); - unlock_vector_lock(); + data = apic_chip_data(irq_desc_get_irq_data(desc)); + if (!cpumask_test_cpu(cpu, data->domain)) + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; + } } -static int apic_set_affinity(struct irq_data *irqd, - const struct cpumask *dest, bool force) +/* + * Setup the vector to irq mappings. Must be called with vector_lock held. + */ +void setup_vector_irq(int cpu) { - struct apic_chip_data *apicd = apic_chip_data(irqd); - int err; + int irq; + lockdep_assert_held(&vector_lock); /* - * Core code can call here for inactive interrupts. For inactive - * interrupts which use managed or reservation mode there is no - * point in going through the vector assignment right now as the - * activation will assign a vector which fits the destination - * cpumask. Let the core code store the destination mask and be - * done with it. + * On most of the platforms, legacy PIC delivers the interrupts on the + * boot cpu. But there are certain platforms where PIC interrupts are + * delivered to multiple cpu's. If the legacy IRQ is handled by the + * legacy PIC, for the new cpu that is coming online, setup the static + * legacy vector to irq mapping: */ - if (!irqd_is_activated(irqd) && - (apicd->is_managed || apicd->can_reserve)) - return IRQ_SET_MASK_OK; + for (irq = 0; irq < nr_legacy_irqs(); irq++) + per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq_to_desc(irq); - raw_spin_lock(&vector_lock); - cpumask_and(vector_searchmask, dest, cpu_online_mask); - if (irqd_affinity_is_managed(irqd)) - err = assign_managed_vector(irqd, vector_searchmask); - else - err = assign_vector_locked(irqd, vector_searchmask); - raw_spin_unlock(&vector_lock); - return err ? err : IRQ_SET_MASK_OK; + __setup_vector_irq(cpu); } -#else -# define apic_set_affinity NULL -#endif - -static int apic_retrigger_irq(struct irq_data *irqd) +static int apic_retrigger_irq(struct irq_data *irq_data) { - struct apic_chip_data *apicd = apic_chip_data(irqd); + struct apic_chip_data *data = apic_chip_data(irq_data); unsigned long flags; + int cpu; raw_spin_lock_irqsave(&vector_lock, flags); - apic->send_IPI(apicd->cpu, apicd->vector); + cpu = cpumask_first_and(data->domain, cpu_online_mask); + apic->send_IPI_mask(cpumask_of(cpu), data->cfg.vector); raw_spin_unlock_irqrestore(&vector_lock, flags); return 1; } -void apic_ack_edge(struct irq_data *irqd) +void apic_ack_edge(struct irq_data *data) { - irq_complete_move(irqd_cfg(irqd)); - irq_move_irq(irqd); + irq_complete_move(irqd_cfg(data)); + irq_move_irq(data); ack_APIC_irq(); } +static int apic_set_affinity(struct irq_data *irq_data, + const struct cpumask *dest, bool force) +{ + struct apic_chip_data *data = irq_data->chip_data; + int err, irq = irq_data->irq; + + if (!IS_ENABLED(CONFIG_SMP)) + return -EPERM; + + if (!cpumask_intersects(dest, cpu_online_mask)) + return -EINVAL; + + err = assign_irq_vector(irq, data, dest, irq_data); + return err ? 
err : IRQ_SET_MASK_OK; +} + static struct irq_chip lapic_controller = { .name = "APIC", .irq_ack = apic_ack_edge, @@ -781,98 +560,115 @@ static struct irq_chip lapic_controller = { }; #ifdef CONFIG_SMP +static void __send_cleanup_vector(struct apic_chip_data *data) +{ + raw_spin_lock(&vector_lock); + cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); + data->move_in_progress = 0; + if (!cpumask_empty(data->old_domain)) + apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR); + raw_spin_unlock(&vector_lock); +} -static void free_moved_vector(struct apic_chip_data *apicd) +void send_cleanup_vector(struct irq_cfg *cfg) { - unsigned int vector = apicd->prev_vector; - unsigned int cpu = apicd->prev_cpu; - bool managed = apicd->is_managed; + struct apic_chip_data *data; - /* - * This should never happen. Managed interrupts are not - * migrated except on CPU down, which does not involve the - * cleanup vector. But try to keep the accounting correct - * nevertheless. - */ - WARN_ON_ONCE(managed); - - trace_vector_free_moved(apicd->irq, cpu, vector, managed); - irq_matrix_free(vector_matrix, cpu, vector, managed); - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; - hlist_del_init(&apicd->clist); - apicd->prev_vector = 0; - apicd->move_in_progress = 0; + data = container_of(cfg, struct apic_chip_data, cfg); + if (data->move_in_progress) + __send_cleanup_vector(data); } asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) { - struct hlist_head *clhead = this_cpu_ptr(&cleanup_list); - struct apic_chip_data *apicd; - struct hlist_node *tmp; + unsigned vector, me; entering_ack_irq(); + /* Prevent vectors vanishing under us */ raw_spin_lock(&vector_lock); - hlist_for_each_entry_safe(apicd, tmp, clhead, clist) { - unsigned int irr, vector = apicd->prev_vector; + me = smp_processor_id(); + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + struct apic_chip_data *data; + struct irq_desc *desc; + unsigned int irr; + + retry: + desc = __this_cpu_read(vector_irq[vector]); + if (IS_ERR_OR_NULL(desc)) + continue; + + if (!raw_spin_trylock(&desc->lock)) { + raw_spin_unlock(&vector_lock); + cpu_relax(); + raw_spin_lock(&vector_lock); + goto retry; + } + + data = apic_chip_data(irq_desc_get_irq_data(desc)); + if (!data) + goto unlock; /* - * Paranoia: Check if the vector that needs to be cleaned - * up is registered at the APICs IRR. If so, then this is - * not the best time to clean it up. Clean it up in the - * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR - * to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest - * priority external vector, so on return from this - * interrupt the device interrupt will happen first. + * Nothing to cleanup if irq migration is in progress + * or this cpu is not set in the cleanup mask. + */ + if (data->move_in_progress || + !cpumask_test_cpu(me, data->old_domain)) + goto unlock; + + /* + * We have two cases to handle here: + * 1) vector is unchanged but the target mask got reduced + * 2) vector and the target mask has changed + * + * #1 is obvious, but in #2 we have two vectors with the same + * irq descriptor: the old and the new vector. So we need to + * make sure that we only cleanup the old vector. The new + * vector has the current @vector number in the config and + * this cpu is part of the target mask. We better leave that + * one alone. 
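The two cases are easier to see with concrete numbers. Suppose an irq moved from cpu 1, vector 0x41 to cpu 2, vector 0x51: on cpu 1 the stale 0x41 slot fails the test below and is cleaned out, while on cpu 2 the live 0x51 slot passes it and is left alone. A compact model of just that test, with field names mirroring apic_chip_data and invented values:

        #include <stdio.h>
        #include <stdbool.h>

        struct snapshot {
                int cfg_vector;         /* data->cfg.vector: current vector */
                bool me_in_domain;      /* cpumask_test_cpu(me, data->domain) */
        };

        static bool leave_alone(const struct snapshot *s, int vector)
        {
                return vector == s->cfg_vector && s->me_in_domain;
        }

        int main(void)
        {
                struct snapshot on_cpu2 = { .cfg_vector = 0x51, .me_in_domain = true };

                printf("vector 0x41: %s\n", leave_alone(&on_cpu2, 0x41) ? "keep" : "clean up");
                printf("vector 0x51: %s\n", leave_alone(&on_cpu2, 0x51) ? "keep" : "clean up");
                return 0;
        }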
*/ + if (vector == data->cfg.vector && + cpumask_test_cpu(me, data->domain)) + goto unlock; + irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); - if (irr & (1U << (vector % 32))) { + /* + * Check if the vector that needs to be cleaned up is + * registered at the cpu's IRR. If so, then this is not + * the best time to clean it up. Let's clean it up in the + * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR + * to myself. + */ + if (irr & (1 << (vector % 32))) { apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); - continue; + goto unlock; } - free_moved_vector(apicd); + __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); + cpumask_clear_cpu(me, data->old_domain); +unlock: + raw_spin_unlock(&desc->lock); } raw_spin_unlock(&vector_lock); - exiting_irq(); -} -static void __send_cleanup_vector(struct apic_chip_data *apicd) -{ - unsigned int cpu; - - raw_spin_lock(&vector_lock); - apicd->move_in_progress = 0; - cpu = apicd->prev_cpu; - if (cpu_online(cpu)) { - hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu)); - apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR); - } else { - apicd->prev_vector = 0; - } - raw_spin_unlock(&vector_lock); -} - -void send_cleanup_vector(struct irq_cfg *cfg) -{ - struct apic_chip_data *apicd; - - apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); - if (apicd->move_in_progress) - __send_cleanup_vector(apicd); + exiting_irq(); } static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) { - struct apic_chip_data *apicd; + unsigned me; + struct apic_chip_data *data; - apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); - if (likely(!apicd->move_in_progress)) + data = container_of(cfg, struct apic_chip_data, cfg); + if (likely(!data->move_in_progress)) return; - if (vector == apicd->vector && apicd->cpu == smp_processor_id()) - __send_cleanup_vector(apicd); + me = smp_processor_id(); + if (vector == data->cfg.vector && cpumask_test_cpu(me, data->domain)) + __send_cleanup_vector(data); } void irq_complete_move(struct irq_cfg *cfg) @@ -885,9 +681,10 @@ void irq_complete_move(struct irq_cfg *cfg) */ void irq_force_complete_move(struct irq_desc *desc) { - struct apic_chip_data *apicd; - struct irq_data *irqd; - unsigned int vector; + struct irq_data *irqdata; + struct apic_chip_data *data; + struct irq_cfg *cfg; + unsigned int cpu; /* * The function is called for all descriptors regardless of which @@ -898,31 +695,43 @@ void irq_force_complete_move(struct irq_desc *desc) * Check first that the chip_data is what we expect * (apic_chip_data) before touching it any further. */ - irqd = irq_domain_get_irq_data(x86_vector_domain, - irq_desc_get_irq(desc)); - if (!irqd) + irqdata = irq_domain_get_irq_data(x86_vector_domain, + irq_desc_get_irq(desc)); + if (!irqdata) return; - raw_spin_lock(&vector_lock); - apicd = apic_chip_data(irqd); - if (!apicd) - goto unlock; + data = apic_chip_data(irqdata); + cfg = data ? &data->cfg : NULL; - /* - * If prev_vector is empty, no action required. - */ - vector = apicd->prev_vector; - if (!vector) - goto unlock; + if (!cfg) + return; /* - * This is tricky. If the cleanup of the old vector has not been + * This is tricky. If the cleanup of @data->old_domain has not been * done yet, then the following setaffinity call will fail with * -EBUSY. This can leave the interrupt in a stale state. * * All CPUs are stuck in stop machine with interrupts disabled so * calling __irq_complete_move() would be completely pointless.
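The IRR probe above leans on the local APIC register layout: the 256 IRR bits are spread across eight 32-bit registers spaced 0x10 apart, so vector v sits in the register at APIC_IRR + (v / 32) * 0x10, bit v % 32. A quick standalone check of the arithmetic (APIC_IRR is 0x200 in <asm/apicdef.h>):

        #include <stdio.h>

        #define APIC_IRR 0x200

        int main(void)
        {
                unsigned int vector = 0x41;     /* example vector */
                unsigned int reg = APIC_IRR + (vector / 32 * 0x10);
                unsigned int bit = vector % 32;

                /* prints: vector 0x41 -> IRR register 0x220, bit 1 */
                printf("vector 0x%02x -> IRR register 0x%x, bit %u\n",
                       vector, reg, bit);
                return 0;
        }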
- * + */ + raw_spin_lock(&vector_lock); + /* + * Clean out all offline cpus (including the outgoing one) from the + * old_domain mask. + */ + cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); + + /* + * If move_in_progress is cleared and the old_domain mask is empty, + * then there is nothing to cleanup. fixup_irqs() will take care of + * the stale vectors on the outgoing cpu. + */ + if (!data->move_in_progress && cpumask_empty(data->old_domain)) { + raw_spin_unlock(&vector_lock); + return; + } + + /* * 1) The interrupt is in move_in_progress state. That means that we * have not seen an interrupt since the io_apic was reprogrammed to * the new vector. @@ -930,7 +739,7 @@ void irq_force_complete_move(struct irq_desc *desc) * 2) The interrupt has fired on the new vector, but the cleanup IPIs * have not been processed yet. */ - if (apicd->move_in_progress) { + if (data->move_in_progress) { /* * In theory there is a race: * @@ -964,43 +773,21 @@ void irq_force_complete_move(struct irq_desc *desc) * area arises. */ pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n", - irqd->irq, vector); + irqdata->irq, cfg->old_vector); } - free_moved_vector(apicd); -unlock: - raw_spin_unlock(&vector_lock); -} - -#ifdef CONFIG_HOTPLUG_CPU -/* - * Note, this is not accurate accounting, but at least good enough to - * prevent that the actual interrupt move will run out of vectors. - */ -int lapic_can_unplug_cpu(void) -{ - unsigned int rsvd, avl, tomove, cpu = smp_processor_id(); - int ret = 0; + /* + * If old_domain is not empty, then other cpus still have the irq + * descriptor set in their vector array. Clean it up. + */ + for_each_cpu(cpu, data->old_domain) + per_cpu(vector_irq, cpu)[cfg->old_vector] = VECTOR_UNUSED; - raw_spin_lock(&vector_lock); - tomove = irq_matrix_allocated(vector_matrix); - avl = irq_matrix_available(vector_matrix, true); - if (avl < tomove) { - pr_warn("CPU %u has %u vectors, %u available. Cannot disable CPU\n", - cpu, tomove, avl); - ret = -ENOSPC; - goto out; - } - rsvd = irq_matrix_reserved(vector_matrix); - if (avl < rsvd) { - pr_warn("Reserved vectors %u > available %u. IRQ request may fail\n", - rsvd, avl); - } -out: + /* Cleanup the left overs of the (half finished) move */ + cpumask_clear(data->old_domain); + data->move_in_progress = 0; raw_spin_unlock(&vector_lock); - return ret; } -#endif /* HOTPLUG_CPU */ -#endif /* SMP */ +#endif static void __init print_APIC_field(int base) { diff --git a/arch/x86/kernel/apic/x2apic.h b/arch/x86/kernel/apic/x2apic.h deleted file mode 100644 index b107de381cb5..000000000000 --- a/arch/x86/kernel/apic/x2apic.h +++ /dev/null @@ -1,9 +0,0 @@ -/* Common bits for X2APIC cluster/physical modes. 
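Stepping back to the lapic_can_unplug_cpu() hunk above: the matrix-based variant refuses to unplug a cpu unless every vector active on it can find a free slot on the remaining cpus. A standalone model of that headroom test; the helper and all numbers are invented for illustration:

        #include <stdio.h>

        /* hypothetical: vectors to rehome vs. free slots left elsewhere */
        static int can_unplug(unsigned int to_move, unsigned int available)
        {
                return available >= to_move ? 0 : -28;  /* -ENOSPC */
        }

        int main(void)
        {
                printf("move 12, 180 free: %d\n", can_unplug(12, 180)); /* 0 */
                printf("move 40, 8 free:   %d\n", can_unplug(40, 8));   /* -28 */
                return 0;
        }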
*/ - -int x2apic_apic_id_valid(int apicid); -int x2apic_apic_id_registered(void); -void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); -unsigned int x2apic_get_apic_id(unsigned long id); -u32 x2apic_set_apic_id(unsigned int id); -int x2apic_phys_pkg_id(int initial_apicid, int index_msb); -void x2apic_send_IPI_self(int vector); diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 622f13ca8a94..e216cf3d64d2 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -9,24 +9,22 @@ #include <linux/cpu.h> #include <asm/smp.h> -#include "x2apic.h" - -struct cluster_mask { - unsigned int clusterid; - int node; - struct cpumask mask; -}; +#include <asm/x2apic.h> static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); +static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster); static DEFINE_PER_CPU(cpumask_var_t, ipi_mask); -static DEFINE_PER_CPU(struct cluster_mask *, cluster_masks); -static struct cluster_mask *cluster_hotplug_mask; static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return x2apic_enabled(); } +static inline u32 x2apic_cluster(int cpu) +{ + return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16; +} + static void x2apic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu); @@ -38,34 +36,49 @@ static void x2apic_send_IPI(int cpu, int vector) static void __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) { - unsigned int cpu, clustercpu; - struct cpumask *tmpmsk; + struct cpumask *cpus_in_cluster_ptr; + struct cpumask *ipi_mask_ptr; + unsigned int cpu, this_cpu; unsigned long flags; u32 dest; x2apic_wrmsr_fence(); + local_irq_save(flags); - tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask); - cpumask_copy(tmpmsk, mask); - /* If IPI should not be sent to self, clear current CPU */ - if (apic_dest != APIC_DEST_ALLINC) - cpumask_clear_cpu(smp_processor_id(), tmpmsk); + this_cpu = smp_processor_id(); - /* Collapse cpus in a cluster so a single IPI per cluster is sent */ - for_each_cpu(cpu, tmpmsk) { - struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu); + /* + * We are going to modify the mask, so we need our own copy + * and must be sure it's manipulated with irqs off. + */ + ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask); + cpumask_copy(ipi_mask_ptr, mask); + + /* + * The idea is to send one IPI per cluster. + */ + for_each_cpu(cpu, ipi_mask_ptr) { + unsigned long i; + cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu); dest = 0; - for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask) - dest |= per_cpu(x86_cpu_to_logical_apicid, clustercpu); + + /* Collect cpus in cluster. */ + for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) { + if (apic_dest == APIC_DEST_ALLINC || i != this_cpu) + dest |= per_cpu(x86_cpu_to_logical_apicid, i); + } if (!dest) continue; __x2apic_send_IPI_dest(dest, vector, apic->dest_logical); - /* Remove cluster CPUs from tmpmask */ - cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask); + /* + * Cluster sibling cpus should be discarded now so + * we do not send them the IPI a second time.
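The one-IPI-per-cluster collapse above works because an x2apic logical ID is structured: bits 31:16 carry the cluster number and bits 15:0 a one-hot member position, so sibling IDs can simply be OR-ed into a single destination. The ID layout is architectural; the APIC IDs below are invented:

        #include <stdio.h>

        static unsigned int x2apic_ldr(unsigned int apicid)
        {
                /* bits 31:16 cluster id, bits 15:0 one-hot member bit */
                return ((apicid >> 4) << 16) | (1u << (apicid & 0xf));
        }

        int main(void)
        {
                /* three cpus in cluster 2: APIC IDs 0x20, 0x21, 0x23 */
                unsigned int dest = x2apic_ldr(0x20) | x2apic_ldr(0x21) |
                                    x2apic_ldr(0x23);

                /* prints 0x0002000b: cluster 2, members 0, 1 and 3 */
                printf("single IPI destination: 0x%08x\n", dest);
                return 0;
        }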
+ */ + cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr); } local_irq_restore(flags); @@ -92,90 +105,125 @@ static void x2apic_send_IPI_all(int vector) __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); } -static u32 x2apic_calc_apicid(unsigned int cpu) +static int +x2apic_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, + unsigned int *apicid) { - return per_cpu(x86_cpu_to_logical_apicid, cpu); -} - -static void init_x2apic_ldr(void) -{ - struct cluster_mask *cmsk = this_cpu_read(cluster_masks); - u32 cluster, apicid = apic_read(APIC_LDR); + struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata); unsigned int cpu; + u32 dest = 0; + u16 cluster; - this_cpu_write(x86_cpu_to_logical_apicid, apicid); + cpu = cpumask_first(mask); + if (cpu >= nr_cpu_ids) + return -EINVAL; - if (cmsk) - goto update; + dest = per_cpu(x86_cpu_to_logical_apicid, cpu); + cluster = x2apic_cluster(cpu); - cluster = apicid >> 16; - for_each_online_cpu(cpu) { - cmsk = per_cpu(cluster_masks, cpu); - /* Matching cluster found. Link and update it. */ - if (cmsk && cmsk->clusterid == cluster) - goto update; + cpumask_clear(effmsk); + for_each_cpu(cpu, mask) { + if (cluster != x2apic_cluster(cpu)) + continue; + dest |= per_cpu(x86_cpu_to_logical_apicid, cpu); + cpumask_set_cpu(cpu, effmsk); } - cmsk = cluster_hotplug_mask; - cluster_hotplug_mask = NULL; -update: - this_cpu_write(cluster_masks, cmsk); - cpumask_set_cpu(smp_processor_id(), &cmsk->mask); + + *apicid = dest; + return 0; } -static int alloc_clustermask(unsigned int cpu, int node) +static void init_x2apic_ldr(void) { - if (per_cpu(cluster_masks, cpu)) - return 0; - /* - * If a hotplug spare mask exists, check whether it's on the right - * node. If not, free it and allocate a new one. - */ - if (cluster_hotplug_mask) { - if (cluster_hotplug_mask->node == node) - return 0; - kfree(cluster_hotplug_mask); - } + unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; - cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask), - GFP_KERNEL, node); - if (!cluster_hotplug_mask) - return -ENOMEM; - cluster_hotplug_mask->node = node; - return 0; + per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR); + + cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, this_cpu)); + for_each_online_cpu(cpu) { + if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) + continue; + cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu)); + cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu)); + } } +/* + * At CPU state changes, update the x2apic cluster sibling info. 
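CPUHP_X2APIC_PREPARE sits in the hotplug preparation stage, so the prepare/dead callbacks registered below run on a control cpu before the incoming cpu executes and after a dying cpu is gone, which is why they may sleep and allocate. A minimal sketch of the same registration pattern, using a dynamic prepare-stage state rather than this driver's fixed one:

        #include <linux/cpuhotplug.h>
        #include <linux/init.h>

        static int demo_prepare_cpu(unsigned int cpu)
        {
                /* runs before @cpu comes up: allocate per-cpu resources */
                return 0;
        }

        static int demo_dead_cpu(unsigned int cpu)
        {
                /* runs after @cpu is gone: undo what prepare built */
                return 0;
        }

        static int __init demo_init(void)
        {
                int ret = cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "demo:prepare",
                                            demo_prepare_cpu, demo_dead_cpu);

                /* dynamic states return the allocated state number on success */
                return ret < 0 ? ret : 0;
        }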
+ */ static int x2apic_prepare_cpu(unsigned int cpu) { - if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0) + if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL)) return -ENOMEM; - if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) + + if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) { + free_cpumask_var(per_cpu(cpus_in_cluster, cpu)); return -ENOMEM; + } + return 0; } -static int x2apic_dead_cpu(unsigned int dead_cpu) +static int x2apic_dead_cpu(unsigned int this_cpu) { - struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu); + int cpu; - cpumask_clear_cpu(dead_cpu, &cmsk->mask); - free_cpumask_var(per_cpu(ipi_mask, dead_cpu)); + for_each_online_cpu(cpu) { + if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) + continue; + cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu)); + cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu)); + } + free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); + free_cpumask_var(per_cpu(ipi_mask, this_cpu)); return 0; } static int x2apic_cluster_probe(void) { + int cpu = smp_processor_id(); + int ret; + if (!x2apic_mode) return 0; - if (cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare", - x2apic_prepare_cpu, x2apic_dead_cpu) < 0) { + ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare", + x2apic_prepare_cpu, x2apic_dead_cpu); + if (ret < 0) { pr_err("Failed to register X2APIC_PREPARE\n"); return 0; } - init_x2apic_ldr(); + cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu)); return 1; } +static const struct cpumask *x2apic_cluster_target_cpus(void) +{ + return cpu_all_mask; +} + +/* + * Each x2apic cluster is an allocation domain. + */ +static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask) +{ + /* + * To minimize vector pressure, default case of boot, device bringup + * etc will use a single cpu for the interrupt destination. + * + * On explicit migration requests coming from irqbalance etc, + * interrupts will be routed to the x2apic cluster (cluster-id + * derived from the first cpu in the mask) members specified + * in the mask. 
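Modelled with plain bitmasks, the policy described above comes down to one branch (cpu sets invented): a default request collapses onto the chosen cpu, while an explicit request fans out to the cluster members inside the requested mask.

        #include <stdio.h>

        /* toy cpumasks: bit n = cpu n */
        static unsigned int cluster_domain(unsigned int request, unsigned int all,
                                           unsigned int cluster, int cpu)
        {
                return request == all ? 1u << cpu : (request & cluster);
        }

        int main(void)
        {
                unsigned int cluster = 0x0f, all = 0xff; /* cpus 0-3 share a cluster */

                printf("default  -> 0x%02x\n", cluster_domain(all, all, cluster, 0));  /* 0x01 */
                printf("explicit -> 0x%02x\n", cluster_domain(0x06, all, cluster, 0)); /* 0x06 */
                return 0;
        }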
+ */ + if (mask == x2apic_cluster_target_cpus()) + cpumask_copy(retmask, cpumask_of(cpu)); + else + cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); +} + static struct apic apic_x2apic_cluster __ro_after_init = { .name = "cluster x2apic", @@ -187,10 +235,12 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .irq_delivery_mode = dest_LowestPrio, .irq_dest_mode = 1, /* logical */ + .target_cpus = x2apic_cluster_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, + .vector_allocation_domain = cluster_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, @@ -203,7 +253,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .calc_dest_apicid = x2apic_calc_apicid, + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, .send_IPI = x2apic_send_IPI, .send_IPI_mask = x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index f8d9d69994e6..b94d35320f85 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -7,8 +7,7 @@ #include <linux/dmar.h> #include <asm/smp.h> -#include <asm/ipi.h> -#include "x2apic.h" +#include <asm/x2apic.h> int x2apic_phys; @@ -100,43 +99,6 @@ static int x2apic_phys_probe(void) return apic == &apic_x2apic_phys; } -/* Common x2apic functions, also used by x2apic_cluster */ -int x2apic_apic_id_valid(int apicid) -{ - return 1; -} - -int x2apic_apic_id_registered(void) -{ - return 1; -} - -void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) -{ - unsigned long cfg = __prepare_ICR(0, vector, dest); - native_x2apic_icr_write(cfg, apicid); -} - -unsigned int x2apic_get_apic_id(unsigned long id) -{ - return id; -} - -u32 x2apic_set_apic_id(unsigned int id) -{ - return id; -} - -int x2apic_phys_pkg_id(int initial_apicid, int index_msb) -{ - return initial_apicid >> index_msb; -} - -void x2apic_send_IPI_self(int vector) -{ - apic_write(APIC_SELF_IPI, vector); -} - static struct apic apic_x2apic_phys __ro_after_init = { .name = "physical x2apic", @@ -148,10 +110,12 @@ static struct apic apic_x2apic_phys __ro_after_init = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, @@ -164,7 +128,7 @@ static struct apic apic_x2apic_phys __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .calc_dest_apicid = apic_default_calc_apicid, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .send_IPI = x2apic_send_IPI, .send_IPI_mask = x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index e1b8e8bf6b3c..2915c6d06821 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -568,9 +568,16 @@ static void uv_init_apic_ldr(void) { } -static u32 apic_uv_calc_apicid(unsigned int cpu) +static int +uv_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, + unsigned int *apicid) { - return apic_default_calc_apicid(cpu) | uv_apicid_hibits; + int ret = default_cpu_mask_to_apicid(mask, irqdata, apicid); + + if (!ret) + *apicid |= uv_apicid_hibits; + + return ret; } static unsigned int x2apic_get_apic_id(unsigned long x) @@ -583,7 +590,7 @@ 
static unsigned int x2apic_get_apic_id(unsigned long x) return id; } -static u32 set_apic_id(unsigned int id) +static unsigned long set_apic_id(unsigned int id) { /* CHECKME: Do we need to mask out the xapic extra bits? */ return id; @@ -620,10 +627,12 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* Physical */ + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = uv_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -636,7 +645,7 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = set_apic_id, - .calc_dest_apicid = apic_uv_calc_apicid, + .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .send_IPI = uv_send_IPI_one, .send_IPI_mask = uv_send_IPI_mask, diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 86c4439f9d74..8f5cb2c7060c 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -114,7 +114,6 @@ static void make_8259A_irq(unsigned int irq) io_apic_irqs &= ~(1<<irq); irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq); enable_irq(irq); - lapic_assign_legacy_vector(irq, true); } /* diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index d985cef3984f..014cb2fc47ff 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -223,7 +223,7 @@ idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sy idt_init_desc(&desc, t); write_idt_entry(idt, t->vector, &desc); if (sys) - set_bit(t->vector, system_vectors); + set_bit(t->vector, used_vectors); } } @@ -311,14 +311,14 @@ void __init idt_setup_apic_and_irq_gates(void) idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true); - for_each_clear_bit_from(i, system_vectors, FIRST_SYSTEM_VECTOR) { + for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) { entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); set_intr_gate(i, entry); } - for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { + for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { #ifdef CONFIG_X86_LOCAL_APIC - set_bit(i, system_vectors); + set_bit(i, used_vectors); set_intr_gate(i, spurious_interrupt); #else entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); @@ -356,7 +356,7 @@ void idt_invalidate(void *addr) void __init update_intr_gate(unsigned int n, const void *addr) { - if (WARN_ON_ONCE(!test_bit(n, system_vectors))) + if (WARN_ON_ONCE(!test_bit(n, used_vectors))) return; set_intr_gate(n, addr); } @@ -364,6 +364,6 @@ void __init update_intr_gate(unsigned int n, const void *addr) void alloc_intr_gate(unsigned int n, const void *addr) { BUG_ON(n < FIRST_SYSTEM_VECTOR); - if (!test_and_set_bit(n, system_vectors)) + if (!test_and_set_bit(n, used_vectors)) set_intr_gate(n, addr); } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 49cfd9fe7589..52089c043160 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -134,7 +134,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_puts(p, " Machine check polls\n"); #endif #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) - if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) { + if (test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) { seq_printf(p, "%*s: ", prec, "HYP"); for_each_online_cpu(j) seq_printf(p, "%10u ", @@ -333,6 +333,105 @@ __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs) #ifdef 
CONFIG_HOTPLUG_CPU + +/* These two declarations are only used in check_irq_vectors_for_cpu_disable() + * below, which is protected by stop_machine(). Putting them on the stack + * results in a stack frame overflow. Dynamically allocating could result in a + * failure so declare these two cpumasks as global. + */ +static struct cpumask affinity_new, online_new; + +/* + * This cpu is going to be removed and its vectors migrated to the remaining + * online cpus. Check to see if there are enough vectors in the remaining cpus. + * This function is protected by stop_machine(). + */ +int check_irq_vectors_for_cpu_disable(void) +{ + unsigned int this_cpu, vector, this_count, count; + struct irq_desc *desc; + struct irq_data *data; + int cpu; + + this_cpu = smp_processor_id(); + cpumask_copy(&online_new, cpu_online_mask); + cpumask_clear_cpu(this_cpu, &online_new); + + this_count = 0; + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + desc = __this_cpu_read(vector_irq[vector]); + if (IS_ERR_OR_NULL(desc)) + continue; + /* + * Protect against concurrent action removal, affinity + * changes etc. + */ + raw_spin_lock(&desc->lock); + data = irq_desc_get_irq_data(desc); + cpumask_copy(&affinity_new, + irq_data_get_affinity_mask(data)); + cpumask_clear_cpu(this_cpu, &affinity_new); + + /* Do not count inactive or per-cpu irqs. */ + if (!irq_desc_has_action(desc) || irqd_is_per_cpu(data)) { + raw_spin_unlock(&desc->lock); + continue; + } + + raw_spin_unlock(&desc->lock); + /* + * A single irq may be mapped to multiple cpu's + * vector_irq[] (for example IOAPIC cluster mode). In + * this case we have two possibilities: + * + * 1) the resulting affinity mask is empty; that is, + * the down'd cpu is the last cpu in the irq's + * affinity mask, or + * + * 2) the resulting affinity mask is no longer a + * subset of the online cpus but the affinity mask is + * not zero; that is the down'd cpu is the last online + * cpu in a user set affinity mask. + */ + if (cpumask_empty(&affinity_new) || + !cpumask_subset(&affinity_new, &online_new)) + this_count++; + } + /* No need to check any further. */ + if (!this_count) + return 0; + + count = 0; + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + /* + * We scan from FIRST_EXTERNAL_VECTOR to first system + * vector. If the vector is marked in the used vectors + * bitmap or an irq is assigned to it, we don't count + * it as available. + * + * As this is an inaccurate snapshot anyway, we can do + * this w/o holding vector_lock. + */ + for (vector = FIRST_EXTERNAL_VECTOR; + vector < FIRST_SYSTEM_VECTOR; vector++) { + if (!test_bit(vector, used_vectors) && + IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector])) { + if (++count == this_count) + return 0; + } + } + } + + if (count < this_count) { + pr_warn("CPU %d disable failed: CPU has %u vectors assigned and there are only %u available.\n", + this_cpu, this_count, count); + return -ERANGE; + } + return 0; +} + /* A cpu has been removed from cpu_online_mask. Reset irq affinities.
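The check above is a plain headroom comparison: first count the irqs whose only online target is the outgoing cpu, then scan the surviving cpus for free, unreserved vectors and succeed as soon as enough have been seen. A compressed standalone model with invented numbers:

        #include <stdio.h>

        static int can_disable(unsigned int need, const unsigned int *free_vecs,
                               int ncpus)
        {
                unsigned int count = 0;
                int cpu;

                for (cpu = 0; cpu < ncpus; cpu++) {
                        count += free_vecs[cpu];
                        if (count >= need)
                                return 0;       /* enough headroom */
                }
                return -34;                     /* -ERANGE, as above */
        }

        int main(void)
        {
                unsigned int free_vecs[] = { 3, 2, 4 }; /* per remaining cpu */

                printf("need 7:  %d\n", can_disable(7, free_vecs, 3));  /* 0 */
                printf("need 12: %d\n", can_disable(12, free_vecs, 3)); /* -34 */
                return 0;
        }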
*/ void fixup_irqs(void) { diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 8da3e909e967..1e4094eba15e 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -61,6 +61,9 @@ void __init init_ISA_irqs(void) struct irq_chip *chip = legacy_pic->chip; int i; +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) + init_bsp_APIC(); +#endif legacy_pic->init(0); for (i = 0; i < nr_legacy_irqs(); i++) @@ -91,7 +94,6 @@ void __init native_init_IRQ(void) x86_init.irqs.pre_vector_init(); idt_setup_apic_and_irq_gates(); - lapic_assign_system_vectors(); if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) setup_irq(2, &irq2); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 8af2e8d0c0a1..be33a5c63d20 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -136,6 +136,18 @@ RESERVE_BRK(dmi_alloc, 65536); static __initdata unsigned long _brk_start = (unsigned long)__brk_base; unsigned long _brk_end = (unsigned long)__brk_base; +#ifdef CONFIG_X86_64 +int default_cpu_present_to_apicid(int mps_cpu) +{ + return __default_cpu_present_to_apicid(mps_cpu); +} + +int default_check_phys_apicid_present(int phys_apicid) +{ + return __default_check_phys_apicid_present(phys_apicid); +} +#endif + struct boot_params boot_params; /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3d01df7d7cf6..13bd986b7f90 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -253,14 +253,14 @@ static void notrace start_secondary(void *unused) check_tsc_sync_target(); /* - * Lock vector_lock, set CPU online and bring the vector - * allocator online. Online must be set with vector_lock held - * to prevent a concurrent irq setup/teardown from seeing a - * half valid vector space. + * Lock vector_lock and initialize the vectors on this cpu + * before setting the cpu online. We must set it online with + * vector_lock held to prevent a concurrent setup/teardown + * from seeing a half valid vector space. */ lock_vector_lock(); + setup_vector_irq(smp_processor_id()); set_cpu_online(smp_processor_id(), true); - lapic_online(); unlock_vector_lock(); cpu_set_state_online(smp_processor_id()); x86_platform.nmi_init(); @@ -1132,10 +1132,17 @@ static __init void disable_smp(void) cpumask_set_cpu(0, topology_core_cpumask(0)); } +enum { + SMP_OK, + SMP_NO_CONFIG, + SMP_NO_APIC, + SMP_FORCE_UP, +}; + /* * Various sanity checks. */ -static void __init smp_sanity_check(void) +static int __init smp_sanity_check(unsigned max_cpus) { preempt_disable(); @@ -1172,6 +1179,16 @@ static void __init smp_sanity_check(void) physid_set(hard_smp_processor_id(), phys_cpu_present_map); } + /* + * If we couldn't find an SMP configuration at boot time, + * get out of here now! + */ + if (!smp_found_config && !acpi_lapic) { + preempt_enable(); + pr_notice("SMP motherboard not detected\n"); + return SMP_NO_CONFIG; + } + /* * Should not be necessary because the MP table should list the boot * CPU too, but we do it for the sake of robustness anyway. @@ -1182,6 +1199,29 @@ static void __init smp_sanity_check(void) physid_set(hard_smp_processor_id(), phys_cpu_present_map); } preempt_enable(); + + /* + * If we couldn't find a local APIC, then get out of here now! + */ + if (APIC_INTEGRATED(boot_cpu_apic_version) && + !boot_cpu_has(X86_FEATURE_APIC)) { + if (!disable_apic) { + pr_err("BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + pr_err("... 
 forcing use of dummy APIC emulation (tell your hw vendor)\n");
+		}
+		return SMP_NO_APIC;
+	}
+
+	/*
+	 * If SMP should be disabled, then really disable it!
+	 */
+	if (!max_cpus) {
+		pr_info("SMP mode deactivated\n");
+		return SMP_FORCE_UP;
+	}
+
+	return SMP_OK;
 }
 
 static void __init smp_cpu_index_default(void)
@@ -1196,18 +1236,9 @@ static void __init smp_cpu_index_default(void)
 	}
 }
 
-static void __init smp_get_logical_apicid(void)
-{
-	if (x2apic_mode)
-		cpu0_logical_apicid = apic_read(APIC_LDR);
-	else
-		cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
-}
-
 /*
- * Prepare for SMP bootup.
- * @max_cpus: configured maximum number of CPUs, It is a legacy parameter
- * for common interface support.
+ * Prepare for SMP bootup. The MP table or ACPI has been read
+ * earlier. Just do some sanity checking here and enable APIC mode.
  */
 void __init native_smp_prepare_cpus(unsigned int max_cpus)
 {
@@ -1239,27 +1270,31 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 
 	set_cpu_sibling_map(0);
 
-	smp_sanity_check();
-
-	switch (apic_intr_mode) {
-	case APIC_PIC:
-	case APIC_VIRTUAL_WIRE_NO_CONFIG:
+	switch (smp_sanity_check(max_cpus)) {
+	case SMP_NO_CONFIG:
 		disable_smp();
+		if (APIC_init_uniprocessor())
+			pr_notice("Local APIC not detected. Using dummy APIC emulation.\n");
 		return;
-	case APIC_SYMMETRIC_IO_NO_ROUTING:
+	case SMP_NO_APIC:
 		disable_smp();
-		/* Setup local timer */
-		x86_init.timers.setup_percpu_clockev();
 		return;
-	case APIC_VIRTUAL_WIRE:
-	case APIC_SYMMETRIC_IO:
+	case SMP_FORCE_UP:
+		disable_smp();
+		apic_bsp_setup(false);
+		return;
+	case SMP_OK:
 		break;
 	}
 
-	/* Setup local timer */
-	x86_init.timers.setup_percpu_clockev();
+	if (read_apic_id() != boot_cpu_physical_apicid) {
+		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
+		     read_apic_id(), boot_cpu_physical_apicid);
+		/* Or can we switch back to PIC here? */
+	}
 
-	smp_get_logical_apicid();
+	default_setup_apic_routing();
+	cpu0_logical_apicid = apic_bsp_setup(false);
 
 	pr_info("CPU0: ");
 	print_cpu_info(&cpu_data(0));
@@ -1313,6 +1348,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 
 	nmi_selftest();
 	impress_friends();
+	setup_ioapic_dest();
 	mtrr_aps_init();
 }
 
@@ -1471,14 +1507,13 @@ void cpu_disable_common(void)
 	remove_cpu_from_maps(cpu);
 	unlock_vector_lock();
 	fixup_irqs();
-	lapic_offline();
 }
 
 int native_cpu_disable(void)
 {
 	int ret;
 
-	ret = lapic_can_unplug_cpu();
+	ret = check_irq_vectors_for_cpu_disable();
 	if (ret)
 		return ret;
 
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 749d189f8cd4..879af864d99a 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -85,11 +85,6 @@ void __init hpet_time_init(void)
 
 static __init void x86_late_time_init(void)
 {
 	x86_init.timers.timer_init();
-	/*
-	 * After PIT/HPET timers init, select and setup
-	 * the final interrupt mode for delivering IRQs.
-	 */
-	x86_init.irqs.intr_mode_init();
 	tsc_init();
 }
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 989514c94a55..4e2eb01cb3bf 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -71,7 +71,7 @@
 #include <asm/proto.h>
 #endif
 
-DECLARE_BITMAP(system_vectors, NR_VECTORS);
+DECLARE_BITMAP(used_vectors, NR_VECTORS);
 
 static inline void cond_local_irq_enable(struct pt_regs *regs)
 {
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 44685fb2a192..b034b1b14b9c 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -26,6 +26,9 @@
 
 #define TOPOLOGY_REGISTER_OFFSET 0x10
 
+/* Flag below is initialized once during vSMP PCI initialization. */
+static int irq_routing_comply = 1;
+
 #if defined CONFIG_PCI && defined CONFIG_PARAVIRT
 /*
  * Interrupt control on vSMPowered systems:
@@ -102,6 +105,9 @@ static void __init set_vsmp_pv_ops(void)
 	if (cap & ctl & BIT(8)) {
 		ctl &= ~BIT(8);
 
+		/* Interrupt routing set to ignore */
+		irq_routing_comply = 0;
+
 #ifdef CONFIG_PROC_FS
 		/* Don't let users change irq affinity via procfs */
 		no_irq_affinity = 1;
@@ -205,10 +211,23 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
 	return hard_smp_processor_id() >> index_msb;
 }
 
+/*
+ * In vSMP, all cpus should be capable of handling interrupts, regardless of
+ * the APIC used.
+ */
+static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask,
+					  const struct cpumask *mask)
+{
+	cpumask_setall(retmask);
+}
+
 static void vsmp_apic_post_init(void)
 {
 	/* need to update phys_pkg_id */
 	apic->phys_pkg_id = apicid_phys_pkg_id;
+
+	if (!irq_routing_comply)
+		apic->vector_allocation_domain = fill_vector_allocation_domain;
 }
 
 void __init vsmp_init(void)
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 1151ccd72ce9..c8fa4cd31903 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -57,7 +57,6 @@ struct x86_init_ops x86_init __initdata = {
 		.pre_vector_init	= init_ISA_irqs,
 		.intr_init		= native_init_IRQ,
 		.trap_init		= x86_init_noop,
-		.intr_mode_init		= apic_intr_mode_init
 	},
 
 	.oem = {
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 6b830d4cb4c8..30434b8708f2 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -31,7 +31,7 @@ static unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
 	return 0xfd;
 }
 
-static u32 xen_set_apic_id(unsigned int x)
+static unsigned long xen_set_apic_id(unsigned int x)
 {
 	WARN_ON(1);
 	return x;
@@ -161,10 +161,12 @@ static struct apic xen_pv_apic = {
 	/* .irq_delivery_mode - used in native_compose_msi_msg only */
 	/* .irq_dest_mode - used in native_compose_msi_msg only */
 
+	.target_cpus			= default_target_cpus,
 	.disable_esr			= 0,
 	/* .dest_logical - default_send_IPI_ use it but we use our own. */
 	.check_apicid_used		= default_check_apicid_used, /* Used on 32-bit */
 
+	.vector_allocation_domain	= flat_vector_allocation_domain,
 	.init_apic_ldr			= xen_noop, /* setup_local_APIC calls it */
 
 	.ioapic_phys_id_map		= default_ioapic_phys_id_map, /* Used on 32-bit */
@@ -177,7 +179,7 @@ static struct apic xen_pv_apic = {
 	.get_apic_id			= xen_get_apic_id,
 	.set_apic_id			= xen_set_apic_id, /* Can be NULL on 32-bit. */
 
-	.calc_dest_apicid		= apic_flat_calc_apicid,
+	.cpu_mask_to_apicid		= flat_cpu_mask_to_apicid,
 
 #ifdef CONFIG_SMP
 	.send_IPI_mask			= xen_send_IPI_mask,
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5b2b3f3f6531..fbd054d6ac97 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1230,7 +1230,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	x86_platform.get_nmi_reason = xen_get_nmi_reason;
 
 	x86_init.resources.memory_setup = xen_memory_setup;
-	x86_init.irqs.intr_mode_init = x86_init_noop;
 	x86_init.oem.arch_setup = xen_arch_setup;
 	x86_init.oem.banner = xen_banner;
 
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 7d5eb004091d..e2a739001c8a 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4178,25 +4178,16 @@ static void irq_remapping_free(struct irq_domain *domain, unsigned int virq,
 	irq_domain_free_irqs_common(domain, virq, nr_irqs);
 }
 
-static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
-			       struct amd_ir_data *ir_data,
-			       struct irq_2_irte *irte_info,
-			       struct irq_cfg *cfg);
-
 static int irq_remapping_activate(struct irq_domain *domain,
 				  struct irq_data *irq_data, bool early)
 {
 	struct amd_ir_data *data = irq_data->chip_data;
 	struct irq_2_irte *irte_info = &data->irq_2_irte;
 	struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid];
-	struct irq_cfg *cfg = irqd_cfg(irq_data);
 
-	if (!iommu)
-		return 0;
-
-	iommu->irte_ops->activate(data->entry, irte_info->devid,
-				  irte_info->index);
-	amd_ir_update_irte(irq_data, iommu, data, irte_info, cfg);
+	if (iommu)
+		iommu->irte_ops->activate(data->entry, irte_info->devid,
+					  irte_info->index);
 	return 0;
 }
 
@@ -4284,22 +4275,6 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
 	return modify_irte_ga(irte_info->devid, irte_info->index, irte, ir_data);
 }
 
-
-static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
-			       struct amd_ir_data *ir_data,
-			       struct irq_2_irte *irte_info,
-			       struct irq_cfg *cfg)
-{
-
-	/*
-	 * Atomically updates the IRTE with the new destination, vector
-	 * and flushes the interrupt entry cache.
-	 */
-	iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid,
-				      irte_info->index, cfg->vector,
-				      cfg->dest_apicid);
-}
-
 static int amd_ir_set_affinity(struct irq_data *data,
 			       const struct cpumask *mask, bool force)
 {
@@ -4317,7 +4292,13 @@ static int amd_ir_set_affinity(struct irq_data *data,
 	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
 		return ret;
 
-	amd_ir_update_irte(data, iommu, ir_data, irte_info, cfg);
+	/*
+	 * Atomically updates the IRTE with the new destination, vector
+	 * and flushes the interrupt entry cache.
+	 */
+	iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid,
+				      irte_info->index, cfg->vector, cfg->dest_apicid);
+
 	/*
 	 * After this point, all the interrupts will start arriving
 	 * at the new destination. So, time to cleanup the previous
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 76a193c7fcfc..324163330eaa 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -1122,24 +1122,6 @@ struct irq_remap_ops intel_irq_remap_ops = {
 	.get_irq_domain		= intel_get_irq_domain,
 };
 
-static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
-{
-	struct intel_ir_data *ir_data = irqd->chip_data;
-	struct irte *irte = &ir_data->irte_entry;
-	struct irq_cfg *cfg = irqd_cfg(irqd);
-
-	/*
-	 * Atomically updates the IRTE with the new destination, vector
-	 * and flushes the interrupt entry cache.
-	 */
-	irte->vector = cfg->vector;
-	irte->dest_id = IRTE_DEST(cfg->dest_apicid);
-
-	/* Update the hardware only if the interrupt is in remapped mode. */
-	if (!force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
-		modify_irte(&ir_data->irq_2_iommu, irte);
-}
-
 /*
  * Migrate the IO-APIC irq in the presence of intr-remapping.
  *
@@ -1158,15 +1140,27 @@ static int intel_ir_set_affinity(struct irq_data *data,
 				 const struct cpumask *mask, bool force)
 {
-	struct irq_data *parent = data->parent_data;
+	struct intel_ir_data *ir_data = data->chip_data;
+	struct irte *irte = &ir_data->irte_entry;
 	struct irq_cfg *cfg = irqd_cfg(data);
+	struct irq_data *parent = data->parent_data;
 	int ret;
 
 	ret = parent->chip->irq_set_affinity(parent, mask, force);
 	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
 		return ret;
 
-	intel_ir_reconfigure_irte(data, false);
+	/*
+	 * Atomically updates the IRTE with the new destination, vector
+	 * and flushes the interrupt entry cache.
+	 */
+	irte->vector = cfg->vector;
+	irte->dest_id = IRTE_DEST(cfg->dest_apicid);
+
+	/* Update the hardware only if the interrupt is in remapped mode. */
+	if (ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
+		modify_irte(&ir_data->irq_2_iommu, irte);
+
 	/*
 	 * After this point, all the interrupts will start arriving
 	 * at the new destination. So, time to cleanup the previous
@@ -1399,7 +1393,9 @@ static void intel_irq_remapping_free(struct irq_domain *domain,
 static int intel_irq_remapping_activate(struct irq_domain *domain,
 					struct irq_data *irq_data, bool early)
 {
-	intel_ir_reconfigure_irte(irq_data, true);
+	struct intel_ir_data *data = irq_data->chip_data;
+
+	modify_irte(&data->irq_2_iommu, &data->irte_entry);
 	return 0;
 }
 
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index e06607167858..496ed9130600 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1441,8 +1441,6 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
 		pci_msi_domain_update_chip_ops(info);
 
 	info->flags |= MSI_FLAG_ACTIVATE_EARLY;
-	if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE))
-		info->flags |= MSI_FLAG_MUST_REACTIVATE;
 
 	domain = msi_create_irq_domain(fwnode, info, parent);
 	if (!domain)
diff --git a/init/main.c b/init/main.c
index dfec3809e740..642b88bd3c9b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -664,12 +664,12 @@ asmlinkage __visible void __init start_kernel(void)
 	debug_objects_mem_init();
 	setup_per_cpu_pageset();
 	numa_policy_init();
-	acpi_early_init();
 	if (late_time_init)
 		late_time_init();
 	calibrate_delay();
 	pid_idr_init();
 	anon_vma_init();
+	acpi_early_init();
 #ifdef CONFIG_X86
 	if (efi_enabled(EFI_RUNTIME_SERVICES))
 		efi_enter_virtual_mode();
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 89e355866450..ac1a3e29d3b9 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -100,9 +100,6 @@ config IRQ_TIMINGS
 config GENERIC_IRQ_MATRIX_ALLOCATOR
 	bool
 
-config GENERIC_IRQ_RESERVATION_MODE
-	bool
-
 config IRQ_DOMAIN_DEBUG
 	bool "Expose hardware/virtual IRQ mapping via debugfs"
 	depends on IRQ_DOMAIN && DEBUG_FS
-- 
2.15.0
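
[Aside, not part of the patch: the vSMP and Xen hunks above reinstate the
apic->vector_allocation_domain() callback of the older allocator. It answers
the question "when a vector is allocated for CPU X, on which CPUs must that
vector be reserved?". The standalone user-space sketch below models the two
policies visible in the diff; the cpumask type and helpers are simplified
stand-ins for <linux/cpumask.h>, and the callback's third affinity-mask
argument is omitted, so treat this as an illustration of the semantics only.]

#include <stdio.h>

#define NR_CPUS 8

struct cpumask { unsigned long bits; };

static void cpumask_clear(struct cpumask *m)            { m->bits = 0; }
static void cpumask_setall(struct cpumask *m)           { m->bits = (1UL << NR_CPUS) - 1; }
static void cpumask_set_cpu(int cpu, struct cpumask *m) { m->bits |= 1UL << cpu; }

/* Flat model: the vector must be reserved on all CPUs (cf. the vSMP hunk,
 * whose fill_vector_allocation_domain() likewise does cpumask_setall()). */
static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
	(void)cpu;		/* target CPU does not narrow the domain */
	cpumask_setall(retmask);
}

/* Physical model: the vector is tied to exactly the one target CPU. */
static void default_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
	cpumask_clear(retmask);
	cpumask_set_cpu(cpu, retmask);
}

int main(void)
{
	struct cpumask domain;

	fill_vector_allocation_domain(2, &domain);
	printf("flat domain for CPU 2: 0x%02lx\n", domain.bits);	/* 0xff */

	default_vector_allocation_domain(2, &domain);
	printf("phys domain for CPU 2: 0x%02lx\n", domain.bits);	/* 0x04 */
	return 0;
}

[The trade-off the two policies make: a flat, all-CPUs domain lets an
affinity change retarget an interrupt without allocating a new vector, but
every allocation consumes that vector number on each CPU in the domain; a
single-CPU domain keeps the per-CPU vector spaces independent at the cost of
reallocating on migration.]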