ltykernel@xxxxxxxxx writes: > From: Tianyu Lan <Tianyu.Lan@xxxxxxxxxxxxx> > > Customer reported Hyper-V VM still responded network traffic > ack packets after kernel panic with kernel parameter "panic=0". > This is because the vmbus driver interrupt handler still works > on the panic cpu after kernel panic. Panic cpu falls into > infinite loop of panic() with interrupt enabled at that point. > Vmbus driver can still handle network traffic. > > This confuses remote service that the panic system is still > alive when it gets ack packets. Unload vmbus channel in hv panic > callback and fix it. > > vmbus_initiate_unload() may be called twice during panic process > (e.g., hyperv_panic_event() and hv_crash_handler()). So check > and set connection state in vmbus_initiate_unload() to resolve > the reentrancy issue. > > Signed-off-by: Tianyu Lan <Tianyu.Lan@xxxxxxxxxxxxx> > --- > drivers/hv/channel_mgmt.c | 5 +++++ > drivers/hv/vmbus_drv.c | 17 +++++++++-------- > 2 files changed, 14 insertions(+), 8 deletions(-) > > diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c > index 0370364169c4..893493f2b420 100644 > --- a/drivers/hv/channel_mgmt.c > +++ b/drivers/hv/channel_mgmt.c > @@ -839,6 +839,9 @@ void vmbus_initiate_unload(bool crash) > { > struct vmbus_channel_message_header hdr; > > + if (vmbus_connection.conn_state == DISCONNECTED) > + return; > + To make this less racy, can we do something like if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED) return; ? 
> /* Pre-Win2012R2 hosts don't support reconnect */ > if (vmbus_proto_version < VERSION_WIN8_1) > return; > @@ -857,6 +860,8 @@ void vmbus_initiate_unload(bool crash) > wait_for_completion(&vmbus_connection.unload_event); > else > vmbus_wait_for_unload(); > + > + vmbus_connection.conn_state = DISCONNECTED; > } > > static void check_ready_for_resume_event(void) > diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c > index 029378c27421..b56b9fb9bd90 100644 > --- a/drivers/hv/vmbus_drv.c > +++ b/drivers/hv/vmbus_drv.c > @@ -53,9 +53,12 @@ static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, > { > struct pt_regs *regs; > > - regs = current_pt_regs(); > + vmbus_initiate_unload(true); > > - hyperv_report_panic(regs, val); > + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { With Michael's efforts to make the code in drivers/hv arch-agnostic, I think we need a better, arch-neutral way to do this check. > + regs = current_pt_regs(); > + hyperv_report_panic(regs, val); > + } > return NOTIFY_DONE; > } > > @@ -1391,10 +1394,12 @@ static int vmbus_bus_init(void) > } > > register_die_notifier(&hyperv_die_block); > - atomic_notifier_chain_register(&panic_notifier_list, > - &hyperv_panic_block); > } > > + /* Vmbus channel is unloaded in panic callback when panic happens.*/ > + atomic_notifier_chain_register(&panic_notifier_list, > + &hyperv_panic_block); > + > vmbus_request_offers(); > > return 0; > @@ -2204,8 +2209,6 @@ static int vmbus_bus_suspend(struct device *dev) > > vmbus_initiate_unload(false); > > - vmbus_connection.conn_state = DISCONNECTED; > - > /* Reset the event for the next resume. 
*/ > reinit_completion(&vmbus_connection.ready_for_resume_event); > > @@ -2289,7 +2292,6 @@ static void hv_kexec_handler(void) > { > hv_stimer_global_cleanup(); > vmbus_initiate_unload(false); > - vmbus_connection.conn_state = DISCONNECTED; > /* Make sure conn_state is set as hv_synic_cleanup checks for it */ > mb(); > cpuhp_remove_state(hyperv_cpuhp_online); > @@ -2306,7 +2308,6 @@ static void hv_crash_handler(struct pt_regs *regs) > * doing the cleanup for current CPU only. This should be sufficient > * for kdump. > */ > - vmbus_connection.conn_state = DISCONNECTED; > cpu = smp_processor_id(); > hv_stimer_cleanup(cpu); > hv_synic_disable_regs(cpu); -- Vitaly