From: Michael Kelley <mhklinux@xxxxxxxxxxx> hv_synic_cleanup() currently prevents a CPU from going offline if any VMBus channel IRQs are targeted at that CPU. However, current code has a race in that an IRQ could be affinitized to the CPU after the check in hv_synic_cleanup() and before the CPU is removed from cpu_online_mask. Any channel interrupts could be lost and the channel would hang. Fix this by adding a flag for each CPU indicating if the synic is online. Filter the new affinity with these flags so that vmbus_irq_set_affinity() doesn't pick a CPU where the synic is already offline. Also add a spin lock so that vmbus_irq_set_affinity() changing the channel target_cpu and sending the MODIFYCHANNEL message to Hyper-V are atomic with respect to the checks made in hv_synic_cleanup(). hv_synic_cleanup() needs these operations to be atomic so that it can correctly count the MODIFYCHANNEL messages that need to be ack'ed by Hyper-V. Signed-off-by: Michael Kelley <mhklinux@xxxxxxxxxxx> --- drivers/hv/connection.c | 1 + drivers/hv/hv.c | 22 ++++++++++++++++++++-- drivers/hv/hyperv_vmbus.h | 2 ++ drivers/hv/vmbus_drv.c | 34 ++++++++++++++++++++++++++++------ 4 files changed, 51 insertions(+), 8 deletions(-) diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index a105eecdeec2..b44ce3d39135 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -213,6 +213,7 @@ int vmbus_connect(void) INIT_LIST_HEAD(&vmbus_connection.chn_list); mutex_init(&vmbus_connection.channel_mutex); + spin_lock_init(&vmbus_connection.set_affinity_lock); /* * Setup the vmbus event connection for channel interrupt diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 76658dfc5008..89e491219129 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -338,6 +338,8 @@ int hv_synic_init(unsigned int cpu) { hv_synic_enable_regs(cpu); + cpumask_set_cpu(cpu, &vmbus_connection.synic_online); + hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT); return 0; @@ -513,6 +515,17 @@ int hv_synic_cleanup(unsigned int cpu) * TODO: Re-bind the channels to different CPUs. */ mutex_lock(&vmbus_connection.channel_mutex); + spin_lock(&vmbus_connection.set_affinity_lock); + + /* + * Once the check for channels assigned to this CPU is complete, we + * must not allow a channel to be assigned to this CPU. So mark + * the synic as no longer online. This cpumask is checked in + * vmbus_irq_set_affinity() to prevent setting the affinity of + * an IRQ to such a CPU. + */ + cpumask_clear_cpu(cpu, &vmbus_connection.synic_online); + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { if (channel->target_cpu == cpu) { channel_found = true; @@ -527,10 +540,11 @@ int hv_synic_cleanup(unsigned int cpu) if (channel_found) break; } + spin_unlock(&vmbus_connection.set_affinity_lock); mutex_unlock(&vmbus_connection.channel_mutex); if (channel_found) - return -EBUSY; + goto set_online; /* * channel_found == false means that any channels that were previously @@ -547,7 +561,7 @@ int hv_synic_cleanup(unsigned int cpu) if (hv_synic_event_pending()) { pr_err("Events pending when trying to offline CPU %d\n", cpu); - return -EBUSY; + goto set_online; } } @@ -557,4 +571,8 @@ int hv_synic_cleanup(unsigned int cpu) hv_synic_disable_regs(cpu); return 0; + +set_online: + cpumask_set_cpu(cpu, &vmbus_connection.synic_online); + return -EBUSY; } diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 571b2955b38e..92ae5af10778 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -263,6 +263,8 @@ struct vmbus_connection { struct fwnode_handle *vmbus_fwnode; struct irq_domain *vmbus_irq_domain; struct irq_chip vmbus_irq_chip; + cpumask_t synic_online; + spinlock_t set_affinity_lock; /* * VM-wide counts of MODIFYCHANNEL messages sent and completed. diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 87f2f3436136..3430ad42d7ba 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1351,6 +1351,14 @@ int vmbus_irq_set_affinity(struct irq_data *data, return -EINVAL; } + /* + * The spin lock must be held so that checking synic_online, sending + * the MODIFYCHANNEL message, and setting channel->target_cpu are + * atomic with respect to hv_synic_cleanup() clearing the CPU in + * synic_online and doing the search. + */ + spin_lock(&vmbus_connection.set_affinity_lock); + /* Don't consider CPUs that are isolated */ if (housekeeping_enabled(HK_TYPE_MANAGED_IRQ)) cpumask_and(&tempmask, dest, @@ -1367,30 +1375,39 @@ int vmbus_irq_set_affinity(struct irq_data *data, origin_cpu = channel->target_cpu; if (cpumask_test_cpu(origin_cpu, &tempmask)) { target_cpu = origin_cpu; + spin_unlock(&vmbus_connection.set_affinity_lock); goto update_effective; } /* * Pick a CPU from the new affinity mask. As a simple heuristic to * spread out the selection when the mask contains multiple CPUs, - * start with whatever CPU was last selected. + * start with whatever CPU was last selected. Also filter out any + * CPUs where synic_online isn't set -- these CPUs are in the process + * of going offline and must not have channel interrupts assigned + * to them. */ + cpumask_and(&tempmask, &tempmask, &vmbus_connection.synic_online); target_cpu = cpumask_next_wrap(next_cpu, &tempmask, nr_cpu_ids, false); - if (target_cpu >= nr_cpu_ids) - return -EINVAL; + if (target_cpu >= nr_cpu_ids) { + ret = -EINVAL; + goto unlock; + } next_cpu = target_cpu; /* * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels; * avoid sending the message and fail here for such channels. */ - if (channel->state != CHANNEL_OPENED_STATE) - return -EIO; + if (channel->state != CHANNEL_OPENED_STATE) { + ret = -EIO; + goto unlock; + } ret = vmbus_send_modifychannel(channel, hv_cpu_number_to_vp_number(target_cpu)); if (ret) - return ret; + goto unlock; /* * Warning. At this point, there is *no* guarantee that the host will @@ -1408,6 +1425,7 @@ int vmbus_irq_set_affinity(struct irq_data *data, */ channel->target_cpu = target_cpu; + spin_unlock(&vmbus_connection.set_affinity_lock); /* See init_vp_index(). */ if (hv_is_perf_channel(channel)) @@ -1422,6 +1440,10 @@ int vmbus_irq_set_affinity(struct irq_data *data, update_effective: irq_data_update_effective_affinity(data, cpumask_of(target_cpu)); return IRQ_SET_MASK_OK; + +unlock: + spin_unlock(&vmbus_connection.set_affinity_lock); + return ret; } /* -- 2.25.1