Currently, it is effectively impossible to offline CPUs, since most CPUs will have vmbus channels attached to them. So, as mentioned in commit d570aec0f2154 ("Drivers: hv: vmbus: Synchronize init_vp_index() vs. CPU hotplug"), rebind channels associated with CPUs that a user is trying to offline to a new "randomly" selected CPU. Cc: Boqun Feng <boqun.feng@xxxxxxxxx> Cc: Wei Liu <wei.liu@xxxxxxxxxx> Signed-off-by: Hamza Mahfooz <hamzamahfooz@xxxxxxxxxxxxxxxxxxx> --- drivers/hv/hv.c | 57 +++++++++++++++++++++++++++++++----------- drivers/hv/vmbus_drv.c | 51 +++++++++++++++++++++---------------- include/linux/hyperv.h | 1 + 3 files changed, 73 insertions(+), 36 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 36d9ba097ff5..42270a7a7a19 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -433,13 +433,40 @@ static bool hv_synic_event_pending(void) return pending; } +static int hv_pick_new_cpu(struct vmbus_channel *channel, + unsigned int current_cpu) +{ + int ret = 0; + int cpu; + + lockdep_assert_held(&vmbus_connection.channel_mutex); + + /* + * We can't assume that the relevant interrupts will be sent before + * the cpu is offlined on older versions of hyperv. + */ + if (vmbus_proto_version < VERSION_WIN10_V5_3) + return -EBUSY; + + cpus_read_lock(); + cpu = cpumask_next(get_random_u32_below(nr_cpu_ids), cpu_online_mask); + + if (cpu >= nr_cpu_ids || cpu == current_cpu) + cpu = VMBUS_CONNECT_CPU; + + ret = vmbus_channel_set_cpu(channel, cpu); + cpus_read_unlock(); + + return ret; +} + /* * hv_synic_cleanup - Cleanup routine for hv_synic_init(). */ int hv_synic_cleanup(unsigned int cpu) { struct vmbus_channel *channel, *sc; - bool channel_found = false; + int ret = 0; if (vmbus_connection.conn_state != CONNECTED) goto always_cleanup; @@ -456,31 +483,31 @@ int hv_synic_cleanup(unsigned int cpu) /* * Search for channels which are bound to the CPU we're about to - * cleanup. 
In case we find one and vmbus is still connected, we - * fail; this will effectively prevent CPU offlining. - * - * TODO: Re-bind the channels to different CPUs. + * cleanup. */ mutex_lock(&vmbus_connection.channel_mutex); list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { if (channel->target_cpu == cpu) { - channel_found = true; - break; + ret = hv_pick_new_cpu(channel, cpu); + + if (ret) { + mutex_unlock(&vmbus_connection.channel_mutex); + return ret; + } } list_for_each_entry(sc, &channel->sc_list, sc_list) { if (sc->target_cpu == cpu) { - channel_found = true; - break; + ret = hv_pick_new_cpu(channel, cpu); + + if (ret) { + mutex_unlock(&vmbus_connection.channel_mutex); + return ret; + } } } - if (channel_found) - break; } mutex_unlock(&vmbus_connection.channel_mutex); - if (channel_found) - return -EBUSY; - /* * channel_found == false means that any channels that were previously * assigned to the CPU have been reassigned elsewhere with a call of @@ -497,5 +524,5 @@ int hv_synic_cleanup(unsigned int cpu) hv_synic_disable_regs(cpu); - return 0; + return ret; } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 2892b8da20a5..c256e02fa66b 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1611,16 +1611,15 @@ static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf) { return sprintf(buf, "%u\n", channel->target_cpu); } -static ssize_t target_cpu_store(struct vmbus_channel *channel, - const char *buf, size_t count) + +int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu) { - u32 target_cpu, origin_cpu; - ssize_t ret = count; + u32 origin_cpu; + int ret = 0; - if (vmbus_proto_version < VERSION_WIN10_V4_1) - return -EIO; + lockdep_assert_held(&vmbus_connection.channel_mutex); - if (sscanf(buf, "%uu", &target_cpu) != 1) + if (vmbus_proto_version < VERSION_WIN10_V4_1) return -EIO; /* Validate target_cpu for the cpumask_test_cpu() operation below. 
*/ @@ -1630,22 +1629,17 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel, if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ))) return -EINVAL; - /* No CPUs should come up or down during this. */ - cpus_read_lock(); - - if (!cpu_online(target_cpu)) { - cpus_read_unlock(); + if (!cpu_online(target_cpu)) return -EINVAL; - } /* - * Synchronizes target_cpu_store() and channel closure: + * Synchronizes vmbus_channel_set_cpu() and channel closure: * * { Initially: state = CHANNEL_OPENED } * * CPU1 CPU2 * - * [target_cpu_store()] [vmbus_disconnect_ring()] + * [vmbus_channel_set_cpu()] [vmbus_disconnect_ring()] * * LOCK channel_mutex LOCK channel_mutex * LOAD r1 = state LOAD r2 = state @@ -1660,7 +1654,6 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel, * Note. The host processes the channel messages "sequentially", in * the order in which they are received on a per-partition basis. */ - mutex_lock(&vmbus_connection.channel_mutex); /* * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels; @@ -1668,17 +1661,17 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel, */ if (channel->state != CHANNEL_OPENED_STATE) { ret = -EIO; - goto cpu_store_unlock; + goto end; } origin_cpu = channel->target_cpu; if (target_cpu == origin_cpu) - goto cpu_store_unlock; + goto end; if (vmbus_send_modifychannel(channel, hv_cpu_number_to_vp_number(target_cpu))) { ret = -EIO; - goto cpu_store_unlock; + goto end; } /* @@ -1708,9 +1701,25 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel, origin_cpu, target_cpu); } -cpu_store_unlock: - mutex_unlock(&vmbus_connection.channel_mutex); +end: + return ret; +} + +static ssize_t target_cpu_store(struct vmbus_channel *channel, + const char *buf, size_t count) +{ + ssize_t ret = count; + u32 target_cpu; + + if (sscanf(buf, "%uu", &target_cpu) != 1) + return -EIO; + + mutex_lock(&vmbus_connection.channel_mutex); + cpus_read_lock(); + ret = 
vmbus_channel_set_cpu(channel, target_cpu); cpus_read_unlock(); + mutex_unlock(&vmbus_connection.channel_mutex); + return ret; } static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 02a226bcf0ed..25e9e982f1b0 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1670,6 +1670,7 @@ int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id, const guid_t *shv_host_servie_id); int vmbus_send_modifychannel(struct vmbus_channel *channel, u32 target_vp); void vmbus_set_event(struct vmbus_channel *channel); +int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu); /* Get the start of the ring buffer. */ static inline void * -- 2.47.1