The offer and rescind works are currently scheduled on the so called "connect CPU". However, this is not really needed: we can synchronize the works by relying on the usage of the offer_in_progress counter and of the channel_mutex mutex. This synchronization is already in place. So, remove this unnecessary "bind to the connect CPU" constraint and update the inline comments accordingly. Suggested-by: Dexuan Cui <decui@xxxxxxxxxxxxx> Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@xxxxxxxxx> --- drivers/hv/channel_mgmt.c | 21 ++++++++++++++++----- drivers/hv/vmbus_drv.c | 39 ++++++++++++++++++++++++++++----------- 2 files changed, 44 insertions(+), 16 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 501c43c5851dc..aed8db59a5ff8 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -1028,11 +1028,22 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) * offer comes in first and then the rescind. * Since we process these events in work elements, * and with preemption, we may end up processing - * the events out of order. Given that we handle these - * work elements on the same CPU, this is possible only - * in the case of preemption. In any case wait here - * until the offer processing has moved beyond the - * point where the channel is discoverable. + * the events out of order. We rely on the synchronization + * provided by offer_in_progress and by channel_mutex for + * ordering these events: + * + * { Initially: offer_in_progress = 1 } + * + * CPU1 CPU2 + * + * [vmbus_process_offer()] [vmbus_onoffer_rescind()] + * + * LOCK channel_mutex WAIT_ON offer_in_progress == 0 + * DECREMENT offer_in_progress LOCK channel_mutex + * INSERT chn_list SEARCH chn_list + * UNLOCK channel_mutex UNLOCK channel_mutex + * + * Forbids: CPU2's SEARCH from *not* seeing CPU1's INSERT */ while (atomic_read(&vmbus_connection.offer_in_progress) != 0) { diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index a491d44362f9f..41ec0c95df33f 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1075,8 +1075,9 @@ void vmbus_on_msg_dpc(unsigned long data) /* * The host can generate a rescind message while we * may still be handling the original offer. We deal with - * this condition by ensuring the processing is done on the - * same CPU. + * this condition by relying on the synchronization provided + * by offer_in_progress and by channel_mutex. See also the + * inline comments in vmbus_onoffer_rescind(). */ switch (hdr->msgtype) { case CHANNELMSG_RESCIND_CHANNELOFFER: @@ -1098,16 +1099,34 @@ void vmbus_on_msg_dpc(unsigned long data) * work queue: the RESCIND handler can not start to * run before the OFFER handler finishes. */ - schedule_work_on(VMBUS_CONNECT_CPU, - &ctx->work); + schedule_work(&ctx->work); break; case CHANNELMSG_OFFERCHANNEL: + /* + * The host sends the offer message of a given channel + * before sending the rescind message of the same + * channel. These messages are sent to the guest's + * connect CPU; the guest then starts processing them + * in the tasklet handler on this CPU: + * + * VMBUS_CONNECT_CPU + * + * [vmbus_on_msg_dpc()] + * atomic_inc() // CHANNELMSG_OFFERCHANNEL + * queue_work() + * ... + * [vmbus_on_msg_dpc()] + * schedule_work() // CHANNELMSG_RESCIND_CHANNELOFFER + * + * We rely on the memory-ordering properties of the + * queue_work() and schedule_work() primitives, which + * guarantee that the atomic increment will be visible + * to the CPUs which will execute the offer & rescind + * works by the time these works will start execution. + */ atomic_inc(&vmbus_connection.offer_in_progress); - queue_work_on(VMBUS_CONNECT_CPU, - vmbus_connection.work_queue, - &ctx->work); - break; + fallthrough; default: queue_work(vmbus_connection.work_queue, &ctx->work); @@ -1151,9 +1170,7 @@ static void vmbus_force_channel_rescinded(struct vmbus_channel *channel) INIT_WORK(&ctx->work, vmbus_onmessage_work); - queue_work_on(VMBUS_CONNECT_CPU, - vmbus_connection.work_queue, - &ctx->work); + queue_work(vmbus_connection.work_queue, &ctx->work); } #endif /* CONFIG_PM_SLEEP */ -- 2.24.0