>Subject: [PATCH 11/11] scsi: storvsc: Re-init stor_chns when a channel
>interrupt is re-assigned
>
>For each storvsc_device, storvsc keeps track of the channel target CPUs
>associated to the device (alloced_cpus) and it uses this information to fill a
>"cache" (stor_chns) mapping CPU->channel according to a certain heuristic.
>Update the alloced_cpus mask and the stor_chns array when a channel of the
>storvsc device is re-assigned to a different CPU.
>
>Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@xxxxxxxxx>
>Cc: "James E.J. Bottomley" <jejb@xxxxxxxxxxxxx>
>Cc: "Martin K. Petersen" <martin.petersen@xxxxxxxxxx>
>Cc: <linux-scsi@xxxxxxxxxxxxxxx>
>---
> drivers/hv/vmbus_drv.c     |  4 ++
> drivers/scsi/storvsc_drv.c | 95 ++++++++++++++++++++++++++++++++++----
> include/linux/hyperv.h     |  3 ++
> 3 files changed, 94 insertions(+), 8 deletions(-)
>
>diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
>index 0f204640c50c2..f0be41bfcbf57 100644
>--- a/drivers/hv/vmbus_drv.c
>+++ b/drivers/hv/vmbus_drv.c
>@@ -1750,6 +1750,10 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
> 	 * in on a CPU that is different from the channel target_cpu value.
> 	 */
>
>+	if (channel->change_target_cpu_callback)
>+		(*channel->change_target_cpu_callback)(channel,
>+				channel->target_cpu, target_cpu);
>+
> 	channel->target_cpu = target_cpu;
> 	channel->target_vp = hv_cpu_number_to_vp_number(target_cpu);
> 	channel->numa_node = cpu_to_node(target_cpu);
>
>diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
>index fb41636519ee8..a680592b9d32a 100644
>--- a/drivers/scsi/storvsc_drv.c
>+++ b/drivers/scsi/storvsc_drv.c
>@@ -621,6 +621,63 @@ static inline struct storvsc_device *get_in_stor_device(
>
> }
>
>+void storvsc_change_target_cpu(struct vmbus_channel *channel, u32 old,
>+			       u32 new)
>+{
>+	struct storvsc_device *stor_device;
>+	struct vmbus_channel *cur_chn;
>+	bool old_is_alloced = false;
>+	struct hv_device *device;
>+	unsigned long flags;
>+	int cpu;
>+
>+	device = channel->primary_channel ?
>+			channel->primary_channel->device_obj
>+				: channel->device_obj;
>+	stor_device = get_out_stor_device(device);
>+	if (!stor_device)
>+		return;
>+
>+	/* See storvsc_do_io() -> get_og_chn(). */
>+	spin_lock_irqsave(&device->channel->lock, flags);
>+
>+	/*
>+	 * Determines if the storvsc device has other channels assigned to
>+	 * the "old" CPU to update the alloced_cpus mask and the stor_chns
>+	 * array.
>+	 */
>+	if (device->channel != channel && device->channel->target_cpu == old) {
>+		cur_chn = device->channel;
>+		old_is_alloced = true;
>+		goto old_is_alloced;
>+	}
>+	list_for_each_entry(cur_chn, &device->channel->sc_list, sc_list) {
>+		if (cur_chn == channel)
>+			continue;
>+		if (cur_chn->target_cpu == old) {
>+			old_is_alloced = true;
>+			goto old_is_alloced;
>+		}
>+	}
>+
>+old_is_alloced:
>+	if (old_is_alloced)
>+		WRITE_ONCE(stor_device->stor_chns[old], cur_chn);
>+	else
>+		cpumask_clear_cpu(old, &stor_device->alloced_cpus);

If the old cpu is not allocated, is it still necessary to do a
cpumask_clear_cpu?

>+
>+	/* "Flush" the stor_chns array. */
>+	for_each_possible_cpu(cpu) {
>+		if (stor_device->stor_chns[cpu] && !cpumask_test_cpu(
>+					cpu, &stor_device->alloced_cpus))
>+			WRITE_ONCE(stor_device->stor_chns[cpu], NULL);
>+	}
>+
>+	WRITE_ONCE(stor_device->stor_chns[new], channel);
>+	cpumask_set_cpu(new, &stor_device->alloced_cpus);
>+
>+	spin_unlock_irqrestore(&device->channel->lock, flags);
>+}
>+
> static void handle_sc_creation(struct vmbus_channel *new_sc)
> {
> 	struct hv_device *device = new_sc->primary_channel->device_obj;
>@@ -648,6 +705,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
> 		return;
> 	}
>
>+	new_sc->change_target_cpu_callback = storvsc_change_target_cpu;
>+
> 	/* Add the sub-channel to the array of available channels. */
> 	stor_device->stor_chns[new_sc->target_cpu] = new_sc;
> 	cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
>@@ -876,6 +935,8 @@ static int storvsc_channel_init(struct hv_device *device, bool is_fc)
> 	if (stor_device->stor_chns == NULL)
> 		return -ENOMEM;
>
>+	device->channel->change_target_cpu_callback = storvsc_change_target_cpu;
>+
> 	stor_device->stor_chns[device->channel->target_cpu] = device->channel;
> 	cpumask_set_cpu(device->channel->target_cpu,
> 			&stor_device->alloced_cpus);
>@@ -1248,8 +1309,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
> 	const struct cpumask *node_mask;
> 	int num_channels, tgt_cpu;
>
>-	if (stor_device->num_sc == 0)
>+	if (stor_device->num_sc == 0) {
>+		stor_device->stor_chns[q_num] = stor_device->device->channel;
> 		return stor_device->device->channel;
>+	}
>
> 	/*
> 	 * Our channel array is sparsley populated and we
>@@ -1258,7 +1321,6 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
> 	 * The strategy is simple:
> 	 * I. Ensure NUMA locality
> 	 * II. Distribute evenly (best effort)
>-	 * III. Mapping is persistent.
> 	 */
>
> 	node_mask = cpumask_of_node(cpu_to_node(q_num));
>@@ -1268,8 +1330,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
> 		if (cpumask_test_cpu(tgt_cpu, node_mask))
> 			num_channels++;
> 	}
>-	if (num_channels == 0)
>+	if (num_channels == 0) {
>+		stor_device->stor_chns[q_num] = stor_device->device->channel;
> 		return stor_device->device->channel;
>+	}
>
> 	hash_qnum = q_num;
> 	while (hash_qnum >= num_channels)
>@@ -1295,6 +1359,7 @@ static int storvsc_do_io(struct hv_device *device,
> 	struct storvsc_device *stor_device;
> 	struct vstor_packet *vstor_packet;
> 	struct vmbus_channel *outgoing_channel, *channel;
>+	unsigned long flags;
> 	int ret = 0;
> 	const struct cpumask *node_mask;
> 	int tgt_cpu;
>@@ -1308,10 +1373,11 @@ static int storvsc_do_io(struct hv_device *device,
>
> 	request->device = device;
> 	/*
>-	 * Select an an appropriate channel to send the request out.
>+	 * Select an appropriate channel to send the request out.
> 	 */
>-	if (stor_device->stor_chns[q_num] != NULL) {
>-		outgoing_channel = stor_device->stor_chns[q_num];
>+	/* See storvsc_change_target_cpu(). */
>+	outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
>+	if (outgoing_channel != NULL) {
> 		if (outgoing_channel->target_cpu == q_num) {
> 			/*
> 			 * Ideally, we want to pick a different channel if
>@@ -1324,7 +1390,10 @@ static int storvsc_do_io(struct hv_device *device,
> 					continue;
> 				if (tgt_cpu == q_num)
> 					continue;
>-				channel = stor_device->stor_chns[tgt_cpu];
>+				channel = READ_ONCE(
>+					stor_device->stor_chns[tgt_cpu]);
>+				if (channel == NULL)
>+					continue;
> 				if (hv_get_avail_to_write_percent(
> 							&channel->outbound)
> 						> ring_avail_percent_lowater) {
>@@ -1350,7 +1419,10 @@ static int storvsc_do_io(struct hv_device *device,
> 		for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
> 			if (cpumask_test_cpu(tgt_cpu, node_mask))
> 				continue;
>-			channel = stor_device->stor_chns[tgt_cpu];
>+			channel = READ_ONCE(
>+				stor_device->stor_chns[tgt_cpu]);
>+			if (channel == NULL)
>+				continue;
> 			if (hv_get_avail_to_write_percent(
> 						&channel->outbound)
> 					> ring_avail_percent_lowater) {
>@@ -1360,7 +1432,14 @@ static int storvsc_do_io(struct hv_device *device,
> 			}
> 		}
> 	} else {
>+		spin_lock_irqsave(&device->channel->lock, flags);
>+		outgoing_channel = stor_device->stor_chns[q_num];
>+		if (outgoing_channel != NULL) {
>+			spin_unlock_irqrestore(&device->channel->lock, flags);

Checking outgoing_channel again seems unnecessary. Why not just call
get_og_chn()?
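I.e., couldn't the else branch simply be something like the below
(untested sketch, at the cost of occasionally recomputing a mapping that
a concurrent storvsc_change_target_cpu() has already installed)?

	} else {
		spin_lock_irqsave(&device->channel->lock, flags);
		outgoing_channel = get_og_chn(stor_device, q_num);
		spin_unlock_irqrestore(&device->channel->lock, flags);
	}

The re-check under the lock only seems to save that recomputation; it
doesn't look necessary for correctness.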
>+			goto found_channel;
>+		}
> 		outgoing_channel = get_og_chn(stor_device, q_num);
>+		spin_unlock_irqrestore(&device->channel->lock, flags);
> 	}

With device->channel->lock, we now have one more lock on the I/O issuing
path. That doesn't seem optimal: the lock is there to protect against
storvsc_change_target_cpu(), which shouldn't need to block concurrent
I/O issuers. Maybe moving to RCU is a better approach?
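Something along these lines, say (a rough, untested sketch; it assumes
the stor_chns[] entries are annotated __rcu, that updaters still
serialize against each other with a lock, and that a grace period
elapses before a channel can be torn down):

	/* I/O issuing path (storvsc_do_io()), no lock: */
	rcu_read_lock();
	outgoing_channel = rcu_dereference(stor_device->stor_chns[q_num]);
	...
	rcu_read_unlock();

	/* Update path (storvsc_change_target_cpu()): */
	rcu_assign_pointer(stor_device->stor_chns[old], NULL);
	rcu_assign_pointer(stor_device->stor_chns[new], channel);
	synchronize_rcu();	/* wait out in-flight I/O issuers */

That would keep the synchronization cost on the (rare) channel
re-assignment path rather than on every I/O.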
Thanks,
Long

>
> found_channel:
>diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
>index edfcd42319ef3..9018b89614b78 100644
>--- a/include/linux/hyperv.h
>+++ b/include/linux/hyperv.h
>@@ -773,6 +773,9 @@ struct vmbus_channel {
> 	void (*onchannel_callback)(void *context);
> 	void *channel_callback_context;
>
>+	void (*change_target_cpu_callback)(struct vmbus_channel *channel,
>+			u32 old, u32 new);
>+
> 	/*
> 	 * Synchronize channel scheduling and channel removal; see the inline
> 	 * comments in vmbus_chan_sched() and vmbus_reset_channel_cb().
>--
>2.24.0