Re: [PATCH V3 1/2] tcm_vhost: Use vq->private_data to indicate if the endpoint is setup

"Michael S. Tsirkin" <mst@xxxxxxxxxx> · Mon, 8 Apr 2013 10:10:08 +0300

On Wed, Apr 03, 2013 at 02:17:37PM +0800, Asias He wrote:
> Currently, vs->vs_endpoint is used to indicate whether the endpoint is
> set up or not. It is set or cleared in vhost_scsi_set_endpoint() or
> vhost_scsi_clear_endpoint() under the vs->dev.mutex lock. However, when
> we check it in vhost_scsi_handle_vq(), we ignore the lock.
> 
> Instead of using vs->vs_endpoint and the vs->dev.mutex lock to
> indicate the status of the endpoint, we use the per-virtqueue
> vq->private_data to indicate it. This way, we only need to take the
> vq->mutex lock, which is per queue, so concurrent multiqueue
> processing has less lock contention. Further, on the read side of
> vq->private_data, we do not even need to take the lock if it is accessed
> in the vhost worker thread, because it is protected by "vhost rcu".
> 
> Signed-off-by: Asias He <asias@xxxxxxxxxx>

Not strictly 3.9 material itself but needed for the next one.

Acked-by: Michael S. Tsirkin <mst@xxxxxxxxxx>

> ---
>  drivers/vhost/tcm_vhost.c | 144 ++++++++++++++++++++++++++++++++--------------
>  1 file changed, 101 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
> index 61f9eab..11121ea 100644
> --- a/drivers/vhost/tcm_vhost.c
> +++ b/drivers/vhost/tcm_vhost.c
> @@ -65,9 +65,8 @@ enum {
>  
>  struct vhost_scsi {
>  	/* Protected by vhost_scsi->dev.mutex */
> -	struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET];
> +	struct tcm_vhost_tpg **vs_tpg;
>  	char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
> -	bool vs_endpoint;
>  
>  	struct vhost_dev dev;
>  	struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ];
> @@ -573,6 +572,7 @@ static void tcm_vhost_submission_work(struct work_struct *work)
>  static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
>  	struct vhost_virtqueue *vq)
>  {
> +	struct tcm_vhost_tpg **vs_tpg;
>  	struct virtio_scsi_cmd_req v_req;
>  	struct tcm_vhost_tpg *tv_tpg;
>  	struct tcm_vhost_cmd *tv_cmd;
> @@ -581,8 +581,16 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
>  	int head, ret;
>  	u8 target;
>  
> -	/* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
> -	if (unlikely(!vs->vs_endpoint))
> +	/*
> +	 * We can handle the vq only after the endpoint is setup by calling the
> +	 * VHOST_SCSI_SET_ENDPOINT ioctl.
> +	 *
> +	 * TODO: Check that we are running from vhost_worker which acts
> +	 * as read-side critical section for vhost kind of RCU.
> +	 * See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h
> +	 */
> +	vs_tpg = rcu_dereference_check(vq->private_data, 1);
> +	if (!vs_tpg)
>  		return;
>  
>  	mutex_lock(&vq->mutex);
> @@ -652,7 +660,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
>  
>  		/* Extract the tpgt */
>  		target = v_req.lun[1];
> -		tv_tpg = ACCESS_ONCE(vs->vs_tpg[target]);
> +		tv_tpg = ACCESS_ONCE(vs_tpg[target]);
>  
>  		/* Target does not exist, fail the request */
>  		if (unlikely(!tv_tpg)) {
> @@ -771,6 +779,20 @@ static void vhost_scsi_handle_kick(struct vhost_work *work)
>  	vhost_scsi_handle_vq(vs, vq);
>  }
>  
> +static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index)
> +{
> +	vhost_poll_flush(&vs->dev.vqs[index].poll);
> +}
> +
> +static void vhost_scsi_flush(struct vhost_scsi *vs)
> +{
> +	int i;
> +
> +	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
> +		vhost_scsi_flush_vq(vs, i);
> +	vhost_work_flush(&vs->dev, &vs->vs_completion_work);
> +}
> +
>  /*
>   * Called from vhost_scsi_ioctl() context to walk the list of available
>   * tcm_vhost_tpg with an active struct tcm_vhost_nexus
> @@ -781,8 +803,10 @@ static int vhost_scsi_set_endpoint(
>  {
>  	struct tcm_vhost_tport *tv_tport;
>  	struct tcm_vhost_tpg *tv_tpg;
> +	struct tcm_vhost_tpg **vs_tpg;
> +	struct vhost_virtqueue *vq;
> +	int index, ret, i, len;
>  	bool match = false;
> -	int index, ret;
>  
>  	mutex_lock(&vs->dev.mutex);
>  	/* Verify that ring has been setup correctly. */
> @@ -794,6 +818,15 @@ static int vhost_scsi_set_endpoint(
>  		}
>  	}
>  
> +	len = sizeof(vs_tpg[0]) * VHOST_SCSI_MAX_TARGET;
> +	vs_tpg = kzalloc(len, GFP_KERNEL);
> +	if (!vs_tpg) {
> +		mutex_unlock(&vs->dev.mutex);
> +		return -ENOMEM;
> +	}
> +	if (vs->vs_tpg)
> +		memcpy(vs_tpg, vs->vs_tpg, len);
> +
>  	mutex_lock(&tcm_vhost_mutex);
>  	list_for_each_entry(tv_tpg, &tcm_vhost_list, tv_tpg_list) {
>  		mutex_lock(&tv_tpg->tv_tpg_mutex);
> @@ -808,14 +841,15 @@ static int vhost_scsi_set_endpoint(
>  		tv_tport = tv_tpg->tport;
>  
>  		if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) {
> -			if (vs->vs_tpg[tv_tpg->tport_tpgt]) {
> +			if (vs->vs_tpg && vs->vs_tpg[tv_tpg->tport_tpgt]) {
>  				mutex_unlock(&tv_tpg->tv_tpg_mutex);
>  				mutex_unlock(&tcm_vhost_mutex);
>  				mutex_unlock(&vs->dev.mutex);
> +				kfree(vs_tpg);
>  				return -EEXIST;
>  			}
>  			tv_tpg->tv_tpg_vhost_count++;
> -			vs->vs_tpg[tv_tpg->tport_tpgt] = tv_tpg;
> +			vs_tpg[tv_tpg->tport_tpgt] = tv_tpg;
>  			smp_mb__after_atomic_inc();
>  			match = true;
>  		}
> @@ -826,12 +860,26 @@ static int vhost_scsi_set_endpoint(
>  	if (match) {
>  		memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn,
>  		       sizeof(vs->vs_vhost_wwpn));
> -		vs->vs_endpoint = true;
> +		for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
> +			vq = &vs->vqs[i];
> +			/* Flushing the vhost_work acts as synchronize_rcu */
> +			mutex_lock(&vq->mutex);
> +			rcu_assign_pointer(vq->private_data, vs_tpg);
> +			mutex_unlock(&vq->mutex);
> +		}
>  		ret = 0;
>  	} else {
>  		ret = -EEXIST;
>  	}
>  
> +	/*
> +	 * Act as synchronize_rcu to make sure access to
> +	 * old vs->vs_tpg is finished.
> +	 */
> +	vhost_scsi_flush(vs);
> +	kfree(vs->vs_tpg);
> +	vs->vs_tpg = vs_tpg;
> +
>  	mutex_unlock(&vs->dev.mutex);
>  	return ret;
>  }
> @@ -842,6 +890,8 @@ static int vhost_scsi_clear_endpoint(
>  {
>  	struct tcm_vhost_tport *tv_tport;
>  	struct tcm_vhost_tpg *tv_tpg;
> +	struct vhost_virtqueue *vq;
> +	bool match = false;
>  	int index, ret, i;
>  	u8 target;
>  
> @@ -853,9 +903,14 @@ static int vhost_scsi_clear_endpoint(
>  			goto err_dev;
>  		}
>  	}
> +
> +	if (!vs->vs_tpg) {
> +		mutex_unlock(&vs->dev.mutex);
> +		return 0;
> +	}
> +
>  	for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) {
>  		target = i;
> -
>  		tv_tpg = vs->vs_tpg[target];
>  		if (!tv_tpg)
>  			continue;
> @@ -877,10 +932,27 @@ static int vhost_scsi_clear_endpoint(
>  		}
>  		tv_tpg->tv_tpg_vhost_count--;
>  		vs->vs_tpg[target] = NULL;
> -		vs->vs_endpoint = false;
> +		match = true;
>  		mutex_unlock(&tv_tpg->tv_tpg_mutex);
>  	}
> +	if (match) {
> +		for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
> +			vq = &vs->vqs[i];
> +			/* Flushing the vhost_work acts as synchronize_rcu */
> +			mutex_lock(&vq->mutex);
> +			rcu_assign_pointer(vq->private_data, NULL);
> +			mutex_unlock(&vq->mutex);
> +		}
> +	}
> +	/*
> +	 * Act as synchronize_rcu to make sure access to
> +	 * old vs->vs_tpg is finished.
> +	 */
> +	vhost_scsi_flush(vs);
> +	kfree(vs->vs_tpg);
> +	vs->vs_tpg = NULL;
>  	mutex_unlock(&vs->dev.mutex);
> +
>  	return 0;
>  
>  err_tpg:
> @@ -890,6 +962,24 @@ err_dev:
>  	return ret;
>  }
>  
> +static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
> +{
> +	if (features & ~VHOST_FEATURES)
> +		return -EOPNOTSUPP;
> +
> +	mutex_lock(&vs->dev.mutex);
> +	if ((features & (1 << VHOST_F_LOG_ALL)) &&
> +	    !vhost_log_access_ok(&vs->dev)) {
> +		mutex_unlock(&vs->dev.mutex);
> +		return -EFAULT;
> +	}
> +	vs->dev.acked_features = features;
> +	smp_wmb();
> +	vhost_scsi_flush(vs);
> +	mutex_unlock(&vs->dev.mutex);
> +	return 0;
> +}
> +
>  static int vhost_scsi_open(struct inode *inode, struct file *f)
>  {
>  	struct vhost_scsi *s;
> @@ -930,38 +1020,6 @@ static int vhost_scsi_release(struct inode *inode, struct file *f)
>  	return 0;
>  }
>  
> -static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index)
> -{
> -	vhost_poll_flush(&vs->dev.vqs[index].poll);
> -}
> -
> -static void vhost_scsi_flush(struct vhost_scsi *vs)
> -{
> -	int i;
> -
> -	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
> -		vhost_scsi_flush_vq(vs, i);
> -	vhost_work_flush(&vs->dev, &vs->vs_completion_work);
> -}
> -
> -static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
> -{
> -	if (features & ~VHOST_FEATURES)
> -		return -EOPNOTSUPP;
> -
> -	mutex_lock(&vs->dev.mutex);
> -	if ((features & (1 << VHOST_F_LOG_ALL)) &&
> -	    !vhost_log_access_ok(&vs->dev)) {
> -		mutex_unlock(&vs->dev.mutex);
> -		return -EFAULT;
> -	}
> -	vs->dev.acked_features = features;
> -	smp_wmb();
> -	vhost_scsi_flush(vs);
> -	mutex_unlock(&vs->dev.mutex);
> -	return 0;
> -}
> -
>  static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl,
>  				unsigned long arg)
>  {
> -- 
> 1.8.1.4
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html