On Tue, 16 Jan 2024 21:11:33 +0800, Heng Qi <hengqi@xxxxxxxxxxxxxxxxx> wrote:
> Accumulate multiple request commands to kick the device once,
> and obtain the processing results of the corresponding commands
> asynchronously. The batch command method is used to optimize the
> CPU overhead of the DIM worker caused by the guest being busy
> waiting for the command response result.
>
> On an 8-queue device, without this patch, the guest cpu overhead
> due to waiting for cvq could be 10+% and above. With this patch,
> the corresponding overhead is basically invisible.
>
> Signed-off-by: Heng Qi <hengqi@xxxxxxxxxxxxxxxxx>
> ---
>  drivers/net/virtio_net.c | 185 ++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 158 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index e4305ad..9f22c85 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -33,6 +33,8 @@
>  module_param(gso, bool, 0444);
>  module_param(napi_tx, bool, 0644);
>
> +#define BATCH_CMD 25
> +
>  /* FIXME: MTU in config. */
>  #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
>  #define GOOD_COPY_LEN	128
> @@ -134,6 +136,9 @@ struct virtnet_interrupt_coalesce {
>  };
>
>  struct virtnet_batch_coal {
> +	struct virtio_net_ctrl_hdr hdr;
> +	virtio_net_ctrl_ack status;
> +	__u8 usable;
>  	__le32 num_entries;
>  	struct virtio_net_ctrl_coal_vq coal_vqs[];
>  };
> @@ -299,6 +304,7 @@ struct virtnet_info {
>
>  	/* Work struct for delayed refilling if we run low on memory. */
>  	struct delayed_work refill;
> +	struct delayed_work get_cvq;
>
>  	/* Is delayed refill enabled? */
>  	bool refill_enabled;
> @@ -326,6 +332,7 @@ struct virtnet_info {
>  	bool rx_dim_enabled;
>
>  	/* Interrupt coalescing settings */
> +	int cvq_cmd_nums;
>  	struct virtnet_batch_coal *batch_coal;
>  	struct virtnet_interrupt_coalesce intr_coal_tx;
>  	struct virtnet_interrupt_coalesce intr_coal_rx;
> @@ -2512,6 +2519,46 @@ static int virtnet_tx_resize(struct virtnet_info *vi,
>  	return err;
>  }
>
> +static bool virtnet_process_dim_cmd(struct virtnet_info *vi, void *res)
> +{
> +	struct virtnet_batch_coal *batch_coal;
> +	u16 queue;
> +	int i;
> +
> +	if (res != ((void *)vi)) {
> +		batch_coal = (struct virtnet_batch_coal *)res;
> +		batch_coal->usable = true;
> +		vi->cvq_cmd_nums--;
> +		for (i = 0; i < batch_coal->num_entries; i++) {
> +			queue = batch_coal->coal_vqs[i].vqn / 2;
> +			vi->rq[queue].dim.state = DIM_START_MEASURE;
> +		}
> +	} else {
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
> +static bool virtnet_cvq_response(struct virtnet_info *vi, bool poll)
> +{
> +	unsigned tmp;
> +	void *res;
> +
> +	if (!poll) {
> +		while ((res = virtqueue_get_buf(vi->cvq, &tmp)) &&
> +		       !virtqueue_is_broken(vi->cvq))
> +			virtnet_process_dim_cmd(vi, res);
> +		return 0;
> +	}
> +
> +	while (!(res = virtqueue_get_buf(vi->cvq, &tmp)) &&
> +	       !virtqueue_is_broken(vi->cvq))
> +		cpu_relax();
> +
> +	return virtnet_process_dim_cmd(vi, res);
> +}
> +
>  /*
>   * Send command via the control virtqueue and check status.
>   * Commands supported by the hypervisor, as indicated by feature bits,
>   * should never fail unless improperly formatted.
>   */
> @@ -2521,7 +2568,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>  				 struct scatterlist *out)
>  {
>  	struct scatterlist *sgs[4], hdr, stat;
> -	unsigned out_num = 0, tmp;
> +	unsigned out_num = 0;
>  	int ret;
>
>  	/* Caller should know better */
> @@ -2555,9 +2602,9 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>  	/* Spin for a response, the kick causes an ioport write, trapping
>  	 * into the hypervisor, so the request should be handled immediately.
>  	 */
> -	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
> -	       !virtqueue_is_broken(vi->cvq))
> -		cpu_relax();
> +	while (true)
> +		if (virtnet_cvq_response(vi, true))
> +			break;
>
>  	return vi->ctrl->status == VIRTIO_NET_OK;
>  }
> @@ -2709,6 +2756,7 @@ static int virtnet_close(struct net_device *dev)
>  		cancel_work_sync(&vi->rq[i].dim.work);
>  	}
>
> +	cancel_delayed_work_sync(&vi->get_cvq);
>  	return 0;
>  }
>
> @@ -3520,22 +3568,99 @@ static int virtnet_send_notf_coal_vq_cmds(struct virtnet_info *vi,
>  	return 0;
>  }
>
> +static bool virtnet_add_dim_command(struct virtnet_info *vi,
> +				    struct virtnet_batch_coal *ctrl)
> +{
> +	struct scatterlist *sgs[4], hdr, stat, out;
> +	unsigned out_num = 0;
> +	int ret;
> +
> +	/* Caller should know better */
> +	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
> +
> +	ctrl->hdr.class = VIRTIO_NET_CTRL_NOTF_COAL;
> +	ctrl->hdr.cmd = VIRTIO_NET_CTRL_NOTF_COAL_VQS_SET;
> +
> +	/* Add header */
> +	sg_init_one(&hdr, &ctrl->hdr, sizeof(ctrl->hdr));
> +	sgs[out_num++] = &hdr;
> +
> +	/* Add body */
> +	sg_init_one(&out, &ctrl->num_entries, sizeof(ctrl->num_entries) +
> +		    ctrl->num_entries * sizeof(struct virtnet_coal_entry));
> +	sgs[out_num++] = &out;
> +
> +	/* Add return status. */
> +	ctrl->status = VIRTIO_NET_OK;
> +	sg_init_one(&stat, &ctrl->status, sizeof(ctrl->status));
> +	sgs[out_num] = &stat;
> +
> +	BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
> +	ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, ctrl, GFP_ATOMIC);
> +	if (ret < 0) {
> +		dev_warn(&vi->vdev->dev, "Failed to add sgs for command vq: %d\n.", ret);
> +		return false;
> +	}
> +
> +	virtqueue_kick(vi->cvq);
> +
> +	ctrl->usable = false;
> +	vi->cvq_cmd_nums++;
> +
> +	return true;
> +}

We should merge this into virtnet_send_command() instead of duplicating
most of its body.
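Roughly the direction I mean, as an untested sketch: one submit helper
shared by both paths, taking the header/status that the caller owns,
with virtnet_send_command() keeping the synchronous spin on top of it.
virtnet_cvq_submit() is a name I made up here:

static bool virtnet_cvq_submit(struct virtnet_info *vi,
			       struct virtio_net_ctrl_hdr *hdr,
			       virtio_net_ctrl_ack *status,
			       struct scatterlist *out, void *cookie)
{
	struct scatterlist *sgs[4], hdr_sg, stat_sg;
	unsigned int out_num = 0;

	/* Caller should know better */
	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));

	/* Header: class/cmd already filled in by the caller. */
	sg_init_one(&hdr_sg, hdr, sizeof(*hdr));
	sgs[out_num++] = &hdr_sg;

	/* Optional command body. */
	if (out)
		sgs[out_num++] = out;

	/* Device-writable status, polled now (sync) or reaped later (async). */
	*status = ~0;
	sg_init_one(&stat_sg, status, sizeof(*status));
	sgs[out_num] = &stat_sg;

	if (virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, cookie, GFP_ATOMIC) < 0)
		return false;

	return virtqueue_kick(vi->cvq);
}

static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
				 struct scatterlist *out)
{
	vi->ctrl->hdr.class = class;
	vi->ctrl->hdr.cmd = cmd;

	/* The cookie vi means "synchronous command" in virtnet_cvq_response(). */
	if (!virtnet_cvq_submit(vi, &vi->ctrl->hdr, &vi->ctrl->status, out, vi))
		return false;

	/* Spin for the response; batched DIM replies get reaped on the way. */
	while (!virtnet_cvq_response(vi, true))
		;

	return vi->ctrl->status == VIRTIO_NET_OK;
}

The DIM path would then call virtnet_cvq_submit(vi, &ctrl->hdr,
&ctrl->status, &out, ctrl) and return without spinning.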
> +
> +static void get_cvq_work(struct work_struct *work)
> +{
> +	struct virtnet_info *vi =
> +		container_of(work, struct virtnet_info, get_cvq.work);
> +
> +	if (!rtnl_trylock()) {
> +		schedule_delayed_work(&vi->get_cvq, 5);
> +		return;
> +	}
> +
> +	if (!vi->cvq_cmd_nums)
> +		goto ret;
> +
> +	virtnet_cvq_response(vi, false);
> +
> +	if (vi->cvq_cmd_nums)
> +		schedule_delayed_work(&vi->get_cvq, 5);
> +
> +ret:
> +	rtnl_unlock();
> +}
> +
>  static void virtnet_rx_dim_work(struct work_struct *work)
>  {
>  	struct dim *dim = container_of(work, struct dim, work);
>  	struct receive_queue *rq = container_of(dim,
>  			struct receive_queue, dim);
>  	struct virtnet_info *vi = rq->vq->vdev->priv;
> +	struct virtnet_batch_coal *avail_coal;
>  	struct dim_cq_moder update_moder;
> -	struct virtnet_batch_coal *coal = vi->batch_coal;
> -	struct scatterlist sgs;
> -	int i, j = 0;
> +	int i, j = 0, position;
> +	u8 *buf;
>
>  	if (!rtnl_trylock()) {
>  		schedule_work(&dim->work);
>  		return;
>  	}
>
> +	if (vi->cvq_cmd_nums == BATCH_CMD || vi->cvq->num_free < 3 ||
> +	    vi->cvq->num_free <= (virtqueue_get_vring_size(vi->cvq) / 3))
> +		virtnet_cvq_response(vi, true);
> +
> +	for (i = 0; i < BATCH_CMD; i++) {
> +		buf = (u8 *)vi->batch_coal;
> +		position = i * (sizeof(struct virtnet_batch_coal) +
> +				vi->max_queue_pairs * sizeof(struct virtnet_coal_entry));
> +		avail_coal = (struct virtnet_batch_coal *)(&buf[position]);
> +		if (avail_coal->usable)
> +			break;

A free list, or kmalloc'ing each buffer individually, would be a better
way to find a usable buffer than this linear scan; see the sketch
further down.

> +	}
> +
>  	/* Each rxq's work is queued by "net_dim()->schedule_work()"
>  	 * in response to NAPI traffic changes. Note that dim->profile_ix
>  	 * for each rxq is updated prior to the queuing action.
> @@ -3552,30 +3677,26 @@ static void virtnet_rx_dim_work(struct work_struct *work)
>  		update_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
>  		if (update_moder.usec != rq->intr_coal.max_usecs ||
>  		    update_moder.pkts != rq->intr_coal.max_packets) {
> -			coal->coal_vqs[j].vqn = cpu_to_le16(rxq2vq(i));
> -			coal->coal_vqs[j].coal.max_usecs = cpu_to_le32(update_moder.usec);
> -			coal->coal_vqs[j].coal.max_packets = cpu_to_le32(update_moder.pkts);
> +			avail_coal->coal_vqs[j].vqn = cpu_to_le16(rxq2vq(i));
> +			avail_coal->coal_vqs[j].coal.max_usecs = cpu_to_le32(update_moder.usec);
> +			avail_coal->coal_vqs[j].coal.max_packets = cpu_to_le32(update_moder.pkts);
>  			rq->intr_coal.max_usecs = update_moder.usec;
>  			rq->intr_coal.max_packets = update_moder.pkts;
>  			j++;
> -		}
> +		} else if (dim->state == DIM_APPLY_NEW_PROFILE)
> +			dim->state = DIM_START_MEASURE;
>  	}
>
>  	if (!j)
>  		goto ret;
>
> -	coal->num_entries = cpu_to_le32(j);
> -	sg_init_one(&sgs, coal, sizeof(struct virtnet_batch_coal) +
> -		    j * sizeof(struct virtio_net_ctrl_coal_vq));
> -	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
> -				  VIRTIO_NET_CTRL_NOTF_COAL_VQS_SET,
> -				  &sgs))
> -		dev_warn(&vi->vdev->dev, "Failed to add dim command\n.");
> +	avail_coal->num_entries = cpu_to_le32(j);
> +	if (!virtnet_add_dim_command(vi, avail_coal))
> +		goto ret;
>
> -	for (i = 0; i < j; i++) {
> -		rq = &vi->rq[(coal->coal_vqs[i].vqn) / 2];
> -		rq->dim.state = DIM_START_MEASURE;
> -	}
> +	virtnet_cvq_response(vi, false);

Is this non-blocking poll actually useful here? The device has most
likely not completed the command yet at this point.
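To make the free-list idea concrete, here is an untested sketch. It
assumes a new list_head member at the front of struct
virtnet_batch_coal and a free_coal_list head in struct virtnet_info,
both of which I am making up here:

struct virtnet_batch_coal {
	struct list_head node;		/* on vi->free_coal_list when idle */
	struct virtio_net_ctrl_hdr hdr;
	virtio_net_ctrl_ack status;
	__le32 num_entries;
	struct virtio_net_ctrl_coal_vq coal_vqs[];
};

static struct virtnet_batch_coal *virtnet_get_free_coal(struct virtnet_info *vi)
{
	struct virtnet_batch_coal *coal;

	/* O(1) instead of scanning BATCH_CMD slots for ->usable. */
	coal = list_first_entry_or_null(&vi->free_coal_list,
					struct virtnet_batch_coal, node);
	if (coal)
		list_del(&coal->node);
	return coal;
}

virtnet_process_dim_cmd() would then do list_add(&batch_coal->node,
&vi->free_coal_list) instead of setting ->usable. The sgs already point
at ->hdr, ->num_entries and ->status individually, so the node member
is never exposed to the device.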
> +	if (vi->cvq_cmd_nums)
> +		schedule_delayed_work(&vi->get_cvq, 1);
>
>  ret:
>  	rtnl_unlock();
> @@ -4402,7 +4523,9 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
>
>  static int virtnet_alloc_queues(struct virtnet_info *vi)
>  {
> -	int i, len;
> +	struct virtnet_batch_coal *batch_coal;
> +	int i, position;
> +	u8 *buf;
>
>  	if (vi->has_cvq) {
>  		vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
> @@ -4418,13 +4541,21 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
>  	if (!vi->rq)
>  		goto err_rq;
>
> -	len = sizeof(struct virtnet_batch_coal) +
> -	      vi->max_queue_pairs * sizeof(struct virtio_net_ctrl_coal_vq);
> -	vi->batch_coal = kzalloc(len, GFP_KERNEL);
> -	if (!vi->batch_coal)
> +	buf = kzalloc(BATCH_CMD * (sizeof(struct virtnet_batch_coal) +
> +		      vi->max_queue_pairs * sizeof(struct virtnet_coal_entry)), GFP_KERNEL);
> +	if (!buf)
>  		goto err_coal;
>
> +	vi->batch_coal = (struct virtnet_batch_coal *)buf;
> +	for (i = 0; i < BATCH_CMD; i++) {
> +		position = i * (sizeof(struct virtnet_batch_coal) +
> +				vi->max_queue_pairs * sizeof(struct virtnet_coal_entry));
> +		batch_coal = (struct virtnet_batch_coal *)(&buf[position]);
> +		batch_coal->usable = true;
> +	}
> +
>  	INIT_DELAYED_WORK(&vi->refill, refill_work);
> +	INIT_DELAYED_WORK(&vi->get_cvq, get_cvq_work);
>  	for (i = 0; i < vi->max_queue_pairs; i++) {
>  		vi->rq[i].pages = NULL;
>  		netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll,
> --
> 1.8.3.1
>
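For completeness, the matching allocation side under the same
(hypothetical) free-list scheme, also untested; struct_size() from
<linux/overflow.h> replaces the hand-computed offsets above:

static int virtnet_alloc_batch_coal(struct virtnet_info *vi)
{
	struct virtnet_batch_coal *coal;
	int i;

	INIT_LIST_HEAD(&vi->free_coal_list);

	for (i = 0; i < BATCH_CMD; i++) {
		/* struct_size() does the flex-array math with overflow checking. */
		coal = kzalloc(struct_size(coal, coal_vqs, vi->max_queue_pairs),
			       GFP_KERNEL);
		if (!coal)
			return -ENOMEM;	/* caller frees what is already on the list */
		list_add(&coal->node, &vi->free_coal_list);
	}

	return 0;
}

That also lets each buffer be freed independently and drops the
pointer arithmetic from virtnet_alloc_queues() entirely.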