On Thu, Sep 10, 2020 at 01:13:51PM +0200, Guennadi Liakhovetski wrote: > Linux supports running the RPMsg protocol over the VirtIO transport > protocol, but currently there is only support for VirtIO clients and > no support for VirtIO servers. This patch adds a vhost-based RPMsg > server implementation, which makes it possible to use RPMsg over > VirtIO between guest VMs and the host. I now get the client/server concept you are describing above, but that happened only after a lot of mental gymnastics. If you drop the whole client/server concept and concentrate on what this patch does, things will go better. I would personally go with what you have in the Kconfig: > + Vhost RPMsg API allows vhost drivers to communicate with VirtIO > + drivers on guest VMs, using the RPMsg over VirtIO protocol. It is concise but describes exactly what this patch provides. > > Signed-off-by: Guennadi Liakhovetski <guennadi.liakhovetski@xxxxxxxxxxxxxxx> > --- > drivers/vhost/Kconfig | 7 + > drivers/vhost/Makefile | 3 + > drivers/vhost/rpmsg.c | 370 ++++++++++++++++++++++++++++++++++++ > drivers/vhost/vhost_rpmsg.h | 74 ++++++++ > 4 files changed, 454 insertions(+) > create mode 100644 drivers/vhost/rpmsg.c > create mode 100644 drivers/vhost/vhost_rpmsg.h > > diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig > index 587fbae06182..ee1a19b7ab3d 100644 > --- a/drivers/vhost/Kconfig > +++ b/drivers/vhost/Kconfig > @@ -38,6 +38,13 @@ config VHOST_NET > To compile this driver as a module, choose M here: the module will > be called vhost_net. > > +config VHOST_RPMSG > + tristate > + select VHOST > + help > + Vhost RPMsg API allows vhost drivers to communicate with VirtIO > + drivers on guest VMs, using the RPMsg over VirtIO protocol. > + I suppose you intend this to be selectable from another config option? 
> config VHOST_SCSI > tristate "VHOST_SCSI TCM fabric driver" > depends on TARGET_CORE && EVENTFD > diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile > index f3e1897cce85..9cf459d59f97 100644 > --- a/drivers/vhost/Makefile > +++ b/drivers/vhost/Makefile > @@ -2,6 +2,9 @@ > obj-$(CONFIG_VHOST_NET) += vhost_net.o > vhost_net-y := net.o > > +obj-$(CONFIG_VHOST_RPMSG) += vhost_rpmsg.o > +vhost_rpmsg-y := rpmsg.o > + > obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o > vhost_scsi-y := scsi.o > > diff --git a/drivers/vhost/rpmsg.c b/drivers/vhost/rpmsg.c > new file mode 100644 > index 000000000000..0ddee5b5f017 > --- /dev/null > +++ b/drivers/vhost/rpmsg.c > @@ -0,0 +1,370 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Copyright(c) 2020 Intel Corporation. All rights reserved. > + * > + * Author: Guennadi Liakhovetski <guennadi.liakhovetski@xxxxxxxxxxxxxxx> > + * > + * Vhost RPMsg VirtIO interface provides a set of functions to be used on the > + * host side as a counterpart to the guest side RPMsg VirtIO API, provided by > + * drivers/rpmsg/virtio_rpmsg_bus.c. This API can be used by any vhost driver to > + * handle RPMsg specific virtqueue processing. > + * Vhost drivers, using this API will use their own VirtIO device IDs, that > + * should then also be added to the ID table in virtio_rpmsg_bus.c > + */ > + > +#include <linux/compat.h> > +#include <linux/file.h> > +#include <linux/miscdevice.h> As far as I can tell the above two are not needed. 
> +#include <linux/module.h> > +#include <linux/mutex.h> > +#include <linux/rpmsg/virtio.h> > +#include <linux/vhost.h> > +#include <uapi/linux/rpmsg.h> > + > +#include "vhost.h" > +#include "vhost_rpmsg.h" > + > +/* > + * All virtio-rpmsg virtual queue kicks always come with just one buffer - > + * either input or output, but we can also handle split messages > + */ > +static int vhost_rpmsg_get_msg(struct vhost_virtqueue *vq, unsigned int *cnt) > +{ > + struct vhost_rpmsg *vr = container_of(vq->dev, struct vhost_rpmsg, dev); > + unsigned int out, in; > + int head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), &out, &in, > + NULL, NULL); > + if (head < 0) { > + vq_err(vq, "%s(): error %d getting buffer\n", > + __func__, head); > + return head; > + } > + > + /* Nothing new? */ > + if (head == vq->num) > + return head; > + > + if (vq == &vr->vq[VIRTIO_RPMSG_RESPONSE]) { > + if (out) { > + vq_err(vq, "%s(): invalid %d output in response queue\n", > + __func__, out); > + goto return_buf; > + } > + > + *cnt = in; > + } > + > + if (vq == &vr->vq[VIRTIO_RPMSG_REQUEST]) { > + if (in) { > + vq_err(vq, "%s(): invalid %d input in request queue\n", > + __func__, in); > + goto return_buf; > + } > + > + *cnt = out; > + } > + > + return head; > + > +return_buf: > + vhost_add_used(vq, head, 0); > + > + return -EINVAL; > +} > + > +static const struct vhost_rpmsg_ept *vhost_rpmsg_ept_find(struct vhost_rpmsg *vr, int addr) > +{ > + unsigned int i; > + > + for (i = 0; i < vr->n_epts; i++) > + if (vr->ept[i].addr == addr) > + return vr->ept + i; > + > + return NULL; > +} > + > +/* > + * if len < 0, then for reading a request, the complete virtual queue buffer > + * size is prepared, for sending a response, the length in the iterator is used > + */ > +int vhost_rpmsg_start_lock(struct vhost_rpmsg *vr, struct vhost_rpmsg_iter *iter, > + unsigned int qid, ssize_t len) > + __acquires(vq->mutex) > +{ > + struct vhost_virtqueue *vq = vr->vq + qid; > + unsigned int cnt; > + ssize_t 
ret; > + size_t tmp; > + > + if (qid >= VIRTIO_RPMSG_NUM_OF_VQS) > + return -EINVAL; > + > + iter->vq = vq; > + > + mutex_lock(&vq->mutex); > + vhost_disable_notify(&vr->dev, vq); > + > + iter->head = vhost_rpmsg_get_msg(vq, &cnt); > + if (iter->head == vq->num) > + iter->head = -EAGAIN; > + > + if (iter->head < 0) { > + ret = iter->head; > + goto unlock; > + } > + > + tmp = iov_length(vq->iov, cnt); > + if (tmp < sizeof(iter->rhdr)) { > + vq_err(vq, "%s(): size %zu too small\n", __func__, tmp); > + ret = -ENOBUFS; > + goto return_buf; > + } > + > + switch (qid) { > + case VIRTIO_RPMSG_REQUEST: > + if (len >= 0) { > + if (tmp < sizeof(iter->rhdr) + len) { > + ret = -ENOBUFS; > + goto return_buf; > + } > + > + tmp = len + sizeof(iter->rhdr); > + } > + > + /* len is now the size of the payload */ > + iov_iter_init(&iter->iov_iter, WRITE, vq->iov, cnt, tmp); > + > + /* Read the RPMSG header with endpoint addresses */ > + tmp = copy_from_iter(&iter->rhdr, sizeof(iter->rhdr), &iter->iov_iter); > + if (tmp != sizeof(iter->rhdr)) { > + vq_err(vq, "%s(): got %zu instead of %zu\n", __func__, > + tmp, sizeof(iter->rhdr)); > + ret = -EIO; > + goto return_buf; > + } > + > + iter->ept = vhost_rpmsg_ept_find(vr, vhost32_to_cpu(vq, iter->rhdr.dst)); > + if (!iter->ept) { > + vq_err(vq, "%s(): no endpoint with address %d\n", > + __func__, vhost32_to_cpu(vq, iter->rhdr.dst)); > + ret = -ENOENT; > + goto return_buf; > + } > + > + /* Let the endpoint read the payload */ > + if (iter->ept->read) { > + ret = iter->ept->read(vr, iter); > + if (ret < 0) > + goto return_buf; > + > + iter->rhdr.len = cpu_to_vhost16(vq, ret); > + } else { > + iter->rhdr.len = 0; > + } > + > + /* Prepare for the response phase */ > + iter->rhdr.dst = iter->rhdr.src; > + iter->rhdr.src = cpu_to_vhost32(vq, iter->ept->addr); I'm a little puzzled here - what will the response look like? And why is it prepared here? From what I can see doing so introduces coupling with function handle_rpmsg_req_single(). 
I think confirmation of reception should be handled by endpoints rather than in the core. > + > + break; > + case VIRTIO_RPMSG_RESPONSE: > + if (!iter->ept && iter->rhdr.dst != cpu_to_vhost32(vq, RPMSG_NS_ADDR)) { > + /* > + * Usually the iterator is configured when processing a > + * message on the request queue, but it's also possible > + * to send a message on the response queue without a > + * preceding request, in that case the iterator must > + * contain source and destination addresses. > + */ > + iter->ept = vhost_rpmsg_ept_find(vr, vhost32_to_cpu(vq, iter->rhdr.src)); > + if (!iter->ept) { > + ret = -ENOENT; > + goto return_buf; > + } > + } > + > + if (len >= 0) { > + if (tmp < sizeof(iter->rhdr) + len) { > + ret = -ENOBUFS; > + goto return_buf; > + } > + > + iter->rhdr.len = cpu_to_vhost16(vq, len); > + tmp = len + sizeof(iter->rhdr); > + } > + > + /* len is now the size of the payload */ > + iov_iter_init(&iter->iov_iter, READ, vq->iov, cnt, tmp); > + > + /* Write the RPMSG header with endpoint addresses */ > + tmp = copy_to_iter(&iter->rhdr, sizeof(iter->rhdr), &iter->iov_iter); > + if (tmp != sizeof(iter->rhdr)) { > + ret = -EIO; > + goto return_buf; > + } > + > + /* Let the endpoint write the payload */ I would specifically mention that namespace payloads are taken care of by vhost_rpmsg_ns_announce(). That makes it easier for people to connect the dots. 
> + if (iter->ept && iter->ept->write) { > + ret = iter->ept->write(vr, iter); > + if (ret < 0) > + goto return_buf; > + } > + > + break; > + } > + > + return 0; > + > +return_buf: > + vhost_add_used(vq, iter->head, 0); > +unlock: > + vhost_enable_notify(&vr->dev, vq); > + mutex_unlock(&vq->mutex); > + > + return ret; > +} > +EXPORT_SYMBOL_GPL(vhost_rpmsg_start_lock); > + > +size_t vhost_rpmsg_copy(struct vhost_rpmsg *vr, struct vhost_rpmsg_iter *iter, > + void *data, size_t size) > +{ > + /* > + * We could check for excess data, but copy_{to,from}_iter() don't do > + * that either > + */ > + if (iter->vq == vr->vq + VIRTIO_RPMSG_RESPONSE) > + return copy_to_iter(data, size, &iter->iov_iter); > + > + return copy_from_iter(data, size, &iter->iov_iter); > +} > +EXPORT_SYMBOL_GPL(vhost_rpmsg_copy); > + > +int vhost_rpmsg_finish_unlock(struct vhost_rpmsg *vr, > + struct vhost_rpmsg_iter *iter) > + __releases(vq->mutex) > +{ > + if (iter->head >= 0) > + vhost_add_used_and_signal(iter->vq->dev, iter->vq, iter->head, > + vhost16_to_cpu(iter->vq, iter->rhdr.len) + > + sizeof(iter->rhdr)); > + > + vhost_enable_notify(&vr->dev, iter->vq); > + mutex_unlock(&iter->vq->mutex); > + > + return iter->head; > +} > +EXPORT_SYMBOL_GPL(vhost_rpmsg_finish_unlock); > + > +/* > + * Return false to terminate the external loop only if we fail to obtain either > + * a request or a response buffer > + */ > +static bool handle_rpmsg_req_single(struct vhost_rpmsg *vr, > + struct vhost_virtqueue *vq) > +{ > + struct vhost_rpmsg_iter iter; > + int ret = vhost_rpmsg_start_lock(vr, &iter, VIRTIO_RPMSG_REQUEST, -EINVAL); > + if (!ret) > + ret = vhost_rpmsg_finish_unlock(vr, &iter); > + if (ret < 0) { > + if (ret != -EAGAIN) > + vq_err(vq, "%s(): RPMSG processing failed %d\n", > + __func__, ret); > + return false; > + } > + > + if (!iter.ept->write) > + return true; > + > + ret = vhost_rpmsg_start_lock(vr, &iter, VIRTIO_RPMSG_RESPONSE, -EINVAL); > + if (!ret) > + ret = vhost_rpmsg_finish_unlock(vr, 
&iter); > + if (ret < 0) { > + vq_err(vq, "%s(): RPMSG finalising failed %d\n", __func__, ret); > + return false; > + } As I said before, dealing with the "response" queue here seems to be introducing coupling with vhost_rpmsg_start_lock()... Endpoints should be doing that. > + > + return true; > +} > + > +static void handle_rpmsg_req_kick(struct vhost_work *work) > +{ > + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, > + poll.work); > + struct vhost_rpmsg *vr = container_of(vq->dev, struct vhost_rpmsg, dev); > + > + while (handle_rpmsg_req_single(vr, vq)) > + ; > +} > + > +/* > + * initialise two virtqueues with an array of endpoints, > + * request and response callbacks > + */ > +void vhost_rpmsg_init(struct vhost_rpmsg *vr, const struct vhost_rpmsg_ept *ept, > + unsigned int n_epts) > +{ > + unsigned int i; > + > + for (i = 0; i < ARRAY_SIZE(vr->vq); i++) > + vr->vq_p[i] = &vr->vq[i]; > + > + /* vq[0]: host -> guest, vq[1]: host <- guest */ > + vr->vq[VIRTIO_RPMSG_REQUEST].handle_kick = handle_rpmsg_req_kick; > + vr->vq[VIRTIO_RPMSG_RESPONSE].handle_kick = NULL; The comment depicts vq[0] followed by vq[1], but the code initialises vq[1] before vq[0], which is wildly confusing. 
At the very least this should be: vr->vq[VIRTIO_RPMSG_RESPONSE].handle_kick = NULL; vr->vq[VIRTIO_RPMSG_REQUEST].handle_kick = handle_rpmsg_req_kick; And even better: /* See configuration of *vq_cbs[] in rpmsg_probe() */ vr->vq[VIRTIO_RPMSG_TX].handle_kick = NULL; vr->vq[VIRTIO_RPMSG_RX].handle_kick = handle_rpmsg_req_kick; > + > + vr->ept = ept; > + vr->n_epts = n_epts; > + > + vhost_dev_init(&vr->dev, vr->vq_p, VIRTIO_RPMSG_NUM_OF_VQS, > + UIO_MAXIOV, 0, 0, true, NULL); > +} > +EXPORT_SYMBOL_GPL(vhost_rpmsg_init); > + > +void vhost_rpmsg_destroy(struct vhost_rpmsg *vr) > +{ > + if (vhost_dev_has_owner(&vr->dev)) > + vhost_poll_flush(&vr->vq[VIRTIO_RPMSG_REQUEST].poll); > + > + vhost_dev_cleanup(&vr->dev); > +} > +EXPORT_SYMBOL_GPL(vhost_rpmsg_destroy); > + > +/* send namespace */ > +int vhost_rpmsg_ns_announce(struct vhost_rpmsg *vr, const char *name, unsigned int src) > +{ > + struct vhost_virtqueue *vq = &vr->vq[VIRTIO_RPMSG_RESPONSE]; > + struct vhost_rpmsg_iter iter = { > + .rhdr = { > + .src = 0, > + .dst = cpu_to_vhost32(vq, RPMSG_NS_ADDR), > + }, > + }; > + struct rpmsg_ns_msg ns = { > + .addr = cpu_to_vhost32(vq, src), > + .flags = cpu_to_vhost32(vq, RPMSG_NS_CREATE), /* for rpmsg_ns_cb() */ > + }; Here we have to assume the source can be found in the endpoints registered in vhost_rpmsg_init(). I would put a check to make sure that is the case and return an error otherwise. 
> + int ret = vhost_rpmsg_start_lock(vr, &iter, VIRTIO_RPMSG_RESPONSE, sizeof(ns)); > + > + if (ret < 0) > + return ret; > + > + strlcpy(ns.name, name, sizeof(ns.name)); > + > + ret = vhost_rpmsg_copy(vr, &iter, &ns, sizeof(ns)); > + if (ret != sizeof(ns)) > + vq_err(iter.vq, "%s(): added %d instead of %zu bytes\n", > + __func__, ret, sizeof(ns)); > + > + ret = vhost_rpmsg_finish_unlock(vr, &iter); > + if (ret < 0) > + vq_err(iter.vq, "%s(): namespace announcement failed: %d\n", > + __func__, ret); > + > + return ret; > +} > +EXPORT_SYMBOL_GPL(vhost_rpmsg_ns_announce); > + > +MODULE_LICENSE("GPL v2"); > +MODULE_AUTHOR("Intel, Inc."); > +MODULE_DESCRIPTION("Vhost RPMsg API"); > diff --git a/drivers/vhost/vhost_rpmsg.h b/drivers/vhost/vhost_rpmsg.h > new file mode 100644 > index 000000000000..c020ea14cd16 > --- /dev/null > +++ b/drivers/vhost/vhost_rpmsg.h > @@ -0,0 +1,74 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * Copyright(c) 2020 Intel Corporation. All rights reserved. > + * > + * Author: Guennadi Liakhovetski <guennadi.liakhovetski@xxxxxxxxxxxxxxx> > + */ > + > +#ifndef VHOST_RPMSG_H > +#define VHOST_RPMSG_H > + > +#include <linux/rpmsg/virtio.h> > +#include <linux/uio.h> > + > +#include "vhost.h" > + > +/* RPMsg uses two VirtQueues: one for each direction */ > +enum { > + VIRTIO_RPMSG_RESPONSE, /* RPMsg response (host->guest) buffers */ > + VIRTIO_RPMSG_REQUEST, /* RPMsg request (guest->host) buffers */ As I said above, things would be much clearer if these were VIRTIO_RPMSG_TX and VIRTIO_RPMSG_RX. I won't be commenting on the mechanics needed to access and send information on the virtqueues as they are completely foreign to me. Other than the above, I think this is going somewhere. 
Thanks, Mathieu > + /* Keep last */ > + VIRTIO_RPMSG_NUM_OF_VQS, > +}; > + > +struct vhost_rpmsg_ept; > + > +struct vhost_rpmsg_iter { > + struct iov_iter iov_iter; > + struct rpmsg_hdr rhdr; > + struct vhost_virtqueue *vq; > + const struct vhost_rpmsg_ept *ept; > + int head; > + void *priv; > +}; > + > +struct vhost_rpmsg { > + struct vhost_dev dev; > + struct vhost_virtqueue vq[VIRTIO_RPMSG_NUM_OF_VQS]; > + struct vhost_virtqueue *vq_p[VIRTIO_RPMSG_NUM_OF_VQS]; > + const struct vhost_rpmsg_ept *ept; > + unsigned int n_epts; > +}; > + > +struct vhost_rpmsg_ept { > + ssize_t (*read)(struct vhost_rpmsg *, struct vhost_rpmsg_iter *); > + ssize_t (*write)(struct vhost_rpmsg *, struct vhost_rpmsg_iter *); > + int addr; > +}; > + > +static inline size_t vhost_rpmsg_iter_len(const struct vhost_rpmsg_iter *iter) > +{ > + return iter->rhdr.len; > +} > + > +#define VHOST_RPMSG_ITER(_vq, _src, _dst) { \ > + .rhdr = { \ > + .src = cpu_to_vhost32(_vq, _src), \ > + .dst = cpu_to_vhost32(_vq, _dst), \ > + }, \ > + } > + > +void vhost_rpmsg_init(struct vhost_rpmsg *vr, const struct vhost_rpmsg_ept *ept, > + unsigned int n_epts); > +void vhost_rpmsg_destroy(struct vhost_rpmsg *vr); > +int vhost_rpmsg_ns_announce(struct vhost_rpmsg *vr, const char *name, > + unsigned int src); > +int vhost_rpmsg_start_lock(struct vhost_rpmsg *vr, > + struct vhost_rpmsg_iter *iter, > + unsigned int qid, ssize_t len); > +size_t vhost_rpmsg_copy(struct vhost_rpmsg *vr, struct vhost_rpmsg_iter *iter, > + void *data, size_t size); > +int vhost_rpmsg_finish_unlock(struct vhost_rpmsg *vr, > + struct vhost_rpmsg_iter *iter); > + > +#endif > -- > 2.28.0 >