Re: [RFC v3 3/5] KVM: implement wire protocol

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 2021-03-09 at 14:19 +0800, Jason Wang wrote:
> On 2021/2/21 8:04 下午, Elena Afanasova wrote:
> > Add ioregionfd blocking read/write operations.
> > 
> > Signed-off-by: Elena Afanasova <eafanasova@xxxxxxxxx>
> > ---
> > v3:
> >   - change wire protocol license
> >   - remove ioregionfd_cmd info and drop appropriate macros
> >   - fix ioregionfd state machine
> >   - add sizeless ioregions support
> >   - drop redundant check in ioregion_read/write()
> > 
> >   include/uapi/linux/ioregion.h |  30 +++++++
> >   virt/kvm/ioregion.c           | 162
> > +++++++++++++++++++++++++++++++++-
> >   2 files changed, 190 insertions(+), 2 deletions(-)
> >   create mode 100644 include/uapi/linux/ioregion.h
> > 
> > diff --git a/include/uapi/linux/ioregion.h
> > b/include/uapi/linux/ioregion.h
> > new file mode 100644
> > index 000000000000..58f9b5ba6186
> > --- /dev/null
> > +++ b/include/uapi/linux/ioregion.h
> > @@ -0,0 +1,30 @@
> > +/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-
> > note) OR BSD-3-Clause) */
> > +#ifndef _UAPI_LINUX_IOREGION_H
> > +#define _UAPI_LINUX_IOREGION_H
> > +
> > +/* Wire protocol */
> > +
> > +struct ioregionfd_cmd {
> > +	__u8 cmd;
> > +	__u8 size_exponent : 4;
> > +	__u8 resp : 1;
> > +	__u8 padding[6];
> > +	__u64 user_data;
> > +	__u64 offset;
> > +	__u64 data;
> > +};
> 
> Sorry if I've asked this before. Do we need a id for each 
> request/response? E.g an ioregion fd could be used by multiple
> vCPUS. 
> VCPU needs to have a way to find which request belongs to itself or
> not?
> 
I don’t think the id is necessary here since all requests/responses are
serialized.

> 
> > +
> > +struct ioregionfd_resp {
> > +	__u64 data;
> > +	__u8 pad[24];
> > +};
> > +
> > +#define IOREGIONFD_CMD_READ    0
> > +#define IOREGIONFD_CMD_WRITE   1
> > +
> > +#define IOREGIONFD_SIZE_8BIT   0
> > +#define IOREGIONFD_SIZE_16BIT  1
> > +#define IOREGIONFD_SIZE_32BIT  2
> > +#define IOREGIONFD_SIZE_64BIT  3
> > +
> > +#endif
> > diff --git a/virt/kvm/ioregion.c b/virt/kvm/ioregion.c
> > index e09ef3e2c9d7..1e1c7772d274 100644
> > --- a/virt/kvm/ioregion.c
> > +++ b/virt/kvm/ioregion.c
> > @@ -3,6 +3,7 @@
> >   #include <linux/fs.h>
> >   #include <kvm/iodev.h>
> >   #include "eventfd.h"
> > +#include <uapi/linux/ioregion.h>
> >   
> >   void
> >   kvm_ioregionfd_init(struct kvm *kvm)
> > @@ -40,18 +41,175 @@ ioregion_release(struct ioregion *p)
> >   	kfree(p);
> >   }
> >   
> > +static bool
> > +pack_cmd(struct ioregionfd_cmd *cmd, u64 offset, u64 len, u8 opt,
> > u8 resp,
> > +	 u64 user_data, const void *val)
> > +{
> > +	switch (len) {
> > +	case 0:
> > +		break;
> > +	case 1:
> > +		cmd->size_exponent = IOREGIONFD_SIZE_8BIT;
> > +		break;
> > +	case 2:
> > +		cmd->size_exponent = IOREGIONFD_SIZE_16BIT;
> > +		break;
> > +	case 4:
> > +		cmd->size_exponent = IOREGIONFD_SIZE_32BIT;
> > +		break;
> > +	case 8:
> > +		cmd->size_exponent = IOREGIONFD_SIZE_64BIT;
> > +		break;
> > +	default:
> > +		return false;
> > +	}
> > +
> > +	if (val)
> > +		memcpy(&cmd->data, val, len);
> > +	cmd->user_data = user_data;
> > +	cmd->offset = offset;
> > +	cmd->cmd = opt;
> > +	cmd->resp = resp;
> > +
> > +	return true;
> > +}
> > +
> > +enum {
> > +	SEND_CMD,
> > +	GET_REPLY,
> > +	COMPLETE
> > +};
> > +
> > +static void
> > +ioregion_save_ctx(struct kvm_vcpu *vcpu, bool in, gpa_t addr, u8
> > state, void *val)
> > +{
> > +	vcpu->ioregion_ctx.is_interrupted = true;
> > +	vcpu->ioregion_ctx.val = val;
> > +	vcpu->ioregion_ctx.state = state;
> > +	vcpu->ioregion_ctx.addr = addr;
> > +	vcpu->ioregion_ctx.in = in;
> > +}
> > +
> >   static int
> >   ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
> > gpa_t addr,
> >   	      int len, void *val)
> >   {
> > -	return -EOPNOTSUPP;
> > +	struct ioregion *p = to_ioregion(this);
> > +	union {
> > +		struct ioregionfd_cmd cmd;
> > +		struct ioregionfd_resp resp;
> > +	} buf;
> > +	int ret = 0;
> > +	int state = SEND_CMD;
> > +
> > +	if (unlikely(vcpu->ioregion_ctx.is_interrupted)) {
> > +		vcpu->ioregion_ctx.is_interrupted = false;
> > +
> > +		switch (vcpu->ioregion_ctx.state) {
> > +		case SEND_CMD:
> > +			goto send_cmd;
> > +		case GET_REPLY:
> > +			goto get_repl;
> > +		default:
> > +			return -EINVAL;
> > +		}
> > +	}
> > +
> > +send_cmd:
> > +	memset(&buf, 0, sizeof(buf));
> > +	if (!pack_cmd(&buf.cmd, addr - p->paddr, len,
> > IOREGIONFD_CMD_READ,
> > +		      1, p->user_data, NULL))
> > +		return -EOPNOTSUPP;
> > +
> > +	ret = kernel_write(p->wf, &buf.cmd, sizeof(buf.cmd), 0);
> > +	state = (ret == sizeof(buf.cmd)) ? GET_REPLY : SEND_CMD;
> > +	if (signal_pending(current) && state == SEND_CMD) {
> 
> Can the signal be delivered after a success of kernel_write()? If
> yes, 
> is there any side effect if we want to redo the write here?
> 
Yes, in this case the signal should be handled by KVM. There can be a
side effect when ioregion fd is broken and there is a pending signal
that isn’t related to ioregionfd. But this doesn’t seem to be an issue
since it can be handled later in ioregionfd complete operations.

> 
> > +		ioregion_save_ctx(vcpu, 1, addr, state, val);
> > +		return -EINTR;
> > +	}
> > +	if (ret != sizeof(buf.cmd)) {
> > +		ret = (ret < 0) ? ret : -EIO;
> > +		return (ret == -EAGAIN || ret == -EWOULDBLOCK) ?
> > -EINVAL : ret;
> > +	}
> > +	if (!p->rf)
> > +		return 0;
> > +
> > +get_repl:
> > +	memset(&buf, 0, sizeof(buf));
> > +	ret = kernel_read(p->rf, &buf.resp, sizeof(buf.resp), 0);
> > +	state = (ret == sizeof(buf.resp)) ? COMPLETE : GET_REPLY;
> > +	if (signal_pending(current) && state == GET_REPLY) {
> > +		ioregion_save_ctx(vcpu, 1, addr, state, val);
> > +		return -EINTR;
> > +	}
> > +	if (ret != sizeof(buf.resp)) {
> > +		ret = (ret < 0) ? ret : -EIO;
> > +		return (ret == -EAGAIN || ret == -EWOULDBLOCK) ?
> > -EINVAL : ret;
> > +	}
> 
> We may need to unify the duplicated codes here with send_cmd.
> 
Yes, will refactor.

Thank you
> 
> > +
> > +	memcpy(val, &buf.resp.data, len);
> > +
> > +	return 0;
> >   }
> >   
> >   static int
> >   ioregion_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
> > gpa_t addr,
> >   		int len, const void *val)
> >   {
> > -	return -EOPNOTSUPP;
> > +	struct ioregion *p = to_ioregion(this);
> > +	union {
> > +		struct ioregionfd_cmd cmd;
> > +		struct ioregionfd_resp resp;
> > +	} buf;
> > +	int ret = 0;
> > +	int state = SEND_CMD;
> > +
> > +	if (unlikely(vcpu->ioregion_ctx.is_interrupted)) {
> > +		vcpu->ioregion_ctx.is_interrupted = false;
> > +
> > +		switch (vcpu->ioregion_ctx.state) {
> > +		case SEND_CMD:
> > +			goto send_cmd;
> > +		case GET_REPLY:
> > +			goto get_repl;
> > +		default:
> > +			return -EINVAL;
> > +		}
> > +	}
> > +
> > +send_cmd:
> > +	memset(&buf, 0, sizeof(buf));
> > +	if (!pack_cmd(&buf.cmd, addr - p->paddr, len,
> > IOREGIONFD_CMD_WRITE,
> > +		      p->posted_writes ? 0 : 1, p->user_data, val))
> > +		return -EOPNOTSUPP;
> > +
> > +	ret = kernel_write(p->wf, &buf.cmd, sizeof(buf.cmd), 0);
> > +	state = (ret == sizeof(buf.cmd)) ? GET_REPLY : SEND_CMD;
> > +	if (signal_pending(current) && state == SEND_CMD) {
> > +		ioregion_save_ctx(vcpu, 0, addr, state, (void *)val);
> > +		return -EINTR;
> > +	}
> > +	if (ret != sizeof(buf.cmd)) {
> > +		ret = (ret < 0) ? ret : -EIO;
> > +		return (ret == -EAGAIN || ret == -EWOULDBLOCK) ?
> > -EINVAL : ret;
> > +	}
> > +
> > +get_repl:
> > +	if (!p->posted_writes) {
> > +		memset(&buf, 0, sizeof(buf));
> > +		ret = kernel_read(p->rf, &buf.resp, sizeof(buf.resp),
> > 0);
> > +		state = (ret == sizeof(buf.resp)) ? COMPLETE :
> > GET_REPLY;
> > +		if (signal_pending(current) && state == GET_REPLY) {
> > +			ioregion_save_ctx(vcpu, 0, addr, state, (void
> > *)val);
> > +			return -EINTR;
> > +		}
> > +		if (ret != sizeof(buf.resp)) {
> > +			ret = (ret < 0) ? ret : -EIO;
> > +			return (ret == -EAGAIN || ret == -EWOULDBLOCK)
> > ? -EINVAL : ret;
> > +		}
> > +	}
> > +
> > +	return 0;
> 
> It looks to me we had more chance to unify the code with
> ioregion_read().
> 
> Thanks
> 
> 
> >   }
> >   
> >   /*




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux