On Tue, 2021-03-09 at 14:19 +0800, Jason Wang wrote: > On 2021/2/21 8:04 下午, Elena Afanasova wrote: > > Add ioregionfd blocking read/write operations. > > > > Signed-off-by: Elena Afanasova <eafanasova@xxxxxxxxx> > > --- > > v3: > > - change wire protocol license > > - remove ioregionfd_cmd info and drop appropriate macros > > - fix ioregionfd state machine > > - add sizeless ioregions support > > - drop redundant check in ioregion_read/write() > > > > include/uapi/linux/ioregion.h | 30 +++++++ > > virt/kvm/ioregion.c | 162 > > +++++++++++++++++++++++++++++++++- > > 2 files changed, 190 insertions(+), 2 deletions(-) > > create mode 100644 include/uapi/linux/ioregion.h > > > > diff --git a/include/uapi/linux/ioregion.h > > b/include/uapi/linux/ioregion.h > > new file mode 100644 > > index 000000000000..58f9b5ba6186 > > --- /dev/null > > +++ b/include/uapi/linux/ioregion.h > > @@ -0,0 +1,30 @@ > > +/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall- > > note) OR BSD-3-Clause) */ > > +#ifndef _UAPI_LINUX_IOREGION_H > > +#define _UAPI_LINUX_IOREGION_H > > + > > +/* Wire protocol */ > > + > > +struct ioregionfd_cmd { > > + __u8 cmd; > > + __u8 size_exponent : 4; > > + __u8 resp : 1; > > + __u8 padding[6]; > > + __u64 user_data; > > + __u64 offset; > > + __u64 data; > > +}; > > Sorry if I've asked this before. Do we need a id for each > request/response? E.g an ioregion fd could be used by multiple > vCPUS. > VCPU needs to have a way to find which request belongs to itself or > not? > I don’t think the id is necessary here since all requests/responses are serialized. > > > + > > +struct ioregionfd_resp { > > + __u64 data; > > + __u8 pad[24]; > > +}; > > + > > +#define IOREGIONFD_CMD_READ 0 > > +#define IOREGIONFD_CMD_WRITE 1 > > + > > +#define IOREGIONFD_SIZE_8BIT 0 > > +#define IOREGIONFD_SIZE_16BIT 1 > > +#define IOREGIONFD_SIZE_32BIT 2 > > +#define IOREGIONFD_SIZE_64BIT 3 > > + > > +#endif > > diff --git a/virt/kvm/ioregion.c b/virt/kvm/ioregion.c > > index e09ef3e2c9d7..1e1c7772d274 100644 > > --- a/virt/kvm/ioregion.c > > +++ b/virt/kvm/ioregion.c > > @@ -3,6 +3,7 @@ > > #include <linux/fs.h> > > #include <kvm/iodev.h> > > #include "eventfd.h" > > +#include <uapi/linux/ioregion.h> > > > > void > > kvm_ioregionfd_init(struct kvm *kvm) > > @@ -40,18 +41,175 @@ ioregion_release(struct ioregion *p) > > kfree(p); > > } > > > > +static bool > > +pack_cmd(struct ioregionfd_cmd *cmd, u64 offset, u64 len, u8 opt, > > u8 resp, > > + u64 user_data, const void *val) > > +{ > > + switch (len) { > > + case 0: > > + break; > > + case 1: > > + cmd->size_exponent = IOREGIONFD_SIZE_8BIT; > > + break; > > + case 2: > > + cmd->size_exponent = IOREGIONFD_SIZE_16BIT; > > + break; > > + case 4: > > + cmd->size_exponent = IOREGIONFD_SIZE_32BIT; > > + break; > > + case 8: > > + cmd->size_exponent = IOREGIONFD_SIZE_64BIT; > > + break; > > + default: > > + return false; > > + } > > + > > + if (val) > > + memcpy(&cmd->data, val, len); > > + cmd->user_data = user_data; > > + cmd->offset = offset; > > + cmd->cmd = opt; > > + cmd->resp = resp; > > + > > + return true; > > +} > > + > > +enum { > > + SEND_CMD, > > + GET_REPLY, > > + COMPLETE > > +}; > > + > > +static void > > +ioregion_save_ctx(struct kvm_vcpu *vcpu, bool in, gpa_t addr, u8 > > state, void *val) > > +{ > > + vcpu->ioregion_ctx.is_interrupted = true; > > + vcpu->ioregion_ctx.val = val; > > + vcpu->ioregion_ctx.state = state; > > + vcpu->ioregion_ctx.addr = addr; > > + vcpu->ioregion_ctx.in = in; > > +} > > + > > static int > > ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, > > gpa_t addr, > > int len, void *val) > > { > > - return -EOPNOTSUPP; > > + struct ioregion *p = to_ioregion(this); > > + union { > > + struct ioregionfd_cmd cmd; > > + struct ioregionfd_resp resp; > > + } buf; > > + int ret = 0; > > + int state = SEND_CMD; > > + > > + if (unlikely(vcpu->ioregion_ctx.is_interrupted)) { > > + vcpu->ioregion_ctx.is_interrupted = false; > > + > > + switch (vcpu->ioregion_ctx.state) { > > + case SEND_CMD: > > + goto send_cmd; > > + case GET_REPLY: > > + goto get_repl; > > + default: > > + return -EINVAL; > > + } > > + } > > + > > +send_cmd: > > + memset(&buf, 0, sizeof(buf)); > > + if (!pack_cmd(&buf.cmd, addr - p->paddr, len, > > IOREGIONFD_CMD_READ, > > + 1, p->user_data, NULL)) > > + return -EOPNOTSUPP; > > + > > + ret = kernel_write(p->wf, &buf.cmd, sizeof(buf.cmd), 0); > > + state = (ret == sizeof(buf.cmd)) ? GET_REPLY : SEND_CMD; > > + if (signal_pending(current) && state == SEND_CMD) { > > Can the signal be delivered after a success of kernel_write()? If > yes, > is there any side effect if we want to redo the write here? > Yes, in this case the signal should be handled by KVM. There can be a side effect when ioregion fd is broken and there is a pending signal that isn’t related to ioregionfd. But this doesn’t seem to be an issue since it can be handled later in ioregionfd complete operations. > > > + ioregion_save_ctx(vcpu, 1, addr, state, val); > > + return -EINTR; > > + } > > + if (ret != sizeof(buf.cmd)) { > > + ret = (ret < 0) ? ret : -EIO; > > + return (ret == -EAGAIN || ret == -EWOULDBLOCK) ? > > -EINVAL : ret; > > + } > > + if (!p->rf) > > + return 0; > > + > > +get_repl: > > + memset(&buf, 0, sizeof(buf)); > > + ret = kernel_read(p->rf, &buf.resp, sizeof(buf.resp), 0); > > + state = (ret == sizeof(buf.resp)) ? COMPLETE : GET_REPLY; > > + if (signal_pending(current) && state == GET_REPLY) { > > + ioregion_save_ctx(vcpu, 1, addr, state, val); > > + return -EINTR; > > + } > > + if (ret != sizeof(buf.resp)) { > > + ret = (ret < 0) ? ret : -EIO; > > + return (ret == -EAGAIN || ret == -EWOULDBLOCK) ? > > -EINVAL : ret; > > + } > > We may need to unify the duplicated codes here with send_cmd. > Yes, will refactor. Thank you > > > + > > + memcpy(val, &buf.resp.data, len); > > + > > + return 0; > > } > > > > static int > > ioregion_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, > > gpa_t addr, > > int len, const void *val) > > { > > - return -EOPNOTSUPP; > > + struct ioregion *p = to_ioregion(this); > > + union { > > + struct ioregionfd_cmd cmd; > > + struct ioregionfd_resp resp; > > + } buf; > > + int ret = 0; > > + int state = SEND_CMD; > > + > > + if (unlikely(vcpu->ioregion_ctx.is_interrupted)) { > > + vcpu->ioregion_ctx.is_interrupted = false; > > + > > + switch (vcpu->ioregion_ctx.state) { > > + case SEND_CMD: > > + goto send_cmd; > > + case GET_REPLY: > > + goto get_repl; > > + default: > > + return -EINVAL; > > + } > > + } > > + > > +send_cmd: > > + memset(&buf, 0, sizeof(buf)); > > + if (!pack_cmd(&buf.cmd, addr - p->paddr, len, > > IOREGIONFD_CMD_WRITE, > > + p->posted_writes ? 0 : 1, p->user_data, val)) > > + return -EOPNOTSUPP; > > + > > + ret = kernel_write(p->wf, &buf.cmd, sizeof(buf.cmd), 0); > > + state = (ret == sizeof(buf.cmd)) ? GET_REPLY : SEND_CMD; > > + if (signal_pending(current) && state == SEND_CMD) { > > + ioregion_save_ctx(vcpu, 0, addr, state, (void *)val); > > + return -EINTR; > > + } > > + if (ret != sizeof(buf.cmd)) { > > + ret = (ret < 0) ? ret : -EIO; > > + return (ret == -EAGAIN || ret == -EWOULDBLOCK) ? > > -EINVAL : ret; > > + } > > + > > +get_repl: > > + if (!p->posted_writes) { > > + memset(&buf, 0, sizeof(buf)); > > + ret = kernel_read(p->rf, &buf.resp, sizeof(buf.resp), > > 0); > > + state = (ret == sizeof(buf.resp)) ? COMPLETE : > > GET_REPLY; > > + if (signal_pending(current) && state == GET_REPLY) { > > + ioregion_save_ctx(vcpu, 0, addr, state, (void > > *)val); > > + return -EINTR; > > + } > > + if (ret != sizeof(buf.resp)) { > > + ret = (ret < 0) ? ret : -EIO; > > + return (ret == -EAGAIN || ret == -EWOULDBLOCK) > > ? -EINVAL : ret; > > + } > > + } > > + > > + return 0; > > It looks to me we had more chance to unify the code with > ioregion_read(). > > Thanks > > > > } > > > > /*