On Tue, 2020-12-29 at 12:00 +0000, Stefan Hajnoczi wrote: > On Tue, Dec 29, 2020 at 01:02:44PM +0300, Elena Afanasova wrote: > > Signed-off-by: Elena Afanasova <eafanasova@xxxxxxxxx> > > --- > > virt/kvm/ioregion.c | 157 > > ++++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 157 insertions(+) > > > > diff --git a/virt/kvm/ioregion.c b/virt/kvm/ioregion.c > > index a200c3761343..8523f4126337 100644 > > --- a/virt/kvm/ioregion.c > > +++ b/virt/kvm/ioregion.c > > @@ -4,6 +4,33 @@ > > #include <kvm/iodev.h> > > #include "eventfd.h" > > > > +/* Wire protocol */ > > +struct ioregionfd_cmd { > > + __u32 info; > > + __u32 padding; > > + __u64 user_data; > > + __u64 offset; > > + __u64 data; > > +}; > > + > > +struct ioregionfd_resp { > > + __u64 data; > > + __u8 pad[24]; > > +}; > > + > > +#define IOREGIONFD_CMD_READ 0 > > +#define IOREGIONFD_CMD_WRITE 1 > > + > > +#define IOREGIONFD_SIZE_8BIT 0 > > +#define IOREGIONFD_SIZE_16BIT 1 > > +#define IOREGIONFD_SIZE_32BIT 2 > > +#define IOREGIONFD_SIZE_64BIT 3 > > + > > +#define IOREGIONFD_SIZE_OFFSET 4 > > +#define IOREGIONFD_RESP_OFFSET 6 > > +#define IOREGIONFD_SIZE(x) ((x) << IOREGIONFD_SIZE_OFFSET) > > +#define IOREGIONFD_RESP(x) ((x) << IOREGIONFD_RESP_OFFSET) > > These belong in the uapi header so userspace also has struct > ioregionfd_cmd, struct ioregionfd_resp, etc. > I'll move the wire protocol to a separate uapi header > > + > > void > > kvm_ioregionfd_init(struct kvm *kvm) > > { > > @@ -38,10 +65,100 @@ ioregion_release(struct ioregion *p) > > kfree(p); > > } > > > > +static bool > > +pack_cmd(struct ioregionfd_cmd *cmd, u64 offset, u64 len, int opt, > > bool resp, > > + u64 user_data, const void *val) > > +{ > > + u64 size = 0; > > + > > + switch (len) { > > + case 1: > > + size = IOREGIONFD_SIZE_8BIT; > > + *((u8 *)&cmd->data) = val ? *(u8 *)val : 0; > > + break; > > + case 2: > > + size = IOREGIONFD_SIZE_16BIT; > > + *((u16 *)&cmd->data) = val ? *(u16 *)val : 0; > > + break; > > + case 4: > > + size = IOREGIONFD_SIZE_32BIT; > > + *((u32 *)&cmd->data) = val ? *(u32 *)val : 0; > > + break; > > + case 8: > > + size = IOREGIONFD_SIZE_64BIT; > > + *((u64 *)&cmd->data) = val ? *(u64 *)val : 0; > > + break; > > + default: > > + return false; > > > > The assignments and casts can be replaced with a single memcpy after > the > switch statement. This is also how KVM_EXIT_MMIO and Coalesced MMIO > do > it: > > memcpy(cmd->data, val, len); > Thanks for pointing it out > However, we need to make sure that cmd has been zeroed so that kernel > memory is not accidentally exposed to userspace. > > + } > > + cmd->user_data = user_data; > > + cmd->offset = offset; > > + cmd->info |= opt; > > + cmd->info |= IOREGIONFD_SIZE(size); > > + cmd->info |= IOREGIONFD_RESP(resp); > > + > > + return true; > > +} > > + > > static int > > ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, > > gpa_t addr, > > int len, void *val) > > { > > + struct ioregion *p = to_ioregion(this); > > + struct ioregionfd_cmd *cmd; > > + struct ioregionfd_resp *resp; > > + size_t buf_size; > > + void *buf; > > + int ret = 0; > > + > > + if ((p->rf->f_flags & O_NONBLOCK) || (p->wf->f_flags & > > O_NONBLOCK)) > > + return -EINVAL; > > Another CPU could change file descriptor flags while we are running. > Therefore it might be simplest to handle kernel_write() and > kernel_read() -EAGAIN return values instead of trying to check. > Ok, I'll fix this > > + if ((addr + len - 1) > (p->paddr + p->size - 1)) > > + return -EINVAL; > > + > > + buf_size = max_t(size_t, sizeof(*cmd), sizeof(*resp)); > > + buf = kzalloc(buf_size, GFP_KERNEL); > > + if (!buf) > > + return -ENOMEM; > > + cmd = (struct ioregionfd_cmd *)buf; > > + resp = (struct ioregionfd_resp *)buf; > > I think they are small enough to declare them on the stack: > > union { > struct ioregionfd_cmd cmd; > struct ioregionfd_resp resp; > } buf; > > memset(&buf, 0, sizeof(buf)); > Ok > > + if (!pack_cmd(cmd, addr - p->paddr, len, IOREGIONFD_CMD_READ, > > + 1, p->user_data, NULL)) { > > + kfree(buf); > > + return -EOPNOTSUPP; > > + } > > + > > + ret = kernel_write(p->wf, cmd, sizeof(*cmd), 0); > > + if (ret != sizeof(*cmd)) { > > + kfree(buf); > > + return (ret < 0) ? ret : -EIO; > > + } > > + memset(buf, 0, buf_size); > > + ret = kernel_read(p->rf, resp, sizeof(*resp), 0); > > + if (ret != sizeof(*resp)) { > > + kfree(buf); > > + return (ret < 0) ? ret : -EIO; > > + } > > + > > + switch (len) { > > + case 1: > > + *(u8 *)val = (u8)resp->data; > > + break; > > + case 2: > > + *(u16 *)val = (u16)resp->data; > > + break; > > + case 4: > > + *(u32 *)val = (u32)resp->data; > > + break; > > + case 8: > > + *(u64 *)val = (u64)resp->data; > > + break; > > + default: > > + break; > > + } > > This looks inconsistent. cmd->data is treated as a packed > u8/u16/u32/864 > whereas resp->data is treated as u64? > > I was expecting memcpy(val, &resp->data, len) here not the u64 -> > u8/u16/u32/u64 conversion. I'll fix this, thanks!