On Tue, Dec 29, 2020 at 01:02:44PM +0300, Elena Afanasova wrote: > Signed-off-by: Elena Afanasova <eafanasova@xxxxxxxxx> > --- > virt/kvm/ioregion.c | 157 ++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 157 insertions(+) > > diff --git a/virt/kvm/ioregion.c b/virt/kvm/ioregion.c > index a200c3761343..8523f4126337 100644 > --- a/virt/kvm/ioregion.c > +++ b/virt/kvm/ioregion.c > @@ -4,6 +4,33 @@ > #include <kvm/iodev.h> > #include "eventfd.h" > > +/* Wire protocol */ > +struct ioregionfd_cmd { > + __u32 info; > + __u32 padding; > + __u64 user_data; > + __u64 offset; > + __u64 data; > +}; > + > +struct ioregionfd_resp { > + __u64 data; > + __u8 pad[24]; > +}; > + > +#define IOREGIONFD_CMD_READ 0 > +#define IOREGIONFD_CMD_WRITE 1 > + > +#define IOREGIONFD_SIZE_8BIT 0 > +#define IOREGIONFD_SIZE_16BIT 1 > +#define IOREGIONFD_SIZE_32BIT 2 > +#define IOREGIONFD_SIZE_64BIT 3 > + > +#define IOREGIONFD_SIZE_OFFSET 4 > +#define IOREGIONFD_RESP_OFFSET 6 > +#define IOREGIONFD_SIZE(x) ((x) << IOREGIONFD_SIZE_OFFSET) > +#define IOREGIONFD_RESP(x) ((x) << IOREGIONFD_RESP_OFFSET) These belong in the uapi header so userspace also has struct ioregionfd_cmd, struct ioregionfd_resp, etc. > + > void > kvm_ioregionfd_init(struct kvm *kvm) > { > @@ -38,10 +65,100 @@ ioregion_release(struct ioregion *p) > kfree(p); > } > > +static bool > +pack_cmd(struct ioregionfd_cmd *cmd, u64 offset, u64 len, int opt, bool resp, > + u64 user_data, const void *val) > +{ > + u64 size = 0; > + > + switch (len) { > + case 1: > + size = IOREGIONFD_SIZE_8BIT; > + *((u8 *)&cmd->data) = val ? *(u8 *)val : 0; > + break; > + case 2: > + size = IOREGIONFD_SIZE_16BIT; > + *((u16 *)&cmd->data) = val ? *(u16 *)val : 0; > + break; > + case 4: > + size = IOREGIONFD_SIZE_32BIT; > + *((u32 *)&cmd->data) = val ? *(u32 *)val : 0; > + break; > + case 8: > + size = IOREGIONFD_SIZE_64BIT; > + *((u64 *)&cmd->data) = val ? *(u64 *)val : 0; > + break; > + default: > + return false; > The assignments and casts can be replaced with a single memcpy after the switch statement. This is also how KVM_EXIT_MMIO and Coalesced MMIO do it: memcpy(cmd->data, val, len); However, we need to make sure that cmd has been zeroed so that kernel memory is not accidentally exposed to userspace. + } > + cmd->user_data = user_data; > + cmd->offset = offset; > + cmd->info |= opt; > + cmd->info |= IOREGIONFD_SIZE(size); > + cmd->info |= IOREGIONFD_RESP(resp); > + > + return true; > +} > + > static int > ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr, > int len, void *val) > { > + struct ioregion *p = to_ioregion(this); > + struct ioregionfd_cmd *cmd; > + struct ioregionfd_resp *resp; > + size_t buf_size; > + void *buf; > + int ret = 0; > + > + if ((p->rf->f_flags & O_NONBLOCK) || (p->wf->f_flags & O_NONBLOCK)) > + return -EINVAL; Another CPU could change file descriptor flags while we are running. Therefore it might be simplest to handle kernel_write() and kernel_read() -EAGAIN return values instead of trying to check. > + if ((addr + len - 1) > (p->paddr + p->size - 1)) > + return -EINVAL; > + > + buf_size = max_t(size_t, sizeof(*cmd), sizeof(*resp)); > + buf = kzalloc(buf_size, GFP_KERNEL); > + if (!buf) > + return -ENOMEM; > + cmd = (struct ioregionfd_cmd *)buf; > + resp = (struct ioregionfd_resp *)buf; I think they are small enough to declare them on the stack: union { struct ioregionfd_cmd cmd; struct ioregionfd_resp resp; } buf; memset(&buf, 0, sizeof(buf)); > + if (!pack_cmd(cmd, addr - p->paddr, len, IOREGIONFD_CMD_READ, > + 1, p->user_data, NULL)) { > + kfree(buf); > + return -EOPNOTSUPP; > + } > + > + ret = kernel_write(p->wf, cmd, sizeof(*cmd), 0); > + if (ret != sizeof(*cmd)) { > + kfree(buf); > + return (ret < 0) ? ret : -EIO; > + } > + memset(buf, 0, buf_size); > + ret = kernel_read(p->rf, resp, sizeof(*resp), 0); > + if (ret != sizeof(*resp)) { > + kfree(buf); > + return (ret < 0) ? ret : -EIO; > + } > + > + switch (len) { > + case 1: > + *(u8 *)val = (u8)resp->data; > + break; > + case 2: > + *(u16 *)val = (u16)resp->data; > + break; > + case 4: > + *(u32 *)val = (u32)resp->data; > + break; > + case 8: > + *(u64 *)val = (u64)resp->data; > + break; > + default: > + break; > + } This looks inconsistent. cmd->data is treated as a packed u8/u16/u32/864 whereas resp->data is treated as u64? I was expecting memcpy(val, &resp->data, len) here not the u64 -> u8/u16/u32/u64 conversion.
Attachment:
signature.asc
Description: PGP signature