Re: [RFC 2/2] KVM: add initial support for ioregionfd blocking read/write operations

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 2020-12-29 at 12:00 +0000, Stefan Hajnoczi wrote:
> On Tue, Dec 29, 2020 at 01:02:44PM +0300, Elena Afanasova wrote:
> > Signed-off-by: Elena Afanasova <eafanasova@xxxxxxxxx>
> > ---
> >  virt/kvm/ioregion.c | 157
> > ++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 157 insertions(+)
> > 
> > diff --git a/virt/kvm/ioregion.c b/virt/kvm/ioregion.c
> > index a200c3761343..8523f4126337 100644
> > --- a/virt/kvm/ioregion.c
> > +++ b/virt/kvm/ioregion.c
> > @@ -4,6 +4,33 @@
> >  #include <kvm/iodev.h>
> >  #include "eventfd.h"
> >  
> > +/* Wire protocol */
> > +struct ioregionfd_cmd {
> > +	__u32 info;
> > +	__u32 padding;
> > +	__u64 user_data;
> > +	__u64 offset;
> > +	__u64 data;
> > +};
> > +
> > +struct ioregionfd_resp {
> > +	__u64 data;
> > +	__u8 pad[24];
> > +};
> > +
> > +#define IOREGIONFD_CMD_READ    0
> > +#define IOREGIONFD_CMD_WRITE   1
> > +
> > +#define IOREGIONFD_SIZE_8BIT   0
> > +#define IOREGIONFD_SIZE_16BIT  1
> > +#define IOREGIONFD_SIZE_32BIT  2
> > +#define IOREGIONFD_SIZE_64BIT  3
> > +
> > +#define IOREGIONFD_SIZE_OFFSET 4
> > +#define IOREGIONFD_RESP_OFFSET 6
> > +#define IOREGIONFD_SIZE(x) ((x) << IOREGIONFD_SIZE_OFFSET)
> > +#define IOREGIONFD_RESP(x) ((x) << IOREGIONFD_RESP_OFFSET)
> 
> These belong in the uapi header so userspace also has struct
> ioregionfd_cmd, struct ioregionfd_resp, etc.
> 
I'll move the wire protocol to a separate uapi header

> > +
> >  void
> >  kvm_ioregionfd_init(struct kvm *kvm)
> >  {
> > @@ -38,10 +65,100 @@ ioregion_release(struct ioregion *p)
> >  	kfree(p);
> >  }
> >  
> > +static bool
> > +pack_cmd(struct ioregionfd_cmd *cmd, u64 offset, u64 len, int opt,
> > bool resp,
> > +	 u64 user_data, const void *val)
> > +{
> > +	u64 size = 0;
> > +
> > +	switch (len) {
> > +	case 1:
> > +		size = IOREGIONFD_SIZE_8BIT;
> > +		*((u8 *)&cmd->data) = val ? *(u8 *)val : 0;
> > +		break;
> > +	case 2:
> > +		size = IOREGIONFD_SIZE_16BIT;
> > +		*((u16 *)&cmd->data) = val ? *(u16 *)val : 0;
> > +		break;
> > +	case 4:
> > +		size = IOREGIONFD_SIZE_32BIT;
> > +		*((u32 *)&cmd->data) = val ? *(u32 *)val : 0;
> > +		break;
> > +	case 8:
> > +		size = IOREGIONFD_SIZE_64BIT;
> > +		*((u64 *)&cmd->data) = val ? *(u64 *)val : 0;
> > +		break;
> > +	default:
> > +		return false;
> > 
> 
> The assignments and casts can be replaced with a single memcpy after
> the
> switch statement. This is also how KVM_EXIT_MMIO and Coalesced MMIO
> do
> it:
> 
>   memcpy(cmd->data, val, len);
> 
Thanks for pointing it out

> However, we need to make sure that cmd has been zeroed so that kernel
> memory is not accidentally exposed to userspace.
> 
>  +	}
> > +	cmd->user_data = user_data;
> > +	cmd->offset = offset;
> > +	cmd->info |= opt;
> > +	cmd->info |= IOREGIONFD_SIZE(size);
> > +	cmd->info |= IOREGIONFD_RESP(resp);
> > +
> > +	return true;
> > +}
> > +
> >  static int
> >  ioregion_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
> > gpa_t addr,
> >  	      int len, void *val)
> >  {
> > +	struct ioregion *p = to_ioregion(this);
> > +	struct ioregionfd_cmd *cmd;
> > +	struct ioregionfd_resp *resp;
> > +	size_t buf_size;
> > +	void *buf;
> > +	int ret = 0;
> > +
> > +	if ((p->rf->f_flags & O_NONBLOCK) || (p->wf->f_flags &
> > O_NONBLOCK))
> > +		return -EINVAL;
> 
> Another CPU could change file descriptor flags while we are running.
> Therefore it might be simplest to handle kernel_write() and
> kernel_read() -EAGAIN return values instead of trying to check.
> 
Ok, I'll fix this

> > +	if ((addr + len - 1) > (p->paddr + p->size - 1))
> > +		return -EINVAL;
> > +
> > +	buf_size = max_t(size_t, sizeof(*cmd), sizeof(*resp));
> > +	buf = kzalloc(buf_size, GFP_KERNEL);
> > +	if (!buf)
> > +		return -ENOMEM;
> > +	cmd = (struct ioregionfd_cmd *)buf;
> > +	resp = (struct ioregionfd_resp *)buf;
> 
> I think they are small enough to declare them on the stack:
> 
>   union {
>       struct ioregionfd_cmd cmd;
>       struct ioregionfd_resp resp;
>   } buf;
> 
>   memset(&buf, 0, sizeof(buf));
> 
Ok

> > +	if (!pack_cmd(cmd, addr - p->paddr, len, IOREGIONFD_CMD_READ,
> > +		      1, p->user_data, NULL)) {
> > +		kfree(buf);
> > +		return -EOPNOTSUPP;
> > +	}
> > +
> > +	ret = kernel_write(p->wf, cmd, sizeof(*cmd), 0);
> > +	if (ret != sizeof(*cmd)) {
> > +		kfree(buf);
> > +		return (ret < 0) ? ret : -EIO;
> > +	}
> > +	memset(buf, 0, buf_size);
> > +	ret = kernel_read(p->rf, resp, sizeof(*resp), 0);
> > +	if (ret != sizeof(*resp)) {
> > +		kfree(buf);
> > +		return (ret < 0) ? ret : -EIO;
> > +	}
> > +
> > +	switch (len) {
> > +	case 1:
> > +		*(u8 *)val = (u8)resp->data;
> > +		break;
> > +	case 2:
> > +		*(u16 *)val = (u16)resp->data;
> > +		break;
> > +	case 4:
> > +		*(u32 *)val = (u32)resp->data;
> > +		break;
> > +	case 8:
> > +		*(u64 *)val = (u64)resp->data;
> > +		break;
> > +	default:
> > +		break;
> > +	}
> 
> This looks inconsistent. cmd->data is treated as a packed
> u8/u16/u32/864
> whereas resp->data is treated as u64?
> 
> I was expecting memcpy(val, &resp->data, len) here not the u64 ->
> u8/u16/u32/u64 conversion.
I'll fix this, thanks!





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux