On Thu, May 07, 2020 at 11:19:40AM +0530, Kirti Wankhede wrote:
> 
> 
> On 5/7/2020 6:31 AM, Yan Zhao wrote:
> > On Tue, May 05, 2020 at 01:54:20AM +0800, Kirti Wankhede wrote:
> > > This patch makes mtty device migration capable. Purpose of this code is
> > > to test the migration interface. Only the stop-and-copy phase is implemented.
> > > Postcopy migration is not supported.
> > >
> > > Actual data for mtty device migration is very small. Appended dummy data to
> > > migration data stream, default 100 Mbytes. Added sysfs file
> > > 'dummy_data_size_MB' to get dummy data size from user which can be used
> > > to check performance based on data size. During resuming, dummy data is
> > > read and discarded.
> > >
> > > Signed-off-by: Kirti Wankhede <kwankhede@xxxxxxxxxx>
> > > ---
> > >  samples/vfio-mdev/mtty.c | 602 ++++++++++++++++++++++++++++++++++++++++++++---
> > >  1 file changed, 574 insertions(+), 28 deletions(-)
> > >
> > > diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
> > > index bf666cce5bb7..f9194234fc6a 100644
> > > --- a/samples/vfio-mdev/mtty.c
> > > +++ b/samples/vfio-mdev/mtty.c
> > > @@ -44,9 +44,23 @@
> > >  #define MTTY_STRING_LEN		16
> > > -#define MTTY_CONFIG_SPACE_SIZE	0xff
> > > -#define MTTY_IO_BAR_SIZE		0x8
> > > -#define MTTY_MMIO_BAR_SIZE		0x100000
> > > +#define MTTY_CONFIG_SPACE_SIZE		0xff
> > > +#define MTTY_IO_BAR_SIZE		0x8
> > > +#define MTTY_MMIO_BAR_SIZE		0x100000
> > > +#define MTTY_MIGRATION_REGION_SIZE	0x1000000	// 16M
> > > +
> > > +#define MTTY_MIGRATION_REGION_INDEX	VFIO_PCI_NUM_REGIONS
> > > +#define MTTY_REGIONS_MAX	(MTTY_MIGRATION_REGION_INDEX + 1)
> > > +
> > > +/* Data section start from page aligned offset */
> > > +#define MTTY_MIGRATION_REGION_DATA_OFFSET	(0x1000)
> > > +
> > > +/* First page is used for struct vfio_device_migration_info */
> > > +#define MTTY_MIGRATION_REGION_SIZE_MMAP \
> > > +	(MTTY_MIGRATION_REGION_SIZE - MTTY_MIGRATION_REGION_DATA_OFFSET)
> > > +
> > > +#define MIGRATION_INFO_OFFSET(MEMBER) \
> > > +	offsetof(struct vfio_device_migration_info, MEMBER)
> > >  #define STORE_LE16(addr, val)	(*(u16 *)addr = val)
> > >  #define STORE_LE32(addr, val)	(*(u32 *)addr = val)
> > > @@ -129,6 +143,28 @@ struct serial_port {
> > >  	u8 intr_trigger_level;	/* interrupt trigger level */
> > >  };
> > > +/* Migration packet */
> > > +#define PACKET_ID	(u16)(0xfeedbaba)
> > > +
> > > +#define PACKET_FLAGS_ACTUAL_DATA	(1 << 0)
> > > +#define PACKET_FLAGS_DUMMY_DATA		(1 << 1)
> > > +
> > > +#define PACKET_DATA_SIZE_MAX	(8 * 1024 * 1024)
> > > +
> > > +struct packet {
> > > +	u16 id;
> > > +	u16 flags;
> > > +	u32 data_size;
> > > +	u8 data[];
> > > +};
> > > +
> > > +enum {
> > > +	PACKET_STATE_NONE = 0,
> > > +	PACKET_STATE_PREPARED,
> > > +	PACKET_STATE_COPIED,
> > > +	PACKET_STATE_LAST,
> > > +};
> > > +
> > >  /* State of each mdev device */
> > >  struct mdev_state {
> > >  	int irq_fd;
> > > @@ -138,22 +174,37 @@ struct mdev_state {
> > >  	u8 *vconfig;
> > >  	struct mutex ops_lock;
> > >  	struct mdev_device *mdev;
> > > -	struct mdev_region_info region_info[VFIO_PCI_NUM_REGIONS];
> > > -	u32 bar_mask[VFIO_PCI_NUM_REGIONS];
> > > +	struct mdev_region_info region_info[MTTY_REGIONS_MAX];
> > > +	u32 bar_mask[MTTY_REGIONS_MAX];
> > >  	struct list_head next;
> > >  	struct serial_port s[2];
> > >  	struct mutex rxtx_lock;
> > >  	struct vfio_device_info dev_info;
> > > -	int nr_ports;
> > > +	u32 nr_ports;
> > >  	/* List of pinned gpfns, gpfn as index and content is translated hpfn */
> > >  	unsigned long *gpfn_to_hpfn;
> > >  	struct notifier_block nb;
> > > +
> > > +	u32 device_state;
> > > +	u64 saved_size;
> > > +	void *mig_region_base;
> > > +	bool is_actual_data_sent;
> > > +	struct packet *pkt;
> > > +	u32 packet_state;
> > > +	u64 dummy_data_size;
> > >  };
> > >  static struct mutex mdev_list_lock;
> > >  static struct list_head mdev_devices_list;
> > > +/*
> > > + * Default dummy data size set to 100 MB. To change value of dummy data size at
> > > + * runtime but before migration write size in MB to sysfs file
> > > + * dummy_data_size_MB
> > > + */
> > > +static unsigned long user_dummy_data_size = (100 * 1024 * 1024);
> > > +
> > >  static const struct file_operations vd_fops = {
> > >  	.owner = THIS_MODULE,
> > >  };
> > > @@ -639,6 +690,288 @@ static void mdev_read_base(struct mdev_state *mdev_state)
> > >  	}
> > >  }
> > > +static int save_setup(struct mdev_state *mdev_state)
> > > +{
> > > +	mdev_state->is_actual_data_sent = false;
> > > +
> > > +	memset(mdev_state->pkt, 0, sizeof(struct packet) +
> > > +		PACKET_DATA_SIZE_MAX);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int set_device_state(struct mdev_state *mdev_state, u32 device_state)
> > > +{
> > > +	int ret = 0;
> > > +
> > > +	if (mdev_state->device_state == device_state)
> > > +		return 0;
> > > +
> > > +	if (device_state & VFIO_DEVICE_STATE_RUNNING) {
> > > +#if defined(DEBUG)
> > > +		if (device_state & VFIO_DEVICE_STATE_SAVING) {
> > > +			pr_info("%s: %s Pre-copy\n", __func__,
> > > +				dev_name(mdev_dev(mdev_state->mdev)));
> > > +		} else
> > > +			pr_info("%s: %s Running\n", __func__,
> > > +				dev_name(mdev_dev(mdev_state->mdev)));
> > > +#endif
> > > +	} else {
> > > +		if (device_state & VFIO_DEVICE_STATE_SAVING) {
> > > +#if defined(DEBUG)
> > > +			pr_info("%s: %s Stop-n-copy\n", __func__,
> > > +				dev_name(mdev_dev(mdev_state->mdev)));
> > > +#endif
> > > +			ret = save_setup(mdev_state);
> > > +
> > > +		} else if (device_state & VFIO_DEVICE_STATE_RESUMING) {
> > > +#if defined(DEBUG)
> > > +			pr_info("%s: %s Resuming\n", __func__,
> > > +				dev_name(mdev_dev(mdev_state->mdev)));
> > > +		} else {
> > > +			pr_info("%s: %s Stopped\n", __func__,
> > > +				dev_name(mdev_dev(mdev_state->mdev)));
> > > +#endif
> > > +		}
> > > +	}
> > > +
> > > +	mdev_state->device_state = device_state;
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +static u32 get_device_state(struct mdev_state *mdev_state)
> > > +{
> > > +	return mdev_state->device_state;
> > > +}
> > > +
> > > +static void write_to_packet(struct packet *pkt, u8 *data, size_t size)
> > > +{
> > > +	if ((pkt->data_size + size) > PACKET_DATA_SIZE_MAX) {
> > > +		pr_err("%s: packet data overflow\n", __func__);
> > > +		return;
> > > +	}
> > > +	memcpy((void *)&pkt->data[pkt->data_size], (void *)data, size);
> > > +	pkt->data_size += size;
> > > +}
> > > +
> > > +static void read_from_packet(struct packet *pkt, u8 *data,
> > > +			     int index, size_t size)
> > > +{
> > > +	if ((index + size) > PACKET_DATA_SIZE_MAX) {
> > > +		pr_err("%s: packet data overflow\n", __func__);
> > > +		return;
> > > +	}
> > > +
> > > +	memcpy((void *)data, (void *)&pkt->data[index], size);
> > > +}
> > > +
> > > +static int save_device_data(struct mdev_state *mdev_state, u64 *pending)
> > > +{
> > > +	/* Save device data only during stop-and-copy phase */
> > > +	if (mdev_state->device_state != VFIO_DEVICE_STATE_SAVING) {
> > > +		*pending = 0;
> > > +		return 0;
> > > +	}
> > > +
> > > +	if (mdev_state->packet_state == PACKET_STATE_PREPARED) {
> > > +		*pending = sizeof(struct packet) + mdev_state->pkt->data_size;
> > > +		return 0;
> > > +	}
> > > +
> > > +	if (!mdev_state->is_actual_data_sent) {
> > > +
> > > +		/* create actual data packet */
> > > +		write_to_packet(mdev_state->pkt, (u8 *)&mdev_state->nr_ports,
> > > +				sizeof(mdev_state->nr_ports));
> > > +		write_to_packet(mdev_state->pkt, (u8 *)&mdev_state->s,
> > > +				sizeof(struct serial_port) * 2);
> > > +
> > > +		write_to_packet(mdev_state->pkt, mdev_state->vconfig,
> > > +				MTTY_CONFIG_SPACE_SIZE);
> > > +
> > > +		write_to_packet(mdev_state->pkt, (u8 *)mdev_state->gpfn_to_hpfn,
> > > +				sizeof(unsigned long) * MAX_GPFN_COUNT);
> > > +
> > > +		mdev_state->pkt->id = PACKET_ID;
> > > +		mdev_state->pkt->flags = PACKET_FLAGS_ACTUAL_DATA;
> > > +
> > > +		mdev_state->is_actual_data_sent = true;
> > > +	} else {
> > > +		/* create dummy data packet */
> > > +		if (mdev_state->dummy_data_size > user_dummy_data_size) {
> > > +			*pending = 0;
> > > +			mdev_state->packet_state = PACKET_STATE_NONE;
> > > +			return 0;
> > > +		}
> > > +
> > > +		memset(mdev_state->pkt->data, 0xa5, PACKET_DATA_SIZE_MAX);
> > > +
> > > +		mdev_state->pkt->id = PACKET_ID;
> > > +		mdev_state->pkt->flags = PACKET_FLAGS_DUMMY_DATA;
> > > +		mdev_state->pkt->data_size = PACKET_DATA_SIZE_MAX;
> > > +		mdev_state->dummy_data_size += PACKET_DATA_SIZE_MAX;
> > > +	}
> > > +
> > > +	*pending = sizeof(struct packet) + mdev_state->pkt->data_size;
> > > +	mdev_state->packet_state = PACKET_STATE_PREPARED;
> > > +	mdev_state->saved_size = 0;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int copy_device_data(struct mdev_state *mdev_state)
> > > +{
> > > +	u64 size;
> > > +
> > > +	if (!mdev_state->pkt || !mdev_state->mig_region_base)
> > > +		return -EINVAL;
> > > +
> > > +	if (mdev_state->packet_state == PACKET_STATE_COPIED)
> > > +		return 0;
> > > +
> > > +	if (!mdev_state->pkt->data_size)
> > > +		return 0;
> > > +
> > > +	size = sizeof(struct packet) + mdev_state->pkt->data_size;
> > > +
> > > +	memcpy(mdev_state->mig_region_base, mdev_state->pkt, size);
> > > +
> > If the data area is mmapped, who is going to copy data from mdev_state->pkt
> > to mdev_state->mig_region_base?
> > Actually, I do see this area is mmapped in this sample.
> > 
> 
> This area is mmapped and is backed by memory, see mtty_mmap(); on read access
> to data_offset, packet data is copied to the mmapped memory.
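
For reference, a minimal sketch of how such a memory-backed, mmap'able data
area can be exposed to userspace, assuming the data area is allocated with
vmalloc_user() (so it can be handed out with remap_vmalloc_range()). This is
only an illustration of the idea, not necessarily how mtty_mmap() in this
patch is implemented; the function name example_mig_mmap is made up, and
error handling and locking are omitted.

#include <linux/mm.h>
#include <linux/vmalloc.h>

/*
 * Illustrative only: map the page-aligned data area of the migration
 * region (a vmalloc_user() buffer at mig_region_base) straight into the
 * caller's address space, so the driver can memcpy packets into it and
 * userspace can either read() them or access them through the mapping.
 */
static int example_mig_mmap(struct mdev_state *mdev_state,
			    struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	/* only the data area, not the vfio_device_migration_info page */
	if (size > MTTY_MIGRATION_REGION_SIZE_MMAP)
		return -EINVAL;

	return remap_vmalloc_range(vma, mdev_state->mig_region_base, 0);
}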
> 
> > > +	mdev_state->saved_size = size;
> > > +	mdev_state->packet_state = PACKET_STATE_COPIED;
> > > +	memset(mdev_state->pkt, 0, sizeof(struct packet));
> > > +	return 0;
> > > +}
> > > +
> > > +static int resume_device_data(struct mdev_state *mdev_state, u64 data_size)
> > > +{
> > > +	unsigned long i;
> > > +
> > > +	if (mdev_state->device_state != VFIO_DEVICE_STATE_RESUMING)
> > > +		return -EINVAL;
> > > +
> > > +	if (!mdev_state->pkt || !mdev_state->mig_region_base)
> > > +		return -EINVAL;
> > > +
> > > +	memcpy(mdev_state->pkt, mdev_state->mig_region_base, data_size);
> > > +
> > > +	if (mdev_state->pkt->flags & PACKET_FLAGS_ACTUAL_DATA) {
> > > +		int index = 0;
> > > +		/* restore device data */
> > > +		read_from_packet(mdev_state->pkt, (u8 *)&mdev_state->nr_ports,
> > > +				 index, sizeof(mdev_state->nr_ports));
> > > +		index += sizeof(mdev_state->nr_ports);
> > > +
> > > +		read_from_packet(mdev_state->pkt, (u8 *)&mdev_state->s,
> > > +				 index, sizeof(struct serial_port) * 2);
> > > +		index += sizeof(struct serial_port) * 2;
> > > +
> > > +		read_from_packet(mdev_state->pkt, mdev_state->vconfig,
> > > +				 index, MTTY_CONFIG_SPACE_SIZE);
> > > +		index += MTTY_CONFIG_SPACE_SIZE;
> > > +
> > > +		read_from_packet(mdev_state->pkt,
> > > +				 (u8 *)mdev_state->gpfn_to_hpfn,
> > > +				 index, sizeof(unsigned long) * MAX_GPFN_COUNT);
> > > +		index += sizeof(unsigned long) * MAX_GPFN_COUNT;
> > > +
> > > +		for (i = 0; i < MAX_GPFN_COUNT; i++) {
> > > +			if (mdev_state->gpfn_to_hpfn[i] != PFN_NULL) {
> > > +				int ret;
> > > +				unsigned long hpfn;
> > > +
> > > +				ret = vfio_pin_pages(mdev_dev(mdev_state->mdev),
> > > +					&i, 1, IOMMU_READ | IOMMU_WRITE, &hpfn);
> > > +				if (ret <= 0) {
> > > +					pr_err("%s: 0x%lx unpin error %d\n",
> > > +					       __func__, i, ret);
> > > +					continue;
> > > +				}
> > > +				mdev_state->gpfn_to_hpfn[i] = hpfn;
> > > +			}
> > > +		}
> > > +	} else {
> > > +#if defined(DEBUG)
> > > +		pr_info("%s: %s discard data 0x%llx\n",
> > > +			__func__, dev_name(mdev_dev(mdev_state->mdev)),
> > > +			data_size);
> > > +#endif
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int handle_mig_read(unsigned int index, struct mdev_state *mdev_state,
> > > +			   loff_t offset, u8 *buf, u32 count)
> > > +{
> > > +	int ret = 0;
> > > +	u64 pending = 0;
> > > +
> > > +	switch (offset) {
> > > +	case MIGRATION_INFO_OFFSET(device_state):	// 0x00
> > > +		*(u32 *)buf = get_device_state(mdev_state);
> > > +		break;
> > > +
> > > +	case MIGRATION_INFO_OFFSET(pending_bytes):	// 0x08
> > > +		ret = save_device_data(mdev_state, &pending);
> > > +		if (ret)
> > > +			break;
> > > +		*(u64 *)buf = pending;
> > > +		break;
> > > +
> > > +	case MIGRATION_INFO_OFFSET(data_offset):	// 0x10
> > > +		if (mdev_state->device_state & VFIO_DEVICE_STATE_SAVING) {
> > > +			ret = copy_device_data(mdev_state);
> > > +			if (ret)
> > > +				break;
> > > +		}
> > > +		*(u64 *)buf = MTTY_MIGRATION_REGION_DATA_OFFSET;
> > what is this?
> 
> I think the macro is self-explanatory: it is the data offset within the
> migration region where the vendor driver has copied data, and the user
> application should read data from this offset of the migration region.
> 

Ok, I just mixed up data_offset with the real data offset of the data area.
BTW, it's really confusing that a read of data_offset is the indicator for
data preparation while a write of data_size is the indicator for data loading.

Thanks
Yan
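
To make that flow concrete, below is a rough sketch of one save iteration as
seen from userspace against this v1 migration interface (struct
vfio_device_migration_info). The names device_fd, mig_region_off, buf and
save_one_iteration are hypothetical placeholders, and error handling is
omitted. It spells out the point above: on the save side the read of
data_offset is what makes the driver stage data (copy_device_data() in this
patch), while on the resume side it is the write of data_size that makes the
driver consume it (resume_device_data()).

#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <linux/vfio.h>

/*
 * One save iteration: read pending_bytes, then data_offset, then data_size,
 * then fetch the staged bytes from the migration region's data area.
 */
static void save_one_iteration(int device_fd, off_t mig_region_off, void *buf)
{
	uint64_t pending, data_offset, data_size;

	/* in this patch, reading pending_bytes runs save_device_data() */
	pread(device_fd, &pending, sizeof(pending), mig_region_off +
	      offsetof(struct vfio_device_migration_info, pending_bytes));
	if (!pending)
		return;

	/* reading data_offset runs copy_device_data(): data is now staged */
	pread(device_fd, &data_offset, sizeof(data_offset), mig_region_off +
	      offsetof(struct vfio_device_migration_info, data_offset));

	pread(device_fd, &data_size, sizeof(data_size), mig_region_off +
	      offsetof(struct vfio_device_migration_info, data_size));

	/* read (or mmap) the staged packet from the data area */
	pread(device_fd, buf, data_size, mig_region_off + data_offset);
}

/*
 * On resume the order inverts: userspace writes the data at data_offset
 * first, then writes data_size, which triggers resume_device_data().
 */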