You should Cc Nitesh who is working on a related feature. On Mon, Nov 13, 2017 at 06:34:48PM +0800, Wei Wang wrote: > Ping for comments, thanks. > > On 11/03/2017 04:13 PM, Wei Wang wrote: > > Negotiation of the VIRTIO_BALLOON_F_FREE_PAGE_VQ feature indicates the > > support of reporting hints of guest free pages to the host via > > virtio-balloon. The host requests the guest to report the free pages by > > sending commands via the virtio-balloon configuration registers. > > > > When the guest starts to report, the first element added to the free page > > vq is a sequence id of the start reporting command. The id is given by > > the host, and it indicates whether the following free pages correspond > > to the command. For example, the host may stop the report and start again > > with a new command id. The obsolete pages for the previous start command > > can be detected by the id dismatching on the host. The id is added to the > > vq using an output buffer, and the free pages are added to the vq using > > input buffer. > > > > Here are some explainations about the added configuration registers: > > - host2guest_cmd: a register used by the host to send commands to the > > guest. > > - guest2host_cmd: written by the guest to ACK to the host about the > > commands that have been received. The host will clear the corresponding > > bits on the host2guest_cmd register. The guest also uses this register > > to send commands to the host (e.g. when finish free page reporting). I am not sure what is the role of guest2host_cmd. Reporting of the correct cmd id seems sufficient indication that guest received the start command. Not getting any more seems sufficient to detect stop. > > - free_page_cmd_id: the sequence id of the free page report command > > given by the host. > > > > Signed-off-by: Wei Wang <wei.w.wang@xxxxxxxxx> > > Signed-off-by: Liang Li <liang.z.li@xxxxxxxxx> > > Cc: Michael S. Tsirkin <mst@xxxxxxxxxx> > > Cc: Michal Hocko <mhocko@xxxxxxxxxx> > > --- > > drivers/virtio/virtio_balloon.c | 234 ++++++++++++++++++++++++++++++++---- > > include/uapi/linux/virtio_balloon.h | 11 ++ > > 2 files changed, 223 insertions(+), 22 deletions(-) > > > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c > > index b31fc25..4087f04 100644 > > --- a/drivers/virtio/virtio_balloon.c > > +++ b/drivers/virtio/virtio_balloon.c > > @@ -55,7 +55,12 @@ static struct vfsmount *balloon_mnt; > > struct virtio_balloon { > > struct virtio_device *vdev; > > - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; > > + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; > > + > > + /* Balloon's own wq for cpu-intensive work items */ > > + struct workqueue_struct *balloon_wq; > > + /* The free page reporting work item submitted to the balloon wq */ > > + struct work_struct report_free_page_work; > > /* The balloon servicing is delegated to a freezable workqueue. */ > > struct work_struct update_balloon_stats_work; > > @@ -65,6 +70,10 @@ struct virtio_balloon { > > spinlock_t stop_update_lock; > > bool stop_update; > > + /* Stop reporting free pages */ > > + bool report_free_page_stop; I would revert logic here: bool report_free_page; > > + uint32_t free_page_cmd_id; > > + > > /* Waiting for host to ack the pages we released. */ > > wait_queue_head_t acked; > > @@ -191,6 +200,30 @@ static void send_balloon_page_sg(struct virtio_balloon *vb, > > kick_and_wait(vq, vb->acked); > > } > > +static void send_free_page_sg(struct virtqueue *vq, void *addr, uint32_t size) > > +{ > > + int err = 0; > > + unsigned int len; > > + > > + /* Detach all the used buffers from the vq */ > > + while (virtqueue_get_buf(vq, &len)) > > + ; > > + > > + /* > > + * Since this is an optimization feature, losing a couple of free > > + * pages to report isn't important. We simply resturn without adding > > + * the page if the vq is full. > > + */ > > + if (vq->num_free) { > > + err = add_one_sg(vq, addr, size); > > + BUG_ON(err); > > + } > > + > > + /* Batch till the vq is full */ > > + if (!vq->num_free) > > + virtqueue_kick(vq); > > +} > > + > > /* > > * Send balloon pages in sgs to host. The balloon pages are recorded in the > > * page xbitmap. Each bit in the bitmap corresponds to a page of PAGE_SIZE. > > @@ -495,9 +528,8 @@ static void stats_handle_request(struct virtio_balloon *vb) > > virtqueue_kick(vq); > > } > > -static void virtballoon_changed(struct virtio_device *vdev) > > +static void virtballoon_cmd_balloon_memory(struct virtio_balloon *vb) > > { > > - struct virtio_balloon *vb = vdev->priv; > > unsigned long flags; > > spin_lock_irqsave(&vb->stop_update_lock, flags); > > @@ -506,6 +538,50 @@ static void virtballoon_changed(struct virtio_device *vdev) > > spin_unlock_irqrestore(&vb->stop_update_lock, flags); > > } > > +static void virtballoon_cmd_report_free_page_start(struct virtio_balloon *vb) > > +{ > > + unsigned long flags; > > + > > + vb->report_free_page_stop = false; this flag is used a lot outside any locks. Why is this safe? Please add some comments explaining access to this flag. > > + spin_lock_irqsave(&vb->stop_update_lock, flags); > > + if (!vb->stop_update) > > + queue_work(vb->balloon_wq, &vb->report_free_page_work); > > + spin_unlock_irqrestore(&vb->stop_update_lock, flags); > > +} > > + > > +static void virtballoon_changed(struct virtio_device *vdev) > > +{ > > + struct virtio_balloon *vb = vdev->priv; > > + u32 host2guest_cmd, guest2host_cmd = 0; > > + > > + if (!virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) { > > + virtballoon_cmd_balloon_memory(vb); > > + return; This might be a handy feature: one can disable balloon without hot-unplug. But I would use a separate feature flag to control it. > > + } > > + > > + virtio_cread(vb->vdev, struct virtio_balloon_config, host2guest_cmd, > > + &host2guest_cmd); > > + > > + if (host2guest_cmd & VIRTIO_BALLOON_CMD_BALLOON_MEMORY) { > > + virtballoon_cmd_balloon_memory(vb); > > + guest2host_cmd |= VIRTIO_BALLOON_CMD_BALLOON_MEMORY; > > + } > > + > > + if (host2guest_cmd & VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_START) { > > + virtballoon_cmd_report_free_page_start(vb); > > + guest2host_cmd |= VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_START; > > + } > > + > > + if (host2guest_cmd & VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_STOP) { > > + vb->report_free_page_stop = true; > > + guest2host_cmd |= VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_STOP; > > + } I am not sure why free page has start+stop but e.g. balloon has a single bit. In fact I would really just use command id. When it changes, we know a new report is needed. > > + > > + /* Ack to the host about the commands that have been received */ > > + virtio_cwrite(vb->vdev, struct virtio_balloon_config, guest2host_cmd, > > + &guest2host_cmd); So the same register is used to ack stop command and to signal end of report. This seems buggy. > > +} > > + > > static inline s64 towards_target(struct virtio_balloon *vb) > > { > > s64 target; > > @@ -597,42 +673,147 @@ static void update_balloon_size_func(struct work_struct *work) > > queue_work(system_freezable_wq, work); > > } > > -static int init_vqs(struct virtio_balloon *vb) > > +static bool virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, > > + unsigned long nr_pages) > > { > > - struct virtqueue *vqs[3]; > > - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; > > - static const char * const names[] = { "inflate", "deflate", "stats" }; > > - int err, nvqs; > > + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; > > + void *addr = (void *)pfn_to_kaddr(pfn); How do we know all free pages have a kaddr? > > + uint32_t len = nr_pages << PAGE_SHIFT; > > + > > + if (vb->report_free_page_stop) > > + return false; > > + > > + send_free_page_sg(vb->free_page_vq, addr, len); > > + return true; > > +} > > + > > +static void report_free_page_end(struct virtio_balloon *vb) > > +{ > > + u32 cmd = VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_STOP; > > /* > > - * We expect two virtqueues: inflate and deflate, and > > - * optionally stat. > > + * The host may have already requested to stop the reporting before we > > + * finish, so no need to notify the host in this case. > > */ > > - nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; > > - err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL); > > + if (vb->report_free_page_stop) > > + return; > > + vb->report_free_page_stop = true; > > + > > + virtio_cwrite(vb->vdev, struct virtio_balloon_config, guest2host_cmd, > > + &cmd); Wouldn't it be easier to add a buffer in the queue? > > +} > > + > > +static void report_free_page_cmd_id(struct virtio_balloon *vb) > > +{ > > + struct scatterlist sg; > > + int err; > > + > > + virtio_cread(vb->vdev, struct virtio_balloon_config, free_page_cmd_id, > > + &vb->free_page_cmd_id); > > + sg_init_one(&sg, &vb->free_page_cmd_id, sizeof(uint32_t)); > > + err = virtqueue_add_outbuf(vb->free_page_vq, &sg, 1, > > + &vb->free_page_cmd_id, GFP_KERNEL); > > + BUG_ON(err); > > +} > > + > > +static void report_free_page(struct work_struct *work) > > +{ > > + struct virtio_balloon *vb; > > + > > + vb = container_of(work, struct virtio_balloon, report_free_page_work); > > + report_free_page_cmd_id(vb); > > + walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages); > > + /* > > + * The last few free page blocks that were added may not reach the > > + * batch size, but need a kick to notify the device to handle them. > > + */ > > + virtqueue_kick(vb->free_page_vq); > > + report_free_page_end(vb); > > +} > > + > > +static int init_vqs(struct virtio_balloon *vb) > > +{ > > + struct virtqueue **vqs; > > + vq_callback_t **callbacks; > > + const char **names; > > + struct scatterlist sg; > > + int i, nvqs, err = -ENOMEM; > > + > > + /* Inflateq and deflateq are used unconditionally */ > > + nvqs = 2; > > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) > > + nvqs++; > > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) > > + nvqs++; > > + > > + /* Allocate space for find_vqs parameters */ > > + vqs = kcalloc(nvqs, sizeof(*vqs), GFP_KERNEL); > > + if (!vqs) > > + goto err_vq; > > + callbacks = kmalloc_array(nvqs, sizeof(*callbacks), GFP_KERNEL); > > + if (!callbacks) > > + goto err_callback; > > + names = kmalloc_array(nvqs, sizeof(*names), GFP_KERNEL); > > + if (!names) > > + goto err_names; > > + > > + callbacks[0] = balloon_ack; > > + names[0] = "inflate"; > > + callbacks[1] = balloon_ack; > > + names[1] = "deflate"; > > + > > + i = 2; > > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { > > + callbacks[i] = stats_request; > > + names[i] = "stats"; > > + i++; > > + } > > + > > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) { > > + callbacks[i] = NULL; > > + names[i] = "free_page_vq"; > > + } > > + > > + err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names, > > + NULL, NULL); > > if (err) > > - return err; > > + goto err_find; > > vb->inflate_vq = vqs[0]; > > vb->deflate_vq = vqs[1]; > > + i = 2; > > if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { > > - struct scatterlist sg; > > - unsigned int num_stats; > > - vb->stats_vq = vqs[2]; > > - > > + vb->stats_vq = vqs[i++]; > > /* > > * Prime this virtqueue with one buffer so the hypervisor can > > * use it to signal us later (it can't be broken yet!). > > */ > > - num_stats = update_balloon_stats(vb); > > - > > - sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); > > + sg_init_one(&sg, vb->stats, sizeof(vb->stats)); > > if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) > > - < 0) > > - BUG(); > > + < 0) { > > + dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n", > > + __func__); > > + goto err_find; > > + } > > virtqueue_kick(vb->stats_vq); > > } > > + > > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) > > + vb->free_page_vq = vqs[i]; > > + > > + kfree(names); > > + kfree(callbacks); > > + kfree(vqs); > > return 0; > > + > > +err_find: > > + kfree(names); > > +err_names: > > + kfree(callbacks); > > +err_callback: > > + kfree(vqs); > > +err_vq: > > + return err; > > } > > #ifdef CONFIG_BALLOON_COMPACTION > > @@ -761,6 +942,13 @@ static int virtballoon_probe(struct virtio_device *vdev) > > if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_SG)) > > xb_init(&vb->page_xb); > > + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) { > > + vb->balloon_wq = alloc_workqueue("balloon-wq", > > + WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); > > + INIT_WORK(&vb->report_free_page_work, report_free_page); > > + vb->report_free_page_stop = true; > > + } > > + > > vb->nb.notifier_call = virtballoon_oom_notify; > > vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY; > > err = register_oom_notifier(&vb->nb); > > @@ -825,6 +1013,7 @@ static void virtballoon_remove(struct virtio_device *vdev) > > spin_unlock_irq(&vb->stop_update_lock); > > cancel_work_sync(&vb->update_balloon_size_work); > > cancel_work_sync(&vb->update_balloon_stats_work); > > + cancel_work_sync(&vb->report_free_page_work); > > remove_common(vb); > > #ifdef CONFIG_BALLOON_COMPACTION > > @@ -878,6 +1067,7 @@ static unsigned int features[] = { > > VIRTIO_BALLOON_F_STATS_VQ, > > VIRTIO_BALLOON_F_DEFLATE_ON_OOM, > > VIRTIO_BALLOON_F_SG, > > + VIRTIO_BALLOON_F_FREE_PAGE_VQ, > > }; > > static struct virtio_driver virtio_balloon_driver = { > > diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h > > index 37780a7..b758484 100644 > > --- a/include/uapi/linux/virtio_balloon.h > > +++ b/include/uapi/linux/virtio_balloon.h > > @@ -35,15 +35,26 @@ > > #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ > > #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ > > #define VIRTIO_BALLOON_F_SG 3 /* Use sg instead of PFN lists */ > > +#define VIRTIO_BALLOON_F_FREE_PAGE_VQ 4 /* VQ to report free pages */ > > /* Size of a PFN in the balloon interface. */ > > #define VIRTIO_BALLOON_PFN_SHIFT 12 > > +#define VIRTIO_BALLOON_CMD_BALLOON_MEMORY (1 << 0) > > +#define VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_START (1 << 1) > > +#define VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_STOP (1 << 2) > > + > > struct virtio_balloon_config { > > /* Number of pages host wants Guest to give up. */ > > __u32 num_pages; > > /* Number of pages we've actually got in balloon. */ > > __u32 actual; > > + /* Host-to-guest command, readonly by guest */ > > + __u32 host2guest_cmd; > > + /* Sequence id of the free_page report command, readonly by guest */ > > + __u32 free_page_cmd_id; > > + /* Guest-to-host command */ > > + __u32 guest2host_cmd; > > }; > > #define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */