> > Negotiation of the VIRTIO_BALLOON_F_FREE_PAGE_VQ feature indicates the > support of reporting hints of guest free pages to host via virtio-balloon. > > Host requests the guest to report free pages by sending a new cmd > id to the guest via the free_page_report_cmd_id configuration register. > > When the guest starts to report, the first element added to the free page > vq is the cmd id given by host. When the guest finishes the reporting > of all the free pages, VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID is added > to the vq to tell host that the reporting is done. Host may also requests > the guest to stop the reporting in advance by sending the stop cmd id to > the guest via the configuration register. > > Signed-off-by: Wei Wang <wei.w.wang@xxxxxxxxx> > Signed-off-by: Liang Li <liang.z.li@xxxxxxxxx> > Cc: Michael S. Tsirkin <mst@xxxxxxxxxx> > Cc: Michal Hocko <mhocko@xxxxxxxxxx> > --- > drivers/virtio/virtio_balloon.c | 242 > +++++++++++++++++++++++++++++++----- > include/uapi/linux/virtio_balloon.h | 4 + > 2 files changed, 214 insertions(+), 32 deletions(-) > > diff --git a/drivers/virtio/virtio_balloon.c > b/drivers/virtio/virtio_balloon.c > index a1fb52c..b9561a5 100644 > --- a/drivers/virtio/virtio_balloon.c > +++ b/drivers/virtio/virtio_balloon.c > @@ -53,7 +53,12 @@ static struct vfsmount *balloon_mnt; > > struct virtio_balloon { > struct virtio_device *vdev; > - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; > + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; > + > + /* Balloon's own wq for cpu-intensive work items */ > + struct workqueue_struct *balloon_wq; > + /* The free page reporting work item submitted to the balloon wq */ > + struct work_struct report_free_page_work; > > /* The balloon servicing is delegated to a freezable workqueue. */ > struct work_struct update_balloon_stats_work; > @@ -63,6 +68,13 @@ struct virtio_balloon { > spinlock_t stop_update_lock; > bool stop_update; > > + /* Start to report free pages */ > + bool report_free_page; > + /* Stores the cmd id given by host to start the free page reporting */ > + uint32_t start_cmd_id; > + /* Stores STOP_ID as a sign to tell host that the reporting is done */ > + uint32_t stop_cmd_id; > + > /* Waiting for host to ack the pages we released. */ > wait_queue_head_t acked; > > @@ -281,6 +293,71 @@ static unsigned int update_balloon_stats(struct > virtio_balloon *vb) > return idx; > } > > +static void add_one_sg(struct virtqueue *vq, unsigned long pfn, uint32_t > len) > +{ > + struct scatterlist sg; > + unsigned int unused; > + int err; > + > + sg_init_table(&sg, 1); > + sg_set_page(&sg, pfn_to_page(pfn), len, 0); > + > + /* Detach all the used buffers from the vq */ > + while (virtqueue_get_buf(vq, &unused)) > + ; > + > + /* > + * Since this is an optimization feature, losing a couple of free > + * pages to report isn't important. We simply resturn without adding > + * the page if the vq is full. We are adding one entry each time, > + * which essentially results in no memory allocation, so the > + * GFP_KERNEL flag below can be ignored. > + */ > + if (vq->num_free) { > + err = virtqueue_add_inbuf(vq, &sg, 1, vq, GFP_KERNEL); > + /* > + * This is expected to never fail, because there is always an > + * entry available on the vq. > + */ > + BUG_ON(err); > + } > +} > + > +static void batch_free_page_sg(struct virtqueue *vq, > + unsigned long pfn, > + uint32_t len) > +{ > + add_one_sg(vq, pfn, len); > + > + /* Batch till the vq is full */ > + if (!vq->num_free) > + virtqueue_kick(vq); > +} > + > +static void send_cmd_id(struct virtqueue *vq, void *addr) > +{ > + struct scatterlist sg; > + unsigned int unused; > + int err; > + > + sg_init_one(&sg, addr, sizeof(uint32_t)); > + > + /* > + * This handles the cornercase that the vq happens to be full when > + * adding a cmd id. Rarely happen in practice. > + */ > + while (!vq->num_free) > + virtqueue_get_buf(vq, &unused); > + > + err = virtqueue_add_outbuf(vq, &sg, 1, vq, GFP_KERNEL); > + /* > + * This is expected to never fail, because there is always an > + * entry available on the vq. > + */ > + BUG_ON(err); > + virtqueue_kick(vq); > +} > + > /* > * While most virtqueues communicate guest-initiated requests to the > hypervisor, > * the stats queue operates in reverse. The driver initializes the > virtqueue > @@ -316,17 +393,6 @@ static void stats_handle_request(struct virtio_balloon > *vb) > virtqueue_kick(vq); > } > > -static void virtballoon_changed(struct virtio_device *vdev) > -{ > - struct virtio_balloon *vb = vdev->priv; > - unsigned long flags; > - > - spin_lock_irqsave(&vb->stop_update_lock, flags); > - if (!vb->stop_update) > - queue_work(system_freezable_wq, &vb->update_balloon_size_work); > - spin_unlock_irqrestore(&vb->stop_update_lock, flags); > -} > - > static inline s64 towards_target(struct virtio_balloon *vb) > { > s64 target; > @@ -343,6 +409,36 @@ static inline s64 towards_target(struct virtio_balloon > *vb) > return target - vb->num_pages; > } > > +static void virtballoon_changed(struct virtio_device *vdev) > +{ > + struct virtio_balloon *vb = vdev->priv; > + unsigned long flags; > + __u32 cmd_id; > + s64 diff = towards_target(vb); > + > + if (diff) { > + spin_lock_irqsave(&vb->stop_update_lock, flags); > + if (!vb->stop_update) > + queue_work(system_freezable_wq, > + &vb->update_balloon_size_work); > + spin_unlock_irqrestore(&vb->stop_update_lock, flags); > + } > + > + virtio_cread(vb->vdev, struct virtio_balloon_config, > + free_page_report_cmd_id, &cmd_id); > + if (cmd_id == VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID) { > + WRITE_ONCE(vb->report_free_page, false); > + } else if (cmd_id != vb->start_cmd_id) { > + /* > + * Host requests to start the reporting by sending a new cmd > + * id. > + */ > + WRITE_ONCE(vb->report_free_page, true); > + vb->start_cmd_id = cmd_id; > + queue_work(vb->balloon_wq, &vb->report_free_page_work); > + } > +} > + > static void update_balloon_size(struct virtio_balloon *vb) > { > u32 actual = vb->num_pages; > @@ -417,40 +513,113 @@ static void update_balloon_size_func(struct > work_struct *work) > > static int init_vqs(struct virtio_balloon *vb) > { > - struct virtqueue *vqs[3]; > - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; > - static const char * const names[] = { "inflate", "deflate", "stats" }; > - int err, nvqs; > + struct virtqueue **vqs; > + vq_callback_t **callbacks; > + const char **names; > + struct scatterlist sg; > + int i, nvqs, err = -ENOMEM; > + > + /* Inflateq and deflateq are used unconditionally */ > + nvqs = 2; > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) > + nvqs++; > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) > + nvqs++; > + > + /* Allocate space for find_vqs parameters */ > + vqs = kcalloc(nvqs, sizeof(*vqs), GFP_KERNEL); > + if (!vqs) > + goto err_vq; > + callbacks = kmalloc_array(nvqs, sizeof(*callbacks), GFP_KERNEL); > + if (!callbacks) > + goto err_callback; > + names = kmalloc_array(nvqs, sizeof(*names), GFP_KERNEL); > + if (!names) > + goto err_names; > + > + callbacks[0] = balloon_ack; > + names[0] = "inflate"; > + callbacks[1] = balloon_ack; > + names[1] = "deflate"; > + > + i = 2; > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { > + callbacks[i] = stats_request; > + names[i] = "stats"; > + i++; > + } > > - /* > - * We expect two virtqueues: inflate and deflate, and > - * optionally stat. > - */ > - nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; > - err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL); > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) { > + callbacks[i] = NULL; > + names[i] = "free_page_vq"; > + } > + > + err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names, > + NULL, NULL); > if (err) > - return err; > + goto err_find; > > vb->inflate_vq = vqs[0]; > vb->deflate_vq = vqs[1]; > + i = 2; > if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { > - struct scatterlist sg; > - unsigned int num_stats; > - vb->stats_vq = vqs[2]; > - > + vb->stats_vq = vqs[i++]; > /* > * Prime this virtqueue with one buffer so the hypervisor can > * use it to signal us later (it can't be broken yet!). > */ > - num_stats = update_balloon_stats(vb); > - > - sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); > + sg_init_one(&sg, vb->stats, sizeof(vb->stats)); > if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) > - < 0) > - BUG(); > + < 0) { > + dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n", > + __func__); > + goto err_find; > + } > virtqueue_kick(vb->stats_vq); > } > + > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) > + vb->free_page_vq = vqs[i]; > + > + kfree(names); > + kfree(callbacks); > + kfree(vqs); > return 0; We can assign err=0 and remove above duplicate code? > + > +err_find: > + kfree(names); > +err_names: > + kfree(callbacks); > +err_callback: > + kfree(vqs); > +err_vq: > + return err; > +} > + > +static bool virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, > + unsigned long nr_pages) > +{ > + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; > + uint32_t len = nr_pages << PAGE_SHIFT; > + > + if (!READ_ONCE(vb->report_free_page)) > + return false; > + > + batch_free_page_sg(vb->free_page_vq, pfn, len); > + > + return true; > +} > + > +static void report_free_page_func(struct work_struct *work) > +{ > + struct virtio_balloon *vb; > + > + vb = container_of(work, struct virtio_balloon, report_free_page_work); > + /* Start by sending the obtained cmd id to the host with an outbuf */ > + send_cmd_id(vb->free_page_vq, &vb->start_cmd_id); > + walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages); > + /* End by sending the stop id to the host with an outbuf */ > + send_cmd_id(vb->free_page_vq, &vb->stop_cmd_id); > } > > #ifdef CONFIG_BALLOON_COMPACTION > @@ -566,6 +735,13 @@ static int virtballoon_probe(struct virtio_device *vdev) > if (err) > goto out_free_vb; > > + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) { > + vb->balloon_wq = alloc_workqueue("balloon-wq", > + WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); > + INIT_WORK(&vb->report_free_page_work, report_free_page_func); > + vb->stop_cmd_id = VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID; > + } > + > vb->nb.notifier_call = virtballoon_oom_notify; > vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY; > err = register_oom_notifier(&vb->nb); > @@ -630,6 +806,7 @@ static void virtballoon_remove(struct virtio_device > *vdev) > spin_unlock_irq(&vb->stop_update_lock); > cancel_work_sync(&vb->update_balloon_size_work); > cancel_work_sync(&vb->update_balloon_stats_work); > + cancel_work_sync(&vb->report_free_page_work); > > remove_common(vb); > #ifdef CONFIG_BALLOON_COMPACTION > @@ -682,6 +859,7 @@ static unsigned int features[] = { > VIRTIO_BALLOON_F_MUST_TELL_HOST, > VIRTIO_BALLOON_F_STATS_VQ, > VIRTIO_BALLOON_F_DEFLATE_ON_OOM, > + VIRTIO_BALLOON_F_FREE_PAGE_VQ, > }; > > static struct virtio_driver virtio_balloon_driver = { > diff --git a/include/uapi/linux/virtio_balloon.h > b/include/uapi/linux/virtio_balloon.h > index 343d7dd..55e2456 100644 > --- a/include/uapi/linux/virtio_balloon.h > +++ b/include/uapi/linux/virtio_balloon.h > @@ -34,15 +34,19 @@ > #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ > #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ > #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ > +#define VIRTIO_BALLOON_F_FREE_PAGE_VQ 3 /* VQ to report free pages */ > > /* Size of a PFN in the balloon interface. */ > #define VIRTIO_BALLOON_PFN_SHIFT 12 > > +#define VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID 0 > struct virtio_balloon_config { > /* Number of pages host wants Guest to give up. */ > __u32 num_pages; > /* Number of pages we've actually got in balloon. */ > __u32 actual; > + /* Free page report command id, readonly by guest */ > + __u32 free_page_report_cmd_id; > }; > > #define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */ > -- > 2.7.4 > >