Dropping the reference count of PageOffline() pages allows offlining code to skip them. However, we also have to convert PG_reserved to another flag - let's use PG_dirty - so has_unmovable_pages() will properly handle them. PG_reserved pages get detected as unmovable right away. We need the flag to see if we are onlining pages for the first time, or if we allocated them via alloc_contig_range(). Also, properly account for the offlining code modifying the stats as well, and add special handling for the case where the driver gets unloaded. Cc: "Michael S. Tsirkin" <mst@xxxxxxxxxx> Cc: Jason Wang <jasowang@xxxxxxxxxx> Cc: Oscar Salvador <osalvador@xxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Cc: Igor Mammedov <imammedo@xxxxxxxxxx> Cc: Dave Young <dyoung@xxxxxxxxxx> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Cc: Dan Williams <dan.j.williams@xxxxxxxxx> Cc: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx> Cc: Stefan Hajnoczi <stefanha@xxxxxxxxxx> Cc: Vlastimil Babka <vbabka@xxxxxxx> Signed-off-by: David Hildenbrand <david@xxxxxxxxxx> --- drivers/virtio/virtio_mem.c | 102 ++++++++++++++++++++++++++++++++---- 1 file changed, 92 insertions(+), 10 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 91052a37d10d..9cb31459b211 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -561,6 +561,30 @@ static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id, virtio_mem_retry(vm); } +/* + * When we unplug subblocks, we already modify stats (e.g., subtract them + * from totalram_pages). Offlining code will modify the stats, too. So + * properly fixup the stats when GOING_OFFLINE and revert that when + * CANCEL_OFFLINE.
+ */ +static void virtio_mem_mb_going_offline_fixup_stats(struct virtio_mem *vm, + unsigned long mb_id, + bool cancel) +{ + const unsigned long nr_pages = PFN_DOWN(vm->subblock_size); + int sb_id; + + for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) { + if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) + continue; + + if (cancel) + totalram_pages_add(-nr_pages); + else + totalram_pages_add(nr_pages); + } +} + /* * This callback will either be called synchonously from add_memory() or * asynchronously (e.g., triggered via user space). We have to be careful @@ -608,6 +632,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, mutex_lock(&vm->hotplug_mutex); vm->hotplug_active = true; } + virtio_mem_mb_going_offline_fixup_stats(vm, mb_id, false); break; case MEM_GOING_ONLINE: spin_lock_irq(&vm->removal_lock); @@ -633,6 +658,8 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, mutex_unlock(&vm->hotplug_mutex); break; case MEM_CANCEL_OFFLINE: + virtio_mem_mb_going_offline_fixup_stats(vm, mb_id, true); + /* fall through */ case MEM_CANCEL_ONLINE: /* We might not get a MEM_GOING* if somebody else canceled */ if (vm->hotplug_active) { @@ -648,23 +675,55 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, } /* - * Set a range of pages PG_offline. + * Convert PG_reserved to PG_dirty. Needed to allow isolation code to + * not immediately consider them as unmovable. + */ +static void virtio_mem_reserved_to_dirty(unsigned long pfn, + unsigned int nr_pages) +{ + for (; nr_pages--; pfn++) { + SetPageDirty(pfn_to_page(pfn)); + ClearPageReserved(pfn_to_page(pfn)); + } +} + +/* + * Convert PG_dirty to PG_reserved. Needed so generic_online_page() + * works correctly. 
+ */ +static void virtio_mem_dirty_to_reserved(unsigned long pfn, + unsigned int nr_pages) +{ + for (; nr_pages--; pfn++) { + SetPageReserved(pfn_to_page(pfn)); + ClearPageDirty(pfn_to_page(pfn)); + } +} + +/* + * Set a range of pages PG_offline and drop the reference. The dropped + * reference (0) and the flag allows isolation code to isolate this range + * and offline code to offline it. */ static void virtio_mem_set_fake_offline(unsigned long pfn, unsigned int nr_pages) { - for (; nr_pages--; pfn++) + for (; nr_pages--; pfn++) { __SetPageOffline(pfn_to_page(pfn)); + page_ref_dec(pfn_to_page(pfn)); + } } /* - * Clear PG_offline from a range of pages. + * Get a reference and clear PG_offline from a range of pages. */ static void virtio_mem_clear_fake_offline(unsigned long pfn, unsigned int nr_pages) { - for (; nr_pages--; pfn++) + for (; nr_pages--; pfn++) { + page_ref_inc(pfn_to_page(pfn)); __ClearPageOffline(pfn_to_page(pfn)); + } } /* @@ -679,7 +738,7 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages) /* * We are always called with subblock granularity, which is at least * aligned to MAX_ORDER - 1. All pages in a subblock are either - * reserved or not. + * PG_dirty (converted PG_reserved) or not. */ BUG_ON(!IS_ALIGNED(pfn, 1 << order)); BUG_ON(!IS_ALIGNED(nr_pages, 1 << order)); @@ -690,13 +749,14 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages) struct page *page = pfn_to_page(pfn + i); /* - * If the page is reserved, it was kept fake-offline when + * If the page is PG_dirty, it was kept fake-offline when * onlining the memory block. Otherwise, it was allocated * using alloc_contig_range(). 
*/ - if (PageReserved(page)) + if (PageDirty(page)) { + virtio_mem_dirty_to_reserved(pfn + i, 1 << order); generic_online_page(page, order); - else { + } else { free_contig_range(pfn + i, 1 << order); totalram_pages_add(1 << order); } @@ -728,8 +788,10 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order) */ if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) generic_online_page(page, order); - else + else { virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order); + virtio_mem_reserved_to_dirty(PFN_DOWN(addr), 1 << order); + } rcu_read_unlock(); return; } @@ -1674,7 +1736,8 @@ static int virtio_mem_probe(struct virtio_device *vdev) static void virtio_mem_remove(struct virtio_device *vdev) { struct virtio_mem *vm = vdev->priv; - unsigned long mb_id; + unsigned long nr_pages = PFN_DOWN(vm->subblock_size); + unsigned long pfn, mb_id, sb_id; int rc; /* @@ -1701,6 +1764,25 @@ static void virtio_mem_remove(struct virtio_device *vdev) BUG_ON(rc); mutex_lock(&vm->hotplug_mutex); } + /* + * After we unregistered our callbacks, user space can offline + + * re-online partially plugged online blocks. Make sure they can't + * get offlined by getting a reference. Also, restore PG_reserved. + */ + virtio_mem_for_each_mb_state(vm, mb_id, + VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) { + for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) { + if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) + continue; + pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + + sb_id * vm->subblock_size); + + if (PageDirty(pfn_to_page(pfn))) + virtio_mem_dirty_to_reserved(pfn, nr_pages); + for (; nr_pages--; pfn++) + page_ref_inc(pfn_to_page(pfn)); + } + } mutex_unlock(&vm->hotplug_mutex); /* unregister callbacks */ -- 2.21.0