From: Tianyu Lan <Tianyu.Lan@xxxxxxxxxxxxx>

A memory hot-remove message may request a number of pages that is not
aligned to the hot-plug unit, which is 128MB. Handle the remainder
pages the balloon way: allocate the memory, mark the pages offline and
return the memory to the host.

To help check whether the memory range in a hot-add message was
allocated by the balloon driver, set the page private data to 1 when
such a page is allocated. If the pages associated with the range in a
hot-add message are present and their page private data is non-zero,
clear the offline flag and free the memory.

Signed-off-by: Tianyu Lan <Tianyu.Lan@xxxxxxxxxxxxx>
---
 drivers/hv/hv_balloon.c | 169 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 149 insertions(+), 20 deletions(-)

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 3d8c09fe148a..f76c9bd7fe2f 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -641,6 +641,39 @@ static int hv_send_hot_remove_response(
 	return ret;
 }
 
+/*
+ * Give pages that were taken by the balloon driver back to the page
+ * allocator: clear the private marker set for hot-remove remainder
+ * pages, clear the offline flag, free the page and update the balloon
+ * page count.
+ */
+static void free_allocated_pages(__u64 start_frame, int num_pages)
+{
+	struct page *pg;
+	int i;
+
+	for (i = 0; i < num_pages; i++) {
+		pg = pfn_to_page(i + start_frame);
+
+		/* Drop the marker set by hv_hot_remove_pages(). */
+		if (page_private(pg))
+			set_page_private(pg, 0);
+
+		__ClearPageOffline(pg);
+		__free_page(pg);
+		dm_device.num_pages_ballooned--;
+	}
+}
+
+static void free_balloon_pages(struct hv_dynmem_device *dm,
+			       union dm_mem_page_range *range_array)
+{
+	int num_pages = range_array->finfo.page_cnt;
+	__u64 start_frame = range_array->finfo.start_page;
+
+	free_allocated_pages(start_frame, num_pages);
+}
+
 #ifdef CONFIG_MEMORY_HOTPLUG
 static inline bool has_pfn_is_backed(struct hv_hotadd_state *has,
 				     unsigned long pfn)
@@ -1017,6 +1050,16 @@ static unsigned long process_hot_add(unsigned long pg_start,
 	if (pfn_cnt == 0)
 		return 0;
 
+	/*
+	 * A present range whose first page has non-zero private data was
+	 * ballooned out for hot-remove remainder pages: just free it.
+	 */
+	if (present_section_nr(pfn_to_section_nr(pg_start))
+	    && page_private(pfn_to_page(pg_start))) {
+		free_allocated_pages(pg_start, pfn_cnt);
+		return pfn_cnt;
+	}
+
 	if (!dm_device.host_specified_ha_region) {
 		covered = pfn_covered(pg_start, pfn_cnt);
 		if (covered < 0)
@@ -1194,6 +1237,92 @@ static int hv_hot_remove_from_ha_list(unsigned int nid, unsigned long nr_pages,
 	return ret;
 }
 
+/*
+ * Hot-remove nr_pages pages the balloon way: allocate them from the
+ * guest, mark them offline and report their ranges to the host.
+ * Returns 0 on success. On failure the pages still recorded in resp
+ * are freed, a failure response is sent and the error is returned.
+ */
+static int hv_hot_remove_pages(struct dm_hot_remove_response *resp,
+			       u64 nr_pages, unsigned long *request_index,
+			       bool more_pages)
+{
+	int i, j, alloc_unit = PAGES_IN_2M;
+	struct page *pg;
+	int ret;
+
+	for (i = 0; i < nr_pages; i += alloc_unit) {
+		if (*request_index >= MAX_HOT_REMOVE_ENTRIES) {
+			/* Flush out all hot-remove ranges. */
+			ret = hv_send_hot_remove_response(resp,
+					*request_index, true);
+			if (ret)
+				goto free_pages;
+
+			/*
+			 * Continue to allocate memory for hot remove
+			 * after resetting send buffer and array index.
+			 */
+			memset(resp, 0x00, PAGE_SIZE);
+			*request_index = 0;
+		}
+
+		/* Never hand back more pages than the host asked for. */
+		if (nr_pages - i < alloc_unit)
+			alloc_unit = 1;
+retry:
+		pg = alloc_pages(GFP_HIGHUSER | __GFP_NORETRY |
+				 __GFP_NOMEMALLOC | __GFP_NOWARN,
+				 get_order(alloc_unit << PAGE_SHIFT));
+		if (!pg) {
+			if (alloc_unit == 1) {
+				ret = -ENOMEM;
+				goto free_pages;
+			}
+
+			alloc_unit = 1;
+			goto retry;
+		}
+
+		if (alloc_unit != 1)
+			split_page(pg, get_order(alloc_unit << PAGE_SHIFT));
+
+		for (j = 0; j < (1 << get_order(alloc_unit << PAGE_SHIFT));
+		     j++) {
+			__SetPageOffline(pg + j);
+
+			/*
+			 * Set page's private data to non-zero and use it
+			 * to identify whether the page is allocated by driver
+			 * or new hot-add memory in process_hot_add().
+			 */
+			set_page_private(pg + j, 1);
+		}
+
+		resp->range_array[*request_index].finfo.start_page
+				= page_to_pfn(pg);
+		resp->range_array[*request_index].finfo.page_cnt
+				= alloc_unit;
+		(*request_index)++;
+
+		dm_device.num_pages_ballooned += alloc_unit;
+	}
+
+	ret = hv_send_hot_remove_response(resp, *request_index, more_pages);
+	if (ret)
+		goto free_pages;
+
+	return 0;
+
+free_pages:
+	for (i = 0; i < *request_index; i++)
+		free_balloon_pages(&dm_device, &resp->range_array[i]);
+
+	/* Send a hot-remove failure response. */
+	hv_send_hot_remove_response(resp, 0, false);
+	return ret;
+}
+
 static void hv_mem_hot_remove(unsigned int nid, u64 nr_pages)
 {
 	struct dm_hot_remove_response *resp
@@ -1201,9 +1330,27 @@ static void hv_mem_hot_remove(unsigned int nid, u64 nr_pages)
 	unsigned long start_pfn = node_start_pfn(nid);
 	unsigned long end_pfn = node_end_pfn(nid);
 	unsigned long request_index = 0;
-	int remain_pages;
+	unsigned long remainder = nr_pages % HA_CHUNK;
+	int remain_pages, ret;
 
-	/* Todo: Handle request of non-aligned page number later. */
+	/*
+	 * If the page count isn't aligned to the memory hot-plug unit,
+	 * handle the remainder pages the balloon way.
+	 */
+	if (remainder) {
+		memset(resp, 0x00, PAGE_SIZE);
+		ret = hv_hot_remove_pages(resp, remainder, &request_index,
+					  !!(nr_pages - remainder));
+		if (ret)
+			return;
+
+		nr_pages -= remainder;
+		if (!nr_pages)
+			return;
+
+		/* The buffer is zeroed again below; restart the index too. */
+		request_index = 0;
+	}
 
 	/* Search hot-remove memory region from hot add list first.*/
 	memset(resp, 0x00, PAGE_SIZE);
@@ -1448,24 +1595,6 @@ static void post_status(struct hv_dynmem_device *dm)
 
 }
 
-static void free_balloon_pages(struct hv_dynmem_device *dm,
-			 union dm_mem_page_range *range_array)
-{
-	int num_pages = range_array->finfo.page_cnt;
-	__u64 start_frame = range_array->finfo.start_page;
-	struct page *pg;
-	int i;
-
-	for (i = 0; i < num_pages; i++) {
-		pg = pfn_to_page(i + start_frame);
-		__ClearPageOffline(pg);
-		__free_page(pg);
-		dm->num_pages_ballooned--;
-	}
-}
-
-
-
 static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm,
 					unsigned int num_pages,
 					struct dm_balloon_response *bl_resp,
-- 
2.14.5