From: Tianyu Lan <Tianyu.Lan@xxxxxxxxxxxxx>

The ha_lock protects the hot-add region list ha_region_list. When Hyper-V
delivers a hot-add memory message, handle_pg_range() walks the list to find
the hot-add region state associated with the message and performs the memory
hot add. The lock is released inside the loop before calling hv_mem_hot_add()
and is reacquired within hv_mem_hot_add(), so there is a window in which a
list entry may be freed. To close this race and simplify the code, run
hv_mem_hot_add() under the protection of the ha_region_list lock.

Calling add_memory() while holding ha_lock, however, introduces a deadlock:
add_memory() ends up calling hv_online_page(), which acquires ha_lock again.
To handle this, add a lock_thread field to struct hv_dynmem_device that
records the thread running hv_mem_hot_add(), and check it in hv_online_page().
Since the hv_mem_hot_add() thread already holds the lock while traversing the
hot-add list, hv_online_page() must not acquire the lock again in that context.

Signed-off-by: Tianyu Lan <Tianyu.Lan@xxxxxxxxxxxxx>
---
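(Not part of the patch: below is a minimal userspace sketch, for reference
only, of the "record the lock-holding thread and skip re-locking in the
nested callback" idea described above. It uses a pthread mutex in place of
the spinlock, and every identifier in it — region_lock, lock_owner,
add_region, online_callback — is invented for illustration; it is not the
hv_balloon code.)

	/* Sketch of the lock-owner-recording pattern, userspace analogue. */
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	static pthread_mutex_t region_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_t lock_owner;       /* thread currently holding region_lock */
	static bool lock_owner_valid;

	/* May be called from inside add_region() while region_lock is held. */
	static void online_callback(int page)
	{
		bool locked_here = false;

		/* Only take the lock if the caller does not already hold it. */
		if (!lock_owner_valid || !pthread_equal(lock_owner, pthread_self())) {
			pthread_mutex_lock(&region_lock);
			locked_here = true;
		}

		printf("onlining page %d\n", page);

		if (locked_here)
			pthread_mutex_unlock(&region_lock);
	}

	/* Analogue of handle_pg_range(): hold the lock across the whole add. */
	static void add_region(int first_page, int count)
	{
		pthread_mutex_lock(&region_lock);
		lock_owner = pthread_self();
		lock_owner_valid = true;

		for (int i = 0; i < count; i++)
			online_callback(first_page + i); /* nested call, lock already held */

		lock_owner_valid = false;
		pthread_mutex_unlock(&region_lock);
	}

	int main(void)
	{
		add_region(100, 3);
		return 0;
	}

Note that this only works because the nested callback runs in the same thread
that took the lock; a callback arriving from any other context still takes the
lock normally, which mirrors the lock_thread check the patch adds to
hv_online_page().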
 drivers/hv/hv_balloon.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 34bd73526afd..4d1a3b1e2490 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -545,6 +545,7 @@ struct hv_dynmem_device {
	 * regions from ha_region_list.
	 */
	spinlock_t ha_lock;
+	struct task_struct *lock_thread;

	/*
	 * A list of hot-add regions.
@@ -707,12 +708,10 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
	unsigned long start_pfn;
	unsigned long processed_pfn;
	unsigned long total_pfn = pfn_count;
-	unsigned long flags;

	for (i = 0; i < (size/HA_CHUNK); i++) {
		start_pfn = start + (i * HA_CHUNK);

-		spin_lock_irqsave(&dm_device.ha_lock, flags);
		has->ha_end_pfn += HA_CHUNK;

		if (total_pfn > HA_CHUNK) {
@@ -724,7 +723,6 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
		}

		has->covered_end_pfn += processed_pfn;
-		spin_unlock_irqrestore(&dm_device.ha_lock, flags);

		init_completion(&dm_device.ol_waitevent);
		dm_device.ha_waiting = !memhp_auto_online;
@@ -745,10 +743,8 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
				 */
				do_hot_add = false;
			}
-			spin_lock_irqsave(&dm_device.ha_lock, flags);
			has->ha_end_pfn -= HA_CHUNK;
			has->covered_end_pfn -= processed_pfn;
-			spin_unlock_irqrestore(&dm_device.ha_lock, flags);
			break;
		}

@@ -771,8 +767,13 @@ static void hv_online_page(struct page *pg, unsigned int order)
	struct hv_hotadd_state *has;
	unsigned long flags;
	unsigned long pfn = page_to_pfn(pg);
+	int unlocked = 0;
+
+	if (dm_device.lock_thread != current) {
+		spin_lock_irqsave(&dm_device.ha_lock, flags);
+		unlocked = 1;
+	}

-	spin_lock_irqsave(&dm_device.ha_lock, flags);
	list_for_each_entry(has, &dm_device.ha_region_list, list) {
		/* The page belongs to a different HAS. */
		if ((pfn < has->start_pfn) ||
@@ -782,7 +783,9 @@ static void hv_online_page(struct page *pg, unsigned int order)
		hv_bring_pgs_online(has, pfn, 1UL << order);
		break;
	}
-	spin_unlock_irqrestore(&dm_device.ha_lock, flags);
+
+	if (unlocked)
+		spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 }

 static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
@@ -860,6 +863,7 @@ static unsigned long handle_pg_range(unsigned long pg_start,
		pg_start);

	spin_lock_irqsave(&dm_device.ha_lock, flags);
+	dm_device.lock_thread = current;
	list_for_each_entry(has, &dm_device.ha_region_list, list) {
		/*
		 * If the pfn range we are dealing with is not in the current
@@ -912,9 +916,7 @@ static unsigned long handle_pg_range(unsigned long pg_start,
			} else {
				pfn_cnt = size;
			}
-			spin_unlock_irqrestore(&dm_device.ha_lock, flags);
			hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, has);
-			spin_lock_irqsave(&dm_device.ha_lock, flags);
		}
		/*
		 * If we managed to online any pages that were given to us,
@@ -923,6 +925,7 @@ static unsigned long handle_pg_range(unsigned long pg_start,
			res = has->covered_end_pfn - old_covered_state;
		break;
	}
+	dm_device.lock_thread = NULL;
	spin_unlock_irqrestore(&dm_device.ha_lock, flags);

	return res;
--
2.14.5