Hi Vitaly: Thanks for your review. > From: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> > > From: Tianyu Lan <Tianyu.Lan@xxxxxxxxxxxxx> > > @@ -771,8 +767,13 @@ static void hv_online_page(struct page *pg, > unsigned int order) > > struct hv_hotadd_state *has; > > unsigned long flags; > > unsigned long pfn = page_to_pfn(pg); > > + int unlocked; > > + > > + if (dm_device.lock_thread != current) { > > With lock_thread checking you're trying to protect against taking the spinlock > twice (when this is called from add_memory()) but why not just check that > spin_is_locked() AND we sit on the same CPU as the VMBus channel > attached to the balloon device? Yes, that's another approach. > > > + spin_lock_irqsave(&dm_device.ha_lock, flags); > > + unlocked = 1; > > + } > > We set unlocked to '1' when we're actually locked, aren't we? The "unlocked" means ha_lock isn't held before calling hv_online_page(). > > > > > - spin_lock_irqsave(&dm_device.ha_lock, flags); > > list_for_each_entry(has, &dm_device.ha_region_list, list) { > > /* The page belongs to a different HAS. 
*/ > > if ((pfn < has->start_pfn) || > > @@ -782,7 +783,9 @@ static void hv_online_page(struct page *pg, > unsigned int order) > > hv_bring_pgs_online(has, pfn, 1UL << order); > > break; > > } > > - spin_unlock_irqrestore(&dm_device.ha_lock, flags); > > + > > + if (unlocked) > > + spin_unlock_irqrestore(&dm_device.ha_lock, flags); > > } > > > > static int pfn_covered(unsigned long start_pfn, unsigned long > > pfn_cnt) @@ -860,6 +863,7 @@ static unsigned long > handle_pg_range(unsigned long pg_start, > > pg_start); > > > > spin_lock_irqsave(&dm_device.ha_lock, flags); > > + dm_device.lock_thread = current; > > list_for_each_entry(has, &dm_device.ha_region_list, list) { > > /* > > * If the pfn range we are dealing with is not in the current > @@ > > -912,9 +916,7 @@ static unsigned long handle_pg_range(unsigned long > pg_start, > > } else { > > pfn_cnt = size; > > } > > - spin_unlock_irqrestore(&dm_device.ha_lock, flags); > > hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, > has); > > - spin_lock_irqsave(&dm_device.ha_lock, flags); > > Apart from the deadlock you mention in the commit message, add_memory > does lock_device_hotplug()/unlock_device_hotplug() which is a mutex. If > I'm not mistaken you now take the mutex under a spinlock > (&dm_device.ha_lock). Not good. Yes, you are right. I missed this. I will try another way. Nice catch. Thanks. > > > > } > > /* > > * If we managed to online any pages that were given to us, > @@ > > -923,6 +925,7 @@ static unsigned long handle_pg_range(unsigned long > pg_start, > > res = has->covered_end_pfn - old_covered_state; > > break; > > } > > + dm_device.lock_thread = NULL; > > spin_unlock_irqrestore(&dm_device.ha_lock, flags); > > > > return res; > > -- > Vitaly