[PATCH RFC v1] mm: is_mem_section_removable() overhaul

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Let's refactor that code. We want to check if we can offline memory
blocks. Add a new function is_mem_section_offlineable() for that and
make it call is_mem_section_offlineable() for each contained section.
Within is_mem_section_offlineable(), add some more sanity checks and
directly bail out if the section contains holes or if it spans multiple
zones.

The old code was inherently racy with concurrent offlining/memory
unplug. Let's avoid that and grab the device_hotplug_lock. Luckily
we are already holding it when calling from powerpc code.

Note1: If somebody wants to export this function for use in driver code, we
need a variant that takes the device_hotplug_lock()

Note2: If we could have a zombie device (not clear yet), the present
section checks would properly bail out early.

Note3: I'd prefer the mem_hotplug_lock in read, but as we are about to
change the locking on the removal path (IOW, don't hold it when removing
memory block devices), I do not want to go down that path.

Note4: For now we would have returned "removable" although we would
block offlining due to memory holes, multiple zones, or missing
sections.

Tested with DIMMs on x86-64. Compile-tested on Power.

Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
Cc: Paul Mackerras <paulus@xxxxxxxxx>
Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
Cc: "Rafael J. Wysocki" <rafael@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Leonardo Bras <leonardo@xxxxxxxxxxxxx>
Cc: Nathan Lynch <nathanl@xxxxxxxxxxxxx>
Cc: Allison Randal <allison@xxxxxxxxxxx>
Cc: Nathan Fontenot <nfont@xxxxxxxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: Stephen Rothwell <sfr@xxxxxxxxxxxxxxxx>
Cc: Anshuman Khandual <anshuman.khandual@xxxxxxx>
Cc: lantianyu1986@xxxxxxxxx
Cc: linuxppc-dev@xxxxxxxxxxxxxxxx
Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
---
 .../platforms/pseries/hotplug-memory.c        | 24 ++-----
 drivers/base/memory.c                         | 37 ++++++----
 include/linux/memory.h                        |  1 +
 include/linux/memory_hotplug.h                |  5 +-
 mm/memory_hotplug.c                           | 68 +++++++++----------
 5 files changed, 67 insertions(+), 68 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index c126b94d1943..8d80159465e4 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -337,34 +337,24 @@ static int pseries_remove_mem_node(struct device_node *np)
 
 static bool lmb_is_removable(struct drmem_lmb *lmb)
 {
-	int i, scns_per_block;
-	bool rc = true;
-	unsigned long pfn, block_sz;
-	u64 phys_addr;
+	struct memory_block *mem;
+	bool rc = false;
 
 	if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
 		return false;
 
-	block_sz = memory_block_size_bytes();
-	scns_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
-	phys_addr = lmb->base_addr;
-
 #ifdef CONFIG_FA_DUMP
 	/*
 	 * Don't hot-remove memory that falls in fadump boot memory area
 	 * and memory that is reserved for capturing old kernel memory.
 	 */
-	if (is_fadump_memory_area(phys_addr, block_sz))
+	if (is_fadump_memory_area(lmb->base_addr,  memory_block_size_bytes()))
 		return false;
 #endif
-
-	for (i = 0; i < scns_per_block; i++) {
-		pfn = PFN_DOWN(phys_addr);
-		if (!pfn_present(pfn))
-			continue;
-
-		rc = rc && is_mem_section_removable(pfn, PAGES_PER_SECTION);
-		phys_addr += MIN_MEMORY_BLOCK_SIZE;
+	mem = lmb_to_memblock(lmb);
+	if (mem) {
+		rc = is_memory_block_offlineable(mem);
+		put_device(&mem->dev);
 	}
 
 	return rc;
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index c6d288fad493..f744250c34d0 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -104,6 +104,25 @@ static ssize_t phys_index_show(struct device *dev,
 	return sprintf(buf, "%08lx\n", phys_index);
 }
 
+/*
+ * Test if a memory block is likely to be offlineable. Returns true if
+ * the block is already offline.
+ *
+ * Called under device_hotplug_lock.
+ */
+bool is_memory_block_offlineable(struct memory_block *mem)
+{
+	int i;
+
+	if (mem->state != MEM_ONLINE)
+		return true;
+
+	for (i = 0; i < sections_per_block; i++)
+		if (!is_mem_section_offlineable(mem->start_section_nr + i))
+			return false;
+	return true;
+}
+
 /*
  * Show whether the memory block is likely to be offlineable (or is already
  * offline). Once offline, the memory block could be removed. The return
@@ -114,20 +133,14 @@ static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
 			      char *buf)
 {
 	struct memory_block *mem = to_memory_block(dev);
-	unsigned long pfn;
-	int ret = 1, i;
-
-	if (mem->state != MEM_ONLINE)
-		goto out;
+	int ret;
 
-	for (i = 0; i < sections_per_block; i++) {
-		if (!present_section_nr(mem->start_section_nr + i))
-			continue;
-		pfn = section_nr_to_pfn(mem->start_section_nr + i);
-		ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
-	}
+	ret = lock_device_hotplug_sysfs();
+	if (ret)
+		return ret;
+	ret = is_memory_block_offlineable(mem);
+	unlock_device_hotplug();
 
-out:
 	return sprintf(buf, "%d\n", ret);
 }
 
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 0b8d791b6669..faf03eb64ecc 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -91,6 +91,7 @@ typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
 extern int walk_memory_blocks(unsigned long start, unsigned long size,
 			      void *arg, walk_memory_blocks_func_t func);
 extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func);
+extern bool is_memory_block_offlineable(struct memory_block *mem);
 #define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index f4d59155f3d4..8d087772f748 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -306,15 +306,14 @@ static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
 
-extern bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages);
+extern bool is_mem_section_offlineable(unsigned long nr);
 extern void try_offline_node(int nid);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern int remove_memory(int nid, u64 start, u64 size);
 extern void __remove_memory(int nid, u64 start, u64 size);
 
 #else
-static inline bool is_mem_section_removable(unsigned long pfn,
-					unsigned long nr_pages)
+static inline bool is_mem_section_offlineable(unsigned long nr)
 {
 	return false;
 }
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 7a6de9b0dcab..a6d14d2b7f0c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1128,46 +1128,51 @@ static unsigned long next_active_pageblock(unsigned long pfn)
 	return pfn + pageblock_nr_pages;
 }
 
-static bool is_pageblock_removable_nolock(unsigned long pfn)
+static int count_system_ram_pages_cb(unsigned long start_pfn,
+				     unsigned long nr_pages, void *data)
 {
-	struct page *page = pfn_to_page(pfn);
+	unsigned long *nr_system_ram_pages = data;
+
+	*nr_system_ram_pages += nr_pages;
+	return 0;
+}
+
+/*
+ * Check if a section is likely to be offlineable.
+ *
+ * Called with device_hotplug_lock.
+ */
+bool is_mem_section_offlineable(unsigned long nr)
+{
+	const unsigned long start_pfn = section_nr_to_pfn(nr);
+	const unsigned long end_pfn = start_pfn + PAGES_PER_SECTION;
+	unsigned long pfn, nr_pages = 0;
 	struct zone *zone;
 
-	/*
-	 * We have to be careful here because we are iterating over memory
-	 * sections which are not zone aware so we might end up outside of
-	 * the zone but still within the section.
-	 * We have to take care about the node as well. If the node is offline
-	 * its NODE_DATA will be NULL - see page_zone.
-	 */
-	if (!node_online(page_to_nid(page)))
+	if (!present_section_nr(nr))
 		return false;
-
-	zone = page_zone(page);
-	pfn = page_to_pfn(page);
-	if (!zone_spans_pfn(zone, pfn))
+	if (!online_section_nr(nr))
 		return false;
 
-	return !has_unmovable_pages(zone, page, MIGRATE_MOVABLE,
-				    MEMORY_OFFLINE);
-}
-
-/* Checks if this range of memory is likely to be hot-removable. */
-bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
-{
-	unsigned long end_pfn, pfn;
+	/* we don't allow to offline sections with holes */
+	walk_system_ram_range(start_pfn, PAGES_PER_SECTION, &nr_pages,
+			      count_system_ram_pages_cb);
+	if (nr_pages != PAGES_PER_SECTION)
+		return false;
 
-	end_pfn = min(start_pfn + nr_pages,
-			zone_end_pfn(page_zone(pfn_to_page(start_pfn))));
+	/* we don't allow to offline sections with mixed zones/nodes */
+	zone = test_pages_in_a_zone(start_pfn, end_pfn);
+	if (!zone)
+		return false;
 
-	/* Check the starting page of each pageblock within the range */
+	/* check each pageblock if it contains unmovable pages */
 	for (pfn = start_pfn; pfn < end_pfn; pfn = next_active_pageblock(pfn)) {
-		if (!is_pageblock_removable_nolock(pfn))
+		if (has_unmovable_pages(zone, pfn_to_page(pfn), MIGRATE_MOVABLE,
+					MEMORY_OFFLINE))
 			return false;
 		cond_resched();
 	}
 
-	/* All pageblocks in the memory block are likely to be hot-removable */
 	return true;
 }
 
@@ -1436,15 +1441,6 @@ static void node_states_clear_node(int node, struct memory_notify *arg)
 		node_clear_state(node, N_MEMORY);
 }
 
-static int count_system_ram_pages_cb(unsigned long start_pfn,
-				     unsigned long nr_pages, void *data)
-{
-	unsigned long *nr_system_ram_pages = data;
-
-	*nr_system_ram_pages += nr_pages;
-	return 0;
-}
-
 static int __ref __offline_pages(unsigned long start_pfn,
 		  unsigned long end_pfn)
 {
-- 
2.24.1





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux