My driver allocates more than 30MB of pages at a time via alloc_page() and maps
them into a virtual address range. In total it uses 300~400MB of pages. If I
run a heavy load test for a day, I cannot allocate even order=3 pages because
of external fragmentation.

I thought I needed an anti-fragmentation solution for my driver, so I looked
into the compaction code, but it has no allocation function.

This patch finds a buddy page and the pageblock in which that buddy lives, and
then allocates the free pages in that pageblock. So I expect it to allocate
pages with less fragmentation.

I've tested this patch for 48 hours and found no problems. I haven't measured
the amount of external fragmentation yet because that will take several days;
I'll start it ASAP.

I just wonder whether anybody has tried page allocation like this. Am I going
in the right direction? I'll report results after the long-term test.

This patch is based on 3.16.
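For reference, a caller in my driver looks roughly like this. It is only a
sketch, not part of the patch: the helper name, the order-5 request and
GFP_KERNEL are illustrative.

	static int my_drv_alloc_bulk(void)
	{
		LIST_HEAD(freepages);
		struct page *page, *next;
		int nr;

		/* ask for up to 1 << 5 = 32 pages; they come back on
		 * the list as individual order-0 pages with refcount one
		 */
		nr = alloc_pages_compact(GFP_KERNEL, 5, &freepages);

		list_for_each_entry_safe(page, next, &freepages, lru) {
			list_del(&page->lru);
			/* map and use the page; __free_page() when done */
		}

		return nr;	/* if nr < 32, fall back to alloc_page() */
	}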
Signed-off-by: Gioh Kim <gioh.kim@xxxxxxx>
---
 mm/compaction.c |   59 +++++++++++++++++++++++++++++
 mm/page_alloc.c |  112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 171 insertions(+)

diff --git a/mm/compaction.c b/mm/compaction.c
index 21bf292..7775bc6 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -16,6 +16,7 @@
 #include <linux/sysfs.h>
 #include <linux/balloon_compaction.h>
 #include <linux/page-isolation.h>
+#include <linux/cpuset.h>
 #include "internal.h"
 
 #ifdef CONFIG_COMPACTION
@@ -1289,3 +1290,61 @@ void compaction_unregister_node(struct node *node)
 #endif /* CONFIG_SYSFS && CONFIG_NUMA */
 
 #endif /* CONFIG_COMPACTION */
+
+/*
+ * Isolate free pages in one pageblock and collect them on @freelist
+ * as order-0 pages.  Called with the zone lock held.  Returns the
+ * number of pages isolated.
+ */
+unsigned long isolate_unmovable_freepages_block(unsigned long blockpfn,
+					unsigned long end_pfn,
+					int count,
+					struct list_head *freelist)
+{
+	int total_isolated = 0;
+	struct page *cursor;
+
+	cursor = pfn_to_page(blockpfn);
+
+	/* Isolate free pages in a pageblock. */
+	for (; blockpfn < end_pfn; blockpfn++, cursor++) {
+		int isolated, i;
+		struct page *page = cursor;
+
+		if (!pfn_valid_within(blockpfn))
+			continue;
+		if (!PageBuddy(page))
+			continue;
+
+		/*
+		 * Do not touch contiguous pages: skip buddies of
+		 * order >= pageblock_order/2 so that large blocks
+		 * are not broken up.
+		 */
+		if (page_order(page) >= pageblock_order/2) {
+			blockpfn += (1 << page_order(page)) - 1;
+			cursor += (1 << page_order(page)) - 1;
+			continue;
+		}
+
+		/* Found a free page, break it into order-0 pages */
+		isolated = split_free_page(page);
+
+		total_isolated += isolated;
+		for (i = 0; i < isolated; i++) {
+			list_add(&page->lru, freelist);
+			page++;
+		}
+
+		if (total_isolated >= count)
+			break;
+
+		/* If a page was split, advance to the end of it */
+		if (isolated) {
+			blockpfn += isolated - 1;
+			cursor += isolated - 1;
+			continue;
+		}
+	}
+
+	return total_isolated;
+}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 86c9a72..c782191 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6646,3 +6646,115 @@ void dump_page(struct page *page, const char *reason)
 	dump_page_badflags(page, reason, 0);
 }
 EXPORT_SYMBOL(dump_page);
+
+unsigned long isolate_unmovable_freepages_block(unsigned long blockpfn,
+					unsigned long end_pfn,
+					int count,
+					struct list_head *freelist);
+
+int rmqueue_compact(struct zone *zone, unsigned int order,
+		int migratetype, struct list_head *freepages)
+{
+	unsigned int current_order;
+	struct free_area *area;
+	struct page *page;
+	unsigned long block_start_pfn;	/* start of current pageblock */
+	unsigned long block_end_pfn;	/* end of current pageblock */
+	int total_isolated = 0;
+	unsigned long flags;
+	struct page *next;
+	int remain = 0;
+	int request = 1 << order;
+
+	spin_lock_irqsave(&zone->lock, flags);
+
+	current_order = 0;
+	page = NULL;
+	while (current_order <= pageblock_order) {
+		int isolated;
+
+		area = &(zone->free_area[current_order]);
+
+		if (list_empty(&area->free_list[migratetype])) {
+			current_order++;
+			continue;
+		}
+
+		page = list_entry(area->free_list[migratetype].next,
+				struct page, lru);
+
+		/*
+		 * Check the migratetype of the pageblock again: some
+		 * pages can be set to a different migratetype by
+		 * rmqueue_fallback.  Move on to the next order so we
+		 * do not spin on the same list head forever.
+		 */
+		if (get_pageblock_migratetype(page) != migratetype) {
+			current_order++;
+			continue;
+		}
+
+		block_start_pfn = page_to_pfn(page) & ~(pageblock_nr_pages - 1);
+		block_end_pfn = min(block_start_pfn + pageblock_nr_pages,
+				zone_end_pfn(zone));
+
+		isolated = isolate_unmovable_freepages_block(block_start_pfn,
+							block_end_pfn,
+							request,
+							freepages);
+
+		total_isolated += isolated;
+		request -= isolated;
+
+		/*
+		 * A buddy block was found but it is too big, or the
+		 * block has no valid page.  Either way something went
+		 * wrong, so try the next order.
+		 */
+		if (isolated == 0)
+			current_order++;
+
+		if (request <= 0)
+			break;
+	}
+	__mod_zone_page_state(zone, NR_ALLOC_BATCH, -total_isolated);
+	__count_zone_vm_events(PGALLOC, zone, total_isolated);
+
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	/*
+	 * Keep the first 1 << order pages for the caller and return
+	 * any surplus straight to the buddy allocator, not to the
+	 * hot/cold per-cpu lists.
+	 */
+	list_for_each_entry_safe(page, next, freepages, lru) {
+		if (remain >= (1 << order)) {
+			list_del(&page->lru);
+			atomic_dec(&page->_count);
+			__free_pages_ok(page, 0);
+		}
+		remain++;
+	}
+
+	list_for_each_entry(page, freepages, lru) {
+		arch_alloc_page(page, 0);
+		kernel_map_pages(page, 1, 1);
+	}
+
+	return min_t(int, total_isolated, 1 << order);
+}
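+
+/*
+ * alloc_pages_compact - allocate up to 1 << order pages with less
+ * fragmentation
+ *
+ * Works like alloc_pages() but takes order-0 pages out of partially
+ * free pageblocks of the preferred zone instead of splitting a
+ * high-order buddy, so repeated allocations fragment the zone less.
+ * The pages are put on @freepages individually; the return value is
+ * the number of pages (at most 1 << order) actually allocated.
+ */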
+int alloc_pages_compact(gfp_t gfp_mask, unsigned int order,
+		struct list_head *freepages)
+{
+	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+	struct zone *preferred_zone;
+	struct zoneref *preferred_zoneref;
+
+	preferred_zoneref = first_zones_zonelist(node_zonelist(numa_node_id(),
+							gfp_mask),
+				high_zoneidx,
+				&cpuset_current_mems_allowed,
+				&preferred_zone);
+	if (!preferred_zone)
+		return 0;
+
+	return rmqueue_compact(preferred_zone, order,
+			allocflags_to_migratetype(gfp_mask), freepages);
+}
+EXPORT_SYMBOL(alloc_pages_compact);
-- 
1.7.9.5