Re: [PATCH v6] mm/filemap: remove hugetlb special casing in filemap.c

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 09/04/23 21:05, Sidhartha Kumar wrote:
> On 8/21/23 11:33 AM, Mike Kravetz wrote:
> > On 08/17/23 11:18, Sidhartha Kumar wrote:
> > > Remove special cased hugetlb handling code within the page cache by
> > > changing the granularity of each index to the base page size rather than
> > > the huge page size. Adds new wrappers for hugetlb code to interact with the
> > > page cache which convert to a linear index.
> > <snip>
> > > @@ -237,7 +234,7 @@ void filemap_free_folio(struct address_space *mapping, struct folio *folio)
> > >   	if (free_folio)
> > >   		free_folio(folio);
> > > -	if (folio_test_large(folio) && !folio_test_hugetlb(folio))
> > > +	if (folio_test_large(folio))
> > >   		refs = folio_nr_pages(folio);
> > >   	folio_put_refs(folio, refs);
> > >   }
> > > @@ -858,14 +855,15 @@ noinline int __filemap_add_folio(struct address_space *mapping,
> > >   	if (!huge) {
> > >   		int error = mem_cgroup_charge(folio, NULL, gfp);
> > > -		VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
> > >   		if (error)
> > >   			return error;
> > >   		charged = true;
> > > -		xas_set_order(&xas, index, folio_order(folio));
> > > -		nr = folio_nr_pages(folio);
> > >   	}
> > 
> > When a hugetlb page is added to the page cache, the ref count will now
> > be increased by folio_nr_pages.  So, the ref count for a 2MB hugetlb page
> > on x86 will be increased by 512.
> > 
> > We will need a corresponding change to migrate_huge_page_move_mapping().
> > For migration, the ref count is checked as follows:
> > 
> > 	xas_lock_irq(&xas);
> > 	expected_count = 2 + folio_has_private(src);
> Hi Mike,
> 
> Thanks for catching this. Changing this line to:
> +	expected_count = folio_expected_refs(mapping, src);
> seems to fix migration from my testing. My test was inserting a sleep() in
> the hugepage-mmap.c selftest and running the migratepages command.
> 
> With this version of the patch:
> migrate_pages(44906, 65, [0x0000000000000001], [0x0000000000000002]) = 75
> which means 75 pages did not migrate and after the change to
> folio_expected_refs():
> migrate_pages(7344, 65, [0x0000000000000001], [0x0000000000000002]) = 0
> 
> Does that change look correct to you?

I just ran the simple attached test program (don't laugh) on the suggested
change.  Command line './move-pages 2 /var/opt/oracle/hugepool/foo'.
Unfortunately, migration is not working as expected.  The source pages of
the migration are not freed.

I have not taken a closer look at the code to get an idea about root cause.
Certainly, it has to do with the ref counts.  I can look closer in a day or
two if you have not resolved the issue.
-- 
Mike Kravetz
/*
 * hugepage-mmap:
 *
 * Example of using huge page memory in a user application using the mmap
 * system call.  Before running this application, make sure that the
 * administrator has mounted the hugetlbfs filesystem (on some directory
 * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this
 * example, the app maps num_hpages huge pages (2MB each) from the given
 * hugetlbfs file and then repeatedly asks the kernel to move them between
 * NUMA nodes 1 and 0.
 *
 * For the ia64 architecture, the Linux kernel reserves Region number 4 for
 * huge pages.  That means that if one requires a fixed address, a huge page
 * aligned address starting with 0x800000... will be required.  If a fixed
 * address is not required, the kernel will select an address in the proper
 * range.
 * Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
 */

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#define __USE_GNU
#include <fcntl.h>
#include <errno.h>
#include <sys/types.h>
#include <time.h>
#include <numa.h>
#include <numaif.h>

/* Usage text; printed with argv[0] substituted for %s (no trailing newline). */
#define USAGE "USAGE: %s num_hpages hugepagefile_name"
/* Huge page size in bytes (2MB); used as the mmap length unit and page stride. */
#define H_PAGESIZE (2 * 1024 * 1024)
/* Base page size in bytes; only used to derive the base-page count per huge page. */
#define B_PAGESIZE (4096)

/* NOTE(review): not referenced anywhere in the visible program. */
#define ITERATIONS 100000

/* mmap() arguments: readable/writable shared mapping, address chosen by the kernel. */
#define PROTECTION (PROT_READ | PROT_WRITE)
#define ADDR (void *)(0x0UL)
#define FLAGS (MAP_SHARED)

int main(int argc, char ** argv)
{
	char *f_name;
	char *sep;
	char ch;
	int fd;
	long i;
	long long hpages, bpages;
	void *addr;
	char foo;
	long count = 0;
	void **pages;
	int *nodes;
	int *status;
	int flags;
	long m_ret;
	/*
	 * HARD CODED FOR TWO NODES: 0 and 1
	 */
	unsigned long node0_mask = 01L << 0;
	unsigned long node1_mask = 01L << 1;

	if (argc != 3) {
		printf(USAGE, argv[0]);
		exit (1);
	}

	hpages = strtol(argv[1], &sep, 0);
	if (errno || hpages < 0) {
		printf("Invalid number hpages (%s)\n", argv[1]);
		printf(USAGE, argv[0]);
		exit (1);
	}
	bpages = hpages * (H_PAGESIZE / B_PAGESIZE);

	f_name = argv[2];
	fd = open(f_name, O_CREAT | O_RDWR, 0755);
	if (fd < 0) {
		printf("Open of %s failed", argv[2]);
		exit(1);
	}

	addr = mmap(ADDR, hpages * H_PAGESIZE, PROTECTION, FLAGS, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		exit (1);
	}
	printf("%ld huge pages mapped at 0x%lx\n", hpages,
		( unsigned long)addr);
	printf("Faulting in all pages\n");
	for (i=0; i < hpages; i++)
		foo = *((char *)(addr + (i * H_PAGESIZE)));

	pages = malloc(bpages * sizeof(void *));
	nodes = malloc(bpages * sizeof(int));
	status = malloc(bpages * sizeof(int));
	if (!pages || !nodes || !status) {
		printf("error allocating memory for arrays\n");
		exit (1);
	}

while (1) {
	printf("Hit any key to move hugetlb pages to node 1\n");
	read(STDIN_FILENO, &ch, 1);

	for (i=0; i < hpages; i++) {
		pages[i] = addr + (i * H_PAGESIZE);
		// pages[i] = addr + (i * H_PAGESIZE) + B_PAGESIZE;
		nodes[i] = 1;
		status[i] = -1;
		flags = MPOL_MF_MOVE_ALL;
	}
	m_ret = numa_move_pages(0, hpages, pages, nodes, status, flags);
	if (m_ret) {
		perror("move_pages");
		if (m_ret > 0)
			printf("%ld pages not migrated\n", m_ret);
	} else {
		printf("Success!\n");
	}
	for (i=0; i < hpages; i++) {
		printf("\tstatus[%d] = %d\n", i, status[i]);
		status[i] = -1;
	}

	printf("Hit any key to move hugetlb pages to node 0\n");
	read(STDIN_FILENO, &ch, 1);
	for (i=0; i < hpages; i++) {
		pages[i] = addr + (i * H_PAGESIZE);
		// pages[i] = addr + (i * H_PAGESIZE) + B_PAGESIZE;
		nodes[i] = 0;
		status[i] = -1;
		flags = MPOL_MF_MOVE_ALL;
	}
	m_ret = numa_move_pages(0, hpages, pages, nodes, status, flags);
	if (m_ret) {
		perror("move_pages");
		if (m_ret > 0)
			printf("%ld pages not migrated\n", m_ret);
	} else {
		printf("Success!\n");
	}
	for (i=0; i < hpages; i++) {
		printf("\tstatus[%d] = %d\n", i, status[i]);
		status[i] = -1;
	}
}

	munmap(addr, hpages * H_PAGESIZE);
	close(fd);

	return 0;
}

[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux