On Sun, Jan 19, 2025 at 6:06 PM Jiaqi Yan <jiaqiyan@xxxxxxxxxx> wrote: > > Test based on my understanding of the memory failure recovery behavior > for HugeTLB file system, especially after file is truncated/closed. > > Signed-off-by: Jiaqi Yan <jiaqiyan@xxxxxxxxxx> > --- > tools/testing/selftests/mm/Makefile | 1 + > tools/testing/selftests/mm/hugetlb-mfr-base.c | 240 ++++++++++++++++++ > 2 files changed, 241 insertions(+) > create mode 100644 tools/testing/selftests/mm/hugetlb-mfr-base.c > > diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile > index 63ce39d024bb5..576626c93ccab 100644 > --- a/tools/testing/selftests/mm/Makefile > +++ b/tools/testing/selftests/mm/Makefile > @@ -62,6 +62,7 @@ TEST_GEN_FILES += hmm-tests > TEST_GEN_FILES += hugetlb-madvise > TEST_GEN_FILES += hugetlb-read-hwpoison > TEST_GEN_FILES += hugetlb-soft-offline > +TEST_GEN_FILES += hugetlb-mfr-base > TEST_GEN_FILES += hugepage-mmap > TEST_GEN_FILES += hugepage-mremap > TEST_GEN_FILES += hugepage-shm > diff --git a/tools/testing/selftests/mm/hugetlb-mfr-base.c b/tools/testing/selftests/mm/hugetlb-mfr-base.c > new file mode 100644 > index 0000000000000..b8eee071babe6 > --- /dev/null > +++ b/tools/testing/selftests/mm/hugetlb-mfr-base.c > @@ -0,0 +1,240 @@ > +// SPDX-License-Identifier: GPL-2.0 > + > +#define _GNU_SOURCE > +#include <errno.h> > +#include <pthread.h> > +#include <signal.h> > +#include <stdbool.h> > +#include <stdlib.h> > +#include <stdio.h> > +#include <string.h> > +#include <unistd.h> > + > +#include <linux/magic.h> > +#include <linux/memfd.h> > +#include <sys/mman.h> > +#include <sys/prctl.h> > +#include <sys/statfs.h> > +#include <sys/types.h> > + > +#include "../kselftest.h" > +#include "vm_util.h" > + > +#define EPREFIX " !!! 
" > +#define BYTE_LENTH_IN_1G 0x40000000 LENGTH, but the macro name itself is also a little weird-sounding > +#define HUGETLB_FILL 0xab > + > +static void *sigbus_addr; > +static int sigbus_addr_lsb; > +static bool expecting_sigbus; > +static bool got_sigbus; > +static bool was_mceerr; > + > +static int create_hugetlbfs_file(struct statfs *file_stat) > +{ > + int fd; > + int flags = MFD_HUGETLB | MFD_HUGE_1GB; > + > + fd = memfd_create("hugetlb_tmp", flags); > + if (fd < 0) > + ksft_exit_fail_perror("Failed to memfd_create"); > + > + memset(file_stat, 0, sizeof(*file_stat)); > + if (fstatfs(fd, file_stat)) { > + close(fd); > + ksft_exit_fail_perror("Failed to fstatfs"); > + } > + if (file_stat->f_type != HUGETLBFS_MAGIC) { > + close(fd); > + ksft_exit_fail_msg("Not hugetlbfs file"); > + } > + > + ksft_print_msg("Created hugetlb_tmp file\n"); > + ksft_print_msg("hugepagesize=%#lx\n", file_stat->f_bsize); > + if (file_stat->f_bsize != BYTE_LENTH_IN_1G) > + ksft_exit_fail_msg("Hugepage size is not 1G"); > + > + return fd; > +} > + > +/* > + * SIGBUS handler for "do_hwpoison" thread that mapped and MADV_HWPOISON > + */ > +static void sigbus_handler(int signo, siginfo_t *info, void *context) > +{ > + if (!expecting_sigbus) > + ksft_exit_fail_msg("unexpected sigbus with addr=%p", > + info->si_addr); > + > + got_sigbus = true; > + was_mceerr = (info->si_code == BUS_MCEERR_AO || > + info->si_code == BUS_MCEERR_AR); > + sigbus_addr = info->si_addr; > + sigbus_addr_lsb = info->si_addr_lsb; > +} > + > +static void *do_hwpoison(void *hwpoison_addr) > +{ > + int hwpoison_size = getpagesize(); > + > + ksft_print_msg("MADV_HWPOISON hwpoison_addr=%p, len=%d\n", > + hwpoison_addr, hwpoison_size); > + if (madvise(hwpoison_addr, hwpoison_size, MADV_HWPOISON) < 0) > + ksft_exit_fail_perror("Failed to MADV_HWPOISON"); > + > + pthread_exit(NULL); > +} > + > +static void test_hwpoison_multiple_pages(unsigned char *start_addr) > +{ > + pthread_t pthread; > + int ret; > + unsigned char 
*hwpoison_addr; > + unsigned long offsets[] = {0x200000}; > + > + for (size_t i = 0; i < ARRAY_SIZE(offsets); ++i) { > + sigbus_addr = (void *)0xBADBADBAD; > + sigbus_addr_lsb = 0; > + was_mceerr = false; > + got_sigbus = false; > + expecting_sigbus = true; > + hwpoison_addr = start_addr + offsets[i]; > + > + ret = pthread_create(&pthread, NULL, &do_hwpoison, hwpoison_addr); > + if (ret) > + ksft_exit_fail_perror("Failed to create hwpoison thread"); > + > + ksft_print_msg("Created thread to hwpoison and access hwpoison_addr=%p\n", > + hwpoison_addr); > + > + pthread_join(pthread, NULL); > + > + if (!got_sigbus) > + ksft_test_result_fail("Didn't get a SIGBUS\n"); > + if (!was_mceerr) > + ksft_test_result_fail("Didn't get a BUS_MCEERR_A(R|O)\n"); > + if (sigbus_addr != hwpoison_addr) > + ksft_test_result_fail("Incorrect address: got=%p, expected=%p\n", > + sigbus_addr, hwpoison_addr); > + if (sigbus_addr_lsb != 30) > + ksft_test_result_fail("Incorrect address LSB: got=%d, expected=%d\n", > + sigbus_addr_lsb, pshift()); > + > + ksft_print_msg("Received expected and correct SIGBUS\n"); > + } > +} > + > +static int read_nr_hugepages(unsigned long hugepage_size, > + unsigned long *nr_hugepages) > +{ > + char buffer[256] = {0}; > + char cmd[256] = {0}; > + > + sprintf(cmd, "cat /sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages", > + hugepage_size); You'll notice this is just reading a file through an external command > + FILE *cmdfile = popen(cmd, "r"); > + > + if (cmdfile == NULL) { > + ksft_perror(EPREFIX "failed to popen nr_hugepages"); > + return -1; > + } > + > + if (!fgets(buffer, sizeof(buffer), cmdfile)) { > + ksft_perror(EPREFIX "failed to read nr_hugepages"); > + pclose(cmdfile); > + return -1; > + } and this is just reading a "file" (pipe) directly, soo.... static int read_nr_hugepages(...) 
{ char path[256]; snprintf(path, 256, "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages", hugepage_size); FILE *file = fopen(path, "r"); fscanf(file, "%lu", nr_hugepages); } (error handling omitted) -- Pedro