Re: [PATCH v2] mm/swap: fix swap_info_struct race between swapoff and get_swap_pages()

Aaron Lu <aaron.lu@xxxxxxxxx> · Thu, 6 Apr 2023 20:12:45 +0800

On Wed, Apr 05, 2023 at 12:08:47AM +0800, Rongwei Wang wrote:
> Hello
> 
> I have fixed up some things based on patch v1. To help all readers and
> reviewers reproduce this bug, I am sharing a reproducer here:

I reproduced this problem under a VM this way:

$ sudo ./stress-ng --swap 1
// on another terminal
$ for i in `seq 8`; do ./swap & done
Looks simpler than yours :-)
(I didn't realize you had posted your reproducer here since I'm not CCed,
and only found it after I had invented mine.)
Then the warning message normally appears within a few seconds.

Here is the code for the above swap prog:

#include <stdio.h>
#include <stddef.h>
#include <sys/mman.h>

#define SIZE 0x100000

/*
 * Map SIZE bytes of private anonymous memory, then loop forever: write one
 * byte at every 0x1000 offset and ask the kernel to page the region out
 * with MADV_PAGEOUT.
 *
 * Returns -1 if mmap() fails, or madvise()'s non-zero return value if it
 * fails; otherwise never returns.
 */
int main(void)
{
        char *base;
        int off, rc;

        base = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANON, -1, 0);
        if (base == MAP_FAILED) {
                perror("mmap");
                return -1;
        }

        for (;;) {
                /* Write to the mapping at 0x1000-byte strides. */
                off = 0;
                while (off < SIZE) {
                        base[off] = 1;
                        off += 0x1000;
                }

                /* Request page-out of the whole mapping. */
                rc = madvise(base, SIZE, MADV_PAGEOUT);
                if (rc != 0) {
                        perror("madvise");
                        return rc;
                }
        }
}

Unfortunately, this test prog does not work on kernels before v5.4 because
MADV_PAGEOUT was introduced in v5.4. I tested on v5.4 and the problem is
also there.

I haven't found a way to trigger swapping while swap devices come and go on
kernels before v5.4; I tried putting the test prog in a memcg with a memory
limit, but then the prog is easily killed because there is nowhere to swap
out to.

> 
> swap_bomb.sh
> 
> #!/usr/bin/env bash
> 
> # Run four identical stress-ng instances of the "vm" class for 12 hours:
> # three in the background and the last in the foreground, so the script
> # blocks until the foreground instance finishes.
> 
> # Stressors excluded from the run (passed to stress-ng -x).
> EXCLUDE=bigheap,stackmmap,mlock,vm-splice,mmapaddr,mmapfixed,mmapfork,mmaphuge,mmapmany,mprotect,mremap,msync,msyncmany,physpage,tmpfs,vm-addr,vm-rw,brk,vm-segv,userfaultfd,malloc,stack,munmap,dev-shm,bad-altstack,shm-sysv,pageswap,madvise,vm,shm,env,mmap
> 
> stress-ng -a 1 --class vm -t 12h --metrics --times -x "$EXCLUDE" --verify -v &
> stress-ng -a 1 --class vm -t 12h --metrics --times -x "$EXCLUDE" --verify -v &
> stress-ng -a 1 --class vm -t 12h --metrics --times -x "$EXCLUDE" --verify -v &
> stress-ng -a 1 --class vm -t 12h --metrics --times -x "$EXCLUDE" --verify -v
> 
> 
> madvise_shared.c
> 
> #include <stdio.h>
> #include <stdlib.h>
> #include <sys/mman.h>
> #include <unistd.h>
> 
> #define MSIZE (1024 * 1024 * 2)
> 
> /*
>  * Reproducer: in an endless loop, map MSIZE bytes of shared anonymous
>  * memory, write every byte, request page-out with MADV_PAGEOUT, write
>  * every byte again, request page-out again, print the mapping address,
>  * and unmap it.
>  *
>  * Exits with EXIT_FAILURE if mmap(), madvise() or munmap() fails.
>  */
> int main(void)
> {
>         char *shm_addr;
>         unsigned long i;
> 
>         while (1) {
>                 // Map shared memory segment
>                 shm_addr =
>                     mmap(NULL, MSIZE, PROT_READ | PROT_WRITE,
>                          MAP_SHARED | MAP_ANONYMOUS, -1, 0);
>                 if (shm_addr == MAP_FAILED) {
>                         perror("Failed to map shared memory segment");
>                         exit(EXIT_FAILURE);
>                 }
> 
>                 // Touch every byte so the pages are populated
>                 for (i = 0; i < MSIZE; i++) {
>                         shm_addr[i] = 1;
>                 }
> 
>                 // Advise kernel on usage pattern of shared memory
>                 if (madvise(shm_addr, MSIZE, MADV_PAGEOUT) == -1) {
>                         perror("Failed to advise kernel on shared "
>                                "memory usage");
>                         exit(EXIT_FAILURE);
>                 }
> 
>                 // Touch every byte again after the page-out request
>                 for (i = 0; i < MSIZE; i++) {
>                         shm_addr[i] = 1;
>                 }
> 
>                 // Advise kernel on usage pattern of shared memory
>                 if (madvise(shm_addr, MSIZE, MADV_PAGEOUT) == -1) {
>                         perror("Failed to advise kernel on shared "
>                                "memory usage");
>                         exit(EXIT_FAILURE);
>                 }
>                 // Use shared memory; %p needs a void * argument
>                 printf("Hello, shared memory: %p\n", (void *)shm_addr);
> 
>                 // Unmap shared memory segment
>                 if (munmap(shm_addr, MSIZE) == -1) {
>                         perror("Failed to unmap shared memory segment");
>                         exit(EXIT_FAILURE);
>                 }
>         }
> 
>         return 0;
> }
> 
> The bug will reproduce more quickly (in about 2~5 minutes) if more
> instances of swap_bomb.sh and madvise_shared are run concurrently.
> 
> Thanks.