Re: pageless memory & zsmalloc

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Oct 05, 2021 at 06:51:32PM +0100, Matthew Wilcox wrote:
> We're trying to tidy up the mess in struct page, and as part of removing
> slab from struct page, zsmalloc came on my radar because it's using some
> of slab's fields.  The eventual endgame is to get struct page down to a
> single word which points to the "memory descriptor" (ie the current
> zspage).
> 
> zsmalloc, like vmalloc, allocates order-0 pages.  Unlike vmalloc,
> zsmalloc allows compaction.  Currently (from the file):
> 
>  * Usage of struct page fields:
>  *      page->private: points to zspage
>  *      page->freelist(index): links together all component pages of a zspage
>  *              For the huge page, this is always 0, so we use this field
>  *              to store handle.
>  *      page->units: first object offset in a subpage of zspage
>  *
>  * Usage of struct page flags:
>  *      PG_private: identifies the first component page
>  *      PG_owner_priv_1: identifies the huge component page
> 
> This isn't quite everything.  For compaction, zsmalloc also uses
> page->mapping (set in __SetPageMovable()), PG_lock (to sync with
> compaction) and page->_refcount (compaction gets a refcount on the page).
> 
> Since zsmalloc is so well-contained, I propose we completely stop
> using struct page in it, as we intend to do for the rest of the users
> of struct page.  That is, the _only_ element of struct page we use is
> compound_head and it points to struct zspage.

Then, do you mean zsmalloc couldn't use page.lru to link the tail pages
from the head page? IOW, does zspage need to have a subpage list or array?

> 
> That means every single page allocated by zsmalloc is PageTail().  Also it
> means that when isolate_movable_page() calls trylock_page(), it redirects
> to the zspage.  That means struct zspage must now have page flags as its
> first element.  Also, zspage->_refcount, and zspage->mapping must match
> their locations in struct page.  That's something that we'll get cleaned
> up eventually, but for now, we're relying on offsetof() assertions.
> 
> The good news is that trylock_zspage() no longer needs to walk the
> list of pages, calling trylock_page() on each of them.

Sounds good if we could remove the mess.

> 
> Anyway, is there a good test suite for zsmalloc()?  Particularly something
> that would exercise its interactions with compaction / migration?
> I don't have any code written yet.

This is my toy program for putting stress on those paths. I ran it on KVM
with 8 cores and 8GB of RAM.

[attached memhog.c]

#!/bin/bash
#
# Driver for memhog: puts swap on a 6g zram device, starts ten memhog
# instances with compress ratios 10, 20, ..., 100, then keeps triggering
# zram compaction and system-wide memory compaction every two seconds.

zram_sysfs=/sys/block/zram0

# Turn off every active swap area before reconfiguring zram0.
swapoff -a

# Reset the device, size it and enable it as swap.
echo 1 > "${zram_sysfs}/reset"
echo 6g > "${zram_sysfs}/disksize"
mkswap /dev/zram0
swapon /dev/zram0

# One background memhog (1g each) per compress ratio.
for ((ratio = 10; ratio <= 100; ratio += 10)); do
    ./memhog -m 1g -c "${ratio}" &
done

# Fire both compaction triggers in the background, forever.
while true; do
    echo 1 > "${zram_sysfs}/compact" &
    echo 1 > /proc/sys/vm/compact_memory &
    sleep 2
done
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <sys/mman.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>

#define CHUNK_SIZE (20UL<<20)
#ifndef PAGE_SIZE
#define PAGE_SIZE (4096)
#endif

/*
 * For native build, use aarch64-linux-android-clang -pie -o memhog memhog.c
 */
/*
 * Print the command-line help to stderr and terminate the process with
 * exit status 1.
 *
 * exe: program name shown in the usage line.
 *
 * This function never returns.
 */
void usage(char *exe)
{
    /*
     * No positional argument is advertised: the program only consumes
     * options, and the size is given through -m.
     */
    fprintf(stderr,
            "Usage: %s [options]\n"
            "    -c|--compress RATIO zero-fill RATIO percent (0-100) of every page\n"
            "    -m|--memory SIZE    allocate SIZE[k|m|g] bytes of memory in total\n"
            "    -M|--mlock          mlock() the memory\n"
            "    -s|--sleep SEC      sleep SEC seconds during repeat cycle (-1: forever)\n"
            "    -r|--repeat N       repeat read/write N times (-1: forever)\n"
            "    -h|--help           show this message\n",
            exe);

    exit(1);
}

/*
 * Long-option table handed to getopt_long(). Every entry has a NULL flag
 * pointer, so getopt_long() returns the entry's val, which is the matching
 * short option character. The all-zero entry terminates the table.
 */
static const struct option opts[] = {
    { .name = "compress", .has_arg = required_argument, .flag = NULL, .val = 'c' },
    { .name = "memory",   .has_arg = required_argument, .flag = NULL, .val = 'm' },
    { .name = "mlock",    .has_arg = no_argument,       .flag = NULL, .val = 'M' },
    { .name = "sleep",    .has_arg = required_argument, .flag = NULL, .val = 's' },
    { .name = "repeat",   .has_arg = required_argument, .flag = NULL, .val = 'r' },
    { .name = "help",     .has_arg = no_argument,       .flag = NULL, .val = 'h' },
    { .name = NULL,       .has_arg = no_argument,       .flag = NULL, .val = 0   }
};

/*
 * Parse a number followed by an optional binary unit suffix.
 *
 * ptr:    string to parse. The number is read with strtoull() in base 0,
 *         so decimal, 0x-prefixed hex and 0-prefixed octal are accepted.
 * retptr: if not NULL, receives a pointer to the first character after the
 *         number and its suffix (if one was consumed).
 *
 * A single suffix character scales the value: K/k by 2^10, M/m by 2^20,
 * G/g by 2^30. Any other character is left unconsumed. No overflow or
 * parse-error checking is performed.
 *
 * Returns the scaled value.
 */
unsigned long long memparse(const char *ptr, char **retptr)
{
    char *tail;
    unsigned long long value = strtoull(ptr, &tail, 0);
    unsigned int shift = 0;

    if (*tail == 'G' || *tail == 'g')
        shift = 30;
    else if (*tail == 'M' || *tail == 'm')
        shift = 20;
    else if (*tail == 'K' || *tail == 'k')
        shift = 10;

    /* Only a recognised suffix is consumed. */
    if (shift != 0) {
        value <<= shift;
        tail++;
    }

    if (retptr != NULL)
        *retptr = tail;

    return value;
}

/*
 * Map anonymous private memory in CHUNK_SIZE pieces until `size` bytes are
 * covered or a mapping fails.
 *
 * size:      total number of bytes requested. Only whole chunks are mapped,
 *            i.e. size / CHUNK_SIZE of them.
 * alloc_ptr: output array receiving the base address of every mapped chunk.
 *            The caller must provide room for size / CHUNK_SIZE entries;
 *            the capacity cannot be checked here.
 * len:       output, number of chunks actually mapped. It is smaller than
 *            size / CHUNK_SIZE when an mmap() call failed, and 0 when the
 *            very first one did.
 */
void allocate_mem(unsigned long long size, void *alloc_ptr[], int *len)
{
    /* Keep the full width of `size` so the count is not truncated. */
    unsigned long long nr_chunk = size / CHUNK_SIZE;
    int allocated = 0;

    while ((unsigned long long)allocated < nr_chunk) {
        /*
         * Anonymous mappings ignore the fd, but portable code must pass -1:
         * some implementations reject any other value with MAP_ANON.
         */
        void *ptr = mmap(NULL, CHUNK_SIZE, PROT_READ|PROT_WRITE,
                         MAP_PRIVATE|MAP_ANON, -1, 0);

        if (ptr == MAP_FAILED) {
            /* `allocated` is also the index of the chunk that failed. */
            printf("fail to allocate %d\n", allocated);
            break;
        }

        alloc_ptr[allocated++] = ptr;
    }

    *len = allocated;
}

/*
 * Unmap the chunks recorded in alloc_ptr.
 *
 * alloc_ptr: array of chunk base addresses, each CHUNK_SIZE bytes long.
 * len:       number of valid entries in alloc_ptr.
 *
 * The result of munmap() is not checked.
 */
void free_mem(void *alloc_ptr[], int len)
{
    int idx = 0;

    while (idx < len) {
        munmap(alloc_ptr[idx], CHUNK_SIZE);
        idx++;
    }
}

/*
 * Fill one CHUNK_SIZE chunk page by page: the first comp_ratio percent of
 * every page is zeroed and the rest of the page is copied from rand_page.
 *
 * ptr:        start of the chunk (CHUNK_SIZE writable bytes).
 * rand_page:  source buffer of at least PAGE_SIZE bytes; its first
 *             PAGE_SIZE - zero_size bytes are copied into every page.
 * comp_ratio: percentage of each page to zero. Values outside 0..100 are
 *             clamped, because they would otherwise produce a negative or
 *             larger-than-a-page length for memset()/memcpy().
 */
void fill_mem(void *ptr, void *rand_page, long int comp_ratio)
{
    const size_t nr_page = CHUNK_SIZE / PAGE_SIZE;
    /* Byte pointer: arithmetic on void * is a GNU extension, not ISO C. */
    unsigned char *page = ptr;
    size_t zero_size;
    size_t i;

    if (comp_ratio < 0)
        comp_ratio = 0;
    else if (comp_ratio > 100)
        comp_ratio = 100;

    zero_size = (size_t)PAGE_SIZE * (size_t)comp_ratio / 100;

    for (i = 0; i < nr_page; i++, page += PAGE_SIZE) {
        memset(page, 0, zero_size);
        memcpy(page + zero_size, rand_page, PAGE_SIZE - zero_size);
    }
}

/*
 * Fill every allocated chunk with partially compressible data.
 *
 * One page of random bytes is read from /dev/urandom and reused as the
 * random part of every page of every chunk (see fill_mem()).
 *
 * alloc_ptr:  array of chunk base addresses.
 * len:        number of valid entries in alloc_ptr.
 * comp_ratio: passed through to fill_mem() for every chunk.
 *
 * Returns 0 on success, 1 if /dev/urandom could not be opened or did not
 * deliver a full page.
 */
int fill_chunk(void *alloc_ptr[], int len, long int comp_ratio)
{
    char rand_buf[PAGE_SIZE];
    ssize_t nread;
    int i;
    int fd = open("/dev/urandom", O_RDONLY);

    if (fd < 0) {
        /* perror() appends ": <reason>\n" itself, so no newline here. */
        perror("Fail to open /dev/urandom");
        return 1;
    }

    nread = read(fd, rand_buf, sizeof(rand_buf));
    if (nread != (ssize_t)sizeof(rand_buf)) {
        /* errno is only meaningful when read() itself failed. */
        if (nread < 0)
            perror("Fail to read /dev/urandom");
        else
            fprintf(stderr, "Fail to read /dev/urandom: short read\n");
        close(fd);
        return 1;
    }

    /* The descriptor is not needed once the random page is in memory. */
    close(fd);

    for (i = 0; i < len; i++)
        fill_mem(alloc_ptr[i], rand_buf, comp_ratio);

    return 0;
}

/*
 * memhog entry point.
 *
 * Parses the options, protects the process from the OOM killer, optionally
 * mlockall()s it, and then repeats this cycle: map opt_mem bytes in
 * CHUNK_SIZE chunks, fill them with partially compressible data, sleep,
 * unmap.
 *
 *   -r -1 repeats the cycle forever.
 *   -s -1 sleeps forever after the first fill, keeping the memory mapped.
 *
 * Returns 0 after the requested number of cycles, 1 on any setup or fill
 * error.
 */
int main(int argc, char *argv[])
{
    /* With the default PAGE_SIZE of 4096: 20M * 4096 = 80G is enough */
    void *alloc_ptr[PAGE_SIZE];
    const unsigned long long max_chunks =
        sizeof(alloc_ptr) / sizeof(alloc_ptr[0]);
    unsigned long long opt_mem = 100ULL << 20;
    long int opt_sleep = 1;
    long int opt_reps = 10000;
    long int opt_comp_ratio = 30;
    int opt_mlock = 0;
    long int loops;
    int pid = getpid();
    int c, count;
    int oom_ok = 0;
    FILE *oom;

    while ((c = getopt_long(argc, argv,
                    "c:m:Ms:r:h", opts, NULL)) != -1) {
        switch (c) {
            case 'c':
                opt_comp_ratio = strtol(optarg, NULL, 10);
                break;
            case 'm':
                opt_mem = memparse(optarg, NULL);
                break;
            case 'M':
                opt_mlock = 1;
                break;
            case 's':
                opt_sleep = strtol(optarg, NULL, 10);
                break;
            case 'r':
                opt_reps = strtol(optarg, NULL, 10);
                break;
            case 'h':
            default:
                usage(argv[0]);
        }
    }

    /*
     * The ratio is used as a percentage of a page; anything outside
     * 0..100 would yield an invalid length when filling memory.
     */
    if (opt_comp_ratio < 0 || opt_comp_ratio > 100) {
        fprintf(stderr, "compress ratio should be between 0 and 100\n");
        return 1;
    }

    if (opt_mem < CHUNK_SIZE) {
        printf("memory size should be greater than %lu\n", CHUNK_SIZE);
        return 1;
    }

    /* allocate_mem() stores one pointer per chunk in alloc_ptr[]. */
    if (opt_mem / CHUNK_SIZE > max_chunks) {
        fprintf(stderr, "memory size should be at most %llu MB\n",
                (max_chunks * CHUNK_SIZE) >> 20);
        return 1;
    }

    /*
     * Disable LMK/OOM killer. The value is written directly rather than
     * through a shell. A write that is refused is reported by fprintf()
     * or, because stdio buffers it, by fclose().
     */
    oom = fopen("/proc/self/oom_score_adj", "w");
    if (oom) {
        oom_ok = fprintf(oom, "-1000\n") > 0;
        if (fclose(oom) != 0)
            oom_ok = 0;
    }
    if (!oom_ok) {
        fprintf(stderr, "fail to disable OOM. Maybe you need root permission\n");
        return 1;
    }

    if (opt_mlock) {
        if (mlockall(MCL_CURRENT|MCL_FUTURE|MCL_ONFAULT)) {
            perror("Fail to mlockall");
            return 1;
        }
    }

    printf("%llu MB allocated\n", opt_mem >> 20);
    printf("%ld loop\n", opt_reps);
    printf("%ld sleep\n", opt_sleep);
    printf("%ld comp_ratio\n", opt_comp_ratio);
    count = 0;
    loops = opt_reps;

    while (loops) {
        int len;

        count++;

        /*
         * If we couldn't allocate any memory, let's try again
         * after a while
         */
        for (;;) {
            allocate_mem(opt_mem, alloc_ptr, &len);
            if (len > 0)
                break;
            sleep(1);
        }

        if (fill_chunk(alloc_ptr, len, opt_comp_ratio)) {
            printf("Fail to fill chunck\n");
            free_mem(alloc_ptr, len);
            return 1;
        }

        if (opt_sleep == -1) {
            while (1) {
                printf("Forever sleep, Bye\n");
                sleep(100000);
            }
        }

        sleep((unsigned int)opt_sleep);
        free_mem(alloc_ptr, len);
        /* -1 means "repeat forever": never count it down. */
        if (loops != -1)
            loops--;
        printf("[%d] Pass %d\n", pid, count);
    }

    return 0;
}

[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux