Re: [PATCH] memcg: charge io_uring related objects

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



+ Jens and Andrew

On Sat, Jul 17, 2021 at 3:58 AM Yutian Yang <nglaive@xxxxxxxxx> wrote:
>
> This patch adds accounting flags to allocations of io_uring related
> objects. Allocations of the objects are all triggerable by
> io_uring_setup() syscall from userspace.
>
> We have written a PoC to show that the missing-charging objects lead to
> breaking memcg limits. The PoC program takes around 835MB unaccounted
> memory, while it is charged for only 23MB memory usage. We evaluate the
> PoC on QEMU x86_64 v5.2.90 + Linux kernel v5.10.19 + Debian buster. All
> the limitations including ulimits and sysctl variables are set as default.
> Specifically, the MEMLOCK in prlimit is set as 65536KB and hard limit of
> NOFILE is set as 1,048,576.
>
> The PoC is written as a testcase under Linux LTP. To compile it, put the
> source to ltp_dir/testcases/kernel/syscalls/io_uring/ and make.
>
> /*------------------------- POC code ----------------------------*/
>
> #include <stdlib.h>
> #include <stdio.h>
> #include <errno.h>
> #include <string.h>
> #include <fcntl.h>
> #include "config.h"
> #include "tst_test.h"
> #include "lapi/io_uring.h"
> #include "lapi/namespaces_constants.h"
>
> #define TEST_FILE "test_file"
>
> #define QUEUE_DEPTH 1
> #define BLOCK_SZ    1024
>
> #define errExit(msg)    do { perror(msg); exit(EXIT_FAILURE);  \
>                         } while (0)
>
> #define STACK_SIZE (16 * 1024)
>
> static char thread_stack[512][STACK_SIZE];
>
> int thread_fn(void* arg)
> {
>   struct io_uring_params p;
>   memset(&p, 0 ,sizeof(p));
>   for (int i = 0; i< 10000 ; ++i) {
>     int ringfd = io_uring_setup(QUEUE_DEPTH, &p);
>     if (ringfd == -1) {
>       errExit("io_uring_setup");
>     }
>   }
>   while(1);
>   return 0;
> }
>
> static void run(unsigned int n) {
>   int thread_pid;
>   for (int i = 0; i < 1; ++i) {
>     thread_pid = ltp_clone(SIGCHLD, thread_fn, NULL, STACK_SIZE, \
>       thread_stack[i]);
>   }
>   while(1);
> }
>
> static struct tst_test test = {
>   .test = run,
>   .tcnt = 1,
>   .timeout = -1,
> };
>
> /*-------------------------- end --------------------------------*/
>
>
> Signed-off-by: Yutian Yang <nglaive@xxxxxxxxx>
> ---
>  fs/io-wq.c    |  6 +++---
>  fs/io_uring.c | 10 +++++-----
>  2 files changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/fs/io-wq.c b/fs/io-wq.c
> index f72d53848..ab31d01cc 100644
> --- a/fs/io-wq.c
> +++ b/fs/io-wq.c
> @@ -1086,11 +1086,11 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
>         if (WARN_ON_ONCE(!data->free_work || !data->do_work))
>                 return ERR_PTR(-EINVAL);
>
> -       wq = kzalloc(sizeof(*wq), GFP_KERNEL);
> +       wq = kzalloc(sizeof(*wq), GFP_KERNEL_ACCOUNT);
>         if (!wq)
>                 return ERR_PTR(-ENOMEM);
>
> -       wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL);
> +       wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL_ACCOUNT);
>         if (!wq->wqes)
>                 goto err_wq;
>
> @@ -1111,7 +1111,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
>
>                 if (!node_online(alloc_node))
>                         alloc_node = NUMA_NO_NODE;
> -               wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node);
> +               wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL_ACCOUNT, alloc_node);
>                 if (!wqe)
>                         goto err;
>                 wq->wqes[node] = wqe;
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index d0b7332ca..175fd5b0e 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -1177,7 +1177,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
>         struct io_ring_ctx *ctx;
>         int hash_bits;
>
> -       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> +       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
>         if (!ctx)
>                 return NULL;
>
> @@ -1195,7 +1195,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
>                 hash_bits = 1;
>         ctx->cancel_hash_bits = hash_bits;
>         ctx->cancel_hash = kmalloc((1U << hash_bits) * sizeof(struct hlist_head),
> -                                       GFP_KERNEL);
> +                                       GFP_KERNEL_ACCOUNT);
>         if (!ctx->cancel_hash)
>                 goto err;
>         __hash_init(ctx->cancel_hash, 1U << hash_bits);
> @@ -7850,7 +7850,7 @@ static int io_uring_alloc_task_context(struct task_struct *task)
>         struct io_uring_task *tctx;
>         int ret;
>
> -       tctx = kmalloc(sizeof(*tctx), GFP_KERNEL);
> +       tctx = kmalloc(sizeof(*tctx), GFP_KERNEL_ACCOUNT);
>         if (unlikely(!tctx))
>                 return -ENOMEM;

What about percpu_counter_init() in this function and io_wq_hash in
io_init_wq_offload()?

>
> @@ -8038,7 +8038,7 @@ static void io_mem_free(void *ptr)
>
>  static void *io_mem_alloc(size_t size)
>  {
> -       gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP |
> +       gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP |
>                                 __GFP_NORETRY;
>
>         return (void *) __get_free_pages(gfp_flags, get_order(size));
> @@ -9874,7 +9874,7 @@ static int __init io_uring_init(void)
>
>         BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
>         BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
> -       req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
> +       req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
>         return 0;
>  };
>  __initcall(io_uring_init);
> --
> 2.25.1
>



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]     [Monitors]

  Powered by Linux