#regzbot ^introduced 10befea91b61c4e2c2d1df06a2e978d182fcf792 We are making heavy use of mqueues, and noticed a degradation of performance between 4.18 & 5.10 linux kernels. After a gross per-version tracing, we did kernel bisection between 5.8 and 5.9 and traced the issue to a 10 patches (of which 9 where skipped as they didn't boot) between: commit 10befea91b61c4e2c2d1df06a2e978d182fcf792 (HEAD, refs/bisect/bad) Author: Roman Gushchin <guro@xxxxxx> Date: Thu Aug 6 23:21:27 2020 -0700 mm: memcg/slab: use a single set of kmem_caches for all allocations and: commit 286e04b8ed7a04279ae277f0f024430246ea5eec (refs/bisect/good-286e04b8ed7a04279ae277f0f024430246ea5eec) Author: Roman Gushchin <guro@xxxxxx> Date: Thu Aug 6 23:20:52 2020 -0700 mm: memcg/slab: allocate obj_cgroups for non-root slab pages All of them are part of the "The new cgroup slab memory controller" patchset: https://lore.kernel.org/all/20200623174037.3951353-18-guro@xxxxxx/T/ from Roman Gushchin, which moves the accounting for page level to the object level. Measurements where done using the a test programmtest, which measures mix/average/max time mqueue_send/mqueue_rcv, and average for getppid, both measured over 100 000 runs. Results are shown in the following table +----------+--------------------------+-------------------------+----------------+ | kernel | mqueue_rcv (ns) | mqueue_send (ns) | getppid | | version | min avg max variation | min avg max variation | (ns) variation | +----------+--------------------------+-------------------------+----------------+ | 4.18.45 | 351 382 17533 base | 383 410 13178 base | 149 base | | 5.8-good | 380 392 7156 -2,55% | 376 384 6225 6,77% | 169 -11,83% | | 5.8-bad | 524 530 5310 -27,92% | 512 519 8775 -21,00% | 169 -11,83% | | 5.10 | 520 533 4078 -28,33% | 518 534 8108 -23,22% | 167 -10,78% | | 5.15 | 431 444 8440 -13,96% | 425 437 6170 -6,18% | 171 -12,87% | | 6.03 | 474 614 3881 -37,79% | 482 693 931 -40,84% | 171 -12,87% | +----------+--------------------------+-------------------------+----------------- Below you will find the following appendices: - raw GOOD bisect results - raw BAD bisect results - test code source code - bisect results Friendly, Sven Luther GOOD bisect results: ==================== # uname -a Linux axxiax86-64 5.8.0-5-286e04b8ed7a04279ae277f0f024430246ea5eec-good #1 SMP PREEMPT Fri Jul 29 11:01:19 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux # ./mqtest RX: 380/392/7156 TX: 376/384/6225 Accumulated time for 100000 getppid, 16924653ns Average time per getppid: 169ns # ./mqtest RX: 383/397/6300 TX: 377/385/8258 Accumulated time for 100000 getppid, 16915580ns Average time per getppid: 169ns # ./mqtest RX: 385/397/7690 TX: 377/385/4244 Accumulated time for 100000 getppid, 16922910ns Average time per getppid: 169ns BAD bisect results: =================== # uname -a Linux axxiax86-64 5.8.0-5-10befea91b61c4e2c2d1df06a2e978d182fcf792-bad #1 SMP PREEMPT Fri Jul 29 11:01:19 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux # ./mqtest RX: 524/530/5310 TX: 512/519/8775 Accumulated time for 100000 getppid, 16921865ns Average time per getppid: 169ns # ./mqtest RX: 520/525/8204 TX: 514/522/7420 Accumulated time for 100000 getppid, 16931493ns Average time per getppid: 169ns # ./mqtest RX: 520/525/8128 TX: 512/519/5340 Accumulated time for 100000 getppid, 16921535ns Average time per getppid: 169ns Test programm: ============== #include <stdio.h> #include <string.h> #include <stdlib.h> #include <stdint.h> #include <sys/types.h> #include <unistd.h> #include <errno.h> #include <time.h> #include <mqueue.h> struct sysc_meas { int64_t min; int64_t max; int64_t sum; int64_t count; }; static inline uint64_t __cputime_get(void) { unsigned long l, h; asm volatile("rdtsc" : "=a" (l), "=d" (h)); return ((uint64_t) h << 32) | l; } static inline int64_t __cputime_ns(uint64_t cputime) { /* Note, it is assumed thatTSC clock is 2.2G */ return (cputime * 5) / 11; } static inline uint64_t __cputime_get_ns() { return __cputime_ns(__cputime_get()); } static void sysc_meas_init(struct sysc_meas *sm) { sm->min = INT64_MAX; sm->max = INT64_MIN; sm->sum = sm->count = 0; } static void sysc_meas_add(struct sysc_meas *sm, int64_t value) { if (value > sm->max) sm->max = value; if (value < sm->min) sm->min = value; sm->sum += value; sm->count++; } static void sysc_mq(uint32_t count) { int fd; struct mq_attr mq_attr; uint64_t ts; struct sysc_meas sm_tx, sm_rx; char dummy[256]; if (!count) return; mq_unlink("/sysc_mq"); memset(&mq_attr, 0, sizeof(mq_attr)); mq_attr.mq_maxmsg = 10; mq_attr.mq_msgsize = sizeof(dummy); fd = mq_open("/sysc_mq", O_RDWR | O_NONBLOCK | O_CREAT, 0666, &mq_attr); if (fd < 0) { printf("ERROR: unable to create MQ, %d\n", -errno); exit(-1); } sysc_meas_init(&sm_rx); sysc_meas_init(&sm_tx); dummy[0] = 0; dummy[sizeof(dummy) - 1] = 0; while (count--) { ts = __cputime_get(); mq_send(fd, dummy, sizeof(dummy), 0); sysc_meas_add(&sm_tx, __cputime_get() - ts); ts = __cputime_get(); mq_receive(fd, dummy, sizeof(dummy), NULL); sysc_meas_add(&sm_rx, __cputime_get() - ts); } mq_close(fd); mq_unlink("/sysc_mq"); printf("RX: %ld/%ld/%ld\n", __cputime_ns(sm_rx.min), __cputime_ns(sm_rx.sum / sm_rx.count), __cputime_ns(sm_rx.max)); printf("TX: %ld/%ld/%ld\n", __cputime_ns(sm_tx.min), __cputime_ns(sm_tx.sum / sm_tx.count), __cputime_ns(sm_tx.max)); } int main(int argc, char *argv[]) { uint32_t loops = 100000, cnt; uint64_t ts_a, ts_b, acc = 0; if (argc > 1) loops = strtoul(argv[1], (char **)NULL, 0); cnt = loops; sysc_mq(loops); while (loops--) { ts_a = __cputime_get(); (void)getppid(); ts_b = __cputime_get(); acc += (ts_b - ts_a); } printf("Accumulated time for %d getppid, %luns\n", cnt, __cputime_ns(acc)); printf("Average time per getppid: %luns\n", (__cputime_ns(acc)/cnt)); exit (0); } Complete bisect result: ======================= commit 10befea91b61c4e2c2d1df06a2e978d182fcf792 (HEAD, refs/bisect/bad) Author: Roman Gushchin <guro@xxxxxx> Date: Thu Aug 6 23:21:27 2020 -0700 mm: memcg/slab: use a single set of kmem_caches for all allocations commit 15999eef7f25e2ea6a1c33f026166f472c5714e9 (refs/bisect/skip-15999eef7f25e2ea6a1c33f026166f472c5714e9) Author: Roman Gushchin <guro@xxxxxx> Date: Thu Aug 6 23:21:24 2020 -0700 mm: memcg/slab: remove redundant check in memcg_accumulate_slabinfo() commit c7094406fcb7cdf4fe1de8893f0613b75349773d (refs/bisect/skip-c7094406fcb7cdf4fe1de8893f0613b75349773d) Author: Roman Gushchin <guro@xxxxxx> Date: Thu Aug 6 23:21:20 2020 -0700 mm: memcg/slab: deprecate slab_root_caches commit 272911a4ad18c48f8bc449a5db945a54987dd687 (refs/bisect/skip-272911a4ad18c48f8bc449a5db945a54987dd687) Author: Roman Gushchin <guro@xxxxxx> Date: Thu Aug 6 23:21:17 2020 -0700 mm: memcg/slab: remove memcg_kmem_get_cache() commit d797b7d05405c519f7b62ea69a75cea1883863b2 (refs/bisect/skip-d797b7d05405c519f7b62ea69a75cea1883863b2) Author: Roman Gushchin <guro@xxxxxx> Date: Thu Aug 6 23:21:14 2020 -0700 mm: memcg/slab: simplify memcg cache creation commit 9855609bde03e2472b99a95e869d29ee1e78a751 (refs/bisect/skip-9855609bde03e2472b99a95e869d29ee1e78a751) Author: Roman Gushchin <guro@xxxxxx> Date: Thu Aug 6 23:21:10 2020 -0700 mm: memcg/slab: use a single set of kmem_caches for all accounted allocations commit 0f876e4dc55db5fafef774917fd66e1373c0f390 (refs/bisect/skip-0f876e4dc55db5fafef774917fd66e1373c0f390) Author: Roman Gushchin <guro@xxxxxx> Date: Thu Aug 6 23:21:06 2020 -0700 mm: memcg/slab: move memcg_kmem_bypass() to memcontrol.h commit 4330a26bc4527f1d8918c398ebc983574f761cca (refs/bisect/skip-4330a26bc4527f1d8918c398ebc983574f761cca) Author: Roman Gushchin <guro@xxxxxx> Date: Thu Aug 6 23:21:03 2020 -0700 mm: memcg/slab: deprecate memory.kmem.slabinfo commit f2fe7b09a52bc898ec030d4fa6f78725439c7c2c (refs/bisect/skip-f2fe7b09a52bc898ec030d4fa6f78725439c7c2c) Author: Roman Gushchin <guro@xxxxxx> Date: Thu Aug 6 23:20:59 2020 -0700 mm: memcg/slab: charge individual slab objects instead of pages commit 964d4bd370d559d9bd8e4abc139e85d2753956fb (refs/bisect/skip-964d4bd370d559d9bd8e4abc139e85d2753956fb) Author: Roman Gushchin <guro@xxxxxx> Date: Thu Aug 6 23:20:56 2020 -0700 mm: memcg/slab: save obj_cgroup for non-root slab objects commit 286e04b8ed7a04279ae277f0f024430246ea5eec (refs/bisect/good-286e04b8ed7a04279ae277f0f024430246ea5eec) Author: Roman Gushchin <guro@xxxxxx> Date: Thu Aug 6 23:20:52 2020 -0700 mm: memcg/slab: allocate obj_cgroups for non-root slab pages