severe proc dentry lock contention

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

While debugging a performance issue, I found that thousands of threads exiting at around the same time can cause severe spinlock contention on the proc dentry "/proc/$parent_process_pid/task/". That is because each thread needs to clean up its pid file from that directory when it exits. See the following standalone test case, which simulates the scenario, and the perf top result on a v5.7 kernel. Any ideas on how to fix this?


   PerfTop:   48891 irqs/sec  kernel:95.6%  exact: 100.0% lost: 0/0 drop: 0/0 [4000Hz cycles],  (all, 72 CPUs) ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    66.10%  [kernel]                               [k] native_queued_spin_lock_slowpath
     1.13%  [kernel]                               [k] _raw_spin_lock
     0.84%  [kernel]                               [k] clear_page_erms
     0.82%  [kernel]                               [k] queued_write_lock_slowpath
     0.64%  [kernel]                               [k] proc_task_readdir
     0.61%  [kernel]                               [k] find_idlest_group.isra.95
     0.61%  [kernel]                               [k] syscall_return_via_sysret
     0.55%  [kernel]                               [k] entry_SYSCALL_64
     0.49%  [kernel]                               [k] memcpy_erms
     0.46%  [kernel]                               [k] update_cfs_group
     0.41%  [kernel]                               [k] get_pid_task
     0.39%  [kernel]                               [k] _raw_spin_lock_irqsave
     0.37%  [kernel]                               [k] __list_del_entry_valid
     0.34%  [kernel]                               [k] get_page_from_freelist
     0.34%  [kernel]                               [k] __d_lookup
     0.32%  [kernel]                               [k] update_load_avg
     0.31%  libc-2.17.so                           [.] get_next_seq
     0.27%  [kernel]                               [k] avc_has_perm_noaudit
     0.26%  [kernel]                               [k] __sched_text_start
     0.25%  [kernel]                               [k] selinux_inode_permission
     0.25%  [kernel]                               [k] __slab_free
     0.24%  [kernel]                               [k] detach_entity_cfs_rq
     0.23%  [kernel]                               [k] zap_pte_range
     0.22%  [kernel]                               [k] _find_next_bit.constprop.1
     0.22%  libc-2.17.so                           [.] vfprintf
     0.20%  libc-2.17.so                           [.] _int_malloc
     0.19%  [kernel]                               [k] _raw_spin_lock_irq
     0.18%  [kernel]                               [k] rb_erase
     0.18%  [kernel]                               [k] pid_revalidate
     0.18%  [kernel]                               [k] lockref_get_not_dead
     0.18%  [kernel]                               [k] __alloc_pages_nodemask
     0.17%  [kernel]                               [k] set_task_cpu
     0.17%  libc-2.17.so                           [.] __strcoll_l
     0.17%  [kernel]                               [k] do_syscall_64
     0.17%  [kernel]                               [k] __vmalloc_node_range
     0.17%  libc-2.17.so                           [.] _IO_vfscanf
     0.17%  [kernel]                               [k] refcount_dec_not_one
     0.15%  [kernel]                               [k] __task_pid_nr_ns
     0.15%  [kernel]                               [k] native_irq_return_iret
     0.15%  [kernel]                               [k] free_pcppages_bulk
     0.14%  [kernel]                               [k] kmem_cache_alloc
     0.14%  [kernel]                               [k] link_path_walk
     0.14%  libc-2.17.so                           [.] _int_free
     0.14%  [kernel]                               [k] __update_load_avg_cfs_rq
     0.14%  perf.5.7.0-master.20200601.ol7.x86_64  [.] 0x00000000000eac29
     0.13%  [kernel]                               [k] kmem_cache_free
     0.13%  [kernel]                               [k] number
     0.13%  [kernel]                               [k] memset_erms
     0.12%  [kernel]                               [k] proc_pid_status
     0.12%  [kernel]                               [k] __d_lookup_rcu


=========== runme.sh ==========

#!/bin/bash
# Reproducer driver: keeps restarting ./proc_race in the background and sends
# it SIGKILL once its /proc/<pid>/task listing grows past the requested thread
# count, so that all of its threads exit at about the same time.
#
# Usage: ./runme.sh [threads]    (default: 10000)

threads=${1:-10000}
prog=proc_race

# Respawn the test program forever; each instance is killed by the loop below.
while true; do "./$prog" "$threads"; done &

# Poll without sleeping: the repeated listing of /proc/<pid>/task is part of
# the load this test case generates.
while true; do
    # Use only the first match. ps can list more than one PID for $prog, and a
    # multi-word value would break both the emptiness test and the /proc path.
    pid=$(ps aux | grep "$prog" | grep -v grep | awk '{print $2; exit}')
    if [ -z "$pid" ]; then continue; fi
    # Note: "ls -l" also prints a leading "total" line, so this count is one
    # higher than the number of entries in the task directory.
    threadnum=$(ls -l "/proc/$pid/task" | wc -l)
    if [ "$threadnum" -gt "$threads" ]; then
        echo kill "$pid"
        kill -9 "$pid"
    fi
done


===========proc_race.c=========


#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Report a failure whose error code was handed back directly by the failing
 * call: store the code in errno so perror() can describe it, print msg, and
 * terminate the process with EXIT_FAILURE.
 */
#define handle_error_en(en, msg)    \
    do {                            \
        errno = en;                 \
        perror(msg);                \
        exit(EXIT_FAILURE);         \
    } while (0)

/*
 * Report a failure described by the current value of errno: print msg via
 * perror() and terminate the process with EXIT_FAILURE.
 */
#define handle_error(msg)           \
    do {                            \
        perror(msg);                \
        exit(EXIT_FAILURE);         \
    } while (0)

/* Per-thread bookkeeping record; main() allocates an array with one entry per thread. */
struct thread_info {
    pthread_t thread_id;    /* filled in by pthread_create(), later passed to pthread_join() */
    int       thread_num;   /* 1-based creation index assigned in main(); not read in the visible code */
};

/*
 * Thread start routine: spin forever, sleeping for one second once every
 * 1,000,000 loop iterations (including the very first one).
 *
 * arg is unused; it is declared so the function has the
 * void *(*)(void *) type that pthread_create() calls it with.
 * The function never returns.
 */
static void *child_thread(void *arg)
{
    /*
     * Start from a defined value and reset instead of counting up without
     * bound, so the counter is never read uninitialised and never overflows.
     */
    unsigned int spins = 0;

    (void)arg;

    for (;;) {
        if (spins == 0) {
            sleep(1);
        }
        if (++spins == 1000000u) {
            spins = 0;
        }
    }

    return NULL; /* not reached */
}

/*
 * Create num_threads threads running child_thread() and then wait for them.
 *
 * Usage: proc_race [num_threads]
 *
 * When exactly one argument is given it is parsed as the thread count and
 * must be a decimal integer in the range 1..INT_MAX; otherwise the default
 * of 10000 is used. Any failure is reported on stderr and the process exits
 * with EXIT_FAILURE.
 */
int main(int argc, char *argv[])
{
    int num_threads = 10000;
    struct thread_info *tinfo;
    int tnum;
    int s;

    if (argc == 2) {
        char *end;
        long parsed;

        /* strtol() reports bad input, unlike atoi(): reject garbage, */
        /* trailing characters, overflow and non-positive counts.     */
        errno = 0;
        parsed = strtol(argv[1], &end, 10);
        if (errno != 0 || end == argv[1] || *end != '\0' ||
            parsed < 1 || parsed > INT_MAX) {
            fprintf(stderr, "usage: %s [num_threads >= 1]\n", argv[0]);
            exit(EXIT_FAILURE);
        }
        num_threads = (int)parsed;
    }

    tinfo = calloc((size_t)num_threads, sizeof(*tinfo));
    if (tinfo == NULL)
        handle_error("calloc");

    for (tnum = 0; tnum < num_threads; tnum++) {
        tinfo[tnum].thread_num = tnum + 1;

        s = pthread_create(&tinfo[tnum].thread_id, NULL,
                child_thread, NULL);
        if (s != 0)
            handle_error_en(s, "pthread_create");
    }

    for (tnum = 0; tnum < num_threads; tnum++) {
        /*
         * The thread's return value is not heap memory, so it is neither
         * collected nor freed here.
         */
        s = pthread_join(tinfo[tnum].thread_id, NULL);
        if (s != 0)
            handle_error_en(s, "pthread_join");
    }

    free(tinfo);
    exit(EXIT_SUCCESS);
}

==========

Thanks,

Junxiao.




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux