On Fri, 07 Nov 2008 11:57:18 +0100 Tomasz Chmielewski <mangoo@xxxxxxxx> wrote: > FUJITA Tomonori schrieb: > > (...) > > >>> This patch enables you to create more luns (but eventually, tgtd > >>> crashes) ? > >> Yes, with this patch it crashes after ~110 luns or ~60 targets. > > > > I see, thanks. > > > > > > The first problem is that tgtd can't handle the failure to create > > pthreads. I'll try to fix this soon. > > > > The second problem is seems that glibc has the limit of the maximum > > number of pthreads per process. > > > > I guess that we create too many pthreads per lun. Probably, it would > > be better to have some pthreads per target (regardless of the number > > of luns). But it takes some time to fix the second problem. > > Any news on this? > I'll be hitting this limit (~60 targets and more) on my 32 bit targets > by the end of this year and this issue makes me feel uncomfortable :( I don't loosen the limit yet but this patch should fix the segfault and deadlocks caused by the target or lun creation. Can you try this? diff --git a/usr/bs.c b/usr/bs.c index 542ef55..cd19b86 100644 --- a/usr/bs.c +++ b/usr/bs.c @@ -27,6 +27,7 @@ #include "list.h" #include "tgtd.h" +#include "tgtadm_error.h" #include "util.h" #include "bs_thread.h" @@ -68,6 +69,9 @@ retry: goto out; } + if (info->stop) + goto out; + pthread_mutex_lock(&info->finished_lock); retest: if (list_empty(&info->finished_list)) { @@ -100,7 +104,7 @@ rewrite: goto retry; out: - return NULL; + pthread_exit(NULL); } static void bs_thread_request_done(int fd, int events, void *data) @@ -141,7 +145,11 @@ static void *bs_thread_worker_fn(void *arg) struct bs_thread_info *info = arg; struct scsi_cmd *cmd; - while (1) { + pthread_mutex_lock(&info->startup_lock); + dprintf("started this thread\n"); + pthread_mutex_unlock(&info->startup_lock); + + while (!info->stop) { pthread_mutex_lock(&info->pending_lock); retest: if (list_empty(&info->pending_list)) { @@ -170,7 +178,7 @@ static void *bs_thread_worker_fn(void *arg) pthread_cond_signal(&info->finished_cond); } - return NULL; + pthread_exit(NULL); } int bs_thread_open(struct bs_thread_info *info, request_func_t *rfn, @@ -189,35 +197,50 @@ int bs_thread_open(struct bs_thread_info *info, request_func_t *rfn, pthread_mutex_init(&info->finished_lock, NULL); pthread_mutex_init(&info->pending_lock, NULL); + pthread_mutex_init(&info->startup_lock, NULL); ret = pipe(info->command_fd); - if (ret) + if (ret) { + eprintf("failed to create command pipe, %m\n"); goto destroy_cond_mutex; + } ret = pipe(info->done_fd); - if (ret) + if (ret) { + eprintf("failed to done command pipe, %m\n"); goto close_command_fd; + } ret = tgt_event_add(info->done_fd[0], EPOLLIN, bs_thread_request_done, info); - if (ret) + if (ret) { + eprintf("failed to add epoll event\n"); goto close_done_fd; + } ret = pthread_create(&info->ack_thread, NULL, bs_thread_ack_fn, info); - if (ret) + if (ret) { + eprintf("failed to create an ack thread, %s\n", strerror(ret)); goto event_del; + } if (nr_threads > ARRAY_SIZE(info->worker_thread)) { eprintf("too many threads %d\n", nr_threads); nr_threads = ARRAY_SIZE(info->worker_thread); } + pthread_mutex_lock(&info->startup_lock); for (i = 0; i < nr_threads; i++) { ret = pthread_create(&info->worker_thread[i], NULL, bs_thread_worker_fn, info); - if (ret) - goto destroy_threads; - } + if (ret) { + eprintf("failed to create a worker thread, %d %s\n", + i, strerror(ret)); + if (ret) + goto destroy_threads; + } + } + pthread_mutex_unlock(&info->startup_lock); rewrite: ret = write(info->command_fd[1], &ret, sizeof(ret)); if (ret < 0) { @@ -228,19 +251,17 @@ rewrite: return 0; destroy_threads: + info->stop = 1; write(info->command_fd[1], &ret, sizeof(ret)); - pthread_cancel(info->ack_thread); - pthread_cond_signal(&info->finished_cond); pthread_join(info->ack_thread, NULL); - info->stop = 1; - for (i = 0; info->worker_thread[i]; i++) { - pthread_cancel(info->worker_thread[i]); - pthread_cond_signal(&info->pending_cond); - } + eprintf("stopped the ack thread\n"); - for (i = 0; info->worker_thread[i]; i++) - pthread_join(info->worker_thread[i], NULL); + pthread_mutex_unlock(&info->startup_lock); + for (; i > 0; i--) { + pthread_join(info->worker_thread[i - 1], NULL); + eprintf("stopped the worker thread %d\n", i - 1); + } event_del: tgt_event_del(info->done_fd[0]); close_done_fd: @@ -254,8 +275,9 @@ destroy_cond_mutex: pthread_cond_destroy(&info->pending_cond); pthread_mutex_destroy(&info->finished_lock); pthread_mutex_destroy(&info->pending_lock); + pthread_mutex_destroy(&info->startup_lock); - return -1; + return TGTADM_NOMEM; } void bs_thread_close(struct bs_thread_info *info) @@ -276,6 +298,7 @@ void bs_thread_close(struct bs_thread_info *info) pthread_cond_destroy(&info->pending_cond); pthread_mutex_destroy(&info->finished_lock); pthread_mutex_destroy(&info->pending_lock); + pthread_mutex_destroy(&info->startup_lock); tgt_event_del(info->done_fd[0]); diff --git a/usr/bs_thread.h b/usr/bs_thread.h index b2975a5..9dfbbd5 100644 --- a/usr/bs_thread.h +++ b/usr/bs_thread.h @@ -20,6 +20,8 @@ struct bs_thread_info { /* protected by pending_lock */ struct list_head pending_list; + pthread_mutex_t startup_lock; + int command_fd[2]; int done_fd[2]; diff --git a/usr/tgtd.c b/usr/tgtd.c index 1a3cc02..fa0fb7b 100644 --- a/usr/tgtd.c +++ b/usr/tgtd.c @@ -30,6 +30,7 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <sys/resource.h> #include <sys/epoll.h> #include "list.h" @@ -96,6 +97,37 @@ static int oom_adjust(void) return 0; } +static int nr_file_adjust(void) +{ + int ret, fd; + char path[] = "/proc/sys/fs/nr_open"; + char buf[64]; + struct rlimit rlim; + + /* Avoid oom-killer */ + fd = open(path, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "can't open %s, %m\n", path); + return errno; + } + ret = read(fd, buf, sizeof(buf)); + if (ret < 0) { + fprintf(stderr, "can't read %s, %m\n", path); + return errno; + } + close(fd); + + rlim.rlim_cur = rlim.rlim_max = atoi(buf); + + ret = setrlimit(RLIMIT_NOFILE, &rlim); + if (ret < 0) { + fprintf(stderr, "can't adjust nr_open %m\n"); + return errno; + } + + return 0; +} + int tgt_event_add(int fd, int events, event_handler_t handler, void *data) { struct epoll_event ev; @@ -340,6 +372,10 @@ int main(int argc, char **argv) if (err) exit(1); + err = nr_file_adjust(); + if (err) + exit(1); + err = log_init(program_name, LOG_SPACE_SIZE, is_daemon, is_debug); if (err) exit(1); -- To unsubscribe from this list: send the line "unsubscribe stgt" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html