Currently my_svc_run does not handle poll time allowing idle TCP connections to remain ESTABLISHED indefinitely. When the number of connections reaches the limit the open file descriptors (ulimit -n) then accept(2) fails with EMFILE. Since libtirpc does not handle EMFILE returned from accept(2) this get my_svc_run into a tight loop calling accept(2) resulting in the RPC service being down, it's no longer able to service any requests. Fix by removing idle connections when select(2) times out in my_svc_run and when open(2) returns EMFILE/ENFILE in auth_reload. Signed-off-by: dai.ngo@xxxxxxxxxx --- support/export/auth.c | 12 ++++++++++-- utils/mountd/svc_run.c | 10 ++++++++-- utils/statd/svc_run.c | 11 ++++++++--- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/support/export/auth.c b/support/export/auth.c index 03ce4b8a0e1e..0bb189fb4037 100644 --- a/support/export/auth.c +++ b/support/export/auth.c @@ -81,6 +81,8 @@ check_useipaddr(void) cache_flush(); } +extern void __svc_destroy_idle(int, bool_t); + unsigned int auth_reload() { @@ -91,8 +93,14 @@ auth_reload() int fd; if ((fd = open(etab.statefn, O_RDONLY)) < 0) { - xlog(L_FATAL, "couldn't open %s", etab.statefn); - } else if (fstat(fd, &stb) < 0) { + if (errno != EMFILE && errno != ENFILE) + xlog(L_FATAL, "couldn't open %s", etab.statefn); + /* remove idle */ + __svc_destroy_idle(5, FALSE); + if ((fd = open(etab.statefn, O_RDONLY)) < 0) + xlog(L_FATAL, "couldn't open %s", etab.statefn); + } + if (fstat(fd, &stb) < 0) { xlog(L_FATAL, "couldn't stat %s", etab.statefn); close(fd); } else if (last_fd != -1 && stb.st_ino == last_inode) { diff --git a/utils/mountd/svc_run.c b/utils/mountd/svc_run.c index 167b9757bde2..ada8d0ac8844 100644 --- a/utils/mountd/svc_run.c +++ b/utils/mountd/svc_run.c @@ -59,6 +59,7 @@ #include "export.h" void my_svc_run(void); +extern void __svc_destroy_idle(int , bool_t); #if defined(__GLIBC__) && LONG_MAX != INT_MAX /* bug in glibc 2.3.6 and earlier, we need @@ -95,6 +96,7 @@ my_svc_run(void) { fd_set readfds; int selret; + struct timeval tv; for (;;) { @@ -102,8 +104,10 @@ my_svc_run(void) cache_set_fds(&readfds); v4clients_set_fds(&readfds); + tv.tv_sec = 30; + tv.tv_usec = 0; selret = select(FD_SETSIZE, &readfds, - (void *) 0, (void *) 0, (struct timeval *) 0); + (void *) 0, (void *) 0, &tv); switch (selret) { @@ -113,7 +117,9 @@ my_svc_run(void) continue; xlog(L_ERROR, "my_svc_run() - select: %m"); return; - + case 0: + __svc_destroy_idle(30, FALSE); + continue; default: selret -= cache_process_req(&readfds); selret -= v4clients_process(&readfds); diff --git a/utils/statd/svc_run.c b/utils/statd/svc_run.c index e343c7689860..8888788c81d0 100644 --- a/utils/statd/svc_run.c +++ b/utils/statd/svc_run.c @@ -59,6 +59,7 @@ void my_svc_exit(void); static int svc_stop = 0; +extern void __svc_destroy_idle(int , bool_t); /* * This is the global notify list onto which all SM_NOTIFY and CALLBACK @@ -85,6 +86,7 @@ my_svc_run(int sockfd) FD_SET_TYPE readfds; int selret; time_t now; + struct timeval tv; svc_stop = 0; @@ -101,7 +103,6 @@ my_svc_run(int sockfd) /* Set notify sockfd for waiting for reply */ FD_SET(sockfd, &readfds); if (notify) { - struct timeval tv; tv.tv_sec = NL_WHEN(notify) - now; tv.tv_usec = 0; @@ -111,8 +112,10 @@ my_svc_run(int sockfd) (void *) 0, (void *) 0, &tv); } else { xlog(D_GENERAL, "Waiting for client connections"); + tv.tv_sec = 30; + tv.tv_usec = 0; selret = select(FD_SETSIZE, &readfds, - (void *) 0, (void *) 0, (struct timeval *) 0); + (void *) 0, (void *) 0, &tv); } switch (selret) { @@ -124,7 +127,9 @@ my_svc_run(int sockfd) return; case 0: - /* A notify/callback timed out. */ + /* A notify/callback/wait for client timed out. */ + if (!notify) + __svc_destroy_idle(30, FALSE); continue; default: -- 2.26.2