Hi Scott- > On May 26, 2021, at 1:08 PM, Steve Dickson <SteveD@xxxxxxxxxx> wrote: > > Hey! > > Again... a few very small teaks > > On 5/25/21 2:00 PM, Scott Mayhew wrote: >> Add a global list of active upcalls and a watchdog thread that walks the >> list, looking for threads running longer than timeout seconds. By >> default, the watchdog thread will cancel these threads and report an >> error of -ETIMEDOUT to the kernel. Passing the -C option or setting >> cancel-timed-out-upcalls=0 in nfs.conf disables this behavior and >> causes the watchdog thread to simply log an error message instead. The >> upcall timeout can be specified by passing the -U option or by setting >> the upcall-timeout parameter in nfs.conf. Not a critique of this patch, but I'm wondering if there's any way to get a sense of why each stuck thread was hung so we (or the local admin) can understand the underlying problems. Maybe that's something for another day. >> Signed-off-by: Scott Mayhew <smayhew@xxxxxxxxxx> >> --- >> nfs.conf | 2 + >> utils/gssd/gssd.c | 125 +++++++++++++++++++++++++++++++++++++++- >> utils/gssd/gssd.h | 15 +++++ >> utils/gssd/gssd.man | 31 +++++++++- >> utils/gssd/gssd_proc.c | 127 ++++++++++++++++++++++++++++++++--------- >> 5 files changed, 270 insertions(+), 30 deletions(-) >> >> diff --git a/nfs.conf b/nfs.conf >> index 31994f61..7a987788 100644 >> --- a/nfs.conf >> +++ b/nfs.conf >> @@ -25,6 +25,8 @@ >> # cred-cache-directory= >> # preferred-realm= >> # set-home=1 >> +# upcall-timeout=30 >> +# cancel-timed-out-upcalls=1 >> # >> [lockd] >> # port=0 >> diff --git a/utils/gssd/gssd.c b/utils/gssd/gssd.c >> index eb440470..54a1ea3f 100644 >> --- a/utils/gssd/gssd.c >> +++ b/utils/gssd/gssd.c >> @@ -95,9 +95,15 @@ static bool use_gssproxy = false; >> pthread_mutex_t clp_lock = PTHREAD_MUTEX_INITIALIZER; >> static bool signal_received = false; >> static struct event_base *evbase = NULL; >> +pthread_mutex_t active_thread_list_lock = PTHREAD_MUTEX_INITIALIZER; >> + >> +int upcall_timeout = DEF_UPCALL_TIMEOUT; >> +static bool cancel_timed_out_upcalls = true; >> >> TAILQ_HEAD(topdir_list_head, topdir) topdir_list; >> >> +TAILQ_HEAD(active_thread_list_head, upcall_thread_info) active_thread_list; >> + >> struct topdir { >> TAILQ_ENTRY(topdir) list; >> TAILQ_HEAD(clnt_list_head, clnt_info) clnt_list; >> @@ -436,6 +442,100 @@ gssd_clnt_krb5_cb(int UNUSED(fd), short UNUSED(which), void *data) >> handle_krb5_upcall(clp); >> } >> >> +static int >> +scan_active_thread_list(void) >> +{ >> + struct upcall_thread_info *info; >> + struct timespec now; >> + unsigned int sleeptime; >> + bool sleeptime_set = false; >> + int err; >> + void *tret, *saveprev; >> + >> + sleeptime = upcall_timeout; >> + pthread_mutex_lock(&active_thread_list_lock); >> + clock_gettime(CLOCK_MONOTONIC, &now); >> + TAILQ_FOREACH(info, &active_thread_list, list) { >> + err = pthread_tryjoin_np(info->tid, &tret); >> + switch (err) { >> + case 0: >> + if (tret == PTHREAD_CANCELED) >> + printerr(3, "watchdog: thread id 0x%lx cancelled successfully\n", >> + info->tid); >> + saveprev = info->list.tqe_prev; >> + TAILQ_REMOVE(&active_thread_list, info, list); >> + free(info); >> + info = saveprev; >> + break; >> + case EBUSY: >> + if (now.tv_sec >= info->timeout.tv_sec) { >> + if (cancel_timed_out_upcalls && !info->cancelled) { >> + printerr(0, "watchdog: thread id 0x%lx timed out\n", >> + info->tid); >> + pthread_cancel(info->tid); >> + info->cancelled = true; >> + do_error_downcall(info->fd, info->uid, -ETIMEDOUT); >> + } else { >> + printerr(0, "watchdog: thread id 0x%lx running for %ld seconds\n", >> + info->tid, >> + now.tv_sec - info->timeout.tv_sec + upcall_timeout); > If people are going to used the -C flag they are saying they want > to ignore hung threads so I'm thinking with printerr(0) we would > be filling up their logs about messages they don't care about. > So I'm thinking we should change this to a printerr(1) > >> + } >> + } else if (!sleeptime_set) { >> + sleeptime = info->timeout.tv_sec - now.tv_sec; >> + sleeptime_set = true; >> + } >> + break; >> + default: >> + printerr(0, "watchdog: attempt to join thread id 0x%lx returned %d (%s)!\n", >> + info->tid, err, strerror(err)); >> + break; >> + } >> + } >> + pthread_mutex_unlock(&active_thread_list_lock); >> + >> + return sleeptime; >> +} >> + >> +static void * >> +watchdog_thread_fn(void *UNUSED(arg)) >> +{ >> + unsigned int sleeptime; >> + >> + for (;;) { >> + sleeptime = scan_active_thread_list(); >> + printerr(4, "watchdog: sleeping %u secs\n", sleeptime); >> + sleep(sleeptime); >> + } >> + return (void *)0; >> +} >> + >> +static int >> +start_watchdog_thread(void) >> +{ >> + pthread_attr_t attr; >> + pthread_t th; >> + int ret; >> + >> + ret = pthread_attr_init(&attr); >> + if (ret != 0) { >> + printerr(0, "ERROR: failed to init pthread attr: ret %d: %s\n", >> + ret, strerror(errno)); >> + return ret; >> + } >> + ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); >> + if (ret != 0) { >> + printerr(0, "ERROR: failed to create pthread attr: ret %d: %s\n", >> + ret, strerror(errno)); >> + return ret; >> + } >> + ret = pthread_create(&th, &attr, watchdog_thread_fn, NULL); >> + if (ret != 0) { >> + printerr(0, "ERROR: pthread_create failed: ret %d: %s\n", >> + ret, strerror(errno)); >> + } >> + return ret; >> +} >> + >> static struct clnt_info * >> gssd_get_clnt(struct topdir *tdi, const char *name) >> { >> @@ -825,7 +925,7 @@ sig_die(int signal) >> static void >> usage(char *progname) >> { >> - fprintf(stderr, "usage: %s [-f] [-l] [-M] [-n] [-v] [-r] [-p pipefsdir] [-k keytab] [-d ccachedir] [-t timeout] [-R preferred realm] [-D] [-H]\n", >> + fprintf(stderr, "usage: %s [-f] [-l] [-M] [-n] [-v] [-r] [-p pipefsdir] [-k keytab] [-d ccachedir] [-t timeout] [-R preferred realm] [-D] [-H] [-U upcall timeout] [-C]\n", >> progname); >> exit(1); >> } >> @@ -846,6 +946,9 @@ read_gss_conf(void) >> #endif >> context_timeout = conf_get_num("gssd", "context-timeout", context_timeout); >> rpc_timeout = conf_get_num("gssd", "rpc-timeout", rpc_timeout); >> + upcall_timeout = conf_get_num("gssd", "upcall-timeout", upcall_timeout); >> + cancel_timed_out_upcalls = conf_get_bool("gssd", "cancel-timed-out-upcalls", >> + cancel_timed_out_upcalls); >> s = conf_get_str("gssd", "pipefs-directory"); >> if (!s) >> s = conf_get_str("general", "pipefs-directory"); >> @@ -887,7 +990,7 @@ main(int argc, char *argv[]) >> verbosity = conf_get_num("gssd", "verbosity", verbosity); >> rpc_verbosity = conf_get_num("gssd", "rpc-verbosity", rpc_verbosity); >> >> - while ((opt = getopt(argc, argv, "HDfvrlmnMp:k:d:t:T:R:")) != -1) { >> + while ((opt = getopt(argc, argv, "HDfvrlmnMp:k:d:t:T:R:U:C")) != -1) { >> switch (opt) { >> case 'f': >> fg = 1; >> @@ -938,6 +1041,11 @@ main(int argc, char *argv[]) >> case 'H': >> set_home = false; >> break; >> + case 'U': >> + upcall_timeout = atoi(optarg); >> + break; >> + case 'C': >> + cancel_timed_out_upcalls = false; >> default: >> usage(argv[0]); >> break; >> @@ -1010,6 +1118,11 @@ main(int argc, char *argv[]) >> else >> progname = argv[0]; >> >> + if (upcall_timeout > MAX_UPCALL_TIMEOUT) >> + upcall_timeout = MAX_UPCALL_TIMEOUT; >> + else if (upcall_timeout < MIN_UPCALL_TIMEOUT) >> + upcall_timeout = MIN_UPCALL_TIMEOUT; >> + >> initerr(progname, verbosity, fg); >> #ifdef HAVE_LIBTIRPC_SET_DEBUG >> /* >> @@ -1068,6 +1181,14 @@ main(int argc, char *argv[]) >> } >> event_add(inotify_ev, NULL); >> >> + TAILQ_INIT(&active_thread_list); >> + >> + rc = start_watchdog_thread(); >> + if (rc != 0) { >> + printerr(0, "ERROR: failed to start watchdog thread: %d\n", rc); >> + exit(EXIT_FAILURE); >> + } >> + >> TAILQ_INIT(&topdir_list); >> gssd_scan(); >> daemon_ready(); >> diff --git a/utils/gssd/gssd.h b/utils/gssd/gssd.h >> index 6d53647e..ad0d1d93 100644 >> --- a/utils/gssd/gssd.h >> +++ b/utils/gssd/gssd.h >> @@ -50,6 +50,11 @@ >> #define GSSD_DEFAULT_KEYTAB_FILE "/etc/krb5.keytab" >> #define GSSD_SERVICE_NAME "nfs" >> #define RPC_CHAN_BUF_SIZE 32768 >> + > I think we should add a "/* seconds */" > so you don't have dig in the code > to see what interval is >> +#define MIN_UPCALL_TIMEOUT 5 >> +#define DEF_UPCALL_TIMEOUT 30 >> +#define MAX_UPCALL_TIMEOUT 600 >> + >> /* >> * The gss mechanisms that we can handle >> */ >> @@ -91,10 +96,20 @@ struct clnt_upcall_info { >> char *service; >> }; >> >> +struct upcall_thread_info { >> + TAILQ_ENTRY(upcall_thread_info) list; >> + pthread_t tid; >> + struct timespec timeout; >> + uid_t uid; >> + int fd; >> + bool cancelled; >> +}; >> + >> void handle_krb5_upcall(struct clnt_info *clp); >> void handle_gssd_upcall(struct clnt_info *clp); >> void free_upcall_info(struct clnt_upcall_info *info); >> void gssd_free_client(struct clnt_info *clp); >> +int do_error_downcall(int k5_fd, uid_t uid, int err); >> >> >> #endif /* _RPC_GSSD_H_ */ >> diff --git a/utils/gssd/gssd.man b/utils/gssd/gssd.man >> index 9ae6def9..20fea729 100644 >> --- a/utils/gssd/gssd.man >> +++ b/utils/gssd/gssd.man >> @@ -8,7 +8,7 @@ >> rpc.gssd \- RPCSEC_GSS daemon >> .SH SYNOPSIS >> .B rpc.gssd >> -.RB [ \-DfMnlvrH ] >> +.RB [ \-DfMnlvrHC ] >> .RB [ \-k >> .IR keytab ] >> .RB [ \-p >> @@ -17,6 +17,10 @@ rpc.gssd \- RPCSEC_GSS daemon >> .IR ccachedir ] >> .RB [ \-t >> .IR timeout ] >> +.RB [ \-T >> +.IR timeout ] >> +.RB [ \-U >> +.IR timeout ] >> .RB [ \-R >> .IR realm ] >> .SH INTRODUCTION >> @@ -275,7 +279,7 @@ seconds, which allows changing Kerberos tickets and identities frequently. >> The default is no explicit timeout, which means the kernel context will live >> the lifetime of the Kerberos service ticket used in its creation. >> .TP >> -.B -T timeout >> +.BI "-T " timeout >> Timeout, in seconds, to create an RPC connection with a server while >> establishing an authenticated gss context for a user. >> The default timeout is set to 5 seconds. >> @@ -283,6 +287,18 @@ If you get messages like "WARNING: can't create tcp rpc_clnt to server >> %servername% for user with uid %uid%: RPC: Remote system error - >> Connection timed out", you should consider an increase of this timeout. >> .TP >> +.BI "-U " timeout >> +Timeout, in seconds, for upcall threads. Threads executing longer than >> +.I timeout >> +seconds will be cancelled and an error of -ETIMEDOUT will be reported >> +to the kernel. The default >> +.I timeout >> +is 30 seconds. The minimum is 5 seconds. The maximum is 600 seconds. >> +.TP >> +.B -C >> +Instead of cancelling upcall threads that have timed out, cause an error >> +message to be logged to the syslog (no error will be reported to the kernel). >> +.TP >> .B -H >> Avoids setting $HOME to "/". This allows rpc.gssd to read per user k5identity >> files versus trying to read /.k5identity for each user. >> @@ -350,6 +366,17 @@ Equivalent to >> Equivalent to >> .BR -R . >> .TP >> +.B upcall-timeout >> +Equivalent to >> +.BR -U . >> +.TP >> +.B cancel-timed-out-upcalls >> +Setting to >> +.B false >> +is equivalent to providing the >> +.B -C >> +flag. >> +.TP >> .B set-home >> Setting to >> .B false >> diff --git a/utils/gssd/gssd_proc.c b/utils/gssd/gssd_proc.c >> index ba508b30..ac7b1d1c 100644 >> --- a/utils/gssd/gssd_proc.c >> +++ b/utils/gssd/gssd_proc.c >> @@ -81,11 +81,23 @@ >> #include "gss_names.h" >> >> extern pthread_mutex_t clp_lock; >> +extern pthread_mutex_t active_thread_list_lock; >> +extern int upcall_timeout; >> +extern TAILQ_HEAD(active_thread_list_head, upcall_thread_info) active_thread_list; >> >> /* Encryption types supported by the kernel rpcsec_gss code */ >> int num_krb5_enctypes = 0; >> krb5_enctype *krb5_enctypes = NULL; >> >> +struct cleanup_args { >> + OM_uint32 *min_stat; >> + gss_buffer_t acceptor; >> + gss_buffer_t token; >> + struct authgss_private_data *pd; >> + AUTH **auth; >> + CLIENT **rpc_clnt; >> +}; >> + >> /* >> * Parse the supported encryption type information >> */ >> @@ -184,7 +196,7 @@ out_err: >> return; >> } >> >> -static int >> +int >> do_error_downcall(int k5_fd, uid_t uid, int err) >> { >> char buf[1024]; >> @@ -607,14 +619,37 @@ out: >> return auth; >> } >> >> +static void >> +cleanup_handler(void *arg) >> +{ >> + struct cleanup_args *args = (struct cleanup_args *)arg; >> + >> + gss_release_buffer(args->min_stat, args->acceptor); >> + if (args->token->value) >> + free(args->token->value); >> +#ifdef HAVE_AUTHGSS_FREE_PRIVATE_DATA >> + if (args->pd->pd_ctx_hndl.length != 0 || args->pd->pd_ctx != 0) >> + authgss_free_private_data(args->pd); >> +#endif >> + if (*args->auth) >> + AUTH_DESTROY(*args->auth); >> + if (*args->rpc_clnt) >> + clnt_destroy(*args->rpc_clnt); >> +} >> + >> /* >> * this code uses the userland rpcsec gss library to create a krb5 >> * context on behalf of the kernel >> */ >> static void >> -process_krb5_upcall(struct clnt_info *clp, uid_t uid, int fd, char *srchost, >> - char *tgtname, char *service) >> +process_krb5_upcall(struct clnt_upcall_info *info) >> { >> + struct clnt_info *clp = info->clp; >> + uid_t uid = info->uid; >> + int fd = info->fd; >> + char *srchost = info->srchost; >> + char *tgtname = info->target; >> + char *service = info->service; >> CLIENT *rpc_clnt = NULL; >> AUTH *auth = NULL; >> struct authgss_private_data pd; >> @@ -624,11 +659,13 @@ process_krb5_upcall(struct clnt_info *clp, uid_t uid, int fd, char *srchost, >> gss_name_t gacceptor = GSS_C_NO_NAME; >> gss_OID mech; >> gss_buffer_desc acceptor = {0}; >> + struct cleanup_args cleanup_args = {&min_stat, &acceptor, &token, &pd, &auth, &rpc_clnt}; >> >> token.length = 0; >> token.value = NULL; >> memset(&pd, 0, sizeof(struct authgss_private_data)); >> >> + pthread_cleanup_push(cleanup_handler, &cleanup_args); >> /* >> * If "service" is specified, then the kernel is indicating that >> * we must use machine credentials for this request. (Regardless >> @@ -650,6 +687,7 @@ process_krb5_upcall(struct clnt_info *clp, uid_t uid, int fd, char *srchost, >> * used for this case is not important. >> * >> */ >> + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); >> if (uid != 0 || (uid == 0 && root_uses_machine_creds == 0 && >> service == NULL)) { >> >> @@ -670,15 +708,21 @@ process_krb5_upcall(struct clnt_info *clp, uid_t uid, int fd, char *srchost, >> goto out_return_error; >> } >> } >> + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); >> + pthread_testcancel(); >> >> + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); >> if (!authgss_get_private_data(auth, &pd)) { >> printerr(1, "WARNING: Failed to obtain authentication " >> "data for user with uid %d for server %s\n", >> uid, clp->servername); >> goto out_return_error; >> } >> + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); >> + pthread_testcancel(); >> >> /* Grab the context lifetime and acceptor name out of the ctx. */ >> + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); >> maj_stat = gss_inquire_context(&min_stat, pd.pd_ctx, NULL, &gacceptor, >> &lifetime_rec, &mech, NULL, NULL, NULL); >> >> @@ -690,37 +734,35 @@ process_krb5_upcall(struct clnt_info *clp, uid_t uid, int fd, char *srchost, >> get_hostbased_client_buffer(gacceptor, mech, &acceptor); >> gss_release_name(&min_stat, &gacceptor); >> } >> + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); >> + pthread_testcancel(); >> >> /* >> * The serialization can mean turning pd.pd_ctx into a lucid context. If >> * that happens then the pd.pd_ctx will be unusable, so we must never >> * try to use it after this point. >> */ >> + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); >> if (serialize_context_for_kernel(&pd.pd_ctx, &token, &krb5oid, NULL)) { >> printerr(1, "WARNING: Failed to serialize krb5 context for " >> "user with uid %d for server %s\n", >> uid, clp->servername); >> goto out_return_error; >> } >> + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); >> + pthread_testcancel(); >> >> do_downcall(fd, uid, &pd, &token, lifetime_rec, &acceptor); >> >> out: >> - gss_release_buffer(&min_stat, &acceptor); >> - if (token.value) >> - free(token.value); >> -#ifdef HAVE_AUTHGSS_FREE_PRIVATE_DATA >> - if (pd.pd_ctx_hndl.length != 0 || pd.pd_ctx != 0) >> - authgss_free_private_data(&pd); >> -#endif >> - if (auth) >> - AUTH_DESTROY(auth); >> - if (rpc_clnt) >> - clnt_destroy(rpc_clnt); >> + pthread_cleanup_pop(1); >> >> return; >> >> out_return_error: >> + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); >> + pthread_testcancel(); >> + >> do_error_downcall(fd, uid, downcall_err); >> goto out; >> } >> @@ -786,38 +828,71 @@ void free_upcall_info(struct clnt_upcall_info *info) >> } >> >> static void >> -gssd_work_thread_fn(struct clnt_upcall_info *info) >> +cleanup_clnt_upcall_info(void *arg) >> { >> - process_krb5_upcall(info->clp, info->uid, info->fd, info->srchost, info->target, info->service); >> + struct clnt_upcall_info *info = (struct clnt_upcall_info *)arg; >> + >> free_upcall_info(info); >> } >> >> +static void >> +gssd_work_thread_fn(struct clnt_upcall_info *info) >> +{ >> + pthread_cleanup_push(cleanup_clnt_upcall_info, info); >> + process_krb5_upcall(info); >> + pthread_cleanup_pop(1); >> +} >> + >> +static struct upcall_thread_info * >> +alloc_upcall_thread_info(void) >> +{ >> + struct upcall_thread_info *info; >> + >> + info = malloc(sizeof(struct upcall_thread_info)); >> + if (info == NULL) >> + return NULL; >> + memset(info, 0, sizeof(*info)); >> + return info; >> +} >> + >> static int >> -start_upcall_thread(void (*func)(struct clnt_upcall_info *), void *info) >> +start_upcall_thread(void (*func)(struct clnt_upcall_info *), struct clnt_upcall_info *info) >> { >> pthread_attr_t attr; >> pthread_t th; >> + struct upcall_thread_info *tinfo; >> int ret; >> >> + tinfo = alloc_upcall_thread_info(); >> + if (!tinfo) >> + return -ENOMEM; >> + tinfo->fd = info->fd; >> + tinfo->uid = info->uid; >> + tinfo->cancelled = false; >> + >> ret = pthread_attr_init(&attr); >> if (ret != 0) { >> printerr(0, "ERROR: failed to init pthread attr: ret %d: %s\n", >> ret, strerror(errno)); >> - return ret; >> - } >> - ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); >> - if (ret != 0) { >> - printerr(0, "ERROR: failed to create pthread attr: ret %d: " >> - "%s\n", ret, strerror(errno)); >> + free(tinfo); >> return ret; >> } >> >> ret = pthread_create(&th, &attr, (void *)func, (void *)info); >> - if (ret != 0) >> + if (ret != 0) { >> printerr(0, "ERROR: pthread_create failed: ret %d: %s\n", >> ret, strerror(errno)); >> - else >> - printerr(0, "created thread id 0x%lx\n", th); >> + free(tinfo); >> + return ret; >> + } >> + printerr(1, "created thread id 0x%lx\n", th); > This will be removed... >> + tinfo->tid = th; >> + pthread_mutex_lock(&active_thread_list_lock); >> + clock_gettime(CLOCK_MONOTONIC, &tinfo->timeout); >> + tinfo->timeout.tv_sec += upcall_timeout; >> + TAILQ_INSERT_TAIL(&active_thread_list, tinfo, list); >> + pthread_mutex_unlock(&active_thread_list_lock); >> + >> return ret; >> } >> >> > > Overall I think the code is very well written with > one exception... The lack of comments. I think it > would be very useful to let the reader know what > you are doing and why.... But by no means is > that a show stopper. Nice work! > > I'm still doing more testing and nothing > is breaking... So it is looking pretty good! > > steved. > -- Chuck Lever