On Wed, 26 May 2021, Chuck Lever III wrote: > Hi Scott- > > > > On May 26, 2021, at 1:08 PM, Steve Dickson <SteveD@xxxxxxxxxx> wrote: > > > > Hey! > > > > Again... a few very small teaks > > > > On 5/25/21 2:00 PM, Scott Mayhew wrote: > >> Add a global list of active upcalls and a watchdog thread that walks the > >> list, looking for threads running longer than timeout seconds. By > >> default, the watchdog thread will cancel these threads and report an > >> error of -ETIMEDOUT to the kernel. Passing the -C option or setting > >> cancel-timed-out-upcalls=0 in nfs.conf disables this behavior and > >> causes the watchdog thread to simply log an error message instead. The > >> upcall timeout can be specified by passing the -U option or by setting > >> the upcall-timeout parameter in nfs.conf. > > Not a critique of this patch, but I'm wondering if there's any > way to get a sense of why each stuck thread was hung so we (or > the local admin) can understand the underlying problems. Maybe > that's something for another day. I had planned on doing that originally, but most of the actual interesting stuff happens in libtirpc, libgssapi_krb5, etc. Plus, it took a bit longer than I thought it would just to get this patch working... so yes, maybe another day :) -Scott > > > >> Signed-off-by: Scott Mayhew <smayhew@xxxxxxxxxx> > >> --- > >> nfs.conf | 2 + > >> utils/gssd/gssd.c | 125 +++++++++++++++++++++++++++++++++++++++- > >> utils/gssd/gssd.h | 15 +++++ > >> utils/gssd/gssd.man | 31 +++++++++- > >> utils/gssd/gssd_proc.c | 127 ++++++++++++++++++++++++++++++++--------- > >> 5 files changed, 270 insertions(+), 30 deletions(-) > >> > >> diff --git a/nfs.conf b/nfs.conf > >> index 31994f61..7a987788 100644 > >> --- a/nfs.conf > >> +++ b/nfs.conf > >> @@ -25,6 +25,8 @@ > >> # cred-cache-directory= > >> # preferred-realm= > >> # set-home=1 > >> +# upcall-timeout=30 > >> +# cancel-timed-out-upcalls=1 > >> # > >> [lockd] > >> # port=0 > >> diff --git a/utils/gssd/gssd.c b/utils/gssd/gssd.c > >> index eb440470..54a1ea3f 100644 > >> --- a/utils/gssd/gssd.c > >> +++ b/utils/gssd/gssd.c > >> @@ -95,9 +95,15 @@ static bool use_gssproxy = false; > >> pthread_mutex_t clp_lock = PTHREAD_MUTEX_INITIALIZER; > >> static bool signal_received = false; > >> static struct event_base *evbase = NULL; > >> +pthread_mutex_t active_thread_list_lock = PTHREAD_MUTEX_INITIALIZER; > >> + > >> +int upcall_timeout = DEF_UPCALL_TIMEOUT; > >> +static bool cancel_timed_out_upcalls = true; > >> > >> TAILQ_HEAD(topdir_list_head, topdir) topdir_list; > >> > >> +TAILQ_HEAD(active_thread_list_head, upcall_thread_info) active_thread_list; > >> + > >> struct topdir { > >> TAILQ_ENTRY(topdir) list; > >> TAILQ_HEAD(clnt_list_head, clnt_info) clnt_list; > >> @@ -436,6 +442,100 @@ gssd_clnt_krb5_cb(int UNUSED(fd), short UNUSED(which), void *data) > >> handle_krb5_upcall(clp); > >> } > >> > >> +static int > >> +scan_active_thread_list(void) > >> +{ > >> + struct upcall_thread_info *info; > >> + struct timespec now; > >> + unsigned int sleeptime; > >> + bool sleeptime_set = false; > >> + int err; > >> + void *tret, *saveprev; > >> + > >> + sleeptime = upcall_timeout; > >> + pthread_mutex_lock(&active_thread_list_lock); > >> + clock_gettime(CLOCK_MONOTONIC, &now); > >> + TAILQ_FOREACH(info, &active_thread_list, list) { > >> + err = pthread_tryjoin_np(info->tid, &tret); > >> + switch (err) { > >> + case 0: > >> + if (tret == PTHREAD_CANCELED) > >> + printerr(3, "watchdog: thread id 0x%lx cancelled successfully\n", > >> + info->tid); > >> + saveprev = info->list.tqe_prev; > >> + TAILQ_REMOVE(&active_thread_list, info, list); > >> + free(info); > >> + info = saveprev; > >> + break; > >> + case EBUSY: > >> + if (now.tv_sec >= info->timeout.tv_sec) { > >> + if (cancel_timed_out_upcalls && !info->cancelled) { > >> + printerr(0, "watchdog: thread id 0x%lx timed out\n", > >> + info->tid); > >> + pthread_cancel(info->tid); > >> + info->cancelled = true; > >> + do_error_downcall(info->fd, info->uid, -ETIMEDOUT); > >> + } else { > >> + printerr(0, "watchdog: thread id 0x%lx running for %ld seconds\n", > >> + info->tid, > >> + now.tv_sec - info->timeout.tv_sec + upcall_timeout); > > If people are going to used the -C flag they are saying they want > > to ignore hung threads so I'm thinking with printerr(0) we would > > be filling up their logs about messages they don't care about. > > So I'm thinking we should change this to a printerr(1) > > > >> + } > >> + } else if (!sleeptime_set) { > >> + sleeptime = info->timeout.tv_sec - now.tv_sec; > >> + sleeptime_set = true; > >> + } > >> + break; > >> + default: > >> + printerr(0, "watchdog: attempt to join thread id 0x%lx returned %d (%s)!\n", > >> + info->tid, err, strerror(err)); > >> + break; > >> + } > >> + } > >> + pthread_mutex_unlock(&active_thread_list_lock); > >> + > >> + return sleeptime; > >> +} > >> + > >> +static void * > >> +watchdog_thread_fn(void *UNUSED(arg)) > >> +{ > >> + unsigned int sleeptime; > >> + > >> + for (;;) { > >> + sleeptime = scan_active_thread_list(); > >> + printerr(4, "watchdog: sleeping %u secs\n", sleeptime); > >> + sleep(sleeptime); > >> + } > >> + return (void *)0; > >> +} > >> + > >> +static int > >> +start_watchdog_thread(void) > >> +{ > >> + pthread_attr_t attr; > >> + pthread_t th; > >> + int ret; > >> + > >> + ret = pthread_attr_init(&attr); > >> + if (ret != 0) { > >> + printerr(0, "ERROR: failed to init pthread attr: ret %d: %s\n", > >> + ret, strerror(errno)); > >> + return ret; > >> + } > >> + ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); > >> + if (ret != 0) { > >> + printerr(0, "ERROR: failed to create pthread attr: ret %d: %s\n", > >> + ret, strerror(errno)); > >> + return ret; > >> + } > >> + ret = pthread_create(&th, &attr, watchdog_thread_fn, NULL); > >> + if (ret != 0) { > >> + printerr(0, "ERROR: pthread_create failed: ret %d: %s\n", > >> + ret, strerror(errno)); > >> + } > >> + return ret; > >> +} > >> + > >> static struct clnt_info * > >> gssd_get_clnt(struct topdir *tdi, const char *name) > >> { > >> @@ -825,7 +925,7 @@ sig_die(int signal) > >> static void > >> usage(char *progname) > >> { > >> - fprintf(stderr, "usage: %s [-f] [-l] [-M] [-n] [-v] [-r] [-p pipefsdir] [-k keytab] [-d ccachedir] [-t timeout] [-R preferred realm] [-D] [-H]\n", > >> + fprintf(stderr, "usage: %s [-f] [-l] [-M] [-n] [-v] [-r] [-p pipefsdir] [-k keytab] [-d ccachedir] [-t timeout] [-R preferred realm] [-D] [-H] [-U upcall timeout] [-C]\n", > >> progname); > >> exit(1); > >> } > >> @@ -846,6 +946,9 @@ read_gss_conf(void) > >> #endif > >> context_timeout = conf_get_num("gssd", "context-timeout", context_timeout); > >> rpc_timeout = conf_get_num("gssd", "rpc-timeout", rpc_timeout); > >> + upcall_timeout = conf_get_num("gssd", "upcall-timeout", upcall_timeout); > >> + cancel_timed_out_upcalls = conf_get_bool("gssd", "cancel-timed-out-upcalls", > >> + cancel_timed_out_upcalls); > >> s = conf_get_str("gssd", "pipefs-directory"); > >> if (!s) > >> s = conf_get_str("general", "pipefs-directory"); > >> @@ -887,7 +990,7 @@ main(int argc, char *argv[]) > >> verbosity = conf_get_num("gssd", "verbosity", verbosity); > >> rpc_verbosity = conf_get_num("gssd", "rpc-verbosity", rpc_verbosity); > >> > >> - while ((opt = getopt(argc, argv, "HDfvrlmnMp:k:d:t:T:R:")) != -1) { > >> + while ((opt = getopt(argc, argv, "HDfvrlmnMp:k:d:t:T:R:U:C")) != -1) { > >> switch (opt) { > >> case 'f': > >> fg = 1; > >> @@ -938,6 +1041,11 @@ main(int argc, char *argv[]) > >> case 'H': > >> set_home = false; > >> break; > >> + case 'U': > >> + upcall_timeout = atoi(optarg); > >> + break; > >> + case 'C': > >> + cancel_timed_out_upcalls = false; > >> default: > >> usage(argv[0]); > >> break; > >> @@ -1010,6 +1118,11 @@ main(int argc, char *argv[]) > >> else > >> progname = argv[0]; > >> > >> + if (upcall_timeout > MAX_UPCALL_TIMEOUT) > >> + upcall_timeout = MAX_UPCALL_TIMEOUT; > >> + else if (upcall_timeout < MIN_UPCALL_TIMEOUT) > >> + upcall_timeout = MIN_UPCALL_TIMEOUT; > >> + > >> initerr(progname, verbosity, fg); > >> #ifdef HAVE_LIBTIRPC_SET_DEBUG > >> /* > >> @@ -1068,6 +1181,14 @@ main(int argc, char *argv[]) > >> } > >> event_add(inotify_ev, NULL); > >> > >> + TAILQ_INIT(&active_thread_list); > >> + > >> + rc = start_watchdog_thread(); > >> + if (rc != 0) { > >> + printerr(0, "ERROR: failed to start watchdog thread: %d\n", rc); > >> + exit(EXIT_FAILURE); > >> + } > >> + > >> TAILQ_INIT(&topdir_list); > >> gssd_scan(); > >> daemon_ready(); > >> diff --git a/utils/gssd/gssd.h b/utils/gssd/gssd.h > >> index 6d53647e..ad0d1d93 100644 > >> --- a/utils/gssd/gssd.h > >> +++ b/utils/gssd/gssd.h > >> @@ -50,6 +50,11 @@ > >> #define GSSD_DEFAULT_KEYTAB_FILE "/etc/krb5.keytab" > >> #define GSSD_SERVICE_NAME "nfs" > >> #define RPC_CHAN_BUF_SIZE 32768 > >> + > > I think we should add a "/* seconds */" > > so you don't have dig in the code > > to see what interval is > >> +#define MIN_UPCALL_TIMEOUT 5 > >> +#define DEF_UPCALL_TIMEOUT 30 > >> +#define MAX_UPCALL_TIMEOUT 600 > >> + > >> /* > >> * The gss mechanisms that we can handle > >> */ > >> @@ -91,10 +96,20 @@ struct clnt_upcall_info { > >> char *service; > >> }; > >> > >> +struct upcall_thread_info { > >> + TAILQ_ENTRY(upcall_thread_info) list; > >> + pthread_t tid; > >> + struct timespec timeout; > >> + uid_t uid; > >> + int fd; > >> + bool cancelled; > >> +}; > >> + > >> void handle_krb5_upcall(struct clnt_info *clp); > >> void handle_gssd_upcall(struct clnt_info *clp); > >> void free_upcall_info(struct clnt_upcall_info *info); > >> void gssd_free_client(struct clnt_info *clp); > >> +int do_error_downcall(int k5_fd, uid_t uid, int err); > >> > >> > >> #endif /* _RPC_GSSD_H_ */ > >> diff --git a/utils/gssd/gssd.man b/utils/gssd/gssd.man > >> index 9ae6def9..20fea729 100644 > >> --- a/utils/gssd/gssd.man > >> +++ b/utils/gssd/gssd.man > >> @@ -8,7 +8,7 @@ > >> rpc.gssd \- RPCSEC_GSS daemon > >> .SH SYNOPSIS > >> .B rpc.gssd > >> -.RB [ \-DfMnlvrH ] > >> +.RB [ \-DfMnlvrHC ] > >> .RB [ \-k > >> .IR keytab ] > >> .RB [ \-p > >> @@ -17,6 +17,10 @@ rpc.gssd \- RPCSEC_GSS daemon > >> .IR ccachedir ] > >> .RB [ \-t > >> .IR timeout ] > >> +.RB [ \-T > >> +.IR timeout ] > >> +.RB [ \-U > >> +.IR timeout ] > >> .RB [ \-R > >> .IR realm ] > >> .SH INTRODUCTION > >> @@ -275,7 +279,7 @@ seconds, which allows changing Kerberos tickets and identities frequently. > >> The default is no explicit timeout, which means the kernel context will live > >> the lifetime of the Kerberos service ticket used in its creation. > >> .TP > >> -.B -T timeout > >> +.BI "-T " timeout > >> Timeout, in seconds, to create an RPC connection with a server while > >> establishing an authenticated gss context for a user. > >> The default timeout is set to 5 seconds. > >> @@ -283,6 +287,18 @@ If you get messages like "WARNING: can't create tcp rpc_clnt to server > >> %servername% for user with uid %uid%: RPC: Remote system error - > >> Connection timed out", you should consider an increase of this timeout. > >> .TP > >> +.BI "-U " timeout > >> +Timeout, in seconds, for upcall threads. Threads executing longer than > >> +.I timeout > >> +seconds will be cancelled and an error of -ETIMEDOUT will be reported > >> +to the kernel. The default > >> +.I timeout > >> +is 30 seconds. The minimum is 5 seconds. The maximum is 600 seconds. > >> +.TP > >> +.B -C > >> +Instead of cancelling upcall threads that have timed out, cause an error > >> +message to be logged to the syslog (no error will be reported to the kernel). > >> +.TP > >> .B -H > >> Avoids setting $HOME to "/". This allows rpc.gssd to read per user k5identity > >> files versus trying to read /.k5identity for each user. > >> @@ -350,6 +366,17 @@ Equivalent to > >> Equivalent to > >> .BR -R . > >> .TP > >> +.B upcall-timeout > >> +Equivalent to > >> +.BR -U . > >> +.TP > >> +.B cancel-timed-out-upcalls > >> +Setting to > >> +.B false > >> +is equivalent to providing the > >> +.B -C > >> +flag. > >> +.TP > >> .B set-home > >> Setting to > >> .B false > >> diff --git a/utils/gssd/gssd_proc.c b/utils/gssd/gssd_proc.c > >> index ba508b30..ac7b1d1c 100644 > >> --- a/utils/gssd/gssd_proc.c > >> +++ b/utils/gssd/gssd_proc.c > >> @@ -81,11 +81,23 @@ > >> #include "gss_names.h" > >> > >> extern pthread_mutex_t clp_lock; > >> +extern pthread_mutex_t active_thread_list_lock; > >> +extern int upcall_timeout; > >> +extern TAILQ_HEAD(active_thread_list_head, upcall_thread_info) active_thread_list; > >> > >> /* Encryption types supported by the kernel rpcsec_gss code */ > >> int num_krb5_enctypes = 0; > >> krb5_enctype *krb5_enctypes = NULL; > >> > >> +struct cleanup_args { > >> + OM_uint32 *min_stat; > >> + gss_buffer_t acceptor; > >> + gss_buffer_t token; > >> + struct authgss_private_data *pd; > >> + AUTH **auth; > >> + CLIENT **rpc_clnt; > >> +}; > >> + > >> /* > >> * Parse the supported encryption type information > >> */ > >> @@ -184,7 +196,7 @@ out_err: > >> return; > >> } > >> > >> -static int > >> +int > >> do_error_downcall(int k5_fd, uid_t uid, int err) > >> { > >> char buf[1024]; > >> @@ -607,14 +619,37 @@ out: > >> return auth; > >> } > >> > >> +static void > >> +cleanup_handler(void *arg) > >> +{ > >> + struct cleanup_args *args = (struct cleanup_args *)arg; > >> + > >> + gss_release_buffer(args->min_stat, args->acceptor); > >> + if (args->token->value) > >> + free(args->token->value); > >> +#ifdef HAVE_AUTHGSS_FREE_PRIVATE_DATA > >> + if (args->pd->pd_ctx_hndl.length != 0 || args->pd->pd_ctx != 0) > >> + authgss_free_private_data(args->pd); > >> +#endif > >> + if (*args->auth) > >> + AUTH_DESTROY(*args->auth); > >> + if (*args->rpc_clnt) > >> + clnt_destroy(*args->rpc_clnt); > >> +} > >> + > >> /* > >> * this code uses the userland rpcsec gss library to create a krb5 > >> * context on behalf of the kernel > >> */ > >> static void > >> -process_krb5_upcall(struct clnt_info *clp, uid_t uid, int fd, char *srchost, > >> - char *tgtname, char *service) > >> +process_krb5_upcall(struct clnt_upcall_info *info) > >> { > >> + struct clnt_info *clp = info->clp; > >> + uid_t uid = info->uid; > >> + int fd = info->fd; > >> + char *srchost = info->srchost; > >> + char *tgtname = info->target; > >> + char *service = info->service; > >> CLIENT *rpc_clnt = NULL; > >> AUTH *auth = NULL; > >> struct authgss_private_data pd; > >> @@ -624,11 +659,13 @@ process_krb5_upcall(struct clnt_info *clp, uid_t uid, int fd, char *srchost, > >> gss_name_t gacceptor = GSS_C_NO_NAME; > >> gss_OID mech; > >> gss_buffer_desc acceptor = {0}; > >> + struct cleanup_args cleanup_args = {&min_stat, &acceptor, &token, &pd, &auth, &rpc_clnt}; > >> > >> token.length = 0; > >> token.value = NULL; > >> memset(&pd, 0, sizeof(struct authgss_private_data)); > >> > >> + pthread_cleanup_push(cleanup_handler, &cleanup_args); > >> /* > >> * If "service" is specified, then the kernel is indicating that > >> * we must use machine credentials for this request. (Regardless > >> @@ -650,6 +687,7 @@ process_krb5_upcall(struct clnt_info *clp, uid_t uid, int fd, char *srchost, > >> * used for this case is not important. > >> * > >> */ > >> + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); > >> if (uid != 0 || (uid == 0 && root_uses_machine_creds == 0 && > >> service == NULL)) { > >> > >> @@ -670,15 +708,21 @@ process_krb5_upcall(struct clnt_info *clp, uid_t uid, int fd, char *srchost, > >> goto out_return_error; > >> } > >> } > >> + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); > >> + pthread_testcancel(); > >> > >> + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); > >> if (!authgss_get_private_data(auth, &pd)) { > >> printerr(1, "WARNING: Failed to obtain authentication " > >> "data for user with uid %d for server %s\n", > >> uid, clp->servername); > >> goto out_return_error; > >> } > >> + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); > >> + pthread_testcancel(); > >> > >> /* Grab the context lifetime and acceptor name out of the ctx. */ > >> + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); > >> maj_stat = gss_inquire_context(&min_stat, pd.pd_ctx, NULL, &gacceptor, > >> &lifetime_rec, &mech, NULL, NULL, NULL); > >> > >> @@ -690,37 +734,35 @@ process_krb5_upcall(struct clnt_info *clp, uid_t uid, int fd, char *srchost, > >> get_hostbased_client_buffer(gacceptor, mech, &acceptor); > >> gss_release_name(&min_stat, &gacceptor); > >> } > >> + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); > >> + pthread_testcancel(); > >> > >> /* > >> * The serialization can mean turning pd.pd_ctx into a lucid context. If > >> * that happens then the pd.pd_ctx will be unusable, so we must never > >> * try to use it after this point. > >> */ > >> + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); > >> if (serialize_context_for_kernel(&pd.pd_ctx, &token, &krb5oid, NULL)) { > >> printerr(1, "WARNING: Failed to serialize krb5 context for " > >> "user with uid %d for server %s\n", > >> uid, clp->servername); > >> goto out_return_error; > >> } > >> + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); > >> + pthread_testcancel(); > >> > >> do_downcall(fd, uid, &pd, &token, lifetime_rec, &acceptor); > >> > >> out: > >> - gss_release_buffer(&min_stat, &acceptor); > >> - if (token.value) > >> - free(token.value); > >> -#ifdef HAVE_AUTHGSS_FREE_PRIVATE_DATA > >> - if (pd.pd_ctx_hndl.length != 0 || pd.pd_ctx != 0) > >> - authgss_free_private_data(&pd); > >> -#endif > >> - if (auth) > >> - AUTH_DESTROY(auth); > >> - if (rpc_clnt) > >> - clnt_destroy(rpc_clnt); > >> + pthread_cleanup_pop(1); > >> > >> return; > >> > >> out_return_error: > >> + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); > >> + pthread_testcancel(); > >> + > >> do_error_downcall(fd, uid, downcall_err); > >> goto out; > >> } > >> @@ -786,38 +828,71 @@ void free_upcall_info(struct clnt_upcall_info *info) > >> } > >> > >> static void > >> -gssd_work_thread_fn(struct clnt_upcall_info *info) > >> +cleanup_clnt_upcall_info(void *arg) > >> { > >> - process_krb5_upcall(info->clp, info->uid, info->fd, info->srchost, info->target, info->service); > >> + struct clnt_upcall_info *info = (struct clnt_upcall_info *)arg; > >> + > >> free_upcall_info(info); > >> } > >> > >> +static void > >> +gssd_work_thread_fn(struct clnt_upcall_info *info) > >> +{ > >> + pthread_cleanup_push(cleanup_clnt_upcall_info, info); > >> + process_krb5_upcall(info); > >> + pthread_cleanup_pop(1); > >> +} > >> + > >> +static struct upcall_thread_info * > >> +alloc_upcall_thread_info(void) > >> +{ > >> + struct upcall_thread_info *info; > >> + > >> + info = malloc(sizeof(struct upcall_thread_info)); > >> + if (info == NULL) > >> + return NULL; > >> + memset(info, 0, sizeof(*info)); > >> + return info; > >> +} > >> + > >> static int > >> -start_upcall_thread(void (*func)(struct clnt_upcall_info *), void *info) > >> +start_upcall_thread(void (*func)(struct clnt_upcall_info *), struct clnt_upcall_info *info) > >> { > >> pthread_attr_t attr; > >> pthread_t th; > >> + struct upcall_thread_info *tinfo; > >> int ret; > >> > >> + tinfo = alloc_upcall_thread_info(); > >> + if (!tinfo) > >> + return -ENOMEM; > >> + tinfo->fd = info->fd; > >> + tinfo->uid = info->uid; > >> + tinfo->cancelled = false; > >> + > >> ret = pthread_attr_init(&attr); > >> if (ret != 0) { > >> printerr(0, "ERROR: failed to init pthread attr: ret %d: %s\n", > >> ret, strerror(errno)); > >> - return ret; > >> - } > >> - ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); > >> - if (ret != 0) { > >> - printerr(0, "ERROR: failed to create pthread attr: ret %d: " > >> - "%s\n", ret, strerror(errno)); > >> + free(tinfo); > >> return ret; > >> } > >> > >> ret = pthread_create(&th, &attr, (void *)func, (void *)info); > >> - if (ret != 0) > >> + if (ret != 0) { > >> printerr(0, "ERROR: pthread_create failed: ret %d: %s\n", > >> ret, strerror(errno)); > >> - else > >> - printerr(0, "created thread id 0x%lx\n", th); > >> + free(tinfo); > >> + return ret; > >> + } > >> + printerr(1, "created thread id 0x%lx\n", th); > > This will be removed... > >> + tinfo->tid = th; > >> + pthread_mutex_lock(&active_thread_list_lock); > >> + clock_gettime(CLOCK_MONOTONIC, &tinfo->timeout); > >> + tinfo->timeout.tv_sec += upcall_timeout; > >> + TAILQ_INSERT_TAIL(&active_thread_list, tinfo, list); > >> + pthread_mutex_unlock(&active_thread_list_lock); > >> + > >> return ret; > >> } > >> > >> > > > > Overall I think the code is very well written with > > one exception... The lack of comments. I think it > > would be very useful to let the reader know what > > you are doing and why.... But by no means is > > that a show stopper. Nice work! > > > > I'm still doing more testing and nothing > > is breaking... So it is looking pretty good! > > > > steved. > > > > -- > Chuck Lever > > >