Re: [RFC PATCH] libmultipath: prevent DSO unloading with astray checker threads

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Nov 05, 2020 at 12:49:52PM +0100, mwilck@xxxxxxxx wrote:
> From: Martin Wilck <mwilck@xxxxxxxx>
> 
> The multipathd tur checker thread is designed to be able to finish at
> any time, even after the tur checker itself has been freed. The
> multipathd shutdown code makes sure all the checkers have been freed
> before freeing the checker_class and calling dlclose() to unload the
> DSO, but this doesn't guarantee that the checker threads have finished.
> If one hasn't, the DSO will get unloaded while the thread still running
> code from it, causing a segfault.
> 
> This patch fixes the issue by further incrementing the DSO's refcount
> for every running thread. To avoid race conditions leading to segfaults,
> the thread's entrypoint must be in libmultipath, not in the DSO itself.
> Therefore we add a new optional checker method, libcheck_thread().
> Checkers defining this method may create a detached thread with
> entrypoint checker_thread_entry(), which will call the DSO's
> libcheck_thread and take care of the refcount handling.

I can't make this segfault. So that looks good, but it does need
libmultipath.version updated to include checker_thread_entry()

-Ben

> 
> Reported-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx>
> Signed-off-by: Martin Wilck <mwilck@xxxxxxxx>
> ---
>  libmultipath/checkers.c     | 55 ++++++++++++++++++++++++++++++++-----
>  libmultipath/checkers.h     | 17 ++++++++++++
>  libmultipath/checkers/tur.c |  7 ++---
>  3 files changed, 67 insertions(+), 12 deletions(-)
> 
> diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
> index f7ddd53..7d10f2a 100644
> --- a/libmultipath/checkers.c
> +++ b/libmultipath/checkers.c
> @@ -3,6 +3,9 @@
>  #include <stddef.h>
>  #include <dlfcn.h>
>  #include <sys/stat.h>
> +#include <pthread.h>
> +#include <urcu.h>
> +#include <urcu/uatomic.h>
>  
>  #include "debug.h"
>  #include "checkers.h"
> @@ -19,6 +22,7 @@ struct checker_class {
>  	int (*mp_init)(struct checker *);    /* to allocate the mpcontext */
>  	void (*free)(struct checker *);      /* to free the context */
>  	void (*reset)(void);		     /* to reset the global variables */
> +	void *(*thread)(void *);	     /* async thread entry point */
>  	const char **msgtable;
>  	short msgtable_size;
>  };
> @@ -54,19 +58,32 @@ static struct checker_class *alloc_checker_class(void)
>  	c = MALLOC(sizeof(struct checker_class));
>  	if (c) {
>  		INIT_LIST_HEAD(&c->node);
> -		c->refcount = 1;
> +		uatomic_set(&c->refcount, 1);
>  	}
>  	return c;
>  }
>  
> +/* Use uatomic_{sub,add}_return() to ensure proper memory barriers */
> +static int checker_class_ref(struct checker_class *cls)
> +{
> +	return uatomic_add_return(&cls->refcount, 1);
> +}
> +
> +static int checker_class_unref(struct checker_class *cls)
> +{
> +	return uatomic_sub_return(&cls->refcount, 1);
> +}
> +
>  void free_checker_class(struct checker_class *c)
>  {
> +	int cnt;
> +
>  	if (!c)
>  		return;
> -	c->refcount--;
> -	if (c->refcount) {
> -		condlog(4, "%s checker refcount %d",
> -			c->name, c->refcount);
> +	cnt = checker_class_unref(c);
> +	if (cnt != 0) {
> +		condlog(cnt < 0 ? 1 : 4, "%s checker refcount %d",
> +			c->name, cnt);
>  		return;
>  	}
>  	condlog(3, "unloading %s checker", c->name);
> @@ -160,7 +177,8 @@ static struct checker_class *add_checker_class(const char *multipath_dir,
>  
>  	c->mp_init = (int (*)(struct checker *)) dlsym(c->handle, "libcheck_mp_init");
>  	c->reset = (void (*)(void)) dlsym(c->handle, "libcheck_reset");
> -	/* These 2 functions can be NULL. call dlerror() to clear out any
> +	c->thread = (void *(*)(void*)) dlsym(c->handle, "libcheck_thread");
> +	/* These 3 functions can be NULL. call dlerror() to clear out any
>  	 * error string */
>  	dlerror();
>  
> @@ -346,6 +364,29 @@ bad_id:
>  	return generic_msg[CHECKER_MSGID_NONE];
>  }
>  
> +static void checker_cleanup_thread(void *arg)
> +{
> +	struct checker_class *cls = arg;
> +
> +	(void)checker_class_unref(cls);
> +	rcu_unregister_thread();
> +}
> +
> +void *checker_thread_entry(void *arg)
> +{
> +	struct checker *chk = arg;
> +	void *rv;
> +
> +	assert(chk && chk->cls && chk->cls->thread);
> +
> +	rcu_register_thread();
> +	(void)checker_class_ref(chk->cls);
> +	pthread_cleanup_push(checker_cleanup_thread, chk->cls);
> +	rv = chk->cls->thread(chk->context);
> +	pthread_cleanup_pop(1);
> +	return rv;
> +}
> +
>  void checker_clear_message (struct checker *c)
>  {
>  	if (!c)
> @@ -370,7 +411,7 @@ void checker_get(const char *multipath_dir, struct checker *dst,
>  	if (!src)
>  		return;
>  
> -	src->refcount++;
> +	(void)checker_class_ref(dst->cls);
>  }
>  
>  int init_checkers(const char *multipath_dir)
> diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h
> index 9d5f90b..01af02d 100644
> --- a/libmultipath/checkers.h
> +++ b/libmultipath/checkers.h
> @@ -148,6 +148,23 @@ void checker_set_async (struct checker *);
>  void checker_set_fd (struct checker *, int);
>  void checker_enable (struct checker *);
>  void checker_disable (struct checker *);
> +/*
> + * checker_thread_entry(): entry point for async path checker thread
> + *
> + * Path checkers that do I/O may hang forever. To avoid blocking, some
> + * checkers therefore use asyncronous, detached threads for checking
> + * the paths. These threads may continue hanging if multipathd is stopped.
> + * In this case, we can't unload the checker DSO at exit. In order to
> + * avoid race conditions and crashes, the entry point of the thread
> + * needs to be in libmultipath, not in the DSO itself.
> + * Checker threads must use this function as entry point. It will call
> + * the DSO's "libcheck_thread" function with the checker context as
> + * argument. When libcheck_thread() returns, it will clean up and
> + * decrement the DSO's refcount. See the tur checker for a usage example.
> + *
> + * @param arg: pointer to struct checker, must have non-NULL cls->thread
> + */
> +void *checker_thread_entry (void *);
>  int checker_check (struct checker *, int);
>  int checker_is_sync(const struct checker *);
>  const char *checker_name (const struct checker *);
> diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
> index e886fcf..063c419 100644
> --- a/libmultipath/checkers/tur.c
> +++ b/libmultipath/checkers/tur.c
> @@ -15,7 +15,6 @@
>  #include <errno.h>
>  #include <sys/time.h>
>  #include <pthread.h>
> -#include <urcu.h>
>  #include <urcu/uatomic.h>
>  
>  #include "checkers.h"
> @@ -204,7 +203,6 @@ static void cleanup_func(void *data)
>  	holders = uatomic_sub_return(&ct->holders, 1);
>  	if (!holders)
>  		cleanup_context(ct);
> -	rcu_unregister_thread();
>  }
>  
>  /*
> @@ -251,7 +249,7 @@ static void tur_deep_sleep(const struct tur_checker_context *ct)
>  #define tur_deep_sleep(x) do {} while (0)
>  #endif /* TUR_TEST_MAJOR */
>  
> -static void *tur_thread(void *ctx)
> +void *libcheck_thread(void *ctx)
>  {
>  	struct tur_checker_context *ct = ctx;
>  	int state, running;
> @@ -259,7 +257,6 @@ static void *tur_thread(void *ctx)
>  
>  	/* This thread can be canceled, so setup clean up */
>  	tur_thread_cleanup_push(ct);
> -	rcu_register_thread();
>  
>  	condlog(4, "%d:%d : tur checker starting up", major(ct->devt),
>  		minor(ct->devt));
> @@ -394,7 +391,7 @@ int libcheck_check(struct checker * c)
>  		uatomic_set(&ct->running, 1);
>  		tur_set_async_timeout(c);
>  		setup_thread_attr(&attr, 32 * 1024, 1);
> -		r = pthread_create(&ct->thread, &attr, tur_thread, ct);
> +		r = pthread_create(&ct->thread, &attr, checker_thread_entry, c);
>  		pthread_attr_destroy(&attr);
>  		if (r) {
>  			uatomic_sub(&ct->holders, 1);
> -- 
> 2.29.0

--
dm-devel mailing list
dm-devel@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/dm-devel




[Index of Archives]     [DM Crypt]     [Fedora Desktop]     [ATA RAID]     [Fedora Marketing]     [Fedora Packaging]     [Fedora SELinux]     [Yosemite Discussion]     [KDE Users]     [Fedora Docs]

  Powered by Linux