Re: [patch][rfc] lockdep: annotate reclaim context (__GFP_NOFS)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 2009-01-20 at 09:39 +0100, Nick Piggin wrote:
> Hi,
> 
> I took a bit of time to clean this up since I posted the RFC.
> 
> I don't really know the lockdep code much, so this is about as
> far as I get without asking for review. I don't know if this is
> considered useful, but if it is, then maybe we can merge it and
> then fill in the bits for annotating other reclaim contexts.
> 
> The only problem is the lock usage character string won't scale
> well with more lock contexts. Why not just print out the hex
> value of the flags? Simple and easy to decode and extend.

Right, except that people already have trouble reading lockdep output..
But I see your problem, this state stuff doesn't scale too well
currently.

> ---
> 
> After noticing some code in mm/filemap.c accidentally perform a __GFP_FS
> allocation when it should not have been, I thought it might be a good idea to
> try to catch this kind of thing with lockdep.
> 
> I coded up a little idea that seems to work. It reuses the interrupt context
> discovery and annotation mechanism to reclaim contexts in order to discover
> possible deadlocks without having to actually hit them. If a lock is held
> while performing a __GFP_FS allocation, then that lock must not be taken
> during __GFP_FS reclaim. And vice versa.
> 
> Further possibilities: __GFP_IO, and __GFP_WAIT (without IO or FS). But
> filesystems are probably the most complicated with tricky locking, so let's
> start here first.
> 
> Example output:
> =================================
> [ INFO: inconsistent lock state ]
> 2.6.28-rc6-00007-ged31348-dirty #26
> ---------------------------------
> inconsistent {in-reclaim-W} -> {ov-reclaim-W} usage.
> modprobe/8526 [HC0[0]:SC0[0]:HE1:SE1] takes:
>  (testlock){--..}, at: [<ffffffffa0020055>] brd_init+0x55/0x216 [brd]

Except that with the below patch that would have had to have had 6 usage
chars :-)

> Signed-off-by: Nick Piggin <npiggin@xxxxxxx>
> ---
>  include/linux/lockdep.h |   14 +++
>  include/linux/sched.h   |    1 
>  kernel/lockdep.c        |  186 ++++++++++++++++++++++++++++++++++++++++++++----
>  mm/page_alloc.c         |   13 +++
>  4 files changed, 201 insertions(+), 13 deletions(-)

 
> +#define LOCKDEP_PF_RECLAIM_FS_BIT	1	/* Process is with a GFP_FS
> +						 * allocation context */
> +

s/with/&in/

> Index: linux-2.6/kernel/lockdep.c
> ===================================================================
> --- linux-2.6.orig/kernel/lockdep.c	2009-01-20 18:36:58.000000000 +1100
> +++ linux-2.6/kernel/lockdep.c	2009-01-20 19:19:59.000000000 +1100

> @@ -454,6 +456,10 @@ static const char *usage_str[] =
>  	[LOCK_USED_IN_SOFTIRQ_READ] =	"in-softirq-R",
>  	[LOCK_ENABLED_SOFTIRQS_READ] =	"softirq-on-R",
>  	[LOCK_ENABLED_HARDIRQS_READ] =	"hardirq-on-R",
> +	[LOCK_USED_IN_RECLAIM_FS] =	"in-reclaim-W",
> +	[LOCK_USED_IN_RECLAIM_FS_READ] = "in-reclaim-R",
> +	[LOCK_HELD_OVER_RECLAIM_FS] =	"ov-reclaim-W",
> +	[LOCK_HELD_OVER_RECLAIM_FS_READ] = "ov-reclaim-R",
>  };
>  
>  const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
> @@ -462,9 +468,10 @@ const char * __get_key_name(struct lockd
>  }
>  
>  void
> -get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4)
> +get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3,
> +					char *c4, char *c5, char *c6)
>  {
> -	*c1 = '.', *c2 = '.', *c3 = '.', *c4 = '.';
> +	*c1 = '.', *c2 = '.', *c3 = '.', *c4 = '.', *c5 = '.', *c6 = '.';
>  
>  	if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
>  		*c1 = '+';
> @@ -493,14 +500,29 @@ get_usage_chars(struct lock_class *class
>  		if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
>  			*c4 = '?';
>  	}
> +
> +	if (class->usage_mask & LOCKF_USED_IN_RECLAIM_FS)
> +		*c5 = '+';
> +	else
> +		if (class->usage_mask & LOCKF_HELD_OVER_RECLAIM_FS)
> +			*c5 = '-';
> +
> +	if (class->usage_mask & LOCKF_HELD_OVER_RECLAIM_FS_READ)
> +		*c6 = '-';
> +	if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) {

s/SOFTIRQ/RECLAIM_FS/

> +		*c6 = '+';
> +		if (class->usage_mask & LOCKF_HELD_OVER_RECLAIM_FS_READ)
> +			*c6 = '?';
> +	}
> +
>  }
>  
>  static void print_lock_name(struct lock_class *class)
>  {
> -	char str[KSYM_NAME_LEN], c1, c2, c3, c4;
> +	char str[KSYM_NAME_LEN], c1, c2, c3, c4, c5, c6;
>  	const char *name;
>  
> -	get_usage_chars(class, &c1, &c2, &c3, &c4);
> +	get_usage_chars(class, &c1, &c2, &c3, &c4, &c5, &c6);
>  
>  	name = class->name;
>  	if (!name) {
> @@ -513,7 +535,7 @@ static void print_lock_name(struct lock_
>  		if (class->subclass)
>  			printk("/%d", class->subclass);
>  	}
> -	printk("){%c%c%c%c}", c1, c2, c3, c4);
> +	printk("){%c%c%c%c%c%c}", c1, c2, c3, c4, c5, c6);
>  }
>  
>  static void print_lockdep_cache(struct lockdep_map *lock)

I think you missed a change to check_prev_add_irq()

> @@ -1949,6 +1971,14 @@ static int softirq_verbose(struct lock_c
>  	return 0;
>  }
>  
> +static int reclaim_verbose(struct lock_class *class)
> +{
> +#if RECLAIM_VERBOSE
> +	return class_filter(class);
> +#endif
> +	return 0;
> +}
> +
>  #define STRICT_READ_CHECKS	1
>  
>  static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
> @@ -2007,6 +2037,31 @@ static int mark_lock_irq(struct task_str
>  		if (softirq_verbose(hlock_class(this)))
>  			ret = 2;
>  		break;
> +	case LOCK_USED_IN_RECLAIM_FS:
> +		if (!valid_state(curr, this, new_bit, LOCK_HELD_OVER_RECLAIM_FS))
> +			return 0;
> +		if (!valid_state(curr, this, new_bit,
> +				 LOCK_HELD_OVER_RECLAIM_FS_READ))
> +			return 0;
> +		/*
> +		 * just marked it reclaim-fs-safe, check that this lock
> +		 * took no reclaim-fs-unsafe lock in the past:
> +		 */
> +		if (!check_usage_forwards(curr, this,
> +					  LOCK_HELD_OVER_RECLAIM_FS, "reclaim-fs"))
> +			return 0;
> +#if STRICT_READ_CHECKS
> +		/*
> +		 * just marked it reclaim-fs-safe, check that this lock
> +		 * took no reclaim-fs-unsafe-read lock in the past:
> +		 */
> +		if (!check_usage_forwards(curr, this,
> +				LOCK_HELD_OVER_RECLAIM_FS_READ, "reclaim-fs-read"))
> +			return 0;
> +#endif
> +		if (reclaim_verbose(hlock_class(this)))
> +			ret = 2;
> +		break;
>  	case LOCK_USED_IN_HARDIRQ_READ:
>  		if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
>  			return 0;
> @@ -2033,6 +2088,19 @@ static int mark_lock_irq(struct task_str
>  		if (softirq_verbose(hlock_class(this)))
>  			ret = 2;
>  		break;
> +	case LOCK_USED_IN_RECLAIM_FS_READ:
> +		if (!valid_state(curr, this, new_bit, LOCK_HELD_OVER_RECLAIM_FS))
> +			return 0;
> +		/*
> +		 * just marked it reclaim-fs-read-safe, check that this lock
> +		 * took no reclaim-fs-unsafe lock in the past:
> +		 */
> +		if (!check_usage_forwards(curr, this,
> +					  LOCK_HELD_OVER_RECLAIM_FS, "reclaim-fs"))
> +			return 0;
> +		if (reclaim_verbose(hlock_class(this)))
> +			ret = 2;
> +		break;
>  	case LOCK_ENABLED_HARDIRQS:
>  		if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
>  			return 0;
> @@ -2085,6 +2153,32 @@ static int mark_lock_irq(struct task_str
>  		if (softirq_verbose(hlock_class(this)))
>  			ret = 2;
>  		break;
> +	case LOCK_HELD_OVER_RECLAIM_FS:
> +		if (!valid_state(curr, this, new_bit, LOCK_USED_IN_RECLAIM_FS))
> +			return 0;
> +		if (!valid_state(curr, this, new_bit,
> +				 LOCK_USED_IN_RECLAIM_FS_READ))
> +			return 0;
> +		/*
> +		 * just marked it reclaim-fs-unsafe, check that no reclaim-fs-safe
> +		 * lock in the system ever took it in the past:
> +		 */
> +		if (!check_usage_backwards(curr, this,
> +					   LOCK_USED_IN_RECLAIM_FS, "reclaim-fs"))
> +			return 0;
> +#if STRICT_READ_CHECKS
> +		/*
> +		 * just marked it softirq-unsafe, check that no
> +		 * softirq-safe-read lock in the system ever took
> +		 * it in the past:
> +		 */
> +		if (!check_usage_backwards(curr, this,
> +				   LOCK_USED_IN_RECLAIM_FS_READ, "reclaim-fs-read"))
> +			return 0;
> +#endif
> +		if (reclaim_verbose(hlock_class(this)))
> +			ret = 2;
> +		break;
>  	case LOCK_ENABLED_HARDIRQS_READ:
>  		if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
>  			return 0;
> @@ -2115,6 +2209,21 @@ static int mark_lock_irq(struct task_str
>  		if (softirq_verbose(hlock_class(this)))
>  			ret = 2;
>  		break;
> +	case LOCK_HELD_OVER_RECLAIM_FS_READ:
> +		if (!valid_state(curr, this, new_bit, LOCK_USED_IN_RECLAIM_FS))
> +			return 0;
> +#if STRICT_READ_CHECKS
> +		/*
> +		 * just marked it reclaim-fs-read-unsafe, check that no
> +		 * reclaim-fs-safe lock in the system ever took it in the past:
> +		 */
> +		if (!check_usage_backwards(curr, this,
> +					   LOCK_USED_IN_RECLAIM_FS, "reclaim-fs"))
> +			return 0;
> +#endif
> +		if (reclaim_verbose(hlock_class(this)))
> +			ret = 2;
> +		break;
>  	default:
>  		WARN_ON(1);
>  		break;

This function (mark_lock_irq) just begs to be generated, it should
basically read something like this:

#define MARK_LOCK_CASE(__NEW, __EXCL)			\
	case __NEW:					\
		if (!valid_state(,,, __EXCL))		\
			return 0;			\
		if (!valid_state(,,, __EXCL##_READ)) 	\
			return 0;			\
		if (!check_usage_forwards(,, __EXCL))	\
			return 0;			\
		MARK_LOCK_STRICT_READ(__EXCL);		\
		break;					\
	case __NEW##_READ:				\
		if (!valid_state(,,, __EXCL))		\
			return 0;			\
		if (!check_usage_forwards(,, __EXCL))	\
			return 0;			\
		break;

#define MARK_LOCK_IRQ(__STATE)						\
	MARK_LOCK_CASE(LOCK_USED_IN_##__STATE, LOCK_ENABLED_##__STATE)	\
	MARK_LOCK_CASE(LOCK_ENABLED_##__STATE, LOCK_USED_IN_##__STATE)


mark_lock_irq()
{
	switch (new_bit) {
	MARK_LOCK_IRQ(HARDIRQ)
	MARK_LOCK_IRQ(SOFTIRQ)
	MARK_LOCK_IRQ(RECLAIM_FS)
	}
}

You could go one step further and generate more by doing that I did with
sched_features.h, that way you could generate all those flags in
lockdep.h as well, and adding a state is just 1 line in a header.

Lets call it lockdep_state.h

> @@ -2123,11 +2232,17 @@ static int mark_lock_irq(struct task_str
>  	return ret;
>  }
>  
> +enum mark_type {
> +	HARDIRQ,
> +	SOFTIRQ,
> +	RECLAIM_FS,
> +};
> +
>  /*
>   * Mark all held locks with a usage bit:
>   */
>  static int
> -mark_held_locks(struct task_struct *curr, int hardirq)
> +mark_held_locks(struct task_struct *curr, enum mark_type mark)
>  {
>  	enum lock_usage_bit usage_bit;
>  	struct held_lock *hlock;
> @@ -2136,17 +2251,32 @@ mark_held_locks(struct task_struct *curr
>  	for (i = 0; i < curr->lockdep_depth; i++) {
>  		hlock = curr->held_locks + i;
>  
> -		if (hardirq) {
> +		switch (mark) {
> +		case HARDIRQ:
>  			if (hlock->read)
>  				usage_bit = LOCK_ENABLED_HARDIRQS_READ;
>  			else
>  				usage_bit = LOCK_ENABLED_HARDIRQS;
> -		} else {
> +			break;
> +
> +		case SOFTIRQ:
>  			if (hlock->read)
>  				usage_bit = LOCK_ENABLED_SOFTIRQS_READ;
>  			else
>  				usage_bit = LOCK_ENABLED_SOFTIRQS;
> +			break;
> +
> +		case RECLAIM_FS:
> +			if (hlock->read)
> +				usage_bit = LOCK_HELD_OVER_RECLAIM_FS_READ;
> +			else
> +				usage_bit = LOCK_HELD_OVER_RECLAIM_FS;
> +			break;

Would then read something like:

#define LOCKDEP_STATE(__STATE)					\
case __STATE:							\
	if (hlock->read)					\
		usage_bit = LOCK_ENABLED_##__STATE##_READ;	\
	else							\
		usage_bit = LOCK_ENABLED_##__STATE;

#include "lockdep_state.h"
#undef LOCKDEP_STATE

> +		default:
> +			BUG();
>  		}
> +
>  		if (!mark_lock(curr, hlock, usage_bit))
>  			return 0;
>  	}
> @@ -2200,7 +2330,7 @@ void trace_hardirqs_on_caller(unsigned l
>  	 * We are going to turn hardirqs on, so set the
>  	 * usage bit for all held locks:
>  	 */
> -	if (!mark_held_locks(curr, 1))
> +	if (!mark_held_locks(curr, HARDIRQ))
>  		return;
>  	/*
>  	 * If we have softirqs enabled, then set the usage
> @@ -2208,7 +2338,7 @@ void trace_hardirqs_on_caller(unsigned l
>  	 * this bit from being set before)
>  	 */
>  	if (curr->softirqs_enabled)
> -		if (!mark_held_locks(curr, 0))
> +		if (!mark_held_locks(curr, SOFTIRQ))
>  			return;
>  
>  	curr->hardirq_enable_ip = ip;
> @@ -2288,7 +2418,7 @@ void trace_softirqs_on(unsigned long ip)
>  	 * enabled too:
>  	 */
>  	if (curr->hardirqs_enabled)
> -		mark_held_locks(curr, 0);
> +		mark_held_locks(curr, SOFTIRQ);
>  }
>  
>  /*
> @@ -2317,6 +2447,18 @@ void trace_softirqs_off(unsigned long ip
>  		debug_atomic_inc(&redundant_softirqs_off);
>  }
>  
> +void trace_reclaim_fs(void)
> +{
> +	struct task_struct *curr = current;
> +
> +	if (unlikely(!debug_locks))
> +		return;
> +	if (DEBUG_LOCKS_WARN_ON(irqs_disabled()))
> +		return;
> +
> +	mark_held_locks(curr, RECLAIM_FS);
> +}
> +
>  static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
>  {
>  	/*
> @@ -2362,6 +2504,22 @@ static int mark_irqflags(struct task_str
>  		}
>  	}
>  
> +	/*
> +	 * We reuse the irq context infrastructure more broadly as a general
> +	 * context checking code. This tests GFP_FS recursion (a lock taken
> +	 * during reclaim for a GFP_FS allocation is held over a GFP_FS
> +	 * allocation).
> +	 */
> +	if (!hlock->trylock && test_bit(LOCKDEP_PF_RECLAIM_FS_BIT,
> +							&curr->lockdep_flags)) {
> +		if (hlock->read)
> +			if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ))
> +					return 0;
> +		else
> +			if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS))
> +					return 0;
> +	}
> +
>  	return 1;
>  }
>  
> @@ -2453,6 +2611,10 @@ static int mark_lock(struct task_struct 
>  	case LOCK_ENABLED_SOFTIRQS:
>  	case LOCK_ENABLED_HARDIRQS_READ:
>  	case LOCK_ENABLED_SOFTIRQS_READ:
> +	case LOCK_USED_IN_RECLAIM_FS:
> +	case LOCK_USED_IN_RECLAIM_FS_READ:
> +	case LOCK_HELD_OVER_RECLAIM_FS:
> +	case LOCK_HELD_OVER_RECLAIM_FS_READ:

#define LOCKDEP_STATE(__STATE)			\
	case LOCK_USED_IN_##__STATE:		\
	case LOCK_USED_IN_##__STATE##_READ:	\
	case LOCK_ENABLED_##__STATE:		\
	case LOCK_ENABLED_##__STATE##_READ:
#include "lockdep_state.h"
#undef LOCKDEP_STATE

>  		ret = mark_lock_irq(curr, this, new_bit);
>  		if (!ret)
>  			return 0;
> Index: linux-2.6/mm/page_alloc.c
> ===================================================================
> --- linux-2.6.orig/mm/page_alloc.c	2009-01-20 18:36:58.000000000 +1100
> +++ linux-2.6/mm/page_alloc.c	2009-01-20 18:37:50.000000000 +1100
> @@ -1479,6 +1479,11 @@ __alloc_pages_internal(gfp_t gfp_mask, u
>  	unsigned long did_some_progress;
>  	unsigned long pages_reclaimed = 0;
>  
> +#ifdef CONFIG_LOCKDEP
> +	if ((gfp_mask & (__GFP_WAIT|__GFP_FS)) == (__GFP_WAIT|__GFP_FS) && !(p->flags & PF_MEMALLOC))
> +		trace_reclaim_fs();
> +#endif
> +
>  	might_sleep_if(wait);
>  
>  	if (should_fail_alloc_page(gfp_mask, order))
> @@ -1578,12 +1583,20 @@ nofail_alloc:
>  	 */
>  	cpuset_update_task_memory_state();
>  	p->flags |= PF_MEMALLOC;
> +#ifdef CONFIG_LOCKDEP
> +	if ((gfp_mask & (__GFP_WAIT|__GFP_FS)) == (__GFP_WAIT|__GFP_FS))
> +		set_bit(LOCKDEP_PF_RECLAIM_FS_BIT, &p->lockdep_flags);
> +#endif

it's impossible to get here without __GFP_WAIT.

>  	reclaim_state.reclaimed_slab = 0;
>  	p->reclaim_state = &reclaim_state;
>  
>  	did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
>  
>  	p->reclaim_state = NULL;
> +#ifdef CONFIG_LOCKDEP
> +	if ((gfp_mask & (__GFP_WAIT|__GFP_FS)) == (__GFP_WAIT|__GFP_FS))
> +		clear_bit(LOCKDEP_PF_RECLAIM_FS_BIT, &p->lockdep_flags);
> +#endif
>  	p->flags &= ~PF_MEMALLOC;
>  
>  	cond_resched();

#ifdef CONFIG_PROVE_LOCKING
static inline void lockdep_set_gfp_state(gfp_t gfp_mask)
{
	current->lockdep_gfp = gfp_mask;
}

static inline void lockdep_clear_gfp_state(void)
{
	current->lockdep_gfp = 0;
}
#else
static inline void lockdep_set_gfp_state(gfp_t gfp_mask)
{
}

static inline void lockdep_clear_gfp_state(void)
{
}
#endif

?

Maybe also a lockdep_trace_gfp(gfp_t), hmm?

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux