On Mon, Nov 11 2019 at 8:59am -0500, Mikulas Patocka <mpatocka@xxxxxxxxxx> wrote: > Snapshot doesn't work with realtime kernels since the commit f79ae415b64c. > hlist_bl is implemented as a raw spinlock and the code takes two non-raw > spinlocks while holding hlist_bl (non-raw spinlocks are blocking mutexes > in the realtime kernel, so they couldn't be taken inside a raw spinlock). > > This patch fixes the problem by using non-raw spinlock > exception_table_lock instead of the hlist_bl lock. > > Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx> > Fixes: f79ae415b64c ("dm snapshot: Make exception tables scalable") > > --- > drivers/md/dm-snap.c | 65 ++++++++++++++++++++++++++++++++------------------- > 1 file changed, 42 insertions(+), 23 deletions(-) > > Index: linux-2.6/drivers/md/dm-snap.c > =================================================================== > --- linux-2.6.orig/drivers/md/dm-snap.c 2019-11-08 15:51:42.000000000 +0100 > +++ linux-2.6/drivers/md/dm-snap.c 2019-11-08 15:54:58.000000000 +0100 > @@ -141,6 +141,10 @@ struct dm_snapshot { > * for them to be committed. > */ > struct bio_list bios_queued_during_merge; > + > +#ifdef CONFIG_PREEMPT_RT_BASE > + spinlock_t exception_table_lock; > +#endif > }; > > /* > @@ -625,30 +629,42 @@ static uint32_t exception_hash(struct dm > > /* Lock to protect access to the completed and pending exception hash tables. */ > struct dm_exception_table_lock { > +#ifndef CONFIG_PREEMPT_RT_BASE > struct hlist_bl_head *complete_slot; > struct hlist_bl_head *pending_slot; > +#endif > }; Why not put the spinlock_t in 'struct dm_exception_table_lock' with the member name 'lock'? 
> static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk, > struct dm_exception_table_lock *lock) > { > +#ifndef CONFIG_PREEMPT_RT_BASE > struct dm_exception_table *complete = &s->complete; > struct dm_exception_table *pending = &s->pending; > > lock->complete_slot = &complete->table[exception_hash(complete, chunk)]; > lock->pending_slot = &pending->table[exception_hash(pending, chunk)]; > +#endif > } > > -static void dm_exception_table_lock(struct dm_exception_table_lock *lock) > +static void dm_exception_table_lock(struct dm_snapshot *s, struct dm_exception_table_lock *lock) > { > +#ifdef CONFIG_PREEMPT_RT_BASE > + spin_lock(&s->exception_table_lock); > +#else > hlist_bl_lock(lock->complete_slot); > hlist_bl_lock(lock->pending_slot); > +#endif > } > > -static void dm_exception_table_unlock(struct dm_exception_table_lock *lock) > +static void dm_exception_table_unlock(struct dm_snapshot *s, struct dm_exception_table_lock *lock) > { > +#ifdef CONFIG_PREEMPT_RT_BASE > + spin_unlock(&s->exception_table_lock); > +#else > hlist_bl_unlock(lock->pending_slot); > hlist_bl_unlock(lock->complete_slot); > +#endif > } > > static int dm_exception_table_init(struct dm_exception_table *et, > @@ -835,9 +851,9 @@ static int dm_add_exception(void *contex > */ > dm_exception_table_lock_init(s, old, &lock); > > - dm_exception_table_lock(&lock); > + dm_exception_table_lock(s, &lock); > dm_insert_exception(&s->complete, e); > - dm_exception_table_unlock(&lock); > + dm_exception_table_unlock(s, &lock); > > return 0; > } That way you don't need the extra 'struct dm_snapshot' arg to all the various dm_exception_table_{lock,unlock} calls. 
> @@ -1318,6 +1334,9 @@ static int snapshot_ctr(struct dm_target > s->first_merging_chunk = 0; > s->num_merging_chunks = 0; > bio_list_init(&s->bios_queued_during_merge); > +#ifdef CONFIG_PREEMPT_RT_BASE > + spin_lock_init(&s->exception_table_lock); > +#endif > > /* Allocate hash table for COW data */ > if (init_hash_tables(s)) { And this spin_lock_init() would go in dm_exception_table_lock_init(), in the appropriate #ifdef, as spin_lock_init(&lock->lock). Doing it that way would seriously reduce the size of this patch. Unless I'm missing something, please submit a v2 and cc the linux-rt-users mailing list and the other direct CCs suggested by others in reply to patch 2/2. Thanks, Mike -- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel