Snapshot doesn't work with realtime kernels since commit f79ae415b64c. The hlist_bl lock is implemented as a raw spinlock, and the code takes two non-raw spinlocks while holding the hlist_bl lock (non-raw spinlocks are blocking mutexes in the realtime kernel, so they cannot be taken inside a raw spinlock). This patch fixes the problem by using a non-raw spinlock, exception_table_lock, instead of the hlist_bl lock. Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx> Fixes: f79ae415b64c ("dm snapshot: Make exception tables scalable") --- drivers/md/dm-snap.c | 65 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 23 deletions(-) Index: linux-2.6/drivers/md/dm-snap.c =================================================================== --- linux-2.6.orig/drivers/md/dm-snap.c 2019-11-08 15:51:42.000000000 +0100 +++ linux-2.6/drivers/md/dm-snap.c 2019-11-08 15:54:58.000000000 +0100 @@ -141,6 +141,10 @@ struct dm_snapshot { * for them to be committed. */ struct bio_list bios_queued_during_merge; + +#ifdef CONFIG_PREEMPT_RT_BASE + spinlock_t exception_table_lock; +#endif }; /* @@ -625,30 +629,42 @@ static uint32_t exception_hash(struct dm /* Lock to protect access to the completed and pending exception hash tables. 
*/ struct dm_exception_table_lock { +#ifndef CONFIG_PREEMPT_RT_BASE struct hlist_bl_head *complete_slot; struct hlist_bl_head *pending_slot; +#endif }; static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk, struct dm_exception_table_lock *lock) { +#ifndef CONFIG_PREEMPT_RT_BASE struct dm_exception_table *complete = &s->complete; struct dm_exception_table *pending = &s->pending; lock->complete_slot = &complete->table[exception_hash(complete, chunk)]; lock->pending_slot = &pending->table[exception_hash(pending, chunk)]; +#endif } -static void dm_exception_table_lock(struct dm_exception_table_lock *lock) +static void dm_exception_table_lock(struct dm_snapshot *s, struct dm_exception_table_lock *lock) { +#ifdef CONFIG_PREEMPT_RT_BASE + spin_lock(&s->exception_table_lock); +#else hlist_bl_lock(lock->complete_slot); hlist_bl_lock(lock->pending_slot); +#endif } -static void dm_exception_table_unlock(struct dm_exception_table_lock *lock) +static void dm_exception_table_unlock(struct dm_snapshot *s, struct dm_exception_table_lock *lock) { +#ifdef CONFIG_PREEMPT_RT_BASE + spin_unlock(&s->exception_table_lock); +#else hlist_bl_unlock(lock->pending_slot); hlist_bl_unlock(lock->complete_slot); +#endif } static int dm_exception_table_init(struct dm_exception_table *et, @@ -835,9 +851,9 @@ static int dm_add_exception(void *contex */ dm_exception_table_lock_init(s, old, &lock); - dm_exception_table_lock(&lock); + dm_exception_table_lock(s, &lock); dm_insert_exception(&s->complete, e); - dm_exception_table_unlock(&lock); + dm_exception_table_unlock(s, &lock); return 0; } @@ -1318,6 +1334,9 @@ static int snapshot_ctr(struct dm_target s->first_merging_chunk = 0; s->num_merging_chunks = 0; bio_list_init(&s->bios_queued_during_merge); +#ifdef CONFIG_PREEMPT_RT_BASE + spin_lock_init(&s->exception_table_lock); +#endif /* Allocate hash table for COW data */ if (init_hash_tables(s)) { @@ -1651,7 +1670,7 @@ static void pending_complete(void *conte 
invalidate_snapshot(s, -EIO); error = 1; - dm_exception_table_lock(&lock); + dm_exception_table_lock(s, &lock); goto out; } @@ -1660,13 +1679,13 @@ static void pending_complete(void *conte invalidate_snapshot(s, -ENOMEM); error = 1; - dm_exception_table_lock(&lock); + dm_exception_table_lock(s, &lock); goto out; } *e = pe->e; down_read(&s->lock); - dm_exception_table_lock(&lock); + dm_exception_table_lock(s, &lock); if (!s->valid) { up_read(&s->lock); free_completed_exception(e); @@ -1687,16 +1706,16 @@ static void pending_complete(void *conte /* Wait for conflicting reads to drain */ if (__chunk_is_tracked(s, pe->e.old_chunk)) { - dm_exception_table_unlock(&lock); + dm_exception_table_unlock(s, &lock); __check_for_conflicting_io(s, pe->e.old_chunk); - dm_exception_table_lock(&lock); + dm_exception_table_lock(s, &lock); } out: /* Remove the in-flight exception from the list */ dm_remove_exception(&pe->e); - dm_exception_table_unlock(&lock); + dm_exception_table_unlock(s, &lock); snapshot_bios = bio_list_get(&pe->snapshot_bios); origin_bios = bio_list_get(&pe->origin_bios); @@ -1968,7 +1987,7 @@ static int snapshot_map(struct dm_target } down_read(&s->lock); - dm_exception_table_lock(&lock); + dm_exception_table_lock(s, &lock); if (!s->valid || (unlikely(s->snapshot_overflowed) && bio_data_dir(bio) == WRITE)) { @@ -1997,7 +2016,7 @@ static int snapshot_map(struct dm_target remap_exception(s, e, bio, chunk); if (unlikely(bio_op(bio) == REQ_OP_DISCARD) && io_overlaps_chunk(s, bio)) { - dm_exception_table_unlock(&lock); + dm_exception_table_unlock(s, &lock); up_read(&s->lock); zero_exception(s, e, bio, chunk); r = DM_MAPIO_SUBMITTED; /* discard is not issued */ @@ -2024,9 +2043,9 @@ static int snapshot_map(struct dm_target if (bio_data_dir(bio) == WRITE) { pe = __lookup_pending_exception(s, chunk); if (!pe) { - dm_exception_table_unlock(&lock); + dm_exception_table_unlock(s, &lock); pe = alloc_pending_exception(s); - dm_exception_table_lock(&lock); + 
dm_exception_table_lock(s, &lock); e = dm_lookup_exception(&s->complete, chunk); if (e) { @@ -2037,7 +2056,7 @@ static int snapshot_map(struct dm_target pe = __find_pending_exception(s, pe, chunk); if (!pe) { - dm_exception_table_unlock(&lock); + dm_exception_table_unlock(s, &lock); up_read(&s->lock); down_write(&s->lock); @@ -2063,7 +2082,7 @@ static int snapshot_map(struct dm_target if (!pe->started && io_overlaps_chunk(s, bio)) { pe->started = 1; - dm_exception_table_unlock(&lock); + dm_exception_table_unlock(s, &lock); up_read(&s->lock); start_full_bio(pe, bio); @@ -2076,7 +2095,7 @@ static int snapshot_map(struct dm_target /* this is protected by the exception table lock */ pe->started = 1; - dm_exception_table_unlock(&lock); + dm_exception_table_unlock(s, &lock); up_read(&s->lock); start_copy(pe); @@ -2088,7 +2107,7 @@ static int snapshot_map(struct dm_target } out_unlock: - dm_exception_table_unlock(&lock); + dm_exception_table_unlock(s, &lock); up_read(&s->lock); out: return r; @@ -2449,7 +2468,7 @@ static int __origin_write(struct list_he dm_exception_table_lock_init(snap, chunk, &lock); down_read(&snap->lock); - dm_exception_table_lock(&lock); + dm_exception_table_lock(snap, &lock); /* Only deal with valid and active snapshots */ if (!snap->valid || !snap->active) @@ -2466,9 +2485,9 @@ static int __origin_write(struct list_he if (e) goto next_snapshot; - dm_exception_table_unlock(&lock); + dm_exception_table_unlock(snap, &lock); pe = alloc_pending_exception(snap); - dm_exception_table_lock(&lock); + dm_exception_table_lock(snap, &lock); pe2 = __lookup_pending_exception(snap, chunk); @@ -2481,7 +2500,7 @@ static int __origin_write(struct list_he pe = __insert_pending_exception(snap, pe, chunk); if (!pe) { - dm_exception_table_unlock(&lock); + dm_exception_table_unlock(snap, &lock); up_read(&snap->lock); invalidate_snapshot(snap, -ENOMEM); @@ -2516,7 +2535,7 @@ static int __origin_write(struct list_he } next_snapshot: - dm_exception_table_unlock(&lock); + 
dm_exception_table_unlock(snap, &lock); up_read(&snap->lock); if (pe_to_start_now) { -- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel