On Sun, 6 Dec 2009, Mike Snitzer wrote: > On Sat, Dec 05 2009 at 9:01pm -0500, > Mikulas Patocka <mpatocka@xxxxxxxxxx> wrote: > > > On Sat, 5 Dec 2009, Mike Snitzer wrote: > > > > > Switch stop_merge() from using a busy loop to a completion event. > > > > > > stop_merge() now requests merging be shutdown using the > > > 'merge_completion' pointer (instead of the 'merge_shutdown' flag). This > > > is accomplished by testing if 'merge_completion' is not NULL in > > > snapshot_merge_process(). stop_merge() allocates its completion on the > > > stack and assigns it to the 'merge_completion' pointer in the snapshot. > > > 'merge_completion' is protected by the snapshot's lock. > > > > > > Also changed the 'merge_running' flag from int to atomic_t. > > > > No, there's a bug: > > > > > static void stop_merge(struct dm_snapshot *s) > > > { > > > - while (s->merge_running) { > > > - s->merge_shutdown = 1; > > > - msleep(1); > > > + DECLARE_COMPLETION_ONSTACK(merge_stopped); > > > + if (atomic_read(&s->merge_running)) { > > > > --- if the merge stops exactly at this point (because it gets finished or > > because of an i/o error), we are waiting for a completion that will be > > never signalled. > > Yes, valid point. But for this rare corner case we could just use > wait_for_completion_timeout() with a fairly large timeout; like 30 sec? > That actually isn't a great option (racey)... > > How about if the 'shut:' code paths also checked for s->merge_completion > and complete() if it is not NULL? Which means that check and related > complete() code would become a function. > > > > + down_write(&s->lock); > > > + s->merge_completion = &merge_stopped; > > > + up_write(&s->lock); > > > + wait_for_completion(&merge_stopped); > > > } > > > - s->merge_shutdown = 0; > > > } > > > > > > /* > > > > For Alasdair: do you get the problem? If I write it with msleep() > > correctly, you keep on complaining how unclean it is --- if it is written > > with completions and it is wrong (because they are just harder to use > > correctly than simple variables and msleep), you tend to support it. Now > > you see in practice how complex constructs tend to trigger bugs. > > > > Mike: I thought that the completion would be in struct dm_snapshot. But > > maybe, try it with wait_on_bit / wake_up_bit / test_bit / set_bit etc., it > > may be easier than completions. > > I can look at it; but I think using a completion can work. > > Mike Here it is with bits: --- Use bits instead of variables. Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx> --- drivers/md/dm-snap.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) Index: linux-2.6.32-devel/drivers/md/dm-snap.c =================================================================== --- linux-2.6.32-devel.orig/drivers/md/dm-snap.c 2009-12-07 11:37:53.000000000 +0100 +++ linux-2.6.32-devel/drivers/md/dm-snap.c 2009-12-07 11:58:38.000000000 +0100 @@ -102,12 +102,14 @@ struct dm_snapshot { spinlock_t tracked_chunk_lock; struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; + unsigned long bits; + /* Merge operation is in progress */ - int merge_running; +#define MERGE_RUNNING 0 /* It is requested to shut down merging */ /* Cleared back to 0 when the merging is stopped */ - int merge_shutdown; +#define SHUTDOWN_MERGE 1 /* Merging this area --- block any writes */ chunk_t merge_write_interlock; @@ -762,8 +764,8 @@ static void snapshot_merge_process(struc int must_wait; struct dm_io_region src, dest; - BUG_ON(!s->merge_running); - if (s->merge_shutdown) + BUG_ON(!test_bit(MERGE_RUNNING, &s->bits)); + if (unlikely(test_bit(SHUTDOWN_MERGE, &s->bits))) goto shut; if (!s->valid) { @@ -823,7 +825,9 @@ test_again: return; shut: - s->merge_running = 0; + clear_bit_unlock(MERGE_RUNNING, &s->bits); + smp_mb__after_clear_bit(); + wake_up_bit(&s->bits, MERGE_RUNNING); } /* This function drops s->lock */ @@ -904,15 +908,21 @@ static void merge_callback(int read_err, shut: down_write(&s->lock); release_write_interlock(s, 1); - s->merge_running = 0; + clear_bit_unlock(MERGE_RUNNING, &s->bits); + smp_mb__after_clear_bit(); + wake_up_bit(&s->bits, MERGE_RUNNING); } static void start_merge(struct dm_snapshot *s) { - if (!s->merge_running && !s->merge_shutdown) { - s->merge_running = 1; + if (!test_and_set_bit(MERGE_RUNNING, &s->bits)) snapshot_merge_process(s); - } +} + +static int wait_schedule(void *ptr) +{ + schedule(); + return 0; } /* @@ -920,11 +930,9 @@ static void start_merge(struct dm_snapsh */ static void stop_merge(struct dm_snapshot *s) { - while (s->merge_running) { - s->merge_shutdown = 1; - msleep(1); - } - s->merge_shutdown = 0; + set_bit(SHUTDOWN_MERGE, &s->bits); + wait_on_bit(&s->bits, MERGE_RUNNING, wait_schedule, TASK_UNINTERRUPTIBLE); + clear_bit(SHUTDOWN_MERGE, &s->bits); } /* @@ -991,8 +999,7 @@ static int snapshot_ctr(struct dm_target init_rwsem(&s->lock); INIT_LIST_HEAD(&s->list); spin_lock_init(&s->pe_lock); - s->merge_running = 0; - s->merge_shutdown = 0; + s->bits = 0; s->merge_write_interlock = 0; s->merge_write_interlock_n = 0; bio_list_init(&s->merge_write_list); -- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel