On Fri, 13 Feb 2009, Mikulas Patocka wrote: > Hi > > Few more questions: > - do you write to the snapshots? > - do you create/delete snapshots while the crash happens? > - do all the snapshots have the same chunk size? > - how many snapshots do you have? another question: do you resize the origin device? or the snapshots? One more test patch. (the code looks so terrible that I am thinking about submitting you a patch that erases this logic all and reimplements it). Mikulas --- drivers/md/dm-exception-store.c | 2 + drivers/md/dm-snap.c | 52 ++++++++++++++++++++++++++++++++++------ 2 files changed, 47 insertions(+), 7 deletions(-) Index: linux-2.6.28-clean/drivers/md/dm-snap.c =================================================================== --- linux-2.6.28-clean.orig/drivers/md/dm-snap.c 2009-02-13 07:49:49.000000000 +0100 +++ linux-2.6.28-clean/drivers/md/dm-snap.c 2009-02-13 08:26:54.000000000 +0100 @@ -90,6 +90,8 @@ struct dm_snap_pending_exception { int started; struct list_head list_all; + + int magic; }; /* @@ -378,10 +380,13 @@ static struct dm_snap_pending_exception atomic_inc(&s->pending_exceptions_count); pe->snap = s; + pe->primary_pe = NULL; + pe->magic = 0x12345678; spin_lock(&pending_spinlock); list_for_each_entry(pe2, &pending_all, list_all) { BUG_ON(pe2 == pe); + BUG_ON(pe2->primary_pe == pe); } list_add(&pe->list_all, &pending_all); spin_unlock(&pending_spinlock); @@ -400,13 +405,21 @@ static void free_pending_exception(struc spin_lock(&pending_spinlock); list_for_each_entry(pe2, &pending_all, list_all) { + if (pe2->primary_pe == pe && pe2 != pe) { + printk(KERN_ALERT "Freeing referenced exception, pe %p, pe->magic %x, pe2 %p, pe2->magic %x, pe2->primary_pe, %p, pe->ref_count %d\n", pe, pe->magic, pe2, pe2->magic, pe2->primary_pe, atomic_read(&pe->ref_count)); + BUG(); + } + } + list_for_each_entry(pe2, &pending_all, list_all) { if (pe2 == pe) goto found; } + printk(KERN_ALERT "Freeing free exception pe %p, magic %x, pe->primary_pe %p, pe->ref_count %d\n", pe, pe->magic, pe->primary_pe, atomic_read(&pe->ref_count)); BUG(); found: list_del(&pe->list_all); spin_unlock(&pending_spinlock); + pe->magic = 0x90abcdef; mempool_free(pe, s->pending_pool); smp_mb__before_atomic_dec(); atomic_dec(&s->pending_exceptions_count); @@ -862,6 +875,13 @@ static struct bio *put_pending_exception primary_pe = pe->primary_pe; + if (primary_pe) { + if (atomic_read(&primary_pe->ref_count) <= 0 || primary_pe->magic != 0x12345678) { + printk(KERN_ALERT "Bad ref count, pe %p, pe->magic %x, primary_pe %p, primary_pe->magic %x, primary_pe->ref_count %d\n", pe, pe->magic, pe->primary_pe, primary_pe->magic, atomic_read(&primary_pe->ref_count)); + BUG(); + } + } + /* * If this pe is involved in a write to the origin and * it is the last sibling to complete then release @@ -1313,11 +1333,19 @@ static int __origin_write(struct list_he * Either every pe here has same * primary_pe or none has one yet. */ - if (pe->primary_pe) + if (pe->primary_pe) { primary_pe = pe->primary_pe; - else { + if (atomic_read(&primary_pe->ref_count) <= 0 || primary_pe->magic != 0x12345678) { + printk(KERN_ALERT "Bad ref count, pe %p, pe->magic %x, primary_pe %p, primary_pe->magic %x, primary_pe->ref_count %d\n", pe, pe->magic, primary_pe, primary_pe->magic, atomic_read(&primary_pe->ref_count)); + BUG(); + } + } else { primary_pe = pe; first = 1; + if (atomic_read(&primary_pe->ref_count) <= 0 || primary_pe->magic != 0x12345678) { + printk(KERN_ALERT "Bad ref count, pe %p, pe->magic %x, primary_pe %p, primary_pe->magic %x, primary_pe->ref_count %d\n", pe, pe->magic, pe->primary_pe, primary_pe->magic, atomic_read(&primary_pe->ref_count)); + BUG(); + } } bio_list_add(&primary_pe->origin_bios, bio); @@ -1327,6 +1355,10 @@ static int __origin_write(struct list_he if (!pe->primary_pe) { pe->primary_pe = primary_pe; + if (atomic_read(&primary_pe->ref_count) <= 0 || primary_pe->magic != 0x12345678) { + printk(KERN_ALERT "Bad ref count, pe %p, pe->magic %x, primary_pe %p, primary_pe->magic %x, primary_pe->ref_count %d\n", pe, pe->magic, primary_pe, primary_pe->magic, atomic_read(&primary_pe->ref_count)); + BUG(); + } get_pending_exception(primary_pe); } @@ -1350,11 +1382,17 @@ static int __origin_write(struct list_he * us here to remove the primary_pe and submit any origin_bios. */ - if (first && atomic_dec_and_test(&primary_pe->ref_count)) { - flush_bios(bio_list_get(&primary_pe->origin_bios)); - free_pending_exception(primary_pe); - /* If we got here, pe_queue is necessarily empty. */ - return r; + if (first) { + if (atomic_read(&primary_pe->ref_count) <= 0 || primary_pe->magic != 0x12345678) { + printk(KERN_ALERT "Bad ref count, primary_pe %p, primary_pe->magic %x, primary_pe->primary_pe %p, primary_pe->ref_count %d\n", primary_pe, primary_pe->magic, primary_pe->primary_pe, atomic_read(&primary_pe->ref_count)); + BUG(); + } + if (atomic_dec_and_test(&primary_pe->ref_count)) { + flush_bios(bio_list_get(&primary_pe->origin_bios)); + free_pending_exception(primary_pe); + /* If we got here, pe_queue is necessarily empty. */ + return r; + } } /* Index: linux-2.6.28-clean/drivers/md/dm-exception-store.c =================================================================== --- linux-2.6.28-clean.orig/drivers/md/dm-exception-store.c 2009-02-13 08:20:45.000000000 +0100 +++ linux-2.6.28-clean/drivers/md/dm-exception-store.c 2009-02-13 08:20:50.000000000 +0100 @@ -621,6 +621,8 @@ struct dm_snap_pending_exception { int started; struct list_head list_all; + + int magic; }; static DEFINE_SPINLOCK(callback_spinlock); -- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel