Hi,

I've made this patch that improves kcopyd latency. When we set the bit
DM_KCOPYD_EARLY_CALLBACK, the completion is called from interrupt
context instead of process context.

I'd like to ask if you can benchmark it and test whether it improves the
performance of dm-thin and dm-cache.

Mikulas

Index: linux-2.6/drivers/md/dm-cache-target.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-cache-target.c
+++ linux-2.6/drivers/md/dm-cache-target.c
@@ -1171,9 +1171,9 @@ static void copy(struct dm_cache_migrati
 	c_region.count = cache->sectors_per_block;
 
 	if (promote)
-		dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k);
+		dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 1UL << DM_KCOPYD_EARLY_CALLBACK, copy_complete, &mg->k);
 	else
-		dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k);
+		dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 1UL << DM_KCOPYD_EARLY_CALLBACK, copy_complete, &mg->k);
 }
 
 static void bio_drop_shared_lock(struct cache *cache, struct bio *bio)
Index: linux-2.6/drivers/md/dm-kcopyd.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-kcopyd.c
+++ linux-2.6/drivers/md/dm-kcopyd.c
@@ -512,6 +512,10 @@ static int run_complete_job(struct kcopy
 	return 0;
 }
 
+static void null_completion(int read_err, unsigned long write_err, void *context)
+{
+}
+
 static void complete_io(unsigned long error, void *context)
 {
 	struct kcopyd_job *job = (struct kcopyd_job *) context;
@@ -532,10 +536,13 @@ static void complete_io(unsigned long er
 		}
 	}
 
-	if (op_is_write(job->rw))
+	if (op_is_write(job->rw)) {
+		if (test_bit(DM_KCOPYD_EARLY_CALLBACK, &job->flags)) {
+			job->fn(job->read_err, job->write_err, job->context);
+			job->fn = null_completion;
+		}
 		push(&kc->complete_jobs, job);
-
-	else {
+	} else {
 		job->rw = WRITE;
 		push(&kc->io_jobs, job);
 	}
@@ -735,6 +742,7 @@ static void segment_complete(int read_er
 			sub_job->dests[i].count = count;
 		}
 
+		sub_job->flags &= ~(1UL << DM_KCOPYD_EARLY_CALLBACK);
 		sub_job->fn = segment_complete;
 		sub_job->context = sub_job;
 		dispatch_job(sub_job);
Index: linux-2.6/drivers/md/dm-thin.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-thin.c
+++ linux-2.6/drivers/md/dm-thin.c
@@ -1359,7 +1359,7 @@ static void schedule_copy(struct thin_c
 		to.count = len;
 
 		dm_kcopyd_copy(pool->copier, &from, 1, &to,
-			       0, copy_complete, m);
+			       1UL << DM_KCOPYD_EARLY_CALLBACK, copy_complete, m);
 
 		/*
 		 * Do we need to zero a tail region?
Index: linux-2.6/include/linux/dm-kcopyd.h
===================================================================
--- linux-2.6.orig/include/linux/dm-kcopyd.h
+++ linux-2.6/include/linux/dm-kcopyd.h
@@ -19,8 +19,9 @@
 /* FIXME: make this configurable */
 #define DM_KCOPYD_MAX_REGIONS 8
 
-#define DM_KCOPYD_IGNORE_ERROR 1
-#define DM_KCOPYD_WRITE_SEQ 2
+#define DM_KCOPYD_IGNORE_ERROR		1
+#define DM_KCOPYD_WRITE_SEQ		2
+#define DM_KCOPYD_EARLY_CALLBACK	3
 
 struct dm_kcopyd_throttle {
 	unsigned throttle;

--
dm-devel mailing list
dm-devel@xxxxxxxxxx
https://listman.redhat.com/mailman/listinfo/dm-devel