Jobs can fail midway through their work. To recover, the chunks of work that already finished need to be undone in a job-specific way. Let padata_do_multithreaded() callers specify an "undo" callback responsible for undoing one chunk of a job. The callback is invoked only on successfully completed chunks; a chunk in which a failure occurs must be cleaned up by the thread function itself. To avoid multiple levels of error handling, do not allow the callback to fail. Undoing is single-threaded to keep it simple and because it's a slow path. Signed-off-by: Daniel Jordan <daniel.m.jordan@xxxxxxxxxx> --- include/linux/padata.h | 6 +++ kernel/padata.c | 113 +++++++++++++++++++++++++++++++++++------ 2 files changed, 103 insertions(+), 16 deletions(-) diff --git a/include/linux/padata.h b/include/linux/padata.h index 1c8670a24ccf..2a9fa459463d 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -135,6 +135,10 @@ struct padata_shell { * @min_chunk: The minimum chunk size in job-specific units. This allows * the client to communicate the minimum amount of work that's * appropriate for one worker thread to do at once. + * @undo_fn: A function that undoes one chunk of the task per call. If + * error(s) occur during the job, this is called on all successfully + * completed chunks. The chunk(s) in which failure occurs should be + * handled in the thread function. * @max_threads: Max threads to use for the job, actual number may be less * depending on task size and minimum chunk size. 
*/ @@ -145,6 +149,8 @@ struct padata_mt_job { unsigned long size; unsigned long align; unsigned long min_chunk; + + void (*undo_fn)(unsigned long start, unsigned long end, void *arg); int max_threads; }; diff --git a/kernel/padata.c b/kernel/padata.c index 1596ca22b316..d0876f861464 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -29,6 +29,7 @@ #include <linux/cpumask.h> #include <linux/err.h> #include <linux/cpu.h> +#include <linux/list_sort.h> #include <linux/padata.h> #include <linux/mutex.h> #include <linux/sched.h> @@ -42,6 +43,10 @@ struct padata_work { struct work_struct pw_work; struct list_head pw_list; /* padata_free_works linkage */ void *pw_data; + /* holds job units from padata_mt_job::start to pw_error_start */ + unsigned long pw_error_offset; + unsigned long pw_error_start; + unsigned long pw_error_end; }; static DEFINE_SPINLOCK(padata_works_lock); @@ -56,6 +61,9 @@ struct padata_mt_job_state { int nworks_fini; int error; /* first error from thread_fn */ unsigned long chunk_size; + unsigned long position; + unsigned long remaining_size; + struct list_head failed_works; }; static void padata_free_pd(struct parallel_data *pd); @@ -447,26 +455,38 @@ static void padata_mt_helper(struct work_struct *w) spin_lock(&ps->lock); - while (job->size > 0 && ps->error == 0) { - unsigned long start, size, end; + while (ps->remaining_size > 0 && ps->error == 0) { + unsigned long position, position_offset, size, end; int ret; - start = job->start; + position_offset = job->size - ps->remaining_size; + position = ps->position; /* So end is chunk size aligned if enough work remains. 
*/ - size = roundup(start + 1, ps->chunk_size) - start; - size = min(size, job->size); - end = start + size; + size = roundup(position + 1, ps->chunk_size) - position; + size = min(size, ps->remaining_size); + end = position + size; - job->start = end; - job->size -= size; + ps->position = end; + ps->remaining_size -= size; spin_unlock(&ps->lock); - ret = job->thread_fn(start, end, job->fn_arg); + + ret = job->thread_fn(position, end, job->fn_arg); + spin_lock(&ps->lock); - /* Save first error code only. */ - if (ps->error == 0) - ps->error = ret; + if (ret) { + /* Save first error code only. */ + if (ps->error == 0) + ps->error = ret; + /* Save information about where the job failed. */ + if (job->undo_fn) { + list_move(&pw->pw_list, &ps->failed_works); + pw->pw_error_start = position; + pw->pw_error_offset = position_offset; + pw->pw_error_end = end; + } + } } ++ps->nworks_fini; @@ -477,6 +497,60 @@ static void padata_mt_helper(struct work_struct *w) complete(&ps->completion); } +static int padata_error_cmp(void *unused, const struct list_head *a, + const struct list_head *b) +{ + struct padata_work *work_a = list_entry(a, struct padata_work, pw_list); + struct padata_work *work_b = list_entry(b, struct padata_work, pw_list); + + if (work_a->pw_error_offset < work_b->pw_error_offset) + return -1; + else if (work_a->pw_error_offset > work_b->pw_error_offset) + return 1; + return 0; +} + +static void padata_undo(struct padata_mt_job_state *ps, + struct list_head *works_list, + struct padata_work *stack_work) +{ + struct list_head *failed_works = &ps->failed_works; + struct padata_mt_job *job = ps->job; + unsigned long undo_pos = job->start; + + /* Sort so the failed ranges can be checked as we go. */ + list_sort(NULL, failed_works, padata_error_cmp); + + /* Undo completed work on this node, skipping failed ranges. 
*/ + while (undo_pos != ps->position) { + struct padata_work *failed_work; + unsigned long undo_end; + + failed_work = list_first_entry_or_null(failed_works, + struct padata_work, + pw_list); + if (failed_work) + undo_end = failed_work->pw_error_start; + else + undo_end = ps->position; + + if (undo_pos != undo_end) + job->undo_fn(undo_pos, undo_end, job->fn_arg); + + if (failed_work) { + undo_pos = failed_work->pw_error_end; + /* main thread's stack_work stays off works_list */ + if (failed_work == stack_work) + list_del(&failed_work->pw_list); + else + list_move(&failed_work->pw_list, works_list); + } else { + undo_pos = undo_end; + } + } + WARN_ON_ONCE(!list_empty(failed_works)); +} + /** * padata_do_multithreaded - run a multithreaded job * @job: Description of the job. @@ -509,10 +583,13 @@ int padata_do_multithreaded(struct padata_mt_job *job) spin_lock_init(&ps.lock); init_completion(&ps.completion); - ps.job = job; - ps.nworks = padata_work_alloc_mt(nworks, &ps, &works); - ps.nworks_fini = 0; - ps.error = 0; + INIT_LIST_HEAD(&ps.failed_works); + ps.job = job; + ps.nworks = padata_work_alloc_mt(nworks, &ps, &works); + ps.nworks_fini = 0; + ps.error = 0; + ps.position = job->start; + ps.remaining_size = job->size; /* * Chunk size is the amount of work a helper does per call to the @@ -529,11 +606,15 @@ int padata_do_multithreaded(struct padata_mt_job *job) /* Use the current thread, which saves starting a workqueue worker. */ padata_work_init(&my_work, padata_mt_helper, &ps, PADATA_WORK_ONSTACK); + INIT_LIST_HEAD(&my_work.pw_list); padata_mt_helper(&my_work.pw_work); /* Wait for all the helpers to finish. */ wait_for_completion(&ps.completion); + if (ps.error && job->undo_fn) + padata_undo(&ps, &works, &my_work); + destroy_work_on_stack(&my_work.pw_work); padata_works_free(&works); return ps.error; -- 2.34.1