3.18-stable review patch. If anyone has any objections, please let me know. ------------------ From: Ian Munsie <imunsie@xxxxxxxxxxx> commit ee41d11d53c8fc4968f0816504651541d606cf40 upstream. We had a known sleep while atomic bug if a CXL device was forcefully unbound while it was in use. This could occur as a result of EEH, or manually induced with something like this while the device was in use: echo 0000:01:00.0 > /sys/bus/pci/drivers/cxl-pci/unbind The issue was that in this code path we iterated over each context and forcefully detached it with the contexts_lock spin lock held, however the detach also needed to take the spu_mutex, and call schedule. This patch changes the contexts_lock to a mutex so that we are not in atomic context while doing the detach, thereby avoiding the sleep while atomic. Also delete the related TODO comment, which suggested an alternate solution which turned out to not be workable. Signed-off-by: Ian Munsie <imunsie@xxxxxxxxxxx> Signed-off-by: Michael Ellerman <mpe@xxxxxxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- drivers/misc/cxl/context.c | 15 ++++++++------- drivers/misc/cxl/cxl.h | 2 +- drivers/misc/cxl/native.c | 7 ------- drivers/misc/cxl/pci.c | 2 +- drivers/misc/cxl/sysfs.c | 10 +++++----- 5 files changed, 15 insertions(+), 21 deletions(-) --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -82,12 +82,12 @@ int cxl_context_init(struct cxl_context * Allocating IDR! We better make sure everything's setup that * dereferences from it. */ + mutex_lock(&afu->contexts_lock); idr_preload(GFP_KERNEL); - spin_lock(&afu->contexts_lock); i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0, ctx->afu->num_procs, GFP_NOWAIT); - spin_unlock(&afu->contexts_lock); idr_preload_end(); + mutex_unlock(&afu->contexts_lock); if (i < 0) return i; @@ -168,21 +168,22 @@ void cxl_context_detach_all(struct cxl_a struct cxl_context *ctx; int tmp; - rcu_read_lock(); - idr_for_each_entry(&afu->contexts_idr, ctx, tmp) + mutex_lock(&afu->contexts_lock); + idr_for_each_entry(&afu->contexts_idr, ctx, tmp) { /* * Anything done in here needs to be setup before the IDR is * created and torn down after the IDR removed */ __detach_context(ctx); - rcu_read_unlock(); + } + mutex_unlock(&afu->contexts_lock); } void cxl_context_free(struct cxl_context *ctx) { - spin_lock(&ctx->afu->contexts_lock); + mutex_lock(&ctx->afu->contexts_lock); idr_remove(&ctx->afu->contexts_idr, ctx->pe); - spin_unlock(&ctx->afu->contexts_lock); + mutex_unlock(&ctx->afu->contexts_lock); synchronize_rcu(); free_page((u64)ctx->sstp); --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -349,7 +349,7 @@ struct cxl_afu { struct device *chardev_s, *chardev_m, *chardev_d; struct idr contexts_idr; struct dentry *debugfs; - spinlock_t contexts_lock; + struct mutex contexts_lock; struct mutex spa_mutex; spinlock_t afu_cntl_lock; --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -610,13 +610,6 @@ static inline int detach_process_native_ return 0; } -/* - * TODO: handle case when this is called inside a rcu_read_lock() which may - * happen when we unbind the driver (ie. cxl_context_detach_all()) . Terminate - * & remove use a mutex lock and schedule which will not good with lock held. - * May need to write do_process_element_cmd() that handles outstanding page - * faults synchronously. - */ static inline int detach_process_native_afu_directed(struct cxl_context *ctx) { if (!ctx->pe_inserted) --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -502,7 +502,7 @@ static struct cxl_afu *cxl_alloc_afu(str afu->dev.release = cxl_release_afu; afu->slice = slice; idr_init(&afu->contexts_idr); - spin_lock_init(&afu->contexts_lock); + mutex_init(&afu->contexts_lock); spin_lock_init(&afu->afu_cntl_lock); mutex_init(&afu->spa_mutex); --- a/drivers/misc/cxl/sysfs.c +++ b/drivers/misc/cxl/sysfs.c @@ -121,7 +121,7 @@ static ssize_t reset_store_afu(struct de int rc; /* Not safe to reset if it is currently in use */ - spin_lock(&afu->contexts_lock); + mutex_lock(&afu->contexts_lock); if (!idr_is_empty(&afu->contexts_idr)) { rc = -EBUSY; goto err; @@ -132,7 +132,7 @@ static ssize_t reset_store_afu(struct de rc = count; err: - spin_unlock(&afu->contexts_lock); + mutex_unlock(&afu->contexts_lock); return rc; } @@ -247,7 +247,7 @@ static ssize_t mode_store(struct device int rc = -EBUSY; /* can't change this if we have a user */ - spin_lock(&afu->contexts_lock); + mutex_lock(&afu->contexts_lock); if (!idr_is_empty(&afu->contexts_idr)) goto err; @@ -271,7 +271,7 @@ static ssize_t mode_store(struct device afu->current_mode = 0; afu->num_procs = 0; - spin_unlock(&afu->contexts_lock); + mutex_unlock(&afu->contexts_lock); if ((rc = _cxl_afu_deactivate_mode(afu, old_mode))) return rc; @@ -280,7 +280,7 @@ static ssize_t mode_store(struct device return count; err: - spin_unlock(&afu->contexts_lock); + mutex_unlock(&afu->contexts_lock); return rc; } -- To unsubscribe from this list: send the line "unsubscribe stable" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html