> Currently, the nvdimm driver isn't RT compatible:
> nd_region_acquire_lane() disables preemption with get_cpu(), which
> causes "scheduling while atomic" splats on RT when using fio to test
> pmem as a block device.
>
> In this change, we replace get_cpu()/put_cpu() with local_lock_cpu()/
> local_unlock_cpu() and introduce the per-CPU variable "ndl_local_lock".
> Because tasks can be preempted on RT, this lock serializes tasks on
> the same CPU that contend for the same lane. When the number of CPUs
> is greater than the number of lanes, a lane can be shared among CPUs;
> "ndl_lock->lock" protects the lane in that situation.
>
> This patch is derived from Dan Williams's and Pankaj Gupta's proposals at
> https://www.mail-archive.com/linux-nvdimm@xxxxxxxxxxxx/msg13359.html
> and https://www.spinics.net/lists/linux-rt-users/msg20280.html.
> Many thanks to them.
>
> Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
> Cc: Pankaj Gupta <pagupta@xxxxxxxxxx>
> Cc: linux-rt-users <linux-rt-users@xxxxxxxxxxxxxxx>
> Cc: linux-nvdimm <linux-nvdimm@xxxxxxxxxxxx>
> Signed-off-by: Yongxin Liu <yongxin.liu@xxxxxxxxxxxxx>

This patch looks good to me.

Acked-by: Pankaj Gupta <pagupta@xxxxxxxxxx>

> ---
>  drivers/nvdimm/region_devs.c | 40 +++++++++++++++++++---------------------
>  1 file changed, 19 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
> index fa37afcd43ff..6c5388cf2477 100644
> --- a/drivers/nvdimm/region_devs.c
> +++ b/drivers/nvdimm/region_devs.c
> @@ -18,9 +18,13 @@
>  #include <linux/sort.h>
>  #include <linux/io.h>
>  #include <linux/nd.h>
> +#include <linux/locallock.h>
>  #include "nd-core.h"
>  #include "nd.h"
>
> +/* lock for tasks on the same CPU to sequence the access to the lane */
> +static DEFINE_LOCAL_IRQ_LOCK(ndl_local_lock);
> +
>  /*
>   * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
>   * irrelevant.
> @@ -935,18 +939,15 @@ int nd_blk_region_init(struct nd_region *nd_region)
>  unsigned int nd_region_acquire_lane(struct nd_region *nd_region)
>  {
>          unsigned int cpu, lane;
> +        struct nd_percpu_lane *ndl_lock, *ndl_count;
>
> -        cpu = get_cpu();
> -        if (nd_region->num_lanes < nr_cpu_ids) {
> -                struct nd_percpu_lane *ndl_lock, *ndl_count;
> +        cpu = local_lock_cpu(ndl_local_lock);
>
> -                lane = cpu % nd_region->num_lanes;
> -                ndl_count = per_cpu_ptr(nd_region->lane, cpu);
> -                ndl_lock = per_cpu_ptr(nd_region->lane, lane);
> -                if (ndl_count->count++ == 0)
> -                        spin_lock(&ndl_lock->lock);
> -        } else
> -                lane = cpu;
> +        lane = cpu % nd_region->num_lanes;
> +        ndl_count = per_cpu_ptr(nd_region->lane, cpu);
> +        ndl_lock = per_cpu_ptr(nd_region->lane, lane);
> +        if (ndl_count->count++ == 0)
> +                spin_lock(&ndl_lock->lock);
>
>          return lane;
>  }
> @@ -954,17 +955,14 @@ EXPORT_SYMBOL(nd_region_acquire_lane);
>
>  void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane)
>  {
> -        if (nd_region->num_lanes < nr_cpu_ids) {
> -                unsigned int cpu = get_cpu();
> -                struct nd_percpu_lane *ndl_lock, *ndl_count;
> -
> -                ndl_count = per_cpu_ptr(nd_region->lane, cpu);
> -                ndl_lock = per_cpu_ptr(nd_region->lane, lane);
> -                if (--ndl_count->count == 0)
> -                        spin_unlock(&ndl_lock->lock);
> -                put_cpu();
> -        }
> -        put_cpu();
> +        struct nd_percpu_lane *ndl_lock, *ndl_count;
> +        unsigned int cpu = smp_processor_id();
> +
> +        ndl_count = per_cpu_ptr(nd_region->lane, cpu);
> +        ndl_lock = per_cpu_ptr(nd_region->lane, lane);
> +        if (--ndl_count->count == 0)
> +                spin_unlock(&ndl_lock->lock);
> +        local_unlock_cpu(ndl_local_lock);
>  }
>  EXPORT_SYMBOL(nd_region_release_lane);
>
> -- 
> 2.14.4