Hi rt-users,

The current NVDIMM driver doesn't work well with the RT kernel. nd_region_acquire_lane() disables preemption with get_cpu(), which leads to "scheduling while atomic" splats on RT when using fio to test pmem as a block device:

BUG: scheduling while atomic: fio/2514/0x00000002
[<ffffffffc03608d9>] nd_region_acquire_lane+0x19/0x80 [libnvdimm]
Call Trace:
 dump_stack+0x4f/0x6a
 ? nd_region_acquire_lane+0x19/0x80 [libnvdimm]
 __schedule_bug.cold.17+0x38/0x55
 __schedule+0x484/0x6c0
 ? _raw_spin_lock+0x17/0x40
 schedule+0x3d/0xe0
 rt_spin_lock_slowlock_locked+0x118/0x2a0
 rt_spin_lock_slowlock+0x57/0x90
 rt_spin_lock+0x52/0x60
 btt_write_pg.isra.16+0x280/0x4b0 [nd_btt]
 btt_make_request+0x1b1/0x320 [nd_btt]
 generic_make_request+0x1dc/0x3f0
 submit_bio+0x49/0x140

Testing command:

fio -filename=/dev/pmem0s -direct=1 -iodepth 1 -thread -rw=randrw -rwmixread=70 -ioengine=psync -bs=16k -size=1G -numjobs=30 -runtime=100 -group_reporting -name=mytest

Dan Williams proposed a patch for NVDIMM on RT in this mailing list archive:

https://www.mail-archive.com/linux-nvdimm@xxxxxxxxxxxx/msg13288.html

I am reposting it here on linux-rt-users for your opinion. I would appreciate your feedback.

Here is the patch.

--------------------------------------

 drivers/nvdimm/region_devs.c | 36 +++++++++++++++---------------------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 63cb01ef4ef0..0eecc8670f80 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -926,18 +926,15 @@ int nd_blk_region_init(struct nd_region *nd_region)
 unsigned int nd_region_acquire_lane(struct nd_region *nd_region)
 {
 	unsigned int cpu, lane;
+	struct nd_percpu_lane *ndl_lock, *ndl_count;
 
-	cpu = get_cpu();
-	if (nd_region->num_lanes < nr_cpu_ids) {
-		struct nd_percpu_lane *ndl_lock, *ndl_count;
+	cpu = get_cpu_light();
 
-		lane = cpu % nd_region->num_lanes;
-		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
-		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
-		if (ndl_count->count++ == 0)
-			spin_lock(&ndl_lock->lock);
-	} else
-		lane = cpu;
+	lane = cpu % nd_region->num_lanes;
+	ndl_count = per_cpu_ptr(nd_region->lane, cpu);
+	ndl_lock = per_cpu_ptr(nd_region->lane, lane);
+	if (ndl_count->count++ == 0)
+		spin_lock(&ndl_lock->lock);
 
 	return lane;
 }
@@ -945,17 +942,14 @@ EXPORT_SYMBOL(nd_region_acquire_lane);
 
 void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane)
 {
-	if (nd_region->num_lanes < nr_cpu_ids) {
-		unsigned int cpu = get_cpu();
-		struct nd_percpu_lane *ndl_lock, *ndl_count;
-
-		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
-		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
-		if (--ndl_count->count == 0)
-			spin_unlock(&ndl_lock->lock);
-		put_cpu();
-	} else
-		put_cpu();
+	unsigned int cpu = get_cpu_light();
+	struct nd_percpu_lane *ndl_lock, *ndl_count;
+
+	ndl_count = per_cpu_ptr(nd_region->lane, cpu);
+	ndl_lock = per_cpu_ptr(nd_region->lane, lane);
+	if (--ndl_count->count == 0)
+		spin_unlock(&ndl_lock->lock);
+	put_cpu_light();
 }
 EXPORT_SYMBOL(nd_region_release_lane);
--
2.14.4

Thanks,
Yongxin
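
P.S. For anyone not familiar with the RT-specific API: the reason get_cpu_light() helps is that on PREEMPT_RT spin_lock() is backed by a sleeping rt_mutex, so taking the lane lock inside a get_cpu()/put_cpu() (preempt-disabled) section is exactly what produces the splat above. get_cpu_light() pins the task to its current CPU without disabling preemption, so sleeping locks remain legal there. Roughly, as I understand the two APIs (a sketch of the idea, not the verbatim definitions; see include/linux/smp.h and the RT patch for the real ones):

/* Mainline get_cpu()/put_cpu(): pin the task by disabling preemption. */
#define get_cpu()		({ preempt_disable(); smp_processor_id(); })
#define put_cpu()		preempt_enable()

/*
 * RT get_cpu_light()/put_cpu_light() (sketch): pin the task to its
 * current CPU with migrate_disable() but leave preemption enabled,
 * so rt_mutex-based spinlocks may be taken and may sleep.
 */
#define get_cpu_light()		({ migrate_disable(); smp_processor_id(); })
#define put_cpu_light()		migrate_enable()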