Hi rt-users,

The current NVDIMM driver doesn't work well with the RT kernel. nd_region_acquire_lane() disables preemption with get_cpu(), which leads to "scheduling while atomic" splats on RT when using fio to test pmem as a block device:

BUG: scheduling while atomic: fio/2514/0x00000002
[<ffffffffc03608d9>] nd_region_acquire_lane+0x19/0x80 [libnvdimm]
Call Trace:
 dump_stack+0x4f/0x6a
 ? nd_region_acquire_lane+0x19/0x80 [libnvdimm]
 __schedule_bug.cold.17+0x38/0x55
 __schedule+0x484/0x6c0
 ? _raw_spin_lock+0x17/0x40
 schedule+0x3d/0xe0
 rt_spin_lock_slowlock_locked+0x118/0x2a0
 rt_spin_lock_slowlock+0x57/0x90
 rt_spin_lock+0x52/0x60
 btt_write_pg.isra.16+0x280/0x4b0 [nd_btt]
 btt_make_request+0x1b1/0x320 [nd_btt]
 generic_make_request+0x1dc/0x3f0
 submit_bio+0x49/0x140

Testing command:

fio -filename=/dev/pmem0s -direct=1 -iodepth 1 -thread -rw=randrw -rwmixread=70 -ioengine=psync -bs=16k -size=1G -numjobs=30 -runtime=100 -group_reporting -name=mytest

Dan Williams proposed a patch for NVDIMM on RT in this mailing list archive:

https://www.mail-archive.com/linux-nvdimm@xxxxxxxxxxxx/msg13288.html

I am reposting it here on linux-rt-users for your opinion. I would appreciate your feedback.

Here is the patch.

--------------------------------------

 drivers/nvdimm/region_devs.c | 36 +++++++++++++++---------------------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 63cb01ef4ef0..0eecc8670f80 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -926,18 +926,15 @@ int nd_blk_region_init(struct nd_region *nd_region)
 unsigned int nd_region_acquire_lane(struct nd_region *nd_region)
 {
 	unsigned int cpu, lane;
+	struct nd_percpu_lane *ndl_lock, *ndl_count;
 
-	cpu = get_cpu();
-	if (nd_region->num_lanes < nr_cpu_ids) {
-		struct nd_percpu_lane *ndl_lock, *ndl_count;
+	cpu = get_cpu_light();
 
-		lane = cpu % nd_region->num_lanes;
-		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
-		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
-		if (ndl_count->count++ == 0)
-			spin_lock(&ndl_lock->lock);
-	} else
-		lane = cpu;
+	lane = cpu % nd_region->num_lanes;
+	ndl_count = per_cpu_ptr(nd_region->lane, cpu);
+	ndl_lock = per_cpu_ptr(nd_region->lane, lane);
+	if (ndl_count->count++ == 0)
+		spin_lock(&ndl_lock->lock);
 
 	return lane;
 }
@@ -945,17 +942,14 @@ EXPORT_SYMBOL(nd_region_acquire_lane);
 
 void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane)
 {
-	if (nd_region->num_lanes < nr_cpu_ids) {
-		unsigned int cpu = get_cpu();
-		struct nd_percpu_lane *ndl_lock, *ndl_count;
-
-		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
-		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
-		if (--ndl_count->count == 0)
-			spin_unlock(&ndl_lock->lock);
-		put_cpu();
-	} else
-		put_cpu();
+	unsigned int cpu = get_cpu_light();
+	struct nd_percpu_lane *ndl_lock, *ndl_count;
+
+	ndl_count = per_cpu_ptr(nd_region->lane, cpu);
+	ndl_lock = per_cpu_ptr(nd_region->lane, lane);
+	if (--ndl_count->count == 0)
+		spin_unlock(&ndl_lock->lock);
+	put_cpu_light();
 }
 EXPORT_SYMBOL(nd_region_release_lane);
--
2.14.4

Thanks,
Yongxin
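
P.S. For anyone not familiar with the RT-specific API: the reason get_cpu_light() helps is that on PREEMPT_RT spin_lock() is backed by a sleeping rt_mutex, so taking the lane lock inside a get_cpu()/put_cpu() (preempt-disabled) section is exactly what produces the splat above. get_cpu_light() pins the task to its current CPU without disabling preemption, so sleeping locks remain legal there. Roughly, as I understand the two APIs (a sketch of the idea, not the verbatim definitions; see include/linux/smp.h and the RT patch for the real ones):

/* Mainline get_cpu()/put_cpu(): pin the task by disabling preemption. */
#define get_cpu()		({ preempt_disable(); smp_processor_id(); })
#define put_cpu()		preempt_enable()

/*
 * RT get_cpu_light()/put_cpu_light() (sketch): pin the task to its
 * current CPU with migrate_disable() but leave preemption enabled,
 * so rt_mutex-based spinlocks may be taken and may sleep.
 */
#define get_cpu_light()		({ migrate_disable(); smp_processor_id(); })
#define put_cpu_light()		migrate_enable()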