Hi Yongxin,

> Hi rt-users,
>
> The current NVDIMM driver doesn't work well with the RT kernel.
> nd_region_acquire_lane() disables preemption with get_cpu(), which
> causes "scheduling while atomic" spews on RT when using fio to test
> pmem as a block device:
>
> BUG: scheduling while atomic: fio/2514/0x00000002
> [<ffffffffc03608d9>] nd_region_acquire_lane+0x19/0x80 [libnvdimm]
> Call Trace:
>  dump_stack+0x4f/0x6a
>  ? nd_region_acquire_lane+0x19/0x80 [libnvdimm]
>  __schedule_bug.cold.17+0x38/0x55
>  __schedule+0x484/0x6c0
>  ? _raw_spin_lock+0x17/0x40
>  schedule+0x3d/0xe0
>  rt_spin_lock_slowlock_locked+0x118/0x2a0
>  rt_spin_lock_slowlock+0x57/0x90
>  rt_spin_lock+0x52/0x60
>  btt_write_pg.isra.16+0x280/0x4b0 [nd_btt]
>  btt_make_request+0x1b1/0x320 [nd_btt]
>  generic_make_request+0x1dc/0x3f0
>  submit_bio+0x49/0x140
>
> Testing command:
> fio -filename=/dev/pmem0s -direct=1 -iodepth 1 -thread -rw=randrw
> -rwmixread=70 -ioengine=psync -bs=16k -size=1G -numjobs=30
> -runtime=100 -group_reporting -name=mytest
>
> Dan Williams proposed a patch for NVDIMM on RT in this mailing list
> archive:
> https://www.mail-archive.com/linux-nvdimm@xxxxxxxxxxxx/msg13288.html
>
> I am pasting it here for your (rt-users') opinion.
> I appreciate your valuable feedback.

I gave some suggestions on the linux-nvdimm mailing list [1]; not sure
if you got a chance to look at them.

Thanks,
Pankaj

[1] https://www.mail-archive.com/linux-nvdimm@xxxxxxxxxxxx/msg13359.html

> Here is the patch.
> --------------------------------------
>
>  drivers/nvdimm/region_devs.c | 36 +++++++++++++++---------------------
>  1 file changed, 15 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
> index 63cb01ef4ef0..0eecc8670f80 100644
> --- a/drivers/nvdimm/region_devs.c
> +++ b/drivers/nvdimm/region_devs.c
> @@ -926,18 +926,15 @@ int nd_blk_region_init(struct nd_region *nd_region)
>  unsigned int nd_region_acquire_lane(struct nd_region *nd_region)
>  {
>  	unsigned int cpu, lane;
> +	struct nd_percpu_lane *ndl_lock, *ndl_count;
>
> -	cpu = get_cpu();
> -	if (nd_region->num_lanes < nr_cpu_ids) {
> -		struct nd_percpu_lane *ndl_lock, *ndl_count;
> +	cpu = get_cpu_light();
>
> -		lane = cpu % nd_region->num_lanes;
> -		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
> -		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
> -		if (ndl_count->count++ == 0)
> -			spin_lock(&ndl_lock->lock);
> -	} else
> -		lane = cpu;
> +	lane = cpu % nd_region->num_lanes;
> +	ndl_count = per_cpu_ptr(nd_region->lane, cpu);
> +	ndl_lock = per_cpu_ptr(nd_region->lane, lane);
> +	if (ndl_count->count++ == 0)
> +		spin_lock(&ndl_lock->lock);
>
>  	return lane;
>  }
> @@ -945,17 +942,14 @@ EXPORT_SYMBOL(nd_region_acquire_lane);
>
>  void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane)
>  {
> -	if (nd_region->num_lanes < nr_cpu_ids) {
> -		unsigned int cpu = get_cpu();
> -		struct nd_percpu_lane *ndl_lock, *ndl_count;
> -
> -		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
> -		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
> -		if (--ndl_count->count == 0)
> -			spin_unlock(&ndl_lock->lock);
> -		put_cpu();
> -	}
> -	put_cpu();
> +	unsigned int cpu = get_cpu_light();
> +	struct nd_percpu_lane *ndl_lock, *ndl_count;
> +
> +	ndl_count = per_cpu_ptr(nd_region->lane, cpu);
> +	ndl_lock = per_cpu_ptr(nd_region->lane, lane);
> +	if (--ndl_count->count == 0)
> +		spin_unlock(&ndl_lock->lock);
> +	put_cpu_light();
>  }
>  EXPORT_SYMBOL(nd_region_release_lane);
>
> --
> 2.14.4
>
> Thanks,
> Yongxin
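
P.S. For anyone on linux-rt-users who hasn't seen the original thread: the
core of the problem is that get_cpu() disables preemption, and on an RT
kernel the spin_lock() taken afterwards is a sleeping rt_mutex, so the
acquire path ends up sleeping inside an atomic section. Below is a minimal
sketch of the two patterns, assuming an RT-patched tree where
get_cpu_light()/put_cpu_light() are the RT-patchset helpers that disable
migration but leave preemption enabled. This is illustrative only, not the
driver code:

	#include <linux/smp.h>
	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(lane_lock);

	static void broken_on_rt(void)
	{
		int cpu = get_cpu();	/* disables preemption */

		/*
		 * On RT, spin_lock() may sleep (rt_mutex underneath);
		 * sleeping with preemption disabled is what triggers
		 * "BUG: scheduling while atomic".
		 */
		spin_lock(&lane_lock);
		/* ... use the per-CPU lane for cpu ... */
		spin_unlock(&lane_lock);
		put_cpu();
	}

	static void ok_on_rt(void)
	{
		int cpu = get_cpu_light();	/* disables migration only */

		spin_lock(&lane_lock);	/* sleeping is now permitted */
		/* ... use the per-CPU lane for cpu ... */
		spin_unlock(&lane_lock);
		put_cpu_light();
	}

Since get_cpu_light() only pins the task to the current CPU rather than
disabling preemption, the per-CPU count and lane index stay stable while
the (now sleepable) lane lock serializes users, which is exactly the shape
of Dan's patch above.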