Re: [PATCH 7/8] zsmalloc: replace per zpage lock with pool->migrate_lock

On Thu, Nov 11, 2021 at 10:07:27AM +0100, Sebastian Andrzej Siewior wrote:
> On 2021-11-10 10:54:32 [-0800], Minchan Kim wrote:
> > zsmalloc has used a bit spin_lock in the zpage handle to keep the
> > zpage object alive during several operations. However, it is a
> > problem for PREEMPT_RT as well as being too complicated.
> > 
> > This patch replaces the bit spin_lock with the pool->migrate_lock
> > rwlock. It makes the code simpler and lets zsmalloc work under
> > PREEMPT_RT.
> > 
> > The drawback is that pool->migrate_lock has coarser granularity than
> > the per-zpage lock, so contention would be higher than before when
> > both IO-related operations (i.e., zs_malloc, zs_free, zs_[map|unmap])
> > and compaction (page/zpage migration) run in parallel (*note that
> > migrate_lock is an rwlock and the IO-related functions all take the
> > read side, so there is no contention among them). However, the
> > write side is fast enough (the dominant overhead is just the page
> > copy), so it shouldn't matter much. If the lock granularity becomes
> > more of a problem later, we could introduce table locks hashed on
> > the handle.
> > 
> > Signed-off-by: Minchan Kim <minchan@xxxxxxxxxx>
> …
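
To make the read/write split described above concrete, here is a minimal
sketch of the intended pattern (illustrative only, not the actual zsmalloc
code; io_path()/migration_path() are made-up names and the bodies are
placeholders that just show which side each path takes):

struct zs_pool {
	rwlock_t migrate_lock;	/* pins zpages against migration/compaction */
	/* ... */
};

/*
 * IO paths (zs_malloc/zs_free/zs_map_object/zs_unmap_object): read side,
 * so they can run concurrently with each other.
 */
static void io_path(struct zs_pool *pool)
{
	read_lock(&pool->migrate_lock);
	/* the zpage cannot be migrated or freed from under us here */
	read_unlock(&pool->migrate_lock);
}

/*
 * zpage migration/compaction: write side, excludes all readers while
 * pages are being moved.
 */
static void migration_path(struct zs_pool *pool)
{
	write_lock(&pool->migrate_lock);
	/* move zpage contents; the dominant cost is the page copy itself */
	write_unlock(&pool->migrate_lock);
}
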
> > index b8b098be92fa..5d4c4d254679 100644
> > --- a/mm/zsmalloc.c
> > +++ b/mm/zsmalloc.c
> > @@ -1789,6 +1767,11 @@ static void migrate_write_lock(struct zspage *zspage)
> >  	write_lock(&zspage->lock);
> >  }
> >  
> > +static void migrate_write_lock_nested(struct zspage *zspage)
> > +{
> > +	write_lock_nested(&zspage->lock, SINGLE_DEPTH_NESTING);
> 
> I don't have this in my tree. 

I forgot it. I have appended it at the tail of this thread.
I will also include it in the next revision.

> 
> > +}
> > +
> >  static void migrate_write_unlock(struct zspage *zspage)
> >  {
> >  	write_unlock(&zspage->lock);
> …
> > @@ -2077,8 +2043,13 @@ static unsigned long __zs_compact(struct zs_pool *pool,
> >  	struct zspage *dst_zspage = NULL;
> >  	unsigned long pages_freed = 0;
> >  
> > +	/* protect the race between zpage migration and zs_free */
> > +	write_lock(&pool->migrate_lock);
> > +	/* protect zpage allocation/free */
> >  	spin_lock(&class->lock);
> >  	while ((src_zspage = isolate_zspage(class, true))) {
> > +		/* protect someone accessing the zspage(i.e., zs_map_object) */
> > +		migrate_write_lock(src_zspage);
> >  
> >  		if (!zs_can_compact(class))
> >  			break;
> > @@ -2087,6 +2058,8 @@ static unsigned long __zs_compact(struct zs_pool *pool,
> >  		cc.s_page = get_first_page(src_zspage);
> >  
> >  		while ((dst_zspage = isolate_zspage(class, false))) {
> > +			migrate_write_lock_nested(dst_zspage);
> > +
> >  			cc.d_page = get_first_page(dst_zspage);
> >  			/*
> >  			 * If there is no more space in dst_page, resched
> 
> Looking at these two chunks: the page here comes from a list, you
> remove that page from that list, and this ensures that you can't lock
> the very same pages in reverse order, as in:
> 
>    migrate_write_lock(dst_zspage);
>    …
>    	migrate_write_lock(src_zspage);
> 
> right?

Sure.
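
For anyone following along, the ordering argument in compact form: a
simplified sketch using the names from the patch (compact_pair() is a
made-up wrapper, and the NULL checks plus the retry loop of the real
__zs_compact are omitted):

/*
 * isolate_zspage() removes the zspage from the class lists, so the zspage
 * locked as the source can never be handed back as the destination.  The
 * two write locks are therefore always taken in src -> dst order; the
 * _nested variant only tells lockdep that taking two locks of the same
 * class (zspage->lock) here is intentional.
 */
static void compact_pair(struct size_class *class)
{
	struct zspage *src_zspage, *dst_zspage;

	src_zspage = isolate_zspage(class, true);	/* off the lists now */
	migrate_write_lock(src_zspage);

	dst_zspage = isolate_zspage(class, false);	/* cannot be src_zspage */
	migrate_write_lock_nested(dst_zspage);		/* SINGLE_DEPTH_NESTING */

	/* ... migrate objects from src_zspage to dst_zspage ... */

	migrate_write_unlock(dst_zspage);
	migrate_write_unlock(src_zspage);
}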

From e0bfc5185bbd15c651a7a367b6d053b8c88b1e01 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@xxxxxxxxxx>
Date: Tue, 19 Oct 2021 15:34:09 -0700
Subject: [PATCH] locking/rwlocks: introduce write_lock_nested

Signed-off-by: Minchan Kim <minchan@xxxxxxxxxx>
---
 include/linux/rwlock.h          | 6 ++++++
 include/linux/rwlock_api_smp.h  | 9 +++++++++
 include/linux/spinlock_api_up.h | 1 +
 kernel/locking/spinlock.c       | 6 ++++++
 4 files changed, 22 insertions(+)

diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index 7ce9a51ae5c0..93086de7bf9e 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -70,6 +70,12 @@ do {								\
 #define write_lock(lock)	_raw_write_lock(lock)
 #define read_lock(lock)		_raw_read_lock(lock)
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define write_lock_nested(lock, subclass)	_raw_write_lock_nested(lock, subclass)
+#else
+#define write_lock_nested(lock, subclass)	_raw_write_lock(lock)
+#endif
+
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 
 #define read_lock_irqsave(lock, flags)			\
diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h
index abfb53ab11be..e0c866177c03 100644
--- a/include/linux/rwlock_api_smp.h
+++ b/include/linux/rwlock_api_smp.h
@@ -17,6 +17,7 @@
 
 void __lockfunc _raw_read_lock(rwlock_t *lock)		__acquires(lock);
 void __lockfunc _raw_write_lock(rwlock_t *lock)		__acquires(lock);
+void __lockfunc _raw_write_lock_nested(rwlock_t *lock, int subclass)	__acquires(lock);
 void __lockfunc _raw_read_lock_bh(rwlock_t *lock)	__acquires(lock);
 void __lockfunc _raw_write_lock_bh(rwlock_t *lock)	__acquires(lock);
 void __lockfunc _raw_read_lock_irq(rwlock_t *lock)	__acquires(lock);
@@ -46,6 +47,7 @@ _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 
 #ifdef CONFIG_INLINE_WRITE_LOCK
 #define _raw_write_lock(lock) __raw_write_lock(lock)
+#define _raw_write_lock_nested(lock, subclass) __raw_write_lock_nested(lock, subclass)
 #endif
 
 #ifdef CONFIG_INLINE_READ_LOCK_BH
@@ -211,6 +213,13 @@ static inline void __raw_write_lock(rwlock_t *lock)
 	LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
 }
 
+static inline void __raw_write_lock_nested(rwlock_t *lock, int subclass)
+{
+	preempt_disable();
+	rwlock_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+	LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
+}
+
 #endif /* !CONFIG_GENERIC_LOCKBREAK || CONFIG_DEBUG_LOCK_ALLOC */
 
 static inline void __raw_write_unlock(rwlock_t *lock)
diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
index d0d188861ad6..b8ba00ccccde 100644
--- a/include/linux/spinlock_api_up.h
+++ b/include/linux/spinlock_api_up.h
@@ -59,6 +59,7 @@
 #define _raw_spin_lock_nested(lock, subclass)	__LOCK(lock)
 #define _raw_read_lock(lock)			__LOCK(lock)
 #define _raw_write_lock(lock)			__LOCK(lock)
+#define _raw_write_lock_nested(lock, subclass)	__LOCK(lock)
 #define _raw_spin_lock_bh(lock)			__LOCK_BH(lock)
 #define _raw_read_lock_bh(lock)			__LOCK_BH(lock)
 #define _raw_write_lock_bh(lock)		__LOCK_BH(lock)
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index c5830cfa379a..22969ec69288 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -300,6 +300,12 @@ void __lockfunc _raw_write_lock(rwlock_t *lock)
 	__raw_write_lock(lock);
 }
 EXPORT_SYMBOL(_raw_write_lock);
+
+void __lockfunc _raw_write_lock_nested(rwlock_t *lock, int subclass)
+{
+	__raw_write_lock_nested(lock, subclass);
+}
+EXPORT_SYMBOL(_raw_write_lock_nested);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
-- 
2.34.0.rc1.387.gb447b232ab-goog
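
Usage of the new helper would look like the sketch below; this is an
illustration added for clarity, not taken from the zsmalloc patch (struct
item, item_init() and lock_pair() are made-up names). The point is two
rwlocks sharing one lockdep class because they are initialized from the
same rwlock_init() call site, taken in an order the caller guarantees,
with the inner acquisition annotated so lockdep does not report a false
ABBA deadlock:

#include <linux/spinlock.h>
#include <linux/lockdep.h>

struct item {
	rwlock_t lock;
	/* ... */
};

static void item_init(struct item *it)
{
	rwlock_init(&it->lock);	/* all items share one lockdep class */
}

/*
 * The caller guarantees a stable order (e.g. 'a' was taken off a shared
 * list before 'b' was looked up), so no real ABBA deadlock is possible.
 */
static void lock_pair(struct item *a, struct item *b)
{
	write_lock(&a->lock);					/* subclass 0 */
	write_lock_nested(&b->lock, SINGLE_DEPTH_NESTING);	/* subclass 1 */

	/* ... work on both items ... */

	write_unlock(&b->lock);
	write_unlock(&a->lock);
}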





