[PATCH v2 1/3] lib/percpu-list: Per-cpu list with associated per-cpu locks

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Linked list is used everywhere in the Linux kernel. However, if many
threads are trying to add or delete entries into the same linked list,
it can create a performance bottleneck.

This patch introduces a new per-cpu list subystem with associated
per-cpu locks for protecting each of the lists individually. This
allows list entries insertion and deletion operations to happen in
parallel instead of being serialized with a global list and lock.

List entry insertion is strictly per cpu. List deletion, however, can
happen in a cpu other than the one that did the insertion. So we still
need lock to protect the list. Because of that, there may still be
a small amount of contention when deletion is being done.

A new header file include/linux/percpu-list.h will be added with the
associated pcpu_list_head and pcpu_list_node structures. The following
functions are provided to manage the per-cpu list:

 1. int init_pcpu_list_head(struct pcpu_list_head **ppcpu_head)
 2. void pcpu_list_add(struct pcpu_list_node *node,
		       struct pcpu_list_head *head)
 3. void pcpu_list_del(struct pcpu_list *node)
 4. void pcpu_list_iterate(struct pcpu_list_head *pcpu_head,
			   bool safe,
			   struct list_head **phead,
			   int (*iter)(struct pcpu_list_node *,
				       struct pcpu_list_node *,
				       spinlock_t *, void *),
			   void *arg);

The last one is used to iterate all the list nodes in a group of
per-cpu lists. An iterator function has to be provided to access
each list node one-by-one.

Signed-off-by: Waiman Long <Waiman.Long@xxxxxxx>
---
 include/linux/percpu-list.h |   94 ++++++++++++++++++++++++++++
 lib/Makefile                |    2 +-
 lib/percpu-list.c           |  144 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 239 insertions(+), 1 deletions(-)
 create mode 100644 include/linux/percpu-list.h
 create mode 100644 lib/percpu-list.c

diff --git a/include/linux/percpu-list.h b/include/linux/percpu-list.h
new file mode 100644
index 0000000..9c100cb
--- /dev/null
+++ b/include/linux/percpu-list.h
@@ -0,0 +1,94 @@
+/*
+ * Per-cpu list
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2016 Hewlett-Packard Enterprise Development LP
+ *
+ * Authors: Waiman Long <waiman.long@xxxxxxx>
+ */
+#ifndef __LINUX_PERCPU_LIST_H
+#define __LINUX_PERCPU_LIST_H
+
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+
+/*
+ * include/linux/percpu-list.h
+ *
+ * A per-cpu list protected by a per-cpu spinlock.
+ *
+ * The pcpu_list_head structure contains the spinlock, the other
+ * pcpu_list_node structures only contains a pointer to the spinlock in
+ * pcpu_list_head.
+ */
+struct pcpu_list_head {
+	struct list_head list;
+	spinlock_t lock;
+};
+
+struct pcpu_list_node {
+	struct list_head list;
+	spinlock_t *lockptr;
+};
+
+#define PCPU_LIST_HEAD_INIT(name)				\
+	{							\
+		.list.prev = &name.list,			\
+		.list.next = &name.list,			\
+		.list.lock = __SPIN_LOCK_UNLOCKED(name),	\
+	}
+
+#define PCPU_LIST_NODE_INIT(name)		\
+	{					\
+		.list.prev = &name.list,	\
+		.list.next = &name.list,	\
+		.list.lockptr = NULL		\
+	}
+
+#define pcpu_list_next_entry(pos, member) list_next_entry(pos, member.list)
+
+static inline void init_pcpu_list_node(struct pcpu_list_node *node)
+{
+	INIT_LIST_HEAD(&node->list);
+	node->lockptr = NULL;
+}
+
+static inline void free_pcpu_list_head(struct pcpu_list_head **pcpu_head_handle)
+{
+	free_percpu(*pcpu_head_handle);
+	*pcpu_head_handle = NULL;
+}
+
+static inline bool pcpu_list_empty(struct pcpu_list_head *pcpu_head)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		if (!list_empty(&per_cpu_ptr(pcpu_head, cpu)->list))
+			return false;
+	return true;
+}
+
+extern int  init_pcpu_list_head(struct pcpu_list_head **ppcpu_head);
+extern void pcpu_list_add(struct pcpu_list_node *node,
+			  struct pcpu_list_head *head);
+extern void pcpu_list_del(struct pcpu_list_node *node);
+extern void pcpu_list_iterate(struct pcpu_list_head *pcpu_head,
+			      bool safe,
+			      struct list_head **phead,
+			      int (*iter)(struct pcpu_list_node *,
+					  struct pcpu_list_node **,
+					  spinlock_t *, void *),
+			      void *arg);
+
+#endif /* __LINUX_PERCPU_LIST_H */
diff --git a/lib/Makefile b/lib/Makefile
index a7c26a4..71a25d4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -27,7 +27,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
 	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
 	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \
-	 once.o
+	 once.o percpu-list.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += hexdump.o
diff --git a/lib/percpu-list.c b/lib/percpu-list.c
new file mode 100644
index 0000000..498b381
--- /dev/null
+++ b/lib/percpu-list.c
@@ -0,0 +1,144 @@
+/*
+ * Per-cpu list
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2016 Hewlett-Packard Enterprise Development LP
+ *
+ * Authors: Waiman Long <waiman.long@xxxxxxx>
+ */
+#include <linux/percpu-list.h>
+
+/*
+ * Initialize the per-cpu list
+ */
+int init_pcpu_list_head(struct pcpu_list_head **ppcpu_head)
+{
+	struct pcpu_list_head *pcpu_head = alloc_percpu(struct pcpu_list_head);
+	int cpu;
+
+	if (!pcpu_head)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		struct pcpu_list_head *head = per_cpu_ptr(pcpu_head, cpu);
+
+		INIT_LIST_HEAD(&head->list);
+		head->lock = __SPIN_LOCK_UNLOCKED(&head->lock);
+	}
+
+	*ppcpu_head = pcpu_head;
+	return 0;
+}
+
+/*
+ * List selection is based on the CPU being used when the pcpu_list_add()
+ * function is called. However, deletion may be done by a different CPU.
+ * So we still need to use a lock to protect the content of the list.
+ */
+void pcpu_list_add(struct pcpu_list_node *node, struct pcpu_list_head *head)
+{
+	spinlock_t *lock;
+
+	/*
+	 * There is a very slight chance the cpu will be changed
+	 * (by preemption) before calling spin_lock(). We only need to put
+	 * the node in one of the per-cpu lists. It may not need to be
+	 * that of the current cpu.
+	 */
+	lock = this_cpu_ptr(&head->lock);
+	spin_lock(lock);
+	node->lockptr = lock;
+	list_add(&node->list, this_cpu_ptr(&head->list));
+	spin_unlock(lock);
+}
+
+/*
+ * Delete a node from a percpu list
+ *
+ * We need to check the lock pointer again after taking the lock to guard
+ * against concurrent delete of the same node. If the lock pointer changes
+ * (becomes NULL or to a different one), we assume that the deletion was done
+ * elsewhere.
+ */
+void pcpu_list_del(struct pcpu_list_node *node)
+{
+	spinlock_t *lock = READ_ONCE(node->lockptr);
+
+	if (unlikely(!lock))
+		return;
+
+	spin_lock(lock);
+	if (likely(lock == node->lockptr)) {
+		list_del_init(&node->list);
+		node->lockptr = NULL;
+	}
+	spin_unlock(lock);
+}
+
+/**
+ * Iteration function over all the nodes of a group of per-cpu lists.
+ * @pcpu_head:	the per-cpu list head
+ * @safe:	true if the iteration has to be safe against removal of node
+ * @phead:	a handle to save the per-cpu list head pointer
+ * @iter:	the iterator function to be called for each node
+ * @arg:	the argument (structure pointer) to be passed to func
+ *
+ * This function does not return any value. Returned value from the iterator
+ * function has to be passed via the structure pointed to by arg argument.
+ *
+ * The supplied iterator function should have the following prototype:
+ * int func(struct pcpu_list_node *node, struct pcpu_list_node *next,
+ *	    spinlock_t *lock, void *arg)
+ * where
+ *  node:  the current per-cpu list node
+ *  pnext: a handle to the next per-cpu list node (defined only if safe=true)
+ *  lock:  the spinlock that has currently be acquired
+ *  arg:   the structure pointer passed into pcpu_list_iterate()
+ *
+ * A non-zero return value will cause it to break out of the iteration loop
+ * and return immediately. The iterator function must release the lock before
+ * returning a non-zero value.
+ */
+void pcpu_list_iterate(struct pcpu_list_head *pcpu_head,
+		       bool safe,
+		       struct list_head **phead,
+		       int (*iter)(struct pcpu_list_node *,
+				   struct pcpu_list_node **,
+				   spinlock_t *, void *),
+		       void *arg)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		spinlock_t	 *lock = &per_cpu_ptr(pcpu_head, cpu)->lock;
+		struct list_head *head = &per_cpu_ptr(pcpu_head, cpu)->list;
+		struct pcpu_list_node *node, *next;
+
+		if (list_empty(head))
+			continue;
+
+		if (phead)
+			*phead = head;	/* Save current list head */
+
+		spin_lock(lock);
+		if (safe) {
+			list_for_each_entry_safe(node, next, head, list)
+				if (iter(node, &next, lock, arg))
+					return;
+		} else {
+			list_for_each_entry(node, head, list)
+				if (iter(node, NULL, lock, arg))
+					return;
+		}
+		spin_unlock(lock);
+	}
+}
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux