[PATCH 1/4] struct rwmem: an abstraction of the memory argument to read/write

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This adds a structure and interface to represent the segments of memory
which are acting as the source or destination for a read or write operation.

Callers would fill this structure and then pass it down the rw path.

The intent is to let stages in the rw path make specific calls against this
API and structure instead of working with, say, struct iovec natively.

The main intent of this is to enable kernel calls into the rw path which
specify memory with page/offset/len tuples.

Another potential benefit of this is the reduction in iterations over iovecs at
various points in the kernel.  Each iov_length(iov) call, for example, could be
translated into rwm->total_bytes.  O_DIRECT's check of memory alignment is
changed into a single test against rwm->boundary_bits.

I imagine this might integrate well with the iov_iter interface, though I
haven't examined that in any depth.
---
 fs/Makefile           |    2 +-
 fs/rwmem.c            |   92 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/rwmem.h |   29 +++++++++++++++
 3 files changed, 122 insertions(+), 1 deletions(-)
 create mode 100644 fs/rwmem.c
 create mode 100644 include/linux/rwmem.h

diff --git a/fs/Makefile b/fs/Makefile
index 500cf15..c342365 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y :=	open.o read_write.o file_table.o super.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o \
 		pnode.o drop_caches.o splice.o sync.o utimes.o \
-		stack.o
+		stack.o rwmem.o
 
 ifeq ($(CONFIG_BLOCK),y)
 obj-y +=	buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/rwmem.c b/fs/rwmem.c
new file mode 100644
index 0000000..0433ba4
--- /dev/null
+++ b/fs/rwmem.c
@@ -0,0 +1,92 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/uio.h>
+#include <linux/rwmem.h>
+
+static inline unsigned long pages_spanned(unsigned long addr,
+					  unsigned long bytes)
+{
+	unsigned long end = (addr + bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	return end - (addr >> PAGE_SHIFT);	/* end page - first page */
+}
+
+/* Recompute total_bytes, nr_pages and boundary_bits from the iovec array. */
+void rwmem_iovec_init(struct rwmem *rwm)
+{
+	struct rwmem_iovec *rwi = container_of(rwm, struct rwmem_iovec, rwmem);
+	unsigned long i;
+
+	rwm->total_bytes = 0;
+	rwm->nr_pages = 0;
+	rwm->boundary_bits = 0;
+
+	for (i = 0; i < rwm->nr_segs; i++) {
+		const struct iovec *iov = &rwi->iov[i];
+		unsigned long base = (unsigned long)iov->iov_base;
+
+		rwm->total_bytes += iov->iov_len;
+		rwm->nr_pages += pages_spanned(base, iov->iov_len);
+		/* OR of every base and length, so one mask test checks all */
+		rwm->boundary_bits |= base | (unsigned long)iov->iov_len;
+	}
+}
+
+/* Offset of segment i's first byte within its page; BUGs if i is invalid. */
+unsigned long rwmem_iovec_seg_page_offset(struct rwmem *rwm, unsigned long i)
+{
+	struct rwmem_iovec *vec;
+
+	BUG_ON(i >= rwm->nr_segs);
+	vec = container_of(rwm, struct rwmem_iovec, rwmem);
+	return (unsigned long)vec->iov[i].iov_base & ~PAGE_MASK;
+}
+
+/* Length in bytes of segment i; BUGs if i is not a valid segment index. */
+unsigned long rwmem_iovec_seg_bytes(struct rwmem *rwm, unsigned long i)
+{
+	struct rwmem_iovec *vec;
+
+	BUG_ON(i >= rwm->nr_segs);
+	vec = container_of(rwm, struct rwmem_iovec, rwmem);
+	return vec->iov[i].iov_len;
+}
+
+/* Pin the next pages of segment i; returns what get_user_pages() returned. */
+int rwmem_iovec_get_seg_pages(struct rwmem *rwm, unsigned long i,
+			      unsigned long *cursor, struct page **pages,
+			      unsigned long max_pages, int write)
+{
+	struct rwmem_iovec *rwi = container_of(rwm, struct rwmem_iovec, rwmem);
+	const struct iovec *iov;
+	unsigned long end;
+	int ret;
+
+	BUG_ON(i >= rwm->nr_segs);
+	iov = &rwi->iov[i];
+	end = (unsigned long)iov->iov_base + iov->iov_len;
+
+	if (*cursor == 0)	/* 0: start at the segment's first byte */
+		*cursor = (unsigned long)iov->iov_base;
+	max_pages = min(pages_spanned(*cursor, end - *cursor), max_pages);
+
+	down_read(&current->mm->mmap_sem);
+	ret = get_user_pages(current, current->mm, *cursor, max_pages, write,
+			     0, pages, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	if (ret > 0) {
+		/* resume page-aligned so a partly covered tail page is not lost */
+		*cursor = (*cursor & PAGE_MASK) + ret * PAGE_SIZE;
+		if (*cursor >= end)
+			*cursor = ~0UL;	/* whole segment has been pinned */
+	}
+	return ret;
+}
+
+struct rwmem_ops rwmem_iovec_ops = {	/* iovec-backed rwmem methods */
+	.init			= rwmem_iovec_init,
+	.seg_page_offset	= rwmem_iovec_seg_page_offset,
+	.seg_bytes		= rwmem_iovec_seg_bytes,
+	.get_seg_pages		= rwmem_iovec_get_seg_pages,
+};
diff --git a/include/linux/rwmem.h b/include/linux/rwmem.h
new file mode 100644
index 0000000..666f9f4
--- /dev/null
+++ b/include/linux/rwmem.h
@@ -0,0 +1,29 @@
+#ifndef _LINUX_RWMEM_H
+#define _LINUX_RWMEM_H
+
+struct rwmem_ops;	/* forward declaration for struct rwmem's ops pointer */
+
+struct rwmem {
+	struct rwmem_ops	*ops;		/* backend methods */
+	size_t			total_bytes;	/* sum of segment lengths */
+	unsigned long		boundary_bits;	/* OR of bases and lengths */
+	unsigned long		nr_pages;	/* total pages spanned */
+	unsigned short		nr_segs;	/* number of segments */
+};
+
+struct rwmem_ops {	/* methods of one rwmem backend; i is a segment index */
+	void (*init)(struct rwmem *rwm);	/* fill in the summary fields */
+	unsigned long (*seg_page_offset)(struct rwmem *rwm, unsigned long i);
+	unsigned long (*seg_bytes)(struct rwmem *rwm, unsigned long i);
+	int (*get_seg_pages)(struct rwmem *rwm, unsigned long i,
+			     unsigned long *cursor, struct page **pages,
+			     unsigned long max_pages, int write);
+};
+
+struct rwmem_iovec {
+	struct rwmem		rwmem;	/* embedded; found via container_of() */
+	const struct iovec	*iov;	/* array of rwmem.nr_segs segments */
+};
+extern struct rwmem_ops rwmem_iovec_ops;	/* defined in fs/rwmem.c */
+
+#endif
-- 
1.5.2.2

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux