On 2016-02-12 01:14, Ahmed S. Darwish wrote: > Memfd is a simple memory sharing mechanism, added by the systemd/kdbus > developers, to share pages between processes in an anonymous, no global > registry needed, no mount-point required, relatively secure, manner. > > This patch introduces the necessary building blocks for using memfd > shared memory transfers in PulseAudio. > > Memfd support shall also help us in laying out the necessary (but not > yet sufficient) groundwork for application sandboxing, protecting PA > from ts clients, and protecting clients data from each other. > > We plan to exclusively use memfds, instead of POSIX SHM, on the way > forward. > > Signed-off-by: Ahmed S. Darwish <darwish.07 at gmail.com> > --- > configure.ac | 19 ++++++ > src/Makefile.am | 5 ++ > src/pulsecore/memfd-wrappers.h | 68 +++++++++++++++++++ > src/pulsecore/shm.c | 146 ++++++++++++++++++++++++++++------------- > src/pulsecore/shm.h | 16 ++++- > 5 files changed, 209 insertions(+), 45 deletions(-) > create mode 100644 src/pulsecore/memfd-wrappers.h > > diff --git a/configure.ac b/configure.ac > index dcd0110..256a196 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -607,6 +607,23 @@ AC_DEFINE(HAVE_DLADDR, [1], [Have dladdr?]) > > AM_ICONV > > +#### Linux memfd_create(2) SHM support #### > + > +AC_ARG_ENABLE([memfd], > + AS_HELP_STRING([--disable-memfd], [Disable Linux memfd shared memory])) > + > +AS_IF([test "x$enable_memfd" != "xno"], > + AC_CHECK_DECL(SYS_memfd_create, [HAVE_MEMFD=1], [HAVE_MEMFD=0], [#include <sys/syscall.h>]), > + [HAVE_MEMFD=0]) > + > +AS_IF([test "x$enable_memfd" = "xyes" && test "x$HAVE_MEMFD" = "x0"], > + [AC_MSG_ERROR([*** Your Linux kernel does not support memfd shared memory. > + *** Use linux v3.17 or higher for such a feature.])]) > + > +AC_SUBST(HAVE_MEMFD) > +AM_CONDITIONAL([HAVE_MEMFD], [test "x$HAVE_MEMFD" = x1]) > +AS_IF([test "x$HAVE_MEMFD" = "x1"], AC_DEFINE([HAVE_MEMFD], 1, [Have memfd shared memory.])) > + > #### X11 (optional) #### > > AC_ARG_ENABLE([x11], > @@ -1539,6 +1556,7 @@ AC_OUTPUT > > # ========================================================================== > > +AS_IF([test "x$HAVE_MEMFD" = "x1"], ENABLE_MEMFD=yes, ENABLE_MEMFD=no) > AS_IF([test "x$HAVE_X11" = "x1"], ENABLE_X11=yes, ENABLE_X11=no) > AS_IF([test "x$HAVE_OSS_OUTPUT" = "x1"], ENABLE_OSS_OUTPUT=yes, ENABLE_OSS_OUTPUT=no) > AS_IF([test "x$HAVE_OSS_WRAPPER" = "x1"], ENABLE_OSS_WRAPPER=yes, ENABLE_OSS_WRAPPER=no) > @@ -1600,6 +1618,7 @@ echo " > CPPFLAGS: ${CPPFLAGS} > LIBS: ${LIBS} > > + Enable memfd shared memory: ${ENABLE_MEMFD} > Enable X11: ${ENABLE_X11} > Enable OSS Output: ${ENABLE_OSS_OUTPUT} > Enable OSS Wrapper: ${ENABLE_OSS_WRAPPER} > diff --git a/src/Makefile.am b/src/Makefile.am > index f3a62fb..be8d3d7 100644 > --- a/src/Makefile.am > +++ b/src/Makefile.am > @@ -729,6 +729,11 @@ libpulsecommon_ at PA_MAJORMINOR@_la_CFLAGS = $(AM_CFLAGS) $(LIBJSON_CFLAGS) $(LIBS > libpulsecommon_ at PA_MAJORMINOR@_la_LDFLAGS = $(AM_LDFLAGS) $(AM_LIBLDFLAGS) -avoid-version > libpulsecommon_ at PA_MAJORMINOR@_la_LIBADD = $(AM_LIBADD) $(LIBJSON_LIBS) $(LIBWRAP_LIBS) $(WINSOCK_LIBS) $(LTLIBICONV) $(LIBSNDFILE_LIBS) > > +if HAVE_MEMFD > +libpulsecommon_ at PA_MAJORMINOR@_la_SOURCES += \ > + pulsecore/memfd-wrappers.h > +endif > + > if HAVE_X11 > libpulsecommon_ at PA_MAJORMINOR@_la_SOURCES += \ > pulse/client-conf-x11.c pulse/client-conf-x11.h \ > diff --git a/src/pulsecore/memfd-wrappers.h b/src/pulsecore/memfd-wrappers.h > new file mode 100644 > index 0000000..3bed9b2 > --- /dev/null > +++ b/src/pulsecore/memfd-wrappers.h > @@ -0,0 +1,68 @@ > +#ifndef foopulsememfdwrappershfoo > +#define foopulsememfdwrappershfoo > + > +/*** > + This file is part of PulseAudio. > + > + Copyright 2016 Ahmed S. Darwish <darwish.07 at gmail.com> > + > + PulseAudio is free software; you can redistribute it and/or modify > + it under the terms of the GNU Lesser General Public License as > + published by the Free Software Foundation; either version 2.1 of the > + License, or (at your option) any later version. > + > + PulseAudio is distributed in the hope that it will be useful, but > + WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with PulseAudio; if not, see <http://www.gnu.org/licenses/>. > +***/ > + > +#ifdef HAVE_MEMFD > + > +#include <sys/syscall.h> > +#include <fcntl.h> > + > +/* > + * No glibc wrappers exist for memfd_create(2), so provide our own. > + * > + * Also define memfd fcntl sealing macros. While they are already > + * defined in the kernel header file <linux/fcntl.h>, that file as > + * a whole conflicts with the original glibc header <fnctl.h>. > + */ > + > +static inline int memfd_create(const char *name, unsigned int flags) { > + return syscall(SYS_memfd_create, name, flags); > +} > + > +/* memfd_create(2) flags */ > + > +#ifndef MFD_CLOEXEC > +#define MFD_CLOEXEC 0x0001U > +#endif > + > +#ifndef MFD_ALLOW_SEALING > +#define MFD_ALLOW_SEALING 0x0002U > +#endif > + > +/* fcntl() seals-related flags */ > + > +#ifndef F_LINUX_SPECIFIC_BASE > +#define F_LINUX_SPECIFIC_BASE 1024 > +#endif > + > +#ifndef F_ADD_SEALS > +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) > +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) > + > +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ > +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ > +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ > +#define F_SEAL_WRITE 0x0008 /* prevent writes */ > +#endif > + > +#endif /* HAVE_MEMFD */ > + > +#endif > diff --git a/src/pulsecore/shm.c b/src/pulsecore/shm.c > index 4c0ecf1..0ca4fb0 100644 > --- a/src/pulsecore/shm.c > +++ b/src/pulsecore/shm.c > @@ -45,6 +45,7 @@ > #include <pulse/xmalloc.h> > #include <pulse/gccmacro.h> > > +#include <pulsecore/memfd-wrappers.h> > #include <pulsecore/core-error.h> > #include <pulsecore/log.h> > #include <pulsecore/random.h> > @@ -88,7 +89,12 @@ struct shm_marker { > uint64_t _reserved4; > } PA_GCC_PACKED; > > -#define SHM_MARKER_SIZE PA_ALIGN(sizeof(struct shm_marker)) > +static inline size_t shm_marker_size(pa_shm *m) { > + if (m->type == PA_MEM_TYPE_SHARED_POSIX) > + return PA_ALIGN(sizeof(struct shm_marker)); > + > + return 0; > +} > > #ifdef HAVE_SHM_OPEN > static char *segment_name(char *fn, size_t l, unsigned id) { > @@ -100,8 +106,8 @@ static char *segment_name(char *fn, size_t l, unsigned id) { > int pa_shm_create(pa_shm *m, pa_mem_type_t type, size_t size, mode_t mode) { > #ifdef HAVE_SHM_OPEN > char fn[32]; > - int fd = -1; > #endif > + int fd = -1; > struct shm_marker *marker; > > pa_assert(m); > @@ -118,16 +124,33 @@ int pa_shm_create(pa_shm *m, pa_mem_type_t type, size_t size, mode_t mode) { > /* Round up to make it page aligned */ > size = PA_PAGE_ALIGN(size); > > -#ifdef HAVE_SHM_OPEN > pa_random(&m->id, sizeof(m->id)); > - segment_name(fn, sizeof(fn), m->id); > > - if ((fd = shm_open(fn, O_RDWR|O_CREAT|O_EXCL, mode)) < 0) { > - pa_log("shm_open() failed: %s", pa_cstrerror(errno)); > + switch (type) { > +#ifdef HAVE_SHM_OPEN > + case PA_MEM_TYPE_SHARED_POSIX: > + segment_name(fn, sizeof(fn), m->id); > + fd = shm_open(fn, O_RDWR|O_CREAT|O_EXCL, mode); > + m->per_type.posix_shm.do_unlink = true; > + break; > +#endif > +#ifdef HAVE_MEMFD > + case PA_MEM_TYPE_SHARED_MEMFD: > + fd = memfd_create("memfd", MFD_ALLOW_SEALING); > + m->per_type.memfd.fd = fd; These failure paths look a bit hairy. If mmap() fails e g, will this not lead to double close of the same fd, first in this function and then later in pa_shm_free? I e, we should not set "m->per_type.memfd.fd = fd" until after all "goto fail"s? > + break; > +#endif > + default: > goto fail; > } > > - m->mem.size = size + SHM_MARKER_SIZE; > + if (fd < 0) { > + pa_log("%s open() failed: %s", pa_mem_type_to_string(type), pa_cstrerror(errno)); > + goto fail; > + } > + > + m->type = type; > + m->mem.size = size + shm_marker_size(m); > > if (ftruncate(fd, (off_t) m->mem.size) < 0) { > pa_log("ftruncate() failed: %s", pa_cstrerror(errno)); > @@ -143,25 +166,29 @@ int pa_shm_create(pa_shm *m, pa_mem_type_t type, size_t size, mode_t mode) { > goto fail; > } > > - /* We store our PID at the end of the shm block, so that we > - * can check for dead shm segments later */ > - marker = (struct shm_marker*) ((uint8_t*) m->mem.ptr + m->mem.size - SHM_MARKER_SIZE); > - pa_atomic_store(&marker->pid, (int) getpid()); > - pa_atomic_store(&marker->marker, SHM_MARKER); > + if (type == PA_MEM_TYPE_SHARED_POSIX) { > + /* We store our PID at the end of the shm block, so that we > + * can check for dead shm segments later */ > + marker = (struct shm_marker*) ((uint8_t*) m->mem.ptr + m->mem.size - shm_marker_size(m)); > + pa_atomic_store(&marker->pid, (int) getpid()); > + pa_atomic_store(&marker->marker, SHM_MARKER); > + } > > - pa_assert_se(pa_close(fd) == 0); > - m->do_unlink = true; > -#else > - goto fail; > -#endif > + /* For memfds, we keep the fd open until we pass it to the other > + * PA endpoint over the unix domain socket */ > + if (type != PA_MEM_TYPE_SHARED_MEMFD) > + pa_assert_se(pa_close(fd) == 0); > > return 0; > > fail: > > -#ifdef HAVE_SHM_OPEN > +#if defined(HAVE_SHM_OPEN) || defined(HAVE_MEMFD) > if (fd >= 0) { > - shm_unlink(fn); > +#ifdef HAVE_SHM_OPEN > + if (type == PA_MEM_TYPE_SHARED_POSIX) > + shm_unlink(fn); > +#endif > pa_close(fd); > } > #endif > @@ -178,19 +205,28 @@ void pa_shm_free(pa_shm *m) { > pa_assert(m->mem.ptr != MAP_FAILED); > #endif > > -#ifdef HAVE_SHM_OPEN > +#if defined(HAVE_SHM_OPEN) || defined(HAVE_MEMFD) > if (munmap(m->mem.ptr, PA_PAGE_ALIGN(m->mem.size)) < 0) > pa_log("munmap() failed: %s", pa_cstrerror(errno)); > > - if (m->do_unlink) { > +#ifdef HAVE_SHM_OPEN > + if (m->type == PA_MEM_TYPE_SHARED_POSIX && m->per_type.posix_shm.do_unlink) { > char fn[32]; > > segment_name(fn, sizeof(fn), m->id); > if (shm_unlink(fn) < 0) > pa_log(" shm_unlink(%s) failed: %s", fn, pa_cstrerror(errno)); > } > +#endif > +#ifdef HAVE_MEMFD > + /* Memfds are the only SHM type where the file descriptor is not > + * directly closed after memory mapping. */ > + if (m->type == PA_MEM_TYPE_SHARED_MEMFD && m->per_type.memfd.fd != -1) > + pa_assert_se(pa_close(m->per_type.memfd.fd) == 0); > +#endif > + > #else > - /* We shouldn't be here without shm support */ > + /* We shouldn't be here without shm or memfd support */ > pa_assert_not_reached(); > #endif > > @@ -243,9 +279,9 @@ void pa_shm_punch(pa_shm *m, size_t offset, size_t size) { > #endif > } > > -#ifdef HAVE_SHM_OPEN > +#if defined(HAVE_SHM_OPEN) || defined(HAVE_MEMFD) > > -static int shm_attach(pa_shm *m, unsigned id, bool writable, bool for_cleanup) { > +static int shm_attach(pa_shm *m, pa_mem_type_t type, unsigned id, int memfd_fd, bool writable, bool for_cleanup) { > char fn[32]; > int fd = -1; > int prot; > @@ -253,11 +289,27 @@ static int shm_attach(pa_shm *m, unsigned id, bool writable, bool for_cleanup) { > > pa_assert(m); > > - segment_name(fn, sizeof(fn), m->id = id); > - > - if ((fd = shm_open(fn, writable ? O_RDWR : O_RDONLY, 0)) < 0) { > - if ((errno != EACCES && errno != ENOENT) || !for_cleanup) > - pa_log("shm_open() failed: %s", pa_cstrerror(errno)); > + switch (type) { > +#ifdef HAVE_SHM_OPEN > + case PA_MEM_TYPE_SHARED_POSIX: > + pa_assert(memfd_fd == -1); > + segment_name(fn, sizeof(fn), id); > + if ((fd = shm_open(fn, writable ? O_RDWR : O_RDONLY, 0)) < 0) { > + if ((errno != EACCES && errno != ENOENT) || !for_cleanup) > + pa_log("shm_open() failed: %s", pa_cstrerror(errno)); > + goto fail; > + } > + m->per_type.posix_shm.do_unlink = false; > + break; > +#endif > +#ifdef HAVE_MEMFD > + case PA_MEM_TYPE_SHARED_MEMFD: > + pa_assert(memfd_fd != -1); > + fd = memfd_fd; > + m->per_type.memfd.fd = -1; /* -1; check comments above pa_shm_attch() for rationale */ > + break; > +#endif > + default: > goto fail; > } > > @@ -267,13 +319,15 @@ static int shm_attach(pa_shm *m, unsigned id, bool writable, bool for_cleanup) { > } > > if (st.st_size <= 0 || > - st.st_size > (off_t) (PA_MAX_SHM_SIZE+SHM_MARKER_SIZE) || > + st.st_size > (off_t) PA_MAX_SHM_SIZE + (off_t) shm_marker_size(m) || > PA_ALIGN((size_t) st.st_size) != (size_t) st.st_size) { > pa_log("Invalid shared memory segment size"); > goto fail; > } > > + m->type = type; > m->mem.size = (size_t) st.st_size; > + m->id = id; > > prot = writable ? PROT_READ | PROT_WRITE : PROT_READ; > if ((m->mem.ptr = mmap(NULL, PA_PAGE_ALIGN(m->mem.size), prot, MAP_SHARED, fd, (off_t) 0)) == MAP_FAILED) { > @@ -281,9 +335,9 @@ static int shm_attach(pa_shm *m, unsigned id, bool writable, bool for_cleanup) { > goto fail; > } > > - m->do_unlink = false; > - > - pa_assert_se(pa_close(fd) == 0); > + /* Do not close file descriptors which are not our own */ > + if (type != PA_MEM_TYPE_SHARED_MEMFD) > + pa_assert_se(pa_close(fd) == 0); I believe the fd should be closed regardless of type? Now you seem to leak an fd if type == PA_MEM_TYPE_SHARED_MEMFD? > > return 0; > > @@ -293,18 +347,22 @@ fail: > > return -1; > } > +#endif > > -int pa_shm_attach(pa_shm *m, unsigned id, bool writable) { > - return shm_attach(m, id, writable, false); > -} > - > -#else /* HAVE_SHM_OPEN */ > - > -int pa_shm_attach(pa_shm *m, unsigned id, bool writable) { > +/* Note: In case of attaching memfd SHM areas, the caller maintains ownership > + * of the passed fd and has the responsibility of closing it when appropriate. */ > +static int NEW_API_pa_shm_attach(pa_shm *m, pa_mem_type_t type, unsigned id, int memfd_fd, bool writable) { It's more common to just do _pa_shm_attach instead of NEW_API_pa_shm_attach, but since you remove it again in the next patch, it doesn't matter. > +#if defined(HAVE_SHM_OPEN) || defined(HAVE_MEMFD) > + return shm_attach(m, type, id, memfd_fd, writable, false); > +#else > return -1; > +#endif > } > > -#endif /* HAVE_SHM_OPEN */ > +/* Compatibility version until the new API is used in external sources */ > +int pa_shm_attach(pa_shm *m, unsigned id, bool writable) { > + return NEW_API_pa_shm_attach(m, PA_MEM_TYPE_SHARED_POSIX, id, -1, writable); > +} > > int pa_shm_cleanup(void) { > > @@ -335,15 +393,15 @@ int pa_shm_cleanup(void) { > if (pa_atou(de->d_name + SHM_ID_LEN, &id) < 0) > continue; > > - if (shm_attach(&seg, id, false, true) < 0) > + if (shm_attach(&seg, PA_MEM_TYPE_SHARED_POSIX, id, -1, false, true) < 0) > continue; > > - if (seg.mem.size < SHM_MARKER_SIZE) { > + if (seg.mem.size < shm_marker_size(&seg)) { > pa_shm_free(&seg); > continue; > } > > - m = (struct shm_marker*) ((uint8_t*) seg.mem.ptr + seg.mem.size - SHM_MARKER_SIZE); > + m = (struct shm_marker*) ((uint8_t*) seg.mem.ptr + seg.mem.size - shm_marker_size(&seg)); > > if (pa_atomic_load(&m->marker) != SHM_MARKER) { > pa_shm_free(&seg); > diff --git a/src/pulsecore/shm.h b/src/pulsecore/shm.h > index 887f516..1c8ce83 100644 > --- a/src/pulsecore/shm.h > +++ b/src/pulsecore/shm.h > @@ -32,7 +32,21 @@ typedef struct pa_shm { > pa_mem mem; /* Parent; must be first */ > pa_mem_type_t type; > unsigned id; > - bool do_unlink:1; > + > + union { > + struct { > + bool do_unlink; > + } posix_shm; > + struct { > + /* To avoid fd leaks, we keep this fd open only until we pass it > + * to the other PA endpoint over the unix domain socket. > + * > + * When we don't have ownership for the memfd fd in question (e.g. > + * memfd segment attachment), or the file descriptor has now been > + * closed, this is set to -1. */ > + int fd; > + } memfd; > + } per_type; > } pa_shm; > > int pa_shm_create(pa_shm *m, pa_mem_type_t type, size_t size, mode_t mode); > > > Regards, > -- David Henningsson, Canonical Ltd. https://launchpad.net/~diwic