As of commit v5.9-rc1~160^2~3 the Linux kernel has the close_range() syscall, which closes not just one FD but a whole range. In glibc this is exposed by an automatically generated wrapper of the same name. In musl it is not exposed yet, but we can call syscall() directly. In either case, we have to deal with the situation where the kernel we're running under does not have the syscall, as glibc deliberately does not implement a fallback. Signed-off-by: Michal Privoznik <mprivozn@xxxxxxxxxx> --- meson.build | 1 + src/util/vircommand.c | 117 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 117 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index a4b52b6156..ecfc1b6bdf 100644 --- a/meson.build +++ b/meson.build @@ -573,6 +573,7 @@ libvirt_export_dynamic = cc.first_supported_link_argument([ # check availability of various common functions (non-fatal if missing) functions = [ + 'close_range', 'closefrom', 'elf_aux_info', 'explicit_bzero', diff --git a/src/util/vircommand.c b/src/util/vircommand.c index b8b8d48f92..e826f5f348 100644 --- a/src/util/vircommand.c +++ b/src/util/vircommand.c @@ -479,13 +479,128 @@ virExecCommon(virCommand *cmd, gid_t *groups, int ngroups) return 0; } -# ifdef WITH_CLOSEFROM +# if defined(WITH_CLOSE_RANGE) || \ + (defined(WITH_SYS_SYSCALL_H) && defined(SYS_close_range)) +# define USE_CLOSE_RANGE +# elif defined(WITH_CLOSEFROM) # define USE_CLOSEFROM # else # define USE_GENERIC # endif +# ifdef USE_CLOSE_RANGE +static int +virCloseRange(unsigned int first, + unsigned int last, + unsigned int flags) +{ + + static virTristateBool has_close_range = VIR_TRISTATE_BOOL_ABSENT; + int fd; + int ret = -1; + + VIR_DEBUG("first=%u, last=%u, flags=0x%x, has_close_range=%d", + first, last, flags, has_close_range); + + if (has_close_range != VIR_TRISTATE_BOOL_NO) { +# if WITH_CLOSE_RANGE + ret = close_range(first, last, flags); +# else + ret = syscall(SYS_close_range, first, last, flags); +# endif + } + + if (ret == 0) { + if
(has_close_range == VIR_TRISTATE_BOOL_ABSENT)
+            has_close_range = VIR_TRISTATE_BOOL_YES;
+        return 0;
+    }
+
+    if (has_close_range != VIR_TRISTATE_BOOL_NO && errno != ENOSYS)
+        return ret;
+
+    /* Syscall is missing: just reported via ENOSYS, or cached from an earlier call. */
+    if (has_close_range == VIR_TRISTATE_BOOL_ABSENT) {
+        VIR_DEBUG("Kernel does not support close_range, falling back to naive implementation");
+        has_close_range = VIR_TRISTATE_BOOL_NO;
+    }
+
+    /* glibc does not implement fallback, we have to implement it ourselves. */
+    if (flags != 0) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    for (fd = first; fd <= last; fd++) {
+        int tmpfd = fd;
+        VIR_MASS_CLOSE(tmpfd);
+    }
+
+    return 0;
+}
+
+
+static int
+virCommandMassClose(virCommand *cmd,
+                    int childin,
+                    int childout,
+                    int childerr)
+{
+    g_autoptr(virBitmap) fds = virBitmapNew(3);
+    ssize_t first;
+    ssize_t last;
+    int openmax = sysconf(_SC_OPEN_MAX);
+    size_t i;
+
+    virBitmapSetBitExpand(fds, childin);
+    virBitmapSetBitExpand(fds, childout);
+    virBitmapSetBitExpand(fds, childerr);
+
+    for (i = 0; i < cmd->npassfd; i++) {
+        int fd = cmd->passfd[i].fd;
+
+        virBitmapSetBitExpand(fds, fd);
+
+        if (virSetInherit(fd, true) < 0) {
+            virReportSystemError(errno, _("failed to preserve fd %1$d"), fd);
+            return -1;
+        }
+    }
+
+    first = 2;
+    while ((last = virBitmapNextSetBit(fds, first)) >= 0) {
+        if (first + 1 == last) {
+            first = last;
+            continue;
+        }
+
+        /* Preserve @first and @last and close everything in between. */
+        if (virCloseRange(first + 1, last - 1, 0) < 0) {
+            virReportSystemError(errno,
+                                 _("Unable to mass close FDs (first=%1$zd, last=%2$zd)"),
+                                 first + 1, last - 1);
+            return -1;
+        }
+
+        first = last;
+    }
+
+    if (openmax < 0)
+        openmax = INT_MAX;
+
+    if (virCloseRange(first + 1, openmax, 0) < 0) {
+        virReportSystemError(errno,
+                             _("Unable to mass close FDs (first=%1$zd, last=%2$d)"),
+                             first + 1, openmax);
+        return -1;
+    }
+
+    return 0;
+}
+# endif /* USE_CLOSE_RANGE */
+
+
 # ifdef USE_CLOSEFROM
 static int
 virCommandMassClose(virCommand *cmd,
-- 
2.39.3