If the FD_CLOFORK flag is set on a file descriptor, that descriptor is closed in the child process when a fork occurs. In other words, the file descriptor isn't inheritable. FD_CLOFORK is used the same way IBM uses it. O_CLOFORK is also added to avoid the additional fcntl(2) after open(2). Signed-off-by: Changli Gao <xiaosuo@xxxxxxxxx> --- fs/fcntl.c | 27 +++++++++++++++++++++++++++ fs/file.c | 22 ++++++++++++++++++++-- include/asm-generic/fcntl.h | 5 +++++ include/linux/fdtable.h | 2 ++ include/linux/file.h | 1 + 5 files changed, 55 insertions(+), 2 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index 22764c7..8127744 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -50,6 +50,31 @@ static int get_close_on_exec(unsigned int fd) return res; } +void set_close_on_fork(unsigned int fd, int flag) +{ + struct files_struct *files = current->files; + struct fdtable *fdt; + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + if (flag) + FD_SET(fd, fdt->close_on_fork); + else + FD_CLR(fd, fdt->close_on_fork); + spin_unlock(&files->file_lock); +} + +static int get_close_on_fork(unsigned int fd) +{ + struct files_struct *files = current->files; + struct fdtable *fdt; + int res; + rcu_read_lock(); + fdt = files_fdtable(files); + res = FD_ISSET(fd, fdt->close_on_fork); + rcu_read_unlock(); + return res; +} + SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) { int err = -EBADF; @@ -358,10 +383,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, break; case F_GETFD: err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; + err |= get_close_on_fork(fd) ? 
FD_CLOFORK : 0; break; case F_SETFD: err = 0; set_close_on_exec(fd, arg & FD_CLOEXEC); + set_close_on_fork(fd, arg & FD_CLOFORK); break; case F_GETFL: err = filp->f_flags; diff --git a/fs/file.c b/fs/file.c index 0be3447..ef79197 100644 --- a/fs/file.c +++ b/fs/file.c @@ -133,6 +133,8 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) memset((char *)(nfdt->open_fds) + cpy, 0, set); memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy); memset((char *)(nfdt->close_on_exec) + cpy, 0, set); + memcpy(nfdt->close_on_fork, ofdt->close_on_fork, cpy); + memset((char *)(nfdt->close_on_fork) + cpy, 0, set); } static struct fdtable * alloc_fdtable(unsigned int nr) @@ -170,12 +172,14 @@ static struct fdtable * alloc_fdtable(unsigned int nr) goto out_fdt; fdt->fd = (struct file **)data; data = alloc_fdmem(max_t(unsigned int, - 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES)); + 3 * nr / BITS_PER_BYTE, L1_CACHE_BYTES)); if (!data) goto out_arr; fdt->open_fds = (fd_set *)data; data += nr / BITS_PER_BYTE; fdt->close_on_exec = (fd_set *)data; + data += nr / BITS_PER_BYTE; + fdt->close_on_fork = (fd_set *)data; fdt->next = NULL; return fdt; @@ -303,6 +307,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) new_fdt = &newf->fdtab; new_fdt->max_fds = NR_OPEN_DEFAULT; new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; + new_fdt->close_on_fork = (fd_set *)&newf->close_on_fork_init; new_fdt->open_fds = (fd_set *)&newf->open_fds_init; new_fdt->fd = &newf->fd_array[0]; new_fdt->next = NULL; @@ -350,11 +355,18 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) old_fdt->open_fds->fds_bits, open_files/8); memcpy(new_fdt->close_on_exec->fds_bits, old_fdt->close_on_exec->fds_bits, open_files/8); + memcpy(new_fdt->close_on_fork->fds_bits, + old_fdt->close_on_fork->fds_bits, open_files/8); for (i = open_files; i != 0; i--) { struct file *f = *old_fds++; if (f) { - get_file(f); + if (FD_ISSET(open_files - i, new_fdt->close_on_fork)) 
{ + FD_CLR(open_files - i, new_fdt->open_fds); + f = NULL; + } else { + get_file(f); + } } else { /* * The fd may be claimed in the fd bitmap but not yet @@ -380,6 +392,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) memset(&new_fdt->open_fds->fds_bits[start], 0, left); memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); + memset(&new_fdt->close_on_fork->fds_bits[start], 0, left); } rcu_assign_pointer(newf->fdt, new_fdt); @@ -416,6 +429,7 @@ struct files_struct init_files = { .max_fds = NR_OPEN_DEFAULT, .fd = &init_files.fd_array[0], .close_on_exec = (fd_set *)&init_files.close_on_exec_init, + .close_on_fork = (fd_set *)&init_files.close_on_fork_init, .open_fds = (fd_set *)&init_files.open_fds_init, }, .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), @@ -461,6 +475,10 @@ repeat: FD_SET(fd, fdt->close_on_exec); else FD_CLR(fd, fdt->close_on_exec); + if (flags & O_CLOFORK) + FD_SET(fd, fdt->close_on_fork); + else + FD_CLR(fd, fdt->close_on_fork); error = fd; #if 1 /* Sanity check */ diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h index 84793c7..2a7c474 100644 --- a/include/asm-generic/fcntl.h +++ b/include/asm-generic/fcntl.h @@ -88,6 +88,10 @@ #define O_NDELAY O_NONBLOCK #endif +#ifndef O_CLOFORK +#define O_CLOFORK 020000000 /* set close_on_fork */ +#endif + #define F_DUPFD 0 /* dup */ #define F_GETFD 1 /* get close_on_exec */ #define F_SETFD 2 /* set/clear close_on_exec */ @@ -131,6 +135,7 @@ struct f_owner_ex { /* for F_[GET|SET]FL */ #define FD_CLOEXEC 1 /* actually anything with low bit set goes */ +#define FD_CLOFORK 2 /* for posix fcntl() and lockf() */ #ifndef F_RDLCK diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 133c0ba..bb9f0be 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -33,6 +33,7 @@ struct fdtable { unsigned int max_fds; struct file __rcu **fd; /* current fd array */ fd_set *close_on_exec; + fd_set *close_on_fork; fd_set *open_fds; struct 
rcu_head rcu; struct fdtable *next; @@ -54,6 +55,7 @@ struct files_struct { spinlock_t file_lock ____cacheline_aligned_in_smp; int next_fd; struct embedded_fd_set close_on_exec_init; + struct embedded_fd_set close_on_fork_init; struct embedded_fd_set open_fds_init; struct file __rcu * fd_array[NR_OPEN_DEFAULT]; }; diff --git a/include/linux/file.h b/include/linux/file.h index 21a7995..c592d1f 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -32,6 +32,7 @@ extern struct file *fget_light(unsigned int fd, int *fput_needed); extern struct file *fget_raw(unsigned int fd); extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); extern void set_close_on_exec(unsigned int fd, int flag); +extern void set_close_on_fork(unsigned int fd, int flag); extern void put_filp(struct file *); extern int alloc_fd(unsigned start, unsigned flags); extern int get_unused_fd(void); -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html