On Mon, 2015-04-20 at 10:15 -0700, Eric Dumazet wrote: > On Mon, 2015-04-20 at 17:10 +0200, Mateusz Guzik wrote: > > > Sorry for spam but I came up with another hack. :) > > > > The idea is that we can have a variable which would signify the that > > given thread is playing with fd table in fd_install (kind of a lock > > embedded into task_struct). We would also have a flag in files struct > > indicating that a thread would like to resize it. > > > > expand_fdtable would set the flag and iterate over all threads waiting > > for all of them to have the var set to 0. > > The opposite : you have to block them in some way and add a rcu_sched() > or something. Here is the patch I cooked here but not yet tested. diff --git a/fs/file.c b/fs/file.c index 93c5f89c248b..d72bdcacd4df 100644 --- a/fs/file.c +++ b/fs/file.c @@ -145,17 +145,23 @@ static int expand_fdtable(struct files_struct *files, int nr) { struct fdtable *new_fdt, *cur_fdt; + files->resize_in_progress++; spin_unlock(&files->file_lock); new_fdt = alloc_fdtable(nr); + /* make sure all __fd_install() have seen resize_in_progress */ + synchronize_sched(); spin_lock(&files->file_lock); - if (!new_fdt) + if (!new_fdt) { + files->resize_in_progress--; return -ENOMEM; + } /* * extremely unlikely race - sysctl_nr_open decreased between the check in * caller and alloc_fdtable(). Cheaper to catch it here... */ if (unlikely(new_fdt->max_fds <= nr)) { __free_fdtable(new_fdt); + files->resize_in_progress--; return -EMFILE; } /* @@ -173,6 +179,9 @@ static int expand_fdtable(struct files_struct *files, int nr) /* Somebody else expanded, so undo our attempt */ __free_fdtable(new_fdt); } + /* coupled with smp_rmb() in __fd_install() */ + smp_wmb(); + files->resize_in_progress--; return 1; } @@ -256,6 +265,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) atomic_set(&newf->count, 1); spin_lock_init(&newf->file_lock); + newf->resize_in_progress = 0; newf->next_fd = 0; new_fdt = &newf->fdtab; new_fdt->max_fds = NR_OPEN_DEFAULT; @@ -553,11 +563,21 @@ void __fd_install(struct files_struct *files, unsigned int fd, struct file *file) { struct fdtable *fdt; - spin_lock(&files->file_lock); + + rcu_read_lock_sched(); + + while (unlikely(READ_ONCE(files->resize_in_progress))) { + rcu_read_unlock_sched(); + yield(); + rcu_read_lock_sched(); + } + /* coupled with smp_wmb() in expand_fdtable() */ + smp_rmb(); fdt = files_fdtable(files); BUG_ON(fdt->fd[fd] != NULL); rcu_assign_pointer(fdt->fd[fd], file); - spin_unlock(&files->file_lock); + + rcu_read_unlock_sched(); } void fd_install(unsigned int fd, struct file *file) diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 230f87bdf5ad..7496a0d73a7c 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -47,6 +47,7 @@ struct files_struct { * read mostly part */ atomic_t count; + int resize_in_progress; struct fdtable __rcu *fdt; struct fdtable fdtab; /* -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html