Re: [PATCH 07/14] Change unshare_fs_struct() to never fail.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 16 Jul 2024, Jeff Layton wrote:
> On Mon, 2024-07-15 at 17:14 +1000, NeilBrown wrote:
> > nfsd threads need to not share the init fs_struct as they need to
> > manipulate umask independently.  So they call unshare_fs_struct() and
> > are the only user of that function.
> > 
> > In the unlikely event that unshare_fs_struct() fails, the thread will
> > exit calling svc_exit_thread() BEFORE svc_thread_should_stop() reports
> > 'true'.
> > 
> > This is a problem because svc_exit_thread() assumes that
> > svc_stop_threads() is running and consequently (in the nfsd case)
> > nfsd_mutex is held.  This ensures that the list_del_rcu() call in
> > svc_exit_thread() cannot race with any other manipulation of
> > ->sp_all_threads.
> > 
> > While it would be possible to add some other exclusion, doing so would
> > introduce unnecessary complexity.  unshare_fs_struct() does not fail in
> > practice.  So the simplest solution is to make this explicit.  i.e.  use
> > __GFP_NOFAIL which is safe on such a small allocation - about 64 bytes.
> > 
> 
> I know some folks are trying hard to get rid of (or minimize the use
> of) __GFP_NOFAIL. This might not be a long term solution.

Other folk are trying to make NOFAIL a standard option.

See
  https://lore.kernel.org/all/22363d0a-71db-4ba7-b5e1-8bb515811d1c@moroto.mountain/
and surrounding.  In that email Dan suggests GFP_SMALL as a standard
option that is used for smallish allocations and never fails (and warns
if the allocation is bigger than X).

Also
  https://lwn.net/Articles/964793/

> 
> > Change unshare_fs_struct() to not return any error, and remove the error
> > handling from nfsd().
> > 
> > An alternate approach would be to create a variant of
> > kthread_create_on_node() which didn't set CLONE_FS.
> > 
> 
> This sounds like it might be the better approach. I guess you could
> just add a set of CLONE_* flags to struct kthread_create_info and fix
> up the callers to set that appropriately?

I tried that first.  I didn't like it.  Lots of effort for little gain,
where __GFP_NOFAIL fixed the same problem more cleanly.
For reference (in case I do need it eventually) below is a patch from my
'git stash' history.

NeilBrown


 fs/fs_struct.c             | 23 -----------------------
 fs/nfsd/nfssvc.c           | 14 +++++---------
 include/linux/fs_struct.h  |  1 -
 include/linux/kthread.h    |  8 ++++++++
 include/linux/sunrpc/svc.h |  1 +
 kernel/kthread.c           | 33 +++++++++++++++++++--------------
 net/sunrpc/svc.c           |  6 ++++--
 7 files changed, 37 insertions(+), 49 deletions(-)

diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 64c2d0814ed6..a94764084c8c 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -130,29 +130,6 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
 	return fs;
 }
 
-int unshare_fs_struct(void)
-{
-	struct fs_struct *fs = current->fs;
-	struct fs_struct *new_fs = copy_fs_struct(fs);
-	int kill;
-
-	if (!new_fs)
-		return -ENOMEM;
-
-	task_lock(current);
-	spin_lock(&fs->lock);
-	kill = !--fs->users;
-	current->fs = new_fs;
-	spin_unlock(&fs->lock);
-	task_unlock(current);
-
-	if (kill)
-		free_fs_struct(fs);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(unshare_fs_struct);
-
 int current_umask(void)
 {
 	return current->fs->umask;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index c0d17b92b249..d37b9cbbc250 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -666,6 +666,7 @@ int nfsd_create_serv(struct net *net)
 	if (serv == NULL)
 		return -ENOMEM;
 
+	serv->sv_unshare_fs = true;
 	serv->sv_maxconn = nn->max_connections;
 	error = svc_bind(serv, net);
 	if (error < 0) {
@@ -915,14 +916,10 @@ nfsd(void *vrqstp)
 	struct net *net = perm_sock->xpt_net;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	/* At this point, the thread shares current->fs
-	 * with the init process. We need to create files with the
-	 * umask as defined by the client instead of init's umask. */
-	if (unshare_fs_struct() < 0) {
-		printk("Unable to start nfsd thread: out of memory\n");
-		goto out;
-	}
-
+	/* Thread was created with CLONE_FS disabled so we have
+	 * a private current->fs in which we can control umask
+	 * for file creation.
+	 */
 	current->fs->umask = 0;
 
 	atomic_inc(&nfsd_th_cnt);
@@ -943,7 +940,6 @@ nfsd(void *vrqstp)
 
 	atomic_dec(&nfsd_th_cnt);
 
-out:
 	/* Release the thread */
 	svc_exit_thread(rqstp);
 	return 0;
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 783b48dedb72..a854bfa4708c 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -22,7 +22,6 @@ extern void set_fs_root(struct fs_struct *, const struct path *);
 extern void set_fs_pwd(struct fs_struct *, const struct path *);
 extern struct fs_struct *copy_fs_struct(struct fs_struct *);
 extern void free_fs_struct(struct fs_struct *);
-extern int unshare_fs_struct(void);
 
 static inline void get_fs_root(struct fs_struct *fs, struct path *root)
 {
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index b11f53c1ba2e..222779a40389 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -24,6 +24,8 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
  * the stopped state.  This is just a helper for kthread_create_on_node();
  * see the documentation there for more details.
  */
+#define kthread_create_on_node(threadfn, data, node, namefmt, arg...) \
+	kthread_create_on_node_flags(threadfn, data, node, CLONE_FS, namefmt, ##arg)
 #define kthread_create(threadfn, data, namefmt, arg...) \
 	kthread_create_on_node(threadfn, data, NUMA_NO_NODE, namefmt, ##arg)
 
@@ -33,6 +35,12 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
 					  unsigned int cpu,
 					  const char *namefmt);
 
+struct task_struct *kthread_create_on_node_flags(int (*threadfn)(void *data),
+						 void *data,
+						 int node,
+						 int flags,
+						 const char *namefmt, ...);
+
 void get_kthread_comm(char *buf, size_t buf_size, struct task_struct *tsk);
 bool set_kthread_struct(struct task_struct *p);
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 23617da0e565..405f8ec8a505 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -87,6 +87,7 @@ struct svc_serv {
 	unsigned int		sv_nrpools;	/* number of thread pools */
 	struct svc_pool *	sv_pools;	/* array of thread pools */
 	int			(*sv_threadfn)(void *data);
+	bool			sv_unshare_fs;	/* Does serv need umask? */
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	struct lwq		sv_cb_list;	/* queue for callback requests
diff --git a/kernel/kthread.c b/kernel/kthread.c
index c5e40830c1f2..e97cbab40034 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -42,6 +42,7 @@ struct kthread_create_info
 	int (*threadfn)(void *data);
 	void *data;
 	int node;
+	int clone_flags;
 
 	/* Result passed back to kthread_create() from kthreadd. */
 	struct task_struct *result;
@@ -409,7 +410,7 @@ static void create_kthread(struct kthread_create_info *create)
 #endif
 	/* We want our own signal handler (we take no signals by default). */
 	pid = kernel_thread(kthread, create, create->full_name,
-			    CLONE_FS | CLONE_FILES | SIGCHLD);
+			    create->clone_flags | CLONE_FILES | SIGCHLD);
 	if (pid < 0) {
 		/* Release the structure when caller killed by a fatal signal. */
 		struct completion *done = xchg(&create->done, NULL);
@@ -424,11 +425,12 @@ static void create_kthread(struct kthread_create_info *create)
 	}
 }
 
-static __printf(4, 0)
-struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
-						    void *data, int node,
-						    const char namefmt[],
-						    va_list args)
+static __printf(5, 0)
+struct task_struct *__kthread_create_on_node_flags(int (*threadfn)(void *data),
+						   void *data,
+						   int node, int clone_flags,
+						   const char namefmt[],
+						   va_list args)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct task_struct *task;
@@ -440,6 +442,7 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
 	create->threadfn = threadfn;
 	create->data = data;
 	create->node = node;
+	create->clone_flags = clone_flags;
 	create->done = &done;
 	create->full_name = kvasprintf(GFP_KERNEL, namefmt, args);
 	if (!create->full_name) {
@@ -500,21 +503,23 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
  *
  * Returns a task_struct or ERR_PTR(-ENOMEM) or ERR_PTR(-EINTR).
  */
-struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
-					   void *data, int node,
-					   const char namefmt[],
-					   ...)
+struct task_struct *kthread_create_on_node_flags(int (*threadfn)(void *data),
+						 void *data, int node,
+						 int clone_flags,
+						 const char namefmt[],
+						 ...)
 {
 	struct task_struct *task;
 	va_list args;
 
 	va_start(args, namefmt);
-	task = __kthread_create_on_node(threadfn, data, node, namefmt, args);
+	task = __kthread_create_on_node_flags(threadfn, data, node, clone_flags,
+					      namefmt, args);
 	va_end(args);
 
 	return task;
 }
-EXPORT_SYMBOL(kthread_create_on_node);
+EXPORT_SYMBOL(kthread_create_on_node_flags);
 
 static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, unsigned int state)
 {
@@ -870,8 +875,8 @@ __kthread_create_worker(int cpu, unsigned int flags,
 	if (cpu >= 0)
 		node = cpu_to_node(cpu);
 
-	task = __kthread_create_on_node(kthread_worker_fn, worker,
-						node, namefmt, args);
+	task = __kthread_create_on_node_flags(kthread_worker_fn, worker,
+					      node, CLONE_FS, namefmt, args);
 	if (IS_ERR(task))
 		goto fail_task;
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 2b4b1276d4e8..a3c94778b547 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -781,8 +781,10 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 		rqstp = svc_prepare_thread(serv, chosen_pool, node);
 		if (IS_ERR(rqstp))
 			return PTR_ERR(rqstp);
-		task = kthread_create_on_node(serv->sv_threadfn, rqstp,
-					      node, "%s", serv->sv_name);
+		task = kthread_create_on_node_flags(serv->sv_threadfn, rqstp,
+						    node,
+						    serv->sv_unshare_fs ? 0 : CLONE_FS,
+						    "%s", serv->sv_name);
 		if (IS_ERR(task)) {
 			svc_exit_thread(rqstp);
 			return PTR_ERR(task);





[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux