Re: [PATCH] fuse: Abort connection if FUSE server get stuck

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 12/10/24 18:16, etmartin4313@xxxxxxxxx wrote:
> From: Etienne Martineau <etmartin4313@xxxxxxxxx>
> 
> This patch abort connection if HUNG_TASK_PANIC is set and a FUSE server
> is getting stuck for too long.
> 
> Without this patch, an unresponsive / buggy / malicious FUSE server can
> leave the clients in D state for a long period of time and on system where
> HUNG_TASK_PANIC is set, trigger a catastrophic reload.
> 
> So, if HUNG_TASK_PANIC checking is enabled, we should wake up periodically
> to abort connections that exceed the timeout value which is define to be
> half the HUNG_TASK_TIMEOUT period, which keeps overhead low.
> 
> This patch introduce a list of request waiting for answer that is time
> sorted to minimize the overhead.
> 
> When HUNG_TASK_PANIC is enable there is a timeout check per connection
> that is running at low frequency only if there are active FUSE request
> pending.
> 
> A FUSE client can get into D state as such ( see below Scenario #1 / #2 )
>  1) request_wait_answer() -> wait_event() is UNINTERRUPTIBLE
>     OR
>  2) request_wait_answer() -> wait_event_(interruptible / killable) is head
>     of line blocking for subsequent clients accessing the same file
> 
> 	Scenario #1:
> 	2716 pts/2    D+     0:00 cat
> 	$ cat /proc/2716/stack
> 		[<0>] request_wait_answer+0x22e/0x340
> 		[<0>] __fuse_simple_request+0xd8/0x2c0
> 		[<0>] fuse_perform_write+0x3ec/0x760
> 		[<0>] fuse_file_write_iter+0x3d5/0x3f0
> 		[<0>] vfs_write+0x313/0x430
> 		[<0>] ksys_write+0x6a/0xf0
> 		[<0>] __x64_sys_write+0x19/0x30
> 		[<0>] x64_sys_call+0x18ab/0x26f0
> 		[<0>] do_syscall_64+0x7c/0x170
> 		[<0>] entry_SYSCALL_64_after_hwframe+0x76/0x7e
> 
> 	Scenario #2:
> 	2962 pts/2    S+     0:00 cat
> 	2963 pts/3    D+     0:00 cat
> 	$ cat /proc/2962/stack
> 		[<0>] request_wait_answer+0x140/0x340
> 		[<0>] __fuse_simple_request+0xd8/0x2c0
> 		[<0>] fuse_perform_write+0x3ec/0x760
> 		[<0>] fuse_file_write_iter+0x3d5/0x3f0
> 		[<0>] vfs_write+0x313/0x430
> 		[<0>] ksys_write+0x6a/0xf0
> 		[<0>] __x64_sys_write+0x19/0x30
> 		[<0>] x64_sys_call+0x18ab/0x26f0
> 		[<0>] do_syscall_64+0x7c/0x170
> 		[<0>] entry_SYSCALL_64_after_hwframe+0x76/0x7e
> 	$ cat /proc/2963/stack
> 		[<0>] fuse_file_write_iter+0x252/0x3f0
> 		[<0>] vfs_write+0x313/0x430
> 		[<0>] ksys_write+0x6a/0xf0
> 		[<0>] __x64_sys_write+0x19/0x30
> 		[<0>] x64_sys_call+0x18ab/0x26f0
> 		[<0>] do_syscall_64+0x7c/0x170
> 		[<0>] entry_SYSCALL_64_after_hwframe+0x76/0x7e
> 
> Please note that this patch doesn't prevent the HUNG_TASK_WARNING.
> 
> Signed-off-by: Etienne Martineau <etmartin4313@xxxxxxxxx>
> ---
>  fs/fuse/dev.c                | 56 ++++++++++++++++++++++++++++++++++--
>  fs/fuse/fuse_i.h             | 14 +++++++++
>  fs/fuse/inode.c              |  4 +++
>  include/linux/sched/sysctl.h |  8 ++++--
>  kernel/hung_task.c           |  3 +-
>  5 files changed, 79 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 27ccae63495d..294b6ad8a90f 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -21,6 +21,8 @@
>  #include <linux/swap.h>
>  #include <linux/splice.h>
>  #include <linux/sched.h>
> +#include <linux/completion.h>
> +#include <linux/sched/sysctl.h>
>  
>  #define CREATE_TRACE_POINTS
>  #include "fuse_trace.h"
> @@ -418,18 +420,56 @@ static int queue_interrupt(struct fuse_req *req)
>  	return 0;
>  }
>  
> +/*
> + * Prevent hung task timer from firing at us
> + * Periodically poll the request waiting list on a per-connection basis
> + * and abort if the oldest request exceed the timeout. The oldest request
> + * is the first element on the list by definition
> + */
> +void fuse_wait_answer_timeout(struct work_struct *wk)
> +{
> +	unsigned long hang_check_timer = sysctl_hung_task_timeout_secs * (HZ / 2);
> +	struct fuse_conn *fc = container_of(wk, struct fuse_conn, work.work);
> +	struct fuse_req *req;
> +
> +	spin_lock(&fc->lock);
> +	req = list_first_entry_or_null(&fc->req_waiting, struct fuse_req, timeout_list);
> +	if (req && time_after(jiffies, req->wait_start + hang_check_timer)) {
> +		spin_unlock(&fc->lock);
> +		fuse_abort_conn(fc);
> +		return;
> +	}
> +
> +	/* Keep the ball rolling but don't re-arm when only one req is pending */
> +	if (atomic_read(&fc->num_waiting) != 1)
> +		queue_delayed_work(system_wq, &fc->work, hang_check_timer);
> +	spin_unlock(&fc->lock);
> +}
> +
>  static void request_wait_answer(struct fuse_req *req)
>  {
> +	unsigned long hang_check_timer = sysctl_hung_task_timeout_secs * (HZ / 2);
> +	unsigned int hang_check = sysctl_hung_task_panic;
>  	struct fuse_conn *fc = req->fm->fc;
>  	struct fuse_iqueue *fiq = &fc->iq;
>  	int err;
>  
> +	if (hang_check) {
> +		spin_lock(&fc->lock);
> +		/* Get the ball rolling if we are the first request */
> +		if (atomic_read(&fc->num_waiting) == 1)
> +			queue_delayed_work(system_wq, &fc->work, hang_check_timer);
> +		req->wait_start = jiffies;
> +		list_add_tail(&req->timeout_list, &fc->req_waiting);
> +		spin_unlock(&fc->lock);
> +	}
> +
>  	if (!fc->no_interrupt) {
>  		/* Any signal may interrupt this */
>  		err = wait_event_interruptible(req->waitq,
>  					test_bit(FR_FINISHED, &req->flags));
>  		if (!err)
> -			return;
> +			goto out;
>  
>  		set_bit(FR_INTERRUPTED, &req->flags);
>  		/* matches barrier in fuse_dev_do_read() */
> @@ -443,7 +483,7 @@ static void request_wait_answer(struct fuse_req *req)
>  		err = wait_event_killable(req->waitq,
>  					test_bit(FR_FINISHED, &req->flags));
>  		if (!err)
> -			return;
> +			goto out;
>  
>  		spin_lock(&fiq->lock);
>  		/* Request is not yet in userspace, bail out */
> @@ -452,7 +492,7 @@ static void request_wait_answer(struct fuse_req *req)
>  			spin_unlock(&fiq->lock);
>  			__fuse_put_request(req);
>  			req->out.h.error = -EINTR;
> -			return;
> +			goto out;
>  		}
>  		spin_unlock(&fiq->lock);
>  	}
> @@ -462,6 +502,16 @@ static void request_wait_answer(struct fuse_req *req)
>  	 * Wait it out.
>  	 */
>  	wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
> +
> +out:
> +	if (hang_check) {
> +		spin_lock(&fc->lock);
> +		/* Stop the timeout check if we are the last request */
> +		if (atomic_read(&fc->num_waiting) == 1)
> +			cancel_delayed_work_sync(&fc->work);
> +		list_del(&req->timeout_list);
> +		spin_unlock(&fc->lock);
> +	}
>  }
>  
>  static void __fuse_request_send(struct fuse_req *req)
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 74744c6f2860..7cbfbd8e4e54 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -438,6 +438,12 @@ struct fuse_req {
>  
>  	/** fuse_mount this request belongs to */
>  	struct fuse_mount *fm;
> +
> +	/** Entry on req_waiting list */
> +	struct list_head timeout_list;
> +
> +	/** Wait start time in jiffies */
> +	unsigned long wait_start;
>  };
>  
>  struct fuse_iqueue;
> @@ -923,6 +929,12 @@ struct fuse_conn {
>  	/** IDR for backing files ids */
>  	struct idr backing_files_map;
>  #endif
> +
> +	/** Request wait timeout */
> +	struct delayed_work work;
> +
> +	/** List of request waiting for answer */
> +	struct list_head req_waiting;
>  };
>  
>  /*
> @@ -1190,6 +1202,8 @@ void fuse_request_end(struct fuse_req *req);
>  /* Abort all requests */
>  void fuse_abort_conn(struct fuse_conn *fc);
>  void fuse_wait_aborted(struct fuse_conn *fc);
> +/* Connection timeout */
> +void fuse_wait_answer_timeout(struct work_struct *wk);
>  
>  /**
>   * Invalidate inode attributes
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 3ce4f4e81d09..ce78c2b5ad8c 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -23,6 +23,7 @@
>  #include <linux/exportfs.h>
>  #include <linux/posix_acl.h>
>  #include <linux/pid_namespace.h>
> +#include <linux/completion.h>
>  #include <uapi/linux/magic.h>
>  
>  MODULE_AUTHOR("Miklos Szeredi <miklos@xxxxxxxxxx>");
> @@ -964,6 +965,8 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
>  	INIT_LIST_HEAD(&fc->entry);
>  	INIT_LIST_HEAD(&fc->devices);
>  	atomic_set(&fc->num_waiting, 0);
> +	INIT_DELAYED_WORK(&fc->work, fuse_wait_answer_timeout);
> +	INIT_LIST_HEAD(&fc->req_waiting);


https://www.spinics.net/lists//linux-fsdevel/msg283639.html


Thanks,
Bernd


PS: In my fuse-io-uring series global lists are eliminated.




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux