Re: [PATCH RESEND v9 2/3] fuse: add optional kernel-enforced timeout for requests

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2024-11-14 at 11:13 -0800, Joanne Koong wrote:
> There are situations where fuse servers can become unresponsive or
> stuck, for example if the server is deadlocked. Currently, there's no
> good way to detect if a server is stuck and needs to be killed manually.
> 
> This commit adds an option for enforcing a timeout (in minutes) for
> requests where if the timeout elapses without the server responding to
> the request, the connection will be automatically aborted.
> 

I haven't been keeping up with the earlier series, but I think I agree
with Sergey that this timeout would be better expressed in seconds.

Most filesystems that deal with timeouts (NFS, CIFS, etc.) specify them
as a number of seconds, and expressing this in minutes goes against
that convention. It also seems rather coarse-grained. I could easily
see a situation where 5 minutes is too short, but 6 minutes is too
long.

With that too, you probably wouldn't need patch #1. You could treat it
as a 32-bit integer and just clamp the value as necessary.

> Please note that these timeouts are not 100% precise. The request may
> take an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond the requested max
> timeout due to how it's internally implemented.
> 
> Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx>
> Reviewed-by: Bernd Schubert <bschubert@xxxxxxx>
> ---
>  fs/fuse/dev.c    | 80 ++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/fuse/fuse_i.h | 21 +++++++++++++
>  fs/fuse/inode.c  | 21 +++++++++++++
>  3 files changed, 122 insertions(+)
> 
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 29fc61a072ba..536aa4525e8f 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -45,6 +45,82 @@ static struct fuse_dev *fuse_get_dev(struct file *file)
>  	return READ_ONCE(file->private_data);
>  }
>  
> +static bool request_expired(struct fuse_conn *fc, struct fuse_req *req)
> +{
> +	return jiffies > req->create_time + fc->timeout.req_timeout;
> +}
> +
> +/*
> + * Check if any requests aren't being completed by the specified request
> + * timeout. To do so, we:
> + * - check the fiq pending list
> + * - check the bg queue
> + * - check the fpq io and processing lists
> + *
> + * To make this fast, we only check against the head request on each list since
> + * these are generally queued in order of creation time (eg newer requests get
> + * queued to the tail). We might miss a few edge cases (eg requests transitioning
> + * between lists, re-sent requests at the head of the pending list having a
> + * later creation time than other requests on that list, etc.) but that is fine
> + * since if the request never gets fulfilled, it will eventually be caught.
> + */
> +void fuse_check_timeout(struct timer_list *timer)
> +{
> +	struct fuse_conn *fc = container_of(timer, struct fuse_conn, timeout.timer);
> +	struct fuse_iqueue *fiq = &fc->iq;
> +	struct fuse_req *req;
> +	struct fuse_dev *fud;
> +	struct fuse_pqueue *fpq;
> +	bool expired = false;
> +	int i;
> +
> +	spin_lock(&fiq->lock);
> +	req = list_first_entry_or_null(&fiq->pending, struct fuse_req, list);
> +	if (req)
> +		expired = request_expired(fc, req);
> +	spin_unlock(&fiq->lock);
> +	if (expired)
> +		goto abort_conn;
> +
> +	spin_lock(&fc->bg_lock);
> +	req = list_first_entry_or_null(&fc->bg_queue, struct fuse_req, list);
> +	if (req)
> +		expired = request_expired(fc, req);
> +	spin_unlock(&fc->bg_lock);
> +	if (expired)
> +		goto abort_conn;
> +
> +	spin_lock(&fc->lock);
> +	if (!fc->connected) {
> +		spin_unlock(&fc->lock);
> +		return;
> +	}
> +	list_for_each_entry(fud, &fc->devices, entry) {
> +		fpq = &fud->pq;
> +		spin_lock(&fpq->lock);
> +		req = list_first_entry_or_null(&fpq->io, struct fuse_req, list);
> +		if (req && request_expired(fc, req))
> +			goto fpq_abort;
> +
> +		for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) {
> +			req = list_first_entry_or_null(&fpq->processing[i], struct fuse_req, list);
> +			if (req && request_expired(fc, req))
> +				goto fpq_abort;
> +		}
> +		spin_unlock(&fpq->lock);
> +	}
> +	spin_unlock(&fc->lock);
> +
> +	mod_timer(&fc->timeout.timer, jiffies + FUSE_TIMEOUT_TIMER_FREQ);
> +	return;
> +
> +fpq_abort:
> +	spin_unlock(&fpq->lock);
> +	spin_unlock(&fc->lock);
> +abort_conn:
> +	fuse_abort_conn(fc);
> +}
> +
>  static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
>  {
>  	INIT_LIST_HEAD(&req->list);
> @@ -53,6 +129,7 @@ static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
>  	refcount_set(&req->count, 1);
>  	__set_bit(FR_PENDING, &req->flags);
>  	req->fm = fm;
> +	req->create_time = jiffies;
>  }
>  
>  static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
> @@ -2308,6 +2385,9 @@ void fuse_abort_conn(struct fuse_conn *fc)
>  		spin_unlock(&fc->lock);
>  
>  		end_requests(&to_end);
> +
> +		if (fc->timeout.req_timeout)
> +			timer_delete(&fc->timeout.timer);
>  	} else {
>  		spin_unlock(&fc->lock);
>  	}
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index d35c37ccf9b5..9092201c4e0b 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -438,6 +438,9 @@ struct fuse_req {
>  
>  	/** fuse_mount this request belongs to */
>  	struct fuse_mount *fm;
> +
> +	/** When (in jiffies) the request was created */
> +	unsigned long create_time;
>  };
>  
>  struct fuse_iqueue;
> @@ -528,6 +531,16 @@ struct fuse_pqueue {
>  	struct list_head io;
>  };
>  
> +/* Frequency (in seconds) of request timeout checks, if opted into */
> +#define FUSE_TIMEOUT_TIMER_FREQ 60 * HZ
> +
> +struct fuse_timeout {
> +	struct timer_list timer;
> +
> +	/* Request timeout (in jiffies). 0 = no timeout */
> +	unsigned long req_timeout;
> +};
> +
>  /**
>   * Fuse device instance
>   */
> @@ -574,6 +587,8 @@ struct fuse_fs_context {
>  	enum fuse_dax_mode dax_mode;
>  	unsigned int max_read;
>  	unsigned int blksize;
> +	/*  Request timeout (in minutes). 0 = no timeout (infinite wait) */
> +	unsigned int req_timeout;
>  	const char *subtype;
>  
>  	/* DAX device, may be NULL */
> @@ -920,6 +935,9 @@ struct fuse_conn {
>  	/** IDR for backing files ids */
>  	struct idr backing_files_map;
>  #endif
> +
> +	/** Only used if the connection enforces request timeouts */
> +	struct fuse_timeout timeout;
>  };
>  
>  /*
> @@ -1181,6 +1199,9 @@ void fuse_request_end(struct fuse_req *req);
>  void fuse_abort_conn(struct fuse_conn *fc);
>  void fuse_wait_aborted(struct fuse_conn *fc);
>  
> +/* Check if any requests timed out */
> +void fuse_check_timeout(struct timer_list *timer);
> +
>  /**
>   * Invalidate inode attributes
>   */
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index f1779ff3f8d1..ee006f09cd04 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -735,6 +735,7 @@ enum {
>  	OPT_ALLOW_OTHER,
>  	OPT_MAX_READ,
>  	OPT_BLKSIZE,
> +	OPT_REQUEST_TIMEOUT,
>  	OPT_ERR
>  };
>  
> @@ -749,6 +750,7 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = {
>  	fsparam_u32	("max_read",		OPT_MAX_READ),
>  	fsparam_u32	("blksize",		OPT_BLKSIZE),
>  	fsparam_string	("subtype",		OPT_SUBTYPE),
> +	fsparam_u16	("request_timeout",	OPT_REQUEST_TIMEOUT),
>  	{}
>  };
>  
> @@ -844,6 +846,10 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
>  		ctx->blksize = result.uint_32;
>  		break;
>  
> +	case OPT_REQUEST_TIMEOUT:
> +		ctx->req_timeout = result.uint_16;
> +		break;
> +
>  	default:
>  		return -EINVAL;
>  	}
> @@ -973,6 +979,8 @@ void fuse_conn_put(struct fuse_conn *fc)
>  
>  		if (IS_ENABLED(CONFIG_FUSE_DAX))
>  			fuse_dax_conn_free(fc);
> +		if (fc->timeout.req_timeout)
> +			timer_shutdown_sync(&fc->timeout.timer);
>  		if (fiq->ops->release)
>  			fiq->ops->release(fiq);
>  		put_pid_ns(fc->pid_ns);
> @@ -1691,6 +1699,18 @@ int fuse_init_fs_context_submount(struct fs_context *fsc)
>  }
>  EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount);
>  
> +static void fuse_init_fc_timeout(struct fuse_conn *fc, struct fuse_fs_context *ctx)
> +{
> +	if (ctx->req_timeout) {
> +		if (check_mul_overflow(ctx->req_timeout * 60, HZ, &fc->timeout.req_timeout))
> +			fc->timeout.req_timeout = ULONG_MAX;
> +		timer_setup(&fc->timeout.timer, fuse_check_timeout, 0);
> +		mod_timer(&fc->timeout.timer, jiffies + FUSE_TIMEOUT_TIMER_FREQ);
> +	} else {
> +		fc->timeout.req_timeout = 0;
> +	}
> +}
> +
>  int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
>  {
>  	struct fuse_dev *fud = NULL;
> @@ -1753,6 +1773,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
>  	fc->destroy = ctx->destroy;
>  	fc->no_control = ctx->no_control;
>  	fc->no_force_umount = ctx->no_force_umount;
> +	fuse_init_fc_timeout(fc, ctx);
>  
>  	err = -ENOMEM;
>  	root = fuse_get_root_inode(sb, ctx->rootmode);

-- 
Jeff Layton <jlayton@xxxxxxxxxx>





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux