On Thu, 2024-11-14 at 11:13 -0800, Joanne Koong wrote: > There are situations where fuse servers can become unresponsive or > stuck, for example if the server is deadlocked. Currently, there's no > good way to detect if a server is stuck and needs to be killed manually. > > This commit adds an option for enforcing a timeout (in minutes) for > requests where if the timeout elapses without the server responding to > the request, the connection will be automatically aborted. > I haven't been keeping up with the earlier series, but I think I agree with Sergey that this timeout would be better expressed in seconds. Most filesystems that deal with timeouts (NFS, CIFS, etc.) specify them as a number of seconds, and expressing this in minutes goes against that convention. It also seems rather coarse-grained. I could easily see a situation where 5 minutes is too short, but 6 minutes is too long. With that too, you probably wouldn't need patch #1. You could treat it as a 32-bit integer and just clamp the value as necessary. > Please note that these timeouts are not 100% precise. The request may > take an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond the requested max > timeout due to how it's internally implemented. > > Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx> > Reviewed-by: Bernd Schubert <bschubert@xxxxxxx> > --- > fs/fuse/dev.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++ > fs/fuse/fuse_i.h | 21 +++++++++++++ > fs/fuse/inode.c | 21 +++++++++++++ > 3 files changed, 122 insertions(+) > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > index 29fc61a072ba..536aa4525e8f 100644 > --- a/fs/fuse/dev.c > +++ b/fs/fuse/dev.c > @@ -45,6 +45,82 @@ static struct fuse_dev *fuse_get_dev(struct file *file) > return READ_ONCE(file->private_data); > } > > +static bool request_expired(struct fuse_conn *fc, struct fuse_req *req) > +{ > + return jiffies > req->create_time + fc->timeout.req_timeout; > +} > + > +/* > + * Check if any requests aren't being completed by the specified request > + * timeout. To do so, we: > + * - check the fiq pending list > + * - check the bg queue > + * - check the fpq io and processing lists > + * > + * To make this fast, we only check against the head request on each list since > + * these are generally queued in order of creation time (eg newer requests get > + * queued to the tail). We might miss a few edge cases (eg requests transitioning > + * between lists, re-sent requests at the head of the pending list having a > + * later creation time than other requests on that list, etc.) but that is fine > + * since if the request never gets fulfilled, it will eventually be caught. > + */ > +void fuse_check_timeout(struct timer_list *timer) > +{ > + struct fuse_conn *fc = container_of(timer, struct fuse_conn, timeout.timer); > + struct fuse_iqueue *fiq = &fc->iq; > + struct fuse_req *req; > + struct fuse_dev *fud; > + struct fuse_pqueue *fpq; > + bool expired = false; > + int i; > + > + spin_lock(&fiq->lock); > + req = list_first_entry_or_null(&fiq->pending, struct fuse_req, list); > + if (req) > + expired = request_expired(fc, req); > + spin_unlock(&fiq->lock); > + if (expired) > + goto abort_conn; > + > + spin_lock(&fc->bg_lock); > + req = list_first_entry_or_null(&fc->bg_queue, struct fuse_req, list); > + if (req) > + expired = request_expired(fc, req); > + spin_unlock(&fc->bg_lock); > + if (expired) > + goto abort_conn; > + > + spin_lock(&fc->lock); > + if (!fc->connected) { > + spin_unlock(&fc->lock); > + return; > + } > + list_for_each_entry(fud, &fc->devices, entry) { > + fpq = &fud->pq; > + spin_lock(&fpq->lock); > + req = list_first_entry_or_null(&fpq->io, struct fuse_req, list); > + if (req && request_expired(fc, req)) > + goto fpq_abort; > + > + for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) { > + req = list_first_entry_or_null(&fpq->processing[i], struct fuse_req, list); > + if (req && request_expired(fc, req)) > + goto fpq_abort; > + } > + spin_unlock(&fpq->lock); > + } > + spin_unlock(&fc->lock); > + > + mod_timer(&fc->timeout.timer, jiffies + FUSE_TIMEOUT_TIMER_FREQ); > + return; > + > +fpq_abort: > + spin_unlock(&fpq->lock); > + spin_unlock(&fc->lock); > +abort_conn: > + fuse_abort_conn(fc); > +} > + > static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req) > { > INIT_LIST_HEAD(&req->list); > @@ -53,6 +129,7 @@ static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req) > refcount_set(&req->count, 1); > __set_bit(FR_PENDING, &req->flags); > req->fm = fm; > + req->create_time = jiffies; > } > > static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) > @@ -2308,6 +2385,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > spin_unlock(&fc->lock); > > end_requests(&to_end); > + > + if (fc->timeout.req_timeout) > + timer_delete(&fc->timeout.timer); > } else { > spin_unlock(&fc->lock); > } > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > index d35c37ccf9b5..9092201c4e0b 100644 > --- a/fs/fuse/fuse_i.h > +++ b/fs/fuse/fuse_i.h > @@ -438,6 +438,9 @@ struct fuse_req { > > /** fuse_mount this request belongs to */ > struct fuse_mount *fm; > + > + /** When (in jiffies) the request was created */ > + unsigned long create_time; > }; > > struct fuse_iqueue; > @@ -528,6 +531,16 @@ struct fuse_pqueue { > struct list_head io; > }; > > +/* Frequency (in seconds) of request timeout checks, if opted into */ > +#define FUSE_TIMEOUT_TIMER_FREQ 60 * HZ > + > +struct fuse_timeout { > + struct timer_list timer; > + > + /* Request timeout (in jiffies). 0 = no timeout */ > + unsigned long req_timeout; > +}; > + > /** > * Fuse device instance > */ > @@ -574,6 +587,8 @@ struct fuse_fs_context { > enum fuse_dax_mode dax_mode; > unsigned int max_read; > unsigned int blksize; > + /* Request timeout (in minutes). 0 = no timeout (infinite wait) */ > + unsigned int req_timeout; > const char *subtype; > > /* DAX device, may be NULL */ > @@ -920,6 +935,9 @@ struct fuse_conn { > /** IDR for backing files ids */ > struct idr backing_files_map; > #endif > + > + /** Only used if the connection enforces request timeouts */ > + struct fuse_timeout timeout; > }; > > /* > @@ -1181,6 +1199,9 @@ void fuse_request_end(struct fuse_req *req); > void fuse_abort_conn(struct fuse_conn *fc); > void fuse_wait_aborted(struct fuse_conn *fc); > > +/* Check if any requests timed out */ > +void fuse_check_timeout(struct timer_list *timer); > + > /** > * Invalidate inode attributes > */ > diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c > index f1779ff3f8d1..ee006f09cd04 100644 > --- a/fs/fuse/inode.c > +++ b/fs/fuse/inode.c > @@ -735,6 +735,7 @@ enum { > OPT_ALLOW_OTHER, > OPT_MAX_READ, > OPT_BLKSIZE, > + OPT_REQUEST_TIMEOUT, > OPT_ERR > }; > > @@ -749,6 +750,7 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = { > fsparam_u32 ("max_read", OPT_MAX_READ), > fsparam_u32 ("blksize", OPT_BLKSIZE), > fsparam_string ("subtype", OPT_SUBTYPE), > + fsparam_u16 ("request_timeout", OPT_REQUEST_TIMEOUT), > {} > }; > > @@ -844,6 +846,10 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param) > ctx->blksize = result.uint_32; > break; > > + case OPT_REQUEST_TIMEOUT: > + ctx->req_timeout = result.uint_16; > + break; > + > default: > return -EINVAL; > } > @@ -973,6 +979,8 @@ void fuse_conn_put(struct fuse_conn *fc) > > if (IS_ENABLED(CONFIG_FUSE_DAX)) > fuse_dax_conn_free(fc); > + if (fc->timeout.req_timeout) > + timer_shutdown_sync(&fc->timeout.timer); > if (fiq->ops->release) > fiq->ops->release(fiq); > put_pid_ns(fc->pid_ns); > @@ -1691,6 +1699,18 @@ int fuse_init_fs_context_submount(struct fs_context *fsc) > } > EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount); > > +static void fuse_init_fc_timeout(struct fuse_conn *fc, struct fuse_fs_context *ctx) > +{ > + if (ctx->req_timeout) { > + if (check_mul_overflow(ctx->req_timeout * 60, HZ, &fc->timeout.req_timeout)) > + fc->timeout.req_timeout = ULONG_MAX; > + timer_setup(&fc->timeout.timer, fuse_check_timeout, 0); > + mod_timer(&fc->timeout.timer, jiffies + FUSE_TIMEOUT_TIMER_FREQ); > + } else { > + fc->timeout.req_timeout = 0; > + } > +} > + > int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) > { > struct fuse_dev *fud = NULL; > @@ -1753,6 +1773,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) > fc->destroy = ctx->destroy; > fc->no_control = ctx->no_control; > fc->no_force_umount = ctx->no_force_umount; > + fuse_init_fc_timeout(fc, ctx); > > err = -ENOMEM; > root = fuse_get_root_inode(sb, ctx->rootmode); -- Jeff Layton <jlayton@xxxxxxxxxx>