Signed-off-by: Etienne Martineau <etmartin4313@xxxxxxxxx> --- fs/fuse/dev.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/fuse/fuse_i.h | 13 +++++++++ fs/fuse/inode.c | 7 +++++ 3 files changed, 88 insertions(+) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 27ccae63495d..351787363444 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -21,6 +21,8 @@ #include <linux/swap.h> #include <linux/splice.h> #include <linux/sched.h> +#include <linux/completion.h> +#include <linux/sched/sysctl.h> #define CREATE_TRACE_POINTS #include "fuse_trace.h" @@ -307,11 +309,75 @@ const struct fuse_iqueue_ops fuse_dev_fiq_ops = { }; EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops); +static void fuse_timeout_checkpoint_in(struct fuse_req *req) +{ + int idx; + struct fuse_conn *fc = req->fm->fc; + + if (!sysctl_hung_task_timeout_secs) + return; + req->checkpoint_time = jiffies; + idx = (req->checkpoint_time >> FUSE_TIMEOUT_JIFFIES) % FUSE_TIMEOUT_DISTRIBUTION; + atomic_inc(&fc->timeout_distribution[idx]); +} + +static void fuse_timeout_checkpoint_out(struct fuse_req *req) +{ + int idx; + struct fuse_conn *fc = req->fm->fc; + + if (!sysctl_hung_task_timeout_secs) + return; + idx = (req->checkpoint_time >> FUSE_TIMEOUT_JIFFIES) % FUSE_TIMEOUT_DISTRIBUTION; + atomic_dec(&fc->timeout_distribution[idx]); +} + +/* + * The fuse timeout logic maintain a time distribution sliding window + * made of FUSE_TIMEOUT_DISTRIBUTION entries where each entries span over a + * number of jiffies defined by FUSE_TIMEOUT_JIFFIES in base 2. + * Just before sending the request to the server, we increment the coresponding + * distribution entry and once the request is ack back, we decrement that same + * entry because we remember req->checkpoint_time. + * Now, a timeout can be detected by simply looking at the old entries in the + * distribution and see if there is something hanging past a certain point. + */ +void fuse_check_timeout(struct work_struct *wk) +{ + int idx, x; + struct timespec64 t; + struct fuse_conn *fc = container_of(wk, struct fuse_conn, work.work); + + if (!atomic_read(&fc->num_waiting)) + goto out; + + /* Current position in the distribution */ + idx = (jiffies >> FUSE_TIMEOUT_JIFFIES) % FUSE_TIMEOUT_DISTRIBUTION; + + /* Walk back and trigger abort when detecting old entries in the second half */ + for (x = 0; x < FUSE_TIMEOUT_DISTRIBUTION; x++, + idx == 0 ? idx = FUSE_TIMEOUT_DISTRIBUTION-1 : idx--) { + /* First half, keep going */ + if (x < FUSE_TIMEOUT_DISTRIBUTION>>1) + continue; + if (atomic_read(&fc->timeout_distribution[idx])) { + pr_info("fuse %u is stuck, aborting\n", fc->dev); + fuse_abort_conn(fc); + return; + } + } +out: + if (sysctl_hung_task_timeout_secs) + queue_delayed_work(system_wq, &fc->work, + sysctl_hung_task_timeout_secs * (HZ / 2)); +} + static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req) { req->in.h.len = sizeof(struct fuse_in_header) + fuse_len_args(req->args->in_numargs, (struct fuse_arg *) req->args->in_args); + fuse_timeout_checkpoint_in(req); trace_fuse_request_send(req); fiq->ops->send_req(fiq, req); } @@ -359,6 +425,7 @@ void fuse_request_end(struct fuse_req *req) if (test_and_set_bit(FR_FINISHED, &req->flags)) goto put_request; + fuse_timeout_checkpoint_out(req); trace_fuse_request_end(req); /* * test_and_set_bit() implies smp_mb() between bit @@ -2260,6 +2327,7 @@ void fuse_abort_conn(struct fuse_conn *fc) LIST_HEAD(to_end); unsigned int i; + cancel_delayed_work(&fc->work); /* Background queuing checks fc->connected under bg_lock */ spin_lock(&fc->bg_lock); fc->connected = 0; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 74744c6f2860..243d482a057d 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -44,6 +44,12 @@ /** Number of dentries for each connection in the control filesystem */ #define FUSE_CTL_NUM_DENTRIES 5 +/** Request timeout handling */ +#define FUSE_TIMEOUT_DISTRIBUTION_SHIFT 3 +#define FUSE_TIMEOUT_DISTRIBUTION (1L << FUSE_TIMEOUT_DISTRIBUTION_SHIFT) +#define FUSE_TIMEOUT_JIFFIES (order_base_2( \ + (sysctl_hung_task_timeout_secs * HZ) >> FUSE_TIMEOUT_DISTRIBUTION_SHIFT)) + /** Maximum of max_pages received in init_out */ extern unsigned int fuse_max_pages_limit; @@ -438,6 +444,8 @@ struct fuse_req { /** fuse_mount this request belongs to */ struct fuse_mount *fm; + + unsigned long checkpoint_time; }; struct fuse_iqueue; @@ -923,8 +931,12 @@ struct fuse_conn { /** IDR for backing files ids */ struct idr backing_files_map; #endif + + atomic_t timeout_distribution[FUSE_TIMEOUT_DISTRIBUTION]; + struct delayed_work work; }; + /* * Represents a mounted filesystem, potentially a submount. * @@ -1190,6 +1202,7 @@ void fuse_request_end(struct fuse_req *req); /* Abort all requests */ void fuse_abort_conn(struct fuse_conn *fc); void fuse_wait_aborted(struct fuse_conn *fc); +void fuse_check_timeout(struct work_struct *wk); /** * Invalidate inode attributes diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 3ce4f4e81d09..9f8fe17801c4 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -23,6 +23,8 @@ #include <linux/exportfs.h> #include <linux/posix_acl.h> #include <linux/pid_namespace.h> +#include <linux/completion.h> +#include <linux/sched/sysctl.h> #include <uapi/linux/magic.h> MODULE_AUTHOR("Miklos Szeredi <miklos@xxxxxxxxxx>"); @@ -1002,6 +1004,7 @@ void fuse_conn_put(struct fuse_conn *fc) struct fuse_iqueue *fiq = &fc->iq; struct fuse_sync_bucket *bucket; + cancel_delayed_work_sync(&fc->work); if (IS_ENABLED(CONFIG_FUSE_DAX)) fuse_dax_conn_free(fc); if (fiq->ops->release) @@ -1785,6 +1788,10 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) fc->destroy = ctx->destroy; fc->no_control = ctx->no_control; fc->no_force_umount = ctx->no_force_umount; + INIT_DELAYED_WORK(&fc->work, fuse_check_timeout); + if (sysctl_hung_task_timeout_secs) + queue_delayed_work(system_wq, &fc->work, + sysctl_hung_task_timeout_secs * (HZ / 2)); err = -ENOMEM; root = fuse_get_root_inode(sb, ctx->rootmode); -- 2.34.1