This patch needs the attached adjustment folding in. David --- commit 2c0cccc7b29a051fadb6816d31f526e4dd45ddf5 Author: David Howells <dhowells@xxxxxxxxxx> Date: Thu Nov 7 22:22:49 2024 +0000 netfs: Fix folio abandonment The mechanism to handle the abandonment of a folio being read due to an error occurring in a subrequest (such as if a signal occurs) will correctly abandon folios if they're entirely processed in one go; but if the constituent subrequests aren't processed in the same scheduled work item, the note that the first one failed will get lost if no folios are processed (the ABANDON_SREQ note isn't transferred to the NETFS_RREQ_FOLIO_ABANDON flag unless we process a folio). Fix this by simplifying the way the mechanism works. Replace the flag with a variable that records the file position to which we should abandon folios. Any folio that overlaps this region at the time of unlocking must be abandoned (and reread). This works because subrequests are processed in order of file position and each folio is processed as soon as enough subrequest transference is available to cover it - so when the abandonment point is moved, it covers only folios that draw data from the dodgy region. Also make sure that NETFS_SREQ_FAILED is set on failure and that stream->failed is set when we cut the stream short. 
Signed-off-by: David Howells <dhowells@xxxxxxxxxx> cc: Jeff Layton <jlayton@xxxxxxxxxx> cc: netfs@xxxxxxxxxxxxxxx cc: linux-fsdevel@xxxxxxxxxxxxxxx diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 73f51039c2fe..7f3a3c056c6e 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -46,7 +46,7 @@ static void netfs_unlock_read_folio(struct netfs_io_request *rreq, struct netfs_folio *finfo; struct folio *folio = folioq_folio(folioq, slot); - if (unlikely(test_bit(NETFS_RREQ_FOLIO_ABANDON, &rreq->flags))) { + if (unlikely(folio_pos(folio) < rreq->abandon_to)) { trace_netfs_folio(folio, netfs_folio_trace_abandon); goto just_unlock; } @@ -126,8 +126,6 @@ static void netfs_read_unlock_folios(struct netfs_io_request *rreq, if (*notes & COPY_TO_CACHE) set_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags); - if (*notes & ABANDON_SREQ) - set_bit(NETFS_RREQ_FOLIO_ABANDON, &rreq->flags); folio = folioq_folio(folioq, slot); if (WARN_ONCE(!folio_test_locked(folio), @@ -152,7 +150,6 @@ static void netfs_read_unlock_folios(struct netfs_io_request *rreq, *notes |= MADE_PROGRESS; clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags); - clear_bit(NETFS_RREQ_FOLIO_ABANDON, &rreq->flags); /* Clean up the head folioq. 
If we clear an entire folioq, then * we can get rid of it provided it's not also the tail folioq @@ -251,6 +248,12 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &front->flags)) notes |= COPY_TO_CACHE; + if (test_bit(NETFS_SREQ_FAILED, &front->flags)) { + rreq->abandon_to = front->start + front->len; + front->transferred = front->len; + transferred = front->len; + trace_netfs_rreq(rreq, netfs_rreq_trace_set_abandon); + } if (front->start + transferred >= rreq->cleaned_to + fsize || test_bit(NETFS_SREQ_HIT_EOF, &front->flags)) netfs_read_unlock_folios(rreq, ¬es); @@ -268,6 +271,7 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) stream->error = front->error; rreq->error = front->error; set_bit(NETFS_RREQ_FAILED, &rreq->flags); + stream->failed = true; } notes |= MADE_PROGRESS | ABANDON_SREQ; } else if (test_bit(NETFS_SREQ_NEED_RETRY, &front->flags)) { @@ -566,6 +570,7 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq) __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); } else { netfs_stat(&netfs_n_rh_download_failed); + __set_bit(NETFS_SREQ_FAILED, &subreq->flags); } trace_netfs_rreq(rreq, netfs_rreq_trace_set_pause); set_bit(NETFS_RREQ_PAUSE, &rreq->flags); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c00cffa1da13..4af7208e1360 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -260,6 +260,7 @@ struct netfs_io_request { atomic64_t issued_to; /* Write issuer folio cursor */ unsigned long long collected_to; /* Point we've collected to */ unsigned long long cleaned_to; /* Position we've cleaned folios to */ + unsigned long long abandon_to; /* Position to abandon folios to */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ unsigned char front_folio_order; /* Order (size) of front folio */ refcount_t ref; @@ -271,7 +272,6 @@ struct netfs_io_request { #define NETFS_RREQ_FAILED 4 /* The request failed */ #define 
NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ #define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ -#define NETFS_RREQ_FOLIO_ABANDON 7 /* Abandon failed folio from read */ #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ #define NETFS_RREQ_BLOCKED 10 /* We blocked */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index cf14545ca2bd..22eb77b1f5e6 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -56,6 +56,7 @@ EM(netfs_rreq_trace_free, "FREE ") \ EM(netfs_rreq_trace_redirty, "REDIRTY") \ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ + EM(netfs_rreq_trace_set_abandon, "S-ABNDN") \ EM(netfs_rreq_trace_set_pause, "PAUSE ") \ EM(netfs_rreq_trace_unlock, "UNLOCK ") \ EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \