The current code assumes offline and busy volume states apply to all instances of a volume, not just the one on the server that returned VOFFLINE or VBUSY. Fix that by moving the flags recording these states to the afs_server_entry struct that is used to represent a particular instance of a volume on a specific server. Further, add a sleep for when we have iterated through all the servers so that we don't keep poking the servers every few milliseconds. Signed-off-by: David Howells <dhowells@xxxxxxxxxx> cc: Marc Dionne <marc.dionne@xxxxxxxxxxxx> cc: linux-afs@xxxxxxxxxxxxxxxxxxx --- fs/afs/internal.h | 7 ++++--- fs/afs/rotate.c | 40 ++++++++++++++++++++++++-------------- include/trace/events/afs.h | 1 + 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index d1031241d11b..eb59b0487f8b 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -603,6 +603,9 @@ struct afs_server_entry { struct afs_volume *volume; struct list_head slink; /* Link in server->volumes */ time64_t cb_expires_at; /* Time at which volume-level callback expires */ + unsigned long flags; +#define AFS_SE_VOLUME_OFFLINE 0 /* Set if volume offline notice given */ +#define AFS_SE_VOLUME_BUSY 1 /* Set if volume busy notice given */ }; struct afs_server_list { @@ -636,9 +639,7 @@ struct afs_volume { #define AFS_VOLUME_UPDATING 1 /* - T if an update is in progress */ #define AFS_VOLUME_WAIT 2 /* - T if users must wait for update */ #define AFS_VOLUME_DELETED 3 /* - T if volume appears deleted */ -#define AFS_VOLUME_OFFLINE 4 /* - T if volume offline notice given */ -#define AFS_VOLUME_BUSY 5 /* - T if volume busy notice given */ -#define AFS_VOLUME_MAYBE_NO_IBULK 6 /* - T if some servers don't have InlineBulkStatus */ +#define AFS_VOLUME_MAYBE_NO_IBULK 4 /* - T if some servers don't have InlineBulkStatus */ #ifdef CONFIG_AFS_FSCACHE struct fscache_volume *cache; /* Caching cookie */ #endif diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 
3f656dcb0adf..cb0ab1c2c401 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -111,7 +111,7 @@ static bool afs_start_fs_iteration(struct afs_operation *op, /* * Post volume busy note. */ -static void afs_busy(struct afs_volume *volume, u32 abort_code) +static void afs_busy(struct afs_operation *op, u32 abort_code) { const char *m; @@ -122,7 +122,8 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code) default: m = "busy"; break; } - pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m); + pr_notice("kAFS: Volume %llu '%s' on server %pU is %s\n", + op->volume->vid, op->volume->name, &op->server->uuid, m); } /* @@ -130,6 +131,7 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code) */ static bool afs_sleep_and_retry(struct afs_operation *op) { + trace_afs_rotate(op, afs_rotate_trace_busy_sleep, 0); if (!(op->flags & AFS_OPERATION_UNINTR)) { msleep_interruptible(1000); if (signal_pending(current)) { @@ -180,6 +182,10 @@ bool afs_select_fileserver(struct afs_operation *op) /* Evaluate the result of the previous operation, if there was one. */ switch (op->call_error) { case 0: + clear_bit(AFS_SE_VOLUME_OFFLINE, + &op->server_list->servers[op->server_index].flags); + clear_bit(AFS_SE_VOLUME_BUSY, + &op->server_list->servers[op->server_index].flags); op->cumul_error.responded = true; fallthrough; default: @@ -297,18 +303,16 @@ bool afs_select_fileserver(struct afs_operation *op) * expected to come back but it might take a long time (could be * days). 
*/ - if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &op->volume->flags)) { - afs_busy(op->volume, abort_code); - clear_bit(AFS_VOLUME_BUSY, &op->volume->flags); + if (!test_and_set_bit(AFS_SE_VOLUME_OFFLINE, + &op->server_list->servers[op->server_index].flags)) { + afs_busy(op, abort_code); + clear_bit(AFS_SE_VOLUME_BUSY, + &op->server_list->servers[op->server_index].flags); } if (op->flags & AFS_OPERATION_NO_VSLEEP) { afs_op_set_error(op, -EADV); goto failed; } - if (op->flags & AFS_OPERATION_CUR_ONLY) { - afs_op_set_error(op, -ESTALE); - goto failed; - } goto busy; case VRESTARTING: /* The fileserver is either shutting down or starting up. */ @@ -329,9 +333,11 @@ bool afs_select_fileserver(struct afs_operation *op) afs_op_set_error(op, -EBUSY); goto failed; } - if (!test_and_set_bit(AFS_VOLUME_BUSY, &op->volume->flags)) { - afs_busy(op->volume, abort_code); - clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags); + if (!test_and_set_bit(AFS_SE_VOLUME_BUSY, + &op->server_list->servers[op->server_index].flags)) { + afs_busy(op, abort_code); + clear_bit(AFS_SE_VOLUME_OFFLINE, + &op->server_list->servers[op->server_index].flags); } busy: if (op->flags & AFS_OPERATION_CUR_ONLY) { @@ -411,8 +417,10 @@ bool afs_select_fileserver(struct afs_operation *op) default: afs_op_accumulate_error(op, error, abort_code); failed_but_online: - clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags); - clear_bit(AFS_VOLUME_BUSY, &op->volume->flags); + clear_bit(AFS_SE_VOLUME_OFFLINE, + &op->server_list->servers[op->server_index].flags); + clear_bit(AFS_SE_VOLUME_BUSY, + &op->server_list->servers[op->server_index].flags); goto failed; } @@ -640,8 +648,10 @@ bool afs_select_fileserver(struct afs_operation *op) * of them were busy. 
*/ trace_afs_rotate(op, afs_rotate_trace_no_more_servers, 0); - if (op->flags & AFS_OPERATION_VBUSY) + if (op->flags & AFS_OPERATION_VBUSY) { + afs_sleep_and_retry(op); goto restart_from_beginning; + } rcu_read_lock(); for (i = 0; i < op->server_list->nr_servers; i++) { diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 804d9e147314..ac50fa687429 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -446,6 +446,7 @@ enum yfs_cm_operation { #define afs_rotate_traces \ EM(afs_rotate_trace_aborted, "Abortd") \ + EM(afs_rotate_trace_busy_sleep, "BsySlp") \ EM(afs_rotate_trace_check_vol_status, "VolStt") \ EM(afs_rotate_trace_failed, "Failed") \ EM(afs_rotate_trace_iter, "Iter ") \