Re: [PATCH] NFSv4: Fix a potential sleep while atomic in nfs4_do_reclaim()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 8/3/19 7:40 AM, Trond Myklebust wrote:
> John Hubbard reports seeing the following stack trace:
> 
> nfs4_do_reclaim
>    rcu_read_lock /* we are now in_atomic() and must not sleep */
>        nfs4_purge_state_owners
>            nfs4_free_state_owner
>                nfs4_destroy_seqid_counter
>                    rpc_destroy_wait_queue
>                        cancel_delayed_work_sync
>                            __cancel_work_timer
>                                __flush_work
>                                    start_flush_work
>                                        might_sleep:
>                                         (kernel/workqueue.c:2975: BUG)
> 
> The solution is to separate out the freeing of the state owners
> from nfs4_purge_state_owners(), and perform that outside the atomic
> context.
> 

All better now--this definitely fixes it. I can reboot the server, and
of course that backtrace is gone. Then the client mounts hang, so I do
a mount -a -o remount, and after about 1 minute, the client mounts
start working again, with no indication of problems. I assume that the
pause is by design--timing out somewhere, to recover from the server
going missing for a while. If so, then all is well.


thanks,
-- 
John Hubbard
NVIDIA

> Reported-by: John Hubbard <jhubbard@xxxxxxxxxx>
> Fixes: 0aaaf5c424c7f ("NFS: Cache state owners after files are closed")
> Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
> ---
>  fs/nfs/nfs4_fs.h    |  3 ++-
>  fs/nfs/nfs4client.c |  5 ++++-
>  fs/nfs/nfs4state.c  | 27 ++++++++++++++++++++++-----
>  3 files changed, 28 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
> index d778dad9a75e..3564da1ba8a1 100644
> --- a/fs/nfs/nfs4_fs.h
> +++ b/fs/nfs/nfs4_fs.h
> @@ -465,7 +465,8 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session,
>  
>  extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, const struct cred *, gfp_t);
>  extern void nfs4_put_state_owner(struct nfs4_state_owner *);
> -extern void nfs4_purge_state_owners(struct nfs_server *);
> +extern void nfs4_purge_state_owners(struct nfs_server *, struct list_head *);
> +extern void nfs4_free_state_owners(struct list_head *head);
>  extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
>  extern void nfs4_put_open_state(struct nfs4_state *);
>  extern void nfs4_close_state(struct nfs4_state *, fmode_t);
> diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
> index 616393a01c06..da6204025a2d 100644
> --- a/fs/nfs/nfs4client.c
> +++ b/fs/nfs/nfs4client.c
> @@ -758,9 +758,12 @@ int nfs41_walk_client_list(struct nfs_client *new,
>  
>  static void nfs4_destroy_server(struct nfs_server *server)
>  {
> +	LIST_HEAD(freeme);
> +
>  	nfs_server_return_all_delegations(server);
>  	unset_pnfs_layoutdriver(server);
> -	nfs4_purge_state_owners(server);
> +	nfs4_purge_state_owners(server, &freeme);
> +	nfs4_free_state_owners(&freeme);
>  }
>  
>  /*
> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> index d03b9cf42bd0..a4e866b2b43b 100644
> --- a/fs/nfs/nfs4state.c
> +++ b/fs/nfs/nfs4state.c
> @@ -624,24 +624,39 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
>  /**
>   * nfs4_purge_state_owners - Release all cached state owners
>   * @server: nfs_server with cached state owners to release
> + * @head: resulting list of state owners
>   *
>   * Called at umount time.  Remaining state owners will be on
>   * the LRU with ref count of zero.
> + * Note that the state owners are not freed, but are added
> + * to the list @head, which can later be used as an argument
> + * to nfs4_free_state_owners.
>   */
> -void nfs4_purge_state_owners(struct nfs_server *server)
> +void nfs4_purge_state_owners(struct nfs_server *server, struct list_head *head)
>  {
>  	struct nfs_client *clp = server->nfs_client;
>  	struct nfs4_state_owner *sp, *tmp;
> -	LIST_HEAD(doomed);
>  
>  	spin_lock(&clp->cl_lock);
>  	list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
> -		list_move(&sp->so_lru, &doomed);
> +		list_move(&sp->so_lru, head);
>  		nfs4_remove_state_owner_locked(sp);
>  	}
>  	spin_unlock(&clp->cl_lock);
> +}
>  
> -	list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
> +/**
> + * nfs4_purge_state_owners - Release all cached state owners
> + * @head: resulting list of state owners
> + *
> + * Frees a list of state owners that was generated by
> + * nfs4_purge_state_owners
> + */
> +void nfs4_free_state_owners(struct list_head *head)
> +{
> +	struct nfs4_state_owner *sp, *tmp;
> +
> +	list_for_each_entry_safe(sp, tmp, head, so_lru) {
>  		list_del(&sp->so_lru);
>  		nfs4_free_state_owner(sp);
>  	}
> @@ -1865,12 +1880,13 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
>  	struct nfs4_state_owner *sp;
>  	struct nfs_server *server;
>  	struct rb_node *pos;
> +	LIST_HEAD(freeme);
>  	int status = 0;
>  
>  restart:
>  	rcu_read_lock();
>  	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
> -		nfs4_purge_state_owners(server);
> +		nfs4_purge_state_owners(server, &freeme);
>  		spin_lock(&clp->cl_lock);
>  		for (pos = rb_first(&server->state_owners);
>  		     pos != NULL;
> @@ -1899,6 +1915,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
>  		spin_unlock(&clp->cl_lock);
>  	}
>  	rcu_read_unlock();
> +	nfs4_free_state_owners(&freeme);
>  	return 0;
>  }
>  
> 



[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux