Hello Roberto! > On Mar 3, 2021, at 10:33 AM, Roberto Bergantinos Corpas <rbergant@xxxxxxxxxx> wrote: > > Would be interesting to promote DS availability logging outside debug > so that we are more aware that I/O is diverted to MDS and some part > of the infrastructure failed. > > Also added logging for failed DS connection attempts. Given that this enables remote system behavior to generate kernel log traffic that can fill the local root partition, I'd like to see either: - the explicit use of rate limiting, or - these dprintks replaced with tracepoints > Signed-off-by: Roberto Bergantinos Corpas <rbergant@xxxxxxxxxx> > --- > fs/nfs/filelayout/filelayout.c | 4 ++-- > fs/nfs/flexfilelayout/flexfilelayout.c | 6 +++--- > fs/nfs/pnfs_nfs.c | 6 +++++- > 3 files changed, 10 insertions(+), 6 deletions(-) > > diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c > index 7f5aa0403e16..fef2d31a501a 100644 > --- a/fs/nfs/filelayout/filelayout.c > +++ b/fs/nfs/filelayout/filelayout.c > @@ -181,7 +181,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, > case -EIO: > case -ETIMEDOUT: > case -EPIPE: > - dprintk("%s DS connection error %d\n", __func__, > + pr_warn("%s DS connection error %d\n", __func__, > task->tk_status); > nfs4_mark_deviceid_unavailable(devid); > pnfs_error_mark_layout_for_return(inode, lseg); > @@ -190,7 +190,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, > fallthrough; > default: > reset: > - dprintk("%s Retry through MDS. Error %d\n", __func__, > + pr_warn("%s Retry through MDS. 
Error %d\n", __func__, > task->tk_status); > return -NFS4ERR_RESET_TO_MDS; > } > diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c > index a163533446fa..7150d94e80e6 100644 > --- a/fs/nfs/flexfilelayout/flexfilelayout.c > +++ b/fs/nfs/flexfilelayout/flexfilelayout.c > @@ -1129,7 +1129,7 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, > case -EIO: > case -ETIMEDOUT: > case -EPIPE: > - dprintk("%s DS connection error %d\n", __func__, > + pr_warn("%s DS connection error %d\n", __func__, > task->tk_status); > nfs4_delete_deviceid(devid->ld, devid->nfs_client, > &devid->deviceid); > @@ -1139,7 +1139,7 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, > if (ff_layout_avoid_mds_available_ds(lseg)) > return -NFS4ERR_RESET_TO_PNFS; > reset: > - dprintk("%s Retry through MDS. Error %d\n", __func__, > + pr_warn("%s Retry through MDS. Error %d\n", __func__, > task->tk_status); > return -NFS4ERR_RESET_TO_MDS; > } > @@ -1167,7 +1167,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, > nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); > goto out_retry; > default: > - dprintk("%s DS connection error %d\n", __func__, > + pr_warn("%s DS connection error %d\n", __func__, > task->tk_status); > nfs4_delete_deviceid(devid->ld, devid->nfs_client, > &devid->deviceid); > diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c > index 679767ac258d..322661a48348 100644 > --- a/fs/nfs/pnfs_nfs.c > +++ b/fs/nfs/pnfs_nfs.c > @@ -934,8 +934,11 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, > (struct sockaddr *)&da->da_addr, > da->da_addrlen, IPPROTO_TCP, > timeo, retrans, minor_version); > - if (IS_ERR(clp)) > + if (IS_ERR(clp)) { > + pr_warn("%s: DS: %s unable to connect with address %s, error: %ld\n", > + __func__, ds->ds_remotestr, da->da_remotestr, PTR_ERR(clp)); > continue; > + } > > status = nfs4_init_ds_session(clp, > mds_srv->nfs_client->cl_lease_time); > @@ 
-949,6 +952,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, > } > > if (IS_ERR(clp)) { > + pr_warn("%s: no DS available\n", __func__); > status = PTR_ERR(clp); > goto out; > } > -- > 2.21.0 > -- Chuck Lever