Trond, Chuck thanks for feedback. I understand the point Sure we can turn those printks into tracepoints, done. However original point was flag somehow DS issues outside debug, i.e. user intervention. What about just a pr_warn_once just for the case when at _nfs4_pnfs_v4_ds_connect we could not reach any DS ? On Thu, Mar 4, 2021 at 9:35 PM Trond Myklebust <trondmy@xxxxxxxxxxxxxxx> wrote: > > On Thu, 2021-03-04 at 20:20 +0000, Chuck Lever wrote: > > Hello Roberto! > > > > > On Mar 3, 2021, at 10:33 AM, Roberto Bergantinos Corpas < > > > rbergant@xxxxxxxxxx> wrote: > > > > > > Would be interesting to promote DS availability logging outside > > > debug > > > so that we are more aware that I/O is diverted to MDS and some part > > > of the infraestructure failed. > > > > > > Also added logging for failed DS connection attempts. > > > > Given that this enables remote system behavior to generate > > kernel log traffic that can fill the local root partition, > > I'd like to see either: > > > > - the explicit use of rate limiting, or > > > > - these dprintks replaced with tracepoints > > I cannot accept a pr_warn(), even a rate limited one, for a timeout > error or for a connection error in the data path. Those are just too > nasty to deal with in a syslog. > > Tracepoints would be acceptable. > > > > > > > > Signed-off-by: Roberto Bergantinos Corpas <rbergant@xxxxxxxxxx> > > > --- > > > fs/nfs/filelayout/filelayout.c | 4 ++-- > > > fs/nfs/flexfilelayout/flexfilelayout.c | 6 +++--- > > > fs/nfs/pnfs_nfs.c | 6 +++++- > > > 3 files changed, 10 insertions(+), 6 deletions(-) > > > > > > diff --git a/fs/nfs/filelayout/filelayout.c > > > b/fs/nfs/filelayout/filelayout.c > > > index 7f5aa0403e16..fef2d31a501a 100644 > > > --- a/fs/nfs/filelayout/filelayout.c > > > +++ b/fs/nfs/filelayout/filelayout.c > > > @@ -181,7 +181,7 @@ static int filelayout_async_handle_error(struct > > > rpc_task *task, > > > case -EIO: > > > case -ETIMEDOUT: > > > case -EPIPE: > > > - dprintk("%s DS connection error %d\n", __func__, > > > + pr_warn("%s DS connection error %d\n", __func__, > > > task->tk_status); > > > nfs4_mark_deviceid_unavailable(devid); > > > pnfs_error_mark_layout_for_return(inode, lseg); > > > @@ -190,7 +190,7 @@ static int filelayout_async_handle_error(struct > > > rpc_task *task, > > > fallthrough; > > > default: > > > reset: > > > - dprintk("%s Retry through MDS. Error %d\n", > > > __func__, > > > + pr_warn("%s Retry through MDS. Error %d\n", > > > __func__, > > > task->tk_status); > > > return -NFS4ERR_RESET_TO_MDS; > > > } > > > diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c > > > b/fs/nfs/flexfilelayout/flexfilelayout.c > > > index a163533446fa..7150d94e80e6 100644 > > > --- a/fs/nfs/flexfilelayout/flexfilelayout.c > > > +++ b/fs/nfs/flexfilelayout/flexfilelayout.c > > > @@ -1129,7 +1129,7 @@ static int > > > ff_layout_async_handle_error_v4(struct rpc_task *task, > > > case -EIO: > > > case -ETIMEDOUT: > > > case -EPIPE: > > > - dprintk("%s DS connection error %d\n", __func__, > > > + pr_warn("%s DS connection error %d\n", __func__, > > > task->tk_status); > > > nfs4_delete_deviceid(devid->ld, devid->nfs_client, > > > &devid->deviceid); > > > @@ -1139,7 +1139,7 @@ static int > > > ff_layout_async_handle_error_v4(struct rpc_task *task, > > > if (ff_layout_avoid_mds_available_ds(lseg)) > > > return -NFS4ERR_RESET_TO_PNFS; > > > reset: > > > - dprintk("%s Retry through MDS. Error %d\n", > > > __func__, > > > + pr_warn("%s Retry through MDS. Error %d\n", > > > __func__, > > > task->tk_status); > > > return -NFS4ERR_RESET_TO_MDS; > > > } > > > @@ -1167,7 +1167,7 @@ static int > > > ff_layout_async_handle_error_v3(struct rpc_task *task, > > > nfs_inc_stats(lseg->pls_layout->plh_inode, > > > NFSIOS_DELAY); > > > goto out_retry; > > > default: > > > - dprintk("%s DS connection error %d\n", __func__, > > > + pr_warn("%s DS connection error %d\n", __func__, > > > task->tk_status); > > > nfs4_delete_deviceid(devid->ld, devid->nfs_client, > > > &devid->deviceid); > > > diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c > > > index 679767ac258d..322661a48348 100644 > > > --- a/fs/nfs/pnfs_nfs.c > > > +++ b/fs/nfs/pnfs_nfs.c > > > @@ -934,8 +934,11 @@ static int _nfs4_pnfs_v4_ds_connect(struct > > > nfs_server *mds_srv, > > > (struct sockaddr > > > *)&da->da_addr, > > > da->da_addrlen, > > > IPPROTO_TCP, > > > timeo, retrans, > > > minor_version); > > > - if (IS_ERR(clp)) > > > + if (IS_ERR(clp)) { > > > + pr_warn("%s: DS: %s unable to > > > connect with address %s, error: %ld\n", > > > + __func__, ds->ds_remotestr, > > > da->da_remotestr, PTR_ERR(clp)); > > > continue; > > > + } > > > > > > status = nfs4_init_ds_session(clp, > > > mds_srv->nfs_client- > > > >cl_lease_time); > > > @@ -949,6 +952,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct > > > nfs_server *mds_srv, > > > } > > > > > > if (IS_ERR(clp)) { > > > + pr_warn("%s: no DS available\n", __func__); > > > status = PTR_ERR(clp); > > > goto out; > > > } > > > -- > > > 2.21.0 > > > > > > > -- > > Chuck Lever > > > > > > > > -- > Trond Myklebust > Linux NFS client maintainer, Hammerspace > trond.myklebust@xxxxxxxxxxxxxxx > >