From: Andy Adamson <andros@xxxxxxxxxx> If a data server is unavailable, go through MDS. Mark the deviceid containing the data server as a negative cache entry. Do not try to connect to any data server on a deviceid marked as a negative cache entry. Mark any layout that tries to use the marked deviceid as failed. Inodes with a layout marked as failed will not use the layout for I/O, and will not perform any more layoutgets. Inodes without a layout will still do layoutget, but the layout will get marked immediately. Signed-off-by: Andy Adamson <andros@xxxxxxxxxx> --- fs/nfs/nfs4filelayout.c | 6 ++++-- fs/nfs/nfs4filelayout.h | 3 +++ fs/nfs/nfs4filelayoutdev.c | 39 +++++++++++++++++++++++++++++---------- fs/nfs/pnfs.c | 18 ++++++++++++++---- fs/nfs/pnfs.h | 4 ++++ 5 files changed, 54 insertions(+), 16 deletions(-) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index d925af6..9b9a81c 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -146,7 +146,9 @@ filelayout_read_pagelist(struct nfs_read_data *data) idx = nfs4_fl_calc_ds_index(lseg, offset); ds = nfs4_fl_prepare_ds(lseg, idx); if (!ds) { - printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); + /* Either layout fh index faulty, or ds connect failed */ + set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); + set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); return PNFS_NOT_ATTEMPTED; } dprintk("%s USE DS:ip %x %hu\n", __func__, @@ -161,7 +163,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) data->args.offset = filelayout_get_dserver_offset(lseg, offset); data->fldata.orig_offset = offset; - /* Perform an asynchronous read */ + /* Perform an asynchronous read to ds */ nfs_initiate_read(data, ds->ds_clp->cl_rpcclient, &filelayout_read_call_ops); return PNFS_ATTEMPTED; diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index f884b0c..7e33bd8 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -96,5 +96,8 @@ extern 
struct nfs4_file_layout_dsaddr * nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id); struct nfs4_file_layout_dsaddr * get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); +void filelayout_mark_devid_negative(struct nfs_client *clp, + struct pnfs_deviceid_node *devid, + int err, u32 ds_ipaddr); #endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 9bb13f5..8642109 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -558,27 +558,46 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, loff_t offset) i = _nfs4_fl_calc_j_index(lseg, offset); return flseg->fh_array[i]; } +void +filelayout_mark_devid_negative(struct nfs_client *mds_clp, + struct pnfs_deviceid_node *devid, + int err, u32 ds_addr) +{ + u32 *p = (u32 *)&devid->de_id; + + printk(KERN_ERR "NFS: data server %x connection error %d." + " Deviceid [%x%x%x%x] marked out of use.\n", + ds_addr, err, p[0], p[1], p[2], p[3]); + + pnfs_mark_devid_negative(mds_clp, devid); +} struct nfs4_pnfs_ds * nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) { - struct nfs4_file_layout_dsaddr *dsaddr; + struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; + struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; - dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; - if (dsaddr->ds_list[ds_idx] == NULL) { - printk(KERN_ERR "%s: No data server for device id!\n", - __func__); + if (ds == NULL) { + printk(KERN_ERR "%s: No data server for offset index %d\n", + __func__, ds_idx); return NULL; } - if (!dsaddr->ds_list[ds_idx]->ds_clp) { + if (!ds->ds_clp) { + struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); int err; - err = nfs4_ds_connect(NFS_SERVER(lseg->pls_layout->plh_inode), - dsaddr->ds_list[ds_idx]); + /* Already tried to connect, don't try again */ + if (dsaddr->deviceid.de_flags & NFS4_DEVICE_ID_NEG_ENTRY) { + dprintk("%s Deviceid marked out of use\n", __func__); + return 
NULL; + } + err = nfs4_ds_connect(s, ds); if (err) { - printk(KERN_ERR "%s nfs4_ds_connect error %d\n", - __func__, err); + filelayout_mark_devid_negative(s->nfs_client, + &dsaddr->deviceid, err, + ntohl(ds->ds_ip_addr)); return NULL; } } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 4c49109..72786ec 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -759,15 +759,16 @@ pnfs_update_layout(struct inode *ino, dprintk("%s matches recall, use MDS\n", __func__); goto out_unlock; } + + /* If LAYOUTGET or pNFS I/O already failed once we don't try again */ + if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) + goto out_unlock; + /* Check to see if the layout for the given range already exists */ lseg = pnfs_find_lseg(lo, iomode); if (lseg) goto out_unlock; - /* if LAYOUTGET already failed once we don't try again */ - if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) - goto out_unlock; - if (pnfs_layoutgets_blocked(lo, NULL, 0)) goto out_unlock; atomic_inc(&lo->plh_outstanding); @@ -1089,3 +1090,12 @@ pnfs_put_deviceid_cache(struct nfs_client *clp) } } EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache); + +void +pnfs_mark_devid_negative(struct nfs_client *clp, struct pnfs_deviceid_node *d) +{ + spin_lock(&clp->cl_devid_cache->dc_lock); + d->de_flags |= NFS4_DEVICE_ID_NEG_ENTRY; + spin_unlock(&clp->cl_devid_cache->dc_lock); +} +EXPORT_SYMBOL_GPL(pnfs_mark_devid_negative); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index cbbcdfa..25a4e25 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -135,6 +135,8 @@ struct pnfs_deviceid_node { struct hlist_node de_node; struct nfs4_deviceid de_id; atomic_t de_ref; + unsigned long de_flags; +#define NFS4_DEVICE_ID_NEG_ENTRY 1 }; struct pnfs_deviceid_cache { @@ -155,6 +157,8 @@ extern struct pnfs_deviceid_node *pnfs_add_deviceid( struct pnfs_deviceid_node *); extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *devid); +extern void pnfs_mark_devid_negative(struct nfs_client *clp, + struct 
pnfs_deviceid_node *d); extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); -- 1.6.6 -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html