Do you know a repro scenario where you can get the server to return EAGAIN or EBUSY? SInce close is also issued from other paths than the one you issued retries from (_cifsFileInfo_put) - are there other cases we should be retrying? e.g. error paths in do_create and atomic_open, cifs_open, cifs_close_dir, find_cifs_entry Also do you know a scenario where we can repro the negative total open files count? On Fri, Mar 22, 2024 at 2:33 AM Ritvik Budhiraja <budhirajaritviksmb@xxxxxxxxx> wrote: > > Attaching the updated patch > > > On Fri, 15 Mar 2024 at 01:12, Ritvik Budhiraja <budhirajaritviksmb@xxxxxxxxx> wrote: >> >> In the current implementation, CIFS close sends a close to the server >> and does not check for the success of the server close. This patch adds >> functionality to check for server close return status and retries >> in case of an EBUSY or EAGAIN error >> >> Signed-off-by: Ritvik Budhiraja <rbudhiraja@xxxxxxxxxxxxx> >> --- >> fs/smb/client/cifsfs.c | 11 +++++++ >> fs/smb/client/cifsglob.h | 7 +++-- >> fs/smb/client/file.c | 63 ++++++++++++++++++++++++++++++++++++---- >> fs/smb/client/smb1ops.c | 4 +-- >> fs/smb/client/smb2ops.c | 9 +++--- >> 5 files changed, 80 insertions(+), 14 deletions(-) >> >> diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c >> index fb368b191eef..e4b2ded86fce 100644 >> --- a/fs/smb/client/cifsfs.c >> +++ b/fs/smb/client/cifsfs.c >> @@ -160,6 +160,7 @@ struct workqueue_struct *decrypt_wq; >> struct workqueue_struct *fileinfo_put_wq; >> struct workqueue_struct *cifsoplockd_wq; >> struct workqueue_struct *deferredclose_wq; >> +struct workqueue_struct *serverclose_wq; >> __u32 cifs_lock_secret; >> >> /* >> @@ -1890,6 +1891,13 @@ init_cifs(void) >> goto out_destroy_cifsoplockd_wq; >> } >> >> + serverclose_wq = alloc_workqueue("serverclose", >> + WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); >> + if (!serverclose_wq) { >> + rc = -ENOMEM; >> + goto out_destroy_serverclose_wq; >> + } >> + >> rc = cifs_init_inodecache(); >> if (rc) >> goto out_destroy_deferredclose_wq; >> @@ -1964,6 +1972,8 @@ init_cifs(void) >> destroy_workqueue(decrypt_wq); >> out_destroy_cifsiod_wq: >> destroy_workqueue(cifsiod_wq); >> +out_destroy_serverclose_wq: >> + destroy_workqueue(serverclose_wq); >> out_clean_proc: >> cifs_proc_clean(); >> return rc; >> @@ -1993,6 +2003,7 @@ exit_cifs(void) >> destroy_workqueue(cifsoplockd_wq); >> destroy_workqueue(decrypt_wq); >> destroy_workqueue(fileinfo_put_wq); >> + destroy_workqueue(serverclose_wq); >> destroy_workqueue(cifsiod_wq); >> cifs_proc_clean(); >> } >> diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h >> index 53c75cfb33ab..c99bc3b3ff56 100644 >> --- a/fs/smb/client/cifsglob.h >> +++ b/fs/smb/client/cifsglob.h >> @@ -429,10 +429,10 @@ struct smb_version_operations { >> /* set fid protocol-specific info */ >> void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); >> /* close a file */ >> - void (*close)(const unsigned int, struct cifs_tcon *, >> + int (*close)(const unsigned int, struct cifs_tcon *, >> struct cifs_fid *); >> /* close a file, returning file attributes and timestamps */ >> - void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, >> + int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, >> struct cifsFileInfo *pfile_info); >> /* send a flush request to the server */ >> int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); >> @@ -1420,6 +1420,7 @@ struct cifsFileInfo { >> bool invalidHandle:1; /* file closed via session abend */ >> bool swapfile:1; >> bool oplock_break_cancelled:1; >> + bool offload:1; /* offload final part of _put to a wq */ >> unsigned int oplock_epoch; /* epoch from the lease break */ >> __u32 oplock_level; /* oplock/lease level from the lease break */ >> int count; >> @@ -1428,6 +1429,7 @@ struct cifsFileInfo { >> struct cifs_search_info srch_inf; >> struct work_struct oplock_break; /* work for oplock breaks */ >> struct work_struct put; /* work for the final part of _put */ >> + struct work_struct serverclose; /* work for serverclose */ >> struct delayed_work deferred; >> bool deferred_close_scheduled; /* Flag to indicate close is scheduled */ >> char *symlink_target; >> @@ -2085,6 +2087,7 @@ extern struct workqueue_struct *decrypt_wq; >> extern struct workqueue_struct *fileinfo_put_wq; >> extern struct workqueue_struct *cifsoplockd_wq; >> extern struct workqueue_struct *deferredclose_wq; >> +extern struct workqueue_struct *serverclose_wq; >> extern __u32 cifs_lock_secret; >> >> extern mempool_t *cifs_mid_poolp; >> diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c >> index c3b8e7091a4d..c1379ec27dcd 100644 >> --- a/fs/smb/client/file.c >> +++ b/fs/smb/client/file.c >> @@ -445,6 +445,7 @@ cifs_down_write(struct rw_semaphore *sem) >> } >> >> static void cifsFileInfo_put_work(struct work_struct *work); >> +void serverclose_work(struct work_struct *work); >> >> struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, >> struct tcon_link *tlink, __u32 oplock, >> @@ -491,6 +492,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, >> cfile->tlink = cifs_get_tlink(tlink); >> INIT_WORK(&cfile->oplock_break, cifs_oplock_break); >> INIT_WORK(&cfile->put, cifsFileInfo_put_work); >> + INIT_WORK(&cfile->serverclose, serverclose_work); >> INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close); >> mutex_init(&cfile->fh_mutex); >> spin_lock_init(&cfile->file_info_lock); >> @@ -582,6 +584,40 @@ static void cifsFileInfo_put_work(struct work_struct *work) >> cifsFileInfo_put_final(cifs_file); >> } >> >> +void serverclose_work(struct work_struct *work) >> +{ >> + struct cifsFileInfo *cifs_file = container_of(work, >> + struct cifsFileInfo, serverclose); >> + >> + struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); >> + >> + struct TCP_Server_Info *server = tcon->ses->server; >> + int rc; >> + int retries = 0; >> + int MAX_RETRIES = 4; >> + >> + do { >> + if (server->ops->close_getattr) >> + rc = server->ops->close_getattr(0, tcon, cifs_file); >> + else if (server->ops->close) >> + rc = server->ops->close(0, tcon, &cifs_file->fid); >> + >> + if (rc == -EBUSY || rc == -EAGAIN) { >> + retries++; >> + msleep(250); >> + } >> + } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES) >> + ); >> + >> + if (retries == MAX_RETRIES) >> + printk(KERN_WARNING "[CIFS_CLOSE] Serverclose failed %d times, giving up\n", MAX_RETRIES); >> + >> + if (cifs_file->offload) >> + queue_work(fileinfo_put_wq, &cifs_file->put); >> + else >> + cifsFileInfo_put_final(cifs_file); >> +} >> + >> /** >> * cifsFileInfo_put - release a reference of file priv data >> * >> @@ -622,10 +658,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, >> struct cifs_fid fid = {}; >> struct cifs_pending_open open; >> bool oplock_break_cancelled; >> + bool serverclose_offloaded = false; >> >> spin_lock(&tcon->open_file_lock); >> spin_lock(&cifsi->open_file_lock); >> spin_lock(&cifs_file->file_info_lock); >> + >> + cifs_file->offload = offload; >> if (--cifs_file->count > 0) { >> spin_unlock(&cifs_file->file_info_lock); >> spin_unlock(&cifsi->open_file_lock); >> @@ -667,13 +706,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, >> if (!tcon->need_reconnect && !cifs_file->invalidHandle) { >> struct TCP_Server_Info *server = tcon->ses->server; >> unsigned int xid; >> + int rc; >> >> xid = get_xid(); >> if (server->ops->close_getattr) >> - server->ops->close_getattr(xid, tcon, cifs_file); >> + rc = server->ops->close_getattr(xid, tcon, cifs_file); >> else if (server->ops->close) >> - server->ops->close(xid, tcon, &cifs_file->fid); >> + rc = server->ops->close(xid, tcon, &cifs_file->fid); >> _free_xid(xid); >> + >> + if (rc == -EBUSY || rc == -EAGAIN) { >> + // Server close failed, hence offloading it as an async op >> + queue_work(serverclose_wq, &cifs_file->serverclose); >> + serverclose_offloaded = true; >> + } >> } >> >> if (oplock_break_cancelled) >> @@ -681,10 +727,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, >> >> cifs_del_pending_open(&open); >> >> - if (offload) >> - queue_work(fileinfo_put_wq, &cifs_file->put); >> - else >> - cifsFileInfo_put_final(cifs_file); >> + // if serverclose has been offloaded to wq (on failure), it will >> + // handle offloading put as well. If serverclose not offloaded, >> + // we need to handle offloading put here. >> + if (!serverclose_offloaded) { >> + if (offload) >> + queue_work(fileinfo_put_wq, &cifs_file->put); >> + else >> + cifsFileInfo_put_final(cifs_file); >> + } >> } >> >> int cifs_open(struct inode *inode, struct file *file) >> diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c >> index a9eaba8083b0..212ec6f66ec6 100644 >> --- a/fs/smb/client/smb1ops.c >> +++ b/fs/smb/client/smb1ops.c >> @@ -753,11 +753,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) >> cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode); >> } >> >> -static void >> +static int >> cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon, >> struct cifs_fid *fid) >> { >> - CIFSSMBClose(xid, tcon, fid->netfid); >> + return CIFSSMBClose(xid, tcon, fid->netfid); >> } >> >> static int >> diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c >> index 4695433fcf39..1dcd4944958f 100644 >> --- a/fs/smb/client/smb2ops.c >> +++ b/fs/smb/client/smb2ops.c >> @@ -1411,14 +1411,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) >> memcpy(cfile->fid.create_guid, fid->create_guid, 16); >> } >> >> -static void >> +static int >> smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, >> struct cifs_fid *fid) >> { >> - SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); >> + return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); >> } >> >> -static void >> +static int >> smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, >> struct cifsFileInfo *cfile) >> { >> @@ -1429,7 +1429,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, >> rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid, >> cfile->fid.volatile_fid, &file_inf); >> if (rc) >> - return; >> + return rc; >> >> inode = d_inode(cfile->dentry); >> >> @@ -1458,6 +1458,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, >> >> /* End of file and Attributes should not have to be updated on close */ >> spin_unlock(&inode->i_lock); >> + return rc; >> } >> >> static int >> -- >> 2.34.1 >> -- Thanks, Steve