On Fri, Mar 22, 2024 at 11:47 PM Tom Talpey <tom@xxxxxxxxxx> wrote: > > [resending as plain text stupid phone] > > Aren't these local errors, triggered by failure to send the close? Servers can fail the close too of course, which should also be retried, if appropriate to the error. > > Tom. > I agree. I think we should retry all errors a finite number of times, unless it is a known non-retryable error. > Mar 22, 2024 10:50:10 AM Steve French <smfrench@xxxxxxxxx>: > > > Do you know a repro scenario where you can get the server to return > > EAGAIN or EBUSY? > > > > Since close is also issued from other paths than the one you issued > > retries from (_cifsFileInfo_put) - are there other cases we should be > > retrying? e.g. error paths in do_create and atomic_open, cifs_open, > > cifs_close_dir, find_cifs_entry > > > > Also do you know a scenario where we can repro the negative total open > > files count? > > > > On Fri, Mar 22, 2024 at 2:33 AM Ritvik Budhiraja > > <budhirajaritviksmb@xxxxxxxxx> wrote: > >> > >> Attaching the updated patch > >> > >> > >> On Fri, 15 Mar 2024 at 01:12, Ritvik Budhiraja <budhirajaritviksmb@xxxxxxxxx> wrote: > >>> > >>> In the current implementation, CIFS close sends a close to the server > >>> and does not check for the success of the server close. 
This patch adds > >>> functionality to check for server close return status and retries > >>> in case of an EBUSY or EAGAIN error > >>> > >>> Signed-off-by: Ritvik Budhiraja <rbudhiraja@xxxxxxxxxxxxx> > >>> --- > >>> fs/smb/client/cifsfs.c | 11 +++++++ > >>> fs/smb/client/cifsglob.h | 7 +++-- > >>> fs/smb/client/file.c | 63 ++++++++++++++++++++++++++++++++++++---- > >>> fs/smb/client/smb1ops.c | 4 +-- > >>> fs/smb/client/smb2ops.c | 9 +++--- > >>> 5 files changed, 80 insertions(+), 14 deletions(-) > >>> > >>> diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c > >>> index fb368b191eef..e4b2ded86fce 100644 > >>> --- a/fs/smb/client/cifsfs.c > >>> +++ b/fs/smb/client/cifsfs.c > >>> @@ -160,6 +160,7 @@ struct workqueue_struct *decrypt_wq; > >>> struct workqueue_struct *fileinfo_put_wq; > >>> struct workqueue_struct *cifsoplockd_wq; > >>> struct workqueue_struct *deferredclose_wq; > >>> +struct workqueue_struct *serverclose_wq; > >>> __u32 cifs_lock_secret; > >>> > >>> /* > >>> @@ -1890,6 +1891,13 @@ init_cifs(void) > >>> goto out_destroy_cifsoplockd_wq; > >>> } > >>> > >>> + serverclose_wq = alloc_workqueue("serverclose", > >>> + WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); > >>> + if (!serverclose_wq) { > >>> + rc = -ENOMEM; > >>> + goto out_destroy_serverclose_wq; > >>> + } > >>> + > >>> rc = cifs_init_inodecache(); > >>> if (rc) > >>> goto out_destroy_deferredclose_wq; > >>> @@ -1964,6 +1972,8 @@ init_cifs(void) > >>> destroy_workqueue(decrypt_wq); > >>> out_destroy_cifsiod_wq: > >>> destroy_workqueue(cifsiod_wq); > >>> +out_destroy_serverclose_wq: > >>> + destroy_workqueue(serverclose_wq); > >>> out_clean_proc: > >>> cifs_proc_clean(); > >>> return rc; > >>> @@ -1993,6 +2003,7 @@ exit_cifs(void) > >>> destroy_workqueue(cifsoplockd_wq); > >>> destroy_workqueue(decrypt_wq); > >>> destroy_workqueue(fileinfo_put_wq); > >>> + destroy_workqueue(serverclose_wq); > >>> destroy_workqueue(cifsiod_wq); > >>> cifs_proc_clean(); > >>> } > >>> diff --git a/fs/smb/client/cifsglob.h 
b/fs/smb/client/cifsglob.h > >>> index 53c75cfb33ab..c99bc3b3ff56 100644 > >>> --- a/fs/smb/client/cifsglob.h > >>> +++ b/fs/smb/client/cifsglob.h > >>> @@ -429,10 +429,10 @@ struct smb_version_operations { > >>> /* set fid protocol-specific info */ > >>> void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); > >>> /* close a file */ > >>> - void (*close)(const unsigned int, struct cifs_tcon *, > >>> + int (*close)(const unsigned int, struct cifs_tcon *, > >>> struct cifs_fid *); > >>> /* close a file, returning file attributes and timestamps */ > >>> - void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, > >>> + int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, > >>> struct cifsFileInfo *pfile_info); > >>> /* send a flush request to the server */ > >>> int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); > >>> @@ -1420,6 +1420,7 @@ struct cifsFileInfo { > >>> bool invalidHandle:1; /* file closed via session abend */ > >>> bool swapfile:1; > >>> bool oplock_break_cancelled:1; > >>> + bool offload:1; /* offload final part of _put to a wq */ > >>> unsigned int oplock_epoch; /* epoch from the lease break */ > >>> __u32 oplock_level; /* oplock/lease level from the lease break */ > >>> int count; > >>> @@ -1428,6 +1429,7 @@ struct cifsFileInfo { > >>> struct cifs_search_info srch_inf; > >>> struct work_struct oplock_break; /* work for oplock breaks */ > >>> struct work_struct put; /* work for the final part of _put */ > >>> + struct work_struct serverclose; /* work for serverclose */ > >>> struct delayed_work deferred; > >>> bool deferred_close_scheduled; /* Flag to indicate close is scheduled */ > >>> char *symlink_target; > >>> @@ -2085,6 +2087,7 @@ extern struct workqueue_struct *decrypt_wq; > >>> extern struct workqueue_struct *fileinfo_put_wq; > >>> extern struct workqueue_struct *cifsoplockd_wq; > >>> extern struct workqueue_struct *deferredclose_wq; > >>> +extern struct workqueue_struct 
*serverclose_wq; > >>> extern __u32 cifs_lock_secret; > >>> > >>> extern mempool_t *cifs_mid_poolp; > >>> diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c > >>> index c3b8e7091a4d..c1379ec27dcd 100644 > >>> --- a/fs/smb/client/file.c > >>> +++ b/fs/smb/client/file.c > >>> @@ -445,6 +445,7 @@ cifs_down_write(struct rw_semaphore *sem) > >>> } > >>> > >>> static void cifsFileInfo_put_work(struct work_struct *work); > >>> +void serverclose_work(struct work_struct *work); > >>> > >>> struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, > >>> struct tcon_link *tlink, __u32 oplock, > >>> @@ -491,6 +492,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, > >>> cfile->tlink = cifs_get_tlink(tlink); > >>> INIT_WORK(&cfile->oplock_break, cifs_oplock_break); > >>> INIT_WORK(&cfile->put, cifsFileInfo_put_work); > >>> + INIT_WORK(&cfile->serverclose, serverclose_work); > >>> INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close); > >>> mutex_init(&cfile->fh_mutex); > >>> spin_lock_init(&cfile->file_info_lock); > >>> @@ -582,6 +584,40 @@ static void cifsFileInfo_put_work(struct work_struct *work) > >>> cifsFileInfo_put_final(cifs_file); > >>> } > >>> > >>> +void serverclose_work(struct work_struct *work) > >>> +{ > >>> + struct cifsFileInfo *cifs_file = container_of(work, > >>> + struct cifsFileInfo, serverclose); > >>> + > >>> + struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); > >>> + > >>> + struct TCP_Server_Info *server = tcon->ses->server; > >>> + int rc; > >>> + int retries = 0; > >>> + int MAX_RETRIES = 4; > >>> + > >>> + do { > >>> + if (server->ops->close_getattr) > >>> + rc = server->ops->close_getattr(0, tcon, cifs_file); > >>> + else if (server->ops->close) > >>> + rc = server->ops->close(0, tcon, &cifs_file->fid); > >>> + > >>> + if (rc == -EBUSY || rc == -EAGAIN) { > >>> + retries++; > >>> + msleep(250); > >>> + } > >>> + } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < 
MAX_RETRIES) > >>> + ); > >>> + > >>> + if (retries == MAX_RETRIES) > >>> + printk(KERN_WARNING "[CIFS_CLOSE] Serverclose failed %d times, giving up\n", MAX_RETRIES); > >>> + > >>> + if (cifs_file->offload) > >>> + queue_work(fileinfo_put_wq, &cifs_file->put); > >>> + else > >>> + cifsFileInfo_put_final(cifs_file); > >>> +} > >>> + > >>> /** > >>> * cifsFileInfo_put - release a reference of file priv data > >>> * > >>> @@ -622,10 +658,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, > >>> struct cifs_fid fid = {}; > >>> struct cifs_pending_open open; > >>> bool oplock_break_cancelled; > >>> + bool serverclose_offloaded = false; > >>> > >>> spin_lock(&tcon->open_file_lock); > >>> spin_lock(&cifsi->open_file_lock); > >>> spin_lock(&cifs_file->file_info_lock); > >>> + > >>> + cifs_file->offload = offload; > >>> if (--cifs_file->count > 0) { > >>> spin_unlock(&cifs_file->file_info_lock); > >>> spin_unlock(&cifsi->open_file_lock); > >>> @@ -667,13 +706,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, > >>> if (!tcon->need_reconnect && !cifs_file->invalidHandle) { > >>> struct TCP_Server_Info *server = tcon->ses->server; > >>> unsigned int xid; > >>> + int rc; > >>> > >>> xid = get_xid(); > >>> if (server->ops->close_getattr) > >>> - server->ops->close_getattr(xid, tcon, cifs_file); > >>> + rc = server->ops->close_getattr(xid, tcon, cifs_file); > >>> else if (server->ops->close) > >>> - server->ops->close(xid, tcon, &cifs_file->fid); > >>> + rc = server->ops->close(xid, tcon, &cifs_file->fid); > >>> _free_xid(xid); > >>> + > >>> + if (rc == -EBUSY || rc == -EAGAIN) { > >>> + // Server close failed, hence offloading it as an async op > >>> + queue_work(serverclose_wq, &cifs_file->serverclose); > >>> + serverclose_offloaded = true; > >>> + } > >>> } > >>> > >>> if (oplock_break_cancelled) > >>> @@ -681,10 +727,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, > >>> > >>> cifs_del_pending_open(&open); > >>> > >>> - if (offload) > >>> - 
queue_work(fileinfo_put_wq, &cifs_file->put); > >>> - else > >>> - cifsFileInfo_put_final(cifs_file); > >>> + // if serverclose has been offloaded to wq (on failure), it will > >>> + // handle offloading put as well. If serverclose not offloaded, > >>> + // we need to handle offloading put here. > >>> + if (!serverclose_offloaded) { > >>> + if (offload) > >>> + queue_work(fileinfo_put_wq, &cifs_file->put); > >>> + else > >>> + cifsFileInfo_put_final(cifs_file); > >>> + } > >>> } > >>> > >>> int cifs_open(struct inode *inode, struct file *file) > >>> diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c > >>> index a9eaba8083b0..212ec6f66ec6 100644 > >>> --- a/fs/smb/client/smb1ops.c > >>> +++ b/fs/smb/client/smb1ops.c > >>> @@ -753,11 +753,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) > >>> cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode); > >>> } > >>> > >>> -static void > >>> +static int > >>> cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon, > >>> struct cifs_fid *fid) > >>> { > >>> - CIFSSMBClose(xid, tcon, fid->netfid); > >>> + return CIFSSMBClose(xid, tcon, fid->netfid); > >>> } > >>> > >>> static int > >>> diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c > >>> index 4695433fcf39..1dcd4944958f 100644 > >>> --- a/fs/smb/client/smb2ops.c > >>> +++ b/fs/smb/client/smb2ops.c > >>> @@ -1411,14 +1411,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) > >>> memcpy(cfile->fid.create_guid, fid->create_guid, 16); > >>> } > >>> > >>> -static void > >>> +static int > >>> smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, > >>> struct cifs_fid *fid) > >>> { > >>> - SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); > >>> + return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); > >>> } > >>> > >>> -static void > >>> +static int > >>> smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, > >>> struct cifsFileInfo 
*cfile) > >>> { > >>> @@ -1429,7 +1429,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, > >>> rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid, > >>> cfile->fid.volatile_fid, &file_inf); > >>> if (rc) > >>> - return; > >>> + return rc; > >>> > >>> inode = d_inode(cfile->dentry); > >>> > >>> @@ -1458,6 +1458,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, > >>> > >>> /* End of file and Attributes should not have to be updated on close */ > >>> spin_unlock(&inode->i_lock); > >>> + return rc; > >>> } > >>> > >>> static int > >>> -- > >>> 2.34.1 > >>> > > > > > > -- > > Thanks, > > > > Steve > -- Regards, Shyam