Re: [PATCH] Retrying on failed server close

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Mar 22, 2024 at 11:47 PM Tom Talpey <tom@xxxxxxxxxx> wrote:
>
> [resending as plain text; stupid phone]
>
> Aren't these local errors, triggered by failure to send the close? Servers can fail the close too of course, which should also be retried, if appropriate to the error.
>
> Tom.
>

I agree. I think we should retry all errors a finite number of times,
unless the error is known to be non-retryable.

> Mar 22, 2024 10:50:10 AM Steve French <smfrench@xxxxxxxxx>:
>
> > Do you know a repro scenario where you can get the server to return
> > EAGAIN or EBUSY?
> >
> > Since close is also issued from other paths than the one you issued
> > retries from (_cifsFileInfo_put) - are there other cases we should be
> > retrying?  e.g. error paths in do_create and atomic_open, cifs_open,
> > cifs_close_dir, find_cifs_entry
> >
> > Also do you know a scenario where we can repro the negative total open
> > files count?
> >
> > On Fri, Mar 22, 2024 at 2:33 AM Ritvik Budhiraja
> > <budhirajaritviksmb@xxxxxxxxx> wrote:
> >>
> >> Attaching the updated patch
> >>
> >>
> >> On Fri, 15 Mar 2024 at 01:12, Ritvik Budhiraja <budhirajaritviksmb@xxxxxxxxx> wrote:
> >>>
> >>> In the current implementation, CIFS close sends a close to the server
> >>> and does not check for the success of the server close. This patch adds
> >>> functionality to check for server close return status and retries
> >>> in case of an EBUSY or EAGAIN error.
> >>>
> >>> Signed-off-by: Ritvik Budhiraja <rbudhiraja@xxxxxxxxxxxxx>
> >>> ---
> >>> fs/smb/client/cifsfs.c   | 11 +++++++
> >>> fs/smb/client/cifsglob.h |  7 +++--
> >>> fs/smb/client/file.c     | 63 ++++++++++++++++++++++++++++++++++++----
> >>> fs/smb/client/smb1ops.c  |  4 +--
> >>> fs/smb/client/smb2ops.c  |  9 +++---
> >>> 5 files changed, 80 insertions(+), 14 deletions(-)
> >>>
> >>> diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
> >>> index fb368b191eef..e4b2ded86fce 100644
> >>> --- a/fs/smb/client/cifsfs.c
> >>> +++ b/fs/smb/client/cifsfs.c
> >>> @@ -160,6 +160,7 @@ struct workqueue_struct     *decrypt_wq;
> >>> struct workqueue_struct        *fileinfo_put_wq;
> >>> struct workqueue_struct        *cifsoplockd_wq;
> >>> struct workqueue_struct        *deferredclose_wq;
> >>> +struct workqueue_struct        *serverclose_wq;
> >>> __u32 cifs_lock_secret;
> >>>
> >>> /*
> >>> @@ -1890,6 +1891,13 @@ init_cifs(void)
> >>>                 goto out_destroy_cifsoplockd_wq;
> >>>         }
> >>>
> >>> +       serverclose_wq = alloc_workqueue("serverclose",
> >>> +                                          WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
> >>> +       if (!serverclose_wq) {
> >>> +               rc = -ENOMEM;
> >>> +               goto out_destroy_serverclose_wq;
> >>> +       }
> >>> +
> >>>         rc = cifs_init_inodecache();
> >>>         if (rc)
> >>>                 goto out_destroy_deferredclose_wq;
> >>> @@ -1964,6 +1972,8 @@ init_cifs(void)
> >>>         destroy_workqueue(decrypt_wq);
> >>> out_destroy_cifsiod_wq:
> >>>         destroy_workqueue(cifsiod_wq);
> >>> +out_destroy_serverclose_wq:
> >>> +       destroy_workqueue(serverclose_wq);
> >>> out_clean_proc:
> >>>         cifs_proc_clean();
> >>>         return rc;
> >>> @@ -1993,6 +2003,7 @@ exit_cifs(void)
> >>>         destroy_workqueue(cifsoplockd_wq);
> >>>         destroy_workqueue(decrypt_wq);
> >>>         destroy_workqueue(fileinfo_put_wq);
> >>> +       destroy_workqueue(serverclose_wq);
> >>>         destroy_workqueue(cifsiod_wq);
> >>>         cifs_proc_clean();
> >>> }
> >>> diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
> >>> index 53c75cfb33ab..c99bc3b3ff56 100644
> >>> --- a/fs/smb/client/cifsglob.h
> >>> +++ b/fs/smb/client/cifsglob.h
> >>> @@ -429,10 +429,10 @@ struct smb_version_operations {
> >>>         /* set fid protocol-specific info */
> >>>         void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32);
> >>>         /* close a file */
> >>> -       void (*close)(const unsigned int, struct cifs_tcon *,
> >>> +       int (*close)(const unsigned int, struct cifs_tcon *,
> >>>                       struct cifs_fid *);
> >>>         /* close a file, returning file attributes and timestamps */
> >>> -       void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
> >>> +       int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
> >>>                       struct cifsFileInfo *pfile_info);
> >>>         /* send a flush request to the server */
> >>>         int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
> >>> @@ -1420,6 +1420,7 @@ struct cifsFileInfo {
> >>>         bool invalidHandle:1;   /* file closed via session abend */
> >>>         bool swapfile:1;
> >>>         bool oplock_break_cancelled:1;
> >>> +       bool offload:1; /* offload final part of _put to a wq */
> >>>         unsigned int oplock_epoch; /* epoch from the lease break */
> >>>         __u32 oplock_level; /* oplock/lease level from the lease break */
> >>>         int count;
> >>> @@ -1428,6 +1429,7 @@ struct cifsFileInfo {
> >>>         struct cifs_search_info srch_inf;
> >>>         struct work_struct oplock_break; /* work for oplock breaks */
> >>>         struct work_struct put; /* work for the final part of _put */
> >>> +       struct work_struct serverclose; /* work for serverclose */
> >>>         struct delayed_work deferred;
> >>>         bool deferred_close_scheduled; /* Flag to indicate close is scheduled */
> >>>         char *symlink_target;
> >>> @@ -2085,6 +2087,7 @@ extern struct workqueue_struct *decrypt_wq;
> >>> extern struct workqueue_struct *fileinfo_put_wq;
> >>> extern struct workqueue_struct *cifsoplockd_wq;
> >>> extern struct workqueue_struct *deferredclose_wq;
> >>> +extern struct workqueue_struct *serverclose_wq;
> >>> extern __u32 cifs_lock_secret;
> >>>
> >>> extern mempool_t *cifs_mid_poolp;
> >>> diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
> >>> index c3b8e7091a4d..c1379ec27dcd 100644
> >>> --- a/fs/smb/client/file.c
> >>> +++ b/fs/smb/client/file.c
> >>> @@ -445,6 +445,7 @@ cifs_down_write(struct rw_semaphore *sem)
> >>> }
> >>>
> >>> static void cifsFileInfo_put_work(struct work_struct *work);
> >>> +void serverclose_work(struct work_struct *work);
> >>>
> >>> struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
> >>>                                        struct tcon_link *tlink, __u32 oplock,
> >>> @@ -491,6 +492,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
> >>>         cfile->tlink = cifs_get_tlink(tlink);
> >>>         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
> >>>         INIT_WORK(&cfile->put, cifsFileInfo_put_work);
> >>> +       INIT_WORK(&cfile->serverclose, serverclose_work);
> >>>         INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
> >>>         mutex_init(&cfile->fh_mutex);
> >>>         spin_lock_init(&cfile->file_info_lock);
> >>> @@ -582,6 +584,40 @@ static void cifsFileInfo_put_work(struct work_struct *work)
> >>>         cifsFileInfo_put_final(cifs_file);
> >>> }
> >>>
> >>> +void serverclose_work(struct work_struct *work)
> >>> +{
> >>> +       struct cifsFileInfo *cifs_file = container_of(work,
> >>> +                       struct cifsFileInfo, serverclose);
> >>> +
> >>> +       struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
> >>> +
> >>> +       struct TCP_Server_Info *server = tcon->ses->server;
> >>> +       int rc;
> >>> +       int retries = 0;
> >>> +       int MAX_RETRIES = 4;
> >>> +
> >>> +       do {
> >>> +               if (server->ops->close_getattr)
> >>> +                       rc = server->ops->close_getattr(0, tcon, cifs_file);
> >>> +               else if (server->ops->close)
> >>> +                       rc = server->ops->close(0, tcon, &cifs_file->fid);
> >>> +
> >>> +               if (rc == -EBUSY || rc == -EAGAIN) {
> >>> +                       retries++;
> >>> +                       msleep(250);
> >>> +               }
> >>> +       } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
> >>> +       );
> >>> +
> >>> +       if (retries == MAX_RETRIES)
> >>> +               printk(KERN_WARNING "[CIFS_CLOSE] Serverclose failed %d times, giving up\n", MAX_RETRIES);
> >>> +
> >>> +       if (cifs_file->offload)
> >>> +               queue_work(fileinfo_put_wq, &cifs_file->put);
> >>> +       else
> >>> +               cifsFileInfo_put_final(cifs_file);
> >>> +}
> >>> +
> >>> /**
> >>>   * cifsFileInfo_put - release a reference of file priv data
> >>>   *
> >>> @@ -622,10 +658,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
> >>>         struct cifs_fid fid = {};
> >>>         struct cifs_pending_open open;
> >>>         bool oplock_break_cancelled;
> >>> +       bool serverclose_offloaded = false;
> >>>
> >>>         spin_lock(&tcon->open_file_lock);
> >>>         spin_lock(&cifsi->open_file_lock);
> >>>         spin_lock(&cifs_file->file_info_lock);
> >>> +
> >>> +       cifs_file->offload = offload;
> >>>         if (--cifs_file->count > 0) {
> >>>                 spin_unlock(&cifs_file->file_info_lock);
> >>>                 spin_unlock(&cifsi->open_file_lock);
> >>> @@ -667,13 +706,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
> >>>         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
> >>>                 struct TCP_Server_Info *server = tcon->ses->server;
> >>>                 unsigned int xid;
> >>> +               int rc;
> >>>
> >>>                 xid = get_xid();
> >>>                 if (server->ops->close_getattr)
> >>> -                       server->ops->close_getattr(xid, tcon, cifs_file);
> >>> +                       rc = server->ops->close_getattr(xid, tcon, cifs_file);
> >>>                 else if (server->ops->close)
> >>> -                       server->ops->close(xid, tcon, &cifs_file->fid);
> >>> +                       rc = server->ops->close(xid, tcon, &cifs_file->fid);
> >>>                 _free_xid(xid);
> >>> +
> >>> +               if (rc == -EBUSY || rc == -EAGAIN) {
> >>> +                       // Server close failed, hence offloading it as an async op
> >>> +                       queue_work(serverclose_wq, &cifs_file->serverclose);
> >>> +                       serverclose_offloaded = true;
> >>> +               }
> >>>         }
> >>>
> >>>         if (oplock_break_cancelled)
> >>> @@ -681,10 +727,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
> >>>
> >>>         cifs_del_pending_open(&open);
> >>>
> >>> -       if (offload)
> >>> -               queue_work(fileinfo_put_wq, &cifs_file->put);
> >>> -       else
> >>> -               cifsFileInfo_put_final(cifs_file);
> >>> +       // if serverclose has been offloaded to wq (on failure), it will
> >>> +       // handle offloading put as well. If serverclose not offloaded,
> >>> +       // we need to handle offloading put here.
> >>> +       if (!serverclose_offloaded) {
> >>> +               if (offload)
> >>> +                       queue_work(fileinfo_put_wq, &cifs_file->put);
> >>> +               else
> >>> +                       cifsFileInfo_put_final(cifs_file);
> >>> +       }
> >>> }
> >>>
> >>> int cifs_open(struct inode *inode, struct file *file)
> >>> diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
> >>> index a9eaba8083b0..212ec6f66ec6 100644
> >>> --- a/fs/smb/client/smb1ops.c
> >>> +++ b/fs/smb/client/smb1ops.c
> >>> @@ -753,11 +753,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
> >>>         cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
> >>> }
> >>>
> >>> -static void
> >>> +static int
> >>> cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon,
> >>>                 struct cifs_fid *fid)
> >>> {
> >>> -       CIFSSMBClose(xid, tcon, fid->netfid);
> >>> +       return CIFSSMBClose(xid, tcon, fid->netfid);
> >>> }
> >>>
> >>> static int
> >>> diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
> >>> index 4695433fcf39..1dcd4944958f 100644
> >>> --- a/fs/smb/client/smb2ops.c
> >>> +++ b/fs/smb/client/smb2ops.c
> >>> @@ -1411,14 +1411,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
> >>>         memcpy(cfile->fid.create_guid, fid->create_guid, 16);
> >>> }
> >>>
> >>> -static void
> >>> +static int
> >>> smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon,
> >>>                 struct cifs_fid *fid)
> >>> {
> >>> -       SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
> >>> +       return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
> >>> }
> >>>
> >>> -static void
> >>> +static int
> >>> smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
> >>>                    struct cifsFileInfo *cfile)
> >>> {
> >>> @@ -1429,7 +1429,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
> >>>         rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid,
> >>>                    cfile->fid.volatile_fid, &file_inf);
> >>>         if (rc)
> >>> -               return;
> >>> +               return rc;
> >>>
> >>>         inode = d_inode(cfile->dentry);
> >>>
> >>> @@ -1458,6 +1458,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
> >>>
> >>>         /* End of file and Attributes should not have to be updated on close */
> >>>         spin_unlock(&inode->i_lock);
> >>> +       return rc;
> >>> }
> >>>
> >>> static int
> >>> --
> >>> 2.34.1
> >>>
> >
> >
> > --
> > Thanks,
> >
> > Steve
>


-- 
Regards,
Shyam





[Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux