From: Paul Aurich <paul@xxxxxxxxxxxxxx> commit 3fa640d035e5ae526769615c35cb9ed4be6e3662 upstream. The unmount process (cifs_kill_sb() calling close_all_cached_dirs()) can race with various cached directory operations, which ultimately results in dentries not being dropped and these kernel BUGs: BUG: Dentry ffff88814f37e358{i=1000000000080,n=/} still in use (2) [unmount of cifs cifs] VFS: Busy inodes after unmount of cifs (cifs) ------------[ cut here ]------------ kernel BUG at fs/super.c:661! This happens when a cfid is in the process of being cleaned up when, and has been removed from the cfids->entries list, including: - Receiving a lease break from the server - Server reconnection triggers invalidate_all_cached_dirs(), which removes all the cfids from the list - The laundromat thread decides to expire an old cfid. To solve these problems, dropping the dentry is done in queued work done in a newly-added cfid_put_wq workqueue, and close_all_cached_dirs() flushes that workqueue after it drops all the dentries of which it's aware. This is a global workqueue (rather than scoped to a mount), but the queued work is minimal. The final cleanup work for cleaning up a cfid is performed via work queued in the serverclose_wq workqueue; this is done separate from dropping the dentries so that close_all_cached_dirs() doesn't block on any server operations. Both of these queued works expect to invoked with a cfid reference and a tcon reference to avoid those objects from being freed while the work is ongoing. While we're here, add proper locking to close_all_cached_dirs(), and locking around the freeing of cfid->dentry. Fixes: ebe98f1447bb ("cifs: enable caching of directories for which a lease is held") Cc: stable@xxxxxxxxxxxxxxx Signed-off-by: Paul Aurich <paul@xxxxxxxxxxxxxx> Signed-off-by: Steve French <stfrench@xxxxxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- fs/smb/client/cached_dir.c | 156 ++++++++++++++++++++++++++++++++++++--------- fs/smb/client/cached_dir.h | 6 + fs/smb/client/cifsfs.c | 12 +++ fs/smb/client/cifsglob.h | 3 fs/smb/client/inode.c | 3 fs/smb/client/trace.h | 3 6 files changed, 147 insertions(+), 36 deletions(-) --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -17,6 +17,11 @@ static void free_cached_dir(struct cache static void smb2_close_cached_fid(struct kref *ref); static void cfids_laundromat_worker(struct work_struct *work); +struct cached_dir_dentry { + struct list_head entry; + struct dentry *dentry; +}; + static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, const char *path, bool lookup_only, @@ -470,7 +475,10 @@ void close_all_cached_dirs(struct cifs_s struct cifs_tcon *tcon; struct tcon_link *tlink; struct cached_fids *cfids; + struct cached_dir_dentry *tmp_list, *q; + LIST_HEAD(entry); + spin_lock(&cifs_sb->tlink_tree_lock); for (node = rb_first(root); node; node = rb_next(node)) { tlink = rb_entry(node, struct tcon_link, tl_rbnode); tcon = tlink_tcon(tlink); @@ -479,11 +487,30 @@ void close_all_cached_dirs(struct cifs_s cfids = tcon->cfids; if (cfids == NULL) continue; + spin_lock(&cfids->cfid_list_lock); list_for_each_entry(cfid, &cfids->entries, entry) { - dput(cfid->dentry); + tmp_list = kmalloc(sizeof(*tmp_list), GFP_ATOMIC); + if (tmp_list == NULL) + break; + spin_lock(&cfid->fid_lock); + tmp_list->dentry = cfid->dentry; cfid->dentry = NULL; + spin_unlock(&cfid->fid_lock); + + list_add_tail(&tmp_list->entry, &entry); } + spin_unlock(&cfids->cfid_list_lock); + } + spin_unlock(&cifs_sb->tlink_tree_lock); + + list_for_each_entry_safe(tmp_list, q, &entry, entry) { + list_del(&tmp_list->entry); + dput(tmp_list->dentry); + kfree(tmp_list); } + + /* Flush any pending work that will drop dentries */ + flush_workqueue(cfid_put_wq); } /* @@ -494,14 +521,18 @@ void invalidate_all_cached_dirs(struct c { struct cached_fids *cfids = tcon->cfids; struct cached_fid *cfid, *q; - LIST_HEAD(entry); if (cfids == NULL) return; + /* + * Mark all the cfids as closed, and move them to the cfids->dying list. + * They'll be cleaned up later by cfids_invalidation_worker. Take + * a reference to each cfid during this process. + */ spin_lock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { - list_move(&cfid->entry, &entry); + list_move(&cfid->entry, &cfids->dying); cfids->num_entries--; cfid->is_open = false; cfid->on_list = false; @@ -514,26 +545,47 @@ void invalidate_all_cached_dirs(struct c } else kref_get(&cfid->refcount); } + /* + * Queue dropping of the dentries once locks have been dropped + */ + if (!list_empty(&cfids->dying)) + queue_work(cfid_put_wq, &cfids->invalidation_work); spin_unlock(&cfids->cfid_list_lock); - - list_for_each_entry_safe(cfid, q, &entry, entry) { - list_del(&cfid->entry); - cancel_work_sync(&cfid->lease_break); - /* - * Drop the ref-count from above, either the lease-ref (if there - * was one) or the extra one acquired. - */ - kref_put(&cfid->refcount, smb2_close_cached_fid); - } } static void -smb2_cached_lease_break(struct work_struct *work) +cached_dir_offload_close(struct work_struct *work) { struct cached_fid *cfid = container_of(work, - struct cached_fid, lease_break); + struct cached_fid, close_work); + struct cifs_tcon *tcon = cfid->tcon; + + WARN_ON(cfid->on_list); kref_put(&cfid->refcount, smb2_close_cached_fid); + cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cached_close); +} + +/* + * Release the cached directory's dentry, and then queue work to drop cached + * directory itself (closing on server if needed). + * + * Must be called with a reference to the cached_fid and a reference to the + * tcon. + */ +static void cached_dir_put_work(struct work_struct *work) +{ + struct cached_fid *cfid = container_of(work, struct cached_fid, + put_work); + struct dentry *dentry; + + spin_lock(&cfid->fid_lock); + dentry = cfid->dentry; + cfid->dentry = NULL; + spin_unlock(&cfid->fid_lock); + + dput(dentry); + queue_work(serverclose_wq, &cfid->close_work); } int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16]) @@ -560,8 +612,10 @@ int cached_dir_lease_break(struct cifs_t cfid->on_list = false; cfids->num_entries--; - queue_work(cifsiod_wq, - &cfid->lease_break); + ++tcon->tc_count; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_cached_lease_break); + queue_work(cfid_put_wq, &cfid->put_work); spin_unlock(&cfids->cfid_list_lock); return true; } @@ -583,7 +637,8 @@ static struct cached_fid *init_cached_di return NULL; } - INIT_WORK(&cfid->lease_break, smb2_cached_lease_break); + INIT_WORK(&cfid->close_work, cached_dir_offload_close); + INIT_WORK(&cfid->put_work, cached_dir_put_work); INIT_LIST_HEAD(&cfid->entry); INIT_LIST_HEAD(&cfid->dirents.entries); mutex_init(&cfid->dirents.de_mutex); @@ -596,6 +651,9 @@ static void free_cached_dir(struct cache { struct cached_dirent *dirent, *q; + WARN_ON(work_pending(&cfid->close_work)); + WARN_ON(work_pending(&cfid->put_work)); + dput(cfid->dentry); cfid->dentry = NULL; @@ -613,10 +671,30 @@ static void free_cached_dir(struct cache kfree(cfid); } +static void cfids_invalidation_worker(struct work_struct *work) +{ + struct cached_fids *cfids = container_of(work, struct cached_fids, + invalidation_work); + struct cached_fid *cfid, *q; + LIST_HEAD(entry); + + spin_lock(&cfids->cfid_list_lock); + /* move cfids->dying to the local list */ + list_cut_before(&entry, &cfids->dying, &cfids->dying); + spin_unlock(&cfids->cfid_list_lock); + + list_for_each_entry_safe(cfid, q, &entry, entry) { + list_del(&cfid->entry); + /* Drop the ref-count acquired in invalidate_all_cached_dirs */ + kref_put(&cfid->refcount, smb2_close_cached_fid); + } +} + static void cfids_laundromat_worker(struct work_struct *work) { struct cached_fids *cfids; struct cached_fid *cfid, *q; + struct dentry *dentry; LIST_HEAD(entry); cfids = container_of(work, struct cached_fids, laundromat_work.work); @@ -642,18 +720,28 @@ static void cfids_laundromat_worker(stru list_for_each_entry_safe(cfid, q, &entry, entry) { list_del(&cfid->entry); - /* - * Cancel and wait for the work to finish in case we are racing - * with it. - */ - cancel_work_sync(&cfid->lease_break); - /* - * Drop the ref-count from above, either the lease-ref (if there - * was one) or the extra one acquired. - */ - kref_put(&cfid->refcount, smb2_close_cached_fid); + + spin_lock(&cfid->fid_lock); + dentry = cfid->dentry; + cfid->dentry = NULL; + spin_unlock(&cfid->fid_lock); + + dput(dentry); + if (cfid->is_open) { + spin_lock(&cifs_tcp_ses_lock); + ++cfid->tcon->tc_count; + trace_smb3_tcon_ref(cfid->tcon->debug_id, cfid->tcon->tc_count, + netfs_trace_tcon_ref_get_cached_laundromat); + spin_unlock(&cifs_tcp_ses_lock); + queue_work(serverclose_wq, &cfid->close_work); + } else + /* + * Drop the ref-count from above, either the lease-ref (if there + * was one) or the extra one acquired. + */ + kref_put(&cfid->refcount, smb2_close_cached_fid); } - queue_delayed_work(cifsiod_wq, &cfids->laundromat_work, + queue_delayed_work(cfid_put_wq, &cfids->laundromat_work, dir_cache_timeout * HZ); } @@ -666,9 +754,11 @@ struct cached_fids *init_cached_dirs(voi return NULL; spin_lock_init(&cfids->cfid_list_lock); INIT_LIST_HEAD(&cfids->entries); + INIT_LIST_HEAD(&cfids->dying); + INIT_WORK(&cfids->invalidation_work, cfids_invalidation_worker); INIT_DELAYED_WORK(&cfids->laundromat_work, cfids_laundromat_worker); - queue_delayed_work(cifsiod_wq, &cfids->laundromat_work, + queue_delayed_work(cfid_put_wq, &cfids->laundromat_work, dir_cache_timeout * HZ); return cfids; @@ -687,12 +777,18 @@ void free_cached_dirs(struct cached_fids return; cancel_delayed_work_sync(&cfids->laundromat_work); + cancel_work_sync(&cfids->invalidation_work); spin_lock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { cfid->on_list = false; cfid->is_open = false; list_move(&cfid->entry, &entry); + } + list_for_each_entry_safe(cfid, q, &cfids->dying, entry) { + cfid->on_list = false; + cfid->is_open = false; + list_move(&cfid->entry, &entry); } spin_unlock(&cfids->cfid_list_lock); --- a/fs/smb/client/cached_dir.h +++ b/fs/smb/client/cached_dir.h @@ -44,7 +44,8 @@ struct cached_fid { spinlock_t fid_lock; struct cifs_tcon *tcon; struct dentry *dentry; - struct work_struct lease_break; + struct work_struct put_work; + struct work_struct close_work; struct smb2_file_all_info file_all_info; struct cached_dirents dirents; }; @@ -53,10 +54,13 @@ struct cached_fid { struct cached_fids { /* Must be held when: * - accessing the cfids->entries list + * - accessing the cfids->dying list */ spinlock_t cfid_list_lock; int num_entries; struct list_head entries; + struct list_head dying; + struct work_struct invalidation_work; struct delayed_work laundromat_work; }; --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -156,6 +156,7 @@ struct workqueue_struct *fileinfo_put_wq struct workqueue_struct *cifsoplockd_wq; struct workqueue_struct *deferredclose_wq; struct workqueue_struct *serverclose_wq; +struct workqueue_struct *cfid_put_wq; __u32 cifs_lock_secret; /* @@ -1899,9 +1900,16 @@ init_cifs(void) goto out_destroy_deferredclose_wq; } + cfid_put_wq = alloc_workqueue("cfid_put_wq", + WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + if (!cfid_put_wq) { + rc = -ENOMEM; + goto out_destroy_serverclose_wq; + } + rc = cifs_init_inodecache(); if (rc) - goto out_destroy_serverclose_wq; + goto out_destroy_cfid_put_wq; rc = init_mids(); if (rc) @@ -1963,6 +1971,8 @@ out_destroy_mids: destroy_mids(); out_destroy_inodecache: cifs_destroy_inodecache(); +out_destroy_cfid_put_wq: + destroy_workqueue(cfid_put_wq); out_destroy_serverclose_wq: destroy_workqueue(serverclose_wq); out_destroy_deferredclose_wq: --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -2022,7 +2022,7 @@ require use of the stronger protocol */ * cifsInodeInfo->lock_sem cifsInodeInfo->llist cifs_init_once * ->can_cache_brlcks * cifsInodeInfo->deferred_lock cifsInodeInfo->deferred_closes cifsInodeInfo_alloc - * cached_fid->fid_mutex cifs_tcon->crfid tcon_info_alloc + * cached_fids->cfid_list_lock cifs_tcon->cfids->entries init_cached_dirs * cifsFileInfo->fh_mutex cifsFileInfo cifs_new_fileinfo * cifsFileInfo->file_info_lock cifsFileInfo->count cifs_new_fileinfo * ->invalidHandle initiate_cifs_search @@ -2111,6 +2111,7 @@ extern struct workqueue_struct *fileinfo extern struct workqueue_struct *cifsoplockd_wq; extern struct workqueue_struct *deferredclose_wq; extern struct workqueue_struct *serverclose_wq; +extern struct workqueue_struct *cfid_put_wq; extern __u32 cifs_lock_secret; extern mempool_t *cifs_sm_req_poolp; --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -2412,13 +2412,10 @@ cifs_dentry_needs_reval(struct dentry *d return true; if (!open_cached_dir_by_dentry(tcon, dentry->d_parent, &cfid)) { - spin_lock(&cfid->fid_lock); if (cfid->time && cifs_i->time > cfid->time) { - spin_unlock(&cfid->fid_lock); close_cached_dir(cfid); return false; } - spin_unlock(&cfid->fid_lock); close_cached_dir(cfid); } /* --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -27,6 +27,8 @@ EM(netfs_trace_tcon_ref_free_ipc, "FRE Ipc ") \ EM(netfs_trace_tcon_ref_free_ipc_fail, "FRE Ipc-F ") \ EM(netfs_trace_tcon_ref_free_reconnect_server, "FRE Reconn") \ + EM(netfs_trace_tcon_ref_get_cached_laundromat, "GET Ch-Lau") \ + EM(netfs_trace_tcon_ref_get_cached_lease_break, "GET Ch-Lea") \ EM(netfs_trace_tcon_ref_get_cancelled_close, "GET Cn-Cls") \ EM(netfs_trace_tcon_ref_get_dfs_refer, "GET DfsRef") \ EM(netfs_trace_tcon_ref_get_find, "GET Find ") \ @@ -35,6 +37,7 @@ EM(netfs_trace_tcon_ref_new, "NEW ") \ EM(netfs_trace_tcon_ref_new_ipc, "NEW Ipc ") \ EM(netfs_trace_tcon_ref_new_reconnect_server, "NEW Reconn") \ + EM(netfs_trace_tcon_ref_put_cached_close, "PUT Ch-Cls") \ EM(netfs_trace_tcon_ref_put_cancelled_close, "PUT Cn-Cls") \ EM(netfs_trace_tcon_ref_put_cancelled_close_fid, "PUT Cn-Fid") \ EM(netfs_trace_tcon_ref_put_cancelled_mid, "PUT Cn-Mid") \ Patches currently in stable-queue which might be from paul@xxxxxxxxxxxxxx are queue-6.6/smb-cached-directories-can-be-more-than-root-file-ha.patch queue-6.6/smb-don-t-leak-cfid-when-reconnect-races-with-open_cached_dir.patch queue-6.6/smb-during-unmount-ensure-all-cached-dir-instances-drop-their-dentry.patch queue-6.6/smb-prevent-use-after-free-due-to-open_cached_dir-error-paths.patch