Re: [PATCH v2] ceph: fix NULL pointer dereference for req->r_session

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Nov 8, 2022 at 6:50 AM <xiubli@xxxxxxxxxx> wrote:
>
> From: Xiubo Li <xiubli@xxxxxxxxxx>
>
> The request's r_session may be changed when it was forwarded or
> resent.
>
> Cc: stable@xxxxxxxxxxxxxxx
> URL: https://bugzilla.redhat.com/show_bug.cgi?id=2137955
> Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx>
> ---
>  fs/ceph/caps.c | 88 +++++++++++++++++++-------------------------------
>  1 file changed, 33 insertions(+), 55 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 894adfb4a092..172f18f7459d 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -2297,8 +2297,9 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
>         struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
>         struct ceph_inode_info *ci = ceph_inode(inode);
>         struct ceph_mds_request *req1 = NULL, *req2 = NULL;
> +       struct ceph_mds_session *s, **sessions = NULL;

Hi Xiubo,

Nit: mixing pointers and double pointers coupled with differing
initialization is generally frowned upon.  Keep it on two lines as
before:

    struct ceph_mds_session **sessions = NULL;
    struct ceph_mds_session *s;

>         unsigned int max_sessions;
> -       int ret, err = 0;
> +       int i, ret, err = 0;
>
>         spin_lock(&ci->i_unsafe_lock);
>         if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) {
> @@ -2315,31 +2316,22 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
>         }
>         spin_unlock(&ci->i_unsafe_lock);
>
> -       /*
> -        * The mdsc->max_sessions is unlikely to be changed
> -        * mostly, here we will retry it by reallocating the
> -        * sessions array memory to get rid of the mdsc->mutex
> -        * lock.
> -        */
> -retry:
> -       max_sessions = mdsc->max_sessions;
> -
>         /*
>          * Trigger to flush the journal logs in all the relevant MDSes
>          * manually, or in the worst case we must wait at most 5 seconds
>          * to wait the journal logs to be flushed by the MDSes periodically.
>          */
> +       mutex_lock(&mdsc->mutex);
> +       max_sessions = mdsc->max_sessions;
> +       sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
> +       if (!sessions) {
> +               mutex_unlock(&mdsc->mutex);
> +               err = -ENOMEM;
> +               goto out;
> +       }
> +
>         if ((req1 || req2) && likely(max_sessions)) {

Just curious, when can max_sessions be zero here?

> -               struct ceph_mds_session **sessions = NULL;
> -               struct ceph_mds_session *s;
>                 struct ceph_mds_request *req;
> -               int i;
> -
> -               sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
> -               if (!sessions) {
> -                       err = -ENOMEM;
> -                       goto out;
> -               }
>
>                 spin_lock(&ci->i_unsafe_lock);
>                 if (req1) {
> @@ -2348,16 +2340,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
>                                 s = req->r_session;
>                                 if (!s)
>                                         continue;
> -                               if (unlikely(s->s_mds >= max_sessions)) {
> -                                       spin_unlock(&ci->i_unsafe_lock);
> -                                       for (i = 0; i < max_sessions; i++) {
> -                                               s = sessions[i];
> -                                               if (s)
> -                                                       ceph_put_mds_session(s);
> -                                       }
> -                                       kfree(sessions);
> -                                       goto retry;
> -                               }
> +                               if (unlikely(s->s_mds >= max_sessions))
> +                                       continue;

Nit: this could be combined with the previous condition:

    if (!s || unlikely(s->s_mds >= max_sessions))
            continue;

>                                 if (!sessions[s->s_mds]) {
>                                         s = ceph_get_mds_session(s);
>                                         sessions[s->s_mds] = s;
> @@ -2370,16 +2354,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
>                                 s = req->r_session;
>                                 if (!s)
>                                         continue;
> -                               if (unlikely(s->s_mds >= max_sessions)) {
> -                                       spin_unlock(&ci->i_unsafe_lock);
> -                                       for (i = 0; i < max_sessions; i++) {
> -                                               s = sessions[i];
> -                                               if (s)
> -                                                       ceph_put_mds_session(s);
> -                                       }
> -                                       kfree(sessions);
> -                                       goto retry;
> -                               }
> +                               if (unlikely(s->s_mds >= max_sessions))
> +                                       continue;

Ditto: combine this with the preceding !s check.

>                                 if (!sessions[s->s_mds]) {
>                                         s = ceph_get_mds_session(s);
>                                         sessions[s->s_mds] = s;
> @@ -2387,25 +2363,26 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
>                         }
>                 }
>                 spin_unlock(&ci->i_unsafe_lock);
> +       }
> +       mutex_unlock(&mdsc->mutex);
>
> -               /* the auth MDS */
> -               spin_lock(&ci->i_ceph_lock);
> -               if (ci->i_auth_cap) {
> -                     s = ci->i_auth_cap->session;
> -                     if (!sessions[s->s_mds])
> -                             sessions[s->s_mds] = ceph_get_mds_session(s);
> -               }
> -               spin_unlock(&ci->i_ceph_lock);
> +       /* the auth MDS */
> +       spin_lock(&ci->i_ceph_lock);

Why was this "auth MDS" block moved outside of the max_sessions > 0
branch?  Logically, it very much belongs there.  Is there a problem
with taking ci->i_ceph_lock under mdsc->mutex?

> +       if (ci->i_auth_cap) {
> +               s = ci->i_auth_cap->session;
> +               if (!sessions[s->s_mds] &&
> +                   likely(s->s_mds < max_sessions))

This is wrong: s->s_mds must be checked against max_sessions before
indexing into the sessions array, otherwise an out-of-range s_mds reads
past the end of the array.  Also, the entire condition should fit on
a single line.

> +                       sessions[s->s_mds] = ceph_get_mds_session(s);
> +       }
> +       spin_unlock(&ci->i_ceph_lock);
>
> -               /* send flush mdlog request to MDSes */
> -               for (i = 0; i < max_sessions; i++) {
> -                       s = sessions[i];
> -                       if (s) {
> -                               send_flush_mdlog(s);
> -                               ceph_put_mds_session(s);
> -                       }
> +       /* send flush mdlog request to MDSes */
> +       for (i = 0; i < max_sessions; i++) {
> +               s = sessions[i];
> +               if (s) {
> +                       send_flush_mdlog(s);
> +                       ceph_put_mds_session(s);
>                 }
> -               kfree(sessions);
>         }
>
>         dout("%s %p wait on tid %llu %llu\n", __func__,
> @@ -2428,6 +2405,7 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
>                 ceph_mdsc_put_request(req1);
>         if (req2)
>                 ceph_mdsc_put_request(req2);
> +       kfree(sessions);

Nit: since the sessions array is allocated after the references to
req1 and req2 are grabbed, I would free it before these references are
put.

Thanks,

                Ilya



[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux